1 #include "clapack.h"
2 #include "f2cP.h"
3 
dlar1v_(integer * n,integer * b1,integer * bn,double * lambda,double * d__,double * l,double * ld,double * lld,double * pivmin,double * gaptol,double * z__,bool * wantnc,integer * negcnt,double * ztz,double * mingma,integer * r__,integer * isuppz,double * nrminv,double * resid,double * rqcorr,double * work)4 /* Subroutine */ int dlar1v_(integer *n, integer *b1, integer *bn, double
5 	*lambda, double *d__, double *l, double *ld, double *
6 	lld, double *pivmin, double *gaptol, double *z__, bool
7 	*wantnc, integer *negcnt, double *ztz, double *mingma,
8 	integer *r__, integer *isuppz, double *nrminv, double *resid,
9 	double *rqcorr, double *work)
10 {
11     /* System generated locals */
12     integer i__1;
13     double d__1, d__2, d__3;
14 
15     /* Builtin functions
16     double sqrt(double); */
17 
18     /* Local variables */
19     integer i__;
20     double s;
21     integer r1, r2;
22     double eps, tmp;
23     integer neg1, neg2, indp, inds;
24     double dplus;
25 
26 
27     integer indlpl, indumn;
28     double dminus;
29     bool sawnan1, sawnan2;
30 
31 
32 /*  -- LAPACK auxiliary routine (version 3.1) -- */
33 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
34 /*     November 2006 */
35 
36 /*     .. Scalar Arguments .. */
37 /*     .. */
38 /*     .. Array Arguments .. */
39 /*     .. */
40 
41 /*  Purpose */
42 /*  ======= */
43 
44 /*  DLAR1V computes the (scaled) r-th column of the inverse of */
45 /*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
46 /*  L D L^T - sigma I. When sigma is close to an eigenvalue, the */
47 /*  computed vector is an accurate eigenvector. Usually, r corresponds */
48 /*  to the index where the eigenvector is largest in magnitude. */
49 /*  The following steps accomplish this computation : */
50 /*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T, */
51 /*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
52 /*  (c) Computation of the diagonal elements of the inverse of */
53 /*      L D L^T - sigma I by combining the above transforms, and choosing */
54 /*      r as the index where the diagonal of the inverse is (one of the) */
55 /*      largest in magnitude. */
56 /*  (d) Computation of the (scaled) r-th column of the inverse using the */
57 /*      twisted factorization obtained by combining the top part of the */
58 /*      the stationary and the bottom part of the progressive transform. */
59 
60 /*  Arguments */
61 /*  ========= */
62 
63 /*  N        (input) INTEGER */
64 /*           The order of the matrix L D L^T. */
65 
66 /*  B1       (input) INTEGER */
67 /*           First index of the submatrix of L D L^T. */
68 
69 /*  BN       (input) INTEGER */
70 /*           Last index of the submatrix of L D L^T. */
71 
72 /*  LAMBDA    (input) DOUBLE PRECISION */
73 /*           The shift. In order to compute an accurate eigenvector, */
74 /*           LAMBDA should be a good approximation to an eigenvalue */
75 /*           of L D L^T. */
76 
77 /*  L        (input) DOUBLE PRECISION array, dimension (N-1) */
78 /*           The (n-1) subdiagonal elements of the unit bidiagonal matrix */
79 /*           L, in elements 1 to N-1. */
80 
81 /*  D        (input) DOUBLE PRECISION array, dimension (N) */
82 /*           The n diagonal elements of the diagonal matrix D. */
83 
84 /*  LD       (input) DOUBLE PRECISION array, dimension (N-1) */
85 /*           The n-1 elements L(i)*D(i). */
86 
87 /*  LLD      (input) DOUBLE PRECISION array, dimension (N-1) */
88 /*           The n-1 elements L(i)*L(i)*D(i). */
89 
90 /*  PIVMIN   (input) DOUBLE PRECISION */
91 /*           The minimum pivot in the Sturm sequence. */
92 
93 /*  GAPTOL   (input) DOUBLE PRECISION */
94 /*           Tolerance that indicates when eigenvector entries are negligible */
95 /*           w.r.t. their contribution to the residual. */
96 
97 /*  Z        (input/output) DOUBLE PRECISION array, dimension (N) */
98 /*           On input, all entries of Z must be set to 0. */
99 /*           On output, Z contains the (scaled) r-th column of the */
100 /*           inverse. The scaling is such that Z(R) equals 1. */
101 
102 /*  WANTNC   (input) LOGICAL */
103 /*           Specifies whether NEGCNT has to be computed. */
104 
105 /*  NEGCNT   (output) INTEGER */
106 /*           If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
107 /*           in the  matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
108 
109 /*  ZTZ      (output) DOUBLE PRECISION */
110 /*           The square of the 2-norm of Z. */
111 
112 /*  MINGMA   (output) DOUBLE PRECISION */
113 /*           The reciprocal of the largest (in magnitude) diagonal */
114 /*           element of the inverse of L D L^T - sigma I. */
115 
116 /*  R        (input/output) INTEGER */
117 /*           The twist index for the twisted factorization used to */
118 /*           compute Z. */
119 /*           On input, 0 <= R <= N. If R is input as 0, R is set to */
120 /*           the index where (L D L^T - sigma I)^{-1} is largest */
121 /*           in magnitude. If 1 <= R <= N, R is unchanged. */
122 /*           On output, R contains the twist index used to compute Z. */
123 /*           Ideally, R designates the position of the maximum entry in the */
124 /*           eigenvector. */
125 
126 /*  ISUPPZ   (output) INTEGER array, dimension (2) */
127 /*           The support of the vector in Z, i.e., the vector Z is */
128 /*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
129 
130 /*  NRMINV   (output) DOUBLE PRECISION */
131 /*           NRMINV = 1/SQRT( ZTZ ) */
132 
133 /*  RESID    (output) DOUBLE PRECISION */
134 /*           The residual of the FP vector. */
135 /*           RESID = ABS( MINGMA )/SQRT( ZTZ ) */
136 
137 /*  RQCORR   (output) DOUBLE PRECISION */
138 /*           The Rayleigh Quotient correction to LAMBDA. */
139 /*           RQCORR = MINGMA*TMP */
140 
141 /*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N) */
142 
143 /*  Further Details */
144 /*  =============== */
145 
146 /*  Based on contributions by */
147 /*     Beresford Parlett, University of California, Berkeley, USA */
148 /*     Jim Demmel, University of California, Berkeley, USA */
149 /*     Inderjit Dhillon, University of Texas, Austin, USA */
150 /*     Osni Marques, LBNL/NERSC, USA */
151 /*     Christof Voemel, University of California, Berkeley, USA */
152 
153 /*  ===================================================================== */
154 
155 /*     .. Parameters .. */
156 /*     .. */
157 /*     .. Local Scalars .. */
158 /*     .. */
159 /*     .. External Functions .. */
160 /*     .. */
161 /*     .. Intrinsic Functions .. */
162 /*     .. */
163 /*     .. Executable Statements .. */
164 
165     /* Parameter adjustments */
166     --work;
167     --isuppz;
168     --z__;
169     --lld;
170     --ld;
171     --l;
172     --d__;
173 
174     /* Function Body */
175     eps = dlamch_("Precision");
176     if (*r__ == 0) {
177 	r1 = *b1;
178 	r2 = *bn;
179     } else {
180 	r1 = *r__;
181 	r2 = *r__;
182     }
183 /*     Storage for LPLUS */
184     indlpl = 0;
185 /*     Storage for UMINUS */
186     indumn = *n;
187     inds = (*n << 1) + 1;
188     indp = *n * 3 + 1;
189     if (*b1 == 1) {
190 	work[inds] = 0.;
191     } else {
192 	work[inds + *b1 - 1] = lld[*b1 - 1];
193     }
194 
195 /*     Compute the stationary transform (using the differential form) */
196 /*     until the index R2. */
197 
198     sawnan1 = false;
199     neg1 = 0;
200     s = work[inds + *b1 - 1] - *lambda;
201     i__1 = r1 - 1;
202     for (i__ = *b1; i__ <= i__1; ++i__) {
203 	dplus = d__[i__] + s;
204 	work[indlpl + i__] = ld[i__] / dplus;
205 	if (dplus < 0.) {
206 	    ++neg1;
207 	}
208 	work[inds + i__] = s * work[indlpl + i__] * l[i__];
209 	s = work[inds + i__] - *lambda;
210 /* L50: */
211     }
212     sawnan1 = disnan_(&s);
213     if (sawnan1) {
214 	goto L60;
215     }
216     i__1 = r2 - 1;
217     for (i__ = r1; i__ <= i__1; ++i__) {
218 	dplus = d__[i__] + s;
219 	work[indlpl + i__] = ld[i__] / dplus;
220 	work[inds + i__] = s * work[indlpl + i__] * l[i__];
221 	s = work[inds + i__] - *lambda;
222 /* L51: */
223     }
224     sawnan1 = disnan_(&s);
225 
226 L60:
227     if (sawnan1) {
228 /*        Runs a slower version of the above loop if a NaN is detected */
229 	neg1 = 0;
230 	s = work[inds + *b1 - 1] - *lambda;
231 	i__1 = r1 - 1;
232 	for (i__ = *b1; i__ <= i__1; ++i__) {
233 	    dplus = d__[i__] + s;
234 	    if (abs(dplus) < *pivmin) {
235 		dplus = -(*pivmin);
236 	    }
237 	    work[indlpl + i__] = ld[i__] / dplus;
238 	    if (dplus < 0.) {
239 		++neg1;
240 	    }
241 	    work[inds + i__] = s * work[indlpl + i__] * l[i__];
242 	    if (work[indlpl + i__] == 0.) {
243 		work[inds + i__] = lld[i__];
244 	    }
245 	    s = work[inds + i__] - *lambda;
246 /* L70: */
247 	}
248 	i__1 = r2 - 1;
249 	for (i__ = r1; i__ <= i__1; ++i__) {
250 	    dplus = d__[i__] + s;
251 	    if (abs(dplus) < *pivmin) {
252 		dplus = -(*pivmin);
253 	    }
254 	    work[indlpl + i__] = ld[i__] / dplus;
255 	    work[inds + i__] = s * work[indlpl + i__] * l[i__];
256 	    if (work[indlpl + i__] == 0.) {
257 		work[inds + i__] = lld[i__];
258 	    }
259 	    s = work[inds + i__] - *lambda;
260 /* L71: */
261 	}
262     }
263 
264 /*     Compute the progressive transform (using the differential form) */
265 /*     until the index R1 */
266 
267     sawnan2 = false;
268     neg2 = 0;
269     work[indp + *bn - 1] = d__[*bn] - *lambda;
270     i__1 = r1;
271     for (i__ = *bn - 1; i__ >= i__1; --i__) {
272 	dminus = lld[i__] + work[indp + i__];
273 	tmp = d__[i__] / dminus;
274 	if (dminus < 0.) {
275 	    ++neg2;
276 	}
277 	work[indumn + i__] = l[i__] * tmp;
278 	work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
279 /* L80: */
280     }
281     tmp = work[indp + r1 - 1];
282     sawnan2 = disnan_(&tmp);
283     if (sawnan2) {
284 /*        Runs a slower version of the above loop if a NaN is detected */
285 	neg2 = 0;
286 	i__1 = r1;
287 	for (i__ = *bn - 1; i__ >= i__1; --i__) {
288 	    dminus = lld[i__] + work[indp + i__];
289 	    if (abs(dminus) < *pivmin) {
290 		dminus = -(*pivmin);
291 	    }
292 	    tmp = d__[i__] / dminus;
293 	    if (dminus < 0.) {
294 		++neg2;
295 	    }
296 	    work[indumn + i__] = l[i__] * tmp;
297 	    work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
298 	    if (tmp == 0.) {
299 		work[indp + i__ - 1] = d__[i__] - *lambda;
300 	    }
301 /* L100: */
302 	}
303     }
304 
305 /*     Find the index (from R1 to R2) of the largest (in magnitude) */
306 /*     diagonal element of the inverse */
307 
308     *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
309     if (*mingma < 0.) {
310 	++neg1;
311     }
312     if (*wantnc) {
313 	*negcnt = neg1 + neg2;
314     } else {
315 	*negcnt = -1;
316     }
317     if (abs(*mingma) == 0.) {
318 	*mingma = eps * work[inds + r1 - 1];
319     }
320     *r__ = r1;
321     i__1 = r2 - 1;
322     for (i__ = r1; i__ <= i__1; ++i__) {
323 	tmp = work[inds + i__] + work[indp + i__];
324 	if (tmp == 0.) {
325 	    tmp = eps * work[inds + i__];
326 	}
327 	if (abs(tmp) <= abs(*mingma)) {
328 	    *mingma = tmp;
329 	    *r__ = i__ + 1;
330 	}
331 /* L110: */
332     }
333 
334 /*     Compute the FP vector: solve N^T v = e_r */
335 
336     isuppz[1] = *b1;
337     isuppz[2] = *bn;
338     z__[*r__] = 1.;
339     *ztz = 1.;
340 
341 /*     Compute the FP vector upwards from R */
342 
343     if (! sawnan1 && ! sawnan2) {
344 	i__1 = *b1;
345 	for (i__ = *r__ - 1; i__ >= i__1; --i__) {
346 	    z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
347 	    if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
348 		    d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
349 		z__[i__] = 0.;
350 		isuppz[1] = i__ + 1;
351 		goto L220;
352 	    }
353 	    *ztz += z__[i__] * z__[i__];
354 /* L210: */
355 	}
356 L220:
357 	;
358     } else {
359 /*        Run slower loop if NaN occurred. */
360 	i__1 = *b1;
361 	for (i__ = *r__ - 1; i__ >= i__1; --i__) {
362 	    if (z__[i__ + 1] == 0.) {
363 		z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
364 	    } else {
365 		z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
366 	    }
367 	    if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
368 		    d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
369 		z__[i__] = 0.;
370 		isuppz[1] = i__ + 1;
371 		goto L240;
372 	    }
373 	    *ztz += z__[i__] * z__[i__];
374 /* L230: */
375 	}
376 L240:
377 	;
378     }
379 /*     Compute the FP vector downwards from R in blocks of size BLKSIZ */
380     if (! sawnan1 && ! sawnan2) {
381 	i__1 = *bn - 1;
382 	for (i__ = *r__; i__ <= i__1; ++i__) {
383 	    z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
384 	    if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
385 		    d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
386 		z__[i__ + 1] = 0.;
387 		isuppz[2] = i__;
388 		goto L260;
389 	    }
390 	    *ztz += z__[i__ + 1] * z__[i__ + 1];
391 /* L250: */
392 	}
393 L260:
394 	;
395     } else {
396 /*        Run slower loop if NaN occurred. */
397 	i__1 = *bn - 1;
398 	for (i__ = *r__; i__ <= i__1; ++i__) {
399 	    if (z__[i__] == 0.) {
400 		z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
401 	    } else {
402 		z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
403 	    }
404 	    if (((d__1 = z__[i__], abs(d__1)) + (d__2 = z__[i__ + 1], abs(
405 		    d__2))) * (d__3 = ld[i__], abs(d__3)) < *gaptol) {
406 		z__[i__ + 1] = 0.;
407 		isuppz[2] = i__;
408 		goto L280;
409 	    }
410 	    *ztz += z__[i__ + 1] * z__[i__ + 1];
411 /* L270: */
412 	}
413 L280:
414 	;
415     }
416 
417 /*     Compute quantities for convergence test */
418 
419     tmp = 1. / *ztz;
420     *nrminv = sqrt(tmp);
421     *resid = abs(*mingma) * *nrminv;
422     *rqcorr = *mingma * tmp;
423 
424 
425     return 0;
426 
427 /*     End of DLAR1V */
428 
429 } /* dlar1v_ */
430 
dlar2v_(integer * n,double * x,double * y,double * z__,integer * incx,double * c__,double * s,integer * incc)431 /* Subroutine */ int dlar2v_(integer *n, double *x, double *y,
432 	double *z__, integer *incx, double *c__, double *s,
433 	integer *incc)
434 {
435     /* System generated locals */
436     integer i__1;
437 
438     /* Local variables */
439     integer i__;
440     double t1, t2, t3, t4, t5, t6;
441     integer ic;
442     double ci, si;
443     integer ix;
444     double xi, yi, zi;
445 
446 
447 /*  -- LAPACK auxiliary routine (version 3.1) -- */
448 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
449 /*     November 2006 */
450 
451 /*     .. Scalar Arguments .. */
452 /*     .. */
453 /*     .. Array Arguments .. */
454 /*     .. */
455 
456 /*  Purpose */
457 /*  ======= */
458 
459 /*  DLAR2V applies a vector of real plane rotations from both sides to */
460 /*  a sequence of 2-by-2 real symmetric matrices, defined by the elements */
461 /*  of the vectors x, y and z. For i = 1,2,...,n */
462 
463 /*     ( x(i)  z(i) ) := (  c(i)  s(i) ) ( x(i)  z(i) ) ( c(i) -s(i) ) */
464 /*     ( z(i)  y(i) )    ( -s(i)  c(i) ) ( z(i)  y(i) ) ( s(i)  c(i) ) */
465 
466 /*  Arguments */
467 /*  ========= */
468 
469 /*  N       (input) INTEGER */
470 /*          The number of plane rotations to be applied. */
471 
472 /*  X       (input/output) DOUBLE PRECISION array, */
473 /*                         dimension (1+(N-1)*INCX) */
474 /*          The vector x. */
475 
476 /*  Y       (input/output) DOUBLE PRECISION array, */
477 /*                         dimension (1+(N-1)*INCX) */
478 /*          The vector y. */
479 
480 /*  Z       (input/output) DOUBLE PRECISION array, */
481 /*                         dimension (1+(N-1)*INCX) */
482 /*          The vector z. */
483 
484 /*  INCX    (input) INTEGER */
485 /*          The increment between elements of X, Y and Z. INCX > 0. */
486 
487 /*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */
488 /*          The cosines of the plane rotations. */
489 
490 /*  S       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */
491 /*          The sines of the plane rotations. */
492 
493 /*  INCC    (input) INTEGER */
494 /*          The increment between elements of C and S. INCC > 0. */
495 
496 /*  ===================================================================== */
497 
498 /*     .. Local Scalars .. */
499 /*     .. */
500 /*     .. Executable Statements .. */
501 
502     /* Parameter adjustments */
503     --s;
504     --c__;
505     --z__;
506     --y;
507     --x;
508 
509     /* Function Body */
510     ix = 1;
511     ic = 1;
512     i__1 = *n;
513     for (i__ = 1; i__ <= i__1; ++i__) {
514 	xi = x[ix];
515 	yi = y[ix];
516 	zi = z__[ix];
517 	ci = c__[ic];
518 	si = s[ic];
519 	t1 = si * zi;
520 	t2 = ci * zi;
521 	t3 = t2 - si * xi;
522 	t4 = t2 + si * yi;
523 	t5 = ci * xi + t1;
524 	t6 = ci * yi - t1;
525 	x[ix] = ci * t5 + si * t4;
526 	y[ix] = ci * t6 - si * t3;
527 	z__[ix] = ci * t4 - si * t5;
528 	ix += *incx;
529 	ic += *incc;
530 /* L10: */
531     }
532 
533 /*     End of DLAR2V */
534 
535     return 0;
536 } /* dlar2v_ */
537 
dlarf_(const char * side,integer * m,integer * n,double * v,integer * incv,double * tau,double * c__,integer * ldc,double * work)538 /* Subroutine */ int dlarf_(const char *side, integer *m, integer *n, double *v, integer *incv, double *tau, double *c__,
539 	integer *ldc, double *work)
540 {
541 	/* Table of constant values */
542 	static double c_b4 = 1.;
543 	static double c_b5 = 0.;
544 	static integer c__1 = 1;
545 
546     /* System generated locals */
547     integer c_dim1, c_offset;
548     double d__1;
549 
550     /* Local variables */
551     integer i__;
552     bool applyleft;
553 	integer lastc, lastv;
554 
555 
556 /*  -- LAPACK auxiliary routine (version 3.2) -- */
557 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
558 /*     November 2006 */
559 
560 /*     .. Scalar Arguments .. */
561 /*     .. */
562 /*     .. Array Arguments .. */
563 /*     .. */
564 
565 /*  Purpose */
566 /*  ======= */
567 
568 /*  DLARF applies a real elementary reflector H to a real m by n matrix */
569 /*  C, from either the left or the right. H is represented in the form */
570 
571 /*        H = I - tau * v * v' */
572 
573 /*  where tau is a real scalar and v is a real vector. */
574 
575 /*  If tau = 0, then H is taken to be the unit matrix. */
576 
577 /*  Arguments */
578 /*  ========= */
579 
580 /*  SIDE    (input) CHARACTER*1 */
581 /*          = 'L': form  H * C */
582 /*          = 'R': form  C * H */
583 
584 /*  M       (input) INTEGER */
585 /*          The number of rows of the matrix C. */
586 
587 /*  N       (input) INTEGER */
588 /*          The number of columns of the matrix C. */
589 
590 /*  V       (input) DOUBLE PRECISION array, dimension */
591 /*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L' */
592 /*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R' */
593 /*          The vector v in the representation of H. V is not used if */
594 /*          TAU = 0. */
595 
596 /*  INCV    (input) INTEGER */
597 /*          The increment between elements of v. INCV <> 0. */
598 
599 /*  TAU     (input) DOUBLE PRECISION */
600 /*          The value tau in the representation of H. */
601 
602 /*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
603 /*          On entry, the m by n matrix C. */
604 /*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
605 /*          or C * H if SIDE = 'R'. */
606 
607 /*  LDC     (input) INTEGER */
608 /*          The leading dimension of the array C. LDC >= max(1,M). */
609 
610 /*  WORK    (workspace) DOUBLE PRECISION array, dimension */
611 /*                         (N) if SIDE = 'L' */
612 /*                      or (M) if SIDE = 'R' */
613 
614 /*  ===================================================================== */
615 
616 /*     .. Parameters .. */
617 /*     .. */
618 /*     .. Local Scalars .. */
619 /*     .. */
620 /*     .. External Subroutines .. */
621 /*     .. */
622 /*     .. External Functions .. */
623 /*     .. */
624 /*     .. Executable Statements .. */
625 
626     /* Parameter adjustments */
627     --v;
628     c_dim1 = *ldc;
629     c_offset = 1 + c_dim1;
630     c__ -= c_offset;
631     --work;
632 
633     /* Function Body */
634     applyleft = lsame_(side, "L");
635     lastv = 0;
636     lastc = 0;
637     if (*tau != 0.) {
638 /*     Set up variables for scanning V.  LASTV begins pointing to the end */
639 /*     of V. */
640 	if (applyleft) {
641 	    lastv = *m;
642 	} else {
643 	    lastv = *n;
644 	}
645 	if (*incv > 0) {
646 	    i__ = (lastv - 1) * *incv + 1;
647 	} else {
648 	    i__ = 1;
649 	}
650 /*     Look for the last non-zero row in V. */
651 	while(lastv > 0 && v[i__] == 0.) {
652 	    --lastv;
653 	    i__ -= *incv;
654 	}
655 	if (applyleft) {
656 /*     Scan for the last non-zero column in C(1:lastv,:). */
657 	    lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
658 	} else {
659 /*     Scan for the last non-zero row in C(:,1:lastv). */
660 	    lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
661 	}
662     }
663 /*     Note that lastc.eq.0 renders the BLAS operations null; no special */
664 /*     case is needed at this level. */
665     if (applyleft) {
666 
667 /*        Form  H * C */
668 
669 	if (lastv > 0) {
670 
671 /*           w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */
672 
673 	    dgemv_("Transpose", &lastv, &lastc, &c_b4, &c__[c_offset], ldc, &
674 		    v[1], incv, &c_b5, &work[1], &c__1);
675 
676 /*           C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */
677 
678 	    d__1 = -(*tau);
679 	    dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[
680 		    c_offset], ldc);
681 	}
682     } else {
683 
684 /*        Form  C * H */
685 
686 	if (lastv > 0) {
687 
688 /*           w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */
689 
690 	    dgemv_("No transpose", &lastc, &lastv, &c_b4, &c__[c_offset], ldc,
691 		     &v[1], incv, &c_b5, &work[1], &c__1);
692 
693 /*           C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */
694 
695 	    d__1 = -(*tau);
696 	    dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[
697 		    c_offset], ldc);
698 	}
699     }
700     return 0;
701 
702 /*     End of DLARF */
703 
704 } /* dlarf_ */
705 
dlarfb_(const char * side,const char * trans,const char * direct,const char * storev,integer * m,integer * n,integer * k,double * v,integer * ldv,double * t,integer * ldt,double * c__,integer * ldc,double * work,integer * ldwork)706 /* Subroutine */ int dlarfb_(const char *side, const char *trans, const char *direct, const char *storev, integer *m,
707 	integer *n, integer *k, double *v, integer *ldv, double *t, integer *ldt, double *c__,
708 	integer *ldc, double *work, integer *ldwork)
709 {
710 	/* Table of constant values */
711 	static integer c__1 = 1;
712 	static double c_b14 = 1.;
713 	static double c_b25 = -1.;
714 
715     /* System generated locals */
716     integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
717 	    work_offset, i__1, i__2;
718 
719     /* Local variables */
720     integer i__, j;
721     integer lastc;
722     integer lastv;
723 	char transt[1];
724 
725 
726 /*  -- LAPACK auxiliary routine (version 3.2) -- */
727 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
728 /*     November 2006 */
729 
730 /*     .. Scalar Arguments .. */
731 /*     .. */
732 /*     .. Array Arguments .. */
733 /*     .. */
734 
735 /*  Purpose */
736 /*  ======= */
737 
738 /*  DLARFB applies a real block reflector H or its transpose H' to a */
739 /*  real m by n matrix C, from either the left or the right. */
740 
741 /*  Arguments */
742 /*  ========= */
743 
744 /*  SIDE    (input) CHARACTER*1 */
745 /*          = 'L': apply H or H' from the Left */
746 /*          = 'R': apply H or H' from the Right */
747 
748 /*  TRANS   (input) CHARACTER*1 */
749 /*          = 'N': apply H (No transpose) */
750 /*          = 'T': apply H' (Transpose) */
751 
752 /*  DIRECT  (input) CHARACTER*1 */
753 /*          Indicates how H is formed from a product of elementary */
754 /*          reflectors */
755 /*          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
756 /*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
757 
758 /*  STOREV  (input) CHARACTER*1 */
759 /*          Indicates how the vectors which define the elementary */
760 /*          reflectors are stored: */
761 /*          = 'C': Columnwise */
762 /*          = 'R': Rowwise */
763 
764 /*  M       (input) INTEGER */
765 /*          The number of rows of the matrix C. */
766 
767 /*  N       (input) INTEGER */
768 /*          The number of columns of the matrix C. */
769 
770 /*  K       (input) INTEGER */
771 /*          The order of the matrix T (= the number of elementary */
772 /*          reflectors whose product defines the block reflector). */
773 
774 /*  V       (input) DOUBLE PRECISION array, dimension */
775 /*                                (LDV,K) if STOREV = 'C' */
776 /*                                (LDV,M) if STOREV = 'R' and SIDE = 'L' */
777 /*                                (LDV,N) if STOREV = 'R' and SIDE = 'R' */
778 /*          The matrix V. See further details. */
779 
780 /*  LDV     (input) INTEGER */
781 /*          The leading dimension of the array V. */
782 /*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); */
783 /*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); */
784 /*          if STOREV = 'R', LDV >= K. */
785 
786 /*  T       (input) DOUBLE PRECISION array, dimension (LDT,K) */
787 /*          The triangular k by k matrix T in the representation of the */
788 /*          block reflector. */
789 
790 /*  LDT     (input) INTEGER */
791 /*          The leading dimension of the array T. LDT >= K. */
792 
793 /*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
794 /*          On entry, the m by n matrix C. */
795 /*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */
796 
797 /*  LDC     (input) INTEGER */
798 /*          The leading dimension of the array C. LDA >= max(1,M). */
799 
800 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */
801 
802 /*  LDWORK  (input) INTEGER */
803 /*          The leading dimension of the array WORK. */
804 /*          If SIDE = 'L', LDWORK >= max(1,N); */
805 /*          if SIDE = 'R', LDWORK >= max(1,M). */
806 
807 /*  ===================================================================== */
808 
809 /*     .. Parameters .. */
810 /*     .. */
811 /*     .. Local Scalars .. */
812 /*     .. */
813 /*     .. External Functions .. */
814 /*     .. */
815 /*     .. External Subroutines .. */
816 /*     .. */
817 /*     .. Executable Statements .. */
818 
819 /*     Quick return if possible */
820 
821     /* Parameter adjustments */
822     v_dim1 = *ldv;
823     v_offset = 1 + v_dim1;
824     v -= v_offset;
825     t_dim1 = *ldt;
826     t_offset = 1 + t_dim1;
827     t -= t_offset;
828     c_dim1 = *ldc;
829     c_offset = 1 + c_dim1;
830     c__ -= c_offset;
831     work_dim1 = *ldwork;
832     work_offset = 1 + work_dim1;
833     work -= work_offset;
834 
835     /* Function Body */
836     if (*m <= 0 || *n <= 0) {
837 	return 0;
838     }
839 
840     if (lsame_(trans, "N")) {
841 	*(unsigned char *)transt = 'T';
842     } else {
843 	*(unsigned char *)transt = 'N';
844     }
845 
846     if (lsame_(storev, "C")) {
847 
848 	if (lsame_(direct, "F")) {
849 
850 /*           Let  V =  ( V1 )    (first K rows) */
851 /*                     ( V2 ) */
852 /*           where  V1  is unit lower triangular. */
853 
854 	    if (lsame_(side, "L")) {
855 
856 /*              Form  H * C  or  H' * C  where  C = ( C1 ) */
857 /*                                                  ( C2 ) */
858 
859 /* Computing MAX */
860 		i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv);
861 		lastv = std::max(i__1,i__2);
862 		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
863 
864 /*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */
865 
866 /*              W := C1' */
867 
868 		i__1 = *k;
869 		for (j = 1; j <= i__1; ++j) {
870 		    dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1
871 			    + 1], &c__1);
872 /* L10: */
873 		}
874 
875 /*              W := W * V1 */
876 
877 		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
878 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
879 		if (lastv > *k) {
880 
881 /*                 W := W + C2'*V2 */
882 
883 		    i__1 = lastv - *k;
884 		    dgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
885 			    c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 +
886 			    v_dim1], ldv, &c_b14, &work[work_offset], ldwork);
887 		}
888 
889 /*              W := W * T'  or  W * T */
890 
891 		dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
892 			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
893 
894 /*              C := C - V * W' */
895 
896 		if (lastv > *k) {
897 
898 /*                 C2 := C2 - V2 * W' */
899 
900 		    i__1 = lastv - *k;
901 		    dgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
902 			    c_b25, &v[*k + 1 + v_dim1], ldv, &work[
903 			    work_offset], ldwork, &c_b14, &c__[*k + 1 +
904 			    c_dim1], ldc);
905 		}
906 
907 /*              W := W * V1' */
908 
909 		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
910 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
911 
912 /*              C1 := C1 - W' */
913 
914 		i__1 = *k;
915 		for (j = 1; j <= i__1; ++j) {
916 		    i__2 = lastc;
917 		    for (i__ = 1; i__ <= i__2; ++i__) {
918 			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
919 /* L20: */
920 		    }
921 /* L30: */
922 		}
923 
924 	    } else if (lsame_(side, "R")) {
925 
926 /*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
927 
928 /* Computing MAX */
929 		i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv);
930 		lastv = std::max(i__1,i__2);
931 		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
932 
933 /*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */
934 
935 /*              W := C1 */
936 
937 		i__1 = *k;
938 		for (j = 1; j <= i__1; ++j) {
939 		    dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j *
940 			    work_dim1 + 1], &c__1);
941 /* L40: */
942 		}
943 
944 /*              W := W * V1 */
945 
946 		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
947 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
948 		if (lastv > *k) {
949 
950 /*                 W := W + C2 * V2 */
951 
952 		    i__1 = lastv - *k;
953 		    dgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
954 			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k +
955 			    1 + v_dim1], ldv, &c_b14, &work[work_offset],
956 			    ldwork);
957 		}
958 
959 /*              W := W * T  or  W * T' */
960 
961 		dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14,
962 			 &t[t_offset], ldt, &work[work_offset], ldwork);
963 
964 /*              C := C - W * V' */
965 
966 		if (lastv > *k) {
967 
968 /*                 C2 := C2 - W * V2' */
969 
970 		    i__1 = lastv - *k;
971 		    dgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
972 			    c_b25, &work[work_offset], ldwork, &v[*k + 1 +
973 			    v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1],
974 			     ldc);
975 		}
976 
977 /*              W := W * V1' */
978 
979 		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
980 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
981 
982 /*              C1 := C1 - W */
983 
984 		i__1 = *k;
985 		for (j = 1; j <= i__1; ++j) {
986 		    i__2 = lastc;
987 		    for (i__ = 1; i__ <= i__2; ++i__) {
988 			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
989 /* L50: */
990 		    }
991 /* L60: */
992 		}
993 	    }
994 
995 	} else {
996 
997 /*           Let  V =  ( V1 ) */
998 /*                     ( V2 )    (last K rows) */
999 /*           where  V2  is unit upper triangular. */
1000 
1001 	    if (lsame_(side, "L")) {
1002 
1003 /*              Form  H * C  or  H' * C  where  C = ( C1 ) */
1004 /*                                                  ( C2 ) */
1005 
1006 /* Computing MAX */
1007 		i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv);
1008 		lastv = std::max(i__1,i__2);
1009 		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
1010 
1011 /*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */
1012 
1013 /*              W := C2' */
1014 
1015 		i__1 = *k;
1016 		for (j = 1; j <= i__1; ++j) {
1017 		    dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
1018 			    j * work_dim1 + 1], &c__1);
1019 /* L70: */
1020 		}
1021 
1022 /*              W := W * V2 */
1023 
1024 		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
1025 			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
1026 			work_offset], ldwork);
1027 		if (lastv > *k) {
1028 
1029 /*                 W := W + C1'*V1 */
1030 
1031 		    i__1 = lastv - *k;
1032 		    dgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
1033 			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
1034 			    c_b14, &work[work_offset], ldwork);
1035 		}
1036 
1037 /*              W := W * T'  or  W * T */
1038 
1039 		dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
1040 			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
1041 
1042 /*              C := C - V * W' */
1043 
1044 		if (lastv > *k) {
1045 
1046 /*                 C1 := C1 - V1 * W' */
1047 
1048 		    i__1 = lastv - *k;
1049 		    dgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
1050 			    c_b25, &v[v_offset], ldv, &work[work_offset],
1051 			    ldwork, &c_b14, &c__[c_offset], ldc);
1052 		}
1053 
1054 /*              W := W * V2' */
1055 
1056 		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
1057 			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
1058 			work_offset], ldwork);
1059 
1060 /*              C2 := C2 - W' */
1061 
1062 		i__1 = *k;
1063 		for (j = 1; j <= i__1; ++j) {
1064 		    i__2 = lastc;
1065 		    for (i__ = 1; i__ <= i__2; ++i__) {
1066 			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j *
1067 				work_dim1];
1068 /* L80: */
1069 		    }
1070 /* L90: */
1071 		}
1072 
1073 	    } else if (lsame_(side, "R")) {
1074 
1075 /*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
1076 
1077 /* Computing MAX */
1078 		i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv);
1079 		lastv = std::max(i__1,i__2);
1080 		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
1081 
1082 /*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */
1083 
1084 /*              W := C2 */
1085 
1086 		i__1 = *k;
1087 		for (j = 1; j <= i__1; ++j) {
1088 		    dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &
1089 			    work[j * work_dim1 + 1], &c__1);
1090 /* L100: */
1091 		}
1092 
1093 /*              W := W * V2 */
1094 
1095 		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
1096 			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
1097 			work_offset], ldwork);
1098 		if (lastv > *k) {
1099 
1100 /*                 W := W + C1 * V1 */
1101 
1102 		    i__1 = lastv - *k;
1103 		    dgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
1104 			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
1105 			    c_b14, &work[work_offset], ldwork);
1106 		}
1107 
1108 /*              W := W * T  or  W * T' */
1109 
1110 		dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14,
1111 			 &t[t_offset], ldt, &work[work_offset], ldwork);
1112 
1113 /*              C := C - W * V' */
1114 
1115 		if (lastv > *k) {
1116 
1117 /*                 C1 := C1 - W * V1' */
1118 
1119 		    i__1 = lastv - *k;
1120 		    dgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
1121 			    c_b25, &work[work_offset], ldwork, &v[v_offset],
1122 			    ldv, &c_b14, &c__[c_offset], ldc);
1123 		}
1124 
1125 /*              W := W * V2' */
1126 
1127 		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
1128 			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
1129 			work_offset], ldwork);
1130 
1131 /*              C2 := C2 - W */
1132 
1133 		i__1 = *k;
1134 		for (j = 1; j <= i__1; ++j) {
1135 		    i__2 = lastc;
1136 		    for (i__ = 1; i__ <= i__2; ++i__) {
1137 			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
1138 				 work_dim1];
1139 /* L110: */
1140 		    }
1141 /* L120: */
1142 		}
1143 	    }
1144 	}
1145 
1146     } else if (lsame_(storev, "R")) {
1147 
1148 	if (lsame_(direct, "F")) {
1149 
1150 /*           Let  V =  ( V1  V2 )    (V1: first K columns) */
1151 /*           where  V1  is unit upper triangular. */
1152 
1153 	    if (lsame_(side, "L")) {
1154 
1155 /*              Form  H * C  or  H' * C  where  C = ( C1 ) */
1156 /*                                                  ( C2 ) */
1157 
1158 /* Computing MAX */
1159 		i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv);
1160 		lastv = std::max(i__1,i__2);
1161 		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
1162 
1163 /*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */
1164 
1165 /*              W := C1' */
1166 
1167 		i__1 = *k;
1168 		for (j = 1; j <= i__1; ++j) {
1169 		    dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1
1170 			    + 1], &c__1);
1171 /* L130: */
1172 		}
1173 
1174 /*              W := W * V1' */
1175 
1176 		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
1177 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
1178 		if (lastv > *k) {
1179 
1180 /*                 W := W + C2'*V2' */
1181 
1182 		    i__1 = lastv - *k;
1183 		    dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14,
1184 			     &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1
1185 			    + 1], ldv, &c_b14, &work[work_offset], ldwork);
1186 		}
1187 
1188 /*              W := W * T'  or  W * T */
1189 
1190 		dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
1191 			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
1192 
1193 /*              C := C - V' * W' */
1194 
1195 		if (lastv > *k) {
1196 
1197 /*                 C2 := C2 - V2' * W' */
1198 
1199 		    i__1 = lastv - *k;
1200 		    dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25,
1201 			     &v[(*k + 1) * v_dim1 + 1], ldv, &work[
1202 			    work_offset], ldwork, &c_b14, &c__[*k + 1 +
1203 			    c_dim1], ldc);
1204 		}
1205 
1206 /*              W := W * V1 */
1207 
1208 		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
1209 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
1210 
1211 /*              C1 := C1 - W' */
1212 
1213 		i__1 = *k;
1214 		for (j = 1; j <= i__1; ++j) {
1215 		    i__2 = lastc;
1216 		    for (i__ = 1; i__ <= i__2; ++i__) {
1217 			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
1218 /* L140: */
1219 		    }
1220 /* L150: */
1221 		}
1222 
1223 	    } else if (lsame_(side, "R")) {
1224 
1225 /*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
1226 
1227 /* Computing MAX */
1228 		i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv);
1229 		lastv = std::max(i__1,i__2);
1230 		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
1231 
1232 /*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */
1233 
1234 /*              W := C1 */
1235 
1236 		i__1 = *k;
1237 		for (j = 1; j <= i__1; ++j) {
1238 		    dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j *
1239 			    work_dim1 + 1], &c__1);
1240 /* L160: */
1241 		}
1242 
1243 /*              W := W * V1' */
1244 
1245 		dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
1246 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
1247 		if (lastv > *k) {
1248 
1249 /*                 W := W + C2 * V2' */
1250 
1251 		    i__1 = lastv - *k;
1252 		    dgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
1253 			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k +
1254 			    1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset],
1255 			     ldwork);
1256 		}
1257 
1258 /*              W := W * T  or  W * T' */
1259 
1260 		dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14,
1261 			 &t[t_offset], ldt, &work[work_offset], ldwork);
1262 
1263 /*              C := C - W * V */
1264 
1265 		if (lastv > *k) {
1266 
1267 /*                 C2 := C2 - W * V2 */
1268 
1269 		    i__1 = lastv - *k;
1270 		    dgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
1271 			    c_b25, &work[work_offset], ldwork, &v[(*k + 1) *
1272 			    v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1
1273 			    + 1], ldc);
1274 		}
1275 
1276 /*              W := W * V1 */
1277 
1278 		dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
1279 			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
1280 
1281 /*              C1 := C1 - W */
1282 
1283 		i__1 = *k;
1284 		for (j = 1; j <= i__1; ++j) {
1285 		    i__2 = lastc;
1286 		    for (i__ = 1; i__ <= i__2; ++i__) {
1287 			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
1288 /* L170: */
1289 		    }
1290 /* L180: */
1291 		}
1292 
1293 	    }
1294 
1295 	} else {
1296 
1297 /*           Let  V =  ( V1  V2 )    (V2: last K columns) */
1298 /*           where  V2  is unit lower triangular. */
1299 
1300 	    if (lsame_(side, "L")) {
1301 
1302 /*              Form  H * C  or  H' * C  where  C = ( C1 ) */
1303 /*                                                  ( C2 ) */
1304 
1305 /* Computing MAX */
1306 		i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv);
1307 		lastv = std::max(i__1,i__2);
1308 		lastc = iladlc_(&lastv, n, &c__[c_offset], ldc);
1309 
1310 /*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */
1311 
1312 /*              W := C2' */
1313 
1314 		i__1 = *k;
1315 		for (j = 1; j <= i__1; ++j) {
1316 		    dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
1317 			    j * work_dim1 + 1], &c__1);
1318 /* L190: */
1319 		}
1320 
1321 /*              W := W * V2' */
1322 
1323 		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
1324 			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
1325 			work_offset], ldwork);
1326 		if (lastv > *k) {
1327 
1328 /*                 W := W + C1'*V1' */
1329 
1330 		    i__1 = lastv - *k;
1331 		    dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14,
1332 			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
1333 			    work[work_offset], ldwork);
1334 		}
1335 
1336 /*              W := W * T'  or  W * T */
1337 
1338 		dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
1339 			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);
1340 
1341 /*              C := C - V' * W' */
1342 
1343 		if (lastv > *k) {
1344 
1345 /*                 C1 := C1 - V1' * W' */
1346 
1347 		    i__1 = lastv - *k;
1348 		    dgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25,
1349 			     &v[v_offset], ldv, &work[work_offset], ldwork, &
1350 			    c_b14, &c__[c_offset], ldc);
1351 		}
1352 
1353 /*              W := W * V2 */
1354 
1355 		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
1356 			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
1357 			work_offset], ldwork);
1358 
1359 /*              C2 := C2 - W' */
1360 
1361 		i__1 = *k;
1362 		for (j = 1; j <= i__1; ++j) {
1363 		    i__2 = lastc;
1364 		    for (i__ = 1; i__ <= i__2; ++i__) {
1365 			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j *
1366 				work_dim1];
1367 /* L200: */
1368 		    }
1369 /* L210: */
1370 		}
1371 
1372 	    } else if (lsame_(side, "R")) {
1373 
1374 /*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */
1375 
1376 /* Computing MAX */
1377 		i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv);
1378 		lastv = std::max(i__1,i__2);
1379 		lastc = iladlr_(m, &lastv, &c__[c_offset], ldc);
1380 
1381 /*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */
1382 
1383 /*              W := C2 */
1384 
1385 		i__1 = *k;
1386 		for (j = 1; j <= i__1; ++j) {
1387 		    dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1,
1388 			     &work[j * work_dim1 + 1], &c__1);
1389 /* L220: */
1390 		}
1391 
1392 /*              W := W * V2' */
1393 
1394 		dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
1395 			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
1396 			work_offset], ldwork);
1397 		if (lastv > *k) {
1398 
1399 /*                 W := W + C1 * V1' */
1400 
1401 		    i__1 = lastv - *k;
1402 		    dgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
1403 			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
1404 			    c_b14, &work[work_offset], ldwork);
1405 		}
1406 
1407 /*              W := W * T  or  W * T' */
1408 
1409 		dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14,
1410 			 &t[t_offset], ldt, &work[work_offset], ldwork);
1411 
1412 /*              C := C - W * V */
1413 
1414 		if (lastv > *k) {
1415 
1416 /*                 C1 := C1 - W * V1 */
1417 
1418 		    i__1 = lastv - *k;
1419 		    dgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
1420 			    c_b25, &work[work_offset], ldwork, &v[v_offset],
1421 			    ldv, &c_b14, &c__[c_offset], ldc);
1422 		}
1423 
1424 /*              W := W * V2 */
1425 
1426 		dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
1427 			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
1428 			work_offset], ldwork);
1429 
1430 /*              C1 := C1 - W */
1431 
1432 		i__1 = *k;
1433 		for (j = 1; j <= i__1; ++j) {
1434 		    i__2 = lastc;
1435 		    for (i__ = 1; i__ <= i__2; ++i__) {
1436 			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
1437 				 work_dim1];
1438 /* L230: */
1439 		    }
1440 /* L240: */
1441 		}
1442 
1443 	    }
1444 
1445 	}
1446     }
1447 
1448     return 0;
1449 
1450 /*     End of DLARFB */
1451 
1452 } /* dlarfb_ */
1453 
dlarfg_(integer * n,double * alpha,double * x,integer * incx,double * tau)1454 /* Subroutine */ int dlarfg_(integer *n, double *alpha, double *x, integer *incx, double *tau)
1455 {
1456     /* System generated locals */
1457     integer i__1;
1458     double d__1;
1459 
1460     /* Local variables */
1461     integer j, knt;
1462     double beta, xnorm, safmin, rsafmn;
1463 
1464 
1465 /*  -- LAPACK auxiliary routine (version 3.2) -- */
1466 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
1467 /*     November 2006 */
1468 
1469 /*     .. Scalar Arguments .. */
1470 /*     .. */
1471 /*     .. Array Arguments .. */
1472 /*     .. */
1473 
1474 /*  Purpose */
1475 /*  ======= */
1476 
1477 /*  DLARFG generates a real elementary reflector H of order n, such */
1478 /*  that */
1479 
1480 /*        H * ( alpha ) = ( beta ),   H' * H = I. */
1481 /*            (   x   )   (   0  ) */
1482 
1483 /*  where alpha and beta are scalars, and x is an (n-1)-element real */
1484 /*  vector. H is represented in the form */
1485 
1486 /*        H = I - tau * ( 1 ) * ( 1 v' ) , */
1487 /*                      ( v ) */
1488 
1489 /*  where tau is a real scalar and v is a real (n-1)-element */
1490 /*  vector. */
1491 
1492 /*  If the elements of x are all zero, then tau = 0 and H is taken to be */
1493 /*  the unit matrix. */
1494 
1495 /*  Otherwise  1 <= tau <= 2. */
1496 
1497 /*  Arguments */
1498 /*  ========= */
1499 
1500 /*  N       (input) INTEGER */
1501 /*          The order of the elementary reflector. */
1502 
1503 /*  ALPHA   (input/output) DOUBLE PRECISION */
1504 /*          On entry, the value alpha. */
1505 /*          On exit, it is overwritten with the value beta. */
1506 
1507 /*  X       (input/output) DOUBLE PRECISION array, dimension */
1508 /*                         (1+(N-2)*abs(INCX)) */
1509 /*          On entry, the vector x. */
1510 /*          On exit, it is overwritten with the vector v. */
1511 
1512 /*  INCX    (input) INTEGER */
1513 /*          The increment between elements of X. INCX > 0. */
1514 
1515 /*  TAU     (output) DOUBLE PRECISION */
1516 /*          The value tau. */
1517 
1518 /*  ===================================================================== */
1519 
1520 /*     .. Parameters .. */
1521 /*     .. */
1522 /*     .. Local Scalars .. */
1523 /*     .. */
1524 /*     .. External Functions .. */
1525 /*     .. */
1526 /*     .. Intrinsic Functions .. */
1527 /*     .. */
1528 /*     .. External Subroutines .. */
1529 /*     .. */
1530 /*     .. Executable Statements .. */
1531 
1532     /* Parameter adjustments */
1533     --x;
1534 
1535     /* Function Body */
1536     if (*n <= 1) {
1537 	*tau = 0.;
1538 	return 0;
1539     }
1540 
1541     i__1 = *n - 1;
1542     xnorm = dnrm2_(&i__1, &x[1], incx);
1543 
1544     if (xnorm == 0.) {
1545 
1546 /*        H  =  I */
1547 
1548 	*tau = 0.;
1549     } else {
1550 
1551 /*        general case */
1552 
1553 	d__1 = dlapy2_(alpha, &xnorm);
1554 	beta = -d_sign(&d__1, alpha);
1555 	safmin = dlamch_("S") / dlamch_("E");
1556 	knt = 0;
1557 	if (abs(beta) < safmin) {
1558 
1559 /*           XNORM, BETA may be inaccurate; scale X and recompute them */
1560 
1561 	    rsafmn = 1. / safmin;
1562 L10:
1563 	    ++knt;
1564 	    i__1 = *n - 1;
1565 	    dscal_(&i__1, &rsafmn, &x[1], incx);
1566 	    beta *= rsafmn;
1567 	    *alpha *= rsafmn;
1568 	    if (abs(beta) < safmin) {
1569 		goto L10;
1570 	    }
1571 
1572 /*           New BETA is at most 1, at least SAFMIN */
1573 
1574 	    i__1 = *n - 1;
1575 	    xnorm = dnrm2_(&i__1, &x[1], incx);
1576 	    d__1 = dlapy2_(alpha, &xnorm);
1577 	    beta = -d_sign(&d__1, alpha);
1578 	}
1579 	*tau = (beta - *alpha) / beta;
1580 	i__1 = *n - 1;
1581 	d__1 = 1. / (*alpha - beta);
1582 	dscal_(&i__1, &d__1, &x[1], incx);
1583 
1584 /*        If ALPHA is subnormal, it may lose relative accuracy */
1585 
1586 	i__1 = knt;
1587 	for (j = 1; j <= i__1; ++j) {
1588 	    beta *= safmin;
1589 /* L20: */
1590 	}
1591 	*alpha = beta;
1592     }
1593 
1594     return 0;
1595 
1596 /*     End of DLARFG */
1597 
1598 } /* dlarfg_ */
1599 
dlarfp_(integer * n,double * alpha,double * x,integer * incx,double * tau)1600 /* Subroutine */ int dlarfp_(integer *n, double *alpha, double *x, integer *incx, double *tau)
1601 {
1602     /* System generated locals */
1603     integer i__1;
1604     double d__1;
1605 
1606     /* Local variables */
1607     integer j, knt;
1608     double beta;
1609     double xnorm;
1610     double safmin, rsafmn;
1611 
1612 
1613 /*  -- LAPACK auxiliary routine (version 3.2) -- */
1614 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
1615 /*     November 2006 */
1616 
1617 /*     .. Scalar Arguments .. */
1618 /*     .. */
1619 /*     .. Array Arguments .. */
1620 /*     .. */
1621 
1622 /*  Purpose */
1623 /*  ======= */
1624 
1625 /*  DLARFP generates a real elementary reflector H of order n, such */
1626 /*  that */
1627 
1628 /*        H * ( alpha ) = ( beta ),   H' * H = I. */
1629 /*            (   x   )   (   0  ) */
1630 
1631 /*  where alpha and beta are scalars, beta is non-negative, and x is */
1632 /*  an (n-1)-element real vector.  H is represented in the form */
1633 
1634 /*        H = I - tau * ( 1 ) * ( 1 v' ) , */
1635 /*                      ( v ) */
1636 
1637 /*  where tau is a real scalar and v is a real (n-1)-element */
1638 /*  vector. */
1639 
1640 /*  If the elements of x are all zero, then tau = 0 and H is taken to be */
1641 /*  the unit matrix. */
1642 
1643 /*  Otherwise  1 <= tau <= 2. */
1644 
1645 /*  Arguments */
1646 /*  ========= */
1647 
1648 /*  N       (input) INTEGER */
1649 /*          The order of the elementary reflector. */
1650 
1651 /*  ALPHA   (input/output) DOUBLE PRECISION */
1652 /*          On entry, the value alpha. */
1653 /*          On exit, it is overwritten with the value beta. */
1654 
1655 /*  X       (input/output) DOUBLE PRECISION array, dimension */
1656 /*                         (1+(N-2)*abs(INCX)) */
1657 /*          On entry, the vector x. */
1658 /*          On exit, it is overwritten with the vector v. */
1659 
1660 /*  INCX    (input) INTEGER */
1661 /*          The increment between elements of X. INCX > 0. */
1662 
1663 /*  TAU     (output) DOUBLE PRECISION */
1664 /*          The value tau. */
1665 
1666 /*  ===================================================================== */
1667 
1668 /*     .. Parameters .. */
1669 /*     .. */
1670 /*     .. Local Scalars .. */
1671 /*     .. */
1672 /*     .. External Functions .. */
1673 /*     .. */
1674 /*     .. Intrinsic Functions .. */
1675 /*     .. */
1676 /*     .. External Subroutines .. */
1677 /*     .. */
1678 /*     .. Executable Statements .. */
1679 
1680     /* Parameter adjustments */
1681     --x;
1682 
1683     /* Function Body */
1684     if (*n <= 0) {
1685 	*tau = 0.;
1686 	return 0;
1687     }
1688 
1689     i__1 = *n - 1;
1690     xnorm = dnrm2_(&i__1, &x[1], incx);
1691 
1692     if (xnorm == 0.) {
1693 
1694 /*        H  =  [+/-1, 0; I], sign chosen so ALPHA >= 0 */
1695 
1696 	if (*alpha >= 0.) {
1697 /*           When TAU.eq.ZERO, the vector is special-cased to be */
1698 /*           all zeros in the application routines.  We do not need */
1699 /*           to clear it. */
1700 	    *tau = 0.;
1701 	} else {
1702 /*           However, the application routines rely on explicit */
1703 /*           zero checks when TAU.ne.ZERO, and we must clear X. */
1704 	    *tau = 2.;
1705 	    i__1 = *n - 1;
1706 	    for (j = 1; j <= i__1; ++j) {
1707 		x[(j - 1) * *incx + 1] = 0.;
1708 	    }
1709 	    *alpha = -(*alpha);
1710 	}
1711     } else {
1712 
1713 /*        general case */
1714 
1715 	d__1 = dlapy2_(alpha, &xnorm);
1716 	beta = d_sign(&d__1, alpha);
1717 	safmin = dlamch_("S") / dlamch_("E");
1718 	knt = 0;
1719 	if (abs(beta) < safmin) {
1720 
1721 /*           XNORM, BETA may be inaccurate; scale X and recompute them */
1722 
1723 	    rsafmn = 1. / safmin;
1724 L10:
1725 	    ++knt;
1726 	    i__1 = *n - 1;
1727 	    dscal_(&i__1, &rsafmn, &x[1], incx);
1728 	    beta *= rsafmn;
1729 	    *alpha *= rsafmn;
1730 	    if (abs(beta) < safmin) {
1731 		goto L10;
1732 	    }
1733 
1734 /*           New BETA is at most 1, at least SAFMIN */
1735 
1736 	    i__1 = *n - 1;
1737 	    xnorm = dnrm2_(&i__1, &x[1], incx);
1738 	    d__1 = dlapy2_(alpha, &xnorm);
1739 	    beta = d_sign(&d__1, alpha);
1740 	}
1741 	*alpha += beta;
1742 	if (beta < 0.) {
1743 	    beta = -beta;
1744 	    *tau = -(*alpha) / beta;
1745 	} else {
1746 	    *alpha = xnorm * (xnorm / *alpha);
1747 	    *tau = *alpha / beta;
1748 	    *alpha = -(*alpha);
1749 	}
1750 	i__1 = *n - 1;
1751 	d__1 = 1. / *alpha;
1752 	dscal_(&i__1, &d__1, &x[1], incx);
1753 
1754 /*        If BETA is subnormal, it may lose relative accuracy */
1755 
1756 	i__1 = knt;
1757 	for (j = 1; j <= i__1; ++j) {
1758 	    beta *= safmin;
1759 /* L20: */
1760 	}
1761 	*alpha = beta;
1762     }
1763 
1764     return 0;
1765 
1766 /*     End of DLARFP */
1767 
1768 } /* dlarfp_ */
1769 
dlarft_(const char * direct,const char * storev,integer * n,integer * k,double * v,integer * ldv,double * tau,double * t,integer * ldt)1770 /* Subroutine */ int dlarft_(const char *direct, const char *storev, integer *n, integer *k, double *v, integer *ldv,
1771 	double *tau, double *t, integer *ldt)
1772 {
1773 	/* Table of constant values */
1774 	static integer c__1 = 1;
1775 	static double c_b8 = 0.;
1776 
1777     /* System generated locals */
1778     integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
1779     double d__1;
1780 
1781     /* Local variables */
1782     integer i__, j, prevlastv;
1783     double vii;
1784     integer lastv;
1785 
1786 
1787 /*  -- LAPACK auxiliary routine (version 3.2) -- */
1788 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
1789 /*     November 2006 */
1790 
1791 /*     .. Scalar Arguments .. */
1792 /*     .. */
1793 /*     .. Array Arguments .. */
1794 /*     .. */
1795 
1796 /*  Purpose */
1797 /*  ======= */
1798 
1799 /*  DLARFT forms the triangular factor T of a real block reflector H */
1800 /*  of order n, which is defined as a product of k elementary reflectors. */
1801 
1802 /*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
1803 
1804 /*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
1805 
1806 /*  If STOREV = 'C', the vector which defines the elementary reflector */
1807 /*  H(i) is stored in the i-th column of the array V, and */
1808 
1809 /*     H  =  I - V * T * V' */
1810 
1811 /*  If STOREV = 'R', the vector which defines the elementary reflector */
1812 /*  H(i) is stored in the i-th row of the array V, and */
1813 
1814 /*     H  =  I - V' * T * V */
1815 
1816 /*  Arguments */
1817 /*  ========= */
1818 
1819 /*  DIRECT  (input) CHARACTER*1 */
1820 /*          Specifies the order in which the elementary reflectors are */
1821 /*          multiplied to form the block reflector: */
1822 /*          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
1823 /*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
1824 
1825 /*  STOREV  (input) CHARACTER*1 */
1826 /*          Specifies how the vectors which define the elementary */
1827 /*          reflectors are stored (see also Further Details): */
1828 /*          = 'C': columnwise */
1829 /*          = 'R': rowwise */
1830 
1831 /*  N       (input) INTEGER */
1832 /*          The order of the block reflector H. N >= 0. */
1833 
1834 /*  K       (input) INTEGER */
1835 /*          The order of the triangular factor T (= the number of */
1836 /*          elementary reflectors). K >= 1. */
1837 
1838 /*  V       (input/output) DOUBLE PRECISION array, dimension */
1839 /*                               (LDV,K) if STOREV = 'C' */
1840 /*                               (LDV,N) if STOREV = 'R' */
1841 /*          The matrix V. See further details. */
1842 
1843 /*  LDV     (input) INTEGER */
1844 /*          The leading dimension of the array V. */
1845 /*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */
1846 
1847 /*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
1848 /*          TAU(i) must contain the scalar factor of the elementary */
1849 /*          reflector H(i). */
1850 
1851 /*  T       (output) DOUBLE PRECISION array, dimension (LDT,K) */
1852 /*          The k by k triangular factor T of the block reflector. */
1853 /*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
1854 /*          lower triangular. The rest of the array is not used. */
1855 
1856 /*  LDT     (input) INTEGER */
1857 /*          The leading dimension of the array T. LDT >= K. */
1858 
1859 /*  Further Details */
1860 /*  =============== */
1861 
1862 /*  The shape of the matrix V and the storage of the vectors which define */
1863 /*  the H(i) is best illustrated by the following example with n = 5 and */
1864 /*  k = 3. The elements equal to 1 are not stored; the corresponding */
1865 /*  array elements are modified but restored on exit. The rest of the */
1866 /*  array is not used. */
1867 
1868 /*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
1869 
1870 /*               V = (  1       )                 V = (  1 v1 v1 v1 v1 ) */
1871 /*                   ( v1  1    )                     (     1 v2 v2 v2 ) */
1872 /*                   ( v1 v2  1 )                     (        1 v3 v3 ) */
1873 /*                   ( v1 v2 v3 ) */
1874 /*                   ( v1 v2 v3 ) */
1875 
1876 /*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
1877 
1878 /*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       ) */
1879 /*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    ) */
1880 /*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 ) */
1881 /*                   (     1 v3 ) */
1882 /*                   (        1 ) */
1883 
1884 /*  ===================================================================== */
1885 
1886 /*     .. Parameters .. */
1887 /*     .. */
1888 /*     .. Local Scalars .. */
1889 /*     .. */
1890 /*     .. External Subroutines .. */
1891 /*     .. */
1892 /*     .. External Functions .. */
1893 /*     .. */
1894 /*     .. Executable Statements .. */
1895 
1896 /*     Quick return if possible */
1897 
1898     /* Parameter adjustments */
1899     v_dim1 = *ldv;
1900     v_offset = 1 + v_dim1;
1901     v -= v_offset;
1902     --tau;
1903     t_dim1 = *ldt;
1904     t_offset = 1 + t_dim1;
1905     t -= t_offset;
1906 
1907     /* Function Body */
1908     if (*n == 0) {
1909 	return 0;
1910     }
1911 
1912     if (lsame_(direct, "F")) {
1913 	prevlastv = *n;
1914 	i__1 = *k;
1915 	for (i__ = 1; i__ <= i__1; ++i__) {
1916 	    prevlastv = std::max(i__,prevlastv);
1917 	    if (tau[i__] == 0.) {
1918 
1919 /*              H(i)  =  I */
1920 
1921 		i__2 = i__;
1922 		for (j = 1; j <= i__2; ++j) {
1923 		    t[j + i__ * t_dim1] = 0.;
1924 /* L10: */
1925 		}
1926 	    } else {
1927 
1928 /*              general case */
1929 
1930 		vii = v[i__ + i__ * v_dim1];
1931 		v[i__ + i__ * v_dim1] = 1.;
1932 		if (lsame_(storev, "C")) {
1933 /*                 Skip any trailing zeros. */
1934 		    i__2 = i__ + 1;
1935 		    for (lastv = *n; lastv >= i__2; --lastv) {
1936 			if (v[lastv + i__ * v_dim1] != 0.) {
1937 			    break;
1938 			}
1939 		    }
1940 		    j = std::min(lastv,prevlastv);
1941 
1942 /*                 T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */
1943 
1944 		    i__2 = j - i__ + 1;
1945 		    i__3 = i__ - 1;
1946 		    d__1 = -tau[i__];
1947 		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1],
1948 			     ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b8, &t[
1949 			    i__ * t_dim1 + 1], &c__1);
1950 		} else {
1951 /*                 Skip any trailing zeros. */
1952 		    i__2 = i__ + 1;
1953 		    for (lastv = *n; lastv >= i__2; --lastv) {
1954 			if (v[i__ + lastv * v_dim1] != 0.) {
1955 			    break;
1956 			}
1957 		    }
1958 		    j = std::min(lastv,prevlastv);
1959 
1960 /*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */
1961 
1962 		    i__2 = i__ - 1;
1963 		    i__3 = j - i__ + 1;
1964 		    d__1 = -tau[i__];
1965 		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ *
1966 			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
1967 			    c_b8, &t[i__ * t_dim1 + 1], &c__1);
1968 		}
1969 		v[i__ + i__ * v_dim1] = vii;
1970 
1971 /*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
1972 
1973 		i__2 = i__ - 1;
1974 		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
1975 			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
1976 		t[i__ + i__ * t_dim1] = tau[i__];
1977 		if (i__ > 1) {
1978 		    prevlastv = std::max(prevlastv,lastv);
1979 		} else {
1980 		    prevlastv = lastv;
1981 		}
1982 	    }
1983 /* L20: */
1984 	}
1985     } else {
1986 	prevlastv = 1;
1987 	for (i__ = *k; i__ >= 1; --i__) {
1988 	    if (tau[i__] == 0.) {
1989 
1990 /*              H(i)  =  I */
1991 
1992 		i__1 = *k;
1993 		for (j = i__; j <= i__1; ++j) {
1994 		    t[j + i__ * t_dim1] = 0.;
1995 /* L30: */
1996 		}
1997 	    } else {
1998 
1999 /*              general case */
2000 
2001 		if (i__ < *k) {
2002 		    if (lsame_(storev, "C")) {
2003 			vii = v[*n - *k + i__ + i__ * v_dim1];
2004 			v[*n - *k + i__ + i__ * v_dim1] = 1.;
2005 /*                    Skip any leading zeros. */
2006 			i__1 = i__ - 1;
2007 			for (lastv = 1; lastv <= i__1; ++lastv) {
2008 			    if (v[lastv + i__ * v_dim1] != 0.) {
2009 				break;
2010 			    }
2011 			}
2012 			j = std::max(lastv,prevlastv);
2013 
2014 /*                    T(i+1:k,i) := */
2015 /*                            - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */
2016 
2017 			i__1 = *n - *k + i__ - j + 1;
2018 			i__2 = *k - i__;
2019 			d__1 = -tau[i__];
2020 			dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__
2021 				+ 1) * v_dim1], ldv, &v[j + i__ * v_dim1], &
2022 				c__1, &c_b8, &t[i__ + 1 + i__ * t_dim1], &
2023 				c__1);
2024 			v[*n - *k + i__ + i__ * v_dim1] = vii;
2025 		    } else {
2026 			vii = v[i__ + (*n - *k + i__) * v_dim1];
2027 			v[i__ + (*n - *k + i__) * v_dim1] = 1.;
2028 /*                    Skip any leading zeros. */
2029 			i__1 = i__ - 1;
2030 			for (lastv = 1; lastv <= i__1; ++lastv) {
2031 			    if (v[i__ + lastv * v_dim1] != 0.) {
2032 				break;
2033 			    }
2034 			}
2035 			j = std::max(lastv,prevlastv);
2036 
2037 /*                    T(i+1:k,i) := */
2038 /*                            - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */
2039 
2040 			i__1 = *k - i__;
2041 			i__2 = *n - *k + i__ - j + 1;
2042 			d__1 = -tau[i__];
2043 			dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ +
2044 				1 + j * v_dim1], ldv, &v[i__ + j * v_dim1],
2045 				ldv, &c_b8, &t[i__ + 1 + i__ * t_dim1], &c__1);
2046 			v[i__ + (*n - *k + i__) * v_dim1] = vii;
2047 		    }
2048 
2049 /*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
2050 
2051 		    i__1 = *k - i__;
2052 		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
2053 			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
2054 			     t_dim1], &c__1)
2055 			    ;
2056 		    if (i__ > 1) {
2057 			prevlastv = std::min(prevlastv,lastv);
2058 		    } else {
2059 			prevlastv = lastv;
2060 		    }
2061 		}
2062 		t[i__ + i__ * t_dim1] = tau[i__];
2063 	    }
2064 /* L40: */
2065 	}
2066     }
2067     return 0;
2068 
2069 /*     End of DLARFT */
2070 
2071 } /* dlarft_ */
2072 
dlarfx_(const char * side,integer * m,integer * n,double * v,double * tau,double * c__,integer * ldc,double * work)2073 /* Subroutine */ int dlarfx_(const char *side, integer *m, integer *n, double *v, double *tau, double *c__, integer *ldc, double *work)
2074 {
2075 	/* Table of constant values */
2076 	static integer c__1 = 1;
2077 
2078     /* System generated locals */
2079     integer c_dim1, c_offset, i__1;
2080 
2081     /* Local variables */
2082     integer j;
2083     double t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7,
2084 	     v8, v9, t10, v10, sum;
2085 
2086 /*  -- LAPACK auxiliary routine (version 3.2) -- */
2087 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
2088 /*     November 2006 */
2089 
2090 /*     .. Scalar Arguments .. */
2091 /*     .. */
2092 /*     .. Array Arguments .. */
2093 /*     .. */
2094 
2095 /*  Purpose */
2096 /*  ======= */
2097 
2098 /*  DLARFX applies a real elementary reflector H to a real m by n */
2099 /*  matrix C, from either the left or the right. H is represented in the */
2100 /*  form */
2101 
2102 /*        H = I - tau * v * v' */
2103 
2104 /*  where tau is a real scalar and v is a real vector. */
2105 
2106 /*  If tau = 0, then H is taken to be the unit matrix */
2107 
2108 /*  This version uses inline code if H has order < 11. */
2109 
2110 /*  Arguments */
2111 /*  ========= */
2112 
2113 /*  SIDE    (input) CHARACTER*1 */
2114 /*          = 'L': form  H * C */
2115 /*          = 'R': form  C * H */
2116 
2117 /*  M       (input) INTEGER */
2118 /*          The number of rows of the matrix C. */
2119 
2120 /*  N       (input) INTEGER */
2121 /*          The number of columns of the matrix C. */
2122 
2123 /*  V       (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L' */
2124 /*                                     or (N) if SIDE = 'R' */
2125 /*          The vector v in the representation of H. */
2126 
2127 /*  TAU     (input) DOUBLE PRECISION */
2128 /*          The value tau in the representation of H. */
2129 
2130 /*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
2131 /*          On entry, the m by n matrix C. */
2132 /*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
2133 /*          or C * H if SIDE = 'R'. */
2134 
2135 /*  LDC     (input) INTEGER */
2136 /*          The leading dimension of the array C. LDA >= (1,M). */
2137 
2138 /*  WORK    (workspace) DOUBLE PRECISION array, dimension */
2139 /*                      (N) if SIDE = 'L' */
2140 /*                      or (M) if SIDE = 'R' */
2141 /*          WORK is not referenced if H has order < 11. */
2142 
2143 /*  ===================================================================== */
2144 
2145 /*     .. Parameters .. */
2146 /*     .. */
2147 /*     .. Local Scalars .. */
2148 /*     .. */
2149 /*     .. External Functions .. */
2150 /*     .. */
2151 /*     .. External Subroutines .. */
2152 /*     .. */
2153 /*     .. Executable Statements .. */
2154 
2155     /* Parameter adjustments */
2156     --v;
2157     c_dim1 = *ldc;
2158     c_offset = 1 + c_dim1;
2159     c__ -= c_offset;
2160     --work;
2161 
2162     /* Function Body */
2163     if (*tau == 0.) {
2164 	return 0;
2165     }
2166     if (lsame_(side, "L")) {
2167 
2168 /*        Form  H * C, where H has order m. */
2169 
2170 	switch (*m) {
2171 	    case 1:  goto L10;
2172 	    case 2:  goto L30;
2173 	    case 3:  goto L50;
2174 	    case 4:  goto L70;
2175 	    case 5:  goto L90;
2176 	    case 6:  goto L110;
2177 	    case 7:  goto L130;
2178 	    case 8:  goto L150;
2179 	    case 9:  goto L170;
2180 	    case 10:  goto L190;
2181 	}
2182 
2183 /*        Code for general M */
2184 
2185 	dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]);
2186 	goto L410;
2187 L10:
2188 
2189 /*        Special code for 1 x 1 Householder */
2190 
2191 	t1 = 1. - *tau * v[1] * v[1];
2192 	i__1 = *n;
2193 	for (j = 1; j <= i__1; ++j) {
2194 	    c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1];
2195 /* L20: */
2196 	}
2197 	goto L410;
2198 L30:
2199 
2200 /*        Special code for 2 x 2 Householder */
2201 
2202 	v1 = v[1];
2203 	t1 = *tau * v1;
2204 	v2 = v[2];
2205 	t2 = *tau * v2;
2206 	i__1 = *n;
2207 	for (j = 1; j <= i__1; ++j) {
2208 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2];
2209 	    c__[j * c_dim1 + 1] -= sum * t1;
2210 	    c__[j * c_dim1 + 2] -= sum * t2;
2211 /* L40: */
2212 	}
2213 	goto L410;
2214 L50:
2215 
2216 /*        Special code for 3 x 3 Householder */
2217 
2218 	v1 = v[1];
2219 	t1 = *tau * v1;
2220 	v2 = v[2];
2221 	t2 = *tau * v2;
2222 	v3 = v[3];
2223 	t3 = *tau * v3;
2224 	i__1 = *n;
2225 	for (j = 1; j <= i__1; ++j) {
2226 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2227 		    c__[j * c_dim1 + 3];
2228 	    c__[j * c_dim1 + 1] -= sum * t1;
2229 	    c__[j * c_dim1 + 2] -= sum * t2;
2230 	    c__[j * c_dim1 + 3] -= sum * t3;
2231 /* L60: */
2232 	}
2233 	goto L410;
2234 L70:
2235 
2236 /*        Special code for 4 x 4 Householder */
2237 
2238 	v1 = v[1];
2239 	t1 = *tau * v1;
2240 	v2 = v[2];
2241 	t2 = *tau * v2;
2242 	v3 = v[3];
2243 	t3 = *tau * v3;
2244 	v4 = v[4];
2245 	t4 = *tau * v4;
2246 	i__1 = *n;
2247 	for (j = 1; j <= i__1; ++j) {
2248 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2249 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4];
2250 	    c__[j * c_dim1 + 1] -= sum * t1;
2251 	    c__[j * c_dim1 + 2] -= sum * t2;
2252 	    c__[j * c_dim1 + 3] -= sum * t3;
2253 	    c__[j * c_dim1 + 4] -= sum * t4;
2254 /* L80: */
2255 	}
2256 	goto L410;
2257 L90:
2258 
2259 /*        Special code for 5 x 5 Householder */
2260 
2261 	v1 = v[1];
2262 	t1 = *tau * v1;
2263 	v2 = v[2];
2264 	t2 = *tau * v2;
2265 	v3 = v[3];
2266 	t3 = *tau * v3;
2267 	v4 = v[4];
2268 	t4 = *tau * v4;
2269 	v5 = v[5];
2270 	t5 = *tau * v5;
2271 	i__1 = *n;
2272 	for (j = 1; j <= i__1; ++j) {
2273 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2274 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2275 		    j * c_dim1 + 5];
2276 	    c__[j * c_dim1 + 1] -= sum * t1;
2277 	    c__[j * c_dim1 + 2] -= sum * t2;
2278 	    c__[j * c_dim1 + 3] -= sum * t3;
2279 	    c__[j * c_dim1 + 4] -= sum * t4;
2280 	    c__[j * c_dim1 + 5] -= sum * t5;
2281 /* L100: */
2282 	}
2283 	goto L410;
2284 L110:
2285 
2286 /*        Special code for 6 x 6 Householder */
2287 
2288 	v1 = v[1];
2289 	t1 = *tau * v1;
2290 	v2 = v[2];
2291 	t2 = *tau * v2;
2292 	v3 = v[3];
2293 	t3 = *tau * v3;
2294 	v4 = v[4];
2295 	t4 = *tau * v4;
2296 	v5 = v[5];
2297 	t5 = *tau * v5;
2298 	v6 = v[6];
2299 	t6 = *tau * v6;
2300 	i__1 = *n;
2301 	for (j = 1; j <= i__1; ++j) {
2302 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2303 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2304 		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6];
2305 	    c__[j * c_dim1 + 1] -= sum * t1;
2306 	    c__[j * c_dim1 + 2] -= sum * t2;
2307 	    c__[j * c_dim1 + 3] -= sum * t3;
2308 	    c__[j * c_dim1 + 4] -= sum * t4;
2309 	    c__[j * c_dim1 + 5] -= sum * t5;
2310 	    c__[j * c_dim1 + 6] -= sum * t6;
2311 /* L120: */
2312 	}
2313 	goto L410;
2314 L130:
2315 
2316 /*        Special code for 7 x 7 Householder */
2317 
2318 	v1 = v[1];
2319 	t1 = *tau * v1;
2320 	v2 = v[2];
2321 	t2 = *tau * v2;
2322 	v3 = v[3];
2323 	t3 = *tau * v3;
2324 	v4 = v[4];
2325 	t4 = *tau * v4;
2326 	v5 = v[5];
2327 	t5 = *tau * v5;
2328 	v6 = v[6];
2329 	t6 = *tau * v6;
2330 	v7 = v[7];
2331 	t7 = *tau * v7;
2332 	i__1 = *n;
2333 	for (j = 1; j <= i__1; ++j) {
2334 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2335 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2336 		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
2337 		    c_dim1 + 7];
2338 	    c__[j * c_dim1 + 1] -= sum * t1;
2339 	    c__[j * c_dim1 + 2] -= sum * t2;
2340 	    c__[j * c_dim1 + 3] -= sum * t3;
2341 	    c__[j * c_dim1 + 4] -= sum * t4;
2342 	    c__[j * c_dim1 + 5] -= sum * t5;
2343 	    c__[j * c_dim1 + 6] -= sum * t6;
2344 	    c__[j * c_dim1 + 7] -= sum * t7;
2345 /* L140: */
2346 	}
2347 	goto L410;
2348 L150:
2349 
2350 /*        Special code for 8 x 8 Householder */
2351 
2352 	v1 = v[1];
2353 	t1 = *tau * v1;
2354 	v2 = v[2];
2355 	t2 = *tau * v2;
2356 	v3 = v[3];
2357 	t3 = *tau * v3;
2358 	v4 = v[4];
2359 	t4 = *tau * v4;
2360 	v5 = v[5];
2361 	t5 = *tau * v5;
2362 	v6 = v[6];
2363 	t6 = *tau * v6;
2364 	v7 = v[7];
2365 	t7 = *tau * v7;
2366 	v8 = v[8];
2367 	t8 = *tau * v8;
2368 	i__1 = *n;
2369 	for (j = 1; j <= i__1; ++j) {
2370 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2371 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2372 		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
2373 		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8];
2374 	    c__[j * c_dim1 + 1] -= sum * t1;
2375 	    c__[j * c_dim1 + 2] -= sum * t2;
2376 	    c__[j * c_dim1 + 3] -= sum * t3;
2377 	    c__[j * c_dim1 + 4] -= sum * t4;
2378 	    c__[j * c_dim1 + 5] -= sum * t5;
2379 	    c__[j * c_dim1 + 6] -= sum * t6;
2380 	    c__[j * c_dim1 + 7] -= sum * t7;
2381 	    c__[j * c_dim1 + 8] -= sum * t8;
2382 /* L160: */
2383 	}
2384 	goto L410;
2385 L170:
2386 
2387 /*        Special code for 9 x 9 Householder */
2388 
2389 	v1 = v[1];
2390 	t1 = *tau * v1;
2391 	v2 = v[2];
2392 	t2 = *tau * v2;
2393 	v3 = v[3];
2394 	t3 = *tau * v3;
2395 	v4 = v[4];
2396 	t4 = *tau * v4;
2397 	v5 = v[5];
2398 	t5 = *tau * v5;
2399 	v6 = v[6];
2400 	t6 = *tau * v6;
2401 	v7 = v[7];
2402 	t7 = *tau * v7;
2403 	v8 = v[8];
2404 	t8 = *tau * v8;
2405 	v9 = v[9];
2406 	t9 = *tau * v9;
2407 	i__1 = *n;
2408 	for (j = 1; j <= i__1; ++j) {
2409 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2410 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2411 		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
2412 		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
2413 		    c_dim1 + 9];
2414 	    c__[j * c_dim1 + 1] -= sum * t1;
2415 	    c__[j * c_dim1 + 2] -= sum * t2;
2416 	    c__[j * c_dim1 + 3] -= sum * t3;
2417 	    c__[j * c_dim1 + 4] -= sum * t4;
2418 	    c__[j * c_dim1 + 5] -= sum * t5;
2419 	    c__[j * c_dim1 + 6] -= sum * t6;
2420 	    c__[j * c_dim1 + 7] -= sum * t7;
2421 	    c__[j * c_dim1 + 8] -= sum * t8;
2422 	    c__[j * c_dim1 + 9] -= sum * t9;
2423 /* L180: */
2424 	}
2425 	goto L410;
2426 L190:
2427 
2428 /*        Special code for 10 x 10 Householder */
2429 
2430 	v1 = v[1];
2431 	t1 = *tau * v1;
2432 	v2 = v[2];
2433 	t2 = *tau * v2;
2434 	v3 = v[3];
2435 	t3 = *tau * v3;
2436 	v4 = v[4];
2437 	t4 = *tau * v4;
2438 	v5 = v[5];
2439 	t5 = *tau * v5;
2440 	v6 = v[6];
2441 	t6 = *tau * v6;
2442 	v7 = v[7];
2443 	t7 = *tau * v7;
2444 	v8 = v[8];
2445 	t8 = *tau * v8;
2446 	v9 = v[9];
2447 	t9 = *tau * v9;
2448 	v10 = v[10];
2449 	t10 = *tau * v10;
2450 	i__1 = *n;
2451 	for (j = 1; j <= i__1; ++j) {
2452 	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
2453 		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
2454 		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
2455 		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
2456 		    c_dim1 + 9] + v10 * c__[j * c_dim1 + 10];
2457 	    c__[j * c_dim1 + 1] -= sum * t1;
2458 	    c__[j * c_dim1 + 2] -= sum * t2;
2459 	    c__[j * c_dim1 + 3] -= sum * t3;
2460 	    c__[j * c_dim1 + 4] -= sum * t4;
2461 	    c__[j * c_dim1 + 5] -= sum * t5;
2462 	    c__[j * c_dim1 + 6] -= sum * t6;
2463 	    c__[j * c_dim1 + 7] -= sum * t7;
2464 	    c__[j * c_dim1 + 8] -= sum * t8;
2465 	    c__[j * c_dim1 + 9] -= sum * t9;
2466 	    c__[j * c_dim1 + 10] -= sum * t10;
2467 /* L200: */
2468 	}
2469 	goto L410;
2470     } else {
2471 
2472 /*        Form  C * H, where H has order n. */
2473 
2474 	switch (*n) {
2475 	    case 1:  goto L210;
2476 	    case 2:  goto L230;
2477 	    case 3:  goto L250;
2478 	    case 4:  goto L270;
2479 	    case 5:  goto L290;
2480 	    case 6:  goto L310;
2481 	    case 7:  goto L330;
2482 	    case 8:  goto L350;
2483 	    case 9:  goto L370;
2484 	    case 10:  goto L390;
2485 	}
2486 
2487 /*        Code for general N */
2488 
2489 	dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]);
2490 	goto L410;
2491 L210:
2492 
2493 /*        Special code for 1 x 1 Householder */
2494 
2495 	t1 = 1. - *tau * v[1] * v[1];
2496 	i__1 = *m;
2497 	for (j = 1; j <= i__1; ++j) {
2498 	    c__[j + c_dim1] = t1 * c__[j + c_dim1];
2499 /* L220: */
2500 	}
2501 	goto L410;
2502 L230:
2503 
2504 /*        Special code for 2 x 2 Householder */
2505 
2506 	v1 = v[1];
2507 	t1 = *tau * v1;
2508 	v2 = v[2];
2509 	t2 = *tau * v2;
2510 	i__1 = *m;
2511 	for (j = 1; j <= i__1; ++j) {
2512 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)];
2513 	    c__[j + c_dim1] -= sum * t1;
2514 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2515 /* L240: */
2516 	}
2517 	goto L410;
2518 L250:
2519 
2520 /*        Special code for 3 x 3 Householder */
2521 
2522 	v1 = v[1];
2523 	t1 = *tau * v1;
2524 	v2 = v[2];
2525 	t2 = *tau * v2;
2526 	v3 = v[3];
2527 	t3 = *tau * v3;
2528 	i__1 = *m;
2529 	for (j = 1; j <= i__1; ++j) {
2530 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2531 		    c__[j + c_dim1 * 3];
2532 	    c__[j + c_dim1] -= sum * t1;
2533 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2534 	    c__[j + c_dim1 * 3] -= sum * t3;
2535 /* L260: */
2536 	}
2537 	goto L410;
2538 L270:
2539 
2540 /*        Special code for 4 x 4 Householder */
2541 
2542 	v1 = v[1];
2543 	t1 = *tau * v1;
2544 	v2 = v[2];
2545 	t2 = *tau * v2;
2546 	v3 = v[3];
2547 	t3 = *tau * v3;
2548 	v4 = v[4];
2549 	t4 = *tau * v4;
2550 	i__1 = *m;
2551 	for (j = 1; j <= i__1; ++j) {
2552 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2553 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)];
2554 	    c__[j + c_dim1] -= sum * t1;
2555 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2556 	    c__[j + c_dim1 * 3] -= sum * t3;
2557 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2558 /* L280: */
2559 	}
2560 	goto L410;
2561 L290:
2562 
2563 /*        Special code for 5 x 5 Householder */
2564 
2565 	v1 = v[1];
2566 	t1 = *tau * v1;
2567 	v2 = v[2];
2568 	t2 = *tau * v2;
2569 	v3 = v[3];
2570 	t3 = *tau * v3;
2571 	v4 = v[4];
2572 	t4 = *tau * v4;
2573 	v5 = v[5];
2574 	t5 = *tau * v5;
2575 	i__1 = *m;
2576 	for (j = 1; j <= i__1; ++j) {
2577 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2578 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2579 		    c__[j + c_dim1 * 5];
2580 	    c__[j + c_dim1] -= sum * t1;
2581 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2582 	    c__[j + c_dim1 * 3] -= sum * t3;
2583 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2584 	    c__[j + c_dim1 * 5] -= sum * t5;
2585 /* L300: */
2586 	}
2587 	goto L410;
2588 L310:
2589 
2590 /*        Special code for 6 x 6 Householder */
2591 
2592 	v1 = v[1];
2593 	t1 = *tau * v1;
2594 	v2 = v[2];
2595 	t2 = *tau * v2;
2596 	v3 = v[3];
2597 	t3 = *tau * v3;
2598 	v4 = v[4];
2599 	t4 = *tau * v4;
2600 	v5 = v[5];
2601 	t5 = *tau * v5;
2602 	v6 = v[6];
2603 	t6 = *tau * v6;
2604 	i__1 = *m;
2605 	for (j = 1; j <= i__1; ++j) {
2606 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2607 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2608 		    c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6];
2609 	    c__[j + c_dim1] -= sum * t1;
2610 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2611 	    c__[j + c_dim1 * 3] -= sum * t3;
2612 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2613 	    c__[j + c_dim1 * 5] -= sum * t5;
2614 	    c__[j + c_dim1 * 6] -= sum * t6;
2615 /* L320: */
2616 	}
2617 	goto L410;
2618 L330:
2619 
2620 /*        Special code for 7 x 7 Householder */
2621 
2622 	v1 = v[1];
2623 	t1 = *tau * v1;
2624 	v2 = v[2];
2625 	t2 = *tau * v2;
2626 	v3 = v[3];
2627 	t3 = *tau * v3;
2628 	v4 = v[4];
2629 	t4 = *tau * v4;
2630 	v5 = v[5];
2631 	t5 = *tau * v5;
2632 	v6 = v[6];
2633 	t6 = *tau * v6;
2634 	v7 = v[7];
2635 	t7 = *tau * v7;
2636 	i__1 = *m;
2637 	for (j = 1; j <= i__1; ++j) {
2638 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2639 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2640 		    c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[
2641 		    j + c_dim1 * 7];
2642 	    c__[j + c_dim1] -= sum * t1;
2643 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2644 	    c__[j + c_dim1 * 3] -= sum * t3;
2645 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2646 	    c__[j + c_dim1 * 5] -= sum * t5;
2647 	    c__[j + c_dim1 * 6] -= sum * t6;
2648 	    c__[j + c_dim1 * 7] -= sum * t7;
2649 /* L340: */
2650 	}
2651 	goto L410;
2652 L350:
2653 
2654 /*        Special code for 8 x 8 Householder */
2655 
2656 	v1 = v[1];
2657 	t1 = *tau * v1;
2658 	v2 = v[2];
2659 	t2 = *tau * v2;
2660 	v3 = v[3];
2661 	t3 = *tau * v3;
2662 	v4 = v[4];
2663 	t4 = *tau * v4;
2664 	v5 = v[5];
2665 	t5 = *tau * v5;
2666 	v6 = v[6];
2667 	t6 = *tau * v6;
2668 	v7 = v[7];
2669 	t7 = *tau * v7;
2670 	v8 = v[8];
2671 	t8 = *tau * v8;
2672 	i__1 = *m;
2673 	for (j = 1; j <= i__1; ++j) {
2674 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2675 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2676 		    c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[
2677 		    j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)];
2678 	    c__[j + c_dim1] -= sum * t1;
2679 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2680 	    c__[j + c_dim1 * 3] -= sum * t3;
2681 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2682 	    c__[j + c_dim1 * 5] -= sum * t5;
2683 	    c__[j + c_dim1 * 6] -= sum * t6;
2684 	    c__[j + c_dim1 * 7] -= sum * t7;
2685 	    c__[j + (c_dim1 << 3)] -= sum * t8;
2686 /* L360: */
2687 	}
2688 	goto L410;
2689 L370:
2690 
2691 /*        Special code for 9 x 9 Householder */
2692 
2693 	v1 = v[1];
2694 	t1 = *tau * v1;
2695 	v2 = v[2];
2696 	t2 = *tau * v2;
2697 	v3 = v[3];
2698 	t3 = *tau * v3;
2699 	v4 = v[4];
2700 	t4 = *tau * v4;
2701 	v5 = v[5];
2702 	t5 = *tau * v5;
2703 	v6 = v[6];
2704 	t6 = *tau * v6;
2705 	v7 = v[7];
2706 	t7 = *tau * v7;
2707 	v8 = v[8];
2708 	t8 = *tau * v8;
2709 	v9 = v[9];
2710 	t9 = *tau * v9;
2711 	i__1 = *m;
2712 	for (j = 1; j <= i__1; ++j) {
2713 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2714 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2715 		    c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[
2716 		    j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[
2717 		    j + c_dim1 * 9];
2718 	    c__[j + c_dim1] -= sum * t1;
2719 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2720 	    c__[j + c_dim1 * 3] -= sum * t3;
2721 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2722 	    c__[j + c_dim1 * 5] -= sum * t5;
2723 	    c__[j + c_dim1 * 6] -= sum * t6;
2724 	    c__[j + c_dim1 * 7] -= sum * t7;
2725 	    c__[j + (c_dim1 << 3)] -= sum * t8;
2726 	    c__[j + c_dim1 * 9] -= sum * t9;
2727 /* L380: */
2728 	}
2729 	goto L410;
2730 L390:
2731 
2732 /*        Special code for 10 x 10 Householder */
2733 
2734 	v1 = v[1];
2735 	t1 = *tau * v1;
2736 	v2 = v[2];
2737 	t2 = *tau * v2;
2738 	v3 = v[3];
2739 	t3 = *tau * v3;
2740 	v4 = v[4];
2741 	t4 = *tau * v4;
2742 	v5 = v[5];
2743 	t5 = *tau * v5;
2744 	v6 = v[6];
2745 	t6 = *tau * v6;
2746 	v7 = v[7];
2747 	t7 = *tau * v7;
2748 	v8 = v[8];
2749 	t8 = *tau * v8;
2750 	v9 = v[9];
2751 	t9 = *tau * v9;
2752 	v10 = v[10];
2753 	t10 = *tau * v10;
2754 	i__1 = *m;
2755 	for (j = 1; j <= i__1; ++j) {
2756 	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + (c_dim1 << 1)] + v3 *
2757 		    c__[j + c_dim1 * 3] + v4 * c__[j + (c_dim1 << 2)] + v5 *
2758 		    c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 * c__[
2759 		    j + c_dim1 * 7] + v8 * c__[j + (c_dim1 << 3)] + v9 * c__[
2760 		    j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10];
2761 	    c__[j + c_dim1] -= sum * t1;
2762 	    c__[j + (c_dim1 << 1)] -= sum * t2;
2763 	    c__[j + c_dim1 * 3] -= sum * t3;
2764 	    c__[j + (c_dim1 << 2)] -= sum * t4;
2765 	    c__[j + c_dim1 * 5] -= sum * t5;
2766 	    c__[j + c_dim1 * 6] -= sum * t6;
2767 	    c__[j + c_dim1 * 7] -= sum * t7;
2768 	    c__[j + (c_dim1 << 3)] -= sum * t8;
2769 	    c__[j + c_dim1 * 9] -= sum * t9;
2770 	    c__[j + c_dim1 * 10] -= sum * t10;
2771 /* L400: */
2772 	}
2773 	goto L410;
2774     }
2775 L410:
2776     return 0;
2777 
2778 /*     End of DLARFX */
2779 
2780 } /* dlarfx_ */
2781 
dlargv_(integer * n,double * x,integer * incx,double * y,integer * incy,double * c__,integer * incc)2782 /* Subroutine */ int dlargv_(integer *n, double *x, integer *incx,
2783 	double *y, integer *incy, double *c__, integer *incc)
2784 {
2785     /* System generated locals */
2786     integer i__1;
2787 
2788     /* Builtin functions
2789     double sqrt(double); */
2790 
2791     /* Local variables */
2792     double f, g;
2793     integer i__;
2794     double t;
2795     integer ic, ix, iy;
2796     double tt;
2797 
2798 
2799 /*  -- LAPACK auxiliary routine (version 3.1) -- */
2800 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
2801 /*     November 2006 */
2802 
2803 /*     .. Scalar Arguments .. */
2804 /*     .. */
2805 /*     .. Array Arguments .. */
2806 /*     .. */
2807 
2808 /*  Purpose */
2809 /*  ======= */
2810 
2811 /*  DLARGV generates a vector of real plane rotations, determined by */
2812 /*  elements of the real vectors x and y. For i = 1,2,...,n */
2813 
2814 /*     (  c(i)  s(i) ) ( x(i) ) = ( a(i) ) */
2815 /*     ( -s(i)  c(i) ) ( y(i) ) = (   0  ) */
2816 
2817 /*  Arguments */
2818 /*  ========= */
2819 
2820 /*  N       (input) INTEGER */
2821 /*          The number of plane rotations to be generated. */
2822 
2823 /*  X       (input/output) DOUBLE PRECISION array, */
2824 /*                         dimension (1+(N-1)*INCX) */
2825 /*          On entry, the vector x. */
2826 /*          On exit, x(i) is overwritten by a(i), for i = 1,...,n. */
2827 
2828 /*  INCX    (input) INTEGER */
2829 /*          The increment between elements of X. INCX > 0. */
2830 
2831 /*  Y       (input/output) DOUBLE PRECISION array, */
2832 /*                         dimension (1+(N-1)*INCY) */
2833 /*          On entry, the vector y. */
2834 /*          On exit, the sines of the plane rotations. */
2835 
2836 /*  INCY    (input) INTEGER */
2837 /*          The increment between elements of Y. INCY > 0. */
2838 
2839 /*  C       (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */
2840 /*          The cosines of the plane rotations. */
2841 
2842 /*  INCC    (input) INTEGER */
2843 /*          The increment between elements of C. INCC > 0. */
2844 
2845 /*  ===================================================================== */
2846 
2847 /*     .. Parameters .. */
2848 /*     .. */
2849 /*     .. Local Scalars .. */
2850 /*     .. */
2851 /*     .. Intrinsic Functions .. */
2852 /*     .. */
2853 /*     .. Executable Statements .. */
2854 
2855     /* Parameter adjustments */
2856     --c__;
2857     --y;
2858     --x;
2859 
2860     /* Function Body */
2861     ix = 1;
2862     iy = 1;
2863     ic = 1;
2864     i__1 = *n;
2865     for (i__ = 1; i__ <= i__1; ++i__) {
2866 	f = x[ix];
2867 	g = y[iy];
2868 	if (g == 0.) {
2869 	    c__[ic] = 1.;
2870 	} else if (f == 0.) {
2871 	    c__[ic] = 0.;
2872 	    y[iy] = 1.;
2873 	    x[ix] = g;
2874 	} else if (abs(f) > abs(g)) {
2875 	    t = g / f;
2876 	    tt = sqrt(t * t + 1.);
2877 	    c__[ic] = 1. / tt;
2878 	    y[iy] = t * c__[ic];
2879 	    x[ix] = f * tt;
2880 	} else {
2881 	    t = f / g;
2882 	    tt = sqrt(t * t + 1.);
2883 	    y[iy] = 1. / tt;
2884 	    c__[ic] = t * y[iy];
2885 	    x[ix] = g * tt;
2886 	}
2887 	ic += *incc;
2888 	iy += *incy;
2889 	ix += *incx;
2890 /* L10: */
2891     }
2892     return 0;
2893 
2894 /*     End of DLARGV */
2895 
2896 } /* dlargv_ */
2897 
dlarnv_(integer * idist,integer * iseed,integer * n,double * x)2898 /* Subroutine */ int dlarnv_(integer *idist, integer *iseed, integer *n,
2899 	double *x)
2900 {
2901     /* System generated locals */
2902     integer i__1, i__2, i__3;
2903 
2904     /* Builtin functions
2905     double log(double), sqrt(double), cos(double); */
2906 
2907     /* Local variables */
2908     integer i__;
2909     double u[128];
2910     integer il, iv, il2;
2911 
2912 
2913 
2914 /*  -- LAPACK auxiliary routine (version 3.1) -- */
2915 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
2916 /*     November 2006 */
2917 
2918 /*     .. Scalar Arguments .. */
2919 /*     .. */
2920 /*     .. Array Arguments .. */
2921 /*     .. */
2922 
2923 /*  Purpose */
2924 /*  ======= */
2925 
2926 /*  DLARNV returns a vector of n random real numbers from a uniform or */
2927 /*  normal distribution. */
2928 
2929 /*  Arguments */
2930 /*  ========= */
2931 
2932 /*  IDIST   (input) INTEGER */
2933 /*          Specifies the distribution of the random numbers: */
2934 /*          = 1:  uniform (0,1) */
2935 /*          = 2:  uniform (-1,1) */
2936 /*          = 3:  normal (0,1) */
2937 
2938 /*  ISEED   (input/output) INTEGER array, dimension (4) */
2939 /*          On entry, the seed of the random number generator; the array */
2940 /*          elements must be between 0 and 4095, and ISEED(4) must be */
2941 /*          odd. */
2942 /*          On exit, the seed is updated. */
2943 
2944 /*  N       (input) INTEGER */
2945 /*          The number of random numbers to be generated. */
2946 
2947 /*  X       (output) DOUBLE PRECISION array, dimension (N) */
2948 /*          The generated random numbers. */
2949 
2950 /*  Further Details */
2951 /*  =============== */
2952 
2953 /*  This routine calls the auxiliary routine DLARUV to generate random */
2954 /*  real numbers from a uniform (0,1) distribution, in batches of up to */
2955 /*  128 using vectorisable code. The Box-Muller method is used to */
2956 /*  transform numbers from a uniform to a normal distribution. */
2957 
2958 /*  ===================================================================== */
2959 
2960 /*     .. Parameters .. */
2961 /*     .. */
2962 /*     .. Local Scalars .. */
2963 /*     .. */
2964 /*     .. Local Arrays .. */
2965 /*     .. */
2966 /*     .. Intrinsic Functions .. */
2967 /*     .. */
2968 /*     .. External Subroutines .. */
2969 /*     .. */
2970 /*     .. Executable Statements .. */
2971 
2972     /* Parameter adjustments */
2973     --x;
2974     --iseed;
2975 
2976     /* Function Body */
2977     i__1 = *n;
2978     for (iv = 1; iv <= i__1; iv += 64) {
2979 /* Computing MIN */
2980 	i__2 = 64, i__3 = *n - iv + 1;
2981 	il = std::min(i__2,i__3);
2982 	if (*idist == 3) {
2983 	    il2 = il << 1;
2984 	} else {
2985 	    il2 = il;
2986 	}
2987 
2988 /*        Call DLARUV to generate IL2 numbers from a uniform (0,1) */
2989 /*        distribution (IL2 <= LV) */
2990 
2991 	dlaruv_(&iseed[1], &il2, u);
2992 
2993 	if (*idist == 1) {
2994 
2995 /*           Copy generated numbers */
2996 
2997 	    i__2 = il;
2998 	    for (i__ = 1; i__ <= i__2; ++i__) {
2999 		x[iv + i__ - 1] = u[i__ - 1];
3000 /* L10: */
3001 	    }
3002 	} else if (*idist == 2) {
3003 
3004 /*           Convert generated numbers to uniform (-1,1) distribution */
3005 
3006 	    i__2 = il;
3007 	    for (i__ = 1; i__ <= i__2; ++i__) {
3008 		x[iv + i__ - 1] = u[i__ - 1] * 2. - 1.;
3009 /* L20: */
3010 	    }
3011 	} else if (*idist == 3) {
3012 
3013 /*           Convert generated numbers to normal (0,1) distribution */
3014 
3015 	    i__2 = il;
3016 	    for (i__ = 1; i__ <= i__2; ++i__) {
3017 		x[iv + i__ - 1] = sqrt(log(u[(i__ << 1) - 2]) * -2.) * cos(u[(
3018 			i__ << 1) - 1] * 6.2831853071795864769252867663);
3019 /* L30: */
3020 	    }
3021 	}
3022 /* L40: */
3023     }
3024     return 0;
3025 
3026 /*     End of DLARNV */
3027 
3028 } /* dlarnv_ */
3029 
dlarra_(integer * n,double * d__,double * e,double * e2,double * spltol,double * tnrm,integer * nsplit,integer * isplit,integer * info)3030 /* Subroutine */ int dlarra_(integer *n, double *d__, double *e,
3031 	double *e2, double *spltol, double *tnrm, integer *nsplit,
3032 	 integer *isplit, integer *info)
3033 {
3034     /* System generated locals */
3035     integer i__1;
3036     double d__1, d__2;
3037 
3038     /* Builtin functions
3039     double sqrt(double); */
3040 
3041     /* Local variables */
3042     integer i__;
3043     double tmp1, eabs;
3044 
3045 
3046 /*  -- LAPACK auxiliary routine (version 3.1) -- */
3047 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
3048 /*     November 2006 */
3049 
3050 /*     .. Scalar Arguments .. */
3051 /*     .. */
3052 /*     .. Array Arguments .. */
3053 /*     .. */
3054 
3055 /*  Purpose */
3056 /*  ======= */
3057 
3058 /*  Compute the splitting points with threshold SPLTOL. */
3059 /*  DLARRA sets any "small" off-diagonal elements to zero. */
3060 
3061 /*  Arguments */
3062 /*  ========= */
3063 
3064 /*  N       (input) INTEGER */
3065 /*          The order of the matrix. N > 0. */
3066 
3067 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
3068 /*          On entry, the N diagonal elements of the tridiagonal */
3069 /*          matrix T. */
3070 
3071 /*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
3072 /*          On entry, the first (N-1) entries contain the subdiagonal */
3073 /*          elements of the tridiagonal matrix T; E(N) need not be set. */
3074 /*          On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, */
3075 /*          are set to zero, the other entries of E are untouched. */
3076 
3077 /*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
3078 /*          On entry, the first (N-1) entries contain the SQUARES of the */
3079 /*          subdiagonal elements of the tridiagonal matrix T; */
3080 /*          E2(N) need not be set. */
3081 /*          On exit, the entries E2( ISPLIT( I ) ), */
3082 /*          1 <= I <= NSPLIT, have been set to zero */
3083 
3084 /*  SPLTOL (input) DOUBLE PRECISION */
3085 /*          The threshold for splitting. Two criteria can be used: */
3086 /*          SPLTOL<0 : criterion based on absolute off-diagonal value */
3087 /*          SPLTOL>0 : criterion that preserves relative accuracy */
3088 
3089 /*  TNRM (input) DOUBLE PRECISION */
3090 /*          The norm of the matrix. */
3091 
3092 /*  NSPLIT  (output) INTEGER */
3093 /*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
3094 
3095 /*  ISPLIT  (output) INTEGER array, dimension (N) */
3096 /*          The splitting points, at which T breaks up into blocks. */
3097 /*          The first block consists of rows/columns 1 to ISPLIT(1), */
3098 /*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
3099 /*          etc., and the NSPLIT-th consists of rows/columns */
3100 /*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
3101 
3102 
3103 /*  INFO    (output) INTEGER */
3104 /*          = 0:  successful exit */
3105 
3106 /*  Further Details */
3107 /*  =============== */
3108 
3109 /*  Based on contributions by */
3110 /*     Beresford Parlett, University of California, Berkeley, USA */
3111 /*     Jim Demmel, University of California, Berkeley, USA */
3112 /*     Inderjit Dhillon, University of Texas, Austin, USA */
3113 /*     Osni Marques, LBNL/NERSC, USA */
3114 /*     Christof Voemel, University of California, Berkeley, USA */
3115 
3116 /*  ===================================================================== */
3117 
3118 /*     .. Parameters .. */
3119 /*     .. */
3120 /*     .. Local Scalars .. */
3121 /*     .. */
3122 /*     .. Intrinsic Functions .. */
3123 /*     .. */
3124 /*     .. Executable Statements .. */
3125 
3126     /* Parameter adjustments */
3127     --isplit;
3128     --e2;
3129     --e;
3130     --d__;
3131 
3132     /* Function Body */
3133     *info = 0;
3134 /*     Compute splitting points */
3135     *nsplit = 1;
3136     if (*spltol < 0.) {
3137 /*        Criterion based on absolute off-diagonal value */
3138 	tmp1 = abs(*spltol) * *tnrm;
3139 	i__1 = *n - 1;
3140 	for (i__ = 1; i__ <= i__1; ++i__) {
3141 	    eabs = (d__1 = e[i__], abs(d__1));
3142 	    if (eabs <= tmp1) {
3143 		e[i__] = 0.;
3144 		e2[i__] = 0.;
3145 		isplit[*nsplit] = i__;
3146 		++(*nsplit);
3147 	    }
3148 /* L9: */
3149 	}
3150     } else {
3151 /*        Criterion that guarantees relative accuracy */
3152 	i__1 = *n - 1;
3153 	for (i__ = 1; i__ <= i__1; ++i__) {
3154 	    eabs = (d__1 = e[i__], abs(d__1));
3155 	    if (eabs <= *spltol * sqrt((d__1 = d__[i__], abs(d__1))) * sqrt((
3156 		    d__2 = d__[i__ + 1], abs(d__2)))) {
3157 		e[i__] = 0.;
3158 		e2[i__] = 0.;
3159 		isplit[*nsplit] = i__;
3160 		++(*nsplit);
3161 	    }
3162 /* L10: */
3163 	}
3164     }
3165     isplit[*nsplit] = *n;
3166     return 0;
3167 
3168 /*     End of DLARRA */
3169 
3170 } /* dlarra_ */
3171 
dlarrb_(integer * n,double * d__,double * lld,integer * ifirst,integer * ilast,double * rtol1,double * rtol2,integer * offset,double * w,double * wgap,double * werr,double * work,integer * iwork,double * pivmin,double * spdiam,integer * twist,integer * info)3172 /* Subroutine */ int dlarrb_(integer *n, double *d__, double *lld,
3173 	integer *ifirst, integer *ilast, double *rtol1, double *rtol2,
3174 	 integer *offset, double *w, double *wgap, double *werr,
3175 	double *work, integer *iwork, double *pivmin, double *
3176 	spdiam, integer *twist, integer *info)
3177 {
3178     /* System generated locals */
3179     integer i__1;
3180     double d__1, d__2;
3181 
3182     /* Local variables */
3183     integer i__, k, r__, i1, ii, ip;
3184     double gap, mid, tmp, back, lgap, rgap, left;
3185     integer iter, nint, prev, next;
3186     double cvrgd, right, width;
3187     integer negcnt;
3188     double mnwdth;
3189     integer olnint, maxitr;
3190 
3191 
3192 /*  -- LAPACK auxiliary routine (version 3.1) -- */
3193 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
3194 /*     November 2006 */
3195 
3196 /*     .. Scalar Arguments .. */
3197 /*     .. */
3198 /*     .. Array Arguments .. */
3199 /*     .. */
3200 
3201 /*  Purpose */
3202 /*  ======= */
3203 
3204 /*  Given the relatively robust representation(RRR) L D L^T, DLARRB */
3205 /*  does "limited" bisection to refine the eigenvalues of L D L^T, */
3206 /*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
3207 /*  guesses for these eigenvalues are input in W, the corresponding estimate */
3208 /*  of the error in these guesses and their gaps are input in WERR */
3209 /*  and WGAP, respectively. During bisection, intervals */
3210 /*  [left, right] are maintained by storing their mid-points and */
3211 /*  semi-widths in the arrays W and WERR respectively. */
3212 
3213 /*  Arguments */
3214 /*  ========= */
3215 
3216 /*  N       (input) INTEGER */
3217 /*          The order of the matrix. */
3218 
3219 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
3220 /*          The N diagonal elements of the diagonal matrix D. */
3221 
3222 /*  LLD     (input) DOUBLE PRECISION array, dimension (N-1) */
3223 /*          The (N-1) elements L(i)*L(i)*D(i). */
3224 
3225 /*  IFIRST  (input) INTEGER */
3226 /*          The index of the first eigenvalue to be computed. */
3227 
3228 /*  ILAST   (input) INTEGER */
3229 /*          The index of the last eigenvalue to be computed. */
3230 
3231 /*  RTOL1   (input) DOUBLE PRECISION */
3232 /*  RTOL2   (input) DOUBLE PRECISION */
3233 /*          Tolerance for the convergence of the bisection intervals. */
3234 /*          An interval [LEFT,RIGHT] has converged if */
3235 /*          RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
3236 /*          where GAP is the (estimated) distance to the nearest */
3237 /*          eigenvalue. */
3238 
3239 /*  OFFSET  (input) INTEGER */
3240 /*          Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */
3241 /*          through ILAST-OFFSET elements of these arrays are to be used. */
3242 
3243 /*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
3244 /*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
3245 /*          estimates of the eigenvalues of L D L^T indexed IFIRST throug */
3246 /*          ILAST. */
3247 /*          On output, these estimates are refined. */
3248 
3249 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N-1) */
3250 /*          On input, the (estimated) gaps between consecutive */
3251 /*          eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */
3252 /*          eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */
3253 /*          then WGAP(IFIRST-OFFSET) must be set to ZERO. */
3254 /*          On output, these gaps are refined. */
3255 
3256 /*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
3257 /*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
3258 /*          the errors in the estimates of the corresponding elements in W. */
3259 /*          On output, these errors are refined. */
3260 
3261 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
3262 /*          Workspace. */
3263 
3264 /*  IWORK   (workspace) INTEGER array, dimension (2*N) */
3265 /*          Workspace. */
3266 
3267 /*  PIVMIN  (input) DOUBLE PRECISION */
3268 /*          The minimum pivot in the Sturm sequence. */
3269 
3270 /*  SPDIAM  (input) DOUBLE PRECISION */
3271 /*          The spectral diameter of the matrix. */
3272 
3273 /*  TWIST   (input) INTEGER */
3274 /*          The twist index for the twisted factorization that is used */
3275 /*          for the negcount. */
3276 /*          TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */
3277 /*          TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */
3278 /*          TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */
3279 
3280 /*  INFO    (output) INTEGER */
3281 /*          Error flag. */
3282 
3283 /*  Further Details */
3284 /*  =============== */
3285 
3286 /*  Based on contributions by */
3287 /*     Beresford Parlett, University of California, Berkeley, USA */
3288 /*     Jim Demmel, University of California, Berkeley, USA */
3289 /*     Inderjit Dhillon, University of Texas, Austin, USA */
3290 /*     Osni Marques, LBNL/NERSC, USA */
3291 /*     Christof Voemel, University of California, Berkeley, USA */
3292 
3293 /*  ===================================================================== */
3294 
3295 /*     .. Parameters .. */
3296 /*     .. */
3297 /*     .. Local Scalars .. */
3298 /*     .. */
3299 /*     .. External Functions .. */
3300 
3301 /*     .. */
3302 /*     .. Intrinsic Functions .. */
3303 /*     .. */
3304 /*     .. Executable Statements .. */
3305 
3306     /* Parameter adjustments */
3307     --iwork;
3308     --work;
3309     --werr;
3310     --wgap;
3311     --w;
3312     --lld;
3313     --d__;
3314 
3315     /* Function Body */
3316     *info = 0;
3317 
3318     maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) +
3319 	    2;
3320     mnwdth = *pivmin * 2.;
3321 
3322     r__ = *twist;
3323     if (r__ < 1 || r__ > *n) {
3324 	r__ = *n;
3325     }
3326 
3327 /*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
3328 /*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
3329 /*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */
3330 /*     for an unconverged interval is set to the index of the next unconverged */
3331 /*     interval, and is -1 or 0 for a converged interval. Thus a linked */
3332 /*     list of unconverged intervals is set up. */
3333 
3334     i1 = *ifirst;
3335 /*     The number of unconverged intervals */
3336     nint = 0;
3337 /*     The last unconverged interval found */
3338     prev = 0;
3339     rgap = wgap[i1 - *offset];
3340     i__1 = *ilast;
3341     for (i__ = i1; i__ <= i__1; ++i__) {
3342 	k = i__ << 1;
3343 	ii = i__ - *offset;
3344 	left = w[ii] - werr[ii];
3345 	right = w[ii] + werr[ii];
3346 	lgap = rgap;
3347 	rgap = wgap[ii];
3348 	gap = std::min(lgap,rgap);
3349 /*        Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
3350 /*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */
3351 
3352 /*        Do while( NEGCNT(LEFT).GT.I-1 ) */
3353 
3354 	back = werr[ii];
3355 L20:
3356 	negcnt = dlaneg_(n, &d__[1], &lld[1], &left, pivmin, &r__);
3357 	if (negcnt > i__ - 1) {
3358 	    left -= back;
3359 	    back *= 2.;
3360 	    goto L20;
3361 	}
3362 
3363 /*        Do while( NEGCNT(RIGHT).LT.I ) */
3364 /*        Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */
3365 
3366 	back = werr[ii];
3367 L50:
3368 	negcnt = dlaneg_(n, &d__[1], &lld[1], &right, pivmin, &r__);
3369 	if (negcnt < i__) {
3370 	    right += back;
3371 	    back *= 2.;
3372 	    goto L50;
3373 	}
3374 	width = (d__1 = left - right, abs(d__1)) * .5;
3375 /* Computing MAX */
3376 	d__1 = abs(left), d__2 = abs(right);
3377 	tmp = std::max(d__1,d__2);
3378 /* Computing MAX */
3379 	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
3380 	cvrgd = std::max(d__1,d__2);
3381 	if (width <= cvrgd || width <= mnwdth) {
3382 /*           This interval has already converged and does not need refinement. */
3383 /*           (Note that the gaps might change through refining the */
3384 /*            eigenvalues, however, they can only get bigger.) */
3385 /*           Remove it from the list. */
3386 	    iwork[k - 1] = -1;
3387 /*           Make sure that I1 always points to the first unconverged interval */
3388 	    if (i__ == i1 && i__ < *ilast) {
3389 		i1 = i__ + 1;
3390 	    }
3391 	    if (prev >= i1 && i__ <= *ilast) {
3392 		iwork[(prev << 1) - 1] = i__ + 1;
3393 	    }
3394 	} else {
3395 /*           unconverged interval found */
3396 	    prev = i__;
3397 	    ++nint;
3398 	    iwork[k - 1] = i__ + 1;
3399 	    iwork[k] = negcnt;
3400 	}
3401 	work[k - 1] = left;
3402 	work[k] = right;
3403 /* L75: */
3404     }
3405 
3406 /*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
3407 /*     and while (ITER.LT.MAXITR) */
3408 
3409     iter = 0;
3410 L80:
3411     prev = i1 - 1;
3412     i__ = i1;
3413     olnint = nint;
3414     i__1 = olnint;
3415     for (ip = 1; ip <= i__1; ++ip) {
3416 	k = i__ << 1;
3417 	ii = i__ - *offset;
3418 	rgap = wgap[ii];
3419 	lgap = rgap;
3420 	if (ii > 1) {
3421 	    lgap = wgap[ii - 1];
3422 	}
3423 	gap = std::min(lgap,rgap);
3424 	next = iwork[k - 1];
3425 	left = work[k - 1];
3426 	right = work[k];
3427 	mid = (left + right) * .5;
3428 /*        semiwidth of interval */
3429 	width = right - mid;
3430 /* Computing MAX */
3431 	d__1 = abs(left), d__2 = abs(right);
3432 	tmp = std::max(d__1,d__2);
3433 /* Computing MAX */
3434 	d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp;
3435 	cvrgd = std::max(d__1,d__2);
3436 	if (width <= cvrgd || width <= mnwdth || iter == maxitr) {
3437 /*           reduce number of unconverged intervals */
3438 	    --nint;
3439 /*           Mark interval as converged. */
3440 	    iwork[k - 1] = 0;
3441 	    if (i1 == i__) {
3442 		i1 = next;
3443 	    } else {
3444 /*              Prev holds the last unconverged interval previously examined */
3445 		if (prev >= i1) {
3446 		    iwork[(prev << 1) - 1] = next;
3447 		}
3448 	    }
3449 	    i__ = next;
3450 	    goto L100;
3451 	}
3452 	prev = i__;
3453 
3454 /*        Perform one bisection step */
3455 
3456 	negcnt = dlaneg_(n, &d__[1], &lld[1], &mid, pivmin, &r__);
3457 	if (negcnt <= i__ - 1) {
3458 	    work[k - 1] = mid;
3459 	} else {
3460 	    work[k] = mid;
3461 	}
3462 	i__ = next;
3463 L100:
3464 	;
3465     }
3466     ++iter;
3467 /*     do another loop if there are still unconverged intervals */
3468 /*     However, in the last iteration, all intervals are accepted */
3469 /*     since this is the best we can do. */
3470     if (nint > 0 && iter <= maxitr) {
3471 	goto L80;
3472     }
3473 
3474 
3475 /*     At this point, all the intervals have converged */
3476     i__1 = *ilast;
3477     for (i__ = *ifirst; i__ <= i__1; ++i__) {
3478 	k = i__ << 1;
3479 	ii = i__ - *offset;
3480 /*        All intervals marked by '0' have been refined. */
3481 	if (iwork[k - 1] == 0) {
3482 	    w[ii] = (work[k - 1] + work[k]) * .5;
3483 	    werr[ii] = work[k] - w[ii];
3484 	}
3485 /* L110: */
3486     }
3487 
3488     i__1 = *ilast;
3489     for (i__ = *ifirst + 1; i__ <= i__1; ++i__) {
3490 	k = i__ << 1;
3491 	ii = i__ - *offset;
3492 /* Computing MAX */
3493 	d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1];
3494 	wgap[ii - 1] = std::max(d__1,d__2);
3495 /* L111: */
3496     }
3497     return 0;
3498 
3499 /*     End of DLARRB */
3500 
3501 } /* dlarrb_ */
3502 
dlarrc_(const char * jobt,integer * n,double * vl,double * vu,double * d__,double * e,double * pivmin,integer * eigcnt,integer * lcnt,integer * rcnt,integer * info)3503 /* Subroutine */ int dlarrc_(const char *jobt, integer *n, double *vl,
3504 	double *vu, double *d__, double *e, double *pivmin,
3505 	integer *eigcnt, integer *lcnt, integer *rcnt, integer *info)
3506 {
3507     /* System generated locals */
3508     integer i__1;
3509     double d__1;
3510 
3511     /* Local variables */
3512     integer i__;
3513     double sl, su, tmp, tmp2;
3514     bool matt;
3515 
3516     double lpivot, rpivot;
3517 
3518 
3519 /*  -- LAPACK auxiliary routine (version 3.1) -- */
3520 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
3521 /*     November 2006 */
3522 
3523 /*     .. Scalar Arguments .. */
3524 /*     .. */
3525 /*     .. Array Arguments .. */
3526 /*     .. */
3527 
3528 /*  Purpose */
3529 /*  ======= */
3530 
3531 /*  Find the number of eigenvalues of the symmetric tridiagonal matrix T */
3532 /*  that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T */
3533 /*  if JOBT = 'L'. */
3534 
3535 /*  Arguments */
3536 /*  ========= */
3537 
3538 /*  JOBT    (input) CHARACTER*1 */
3539 /*          = 'T':  Compute Sturm count for matrix T. */
3540 /*          = 'L':  Compute Sturm count for matrix L D L^T. */
3541 
3542 /*  N       (input) INTEGER */
3543 /*          The order of the matrix. N > 0. */
3544 
3545 /*  VL      (input) DOUBLE PRECISION */
3546 /*  VU      (input) DOUBLE PRECISION */
3547 /*          The lower and upper bounds for the eigenvalues. */
3548 
3549 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
3550 /*          JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. */
3551 /*          JOBT = 'L': The N diagonal elements of the diagonal matrix D. */
3552 
3553 /*  E       (input) DOUBLE PRECISION array, dimension (N) */
3554 /*          JOBT = 'T': The N-1 offdiagonal elements of the matrix T. */
3555 /*          JOBT = 'L': The N-1 offdiagonal elements of the matrix L. */
3556 
3557 /*  PIVMIN  (input) DOUBLE PRECISION */
3558 /*          The minimum pivot in the Sturm sequence for T. */
3559 
3560 /*  EIGCNT  (output) INTEGER */
3561 /*          The number of eigenvalues of the symmetric tridiagonal matrix T */
3562 /*          that are in the interval (VL,VU] */
3563 
3564 /*  LCNT    (output) INTEGER */
3565 /*  RCNT    (output) INTEGER */
3566 /*          The left and right negcounts of the interval. */
3567 
3568 /*  INFO    (output) INTEGER */
3569 
3570 /*  Further Details */
3571 /*  =============== */
3572 
3573 /*  Based on contributions by */
3574 /*     Beresford Parlett, University of California, Berkeley, USA */
3575 /*     Jim Demmel, University of California, Berkeley, USA */
3576 /*     Inderjit Dhillon, University of Texas, Austin, USA */
3577 /*     Osni Marques, LBNL/NERSC, USA */
3578 /*     Christof Voemel, University of California, Berkeley, USA */
3579 
3580 /*  ===================================================================== */
3581 
3582 /*     .. Parameters .. */
3583 /*     .. */
3584 /*     .. Local Scalars .. */
3585 /*     .. */
3586 /*     .. External Functions .. */
3587 /*     .. */
3588 /*     .. Executable Statements .. */
3589 
3590     /* Parameter adjustments */
3591     --e;
3592     --d__;
3593 
3594     /* Function Body */
3595     *info = 0;
3596     *lcnt = 0;
3597     *rcnt = 0;
3598     *eigcnt = 0;
3599     matt = lsame_(jobt, "T");
3600     if (matt) {
3601 /*        Sturm sequence count on T */
3602 	lpivot = d__[1] - *vl;
3603 	rpivot = d__[1] - *vu;
3604 	if (lpivot <= 0.) {
3605 	    ++(*lcnt);
3606 	}
3607 	if (rpivot <= 0.) {
3608 	    ++(*rcnt);
3609 	}
3610 	i__1 = *n - 1;
3611 	for (i__ = 1; i__ <= i__1; ++i__) {
3612 /* Computing 2nd power */
3613 	    d__1 = e[i__];
3614 	    tmp = d__1 * d__1;
3615 	    lpivot = d__[i__ + 1] - *vl - tmp / lpivot;
3616 	    rpivot = d__[i__ + 1] - *vu - tmp / rpivot;
3617 	    if (lpivot <= 0.) {
3618 		++(*lcnt);
3619 	    }
3620 	    if (rpivot <= 0.) {
3621 		++(*rcnt);
3622 	    }
3623 /* L10: */
3624 	}
3625     } else {
3626 /*        Sturm sequence count on L D L^T */
3627 	sl = -(*vl);
3628 	su = -(*vu);
3629 	i__1 = *n - 1;
3630 	for (i__ = 1; i__ <= i__1; ++i__) {
3631 	    lpivot = d__[i__] + sl;
3632 	    rpivot = d__[i__] + su;
3633 	    if (lpivot <= 0.) {
3634 		++(*lcnt);
3635 	    }
3636 	    if (rpivot <= 0.) {
3637 		++(*rcnt);
3638 	    }
3639 	    tmp = e[i__] * d__[i__] * e[i__];
3640 
3641 	    tmp2 = tmp / lpivot;
3642 	    if (tmp2 == 0.) {
3643 		sl = tmp - *vl;
3644 	    } else {
3645 		sl = sl * tmp2 - *vl;
3646 	    }
3647 
3648 	    tmp2 = tmp / rpivot;
3649 	    if (tmp2 == 0.) {
3650 		su = tmp - *vu;
3651 	    } else {
3652 		su = su * tmp2 - *vu;
3653 	    }
3654 /* L20: */
3655 	}
3656 	lpivot = d__[*n] + sl;
3657 	rpivot = d__[*n] + su;
3658 	if (lpivot <= 0.) {
3659 	    ++(*lcnt);
3660 	}
3661 	if (rpivot <= 0.) {
3662 	    ++(*rcnt);
3663 	}
3664     }
3665     *eigcnt = *rcnt - *lcnt;
3666     return 0;
3667 
3668 /*     end of DLARRC */
3669 
3670 } /* dlarrc_ */
3671 
dlarrd_(const char * range,const char * order,integer * n,double * vl,double * vu,integer * il,integer * iu,double * gers,double * reltol,double * d__,double * e,double * e2,double * pivmin,integer * nsplit,integer * isplit,integer * m,double * w,double * werr,double * wl,double * wu,integer * iblock,integer * indexw,double * work,integer * iwork,integer * info)3672 /* Subroutine */ int dlarrd_(const char *range, const char *order, integer *n, double *vl, double *vu, integer *il,
3673 	integer *iu, double *gers, double *reltol, double *d__, double *e, double *e2,	double *pivmin,
3674 	integer *nsplit, integer *isplit, integer *m, double *w, double *werr, double *wl, double *wu,
3675 	integer *iblock, integer *indexw, double *work, integer *iwork, integer *info)
3676 {
3677 	/* Table of constant values */
3678 	static integer c__1 = 1;
3679 	static integer c_n1 = -1;
3680 	static integer c__3 = 3;
3681 	static integer c__2 = 2;
3682 	static integer c__0 = 0;
3683 
3684     /* System generated locals */
3685     integer i__1, i__2, i__3;
3686     double d__1, d__2;
3687 
3688     /* Local variables */
3689     integer i__, j, ib, ie, je, nb;
3690     double gl;
3691     integer im, in;
3692     double gu;
3693     integer iw, jee;
3694     double eps;
3695     integer nwl;
3696     double wlu, wul;
3697     integer nwu;
3698     double tmp1, tmp2;
3699     integer iend, jblk, ioff, iout, itmp1, itmp2, jdisc, iinfo;
3700     double atoli;
3701     integer iwoff, itmax;
3702     double wkill, rtoli, uflow, tnorm;
3703     integer ibegin,irange, idiscl, idumma[1], idiscu;
3704     bool ncnvrg, toofew;
3705 
3706 
3707 /*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
3708 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
3709 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
3710 /*  -- April 2009                                                      -- */
3711 
3712 /*     .. Scalar Arguments .. */
3713 /*     .. */
3714 /*     .. Array Arguments .. */
3715 /*     .. */
3716 
3717 /*  Purpose */
3718 /*  ======= */
3719 
3720 /*  DLARRD computes the eigenvalues of a symmetric tridiagonal */
3721 /*  matrix T to suitable accuracy. This is an auxiliary code to be */
3722 /*  called from DSTEMR. */
3723 /*  The user may ask for all eigenvalues, all eigenvalues */
3724 /*  in the half-open interval (VL, VU], or the IL-th through IU-th */
3725 /*  eigenvalues. */
3726 
3727 /*  To avoid overflow, the matrix must be scaled so that its */
3728 /*  largest element is no greater than overflow**(1/2) * */
3729 /*  underflow**(1/4) in absolute value, and for greatest */
3730 /*  accuracy, it should not be much smaller than that. */
3731 
3732 /*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
3733 /*  Matrix", Report CS41, Computer Science Dept., Stanford */
3734 /*  University, July 21, 1966. */
3735 
3736 /*  Arguments */
3737 /*  ========= */
3738 
3739 /*  RANGE   (input) CHARACTER */
3740 /*          = 'A': ("All")   all eigenvalues will be found. */
3741 /*          = 'V': ("Value") all eigenvalues in the half-open interval */
3742 /*                           (VL, VU] will be found. */
3743 /*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
3744 /*                           entire matrix) will be found. */
3745 
3746 /*  ORDER   (input) CHARACTER */
3747 /*          = 'B': ("By Block") the eigenvalues will be grouped by */
3748 /*                              split-off block (see IBLOCK, ISPLIT) and */
3749 /*                              ordered from smallest to largest within */
3750 /*                              the block. */
3751 /*          = 'E': ("Entire matrix") */
3752 /*                              the eigenvalues for the entire matrix */
3753 /*                              will be ordered from smallest to */
3754 /*                              largest. */
3755 
3756 /*  N       (input) INTEGER */
3757 /*          The order of the tridiagonal matrix T.  N >= 0. */
3758 
3759 /*  VL      (input) DOUBLE PRECISION */
3760 /*  VU      (input) DOUBLE PRECISION */
3761 /*          If RANGE='V', the lower and upper bounds of the interval to */
3762 /*          be searched for eigenvalues.  Eigenvalues less than or equal */
3763 /*          to VL, or greater than VU, will not be returned.  VL < VU. */
3764 /*          Not referenced if RANGE = 'A' or 'I'. */
3765 
3766 /*  IL      (input) INTEGER */
3767 /*  IU      (input) INTEGER */
3768 /*          If RANGE='I', the indices (in ascending order) of the */
3769 /*          smallest and largest eigenvalues to be returned. */
3770 /*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
3771 /*          Not referenced if RANGE = 'A' or 'V'. */
3772 
3773 /*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
3774 /*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
3775 /*          is (GERS(2*i-1), GERS(2*i)). */
3776 
3777 /*  RELTOL  (input) DOUBLE PRECISION */
3778 /*          The minimum relative width of an interval.  When an interval */
3779 /*          is narrower than RELTOL times the larger (in */
3780 /*          magnitude) endpoint, then it is considered to be */
3781 /*          sufficiently small, i.e., converged.  Note: this should */
3782 /*          always be at least radix*machine epsilon. */
3783 
3784 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
3785 /*          The n diagonal elements of the tridiagonal matrix T. */
3786 
3787 /*  E       (input) DOUBLE PRECISION array, dimension (N-1) */
3788 /*          The (n-1) off-diagonal elements of the tridiagonal matrix T. */
3789 
3790 /*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
3791 /*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
3792 
3793 /*  PIVMIN  (input) DOUBLE PRECISION */
3794 /*          The minimum pivot allowed in the Sturm sequence for T. */
3795 
3796 /*  NSPLIT  (input) INTEGER */
3797 /*          The number of diagonal blocks in the matrix T. */
3798 /*          1 <= NSPLIT <= N. */
3799 
3800 /*  ISPLIT  (input) INTEGER array, dimension (N) */
3801 /*          The splitting points, at which T breaks up into submatrices. */
3802 /*          The first submatrix consists of rows/columns 1 to ISPLIT(1), */
3803 /*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
3804 /*          etc., and the NSPLIT-th consists of rows/columns */
3805 /*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
3806 /*          (Only the first NSPLIT elements will actually be used, but */
3807 /*          since the user cannot know a priori what value NSPLIT will */
3808 /*          have, N words must be reserved for ISPLIT.) */
3809 
3810 /*  M       (output) INTEGER */
3811 /*          The actual number of eigenvalues found. 0 <= M <= N. */
3812 /*          (See also the description of INFO=2,3.) */
3813 
3814 /*  W       (output) DOUBLE PRECISION array, dimension (N) */
3815 /*          On exit, the first M elements of W will contain the */
3816 /*          eigenvalue approximations. DLARRD computes an interval */
3817 /*          I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue */
3818 /*          approximation is given as the interval midpoint */
3819 /*          W(j)= ( a_j + b_j)/2. The corresponding error is bounded by */
3820 /*          WERR(j) = abs( a_j - b_j)/2 */
3821 
3822 /*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
3823 /*          The error bound on the corresponding eigenvalue approximation */
3824 /*          in W. */
3825 
3826 /*  WL      (output) DOUBLE PRECISION */
3827 /*  WU      (output) DOUBLE PRECISION */
3828 /*          The interval (WL, WU] contains all the wanted eigenvalues. */
3829 /*          If RANGE='V', then WL=VL and WU=VU. */
3830 /*          If RANGE='A', then WL and WU are the global Gerschgorin bounds */
3831 /*                        on the spectrum. */
3832 /*          If RANGE='I', then WL and WU are computed by DLAEBZ from the */
3833 /*                        index range specified. */
3834 
3835 /*  IBLOCK  (output) INTEGER array, dimension (N) */
3836 /*          At each row/column j where E(j) is zero or small, the */
3837 /*          matrix T is considered to split into a block diagonal */
3838 /*          matrix.  On exit, if INFO = 0, IBLOCK(i) specifies to which */
3839 /*          block (from 1 to the number of blocks) the eigenvalue W(i) */
3840 /*          belongs.  (DLARRD may use the remaining N-M elements as */
3841 /*          workspace.) */
3842 
3843 /*  INDEXW  (output) INTEGER array, dimension (N) */
3844 /*          The indices of the eigenvalues within each block (submatrix); */
3845 /*          for example, INDEXW(i)= j and IBLOCK(i)=k imply that the */
3846 /*          i-th eigenvalue W(i) is the j-th eigenvalue in block k. */
3847 
3848 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N) */
3849 
3850 /*  IWORK   (workspace) INTEGER array, dimension (3*N) */
3851 
3852 /*  INFO    (output) INTEGER */
3853 /*          = 0:  successful exit */
3854 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
3855 /*          > 0:  some or all of the eigenvalues failed to converge or */
3856 /*                were not computed: */
3857 /*                =1 or 3: Bisection failed to converge for some */
3858 /*                        eigenvalues; these eigenvalues are flagged by a */
3859 /*                        negative block number.  The effect is that the */
3860 /*                        eigenvalues may not be as accurate as the */
3861 /*                        absolute and relative tolerances.  This is */
3862 /*                        generally caused by unexpectedly inaccurate */
3863 /*                        arithmetic. */
3864 /*                =2 or 3: RANGE='I' only: Not all of the eigenvalues */
3865 /*                        IL:IU were found. */
3866 /*                        Effect: M < IU+1-IL */
3867 /*                        Cause:  non-monotonic arithmetic, causing the */
3868 /*                                Sturm sequence to be non-monotonic. */
3869 /*                        Cure:   recalculate, using RANGE='A', and pick */
3870 /*                                out eigenvalues IL:IU.  In some cases, */
3871 /*                                increasing the PARAMETER "FUDGE" may */
3872 /*                                make things work. */
3873 /*                = 4:    RANGE='I', and the Gershgorin interval */
3874 /*                        initially used was too small.  No eigenvalues */
3875 /*                        were computed. */
3876 /*                        Probable cause: your machine has sloppy */
3877 /*                                        floating-point arithmetic. */
3878 /*                        Cure: Increase the PARAMETER "FUDGE", */
3879 /*                              recompile, and try again. */
3880 
3881 /*  Internal Parameters */
3882 /*  =================== */
3883 
3884 /*  FUDGE   DOUBLE PRECISION, default = 2 */
3885 /*          A "fudge factor" to widen the Gershgorin intervals.  Ideally, */
3886 /*          a value of 1 should work, but on machines with sloppy */
3887 /*          arithmetic, this needs to be larger.  The default for */
3888 /*          publicly released versions should be large enough to handle */
3889 /*          the worst machine around.  Note that this has no effect */
3890 /*          on accuracy of the solution. */
3891 
3892 /*  Based on contributions by */
3893 /*     W. Kahan, University of California, Berkeley, USA */
3894 /*     Beresford Parlett, University of California, Berkeley, USA */
3895 /*     Jim Demmel, University of California, Berkeley, USA */
3896 /*     Inderjit Dhillon, University of Texas, Austin, USA */
3897 /*     Osni Marques, LBNL/NERSC, USA */
3898 /*     Christof Voemel, University of California, Berkeley, USA */
3899 
3900 /*  ===================================================================== */
3901 
3902 /*     .. Parameters .. */
3903 /*     .. */
3904 /*     .. Local Scalars .. */
3905 /*     .. */
3906 /*     .. Local Arrays .. */
3907 /*     .. */
3908 /*     .. External Functions .. */
3909 /*     .. */
3910 /*     .. External Subroutines .. */
3911 /*     .. */
3912 /*     .. Intrinsic Functions .. */
3913 /*     .. */
3914 /*     .. Executable Statements .. */
3915 
3916     /* Parameter adjustments */
3917     --iwork;
3918     --work;
3919     --indexw;
3920     --iblock;
3921     --werr;
3922     --w;
3923     --isplit;
3924     --e2;
3925     --e;
3926     --d__;
3927     --gers;
3928 
3929     /* Function Body */
3930     *info = 0;
3931 
3932 /*     Decode RANGE */
3933 
3934     if (lsame_(range, "A")) {
3935 	irange = 1;
3936     } else if (lsame_(range, "V")) {
3937 	irange = 2;
3938     } else if (lsame_(range, "I")) {
3939 	irange = 3;
3940     } else {
3941 	irange = 0;
3942     }
3943 
3944 /*     Check for Errors */
3945 
3946     if (irange <= 0) {
3947 	*info = -1;
3948     } else if (! (lsame_(order, "B") || lsame_(order,
3949 	    "E"))) {
3950 	*info = -2;
3951     } else if (*n < 0) {
3952 	*info = -3;
3953     } else if (irange == 2) {
3954 	if (*vl >= *vu) {
3955 	    *info = -5;
3956 	}
3957     } else if (irange == 3 && (*il < 1 || *il > std::max(1_integer,*n))) {
3958 	*info = -6;
3959     } else if (irange == 3 && (*iu < std::min(*n,*il) || *iu > *n)) {
3960 	*info = -7;
3961     }
3962 
3963     if (*info != 0) {
3964 	return 0;
3965     }
3966 /*     Initialize error flags */
3967     *info = 0;
3968     ncnvrg = false;
3969     toofew = false;
3970 /*     Quick return if possible */
3971     *m = 0;
3972     if (*n == 0) {
3973 	return 0;
3974     }
3975 /*     Simplification: */
3976     if (irange == 3 && *il == 1 && *iu == *n) {
3977 	irange = 1;
3978     }
3979 /*     Get machine constants */
3980     eps = dlamch_("P");
3981     uflow = dlamch_("U");
3982 /*     Special Case when N=1 */
3983 /*     Treat case of 1x1 matrix for quick return */
3984     if (*n == 1) {
3985 	if (irange == 1 || irange == 2 && d__[1] > *vl && d__[1] <= *vu ||
3986 		irange == 3 && *il == 1 && *iu == 1) {
3987 	    *m = 1;
3988 	    w[1] = d__[1];
3989 /*           The computation error of the eigenvalue is zero */
3990 	    werr[1] = 0.;
3991 	    iblock[1] = 1;
3992 	    indexw[1] = 1;
3993 	}
3994 	return 0;
3995     }
3996 /*     NB is the minimum vector length for vector bisection, or 0 */
3997 /*     if only scalar is to be done. */
3998     nb = ilaenv_(&c__1, "DSTEBZ", " ", n, &c_n1, &c_n1, &c_n1);
3999     if (nb <= 1) {
4000 	nb = 0;
4001     }
4002 /*     Find global spectral radius */
4003     gl = d__[1];
4004     gu = d__[1];
4005     i__1 = *n;
4006     for (i__ = 1; i__ <= i__1; ++i__) {
4007 /* Computing MIN */
4008 	d__1 = gl, d__2 = gers[(i__ << 1) - 1];
4009 	gl = std::min(d__1,d__2);
4010 /* Computing MAX */
4011 	d__1 = gu, d__2 = gers[i__ * 2];
4012 	gu = std::max(d__1,d__2);
4013 /* L5: */
4014     }
4015 /*     Compute global Gerschgorin bounds and spectral diameter */
4016 /* Computing MAX */
4017     d__1 = abs(gl), d__2 = abs(gu);
4018     tnorm = std::max(d__1,d__2);
4019     gl = gl - tnorm * 2. * eps * *n - *pivmin * 4.;
4020     gu = gu + tnorm * 2. * eps * *n + *pivmin * 4.;
4021 /*     [JAN/28/2009] remove the line below since SPDIAM variable not use */
4022 /*     SPDIAM = GU - GL */
4023 /*     Input arguments for DLAEBZ: */
4024 /*     The relative tolerance.  An interval (a,b] lies within */
4025 /*     "relative tolerance" if  b-a < RELTOL*max(|a|,|b|), */
4026     rtoli = *reltol;
4027 /*     Set the absolute tolerance for interval convergence to zero to force */
4028 /*     interval convergence based on relative size of the interval. */
4029 /*     This is dangerous because intervals might not converge when RELTOL is */
4030 /*     small. But at least a very small number should be selected so that for */
4031 /*     strongly graded matrices, the code can get relatively accurate */
4032 /*     eigenvalues. */
4033     atoli = uflow * 4. + *pivmin * 4.;
4034     if (irange == 3) {
4035 /*        RANGE='I': Compute an interval containing eigenvalues */
4036 /*        IL through IU. The initial interval [GL,GU] from the global */
4037 /*        Gerschgorin bounds GL and GU is refined by DLAEBZ. */
4038 	itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) +
4039 		2;
4040 	work[*n + 1] = gl;
4041 	work[*n + 2] = gl;
4042 	work[*n + 3] = gu;
4043 	work[*n + 4] = gu;
4044 	work[*n + 5] = gl;
4045 	work[*n + 6] = gu;
4046 	iwork[1] = -1;
4047 	iwork[2] = -1;
4048 	iwork[3] = *n + 1;
4049 	iwork[4] = *n + 1;
4050 	iwork[5] = *il - 1;
4051 	iwork[6] = *iu;
4052 
4053 	dlaebz_(&c__3, &itmax, n, &c__2, &c__2, &nb, &atoli, &rtoli, pivmin, &
4054 		d__[1], &e[1], &e2[1], &iwork[5], &work[*n + 1], &work[*n + 5]
4055 , &iout, &iwork[1], &w[1], &iblock[1], &iinfo);
4056 	if (iinfo != 0) {
4057 	    *info = iinfo;
4058 	    return 0;
4059 	}
4060 /*        On exit, output intervals may not be ordered by ascending negcount */
4061 	if (iwork[6] == *iu) {
4062 	    *wl = work[*n + 1];
4063 	    wlu = work[*n + 3];
4064 	    nwl = iwork[1];
4065 	    *wu = work[*n + 4];
4066 	    wul = work[*n + 2];
4067 	    nwu = iwork[4];
4068 	} else {
4069 	    *wl = work[*n + 2];
4070 	    wlu = work[*n + 4];
4071 	    nwl = iwork[2];
4072 	    *wu = work[*n + 3];
4073 	    wul = work[*n + 1];
4074 	    nwu = iwork[3];
4075 	}
4076 /*        On exit, the interval [WL, WLU] contains a value with negcount NWL, */
4077 /*        and [WUL, WU] contains a value with negcount NWU. */
4078 	if (nwl < 0 || nwl >= *n || nwu < 1 || nwu > *n) {
4079 	    *info = 4;
4080 	    return 0;
4081 	}
4082     } else if (irange == 2) {
4083 	*wl = *vl;
4084 	*wu = *vu;
4085     } else if (irange == 1) {
4086 	*wl = gl;
4087 	*wu = gu;
4088     }
4089 /*     Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. */
4090 /*     NWL accumulates the number of eigenvalues .le. WL, */
4091 /*     NWU accumulates the number of eigenvalues .le. WU */
4092     *m = 0;
4093     iend = 0;
4094     *info = 0;
4095     nwl = 0;
4096     nwu = 0;
4097 
4098     i__1 = *nsplit;
4099     for (jblk = 1; jblk <= i__1; ++jblk) {
4100 	ioff = iend;
4101 	ibegin = ioff + 1;
4102 	iend = isplit[jblk];
4103 	in = iend - ioff;
4104 
4105 	if (in == 1) {
4106 /*           1x1 block */
4107 	    if (*wl >= d__[ibegin] - *pivmin) {
4108 		++nwl;
4109 	    }
4110 	    if (*wu >= d__[ibegin] - *pivmin) {
4111 		++nwu;
4112 	    }
4113 	    if (irange == 1 || *wl < d__[ibegin] - *pivmin && *wu >= d__[
4114 		    ibegin] - *pivmin) {
4115 		++(*m);
4116 		w[*m] = d__[ibegin];
4117 		werr[*m] = 0.;
4118 /*              The gap for a single block doesn't matter for the later */
4119 /*              algorithm and is assigned an arbitrary large value */
4120 		iblock[*m] = jblk;
4121 		indexw[*m] = 1;
4122 	    }
4123 /*        Disabled 2x2 case because of a failure on the following matrix */
4124 /*        RANGE = 'I', IL = IU = 4 */
4125 /*          Original Tridiagonal, d = [ */
4126 /*           -0.150102010615740E+00 */
4127 /*           -0.849897989384260E+00 */
4128 /*           -0.128208148052635E-15 */
4129 /*            0.128257718286320E-15 */
4130 /*          ]; */
4131 /*          e = [ */
4132 /*           -0.357171383266986E+00 */
4133 /*           -0.180411241501588E-15 */
4134 /*           -0.175152352710251E-15 */
4135 /*          ]; */
4136 
4137 /*         ELSE IF( IN.EQ.2 ) THEN */
4138 /* *           2x2 block */
4139 /*            DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) */
4140 /*            TMP1 = HALF*(D(IBEGIN)+D(IEND)) */
4141 /*            L1 = TMP1 - DISC */
4142 /*            IF( WL.GE. L1-PIVMIN ) */
4143 /*     $         NWL = NWL + 1 */
4144 /*            IF( WU.GE. L1-PIVMIN ) */
4145 /*     $         NWU = NWU + 1 */
4146 /*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. */
4147 /*     $          L1-PIVMIN ) ) THEN */
4148 /*               M = M + 1 */
4149 /*               W( M ) = L1 */
4150 /* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
4151 /*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
4152 /*               IBLOCK( M ) = JBLK */
4153 /*               INDEXW( M ) = 1 */
4154 /*            ENDIF */
4155 /*            L2 = TMP1 + DISC */
4156 /*            IF( WL.GE. L2-PIVMIN ) */
4157 /*     $         NWL = NWL + 1 */
4158 /*            IF( WU.GE. L2-PIVMIN ) */
4159 /*     $         NWU = NWU + 1 */
4160 /*            IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. */
4161 /*     $          L2-PIVMIN ) ) THEN */
4162 /*               M = M + 1 */
4163 /*               W( M ) = L2 */
4164 /* *              The uncertainty of eigenvalues of a 2x2 matrix is very small */
4165 /*               WERR( M ) = EPS * ABS( W( M ) ) * TWO */
4166 /*               IBLOCK( M ) = JBLK */
4167 /*               INDEXW( M ) = 2 */
4168 /*            ENDIF */
4169 	} else {
4170 /*           General Case - block of size IN >= 2 */
4171 /*           Compute local Gerschgorin interval and use it as the initial */
4172 /*           interval for DLAEBZ */
4173 	    gu = d__[ibegin];
4174 	    gl = d__[ibegin];
4175 	    tmp1 = 0.;
4176 	    i__2 = iend;
4177 	    for (j = ibegin; j <= i__2; ++j) {
4178 /* Computing MIN */
4179 		d__1 = gl, d__2 = gers[(j << 1) - 1];
4180 		gl = std::min(d__1,d__2);
4181 /* Computing MAX */
4182 		d__1 = gu, d__2 = gers[j * 2];
4183 		gu = std::max(d__1,d__2);
4184 /* L40: */
4185 	    }
4186 /*           [JAN/28/2009] */
4187 /*           change SPDIAM by TNORM in lines 2 and 3 thereafter */
4188 /*           line 1: remove computation of SPDIAM (not useful anymore) */
4189 /*           SPDIAM = GU - GL */
4190 /*           GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN */
4191 /*           GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN */
4192 	    gl = gl - tnorm * 2. * eps * in - *pivmin * 2.;
4193 	    gu = gu + tnorm * 2. * eps * in + *pivmin * 2.;
4194 
4195 	    if (irange > 1) {
4196 		if (gu < *wl) {
4197 /*                 the local block contains none of the wanted eigenvalues */
4198 		    nwl += in;
4199 		    nwu += in;
4200 		    goto L70;
4201 		}
4202 /*              refine search interval if possible, only range (WL,WU] matters */
4203 		gl = std::max(gl,*wl);
4204 		gu = std::min(gu,*wu);
4205 		if (gl >= gu) {
4206 		    goto L70;
4207 		}
4208 	    }
4209 /*           Find negcount of initial interval boundaries GL and GU */
4210 	    work[*n + 1] = gl;
4211 	    work[*n + in + 1] = gu;
4212 	    dlaebz_(&c__1, &c__0, &in, &in, &c__1, &nb, &atoli, &rtoli,
4213 		    pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
4214 		    work[*n + 1], &work[*n + (in << 1) + 1], &im, &iwork[1], &
4215 		    w[*m + 1], &iblock[*m + 1], &iinfo);
4216 	    if (iinfo != 0) {
4217 		*info = iinfo;
4218 		return 0;
4219 	    }
4220 
4221 	    nwl += iwork[1];
4222 	    nwu += iwork[in + 1];
4223 	    iwoff = *m - iwork[1];
4224 /*           Compute Eigenvalues */
4225 	    itmax = (integer) ((log(gu - gl + *pivmin) - log(*pivmin)) / log(
4226 		    2.)) + 2;
4227 	    dlaebz_(&c__2, &itmax, &in, &in, &c__1, &nb, &atoli, &rtoli,
4228 		    pivmin, &d__[ibegin], &e[ibegin], &e2[ibegin], idumma, &
4229 		    work[*n + 1], &work[*n + (in << 1) + 1], &iout, &iwork[1],
4230 		     &w[*m + 1], &iblock[*m + 1], &iinfo);
4231 	    if (iinfo != 0) {
4232 		*info = iinfo;
4233 		return 0;
4234 	    }
4235 
4236 /*           Copy eigenvalues into W and IBLOCK */
4237 /*           Use -JBLK for block number for unconverged eigenvalues. */
4238 /*           Loop over the number of output intervals from DLAEBZ */
4239 	    i__2 = iout;
4240 	    for (j = 1; j <= i__2; ++j) {
4241 /*              eigenvalue approximation is middle point of interval */
4242 		tmp1 = (work[j + *n] + work[j + in + *n]) * .5;
4243 /*              semi length of error interval */
4244 		tmp2 = (d__1 = work[j + *n] - work[j + in + *n], abs(d__1)) *
4245 			.5;
4246 		if (j > iout - iinfo) {
4247 /*                 Flag non-convergence. */
4248 		    ncnvrg = true;
4249 		    ib = -jblk;
4250 		} else {
4251 		    ib = jblk;
4252 		}
4253 		i__3 = iwork[j + in] + iwoff;
4254 		for (je = iwork[j] + 1 + iwoff; je <= i__3; ++je) {
4255 		    w[je] = tmp1;
4256 		    werr[je] = tmp2;
4257 		    indexw[je] = je - iwoff;
4258 		    iblock[je] = ib;
4259 /* L50: */
4260 		}
4261 /* L60: */
4262 	    }
4263 
4264 	    *m += im;
4265 	}
4266 L70:
4267 	;
4268     }
4269 /*     If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU */
4270 /*     If NWL+1 < IL or NWU > IU, discard extra eigenvalues. */
4271     if (irange == 3) {
4272 	idiscl = *il - 1 - nwl;
4273 	idiscu = nwu - *iu;
4274 
4275 	if (idiscl > 0) {
4276 	    im = 0;
4277 	    i__1 = *m;
4278 	    for (je = 1; je <= i__1; ++je) {
4279 /*              Remove some of the smallest eigenvalues from the left so that */
4280 /*              at the end IDISCL =0. Move all eigenvalues up to the left. */
4281 		if (w[je] <= wlu && idiscl > 0) {
4282 		    --idiscl;
4283 		} else {
4284 		    ++im;
4285 		    w[im] = w[je];
4286 		    werr[im] = werr[je];
4287 		    indexw[im] = indexw[je];
4288 		    iblock[im] = iblock[je];
4289 		}
4290 /* L80: */
4291 	    }
4292 	    *m = im;
4293 	}
4294 	if (idiscu > 0) {
4295 /*           Remove some of the largest eigenvalues from the right so that */
4296 /*           at the end IDISCU =0. Move all eigenvalues up to the left. */
4297 	    im = *m + 1;
4298 	    for (je = *m; je >= 1; --je) {
4299 		if (w[je] >= wul && idiscu > 0) {
4300 		    --idiscu;
4301 		} else {
4302 		    --im;
4303 		    w[im] = w[je];
4304 		    werr[im] = werr[je];
4305 		    indexw[im] = indexw[je];
4306 		    iblock[im] = iblock[je];
4307 		}
4308 /* L81: */
4309 	    }
4310 	    jee = 0;
4311 	    i__1 = *m;
4312 	    for (je = im; je <= i__1; ++je) {
4313 		++jee;
4314 		w[jee] = w[je];
4315 		werr[jee] = werr[je];
4316 		indexw[jee] = indexw[je];
4317 		iblock[jee] = iblock[je];
4318 /* L82: */
4319 	    }
4320 	    *m = *m - im + 1;
4321 	}
4322 	if (idiscl > 0 || idiscu > 0) {
4323 /*           Code to deal with effects of bad arithmetic. (If N(w) is */
4324 /*           monotone non-decreasing, this should never happen.) */
4325 /*           Some low eigenvalues to be discarded are not in (WL,WLU], */
4326 /*           or high eigenvalues to be discarded are not in (WUL,WU] */
4327 /*           so just kill off the smallest IDISCL/largest IDISCU */
4328 /*           eigenvalues, by marking the corresponding IBLOCK = 0 */
4329 	    if (idiscl > 0) {
4330 		wkill = *wu;
4331 		i__1 = idiscl;
4332 		for (jdisc = 1; jdisc <= i__1; ++jdisc) {
4333 		    iw = 0;
4334 		    i__2 = *m;
4335 		    for (je = 1; je <= i__2; ++je) {
4336 			if (iblock[je] != 0 && (w[je] < wkill || iw == 0)) {
4337 			    iw = je;
4338 			    wkill = w[je];
4339 			}
4340 /* L90: */
4341 		    }
4342 		    iblock[iw] = 0;
4343 /* L100: */
4344 		}
4345 	    }
4346 	    if (idiscu > 0) {
4347 		wkill = *wl;
4348 		i__1 = idiscu;
4349 		for (jdisc = 1; jdisc <= i__1; ++jdisc) {
4350 		    iw = 0;
4351 		    i__2 = *m;
4352 		    for (je = 1; je <= i__2; ++je) {
4353 			if (iblock[je] != 0 && (w[je] >= wkill || iw == 0)) {
4354 			    iw = je;
4355 			    wkill = w[je];
4356 			}
4357 /* L110: */
4358 		    }
4359 		    iblock[iw] = 0;
4360 /* L120: */
4361 		}
4362 	    }
4363 /*           Now erase all eigenvalues with IBLOCK set to zero */
4364 	    im = 0;
4365 	    i__1 = *m;
4366 	    for (je = 1; je <= i__1; ++je) {
4367 		if (iblock[je] != 0) {
4368 		    ++im;
4369 		    w[im] = w[je];
4370 		    werr[im] = werr[je];
4371 		    indexw[im] = indexw[je];
4372 		    iblock[im] = iblock[je];
4373 		}
4374 /* L130: */
4375 	    }
4376 	    *m = im;
4377 	}
4378 	if (idiscl < 0 || idiscu < 0) {
4379 	    toofew = true;
4380 	}
4381     }
4382 
4383     if (irange == 1 && *m != *n || irange == 3 && *m != *iu - *il + 1) {
4384 	toofew = true;
4385     }
4386 /*     If ORDER='B', do nothing the eigenvalues are already sorted by */
4387 /*        block. */
4388 /*     If ORDER='E', sort the eigenvalues from smallest to largest */
4389     if (lsame_(order, "E") && *nsplit > 1) {
4390 	i__1 = *m - 1;
4391 	for (je = 1; je <= i__1; ++je) {
4392 	    ie = 0;
4393 	    tmp1 = w[je];
4394 	    i__2 = *m;
4395 	    for (j = je + 1; j <= i__2; ++j) {
4396 		if (w[j] < tmp1) {
4397 		    ie = j;
4398 		    tmp1 = w[j];
4399 		}
4400 /* L140: */
4401 	    }
4402 	    if (ie != 0) {
4403 		tmp2 = werr[ie];
4404 		itmp1 = iblock[ie];
4405 		itmp2 = indexw[ie];
4406 		w[ie] = w[je];
4407 		werr[ie] = werr[je];
4408 		iblock[ie] = iblock[je];
4409 		indexw[ie] = indexw[je];
4410 		w[je] = tmp1;
4411 		werr[je] = tmp2;
4412 		iblock[je] = itmp1;
4413 		indexw[je] = itmp2;
4414 	    }
4415 /* L150: */
4416 	}
4417     }
4418 
4419     *info = 0;
4420     if (ncnvrg) {
4421 	++(*info);
4422     }
4423     if (toofew) {
4424 	*info += 2;
4425     }
4426     return 0;
4427 
4428 /*     End of DLARRD */
4429 
4430 } /* dlarrd_ */
4431 
dlarre_(const char * range,integer * n,double * vl,double * vu,integer * il,integer * iu,double * d__,double * e,double * e2,double * rtol1,double * rtol2,double * spltol,integer * nsplit,integer * isplit,integer * m,double * w,double * werr,double * wgap,integer * iblock,integer * indexw,double * gers,double * pivmin,double * work,integer * iwork,integer * info)4432 /* Subroutine */ int dlarre_(const char *range, integer *n, double *vl,
4433 	double *vu, integer *il, integer *iu, double *d__, double
4434 	*e, double *e2, double *rtol1, double *rtol2, double *
4435 	spltol, integer *nsplit, integer *isplit, integer *m, double *w,
4436 	double *werr, double *wgap, integer *iblock, integer *indexw,
4437 	double *gers, double *pivmin, double *work, integer *
4438 	iwork, integer *info)
4439 {
4440 	/* Table of constant values */
4441 	static integer c__1 = 1;
4442 	static integer c__2 = 2;
4443 
4444     /* System generated locals */
4445     integer i__1, i__2;
4446     double d__1, d__2, d__3;
4447 
4448     /* Builtin functions
4449     double sqrt(double), log(double); */
4450 
4451     /* Local variables */
4452     integer i__, j;
4453     double s1, s2;
4454     integer mb;
4455     double gl;
4456     integer in, mm;
4457     double gu;
4458     integer cnt;
4459     double eps, tau, tmp, rtl;
4460     integer cnt1, cnt2;
4461     double tmp1, eabs;
4462     integer iend, jblk;
4463     double eold;
4464     integer indl;
4465     double dmax__, emax;
4466     integer wend, idum, indu;
4467     double rtol;
4468     integer iseed[4];
4469     double avgap, sigma;
4470     integer iinfo;
4471     bool norep;
4472     integer ibegin;
4473     bool forceb;
4474     integer irange;
4475     double sgndef;
4476     integer wbegin;
4477     double safmin, spdiam;
4478     bool usedqd;
4479     double clwdth, isleft;
4480     double isrght, bsrtol, dpivot;
4481 
4482 
4483 /*  -- LAPACK auxiliary routine (version 3.1) -- */
4484 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
4485 /*     November 2006 */
4486 
4487 /*     .. Scalar Arguments .. */
4488 /*     .. */
4489 /*     .. Array Arguments .. */
4490 /*     .. */
4491 
4492 /*  Purpose */
4493 /*  ======= */
4494 
4495 /*  To find the desired eigenvalues of a given real symmetric */
4496 /*  tridiagonal matrix T, DLARRE sets any "small" off-diagonal */
4497 /*  elements to zero, and for each unreduced block T_i, it finds */
4498 /*  (a) a suitable shift at one end of the block's spectrum, */
4499 /*  (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
4500 /*  (c) eigenvalues of each L_i D_i L_i^T. */
4501 /*  The representations and eigenvalues found are then used by */
4502 /*  DSTEMR to compute the eigenvectors of T. */
4503 /*  The accuracy varies depending on whether bisection is used to */
4504 /*  find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */
4505 /*  conpute all and then discard any unwanted one. */
4506 /*  As an added benefit, DLARRE also outputs the n */
4507 /*  Gerschgorin intervals for the matrices L_i D_i L_i^T. */
4508 
4509 /*  Arguments */
4510 /*  ========= */
4511 
4512 /*  RANGE   (input) CHARACTER */
4513 /*          = 'A': ("All")   all eigenvalues will be found. */
4514 /*          = 'V': ("Value") all eigenvalues in the half-open interval */
4515 /*                           (VL, VU] will be found. */
4516 /*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
4517 /*                           entire matrix) will be found. */
4518 
4519 /*  N       (input) INTEGER */
4520 /*          The order of the matrix. N > 0. */
4521 
4522 /*  VL      (input/output) DOUBLE PRECISION */
4523 /*  VU      (input/output) DOUBLE PRECISION */
4524 /*          If RANGE='V', the lower and upper bounds for the eigenvalues. */
4525 /*          Eigenvalues less than or equal to VL, or greater than VU, */
4526 /*          will not be returned.  VL < VU. */
4527 /*          If RANGE='I' or ='A', DLARRE computes bounds on the desired */
4528 /*          part of the spectrum. */
4529 
4530 /*  IL      (input) INTEGER */
4531 /*  IU      (input) INTEGER */
4532 /*          If RANGE='I', the indices (in ascending order) of the */
4533 /*          smallest and largest eigenvalues to be returned. */
4534 /*          1 <= IL <= IU <= N. */
4535 
4536 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
4537 /*          On entry, the N diagonal elements of the tridiagonal */
4538 /*          matrix T. */
4539 /*          On exit, the N diagonal elements of the diagonal */
4540 /*          matrices D_i. */
4541 
4542 /*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
4543 /*          On entry, the first (N-1) entries contain the subdiagonal */
4544 /*          elements of the tridiagonal matrix T; E(N) need not be set. */
4545 /*          On exit, E contains the subdiagonal elements of the unit */
4546 /*          bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
4547 /*          1 <= I <= NSPLIT, contain the base points sigma_i on output. */
4548 
4549 /*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
4550 /*          On entry, the first (N-1) entries contain the SQUARES of the */
4551 /*          subdiagonal elements of the tridiagonal matrix T; */
4552 /*          E2(N) need not be set. */
4553 /*          On exit, the entries E2( ISPLIT( I ) ), */
4554 /*          1 <= I <= NSPLIT, have been set to zero */
4555 
4556 /*  RTOL1   (input) DOUBLE PRECISION */
4557 /*  RTOL2   (input) DOUBLE PRECISION */
4558 /*           Parameters for bisection. */
4559 /*           An interval [LEFT,RIGHT] has converged if */
4560 /*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
4561 
4562 /*  SPLTOL (input) DOUBLE PRECISION */
4563 /*          The threshold for splitting. */
4564 
4565 /*  NSPLIT  (output) INTEGER */
4566 /*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
4567 
4568 /*  ISPLIT  (output) INTEGER array, dimension (N) */
4569 /*          The splitting points, at which T breaks up into blocks. */
4570 /*          The first block consists of rows/columns 1 to ISPLIT(1), */
4571 /*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
4572 /*          etc., and the NSPLIT-th consists of rows/columns */
4573 /*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
4574 
4575 /*  M       (output) INTEGER */
4576 /*          The total number of eigenvalues (of all L_i D_i L_i^T) */
4577 /*          found. */
4578 
4579 /*  W       (output) DOUBLE PRECISION array, dimension (N) */
4580 /*          The first M elements contain the eigenvalues. The */
4581 /*          eigenvalues of each of the blocks, L_i D_i L_i^T, are */
4582 /*          sorted in ascending order ( DLARRE may use the */
4583 /*          remaining N-M elements as workspace). */
4584 
4585 /*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
4586 /*          The error bound on the corresponding eigenvalue in W. */
4587 
4588 /*  WGAP    (output) DOUBLE PRECISION array, dimension (N) */
4589 /*          The separation from the right neighbor eigenvalue in W. */
4590 /*          The gap is only with respect to the eigenvalues of the same block */
4591 /*          as each block has its own representation tree. */
4592 /*          Exception: at the right end of a block we store the left gap */
4593 
4594 /*  IBLOCK  (output) INTEGER array, dimension (N) */
4595 /*          The indices of the blocks (submatrices) associated with the */
4596 /*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
4597 /*          W(i) belongs to the first block from the top, =2 if W(i) */
4598 /*          belongs to the second block, etc. */
4599 
4600 /*  INDEXW  (output) INTEGER array, dimension (N) */
4601 /*          The indices of the eigenvalues within each block (submatrix); */
4602 /*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
4603 /*          i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
4604 
4605 /*  GERS    (output) DOUBLE PRECISION array, dimension (2*N) */
4606 /*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
4607 /*          is (GERS(2*i-1), GERS(2*i)). */
4608 
4609 /*  PIVMIN  (output) DOUBLE PRECISION */
4610 /*          The minimum pivot in the Sturm sequence for T. */
4611 
4612 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N) */
4613 /*          Workspace. */
4614 
4615 /*  IWORK   (workspace) INTEGER array, dimension (5*N) */
4616 /*          Workspace. */
4617 
4618 /*  INFO    (output) INTEGER */
4619 /*          = 0:  successful exit */
4620 /*          > 0:  A problem occured in DLARRE. */
4621 /*          < 0:  One of the called subroutines signaled an internal problem. */
4622 /*                Needs inspection of the corresponding parameter IINFO */
4623 /*                for further information. */
4624 
4625 /*          =-1:  Problem in DLARRD. */
4626 /*          = 2:  No base representation could be found in MAXTRY iterations. */
4627 /*                Increasing MAXTRY and recompilation might be a remedy. */
4628 /*          =-3:  Problem in DLARRB when computing the refined root */
4629 /*                representation for DLASQ2. */
4630 /*          =-4:  Problem in DLARRB when preforming bisection on the */
4631 /*                desired part of the spectrum. */
4632 /*          =-5:  Problem in DLASQ2. */
4633 /*          =-6:  Problem in DLASQ2. */
4634 
4635 /*  Further Details */
4636 /*  The base representations are required to suffer very little */
4637 /*  element growth and consequently define all their eigenvalues to */
4638 /*  high relative accuracy. */
4639 /*  =============== */
4640 
4641 /*  Based on contributions by */
4642 /*     Beresford Parlett, University of California, Berkeley, USA */
4643 /*     Jim Demmel, University of California, Berkeley, USA */
4644 /*     Inderjit Dhillon, University of Texas, Austin, USA */
4645 /*     Osni Marques, LBNL/NERSC, USA */
4646 /*     Christof Voemel, University of California, Berkeley, USA */
4647 
4648 /*  ===================================================================== */
4649 
4650 /*     .. Parameters .. */
4651 /*     .. */
4652 /*     .. Local Scalars .. */
4653 /*     .. */
4654 /*     .. Local Arrays .. */
4655 /*     .. */
4656 /*     .. External Functions .. */
4657 /*     .. */
4658 /*     .. External Subroutines .. */
4659 /*     .. */
4660 /*     .. Intrinsic Functions .. */
4661 /*     .. */
4662 /*     .. Executable Statements .. */
4663 
4664     /* Parameter adjustments */
4665     --iwork;
4666     --work;
4667     --gers;
4668     --indexw;
4669     --iblock;
4670     --wgap;
4671     --werr;
4672     --w;
4673     --isplit;
4674     --e2;
4675     --e;
4676     --d__;
4677 
4678     /* Function Body */
4679     *info = 0;
4680 
4681 /*     Decode RANGE */
4682 
4683     if (lsame_(range, "A")) {
4684 	irange = 1;
4685     } else if (lsame_(range, "V")) {
4686 	irange = 3;
4687     } else if (lsame_(range, "I")) {
4688 	irange = 2;
4689     }
4690     *m = 0;
4691 /*     Get machine constants */
4692     safmin = dlamch_("S");
4693     eps = dlamch_("P");
4694 /*     Set parameters */
4695     rtl = sqrt(eps);
4696     bsrtol = sqrt(eps);
4697 /*     Treat case of 1x1 matrix for quick return */
4698     if (*n == 1) {
4699 	if (irange == 1 || irange == 3 && d__[1] > *vl && d__[1] <= *vu ||
4700 		irange == 2 && *il == 1 && *iu == 1) {
4701 	    *m = 1;
4702 	    w[1] = d__[1];
4703 /*           The computation error of the eigenvalue is zero */
4704 	    werr[1] = 0.;
4705 	    wgap[1] = 0.;
4706 	    iblock[1] = 1;
4707 	    indexw[1] = 1;
4708 	    gers[1] = d__[1];
4709 	    gers[2] = d__[1];
4710 	}
4711 /*        store the shift for the initial RRR, which is zero in this case */
4712 	e[1] = 0.;
4713 	return 0;
4714     }
4715 /*     General case: tridiagonal matrix of order > 1 */
4716 
4717 /*     Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
4718 /*     Compute maximum off-diagonal entry and pivmin. */
4719     gl = d__[1];
4720     gu = d__[1];
4721     eold = 0.;
4722     emax = 0.;
4723     e[*n] = 0.;
4724     i__1 = *n;
4725     for (i__ = 1; i__ <= i__1; ++i__) {
4726 	werr[i__] = 0.;
4727 	wgap[i__] = 0.;
4728 	eabs = (d__1 = e[i__], abs(d__1));
4729 	if (eabs >= emax) {
4730 	    emax = eabs;
4731 	}
4732 	tmp1 = eabs + eold;
4733 	gers[(i__ << 1) - 1] = d__[i__] - tmp1;
4734 /* Computing MIN */
4735 	d__1 = gl, d__2 = gers[(i__ << 1) - 1];
4736 	gl = std::min(d__1,d__2);
4737 	gers[i__ * 2] = d__[i__] + tmp1;
4738 /* Computing MAX */
4739 	d__1 = gu, d__2 = gers[i__ * 2];
4740 	gu = std::max(d__1,d__2);
4741 	eold = eabs;
4742 /* L5: */
4743     }
4744 /*     The minimum pivot allowed in the Sturm sequence for T */
4745 /* Computing MAX */
4746 /* Computing 2nd power */
4747     d__3 = emax;
4748     d__1 = 1., d__2 = d__3 * d__3;
4749     *pivmin = safmin * std::max(d__1,d__2);
4750 /*     Compute spectral diameter. The Gerschgorin bounds give an */
4751 /*     estimate that is wrong by at most a factor of SQRT(2) */
4752     spdiam = gu - gl;
4753 /*     Compute splitting points */
4754     dlarra_(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
4755 	    iinfo);
4756 /*     Can force use of bisection instead of faster DQDS. */
4757 /*     Option left in the code for future multisection work. */
4758     forceb = false;
4759     if (irange == 1 && ! forceb) {
4760 /*        Set interval [VL,VU] that contains all eigenvalues */
4761 	*vl = gl;
4762 	*vu = gu;
4763     } else {
4764 /*        We call DLARRD to find crude approximations to the eigenvalues */
4765 /*        in the desired range. In case IRANGE = INDRNG, we also obtain the */
4766 /*        interval (VL,VU] that contains all the wanted eigenvalues. */
4767 /*        An interval [LEFT,RIGHT] has converged if */
4768 /*        RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
4769 /*        DLARRD needs a WORK of size 4*N, IWORK of size 3*N */
4770 	dlarrd_(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
4771 		1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1],
4772 		vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
4773 	if (iinfo != 0) {
4774 	    *info = -1;
4775 	    return 0;
4776 	}
4777 /*        Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
4778 	i__1 = *n;
4779 	for (i__ = mm + 1; i__ <= i__1; ++i__) {
4780 	    w[i__] = 0.;
4781 	    werr[i__] = 0.;
4782 	    iblock[i__] = 0;
4783 	    indexw[i__] = 0;
4784 /* L14: */
4785 	}
4786     }
4787 /* ** */
4788 /*     Loop over unreduced blocks */
4789     ibegin = 1;
4790     wbegin = 1;
4791     i__1 = *nsplit;
4792     for (jblk = 1; jblk <= i__1; ++jblk) {
4793 	iend = isplit[jblk];
4794 	in = iend - ibegin + 1;
4795 /*        1 X 1 block */
4796 	if (in == 1) {
4797 	    if (irange == 1 || irange == 3 && d__[ibegin] > *vl && d__[ibegin]
4798 		     <= *vu || irange == 2 && iblock[wbegin] == jblk) {
4799 		++(*m);
4800 		w[*m] = d__[ibegin];
4801 		werr[*m] = 0.;
4802 /*              The gap for a single block doesn't matter for the later */
4803 /*              algorithm and is assigned an arbitrary large value */
4804 		wgap[*m] = 0.;
4805 		iblock[*m] = jblk;
4806 		indexw[*m] = 1;
4807 		++wbegin;
4808 	    }
4809 /*           E( IEND ) holds the shift for the initial RRR */
4810 	    e[iend] = 0.;
4811 	    ibegin = iend + 1;
4812 	    goto L170;
4813 	}
4814 
4815 /*        Blocks of size larger than 1x1 */
4816 
4817 /*        E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
4818 	e[iend] = 0.;
4819 
4820 /*        Find local outer bounds GL,GU for the block */
4821 	gl = d__[ibegin];
4822 	gu = d__[ibegin];
4823 	i__2 = iend;
4824 	for (i__ = ibegin; i__ <= i__2; ++i__) {
4825 /* Computing MIN */
4826 	    d__1 = gers[(i__ << 1) - 1];
4827 	    gl = std::min(d__1,gl);
4828 /* Computing MAX */
4829 	    d__1 = gers[i__ * 2];
4830 	    gu = std::max(d__1,gu);
4831 /* L15: */
4832 	}
4833 	spdiam = gu - gl;
4834 	if (! (irange == 1 && ! forceb)) {
4835 /*           Count the number of eigenvalues in the current block. */
4836 	    mb = 0;
4837 	    i__2 = mm;
4838 	    for (i__ = wbegin; i__ <= i__2; ++i__) {
4839 		if (iblock[i__] == jblk) {
4840 		    ++mb;
4841 		} else {
4842 		    goto L21;
4843 		}
4844 /* L20: */
4845 	    }
4846 L21:
4847 	    if (mb == 0) {
4848 /*              No eigenvalue in the current block lies in the desired range */
4849 /*              E( IEND ) holds the shift for the initial RRR */
4850 		e[iend] = 0.;
4851 		ibegin = iend + 1;
4852 		goto L170;
4853 	    } else {
4854 /*              Decide whether dqds or bisection is more efficient */
4855 		usedqd = (double) mb > in * .5 && ! forceb;
4856 		wend = wbegin + mb - 1;
4857 /*              Calculate gaps for the current block */
4858 /*              In later stages, when representations for individual */
4859 /*              eigenvalues are different, we use SIGMA = E( IEND ). */
4860 		sigma = 0.;
4861 		i__2 = wend - 1;
4862 		for (i__ = wbegin; i__ <= i__2; ++i__) {
4863 /* Computing MAX */
4864 		    d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] +
4865 			    werr[i__]);
4866 		    wgap[i__] = std::max(d__1,d__2);
4867 /* L30: */
4868 		}
4869 /* Computing MAX */
4870 		d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
4871 		wgap[wend] = std::max(d__1,d__2);
4872 /*              Find local index of the first and last desired evalue. */
4873 		indl = indexw[wbegin];
4874 		indu = indexw[wend];
4875 	    }
4876 	}
4877 	if (irange == 1 && ! forceb || usedqd) {
4878 /*           Case of DQDS */
4879 /*           Find approximations to the extremal eigenvalues of the block */
4880 	    dlarrk_(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
4881 		    rtl, &tmp, &tmp1, &iinfo);
4882 	    if (iinfo != 0) {
4883 		*info = -1;
4884 		return 0;
4885 	    }
4886 /* Computing MAX */
4887 	    d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1,
4888 		    abs(d__1));
4889 	    isleft = std::max(d__2,d__3);
4890 	    dlarrk_(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
4891 		    rtl, &tmp, &tmp1, &iinfo);
4892 	    if (iinfo != 0) {
4893 		*info = -1;
4894 		return 0;
4895 	    }
4896 /* Computing MIN */
4897 	    d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1,
4898 		    abs(d__1));
4899 	    isrght = std::min(d__2,d__3);
4900 /*           Improve the estimate of the spectral diameter */
4901 	    spdiam = isrght - isleft;
4902 	} else {
4903 /*           Case of bisection */
4904 /*           Find approximations to the wanted extremal eigenvalues */
4905 /* Computing MAX */
4906 	    d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 =
4907 		    w[wbegin] - werr[wbegin], abs(d__1));
4908 	    isleft = std::max(d__2,d__3);
4909 /* Computing MIN */
4910 	    d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
4911 		    wend] + werr[wend], abs(d__1));
4912 	    isrght = std::min(d__2,d__3);
4913 	}
4914 /*        Decide whether the base representation for the current block */
4915 /*        L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
4916 /*        should be on the left or the right end of the current block. */
4917 /*        The strategy is to shift to the end which is "more populated" */
4918 /*        Furthermore, decide whether to use DQDS for the computation of */
4919 /*        the eigenvalue approximations at the end of DLARRE or bisection. */
4920 /*        dqds is chosen if all eigenvalues are desired or the number of */
4921 /*        eigenvalues to be computed is large compared to the blocksize. */
4922 	if (irange == 1 && ! forceb) {
4923 /*           If all the eigenvalues have to be computed, we use dqd */
4924 	    usedqd = true;
4925 /*           INDL is the local index of the first eigenvalue to compute */
4926 	    indl = 1;
4927 	    indu = in;
4928 /*           MB =  number of eigenvalues to compute */
4929 	    mb = in;
4930 	    wend = wbegin + mb - 1;
4931 /*           Define 1/4 and 3/4 points of the spectrum */
4932 	    s1 = isleft + spdiam * .25;
4933 	    s2 = isrght - spdiam * .25;
4934 	} else {
4935 /*           DLARRD has computed IBLOCK and INDEXW for each eigenvalue */
4936 /*           approximation. */
4937 /*           choose sigma */
4938 	    if (usedqd) {
4939 		s1 = isleft + spdiam * .25;
4940 		s2 = isrght - spdiam * .25;
4941 	    } else {
4942 		tmp = std::min(isrght,*vu) - std::max(isleft,*vl);
4943 		s1 = std::max(isleft,*vl) + tmp * .25;
4944 		s2 = std::min(isrght,*vu) - tmp * .25;
4945 	    }
4946 	}
4947 /*        Compute the negcount at the 1/4 and 3/4 points */
4948 	if (mb > 1) {
4949 	    dlarrc_("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
4950 		    cnt, &cnt1, &cnt2, &iinfo);
4951 	}
4952 	if (mb == 1) {
4953 	    sigma = gl;
4954 	    sgndef = 1.;
4955 	} else if (cnt1 - indl >= indu - cnt2) {
4956 	    if (irange == 1 && ! forceb) {
4957 		sigma = std::max(isleft,gl);
4958 	    } else if (usedqd) {
4959 /*              use Gerschgorin bound as shift to get pos def matrix */
4960 /*              for dqds */
4961 		sigma = isleft;
4962 	    } else {
4963 /*              use approximation of the first desired eigenvalue of the */
4964 /*              block as shift */
4965 		sigma = std::max(isleft,*vl);
4966 	    }
4967 	    sgndef = 1.;
4968 	} else {
4969 	    if (irange == 1 && ! forceb) {
4970 		sigma = std::min(isrght,gu);
4971 	    } else if (usedqd) {
4972 /*              use Gerschgorin bound as shift to get neg def matrix */
4973 /*              for dqds */
4974 		sigma = isrght;
4975 	    } else {
4976 /*              use approximation of the first desired eigenvalue of the */
4977 /*              block as shift */
4978 		sigma = std::min(isrght,*vu);
4979 	    }
4980 	    sgndef = -1.;
4981 	}
4982 /*        An initial SIGMA has been chosen that will be used for computing */
4983 /*        T - SIGMA I = L D L^T */
4984 /*        Define the increment TAU of the shift in case the initial shift */
4985 /*        needs to be refined to obtain a factorization with not too much */
4986 /*        element growth. */
4987 	if (usedqd) {
4988 /*           The initial SIGMA was to the outer end of the spectrum */
4989 /*           the matrix is definite and we need not retreat. */
4990 	    tau = spdiam * eps * *n + *pivmin * 2.;
4991 	} else {
4992 	    if (mb > 1) {
4993 		clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
4994 		avgap = (d__1 = clwdth / (double) (wend - wbegin), abs(
4995 			d__1));
4996 		if (sgndef == 1.) {
4997 /* Computing MAX */
4998 		    d__1 = wgap[wbegin];
4999 		    tau = std::max(d__1,avgap) * .5;
5000 /* Computing MAX */
5001 		    d__1 = tau, d__2 = werr[wbegin];
5002 		    tau = std::max(d__1,d__2);
5003 		} else {
5004 /* Computing MAX */
5005 		    d__1 = wgap[wend - 1];
5006 		    tau = std::max(d__1,avgap) * .5;
5007 /* Computing MAX */
5008 		    d__1 = tau, d__2 = werr[wend];
5009 		    tau = std::max(d__1,d__2);
5010 		}
5011 	    } else {
5012 		tau = werr[wbegin];
5013 	    }
5014 	}
5015 
5016 	for (idum = 1; idum <= 6; ++idum) {
5017 /*           Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
5018 /*           Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
5019 /*           pivots in WORK(2*IN+1:3*IN) */
5020 	    dpivot = d__[ibegin] - sigma;
5021 	    work[1] = dpivot;
5022 	    dmax__ = abs(work[1]);
5023 	    j = ibegin;
5024 	    i__2 = in - 1;
5025 	    for (i__ = 1; i__ <= i__2; ++i__) {
5026 		work[(in << 1) + i__] = 1. / work[i__];
5027 		tmp = e[j] * work[(in << 1) + i__];
5028 		work[in + i__] = tmp;
5029 		dpivot = d__[j + 1] - sigma - tmp * e[j];
5030 		work[i__ + 1] = dpivot;
5031 /* Computing MAX */
5032 		d__1 = dmax__, d__2 = abs(dpivot);
5033 		dmax__ = std::max(d__1,d__2);
5034 		++j;
5035 /* L70: */
5036 	    }
5037 /*           check for element growth */
5038 	    if (dmax__ > spdiam * 64.) {
5039 		norep = true;
5040 	    } else {
5041 		norep = false;
5042 	    }
5043 	    if (usedqd && ! norep) {
5044 /*              Ensure the definiteness of the representation */
5045 /*              All entries of D (of L D L^T) must have the same sign */
5046 		i__2 = in;
5047 		for (i__ = 1; i__ <= i__2; ++i__) {
5048 		    tmp = sgndef * work[i__];
5049 		    if (tmp < 0.) {
5050 			norep = true;
5051 		    }
5052 /* L71: */
5053 		}
5054 	    }
5055 	    if (norep) {
5056 /*              Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
5057 /*              shift which makes the matrix definite. So we should end up */
5058 /*              here really only in the case of IRANGE = VALRNG or INDRNG. */
5059 		if (idum == 5) {
5060 		    if (sgndef == 1.) {
5061 /*                    The fudged Gerschgorin shift should succeed */
5062 			sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
5063 		    } else {
5064 			sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
5065 		    }
5066 		} else {
5067 		    sigma -= sgndef * tau;
5068 		    tau *= 2.;
5069 		}
5070 	    } else {
5071 /*              an initial RRR is found */
5072 		goto L83;
5073 	    }
5074 /* L80: */
5075 	}
5076 /*        if the program reaches this point, no base representation could be */
5077 /*        found in MAXTRY iterations. */
5078 	*info = 2;
5079 	return 0;
5080 L83:
5081 /*        At this point, we have found an initial base representation */
5082 /*        T - SIGMA I = L D L^T with not too much element growth. */
5083 /*        Store the shift. */
5084 	e[iend] = sigma;
5085 /*        Store D and L. */
5086 	dcopy_(&in, &work[1], &c__1, &d__[ibegin], &c__1);
5087 	i__2 = in - 1;
5088 	dcopy_(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
5089 	if (mb > 1) {
5090 
5091 /*           Perturb each entry of the base representation by a small */
5092 /*           (but random) relative amount to overcome difficulties with */
5093 /*           glued matrices. */
5094 
5095 	    for (i__ = 1; i__ <= 4; ++i__) {
5096 		iseed[i__ - 1] = 1;
5097 /* L122: */
5098 	    }
5099 	    i__2 = (in << 1) - 1;
5100 	    dlarnv_(&c__2, iseed, &i__2, &work[1]);
5101 	    i__2 = in - 1;
5102 	    for (i__ = 1; i__ <= i__2; ++i__) {
5103 		d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
5104 		e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
5105 /* L125: */
5106 	    }
5107 	    d__[iend] *= eps * 4. * work[in] + 1.;
5108 
5109 	}
5110 
5111 /*        Don't update the Gerschgorin intervals because keeping track */
5112 /*        of the updates would be too much work in DLARRV. */
5113 /*        We update W instead and use it to locate the proper Gerschgorin */
5114 /*        intervals. */
5115 /*        Compute the required eigenvalues of L D L' by bisection or dqds */
5116 	if (! usedqd) {
5117 /*           If DLARRD has been used, shift the eigenvalue approximations */
5118 /*           according to their representation. This is necessary for */
5119 /*           a uniform DLARRV since dqds computes eigenvalues of the */
5120 /*           shifted representation. In DLARRV, W will always hold the */
5121 /*           UNshifted eigenvalue approximation. */
5122 	    i__2 = wend;
5123 	    for (j = wbegin; j <= i__2; ++j) {
5124 		w[j] -= sigma;
5125 		werr[j] += (d__1 = w[j], abs(d__1)) * eps;
5126 /* L134: */
5127 	    }
5128 /*           call DLARRB to reduce eigenvalue error of the approximations */
5129 /*           from DLARRD */
5130 	    i__2 = iend - 1;
5131 	    for (i__ = ibegin; i__ <= i__2; ++i__) {
5132 /* Computing 2nd power */
5133 		d__1 = e[i__];
5134 		work[i__] = d__[i__] * (d__1 * d__1);
5135 /* L135: */
5136 	    }
5137 /*           use bisection to find EV from INDL to INDU */
5138 	    i__2 = indl - 1;
5139 	    dlarrb_(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1,
5140 		    rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
5141 		    work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
5142 		    iinfo);
5143 	    if (iinfo != 0) {
5144 		*info = -4;
5145 		return 0;
5146 	    }
5147 /*           DLARRB computes all gaps correctly except for the last one */
5148 /*           Record distance to VU/GU */
5149 /* Computing MAX */
5150 	    d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
5151 	    wgap[wend] = std::max(d__1,d__2);
5152 	    i__2 = indu;
5153 	    for (i__ = indl; i__ <= i__2; ++i__) {
5154 		++(*m);
5155 		iblock[*m] = jblk;
5156 		indexw[*m] = i__;
5157 /* L138: */
5158 	    }
5159 	} else {
5160 /*           Call dqds to get all eigs (and then possibly delete unwanted */
5161 /*           eigenvalues). */
5162 /*           Note that dqds finds the eigenvalues of the L D L^T representation */
5163 /*           of T to high relative accuracy. High relative accuracy */
5164 /*           might be lost when the shift of the RRR is subtracted to obtain */
5165 /*           the eigenvalues of T. However, T is not guaranteed to define its */
5166 /*           eigenvalues to high relative accuracy anyway. */
5167 /*           Set RTOL to the order of the tolerance used in DLASQ2 */
5168 /*           This is an ESTIMATED error, the worst case bound is 4*N*EPS */
5169 /*           which is usually too large and requires unnecessary work to be */
5170 /*           done by bisection when computing the eigenvectors */
5171 	    rtol = log((double) in) * 4. * eps;
5172 	    j = ibegin;
5173 	    i__2 = in - 1;
5174 	    for (i__ = 1; i__ <= i__2; ++i__) {
5175 		work[(i__ << 1) - 1] = (d__1 = d__[j], abs(d__1));
5176 		work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
5177 		++j;
5178 /* L140: */
5179 	    }
5180 	    work[(in << 1) - 1] = (d__1 = d__[iend], abs(d__1));
5181 	    work[in * 2] = 0.;
5182 	    dlasq2_(&in, &work[1], &iinfo);
5183 	    if (iinfo != 0) {
5184 /*              If IINFO = -5 then an index is part of a tight cluster */
5185 /*              and should be changed. The index is in IWORK(1) and the */
5186 /*              gap is in WORK(N+1) */
5187 		*info = -5;
5188 		return 0;
5189 	    } else {
5190 /*              Test that all eigenvalues are positive as expected */
5191 		i__2 = in;
5192 		for (i__ = 1; i__ <= i__2; ++i__) {
5193 		    if (work[i__] < 0.) {
5194 			*info = -6;
5195 			return 0;
5196 		    }
5197 /* L149: */
5198 		}
5199 	    }
5200 	    if (sgndef > 0.) {
5201 		i__2 = indu;
5202 		for (i__ = indl; i__ <= i__2; ++i__) {
5203 		    ++(*m);
5204 		    w[*m] = work[in - i__ + 1];
5205 		    iblock[*m] = jblk;
5206 		    indexw[*m] = i__;
5207 /* L150: */
5208 		}
5209 	    } else {
5210 		i__2 = indu;
5211 		for (i__ = indl; i__ <= i__2; ++i__) {
5212 		    ++(*m);
5213 		    w[*m] = -work[i__];
5214 		    iblock[*m] = jblk;
5215 		    indexw[*m] = i__;
5216 /* L160: */
5217 		}
5218 	    }
5219 	    i__2 = *m;
5220 	    for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
5221 /*              the value of RTOL below should be the tolerance in DLASQ2 */
5222 		werr[i__] = rtol * (d__1 = w[i__], abs(d__1));
5223 /* L165: */
5224 	    }
5225 	    i__2 = *m - 1;
5226 	    for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
5227 /*              compute the right gap between the intervals */
5228 /* Computing MAX */
5229 		d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
5230 			i__]);
5231 		wgap[i__] = std::max(d__1,d__2);
5232 /* L166: */
5233 	    }
5234 /* Computing MAX */
5235 	    d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
5236 	    wgap[*m] = std::max(d__1,d__2);
5237 	}
5238 /*        proceed with next block */
5239 	ibegin = iend + 1;
5240 	wbegin = wend + 1;
5241 L170:
5242 	;
5243     }
5244 
5245     return 0;
5246 
5247 /*     end of DLARRE */
5248 
5249 } /* dlarre_ */
5250 
dlarrf_(integer * n,double * d__,double * l,double * ld,integer * clstrt,integer * clend,double * w,double * wgap,double * werr,double * spdiam,double * clgapl,double * clgapr,double * pivmin,double * sigma,double * dplus,double * lplus,double * work,integer * info)5251 /* Subroutine */ int dlarrf_(integer *n, double *d__, double *l,
5252 	double *ld, integer *clstrt, integer *clend, double *w,
5253 	double *wgap, double *werr, double *spdiam, double *
5254 	clgapl, double *clgapr, double *pivmin, double *sigma,
5255 	double *dplus, double *lplus, double *work, integer *info)
5256 {
5257 	/* Table of constant values */
5258 	static integer c__1 = 1;
5259 
5260     /* System generated locals */
5261     integer i__1;
5262     double d__1, d__2, d__3;
5263 
5264     /* Local variables */
5265     integer i__;
5266     double s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2,
5267 	    znm2, growthbound, fail, fact, oldp;
5268     integer indx;
5269     double prod;
5270     integer ktry;
5271     double fail2, avgap, ldmax, rdmax;
5272     integer shift;
5273     bool dorrr1;
5274     double ldelta;
5275     bool nofail;
5276     double mingap, lsigma, rdelta;
5277     bool forcer;
5278     double rsigma, clwdth;
5279     bool sawnan1, sawnan2, tryrrr1;
5280 
5281 
5282 /*  -- LAPACK auxiliary routine (version 3.1) -- */
5283 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
5284 /*     November 2006 */
5285 /* * */
5286 /*     .. Scalar Arguments .. */
5287 /*     .. */
5288 /*     .. Array Arguments .. */
5289 /*     .. */
5290 
5291 /*  Purpose */
5292 /*  ======= */
5293 
5294 /*  Given the initial representation L D L^T and its cluster of close */
5295 /*  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
5296 /*  W( CLEND ), DLARRF finds a new relatively robust representation */
5297 /*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
5298 /*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
5299 
5300 /*  Arguments */
5301 /*  ========= */
5302 
5303 /*  N       (input) INTEGER */
5304 /*          The order of the matrix (subblock, if the matrix splitted). */
5305 
5306 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
5307 /*          The N diagonal elements of the diagonal matrix D. */
5308 
5309 /*  L       (input) DOUBLE PRECISION array, dimension (N-1) */
5310 /*          The (N-1) subdiagonal elements of the unit bidiagonal */
5311 /*          matrix L. */
5312 
5313 /*  LD      (input) DOUBLE PRECISION array, dimension (N-1) */
5314 /*          The (N-1) elements L(i)*D(i). */
5315 
5316 /*  CLSTRT  (input) INTEGER */
5317 /*          The index of the first eigenvalue in the cluster. */
5318 
5319 /*  CLEND   (input) INTEGER */
5320 /*          The index of the last eigenvalue in the cluster. */
5321 
5322 /*  W       (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
5323 /*          The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
5324 /*          W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
5325 /*          close eigenalues. */
5326 
5327 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
5328 /*          The separation from the right neighbor eigenvalue in W. */
5329 
5330 /*  WERR    (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
5331 /*          WERR contain the semiwidth of the uncertainty */
5332 /*          interval of the corresponding eigenvalue APPROXIMATION in W */
5333 
5334 /*  SPDIAM (input) estimate of the spectral diameter obtained from the */
5335 /*          Gerschgorin intervals */
5336 
5337 /*  CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
5338 /*          Set by the calling routine to protect against shifts too close */
5339 /*          to eigenvalues outside the cluster. */
5340 
5341 /*  PIVMIN  (input) DOUBLE PRECISION */
5342 /*          The minimum pivot allowed in the Sturm sequence. */
5343 
5344 /*  SIGMA   (output) DOUBLE PRECISION */
5345 /*          The shift used to form L(+) D(+) L(+)^T. */
5346 
5347 /*  DPLUS   (output) DOUBLE PRECISION array, dimension (N) */
5348 /*          The N diagonal elements of the diagonal matrix D(+). */
5349 
5350 /*  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1) */
5351 /*          The first (N-1) elements of LPLUS contain the subdiagonal */
5352 /*          elements of the unit bidiagonal matrix L(+). */
5353 
5354 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
5355 /*          Workspace. */
5356 
5357 /*  Further Details */
5358 /*  =============== */
5359 
5360 /*  Based on contributions by */
5361 /*     Beresford Parlett, University of California, Berkeley, USA */
5362 /*     Jim Demmel, University of California, Berkeley, USA */
5363 /*     Inderjit Dhillon, University of Texas, Austin, USA */
5364 /*     Osni Marques, LBNL/NERSC, USA */
5365 /*     Christof Voemel, University of California, Berkeley, USA */
5366 
5367 /*  ===================================================================== */
5368 
5369 /*     .. Parameters .. */
5370 /*     .. */
5371 /*     .. Local Scalars .. */
5372 /*     .. */
5373 /*     .. External Functions .. */
5374 /*     .. */
5375 /*     .. External Subroutines .. */
5376 /*     .. */
5377 /*     .. Intrinsic Functions .. */
5378 /*     .. */
5379 /*     .. Executable Statements .. */
5380 
5381     /* Parameter adjustments */
5382     --work;
5383     --lplus;
5384     --dplus;
5385     --werr;
5386     --wgap;
5387     --w;
5388     --ld;
5389     --l;
5390     --d__;
5391 
5392     /* Function Body */
5393     *info = 0;
5394     fact = 2.;
5395     eps = dlamch_("Precision");
5396     shift = 0;
5397     forcer = false;
5398 /*     Note that we cannot guarantee that for any of the shifts tried, */
5399 /*     the factorization has a small or even moderate element growth. */
5400 /*     There could be Ritz values at both ends of the cluster and despite */
5401 /*     backing off, there are examples where all factorizations tried */
5402 /*     (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
5403 /*     element growth. */
5404 /*     For this reason, we should use PIVMIN in this subroutine so that at */
5405 /*     least the L D L^T factorization exists. It can be checked afterwards */
5406 /*     whether the element growth caused bad residuals/orthogonality. */
5407 /*     Decide whether the code should accept the best among all */
5408 /*     representations despite large element growth or signal INFO=1 */
5409     nofail = true;
5410 
5411 /*     Compute the average gap length of the cluster */
5412     clwdth = (d__1 = w[*clend] - w[*clstrt], abs(d__1)) + werr[*clend] + werr[
5413 	    *clstrt];
5414     avgap = clwdth / (double) (*clend - *clstrt);
5415     mingap = std::min(*clgapl,*clgapr);
5416 /*     Initial values for shifts to both ends of cluster */
5417 /* Computing MIN */
5418     d__1 = w[*clstrt], d__2 = w[*clend];
5419     lsigma = std::min(d__1,d__2) - werr[*clstrt];
5420 /* Computing MAX */
5421     d__1 = w[*clstrt], d__2 = w[*clend];
5422     rsigma = std::max(d__1,d__2) + werr[*clend];
5423 /*     Use a small fudge to make sure that we really shift to the outside */
5424     lsigma -= abs(lsigma) * 4. * eps;
5425     rsigma += abs(rsigma) * 4. * eps;
5426 /*     Compute upper bounds for how much to back off the initial shifts */
5427     ldmax = mingap * .25 + *pivmin * 2.;
5428     rdmax = mingap * .25 + *pivmin * 2.;
5429 /* Computing MAX */
5430     d__1 = avgap, d__2 = wgap[*clstrt];
5431     ldelta = std::max(d__1,d__2) / fact;
5432 /* Computing MAX */
5433     d__1 = avgap, d__2 = wgap[*clend - 1];
5434     rdelta = std::max(d__1,d__2) / fact;
5435 
5436 /*     Initialize the record of the best representation found */
5437 
5438     s = dlamch_("S");
5439     smlgrowth = 1. / s;
5440     fail = (double) (*n - 1) * mingap / (*spdiam * eps);
5441     fail2 = (double) (*n - 1) * mingap / (*spdiam * sqrt(eps));
5442     bestshift = lsigma;
5443 
5444 /*     while (KTRY <= KTRYMAX) */
5445     ktry = 0;
5446     growthbound = *spdiam * 8.;
5447 L5:
5448     sawnan1 = false;
5449     sawnan2 = false;
5450 /*     Ensure that we do not back off too much of the initial shifts */
5451     ldelta = std::min(ldmax,ldelta);
5452     rdelta = std::min(rdmax,rdelta);
5453 /*     Compute the element growth when shifting to both ends of the cluster */
5454 /*     accept the shift if there is no element growth at one of the two ends */
5455 /*     Left end */
5456     s = -lsigma;
5457     dplus[1] = d__[1] + s;
5458     if (abs(dplus[1]) < *pivmin) {
5459 	dplus[1] = -(*pivmin);
5460 /*        Need to set SAWNAN1 because refined RRR test should not be used */
5461 /*        in this case */
5462 	sawnan1 = true;
5463     }
5464     max1 = abs(dplus[1]);
5465     i__1 = *n - 1;
5466     for (i__ = 1; i__ <= i__1; ++i__) {
5467 	lplus[i__] = ld[i__] / dplus[i__];
5468 	s = s * lplus[i__] * l[i__] - lsigma;
5469 	dplus[i__ + 1] = d__[i__ + 1] + s;
5470 	if ((d__1 = dplus[i__ + 1], abs(d__1)) < *pivmin) {
5471 	    dplus[i__ + 1] = -(*pivmin);
5472 /*           Need to set SAWNAN1 because refined RRR test should not be used */
5473 /*           in this case */
5474 	    sawnan1 = true;
5475 	}
5476 /* Computing MAX */
5477 	d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], abs(d__1));
5478 	max1 = std::max(d__2,d__3);
5479 /* L6: */
5480     }
5481     sawnan1 = sawnan1 || disnan_(&max1);
5482     if (forcer || max1 <= growthbound && ! sawnan1) {
5483 	*sigma = lsigma;
5484 	shift = 1;
5485 	goto L100;
5486     }
5487 /*     Right end */
5488     s = -rsigma;
5489     work[1] = d__[1] + s;
5490     if (abs(work[1]) < *pivmin) {
5491 	work[1] = -(*pivmin);
5492 /*        Need to set SAWNAN2 because refined RRR test should not be used */
5493 /*        in this case */
5494 	sawnan2 = true;
5495     }
5496     max2 = abs(work[1]);
5497     i__1 = *n - 1;
5498     for (i__ = 1; i__ <= i__1; ++i__) {
5499 	work[*n + i__] = ld[i__] / work[i__];
5500 	s = s * work[*n + i__] * l[i__] - rsigma;
5501 	work[i__ + 1] = d__[i__ + 1] + s;
5502 	if ((d__1 = work[i__ + 1], abs(d__1)) < *pivmin) {
5503 	    work[i__ + 1] = -(*pivmin);
5504 /*           Need to set SAWNAN2 because refined RRR test should not be used */
5505 /*           in this case */
5506 	    sawnan2 = true;
5507 	}
5508 /* Computing MAX */
5509 	d__2 = max2, d__3 = (d__1 = work[i__ + 1], abs(d__1));
5510 	max2 = std::max(d__2,d__3);
5511 /* L7: */
5512     }
5513     sawnan2 = sawnan2 || disnan_(&max2);
5514     if (forcer || max2 <= growthbound && ! sawnan2) {
5515 	*sigma = rsigma;
5516 	shift = 2;
5517 	goto L100;
5518     }
5519 /*     If we are at this point, both shifts led to too much element growth */
5520 /*     Record the better of the two shifts (provided it didn't lead to NaN) */
5521     if (sawnan1 && sawnan2) {
5522 /*        both MAX1 and MAX2 are NaN */
5523 	goto L50;
5524     } else {
5525 	if (! sawnan1) {
5526 	    indx = 1;
5527 	    if (max1 <= smlgrowth) {
5528 		smlgrowth = max1;
5529 		bestshift = lsigma;
5530 	    }
5531 	}
5532 	if (! sawnan2) {
5533 	    if (sawnan1 || max2 <= max1) {
5534 		indx = 2;
5535 	    }
5536 	    if (max2 <= smlgrowth) {
5537 		smlgrowth = max2;
5538 		bestshift = rsigma;
5539 	    }
5540 	}
5541     }
5542 /*     If we are here, both the left and the right shift led to */
5543 /*     element growth. If the element growth is moderate, then */
5544 /*     we may still accept the representation, if it passes a */
5545 /*     refined test for RRR. This test supposes that no NaN occurred. */
5546 /*     Moreover, we use the refined RRR test only for isolated clusters. */
5547     if (clwdth < mingap / 128. && std::min(max1,max2) < fail2 && ! sawnan1 && !
5548 	    sawnan2) {
5549 	dorrr1 = true;
5550     } else {
5551 	dorrr1 = false;
5552     }
5553     tryrrr1 = true;
5554     if (tryrrr1 && dorrr1) {
5555 	if (indx == 1) {
5556 	    tmp = (d__1 = dplus[*n], abs(d__1));
5557 	    znm2 = 1.;
5558 	    prod = 1.;
5559 	    oldp = 1.;
5560 	    for (i__ = *n - 1; i__ >= 1; --i__) {
5561 		if (prod <= eps) {
5562 		    prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
5563 			     work[*n + i__]) * oldp;
5564 		} else {
5565 		    prod *= (d__1 = work[*n + i__], abs(d__1));
5566 		}
5567 		oldp = prod;
5568 /* Computing 2nd power */
5569 		d__1 = prod;
5570 		znm2 += d__1 * d__1;
5571 /* Computing MAX */
5572 		d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, abs(d__1));
5573 		tmp = std::max(d__2,d__3);
5574 /* L15: */
5575 	    }
5576 	    rrr1 = tmp / (*spdiam * sqrt(znm2));
5577 	    if (rrr1 <= 8.) {
5578 		*sigma = lsigma;
5579 		shift = 1;
5580 		goto L100;
5581 	    }
5582 	} else if (indx == 2) {
5583 	    tmp = (d__1 = work[*n], abs(d__1));
5584 	    znm2 = 1.;
5585 	    prod = 1.;
5586 	    oldp = 1.;
5587 	    for (i__ = *n - 1; i__ >= 1; --i__) {
5588 		if (prod <= eps) {
5589 		    prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] *
5590 			    lplus[i__]) * oldp;
5591 		} else {
5592 		    prod *= (d__1 = lplus[i__], abs(d__1));
5593 		}
5594 		oldp = prod;
5595 /* Computing 2nd power */
5596 		d__1 = prod;
5597 		znm2 += d__1 * d__1;
5598 /* Computing MAX */
5599 		d__2 = tmp, d__3 = (d__1 = work[i__] * prod, abs(d__1));
5600 		tmp = std::max(d__2,d__3);
5601 /* L16: */
5602 	    }
5603 	    rrr2 = tmp / (*spdiam * sqrt(znm2));
5604 	    if (rrr2 <= 8.) {
5605 		*sigma = rsigma;
5606 		shift = 2;
5607 		goto L100;
5608 	    }
5609 	}
5610     }
5611 L50:
5612     if (ktry < 1) {
5613 /*        If we are here, both shifts failed also the RRR test. */
5614 /*        Back off to the outside */
5615 /* Computing MAX */
5616 	d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
5617 	lsigma = std::max(d__1,d__2);
5618 /* Computing MIN */
5619 	d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
5620 	rsigma = std::min(d__1,d__2);
5621 	ldelta *= 2.;
5622 	rdelta *= 2.;
5623 	++ktry;
5624 	goto L5;
5625     } else {
5626 /*        None of the representations investigated satisfied our */
5627 /*        criteria. Take the best one we found. */
5628 	if (smlgrowth < fail || nofail) {
5629 	    lsigma = bestshift;
5630 	    rsigma = bestshift;
5631 	    forcer = true;
5632 	    goto L5;
5633 	} else {
5634 	    *info = 1;
5635 	    return 0;
5636 	}
5637     }
5638 L100:
5639     if (shift == 1) {
5640     } else if (shift == 2) {
5641 /*        store new L and D back into DPLUS, LPLUS */
5642 	dcopy_(n, &work[1], &c__1, &dplus[1], &c__1);
5643 	i__1 = *n - 1;
5644 	dcopy_(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
5645     }
5646     return 0;
5647 
5648 /*     End of DLARRF */
5649 
5650 } /* dlarrf_ */
5651 
dlarrj_(integer * n,double * d__,double * e2,integer * ifirst,integer * ilast,double * rtol,integer * offset,double * w,double * werr,double * work,integer * iwork,double * pivmin,double * spdiam,integer * info)5652 /* Subroutine */ int dlarrj_(integer *n, double *d__, double *e2,
5653 	integer *ifirst, integer *ilast, double *rtol, integer *offset,
5654 	double *w, double *werr, double *work, integer *iwork,
5655 	double *pivmin, double *spdiam, integer *info)
5656 {
5657     /* System generated locals */
5658     integer i__1, i__2;
5659     double d__1, d__2;
5660 
5661     /* Builtin functions
5662     double log(double); */
5663 
5664     /* Local variables */
5665     integer i__, j, k, p;
5666     double s;
5667     integer i1, i2, ii;
5668     double fac, mid;
5669     integer cnt;
5670     double tmp, left;
5671     integer iter, nint, prev, next, savi1;
5672     double right, width, dplus;
5673     integer olnint, maxitr;
5674 
5675 
5676 /*  -- LAPACK auxiliary routine (version 3.1) -- */
5677 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
5678 /*     November 2006 */
5679 
5680 /*     .. Scalar Arguments .. */
5681 /*     .. */
5682 /*     .. Array Arguments .. */
5683 /*     .. */
5684 
5685 /*  Purpose */
5686 /*  ======= */
5687 
5688 /*  Given the initial eigenvalue approximations of T, DLARRJ */
5689 /*  does  bisection to refine the eigenvalues of T, */
5690 /*  W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
5691 /*  guesses for these eigenvalues are input in W, the corresponding estimate */
5692 /*  of the error in these guesses in WERR. During bisection, intervals */
5693 /*  [left, right] are maintained by storing their mid-points and */
5694 /*  semi-widths in the arrays W and WERR respectively. */
5695 
5696 /*  Arguments */
5697 /*  ========= */
5698 
5699 /*  N       (input) INTEGER */
5700 /*          The order of the matrix. */
5701 
5702 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
5703 /*          The N diagonal elements of T. */
5704 
5705 /*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
5706 /*          The Squares of the (N-1) subdiagonal elements of T. */
5707 
5708 /*  IFIRST  (input) INTEGER */
5709 /*          The index of the first eigenvalue to be computed. */
5710 
5711 /*  ILAST   (input) INTEGER */
5712 /*          The index of the last eigenvalue to be computed. */
5713 
5714 /*  RTOL   (input) DOUBLE PRECISION */
5715 /*          Tolerance for the convergence of the bisection intervals. */
5716 /*          An interval [LEFT,RIGHT] has converged if */
5717 /*          RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */
5718 
5719 /*  OFFSET  (input) INTEGER */
5720 /*          Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */
5721 /*          through ILAST-OFFSET elements of these arrays are to be used. */
5722 
5723 /*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
5724 /*          On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
5725 /*          estimates of the eigenvalues of L D L^T indexed IFIRST through */
5726 /*          ILAST. */
5727 /*          On output, these estimates are refined. */
5728 
5729 /*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
5730 /*          On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
5731 /*          the errors in the estimates of the corresponding elements in W. */
5732 /*          On output, these errors are refined. */
5733 
5734 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
5735 /*          Workspace. */
5736 
5737 /*  IWORK   (workspace) INTEGER array, dimension (2*N) */
5738 /*          Workspace. */
5739 
5740 /*  PIVMIN  (input) DOUBLE PRECISION */
5741 /*          The minimum pivot in the Sturm sequence for T. */
5742 
5743 /*  SPDIAM  (input) DOUBLE PRECISION */
5744 /*          The spectral diameter of T. */
5745 
5746 /*  INFO    (output) INTEGER */
5747 /*          Error flag. */
5748 
5749 /*  Further Details */
5750 /*  =============== */
5751 
5752 /*  Based on contributions by */
5753 /*     Beresford Parlett, University of California, Berkeley, USA */
5754 /*     Jim Demmel, University of California, Berkeley, USA */
5755 /*     Inderjit Dhillon, University of Texas, Austin, USA */
5756 /*     Osni Marques, LBNL/NERSC, USA */
5757 /*     Christof Voemel, University of California, Berkeley, USA */
5758 
5759 /*  ===================================================================== */
5760 
5761 /*     .. Parameters .. */
5762 /*     .. */
5763 /*     .. Local Scalars .. */
5764 
5765 /*     .. */
5766 /*     .. Intrinsic Functions .. */
5767 /*     .. */
5768 /*     .. Executable Statements .. */
5769 
5770     /* Parameter adjustments */
5771     --iwork;
5772     --work;
5773     --werr;
5774     --w;
5775     --e2;
5776     --d__;
5777 
5778     /* Function Body */
5779     *info = 0;
5780 
5781     maxitr = (integer) ((log(*spdiam + *pivmin) - log(*pivmin)) / log(2.)) +
5782 	    2;
5783 
5784 /*     Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
5785 /*     The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
5786 /*     Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */
5787 /*     for an unconverged interval is set to the index of the next unconverged */
5788 /*     interval, and is -1 or 0 for a converged interval. Thus a linked */
5789 /*     list of unconverged intervals is set up. */
5790 
5791     i1 = *ifirst;
5792     i2 = *ilast;
5793 /*     The number of unconverged intervals */
5794     nint = 0;
5795 /*     The last unconverged interval found */
5796     prev = 0;
5797     i__1 = i2;
5798     for (i__ = i1; i__ <= i__1; ++i__) {
5799 	k = i__ << 1;
5800 	ii = i__ - *offset;
5801 	left = w[ii] - werr[ii];
5802 	mid = w[ii];
5803 	right = w[ii] + werr[ii];
5804 	width = right - mid;
5805 /* Computing MAX */
5806 	d__1 = abs(left), d__2 = abs(right);
5807 	tmp = std::max(d__1,d__2);
5808 /*        The following test prevents the test of converged intervals */
5809 	if (width < *rtol * tmp) {
5810 /*           This interval has already converged and does not need refinement. */
5811 /*           (Note that the gaps might change through refining the */
5812 /*            eigenvalues, however, they can only get bigger.) */
5813 /*           Remove it from the list. */
5814 	    iwork[k - 1] = -1;
5815 /*           Make sure that I1 always points to the first unconverged interval */
5816 	    if (i__ == i1 && i__ < i2) {
5817 		i1 = i__ + 1;
5818 	    }
5819 	    if (prev >= i1 && i__ <= i2) {
5820 		iwork[(prev << 1) - 1] = i__ + 1;
5821 	    }
5822 	} else {
5823 /*           unconverged interval found */
5824 	    prev = i__;
5825 /*           Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
5826 
5827 /*           Do while( CNT(LEFT).GT.I-1 ) */
5828 
5829 	    fac = 1.;
5830 L20:
5831 	    cnt = 0;
5832 	    s = left;
5833 	    dplus = d__[1] - s;
5834 	    if (dplus < 0.) {
5835 		++cnt;
5836 	    }
5837 	    i__2 = *n;
5838 	    for (j = 2; j <= i__2; ++j) {
5839 		dplus = d__[j] - s - e2[j - 1] / dplus;
5840 		if (dplus < 0.) {
5841 		    ++cnt;
5842 		}
5843 /* L30: */
5844 	    }
5845 	    if (cnt > i__ - 1) {
5846 		left -= werr[ii] * fac;
5847 		fac *= 2.;
5848 		goto L20;
5849 	    }
5850 
5851 /*           Do while( CNT(RIGHT).LT.I ) */
5852 
5853 	    fac = 1.;
5854 L50:
5855 	    cnt = 0;
5856 	    s = right;
5857 	    dplus = d__[1] - s;
5858 	    if (dplus < 0.) {
5859 		++cnt;
5860 	    }
5861 	    i__2 = *n;
5862 	    for (j = 2; j <= i__2; ++j) {
5863 		dplus = d__[j] - s - e2[j - 1] / dplus;
5864 		if (dplus < 0.) {
5865 		    ++cnt;
5866 		}
5867 /* L60: */
5868 	    }
5869 	    if (cnt < i__) {
5870 		right += werr[ii] * fac;
5871 		fac *= 2.;
5872 		goto L50;
5873 	    }
5874 	    ++nint;
5875 	    iwork[k - 1] = i__ + 1;
5876 	    iwork[k] = cnt;
5877 	}
5878 	work[k - 1] = left;
5879 	work[k] = right;
5880 /* L75: */
5881     }
5882     savi1 = i1;
5883 
5884 /*     Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
5885 /*     and while (ITER.LT.MAXITR) */
5886 
5887     iter = 0;
5888 L80:
5889     prev = i1 - 1;
5890     i__ = i1;
5891     olnint = nint;
5892     i__1 = olnint;
5893     for (p = 1; p <= i__1; ++p) {
5894 	k = i__ << 1;
5895 	ii = i__ - *offset;
5896 	next = iwork[k - 1];
5897 	left = work[k - 1];
5898 	right = work[k];
5899 	mid = (left + right) * .5;
5900 /*        semiwidth of interval */
5901 	width = right - mid;
5902 /* Computing MAX */
5903 	d__1 = abs(left), d__2 = abs(right);
5904 	tmp = std::max(d__1,d__2);
5905 	if (width < *rtol * tmp || iter == maxitr) {
5906 /*           reduce number of unconverged intervals */
5907 	    --nint;
5908 /*           Mark interval as converged. */
5909 	    iwork[k - 1] = 0;
5910 	    if (i1 == i__) {
5911 		i1 = next;
5912 	    } else {
5913 /*              Prev holds the last unconverged interval previously examined */
5914 		if (prev >= i1) {
5915 		    iwork[(prev << 1) - 1] = next;
5916 		}
5917 	    }
5918 	    i__ = next;
5919 	    goto L100;
5920 	}
5921 	prev = i__;
5922 
5923 /*        Perform one bisection step */
5924 
5925 	cnt = 0;
5926 	s = mid;
5927 	dplus = d__[1] - s;
5928 	if (dplus < 0.) {
5929 	    ++cnt;
5930 	}
5931 	i__2 = *n;
5932 	for (j = 2; j <= i__2; ++j) {
5933 	    dplus = d__[j] - s - e2[j - 1] / dplus;
5934 	    if (dplus < 0.) {
5935 		++cnt;
5936 	    }
5937 /* L90: */
5938 	}
5939 	if (cnt <= i__ - 1) {
5940 	    work[k - 1] = mid;
5941 	} else {
5942 	    work[k] = mid;
5943 	}
5944 	i__ = next;
5945 L100:
5946 	;
5947     }
5948     ++iter;
5949 /*     do another loop if there are still unconverged intervals */
5950 /*     However, in the last iteration, all intervals are accepted */
5951 /*     since this is the best we can do. */
5952     if (nint > 0 && iter <= maxitr) {
5953 	goto L80;
5954     }
5955 
5956 
5957 /*     At this point, all the intervals have converged */
5958     i__1 = *ilast;
5959     for (i__ = savi1; i__ <= i__1; ++i__) {
5960 	k = i__ << 1;
5961 	ii = i__ - *offset;
5962 /*        All intervals marked by '0' have been refined. */
5963 	if (iwork[k - 1] == 0) {
5964 	    w[ii] = (work[k - 1] + work[k]) * .5;
5965 	    werr[ii] = work[k] - w[ii];
5966 	}
5967 /* L110: */
5968     }
5969 
5970     return 0;
5971 
5972 /*     End of DLARRJ */
5973 
5974 } /* dlarrj_ */
5975 
dlarrk_(integer * n,integer * iw,double * gl,double * gu,double * d__,double * e2,double * pivmin,double * reltol,double * w,double * werr,integer * info)5976 /* Subroutine */ int dlarrk_(integer *n, integer *iw, double *gl,
5977 	double *gu, double *d__, double *e2, double *pivmin,
5978 	double *reltol, double *w, double *werr, integer *info)
5979 {
5980     /* System generated locals */
5981     integer i__1;
5982     double d__1, d__2;
5983 
5984     /* Builtin functions
5985     double log(double); */
5986 
5987     /* Local variables */
5988     integer i__, it;
5989     double mid, eps, tmp1, tmp2, left, atoli, right;
5990     integer itmax;
5991     double rtoli, tnorm;
5992 
5993     integer negcnt;
5994 
5995 
5996 /*  -- LAPACK auxiliary routine (version 3.1) -- */
5997 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
5998 /*     November 2006 */
5999 
6000 /*     .. Scalar Arguments .. */
6001 /*     .. */
6002 /*     .. Array Arguments .. */
6003 /*     .. */
6004 
6005 /*  Purpose */
6006 /*  ======= */
6007 
6008 /*  DLARRK computes one eigenvalue of a symmetric tridiagonal */
6009 /*  matrix T to suitable accuracy. This is an auxiliary code to be */
6010 /*  called from DSTEMR. */
6011 
6012 /*  To avoid overflow, the matrix must be scaled so that its */
6013 /*  largest element is no greater than overflow**(1/2) * */
6014 /*  underflow**(1/4) in absolute value, and for greatest */
6015 /*  accuracy, it should not be much smaller than that. */
6016 
6017 /*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal */
6018 /*  Matrix", Report CS41, Computer Science Dept., Stanford */
6019 /*  University, July 21, 1966. */
6020 
6021 /*  Arguments */
6022 /*  ========= */
6023 
6024 /*  N       (input) INTEGER */
6025 /*          The order of the tridiagonal matrix T.  N >= 0. */
6026 
6027 /*  IW      (input) INTEGER */
6028 /*          The index of the eigenvalues to be returned. */
6029 
6030 /*  GL      (input) DOUBLE PRECISION */
6031 /*  GU      (input) DOUBLE PRECISION */
6032 /*          An upper and a lower bound on the eigenvalue. */
6033 
6034 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
6035 /*          The n diagonal elements of the tridiagonal matrix T. */
6036 
6037 /*  E2      (input) DOUBLE PRECISION array, dimension (N-1) */
6038 /*          The (n-1) squared off-diagonal elements of the tridiagonal matrix T. */
6039 
6040 /*  PIVMIN  (input) DOUBLE PRECISION */
6041 /*          The minimum pivot allowed in the Sturm sequence for T. */
6042 
6043 /*  RELTOL  (input) DOUBLE PRECISION */
6044 /*          The minimum relative width of an interval.  When an interval */
6045 /*          is narrower than RELTOL times the larger (in */
6046 /*          magnitude) endpoint, then it is considered to be */
6047 /*          sufficiently small, i.e., converged.  Note: this should */
6048 /*          always be at least radix*machine epsilon. */
6049 
6050 /*  W       (output) DOUBLE PRECISION */
6051 
6052 /*  WERR    (output) DOUBLE PRECISION */
6053 /*          The error bound on the corresponding eigenvalue approximation */
6054 /*          in W. */
6055 
6056 /*  INFO    (output) INTEGER */
6057 /*          = 0:       Eigenvalue converged */
6058 /*          = -1:      Eigenvalue did NOT converge */
6059 
6060 /*  Internal Parameters */
6061 /*  =================== */
6062 
6063 /*  FUDGE   DOUBLE PRECISION, default = 2 */
6064 /*          A "fudge factor" to widen the Gershgorin intervals. */
6065 
6066 /*  ===================================================================== */
6067 
6068 /*     .. Parameters .. */
6069 /*     .. */
6070 /*     .. Local Scalars .. */
6071 /*     .. */
6072 /*     .. External Functions .. */
6073 /*     .. */
6074 /*     .. Intrinsic Functions .. */
6075 /*     .. */
6076 /*     .. Executable Statements .. */
6077 
6078 /*     Get machine constants */
6079     /* Parameter adjustments */
6080     --e2;
6081     --d__;
6082 
6083     /* Function Body */
6084     eps = dlamch_("P");
6085 /* Computing MAX */
6086     d__1 = abs(*gl), d__2 = abs(*gu);
6087     tnorm = std::max(d__1,d__2);
6088     rtoli = *reltol;
6089     atoli = *pivmin * 4.;
6090     itmax = (integer) ((log(tnorm + *pivmin) - log(*pivmin)) / log(2.)) + 2;
6091     *info = -1;
6092     left = *gl - tnorm * 2. * eps * *n - *pivmin * 4.;
6093     right = *gu + tnorm * 2. * eps * *n + *pivmin * 4.;
6094     it = 0;
6095 L10:
6096 
6097 /*     Check if interval converged or maximum number of iterations reached */
6098 
6099     tmp1 = (d__1 = right - left, abs(d__1));
6100 /* Computing MAX */
6101     d__1 = abs(right), d__2 = abs(left);
6102     tmp2 = std::max(d__1,d__2);
6103 /* Computing MAX */
6104     d__1 = std::max(atoli,*pivmin), d__2 = rtoli * tmp2;
6105     if (tmp1 < std::max(d__1,d__2)) {
6106 	*info = 0;
6107 	goto L30;
6108     }
6109     if (it > itmax) {
6110 	goto L30;
6111     }
6112 
6113 /*     Count number of negative pivots for mid-point */
6114 
6115     ++it;
6116     mid = (left + right) * .5;
6117     negcnt = 0;
6118     tmp1 = d__[1] - mid;
6119     if (abs(tmp1) < *pivmin) {
6120 	tmp1 = -(*pivmin);
6121     }
6122     if (tmp1 <= 0.) {
6123 	++negcnt;
6124     }
6125 
6126     i__1 = *n;
6127     for (i__ = 2; i__ <= i__1; ++i__) {
6128 	tmp1 = d__[i__] - e2[i__ - 1] / tmp1 - mid;
6129 	if (abs(tmp1) < *pivmin) {
6130 	    tmp1 = -(*pivmin);
6131 	}
6132 	if (tmp1 <= 0.) {
6133 	    ++negcnt;
6134 	}
6135 /* L20: */
6136     }
6137     if (negcnt >= *iw) {
6138 	right = mid;
6139     } else {
6140 	left = mid;
6141     }
6142     goto L10;
6143 L30:
6144 
6145 /*     Converged or maximum number of iterations reached */
6146 
6147     *w = (left + right) * .5;
6148     *werr = (d__1 = right - left, abs(d__1)) * .5;
6149     return 0;
6150 
6151 /*     End of DLARRK */
6152 
6153 } /* dlarrk_ */
6154 
dlarrr_(integer * n,double * d__,double * e,integer * info)6155 /* Subroutine */ int dlarrr_(integer *n, double *d__, double *e,
6156 	integer *info)
6157 {
6158     /* System generated locals */
6159     integer i__1;
6160     double d__1;
6161 
6162     /* Local variables */
6163     integer i__;
6164     double eps, tmp, tmp2, rmin;
6165 
6166     double offdig, safmin;
6167     bool yesrel;
6168     double smlnum, offdig2;
6169 
6170 
6171 /*  -- LAPACK auxiliary routine (version 3.1) -- */
6172 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
6173 /*     November 2006 */
6174 
6175 /*     .. Scalar Arguments .. */
6176 /*     .. */
6177 /*     .. Array Arguments .. */
6178 /*     .. */
6179 
6180 
6181 /*  Purpose */
6182 /*  ======= */
6183 
6184 /*  Perform tests to decide whether the symmetric tridiagonal matrix T */
6185 /*  warrants expensive computations which guarantee high relative accuracy */
6186 /*  in the eigenvalues. */
6187 
6188 /*  Arguments */
6189 /*  ========= */
6190 
6191 /*  N       (input) INTEGER */
6192 /*          The order of the matrix. N > 0. */
6193 
6194 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
6195 /*          The N diagonal elements of the tridiagonal matrix T. */
6196 
6197 /*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
6198 /*          On entry, the first (N-1) entries contain the subdiagonal */
6199 /*          elements of the tridiagonal matrix T; E(N) is set to ZERO. */
6200 
6201 /*  INFO    (output) INTEGER */
6202 /*          INFO = 0(default) : the matrix warrants computations preserving */
6203 /*                              relative accuracy. */
6204 /*          INFO = 1          : the matrix warrants computations guaranteeing */
6205 /*                              only absolute accuracy. */
6206 
6207 /*  Further Details */
6208 /*  =============== */
6209 
6210 /*  Based on contributions by */
6211 /*     Beresford Parlett, University of California, Berkeley, USA */
6212 /*     Jim Demmel, University of California, Berkeley, USA */
6213 /*     Inderjit Dhillon, University of Texas, Austin, USA */
6214 /*     Osni Marques, LBNL/NERSC, USA */
6215 /*     Christof Voemel, University of California, Berkeley, USA */
6216 
6217 /*  ===================================================================== */
6218 
6219 /*     .. Parameters .. */
6220 /*     .. */
6221 /*     .. Local Scalars .. */
6222 /*     .. */
6223 /*     .. External Functions .. */
6224 /*     .. */
6225 /*     .. Intrinsic Functions .. */
6226 /*     .. */
6227 /*     .. Executable Statements .. */
6228 
6229 /*     As a default, do NOT go for relative-accuracy preserving computations. */
6230     /* Parameter adjustments */
6231     --e;
6232     --d__;
6233 
6234     /* Function Body */
6235     *info = 1;
6236     safmin = dlamch_("Safe minimum");
6237     eps = dlamch_("Precision");
6238     smlnum = safmin / eps;
6239     rmin = sqrt(smlnum);
6240 /*     Tests for relative accuracy */
6241 
6242 /*     Test for scaled diagonal dominance */
6243 /*     Scale the diagonal entries to one and check whether the sum of the */
6244 /*     off-diagonals is less than one */
6245 
6246 /*     The sdd relative error bounds have a 1/(1- 2*x) factor in them, */
6247 /*     x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative */
6248 /*     accuracy is promised.  In the notation of the code fragment below, */
6249 /*     1/(1 - (OFFDIG + OFFDIG2)) is the condition number. */
6250 /*     We don't think it is worth going into "sdd mode" unless the relative */
6251 /*     condition number is reasonable, not 1/macheps. */
6252 /*     The threshold should be compatible with other thresholds used in the */
6253 /*     code. We set  OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds */
6254 /*     to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 */
6255 /*     instead of the current OFFDIG + OFFDIG2 < 1 */
6256 
6257     yesrel = true;
6258     offdig = 0.;
6259     tmp = sqrt((abs(d__[1])));
6260     if (tmp < rmin) {
6261 	yesrel = false;
6262     }
6263     if (! yesrel) {
6264 	goto L11;
6265     }
6266     i__1 = *n;
6267     for (i__ = 2; i__ <= i__1; ++i__) {
6268 	tmp2 = sqrt((d__1 = d__[i__], abs(d__1)));
6269 	if (tmp2 < rmin) {
6270 	    yesrel = false;
6271 	}
6272 	if (! yesrel) {
6273 	    goto L11;
6274 	}
6275 	offdig2 = (d__1 = e[i__ - 1], abs(d__1)) / (tmp * tmp2);
6276 	if (offdig + offdig2 >= .999) {
6277 	    yesrel = false;
6278 	}
6279 	if (! yesrel) {
6280 	    goto L11;
6281 	}
6282 	tmp = tmp2;
6283 	offdig = offdig2;
6284 /* L10: */
6285     }
6286 L11:
6287     if (yesrel) {
6288 	*info = 0;
6289 	return 0;
6290     } else {
6291     }
6292 
6293 
6294 /*     *** MORE TO BE IMPLEMENTED *** */
6295 
6296 
6297 /*     Test if the lower bidiagonal matrix L from T = L D L^T */
6298 /*     (zero shift facto) is well conditioned */
6299 
6300 
6301 /*     Test if the upper bidiagonal matrix U from T = U D U^T */
6302 /*     (zero shift facto) is well conditioned. */
6303 /*     In this case, the matrix needs to be flipped and, at the end */
6304 /*     of the eigenvector computation, the flip needs to be applied */
6305 /*     to the computed eigenvectors (and the support) */
6306 
6307 
6308     return 0;
6309 
6310 /*     END OF DLARRR */
6311 
6312 } /* dlarrr_ */
6313 
dlarrv_(integer * n,double * vl,double * vu,double * d__,double * l,double * pivmin,integer * isplit,integer * m,integer * dol,integer * dou,double * minrgp,double * rtol1,double * rtol2,double * w,double * werr,double * wgap,integer * iblock,integer * indexw,double * gers,double * z__,integer * ldz,integer * isuppz,double * work,integer * iwork,integer * info)6314 /* Subroutine */ int dlarrv_(integer *n, double *vl, double *vu,
6315 	double *d__, double *l, double *pivmin, integer *isplit,
6316 	integer *m, integer *dol, integer *dou, double *minrgp,
6317 	double *rtol1, double *rtol2, double *w, double *werr,
6318 	double *wgap, integer *iblock, integer *indexw, double *gers,
6319 	double *z__, integer *ldz, integer *isuppz, double *work,
6320 	integer *iwork, integer *info)
6321 {
6322 	/* Table of constant values */
6323 	static double c_b5 = 0.;
6324 	static integer c__1 = 1;
6325 	static integer c__2 = 2;
6326 
6327     /* System generated locals */
6328     integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
6329     double d__1, d__2;
6330     bool L__1;
6331 
6332     /* Builtin functions
6333     double log(double); */
6334 
6335     /* Local variables */
6336     integer minwsize, i__, j, k, p, q, miniwsize, ii;
6337     double gl;
6338     integer im, in;
6339     double gu, gap, eps, tau, tol, tmp;
6340     integer zto;
6341     double ztz;
6342     integer iend, jblk;
6343     double lgap;
6344     integer done;
6345     double rgap, left;
6346     integer wend, iter;
6347     double bstw;
6348     integer itmp1;
6349     integer indld;
6350     double fudge;
6351     integer idone;
6352     double sigma;
6353     integer iinfo, iindr;
6354     double resid;
6355     bool eskip;
6356     double right;
6357     integer nclus, zfrom;
6358     double rqtol;
6359     integer iindc1, iindc2;
6360     bool stp2ii;
6361     double lambda;
6362     integer ibegin, indeig;
6363     bool needbs;
6364     integer indlld;
6365     double sgndef, mingma;
6366     integer oldien, oldncl, wbegin;
6367     double spdiam;
6368     integer negcnt;
6369     integer oldcls;
6370     double savgap;
6371     integer ndepth;
6372     double ssigma;
6373     bool usedbs;
6374     integer iindwk, offset;
6375     double gaptol;
6376     integer newcls, oldfst, indwrk, windex, oldlst;
6377     bool usedrq;
6378     integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
6379     double bstres;
6380     integer newsiz, zusedu, zusedw;
6381     double nrminv, rqcorr;
6382     bool tryrqc;
6383     integer isupmx;
6384 
6385 
6386 /*  -- LAPACK auxiliary routine (version 3.1.1) -- */
6387 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
6388 /*     November 2006 */
6389 
6390 /*     .. Scalar Arguments .. */
6391 /*     .. */
6392 /*     .. Array Arguments .. */
6393 /*     .. */
6394 
6395 /*  Purpose */
6396 /*  ======= */
6397 
6398 /*  DLARRV computes the eigenvectors of the tridiagonal matrix */
6399 /*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
6400 /*  The input eigenvalues should have been computed by DLARRE. */
6401 
6402 /*  Arguments */
6403 /*  ========= */
6404 
6405 /*  N       (input) INTEGER */
6406 /*          The order of the matrix.  N >= 0. */
6407 
6408 /*  VL      (input) DOUBLE PRECISION */
6409 /*  VU      (input) DOUBLE PRECISION */
6410 /*          Lower and upper bounds of the interval that contains the desired */
6411 /*          eigenvalues. VL < VU. Needed to compute gaps on the left or right */
6412 /*          end of the extremal eigenvalues in the desired RANGE. */
6413 
6414 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
6415 /*          On entry, the N diagonal elements of the diagonal matrix D. */
6416 /*          On exit, D may be overwritten. */
6417 
6418 /*  L       (input/output) DOUBLE PRECISION array, dimension (N) */
6419 /*          On entry, the (N-1) subdiagonal elements of the unit */
6420 /*          bidiagonal matrix L are in elements 1 to N-1 of L */
6421 /*          (if the matrix is not splitted.) At the end of each block */
6422 /*          is stored the corresponding shift as given by DLARRE. */
6423 /*          On exit, L is overwritten. */
6424 
6425 /*  PIVMIN  (in) DOUBLE PRECISION */
6426 /*          The minimum pivot allowed in the Sturm sequence. */
6427 
6428 /*  ISPLIT  (input) INTEGER array, dimension (N) */
6429 /*          The splitting points, at which T breaks up into blocks. */
6430 /*          The first block consists of rows/columns 1 to */
6431 /*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
6432 /*          through ISPLIT( 2 ), etc. */
6433 
6434 /*  M       (input) INTEGER */
6435 /*          The total number of input eigenvalues.  0 <= M <= N. */
6436 
6437 /*  DOL     (input) INTEGER */
6438 /*  DOU     (input) INTEGER */
6439 /*          If the user wants to compute only selected eigenvectors from all */
6440 /*          the eigenvalues supplied, he can specify an index range DOL:DOU. */
6441 /*          Or else the setting DOL=1, DOU=M should be applied. */
6442 /*          Note that DOL and DOU refer to the order in which the eigenvalues */
6443 /*          are stored in W. */
6444 /*          If the user wants to compute only selected eigenpairs, then */
6445 /*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
6446 /*          computed eigenvectors. All other columns of Z are set to zero. */
6447 
6448 /*  MINRGP  (input) DOUBLE PRECISION */
6449 
6450 /*  RTOL1   (input) DOUBLE PRECISION */
6451 /*  RTOL2   (input) DOUBLE PRECISION */
6452 /*           Parameters for bisection. */
6453 /*           An interval [LEFT,RIGHT] has converged if */
6454 /*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
6455 
6456 /*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
6457 /*          The first M elements of W contain the APPROXIMATE eigenvalues for */
6458 /*          which eigenvectors are to be computed.  The eigenvalues */
6459 /*          should be grouped by split-off block and ordered from */
6460 /*          smallest to largest within the block ( The output array */
6461 /*          W from DLARRE is expected here ). Furthermore, they are with */
6462 /*          respect to the shift of the corresponding root representation */
6463 /*          for their block. On exit, W holds the eigenvalues of the */
6464 /*          UNshifted matrix. */
6465 
6466 /*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
6467 /*          The first M elements contain the semiwidth of the uncertainty */
6468 /*          interval of the corresponding eigenvalue in W */
6469 
6470 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N) */
6471 /*          The separation from the right neighbor eigenvalue in W. */
6472 
6473 /*  IBLOCK  (input) INTEGER array, dimension (N) */
6474 /*          The indices of the blocks (submatrices) associated with the */
6475 /*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
6476 /*          W(i) belongs to the first block from the top, =2 if W(i) */
6477 /*          belongs to the second block, etc. */
6478 
6479 /*  INDEXW  (input) INTEGER array, dimension (N) */
6480 /*          The indices of the eigenvalues within each block (submatrix); */
6481 /*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
6482 /*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
6483 
6484 /*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
6485 /*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
6486 /*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
6487 /*          be computed from the original UNshifted matrix. */
6488 
6489 /*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
6490 /*          If INFO = 0, the first M columns of Z contain the */
6491 /*          orthonormal eigenvectors of the matrix T */
6492 /*          corresponding to the input eigenvalues, with the i-th */
6493 /*          column of Z holding the eigenvector associated with W(i). */
6494 /*          Note: the user must ensure that at least max(1,M) columns are */
6495 /*          supplied in the array Z. */
6496 
6497 /*  LDZ     (input) INTEGER */
6498 /*          The leading dimension of the array Z.  LDZ >= 1, and if */
6499 /*          JOBZ = 'V', LDZ >= max(1,N). */
6500 
6501 /*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) ) */
6502 /*          The support of the eigenvectors in Z, i.e., the indices */
6503 /*          indicating the nonzero elements in Z. The I-th eigenvector */
6504 /*          is nonzero only in elements ISUPPZ( 2*I-1 ) through */
6505 /*          ISUPPZ( 2*I ). */
6506 
6507 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N) */
6508 
6509 /*  IWORK   (workspace) INTEGER array, dimension (7*N) */
6510 
6511 /*  INFO    (output) INTEGER */
6512 /*          = 0:  successful exit */
6513 
6514 /*          > 0:  A problem occured in DLARRV. */
6515 /*          < 0:  One of the called subroutines signaled an internal problem. */
6516 /*                Needs inspection of the corresponding parameter IINFO */
6517 /*                for further information. */
6518 
6519 /*          =-1:  Problem in DLARRB when refining a child's eigenvalues. */
6520 /*          =-2:  Problem in DLARRF when computing the RRR of a child. */
6521 /*                When a child is inside a tight cluster, it can be difficult */
6522 /*                to find an RRR. A partial remedy from the user's point of */
6523 /*                view is to make the parameter MINRGP smaller and recompile. */
6524 /*                However, as the orthogonality of the computed vectors is */
6525 /*                proportional to 1/MINRGP, the user should be aware that */
6526 /*                he might be trading in precision when he decreases MINRGP. */
6527 /*          =-3:  Problem in DLARRB when refining a single eigenvalue */
6528 /*                after the Rayleigh correction was rejected. */
6529 /*          = 5:  The Rayleigh Quotient Iteration failed to converge to */
6530 /*                full accuracy in MAXITR steps. */
6531 
6532 /*  Further Details */
6533 /*  =============== */
6534 
6535 /*  Based on contributions by */
6536 /*     Beresford Parlett, University of California, Berkeley, USA */
6537 /*     Jim Demmel, University of California, Berkeley, USA */
6538 /*     Inderjit Dhillon, University of Texas, Austin, USA */
6539 /*     Osni Marques, LBNL/NERSC, USA */
6540 /*     Christof Voemel, University of California, Berkeley, USA */
6541 
6542 /*  ===================================================================== */
6543 
6544 /*     .. Parameters .. */
6545 /*     .. */
6546 /*     .. Local Scalars .. */
6547 /*     .. */
6548 /*     .. External Functions .. */
6549 /*     .. */
6550 /*     .. External Subroutines .. */
6551 /*     .. */
6552 /*     .. Intrinsic Functions .. */
6553 /*     .. */
6554 /*     .. Executable Statements .. */
6555 /*     .. */
6556 /*     The first N entries of WORK are reserved for the eigenvalues */
6557     /* Parameter adjustments */
6558     --d__;
6559     --l;
6560     --isplit;
6561     --w;
6562     --werr;
6563     --wgap;
6564     --iblock;
6565     --indexw;
6566     --gers;
6567     z_dim1 = *ldz;
6568     z_offset = 1 + z_dim1;
6569     z__ -= z_offset;
6570     --isuppz;
6571     --work;
6572     --iwork;
6573 
6574     /* Function Body */
6575     indld = *n + 1;
6576     indlld = (*n << 1) + 1;
6577     indwrk = *n * 3 + 1;
6578     minwsize = *n * 12;
6579     i__1 = minwsize;
6580     for (i__ = 1; i__ <= i__1; ++i__) {
6581 	work[i__] = 0.;
6582 /* L5: */
6583     }
6584 /*     IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
6585 /*     factorization used to compute the FP vector */
6586     iindr = 0;
6587 /*     IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
6588 /*     layer and the one above. */
6589     iindc1 = *n;
6590     iindc2 = *n << 1;
6591     iindwk = *n * 3 + 1;
6592     miniwsize = *n * 7;
6593     i__1 = miniwsize;
6594     for (i__ = 1; i__ <= i__1; ++i__) {
6595 	iwork[i__] = 0;
6596 /* L10: */
6597     }
6598     zusedl = 1;
6599     if (*dol > 1) {
6600 /*        Set lower bound for use of Z */
6601 	zusedl = *dol - 1;
6602     }
6603     zusedu = *m;
6604     if (*dou < *m) {
6605 /*        Set lower bound for use of Z */
6606 	zusedu = *dou + 1;
6607     }
6608 /*     The width of the part of Z that is used */
6609     zusedw = zusedu - zusedl + 1;
6610     dlaset_("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
6611     eps = dlamch_("Precision");
6612     rqtol = eps * 2.;
6613 
6614 /*     Set expert flags for standard code. */
6615     tryrqc = true;
6616     if (*dol == 1 && *dou == *m) {
6617     } else {
6618 /*        Only selected eigenpairs are computed. Since the other evalues */
6619 /*        are not refined by RQ iteration, bisection has to compute to full */
6620 /*        accuracy. */
6621 	*rtol1 = eps * 4.;
6622 	*rtol2 = eps * 4.;
6623     }
6624 /*     The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
6625 /*     desired eigenvalues. The support of the nonzero eigenvector */
6626 /*     entries is contained in the interval IBEGIN:IEND. */
6627 /*     Remark that if k eigenpairs are desired, then the eigenvectors */
6628 /*     are stored in k contiguous columns of Z. */
6629 /*     DONE is the number of eigenvectors already computed */
6630     done = 0;
6631     ibegin = 1;
6632     wbegin = 1;
6633     i__1 = iblock[*m];
6634     for (jblk = 1; jblk <= i__1; ++jblk) {
6635 	iend = isplit[jblk];
6636 	sigma = l[iend];
6637 /*        Find the eigenvectors of the submatrix indexed IBEGIN */
6638 /*        through IEND. */
6639 	wend = wbegin - 1;
6640 L15:
6641 	if (wend < *m) {
6642 	    if (iblock[wend + 1] == jblk) {
6643 		++wend;
6644 		goto L15;
6645 	    }
6646 	}
6647 	if (wend < wbegin) {
6648 	    ibegin = iend + 1;
6649 	    goto L170;
6650 	} else if (wend < *dol || wbegin > *dou) {
6651 	    ibegin = iend + 1;
6652 	    wbegin = wend + 1;
6653 	    goto L170;
6654 	}
6655 /*        Find local spectral diameter of the block */
6656 	gl = gers[(ibegin << 1) - 1];
6657 	gu = gers[ibegin * 2];
6658 	i__2 = iend;
6659 	for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
6660 /* Computing MIN */
6661 	    d__1 = gers[(i__ << 1) - 1];
6662 	    gl = std::min(d__1,gl);
6663 /* Computing MAX */
6664 	    d__1 = gers[i__ * 2];
6665 	    gu = std::max(d__1,gu);
6666 /* L20: */
6667 	}
6668 	spdiam = gu - gl;
6669 /*        OLDIEN is the last index of the previous block */
6670 	oldien = ibegin - 1;
6671 /*        Calculate the size of the current block */
6672 	in = iend - ibegin + 1;
6673 /*        The number of eigenvalues in the current block */
6674 	im = wend - wbegin + 1;
6675 /*        This is for a 1x1 block */
6676 	if (ibegin == iend) {
6677 	    ++done;
6678 	    z__[ibegin + wbegin * z_dim1] = 1.;
6679 	    isuppz[(wbegin << 1) - 1] = ibegin;
6680 	    isuppz[wbegin * 2] = ibegin;
6681 	    w[wbegin] += sigma;
6682 	    work[wbegin] = w[wbegin];
6683 	    ibegin = iend + 1;
6684 	    ++wbegin;
6685 	    goto L170;
6686 	}
6687 /*        The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
6688 /*        Note that these can be approximations, in this case, the corresp. */
6689 /*        entries of WERR give the size of the uncertainty interval. */
6690 /*        The eigenvalue approximations will be refined when necessary as */
6691 /*        high relative accuracy is required for the computation of the */
6692 /*        corresponding eigenvectors. */
6693 	dcopy_(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
6694 /*        We store in W the eigenvalue approximations w.r.t. the original */
6695 /*        matrix T. */
6696 	i__2 = im;
6697 	for (i__ = 1; i__ <= i__2; ++i__) {
6698 	    w[wbegin + i__ - 1] += sigma;
6699 /* L30: */
6700 	}
6701 /*        NDEPTH is the current depth of the representation tree */
6702 	ndepth = 0;
6703 /*        PARITY is either 1 or 0 */
6704 	parity = 1;
6705 /*        NCLUS is the number of clusters for the next level of the */
6706 /*        representation tree, we start with NCLUS = 1 for the root */
6707 	nclus = 1;
6708 	iwork[iindc1 + 1] = 1;
6709 	iwork[iindc1 + 2] = im;
6710 /*        IDONE is the number of eigenvectors already computed in the current */
6711 /*        block */
6712 	idone = 0;
6713 /*        loop while( IDONE.LT.IM ) */
6714 /*        generate the representation tree for the current block and */
6715 /*        compute the eigenvectors */
6716 L40:
6717 	if (idone < im) {
6718 /*           This is a crude protection against infinitely deep trees */
6719 	    if (ndepth > *m) {
6720 		*info = -2;
6721 		return 0;
6722 	    }
6723 /*           breadth first processing of the current level of the representation */
6724 /*           tree: OLDNCL = number of clusters on current level */
6725 	    oldncl = nclus;
6726 /*           reset NCLUS to count the number of child clusters */
6727 	    nclus = 0;
6728 
6729 	    parity = 1 - parity;
6730 	    if (parity == 0) {
6731 		oldcls = iindc1;
6732 		newcls = iindc2;
6733 	    } else {
6734 		oldcls = iindc2;
6735 		newcls = iindc1;
6736 	    }
6737 /*           Process the clusters on the current level */
6738 	    i__2 = oldncl;
6739 	    for (i__ = 1; i__ <= i__2; ++i__) {
6740 		j = oldcls + (i__ << 1);
6741 /*              OLDFST, OLDLST = first, last index of current cluster. */
6742 /*                               cluster indices start with 1 and are relative */
6743 /*                               to WBEGIN when accessing W, WGAP, WERR, Z */
6744 		oldfst = iwork[j - 1];
6745 		oldlst = iwork[j];
6746 		if (ndepth > 0) {
6747 /*                 Retrieve relatively robust representation (RRR) of cluster */
6748 /*                 that has been computed at the previous level */
6749 /*                 The RRR is stored in Z and overwritten once the eigenvectors */
6750 /*                 have been computed or when the cluster is refined */
6751 		    if (*dol == 1 && *dou == *m) {
6752 /*                    Get representation from location of the leftmost evalue */
6753 /*                    of the cluster */
6754 			j = wbegin + oldfst - 1;
6755 		    } else {
6756 			if (wbegin + oldfst - 1 < *dol) {
6757 /*                       Get representation from the left end of Z array */
6758 			    j = *dol - 1;
6759 			} else if (wbegin + oldfst - 1 > *dou) {
6760 /*                       Get representation from the right end of Z array */
6761 			    j = *dou;
6762 			} else {
6763 			    j = wbegin + oldfst - 1;
6764 			}
6765 		    }
6766 		    dcopy_(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
6767 , &c__1);
6768 		    i__3 = in - 1;
6769 		    dcopy_(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
6770 			    ibegin], &c__1);
6771 		    sigma = z__[iend + (j + 1) * z_dim1];
6772 /*                 Set the corresponding entries in Z to zero */
6773 		    dlaset_("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
6774 			    * z_dim1], ldz);
6775 		}
6776 /*              Compute DL and DLL of current RRR */
6777 		i__3 = iend - 1;
6778 		for (j = ibegin; j <= i__3; ++j) {
6779 		    tmp = d__[j] * l[j];
6780 		    work[indld - 1 + j] = tmp;
6781 		    work[indlld - 1 + j] = tmp * l[j];
6782 /* L50: */
6783 		}
6784 		if (ndepth > 0) {
6785 /*                 P and Q are index of the first and last eigenvalue to compute */
6786 /*                 within the current block */
6787 		    p = indexw[wbegin - 1 + oldfst];
6788 		    q = indexw[wbegin - 1 + oldlst];
6789 /*                 Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
6790 /*                 thru' Q-OFFSET elements of these arrays are to be used. */
6791 /*                  OFFSET = P-OLDFST */
6792 		    offset = indexw[wbegin] - 1;
6793 /*                 perform limited bisection (if necessary) to get approximate */
6794 /*                 eigenvalues to the precision needed. */
6795 		    dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
6796 			     &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
6797 			    wbegin], &werr[wbegin], &work[indwrk], &iwork[
6798 			    iindwk], pivmin, &spdiam, &in, &iinfo);
6799 		    if (iinfo != 0) {
6800 			*info = -1;
6801 			return 0;
6802 		    }
6803 /*                 We also recompute the extremal gaps. W holds all eigenvalues */
6804 /*                 of the unshifted matrix and must be used for computation */
6805 /*                 of WGAP, the entries of WORK might stem from RRRs with */
6806 /*                 different shifts. The gaps from WBEGIN-1+OLDFST to */
6807 /*                 WBEGIN-1+OLDLST are correctly computed in DLARRB. */
6808 /*                 However, we only allow the gaps to become greater since */
6809 /*                 this is what should happen when we decrease WERR */
6810 		    if (oldfst > 1) {
6811 /* Computing MAX */
6812 			d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin +
6813 				oldfst - 1] - werr[wbegin + oldfst - 1] - w[
6814 				wbegin + oldfst - 2] - werr[wbegin + oldfst -
6815 				2];
6816 			wgap[wbegin + oldfst - 2] = std::max(d__1,d__2);
6817 		    }
6818 		    if (wbegin + oldlst - 1 < wend) {
6819 /* Computing MAX */
6820 			d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin +
6821 				oldlst] - werr[wbegin + oldlst] - w[wbegin +
6822 				oldlst - 1] - werr[wbegin + oldlst - 1];
6823 			wgap[wbegin + oldlst - 1] = std::max(d__1,d__2);
6824 		    }
6825 /*                 Each time the eigenvalues in WORK get refined, we store */
6826 /*                 the newly found approximation with all shifts applied in W */
6827 		    i__3 = oldlst;
6828 		    for (j = oldfst; j <= i__3; ++j) {
6829 			w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
6830 /* L53: */
6831 		    }
6832 		}
6833 /*              Process the current node. */
6834 		newfst = oldfst;
6835 		i__3 = oldlst;
6836 		for (j = oldfst; j <= i__3; ++j) {
6837 		    if (j == oldlst) {
6838 /*                    we are at the right end of the cluster, this is also the */
6839 /*                    boundary of the child cluster */
6840 			newlst = j;
6841 		    } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
6842 			    wbegin + j - 1], abs(d__1))) {
6843 /*                    the right relative gap is big enough, the child cluster */
6844 /*                    (NEWFST,..,NEWLST) is well separated from the following */
6845 			newlst = j;
6846 		    } else {
6847 /*                    inside a child cluster, the relative gap is not */
6848 /*                    big enough. */
6849 			goto L140;
6850 		    }
6851 /*                 Compute size of child cluster found */
6852 		    newsiz = newlst - newfst + 1;
6853 /*                 NEWFTT is the place in Z where the new RRR or the computed */
6854 /*                 eigenvector is to be stored */
6855 		    if (*dol == 1 && *dou == *m) {
6856 /*                    Store representation at location of the leftmost evalue */
6857 /*                    of the cluster */
6858 			newftt = wbegin + newfst - 1;
6859 		    } else {
6860 			if (wbegin + newfst - 1 < *dol) {
6861 /*                       Store representation at the left end of Z array */
6862 			    newftt = *dol - 1;
6863 			} else if (wbegin + newfst - 1 > *dou) {
6864 /*                       Store representation at the right end of Z array */
6865 			    newftt = *dou;
6866 			} else {
6867 			    newftt = wbegin + newfst - 1;
6868 			}
6869 		    }
6870 		    if (newsiz > 1) {
6871 
6872 /*                    Current child is not a singleton but a cluster. */
6873 /*                    Compute and store new representation of child. */
6874 
6875 
6876 /*                    Compute left and right cluster gap. */
6877 
6878 /*                    LGAP and RGAP are not computed from WORK because */
6879 /*                    the eigenvalue approximations may stem from RRRs */
6880 /*                    different shifts. However, W hold all eigenvalues */
6881 /*                    of the unshifted matrix. Still, the entries in WGAP */
6882 /*                    have to be computed from WORK since the entries */
6883 /*                    in W might be of the same order so that gaps are not */
6884 /*                    exhibited correctly for very close eigenvalues. */
6885 			if (newfst == 1) {
6886 /* Computing MAX */
6887 			    d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
6888 			    lgap = std::max(d__1,d__2);
6889 			} else {
6890 			    lgap = wgap[wbegin + newfst - 2];
6891 			}
6892 			rgap = wgap[wbegin + newlst - 1];
6893 
6894 /*                    Compute left- and rightmost eigenvalue of child */
6895 /*                    to high precision in order to shift as close */
6896 /*                    as possible and obtain as large relative gaps */
6897 /*                    as possible */
6898 
6899 			for (k = 1; k <= 2; ++k) {
6900 			    if (k == 1) {
6901 				p = indexw[wbegin - 1 + newfst];
6902 			    } else {
6903 				p = indexw[wbegin - 1 + newlst];
6904 			    }
6905 			    offset = indexw[wbegin] - 1;
6906 			    dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin
6907 				    - 1], &p, &p, &rqtol, &rqtol, &offset, &
6908 				    work[wbegin], &wgap[wbegin], &werr[wbegin]
6909 , &work[indwrk], &iwork[iindwk], pivmin, &
6910 				    spdiam, &in, &iinfo);
6911 /* L55: */
6912 			}
6913 
6914 			if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
6915 				> *dou) {
6916 /*                       if the cluster contains no desired eigenvalues */
6917 /*                       skip the computation of that branch of the rep. tree */
6918 
6919 /*                       We could skip before the refinement of the extremal */
6920 /*                       eigenvalues of the child, but then the representation */
6921 /*                       tree could be different from the one when nothing is */
6922 /*                       skipped. For this reason we skip at this place. */
6923 			    idone = idone + newlst - newfst + 1;
6924 			    goto L139;
6925 			}
6926 
6927 /*                    Compute RRR of child cluster. */
6928 /*                    Note that the new RRR is stored in Z */
6929 
6930 /*                    DLARRF needs LWORK = 2*N */
6931 			dlarrf_(&in, &d__[ibegin], &l[ibegin], &work[indld +
6932 				ibegin - 1], &newfst, &newlst, &work[wbegin],
6933 				&wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
6934 				&rgap, pivmin, &tau, &z__[ibegin + newftt *
6935 				z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
6936 				 &work[indwrk], &iinfo);
6937 			if (iinfo == 0) {
6938 /*                       a new RRR for the cluster was found by DLARRF */
6939 /*                       update shift and store it */
6940 			    ssigma = sigma + tau;
6941 			    z__[iend + (newftt + 1) * z_dim1] = ssigma;
6942 /*                       WORK() are the midpoints and WERR() the semi-width */
6943 /*                       Note that the entries in W are unchanged. */
6944 			    i__4 = newlst;
6945 			    for (k = newfst; k <= i__4; ++k) {
6946 				fudge = eps * 3. * (d__1 = work[wbegin + k -
6947 					1], abs(d__1));
6948 				work[wbegin + k - 1] -= tau;
6949 				fudge += eps * 4. * (d__1 = work[wbegin + k -
6950 					1], abs(d__1));
6951 /*                          Fudge errors */
6952 				werr[wbegin + k - 1] += fudge;
6953 /*                          Gaps are not fudged. Provided that WERR is small */
6954 /*                          when eigenvalues are close, a zero gap indicates */
6955 /*                          that a new representation is needed for resolving */
6956 /*                          the cluster. A fudge could lead to a wrong decision */
6957 /*                          of judging eigenvalues 'separated' which in */
6958 /*                          reality are not. This could have a negative impact */
6959 /*                          on the orthogonality of the computed eigenvectors. */
6960 /* L116: */
6961 			    }
6962 			    ++nclus;
6963 			    k = newcls + (nclus << 1);
6964 			    iwork[k - 1] = newfst;
6965 			    iwork[k] = newlst;
6966 			} else {
6967 			    *info = -2;
6968 			    return 0;
6969 			}
6970 		    } else {
6971 
6972 /*                    Compute eigenvector of singleton */
6973 
6974 			iter = 0;
6975 
6976 			tol = log((double) in) * 4. * eps;
6977 
6978 			k = newfst;
6979 			windex = wbegin + k - 1;
6980 /* Computing MAX */
6981 			i__4 = windex - 1;
6982 			windmn = std::max(i__4,1_integer);
6983 /* Computing MIN */
6984 			i__4 = windex + 1;
6985 			windpl = std::min(i__4,*m);
6986 			lambda = work[windex];
6987 			++done;
6988 /*                    Check if eigenvector computation is to be skipped */
6989 			if (windex < *dol || windex > *dou) {
6990 			    eskip = true;
6991 			    goto L125;
6992 			} else {
6993 			    eskip = false;
6994 			}
6995 			left = work[windex] - werr[windex];
6996 			right = work[windex] + werr[windex];
6997 			indeig = indexw[windex];
6998 /*                    Note that since we compute the eigenpairs for a child, */
6999 /*                    all eigenvalue approximations are w.r.t the same shift. */
7000 /*                    In this case, the entries in WORK should be used for */
7001 /*                    computing the gaps since they exhibit even very small */
7002 /*                    differences in the eigenvalues, as opposed to the */
7003 /*                    entries in W which might "look" the same. */
7004 			if (k == 1) {
7005 /*                       In the case RANGE='I' and with not much initial */
7006 /*                       accuracy in LAMBDA and VL, the formula */
7007 /*                       LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
7008 /*                       can lead to an overestimation of the left gap and */
7009 /*                       thus to inadequately early RQI 'convergence'. */
7010 /*                       Prevent this by forcing a small left gap. */
7011 /* Computing MAX */
7012 			    d__1 = abs(left), d__2 = abs(right);
7013 			    lgap = eps * std::max(d__1,d__2);
7014 			} else {
7015 			    lgap = wgap[windmn];
7016 			}
7017 			if (k == im) {
7018 /*                       In the case RANGE='I' and with not much initial */
7019 /*                       accuracy in LAMBDA and VU, the formula */
7020 /*                       can lead to an overestimation of the right gap and */
7021 /*                       thus to inadequately early RQI 'convergence'. */
7022 /*                       Prevent this by forcing a small right gap. */
7023 /* Computing MAX */
7024 			    d__1 = abs(left), d__2 = abs(right);
7025 			    rgap = eps * std::max(d__1,d__2);
7026 			} else {
7027 			    rgap = wgap[windex];
7028 			}
7029 			gap = std::min(lgap,rgap);
7030 			if (k == 1 || k == im) {
7031 /*                       The eigenvector support can become wrong */
7032 /*                       because significant entries could be cut off due to a */
7033 /*                       large GAPTOL parameter in LAR1V. Prevent this. */
7034 			    gaptol = 0.;
7035 			} else {
7036 			    gaptol = gap * eps;
7037 			}
7038 			isupmn = in;
7039 			isupmx = 1;
7040 /*                    Update WGAP so that it holds the minimum gap */
7041 /*                    to the left or the right. This is crucial in the */
7042 /*                    case where bisection is used to ensure that the */
7043 /*                    eigenvalue is refined up to the required precision. */
7044 /*                    The correct value is restored afterwards. */
7045 			savgap = wgap[windex];
7046 			wgap[windex] = gap;
7047 /*                    We want to use the Rayleigh Quotient Correction */
7048 /*                    as often as possible since it converges quadratically */
7049 /*                    when we are close enough to the desired eigenvalue. */
7050 /*                    However, the Rayleigh Quotient can have the wrong sign */
7051 /*                    and lead us away from the desired eigenvalue. In this */
7052 /*                    case, the best we can do is to use bisection. */
7053 			usedbs = false;
7054 			usedrq = false;
7055 /*                    Bisection is initially turned off unless it is forced */
7056 			needbs = ! tryrqc;
7057 L120:
7058 /*                    Check if bisection should be used to refine eigenvalue */
7059 			if (needbs) {
7060 /*                       Take the bisection as new iterate */
7061 			    usedbs = true;
7062 			    itmp1 = iwork[iindr + windex];
7063 			    offset = indexw[wbegin] - 1;
7064 			    d__1 = eps * 2.;
7065 			    dlarrb_(&in, &d__[ibegin], &work[indlld + ibegin
7066 				    - 1], &indeig, &indeig, &c_b5, &d__1, &
7067 				    offset, &work[wbegin], &wgap[wbegin], &
7068 				    werr[wbegin], &work[indwrk], &iwork[
7069 				    iindwk], pivmin, &spdiam, &itmp1, &iinfo);
7070 			    if (iinfo != 0) {
7071 				*info = -3;
7072 				return 0;
7073 			    }
7074 			    lambda = work[windex];
7075 /*                       Reset twist index from inaccurate LAMBDA to */
7076 /*                       force computation of true MINGMA */
7077 			    iwork[iindr + windex] = 0;
7078 			}
7079 /*                    Given LAMBDA, compute the eigenvector. */
7080 			L__1 = ! usedbs;
7081 			dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
7082 				ibegin], &work[indld + ibegin - 1], &work[
7083 				indlld + ibegin - 1], pivmin, &gaptol, &z__[
7084 				ibegin + windex * z_dim1], &L__1, &negcnt, &
7085 				ztz, &mingma, &iwork[iindr + windex], &isuppz[
7086 				(windex << 1) - 1], &nrminv, &resid, &rqcorr,
7087 				&work[indwrk]);
7088 			if (iter == 0) {
7089 			    bstres = resid;
7090 			    bstw = lambda;
7091 			} else if (resid < bstres) {
7092 			    bstres = resid;
7093 			    bstw = lambda;
7094 			}
7095 /* Computing MIN */
7096 			i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
7097 			isupmn = std::min(i__4,i__5);
7098 /* Computing MAX */
7099 			i__4 = isupmx, i__5 = isuppz[windex * 2];
7100 			isupmx = std::max(i__4,i__5);
7101 			++iter;
7102 /*                    sin alpha <= |resid|/gap */
7103 /*                    Note that both the residual and the gap are */
7104 /*                    proportional to the matrix, so ||T|| doesn't play */
7105 /*                    a role in the quotient */
7106 
7107 /*                    Convergence test for Rayleigh-Quotient iteration */
7108 /*                    (omitted when Bisection has been used) */
7109 
7110 			if (resid > tol * gap && abs(rqcorr) > rqtol * abs(
7111 				lambda) && ! usedbs) {
7112 /*                       We need to check that the RQCORR update doesn't */
7113 /*                       move the eigenvalue away from the desired one and */
7114 /*                       towards a neighbor. -> protection with bisection */
7115 			    if (indeig <= negcnt) {
7116 /*                          The wanted eigenvalue lies to the left */
7117 				sgndef = -1.;
7118 			    } else {
7119 /*                          The wanted eigenvalue lies to the right */
7120 				sgndef = 1.;
7121 			    }
7122 /*                       We only use the RQCORR if it improves the */
7123 /*                       the iterate reasonably. */
7124 			    if (rqcorr * sgndef >= 0. && lambda + rqcorr <=
7125 				    right && lambda + rqcorr >= left) {
7126 				usedrq = true;
7127 /*                          Store new midpoint of bisection interval in WORK */
7128 				if (sgndef == 1.) {
7129 /*                             The current LAMBDA is on the left of the true */
7130 /*                             eigenvalue */
7131 				    left = lambda;
7132 /*                             We prefer to assume that the error estimate */
7133 /*                             is correct. We could make the interval not */
7134 /*                             as a bracket but to be modified if the RQCORR */
7135 /*                             chooses to. In this case, the RIGHT side should */
7136 /*                             be modified as follows: */
7137 /*                              RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
7138 				} else {
7139 /*                             The current LAMBDA is on the right of the true */
7140 /*                             eigenvalue */
7141 				    right = lambda;
7142 /*                             See comment about assuming the error estimate is */
7143 /*                             correct above. */
7144 /*                              LEFT = MIN(LEFT, LAMBDA + RQCORR) */
7145 				}
7146 				work[windex] = (right + left) * .5;
7147 /*                          Take RQCORR since it has the correct sign and */
7148 /*                          improves the iterate reasonably */
7149 				lambda += rqcorr;
7150 /*                          Update width of error interval */
7151 				werr[windex] = (right - left) * .5;
7152 			    } else {
7153 				needbs = true;
7154 			    }
7155 			    if (right - left < rqtol * abs(lambda)) {
7156 /*                             The eigenvalue is computed to bisection accuracy */
7157 /*                             compute eigenvector and stop */
7158 				usedbs = true;
7159 				goto L120;
7160 			    } else if (iter < 10) {
7161 				goto L120;
7162 			    } else if (iter == 10) {
7163 				needbs = true;
7164 				goto L120;
7165 			    } else {
7166 				*info = 5;
7167 				return 0;
7168 			    }
7169 			} else {
7170 			    stp2ii = false;
7171 			    if (usedrq && usedbs && bstres <= resid) {
7172 				lambda = bstw;
7173 				stp2ii = true;
7174 			    }
7175 			    if (stp2ii) {
7176 /*                          improve error angle by second step */
7177 				L__1 = ! usedbs;
7178 				dlar1v_(&in, &c__1, &in, &lambda, &d__[ibegin]
7179 , &l[ibegin], &work[indld + ibegin -
7180 					1], &work[indlld + ibegin - 1],
7181 					pivmin, &gaptol, &z__[ibegin + windex
7182 					* z_dim1], &L__1, &negcnt, &ztz, &
7183 					mingma, &iwork[iindr + windex], &
7184 					isuppz[(windex << 1) - 1], &nrminv, &
7185 					resid, &rqcorr, &work[indwrk]);
7186 			    }
7187 			    work[windex] = lambda;
7188 			}
7189 
7190 /*                    Compute FP-vector support w.r.t. whole matrix */
7191 
7192 			isuppz[(windex << 1) - 1] += oldien;
7193 			isuppz[windex * 2] += oldien;
7194 			zfrom = isuppz[(windex << 1) - 1];
7195 			zto = isuppz[windex * 2];
7196 			isupmn += oldien;
7197 			isupmx += oldien;
7198 /*                    Ensure vector is ok if support in the RQI has changed */
7199 			if (isupmn < zfrom) {
7200 			    i__4 = zfrom - 1;
7201 			    for (ii = isupmn; ii <= i__4; ++ii) {
7202 				z__[ii + windex * z_dim1] = 0.;
7203 /* L122: */
7204 			    }
7205 			}
7206 			if (isupmx > zto) {
7207 			    i__4 = isupmx;
7208 			    for (ii = zto + 1; ii <= i__4; ++ii) {
7209 				z__[ii + windex * z_dim1] = 0.;
7210 /* L123: */
7211 			    }
7212 			}
7213 			i__4 = zto - zfrom + 1;
7214 			dscal_(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
7215 				&c__1);
7216 L125:
7217 /*                    Update W */
7218 			w[windex] = lambda + sigma;
7219 /*                    Recompute the gaps on the left and right */
7220 /*                    But only allow them to become larger and not */
7221 /*                    smaller (which can only happen through "bad" */
7222 /*                    cancellation and doesn't reflect the theory */
7223 /*                    where the initial gaps are underestimated due */
7224 /*                    to WERR being too crude.) */
7225 			if (! eskip) {
7226 			    if (k > 1) {
7227 /* Computing MAX */
7228 				d__1 = wgap[windmn], d__2 = w[windex] - werr[
7229 					windex] - w[windmn] - werr[windmn];
7230 				wgap[windmn] = std::max(d__1,d__2);
7231 			    }
7232 			    if (windex < wend) {
7233 /* Computing MAX */
7234 				d__1 = savgap, d__2 = w[windpl] - werr[windpl]
7235 					 - w[windex] - werr[windex];
7236 				wgap[windex] = std::max(d__1,d__2);
7237 			    }
7238 			}
7239 			++idone;
7240 		    }
7241 /*                 here ends the code for the current child */
7242 
7243 L139:
7244 /*                 Proceed to any remaining child nodes */
7245 		    newfst = j + 1;
7246 L140:
7247 		    ;
7248 		}
7249 /* L150: */
7250 	    }
7251 	    ++ndepth;
7252 	    goto L40;
7253 	}
7254 	ibegin = iend + 1;
7255 	wbegin = wend + 1;
7256 L170:
7257 	;
7258     }
7259 
7260     return 0;
7261 
7262 /*     End of DLARRV */
7263 
7264 } /* dlarrv_ */
7265 
dlarscl2_(integer * m,integer * n,double * d__,double * x,integer * ldx)7266 int dlarscl2_(integer *m, integer *n, double *d__, double *x, integer *ldx)
7267 {
7268     /* System generated locals */
7269     integer x_dim1, x_offset, i__1, i__2;
7270 
7271     /* Local variables */
7272     integer i__, j;
7273 
7274 
7275 /*     -- LAPACK routine (version 3.2.1)                               -- */
7276 /*     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
7277 /*     -- Jason Riedy of Univ. of California Berkeley.                 -- */
7278 /*     -- April 2009                                                   -- */
7279 
7280 /*     -- LAPACK is a software package provided by Univ. of Tennessee, -- */
7281 /*     -- Univ. of California Berkeley and NAG Ltd.                    -- */
7282 
7283 /*     .. */
7284 /*     .. Scalar Arguments .. */
7285 /*     .. */
7286 /*     .. Array Arguments .. */
7287 /*     .. */
7288 
7289 /*  Purpose */
7290 /*  ======= */
7291 
7292 /*  DLARSCL2 performs a reciprocal diagonal scaling on an vector: */
7293 /*    x <-- inv(D) * x */
7294 /*  where the diagonal matrix D is stored as a vector. */
7295 
7296 /*  Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */
7297 /*  standard. */
7298 
7299 /*  Arguments */
7300 /*  ========= */
7301 
7302 /*     M       (input) INTEGER */
7303 /*     The number of rows of D and X. M >= 0. */
7304 
7305 /*     N       (input) INTEGER */
7306 /*     The number of columns of D and X. N >= 0. */
7307 
7308 /*     D       (input) DOUBLE PRECISION array, length M */
7309 /*     Diagonal matrix D, stored as a vector of length M. */
7310 
7311 /*     X       (input/output) DOUBLE PRECISION array, dimension (LDX,N) */
7312 /*     On entry, the vector X to be scaled by D. */
7313 /*     On exit, the scaled vector. */
7314 
7315 /*     LDX     (input) INTEGER */
7316 /*     The leading dimension of the vector X. LDX >= 0. */
7317 
7318 /*  ===================================================================== */
7319 
7320 /*     .. Local Scalars .. */
7321 /*     .. */
7322 /*     .. Executable Statements .. */
7323 
7324     /* Parameter adjustments */
7325     --d__;
7326     x_dim1 = *ldx;
7327     x_offset = 1 + x_dim1;
7328     x -= x_offset;
7329 
7330     /* Function Body */
7331     i__1 = *n;
7332     for (j = 1; j <= i__1; ++j) {
7333 	i__2 = *m;
7334 	for (i__ = 1; i__ <= i__2; ++i__) {
7335 	    x[i__ + j * x_dim1] /= d__[i__];
7336 	}
7337     }
7338     return 0;
7339 } /* dlarscl2_ */
7340 
dlartg_(double * f,double * g,double * cs,double * sn,double * r__)7341 /* Subroutine */ int dlartg_(double *f, double *g, double *cs,
7342 	double *sn, double *r__)
7343 {
7344     /* System generated locals */
7345     integer i__1;
7346     double d__1, d__2;
7347 
7348     /* Local variables */
7349     integer i__;
7350     double f1, g1, eps, scale;
7351     integer count;
7352     double safmn2, safmx2;
7353 
7354     double safmin;
7355 
7356 
7357 /*  -- LAPACK auxiliary routine (version 3.1) -- */
7358 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
7359 /*     November 2006 */
7360 
7361 /*     .. Scalar Arguments .. */
7362 /*     .. */
7363 
7364 /*  Purpose */
7365 /*  ======= */
7366 
7367 /*  DLARTG generate a plane rotation so that */
7368 
7369 /*     [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1. */
7370 /*     [ -SN  CS  ]     [ G ]     [ 0 ] */
7371 
7372 /*  This is a slower, more accurate version of the BLAS1 routine DROTG, */
7373 /*  with the following other differences: */
7374 /*     F and G are unchanged on return. */
7375 /*     If G=0, then CS=1 and SN=0. */
7376 /*     If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any */
7377 /*        floating point operations (saves work in DBDSQR when */
7378 /*        there are zeros on the diagonal). */
7379 
7380 /*  If F exceeds G in magnitude, CS will be positive. */
7381 
7382 /*  Arguments */
7383 /*  ========= */
7384 
7385 /*  F       (input) DOUBLE PRECISION */
7386 /*          The first component of vector to be rotated. */
7387 
7388 /*  G       (input) DOUBLE PRECISION */
7389 /*          The second component of vector to be rotated. */
7390 
7391 /*  CS      (output) DOUBLE PRECISION */
7392 /*          The cosine of the rotation. */
7393 
7394 /*  SN      (output) DOUBLE PRECISION */
7395 /*          The sine of the rotation. */
7396 
7397 /*  R       (output) DOUBLE PRECISION */
7398 /*          The nonzero component of the rotated vector. */
7399 
7400 /*  This version has a few statements commented out for thread safety */
7401 /*  (machine parameters are computed on each entry). 10 feb 03, SJH. */
7402 
7403 /*  ===================================================================== */
7404 
7405 /*     .. Parameters .. */
7406 /*     .. */
7407 /*     .. Local Scalars .. */
7408 /*     LOGICAL            FIRST */
7409 /*     .. */
7410 /*     .. External Functions .. */
7411 /*     .. */
7412 /*     .. Intrinsic Functions .. */
7413 /*     .. */
7414 /*     .. Save statement .. */
7415 /*     SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2 */
7416 /*     .. */
7417 /*     .. Data statements .. */
7418 /*     DATA               FIRST / .TRUE. / */
7419 /*     .. */
7420 /*     .. Executable Statements .. */
7421 
7422 /*     IF( FIRST ) THEN */
7423     safmin = dlamch_("S");
7424     eps = dlamch_("E");
7425     d__1 = dlamch_("B");
7426     i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.);
7427     safmn2 = pow_di(&d__1, &i__1);
7428     safmx2 = 1. / safmn2;
7429 /*        FIRST = .FALSE. */
7430 /*     END IF */
7431     if (*g == 0.) {
7432 	*cs = 1.;
7433 	*sn = 0.;
7434 	*r__ = *f;
7435     } else if (*f == 0.) {
7436 	*cs = 0.;
7437 	*sn = 1.;
7438 	*r__ = *g;
7439     } else {
7440 	f1 = *f;
7441 	g1 = *g;
7442 /* Computing MAX */
7443 	d__1 = abs(f1), d__2 = abs(g1);
7444 	scale = std::max(d__1,d__2);
7445 	if (scale >= safmx2) {
7446 	    count = 0;
7447 L10:
7448 	    ++count;
7449 	    f1 *= safmn2;
7450 	    g1 *= safmn2;
7451 /* Computing MAX */
7452 	    d__1 = abs(f1), d__2 = abs(g1);
7453 	    scale = std::max(d__1,d__2);
7454 	    if (scale >= safmx2) {
7455 		goto L10;
7456 	    }
7457 /* Computing 2nd power */
7458 	    d__1 = f1;
7459 /* Computing 2nd power */
7460 	    d__2 = g1;
7461 	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
7462 	    *cs = f1 / *r__;
7463 	    *sn = g1 / *r__;
7464 	    i__1 = count;
7465 	    for (i__ = 1; i__ <= i__1; ++i__) {
7466 		*r__ *= safmx2;
7467 /* L20: */
7468 	    }
7469 	} else if (scale <= safmn2) {
7470 	    count = 0;
7471 L30:
7472 	    ++count;
7473 	    f1 *= safmx2;
7474 	    g1 *= safmx2;
7475 /* Computing MAX */
7476 	    d__1 = abs(f1), d__2 = abs(g1);
7477 	    scale = std::max(d__1,d__2);
7478 	    if (scale <= safmn2) {
7479 		goto L30;
7480 	    }
7481 /* Computing 2nd power */
7482 	    d__1 = f1;
7483 /* Computing 2nd power */
7484 	    d__2 = g1;
7485 	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
7486 	    *cs = f1 / *r__;
7487 	    *sn = g1 / *r__;
7488 	    i__1 = count;
7489 	    for (i__ = 1; i__ <= i__1; ++i__) {
7490 		*r__ *= safmn2;
7491 /* L40: */
7492 	    }
7493 	} else {
7494 /* Computing 2nd power */
7495 	    d__1 = f1;
7496 /* Computing 2nd power */
7497 	    d__2 = g1;
7498 	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
7499 	    *cs = f1 / *r__;
7500 	    *sn = g1 / *r__;
7501 	}
7502 	if (abs(*f) > abs(*g) && *cs < 0.) {
7503 	    *cs = -(*cs);
7504 	    *sn = -(*sn);
7505 	    *r__ = -(*r__);
7506 	}
7507     }
7508     return 0;
7509 
7510 /*     End of DLARTG */
7511 
7512 } /* dlartg_ */
7513 
dlartv_(integer * n,double * x,integer * incx,double * y,integer * incy,double * c__,double * s,integer * incc)7514 /* Subroutine */ int dlartv_(integer *n, double *x, integer *incx,
7515 	double *y, integer *incy, double *c__, double *s, integer
7516 	*incc)
7517 {
7518     /* System generated locals */
7519     integer i__1;
7520 
7521     /* Local variables */
7522     integer i__, ic, ix, iy;
7523     double xi, yi;
7524 
7525 
7526 /*  -- LAPACK auxiliary routine (version 3.1) -- */
7527 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
7528 /*     November 2006 */
7529 
7530 /*     .. Scalar Arguments .. */
7531 /*     .. */
7532 /*     .. Array Arguments .. */
7533 /*     .. */
7534 
7535 /*  Purpose */
7536 /*  ======= */
7537 
7538 /*  DLARTV applies a vector of real plane rotations to elements of the */
7539 /*  real vectors x and y. For i = 1,2,...,n */
7540 
7541 /*     ( x(i) ) := (  c(i)  s(i) ) ( x(i) ) */
7542 /*     ( y(i) )    ( -s(i)  c(i) ) ( y(i) ) */
7543 
7544 /*  Arguments */
7545 /*  ========= */
7546 
7547 /*  N       (input) INTEGER */
7548 /*          The number of plane rotations to be applied. */
7549 
7550 /*  X       (input/output) DOUBLE PRECISION array, */
7551 /*                         dimension (1+(N-1)*INCX) */
7552 /*          The vector x. */
7553 
7554 /*  INCX    (input) INTEGER */
7555 /*          The increment between elements of X. INCX > 0. */
7556 
7557 /*  Y       (input/output) DOUBLE PRECISION array, */
7558 /*                         dimension (1+(N-1)*INCY) */
7559 /*          The vector y. */
7560 
7561 /*  INCY    (input) INTEGER */
7562 /*          The increment between elements of Y. INCY > 0. */
7563 
7564 /*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */
7565 /*          The cosines of the plane rotations. */
7566 
7567 /*  S       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC) */
7568 /*          The sines of the plane rotations. */
7569 
7570 /*  INCC    (input) INTEGER */
7571 /*          The increment between elements of C and S. INCC > 0. */
7572 
7573 /*  ===================================================================== */
7574 
7575 /*     .. Local Scalars .. */
7576 /*     .. */
7577 /*     .. Executable Statements .. */
7578 
7579     /* Parameter adjustments */
7580     --s;
7581     --c__;
7582     --y;
7583     --x;
7584 
7585     /* Function Body */
7586     ix = 1;
7587     iy = 1;
7588     ic = 1;
7589     i__1 = *n;
7590     for (i__ = 1; i__ <= i__1; ++i__) {
7591 	xi = x[ix];
7592 	yi = y[iy];
7593 	x[ix] = c__[ic] * xi + s[ic] * yi;
7594 	y[iy] = c__[ic] * yi - s[ic] * xi;
7595 	ix += *incx;
7596 	iy += *incy;
7597 	ic += *incc;
7598 /* L10: */
7599     }
7600     return 0;
7601 
7602 /*     End of DLARTV */
7603 
7604 } /* dlartv_ */
7605 
dlaruv_(integer * iseed,integer * n,double * x)7606 /* Subroutine */ int dlaruv_(integer *iseed, integer *n, double *x)
7607 {
7608     /* Initialized data */
7609     static integer mm[512]	/* was [128][4] */ = { 494,2637,255,2008,1253,
7610 	    3344,4084,1739,3143,3468,688,1657,1238,3166,1292,3422,1270,2016,
7611 	    154,2862,697,1706,491,931,1444,444,3577,3944,2184,1661,3482,657,
7612 	    3023,3618,1267,1828,164,3798,3087,2400,2870,3876,1905,1593,1797,
7613 	    1234,3460,328,2861,1950,617,2070,3331,769,1558,2412,2800,189,287,
7614 	    2045,1227,2838,209,2770,3654,3993,192,2253,3491,2889,2857,2094,
7615 	    1818,688,1407,634,3231,815,3524,1914,516,164,303,2144,3480,119,
7616 	    3357,837,2826,2332,2089,3780,1700,3712,150,2000,3375,1621,3090,
7617 	    3765,1149,3146,33,3082,2741,359,3316,1749,185,2784,2202,2199,1364,
7618 	    1244,2020,3160,2785,2772,1217,1822,1245,2252,3904,2774,997,2573,
7619 	    1148,545,322,789,1440,752,2859,123,1848,643,2405,2638,2344,46,
7620 	    3814,913,3649,339,3808,822,2832,3078,3633,2970,637,2249,2081,4019,
7621 	    1478,242,481,2075,4058,622,3376,812,234,641,4005,1122,3135,2640,
7622 	    2302,40,1832,2247,2034,2637,1287,1691,496,1597,2394,2584,1843,336,
7623 	    1472,2407,433,2096,1761,2810,566,442,41,1238,1086,603,840,3168,
7624 	    1499,1084,3438,2408,1589,2391,288,26,512,1456,171,1677,2657,2270,
7625 	    2587,2961,1970,1817,676,1410,3723,2803,3185,184,663,499,3784,1631,
7626 	    1925,3912,1398,1349,1441,2224,2411,1907,3192,2786,382,37,759,2948,
7627 	    1862,3802,2423,2051,2295,1332,1832,2405,3638,3661,327,3660,716,
7628 	    1842,3987,1368,1848,2366,2508,3754,1766,3572,2893,307,1297,3966,
7629 	    758,2598,3406,2922,1038,2934,2091,2451,1580,1958,2055,1507,1078,
7630 	    3273,17,854,2916,3971,2889,3831,2621,1541,893,736,3992,787,2125,
7631 	    2364,2460,257,1574,3912,1216,3248,3401,2124,2762,149,2245,166,466,
7632 	    4018,1399,190,2879,153,2320,18,712,2159,2318,2091,3443,1510,449,
7633 	    1956,2201,3137,3399,1321,2271,3667,2703,629,2365,2431,1113,3922,
7634 	    2554,184,2099,3228,4012,1921,3452,3901,572,3309,3171,817,3039,
7635 	    1696,1256,3715,2077,3019,1497,1101,717,51,981,1978,1813,3881,76,
7636 	    3846,3694,1682,124,1660,3997,479,1141,886,3514,1301,3604,1888,
7637 	    1836,1990,2058,692,1194,20,3285,2046,2107,3508,3525,3801,2549,
7638 	    1145,2253,305,3301,1065,3133,2913,3285,1241,1197,3729,2501,1673,
7639 	    541,2753,949,2361,1165,4081,2725,3305,3069,3617,3733,409,2157,
7640 	    1361,3973,1865,2525,1409,3445,3577,77,3761,2149,1449,3005,225,85,
7641 	    3673,3117,3089,1349,2057,413,65,1845,697,3085,3441,1573,3689,2941,
7642 	    929,533,2841,4077,721,2821,2249,2397,2817,245,1913,1997,3121,997,
7643 	    1833,2877,1633,981,2009,941,2449,197,2441,285,1473,2741,3129,909,
7644 	    2801,421,4073,2813,2337,1429,1177,1901,81,1669,2633,2269,129,1141,
7645 	    249,3917,2481,3941,2217,2749,3041,1877,345,2861,1809,3141,2825,
7646 	    157,2881,3637,1465,2829,2161,3365,361,2685,3745,2325,3609,3821,
7647 	    3537,517,3017,2141,1537 };
7648 
7649     /* System generated locals */
7650     integer i__1;
7651 
7652     /* Local variables */
7653     integer i__, i1, i2, i3, i4, it1, it2, it3, it4;
7654 
7655 
7656 /*  -- LAPACK auxiliary routine (version 3.1) -- */
7657 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
7658 /*     November 2006 */
7659 
7660 /*     .. Scalar Arguments .. */
7661 /*     .. */
7662 /*     .. Array Arguments .. */
7663 /*     .. */
7664 
7665 /*  Purpose */
7666 /*  ======= */
7667 
7668 /*  DLARUV returns a vector of n random real numbers from a uniform (0,1) */
7669 /*  distribution (n <= 128). */
7670 
7671 /*  This is an auxiliary routine called by DLARNV and ZLARNV. */
7672 
7673 /*  Arguments */
7674 /*  ========= */
7675 
7676 /*  ISEED   (input/output) INTEGER array, dimension (4) */
7677 /*          On entry, the seed of the random number generator; the array */
7678 /*          elements must be between 0 and 4095, and ISEED(4) must be */
7679 /*          odd. */
7680 /*          On exit, the seed is updated. */
7681 
7682 /*  N       (input) INTEGER */
7683 /*          The number of random numbers to be generated. N <= 128. */
7684 
7685 /*  X       (output) DOUBLE PRECISION array, dimension (N) */
7686 /*          The generated random numbers. */
7687 
7688 /*  Further Details */
7689 /*  =============== */
7690 
7691 /*  This routine uses a multiplicative congruential method with modulus */
7692 /*  2**48 and multiplier 33952834046453 (see G.S.Fishman, */
7693 /*  'Multiplicative congruential random number generators with modulus */
7694 /*  2**b: an exhaustive analysis for b = 32 and a partial analysis for */
7695 /*  b = 48', Math. Comp. 189, pp 331-344, 1990). */
7696 
7697 /*  48-bit integers are stored in 4 integer array elements with 12 bits */
7698 /*  per element. Hence the routine is portable across machines with */
7699 /*  integers of 32 bits or more. */
7700 
7701 /*  ===================================================================== */
7702 
7703 /*     .. Parameters .. */
7704 /*     .. */
7705 /*     .. Local Scalars .. */
7706 /*     .. */
7707 /*     .. Local Arrays .. */
7708 /*     .. */
7709 /*     .. Intrinsic Functions .. */
7710 /*     .. */
7711 /*     .. Data statements .. */
7712     /* Parameter adjustments */
7713     --iseed;
7714     --x;
7715 
7716     /* Function Body */
7717 /*     .. */
7718 /*     .. Executable Statements .. */
7719 
7720     i1 = iseed[1];
7721     i2 = iseed[2];
7722     i3 = iseed[3];
7723     i4 = iseed[4];
7724 
7725     i__1 = std::min(*n,128_integer);
7726     for (i__ = 1; i__ <= i__1; ++i__) {
7727 
7728 L20:
7729 
7730 /*        Multiply the seed by i-th power of the multiplier modulo 2**48 */
7731 
7732 	it4 = i4 * mm[i__ + 383];
7733 	it3 = it4 / 4096;
7734 	it4 -= it3 << 12;
7735 	it3 = it3 + i3 * mm[i__ + 383] + i4 * mm[i__ + 255];
7736 	it2 = it3 / 4096;
7737 	it3 -= it2 << 12;
7738 	it2 = it2 + i2 * mm[i__ + 383] + i3 * mm[i__ + 255] + i4 * mm[i__ +
7739 		127];
7740 	it1 = it2 / 4096;
7741 	it2 -= it1 << 12;
7742 	it1 = it1 + i1 * mm[i__ + 383] + i2 * mm[i__ + 255] + i3 * mm[i__ +
7743 		127] + i4 * mm[i__ - 1];
7744 	it1 %= 4096;
7745 
7746 /*        Convert 48-bit integer to a real number in the interval (0,1) */
7747 
7748 	x[i__] = ((double) it1 + ((double) it2 + ((double) it3 + (
7749 		double) it4 * 2.44140625e-4) * 2.44140625e-4) *
7750 		2.44140625e-4) * 2.44140625e-4;
7751 
7752 	if (x[i__] == 1.) {
7753 /*           If a real number has n bits of precision, and the first */
7754 /*           n bits of the 48-bit integer above happen to be all 1 (which */
7755 /*           will occur about once every 2**n calls), then X( I ) will */
7756 /*           be rounded to exactly 1.0. */
7757 /*           Since X( I ) is not supposed to return exactly 0.0 or 1.0, */
7758 /*           the statistically correct thing to do in this situation is */
7759 /*           simply to iterate again. */
7760 /*           N.B. the case X( I ) = 0.0 should not be possible. */
7761 	    i1 += 2;
7762 	    i2 += 2;
7763 	    i3 += 2;
7764 	    i4 += 2;
7765 	    goto L20;
7766 	}
7767 
7768 /* L10: */
7769     }
7770 
7771 /*     Return final value of seed */
7772 
7773     iseed[1] = it1;
7774     iseed[2] = it2;
7775     iseed[3] = it3;
7776     iseed[4] = it4;
7777     return 0;
7778 
7779 /*     End of DLARUV */
7780 
7781 } /* dlaruv_ */
7782 
dlarz_(const char * side,integer * m,integer * n,integer * l,double * v,integer * incv,double * tau,double * c__,integer * ldc,double * work)7783 /* Subroutine */ int dlarz_(const char *side, integer *m, integer *n, integer *l,
7784 	double *v, integer *incv, double *tau, double *c__,
7785 	integer *ldc, double *work)
7786 {
7787 	/* Table of constant values */
7788 	static integer c__1 = 1;
7789 	static double c_b5 = 1.;
7790 
7791     /* System generated locals */
7792     integer c_dim1, c_offset;
7793     double d__1;
7794 
7795 /*  -- LAPACK routine (version 3.1) -- */
7796 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
7797 /*     November 2006 */
7798 
7799 /*     .. Scalar Arguments .. */
7800 /*     .. */
7801 /*     .. Array Arguments .. */
7802 /*     .. */
7803 
7804 /*  Purpose */
7805 /*  ======= */
7806 
7807 /*  DLARZ applies a real elementary reflector H to a real M-by-N */
7808 /*  matrix C, from either the left or the right. H is represented in the */
7809 /*  form */
7810 
7811 /*        H = I - tau * v * v' */
7812 
7813 /*  where tau is a real scalar and v is a real vector. */
7814 
7815 /*  If tau = 0, then H is taken to be the unit matrix. */
7816 
7817 
7818 /*  H is a product of k elementary reflectors as returned by DTZRZF. */
7819 
7820 /*  Arguments */
7821 /*  ========= */
7822 
7823 /*  SIDE    (input) CHARACTER*1 */
7824 /*          = 'L': form  H * C */
7825 /*          = 'R': form  C * H */
7826 
7827 /*  M       (input) INTEGER */
7828 /*          The number of rows of the matrix C. */
7829 
7830 /*  N       (input) INTEGER */
7831 /*          The number of columns of the matrix C. */
7832 
7833 /*  L       (input) INTEGER */
7834 /*          The number of entries of the vector V containing */
7835 /*          the meaningful part of the Householder vectors. */
7836 /*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */
7837 
7838 /*  V       (input) DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV)) */
7839 /*          The vector v in the representation of H as returned by */
7840 /*          DTZRZF. V is not used if TAU = 0. */
7841 
7842 /*  INCV    (input) INTEGER */
7843 /*          The increment between elements of v. INCV <> 0. */
7844 
7845 /*  TAU     (input) DOUBLE PRECISION */
7846 /*          The value tau in the representation of H. */
7847 
7848 /*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
7849 /*          On entry, the M-by-N matrix C. */
7850 /*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
7851 /*          or C * H if SIDE = 'R'. */
7852 
7853 /*  LDC     (input) INTEGER */
7854 /*          The leading dimension of the array C. LDC >= max(1,M). */
7855 
7856 /*  WORK    (workspace) DOUBLE PRECISION array, dimension */
7857 /*                         (N) if SIDE = 'L' */
7858 /*                      or (M) if SIDE = 'R' */
7859 
7860 /*  Further Details */
7861 /*  =============== */
7862 
7863 /*  Based on contributions by */
7864 /*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
7865 
7866 /*  ===================================================================== */
7867 
7868 /*     .. Parameters .. */
7869 /*     .. */
7870 /*     .. External Subroutines .. */
7871 /*     .. */
7872 /*     .. External Functions .. */
7873 /*     .. */
7874 /*     .. Executable Statements .. */
7875 
7876     /* Parameter adjustments */
7877     --v;
7878     c_dim1 = *ldc;
7879     c_offset = 1 + c_dim1;
7880     c__ -= c_offset;
7881     --work;
7882 
7883     /* Function Body */
7884     if (lsame_(side, "L")) {
7885 
7886 /*        Form  H * C */
7887 
7888 	if (*tau != 0.) {
7889 
7890 /*           w( 1:n ) = C( 1, 1:n ) */
7891 
7892 	    dcopy_(n, &c__[c_offset], ldc, &work[1], &c__1);
7893 
7894 /*           w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) */
7895 
7896 	    dgemv_("Transpose", l, n, &c_b5, &c__[*m - *l + 1 + c_dim1], ldc,
7897 		    &v[1], incv, &c_b5, &work[1], &c__1);
7898 
7899 /*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */
7900 
7901 	    d__1 = -(*tau);
7902 	    daxpy_(n, &d__1, &work[1], &c__1, &c__[c_offset], ldc);
7903 
7904 /*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
7905 /*                               tau * v( 1:l ) * w( 1:n )' */
7906 
7907 	    d__1 = -(*tau);
7908 	    dger_(l, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l + 1
7909 		    + c_dim1], ldc);
7910 	}
7911 
7912     } else {
7913 
7914 /*        Form  C * H */
7915 
7916 	if (*tau != 0.) {
7917 
7918 /*           w( 1:m ) = C( 1:m, 1 ) */
7919 
7920 	    dcopy_(m, &c__[c_offset], &c__1, &work[1], &c__1);
7921 
7922 /*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) */
7923 
7924 	    dgemv_("No transpose", m, l, &c_b5, &c__[(*n - *l + 1) * c_dim1 +
7925 		    1], ldc, &v[1], incv, &c_b5, &work[1], &c__1);
7926 
7927 /*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */
7928 
7929 	    d__1 = -(*tau);
7930 	    daxpy_(m, &d__1, &work[1], &c__1, &c__[c_offset], &c__1);
7931 
7932 /*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
7933 /*                               tau * w( 1:m ) * v( 1:l )' */
7934 
7935 	    d__1 = -(*tau);
7936 	    dger_(m, l, &d__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l +
7937 		    1) * c_dim1 + 1], ldc);
7938 
7939 	}
7940 
7941     }
7942 
7943     return 0;
7944 
7945 /*     End of DLARZ */
7946 
7947 } /* dlarz_ */
7948 
dlarzb_(const char * side,const char * trans,const char * direct,const char * storev,integer * m,integer * n,integer * k,integer * l,double * v,integer * ldv,double * t,integer * ldt,double * c__,integer * ldc,double * work,integer * ldwork)7949 /* Subroutine */ int dlarzb_(const char *side, const char *trans, const char *direct, const char *
7950 	storev, integer *m, integer *n, integer *k, integer *l, double *v,
7951 	integer *ldv, double *t, integer *ldt, double *c__, integer *
7952 	ldc, double *work, integer *ldwork)
7953 {
7954 	/* Table of constant values */
7955 	static integer c__1 = 1;
7956 	static double c_b13 = 1.;
7957 	static double c_b23 = -1.;
7958 
7959     /* System generated locals */
7960     integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
7961 	    work_offset, i__1, i__2;
7962 
7963     /* Local variables */
7964     integer i__, j, info;
7965     char transt[1];
7966 
7967 
7968 /*  -- LAPACK routine (version 3.1) -- */
7969 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
7970 /*     November 2006 */
7971 
7972 /*     .. Scalar Arguments .. */
7973 /*     .. */
7974 /*     .. Array Arguments .. */
7975 /*     .. */
7976 
7977 /*  Purpose */
7978 /*  ======= */
7979 
7980 /*  DLARZB applies a real block reflector H or its transpose H**T to */
7981 /*  a real distributed M-by-N  C from the left or the right. */
7982 
7983 /*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */
7984 
7985 /*  Arguments */
7986 /*  ========= */
7987 
7988 /*  SIDE    (input) CHARACTER*1 */
7989 /*          = 'L': apply H or H' from the Left */
7990 /*          = 'R': apply H or H' from the Right */
7991 
7992 /*  TRANS   (input) CHARACTER*1 */
7993 /*          = 'N': apply H (No transpose) */
7994 /*          = 'C': apply H' (Transpose) */
7995 
7996 /*  DIRECT  (input) CHARACTER*1 */
7997 /*          Indicates how H is formed from a product of elementary */
7998 /*          reflectors */
7999 /*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */
8000 /*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
8001 
8002 /*  STOREV  (input) CHARACTER*1 */
8003 /*          Indicates how the vectors which define the elementary */
8004 /*          reflectors are stored: */
8005 /*          = 'C': Columnwise                        (not supported yet) */
8006 /*          = 'R': Rowwise */
8007 
8008 /*  M       (input) INTEGER */
8009 /*          The number of rows of the matrix C. */
8010 
8011 /*  N       (input) INTEGER */
8012 /*          The number of columns of the matrix C. */
8013 
8014 /*  K       (input) INTEGER */
8015 /*          The order of the matrix T (= the number of elementary */
8016 /*          reflectors whose product defines the block reflector). */
8017 
8018 /*  L       (input) INTEGER */
8019 /*          The number of columns of the matrix V containing the */
8020 /*          meaningful part of the Householder reflectors. */
8021 /*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */
8022 
8023 /*  V       (input) DOUBLE PRECISION array, dimension (LDV,NV). */
8024 /*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */
8025 
8026 /*  LDV     (input) INTEGER */
8027 /*          The leading dimension of the array V. */
8028 /*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */
8029 
8030 /*  T       (input) DOUBLE PRECISION array, dimension (LDT,K) */
8031 /*          The triangular K-by-K matrix T in the representation of the */
8032 /*          block reflector. */
8033 
8034 /*  LDT     (input) INTEGER */
8035 /*          The leading dimension of the array T. LDT >= K. */
8036 
8037 /*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N) */
8038 /*          On entry, the M-by-N matrix C. */
8039 /*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */
8040 
8041 /*  LDC     (input) INTEGER */
8042 /*          The leading dimension of the array C. LDC >= max(1,M). */
8043 
8044 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K) */
8045 
8046 /*  LDWORK  (input) INTEGER */
8047 /*          The leading dimension of the array WORK. */
8048 /*          If SIDE = 'L', LDWORK >= max(1,N); */
8049 /*          if SIDE = 'R', LDWORK >= max(1,M). */
8050 
8051 /*  Further Details */
8052 /*  =============== */
8053 
8054 /*  Based on contributions by */
8055 /*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
8056 
8057 /*  ===================================================================== */
8058 
8059 /*     .. Parameters .. */
8060 /*     .. */
8061 /*     .. Local Scalars .. */
8062 /*     .. */
8063 /*     .. External Functions .. */
8064 /*     .. */
8065 /*     .. External Subroutines .. */
8066 /*     .. */
8067 /*     .. Executable Statements .. */
8068 
8069 /*     Quick return if possible */
8070 
8071     /* Parameter adjustments */
8072     v_dim1 = *ldv;
8073     v_offset = 1 + v_dim1;
8074     v -= v_offset;
8075     t_dim1 = *ldt;
8076     t_offset = 1 + t_dim1;
8077     t -= t_offset;
8078     c_dim1 = *ldc;
8079     c_offset = 1 + c_dim1;
8080     c__ -= c_offset;
8081     work_dim1 = *ldwork;
8082     work_offset = 1 + work_dim1;
8083     work -= work_offset;
8084 
8085     /* Function Body */
8086     if (*m <= 0 || *n <= 0) {
8087 	return 0;
8088     }
8089 
8090 /*     Check for currently supported options */
8091 
8092     info = 0;
8093     if (! lsame_(direct, "B")) {
8094 	info = -3;
8095     } else if (! lsame_(storev, "R")) {
8096 	info = -4;
8097     }
8098     if (info != 0) {
8099 	i__1 = -info;
8100 	xerbla_("DLARZB", &i__1);
8101 	return 0;
8102     }
8103 
8104     if (lsame_(trans, "N")) {
8105 	*(unsigned char *)transt = 'T';
8106     } else {
8107 	*(unsigned char *)transt = 'N';
8108     }
8109 
8110     if (lsame_(side, "L")) {
8111 
8112 /*        Form  H * C  or  H' * C */
8113 
8114 /*        W( 1:n, 1:k ) = C( 1:k, 1:n )' */
8115 
8116 	i__1 = *k;
8117 	for (j = 1; j <= i__1; ++j) {
8118 	    dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1);
8119 /* L10: */
8120 	}
8121 
8122 /*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... */
8123 /*                        C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' */
8124 
8125 	if (*l > 0) {
8126 	    dgemm_("Transpose", "Transpose", n, k, l, &c_b13, &c__[*m - *l +
8127 		    1 + c_dim1], ldc, &v[v_offset], ldv, &c_b13, &work[
8128 		    work_offset], ldwork);
8129 	}
8130 
8131 /*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T */
8132 
8133 	dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b13, &t[
8134 		t_offset], ldt, &work[work_offset], ldwork);
8135 
8136 /*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' */
8137 
8138 	i__1 = *n;
8139 	for (j = 1; j <= i__1; ++j) {
8140 	    i__2 = *k;
8141 	    for (i__ = 1; i__ <= i__2; ++i__) {
8142 		c__[i__ + j * c_dim1] -= work[j + i__ * work_dim1];
8143 /* L20: */
8144 	    }
8145 /* L30: */
8146 	}
8147 
8148 /*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
8149 /*                            V( 1:k, 1:l )' * W( 1:n, 1:k )' */
8150 
8151 	if (*l > 0) {
8152 	    dgemm_("Transpose", "Transpose", l, n, k, &c_b23, &v[v_offset],
8153 		    ldv, &work[work_offset], ldwork, &c_b13, &c__[*m - *l + 1
8154 		    + c_dim1], ldc);
8155 	}
8156 
8157     } else if (lsame_(side, "R")) {
8158 
8159 /*        Form  C * H  or  C * H' */
8160 
8161 /*        W( 1:m, 1:k ) = C( 1:m, 1:k ) */
8162 
8163 	i__1 = *k;
8164 	for (j = 1; j <= i__1; ++j) {
8165 	    dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &
8166 		    c__1);
8167 /* L40: */
8168 	}
8169 
8170 /*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */
8171 /*                        C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' */
8172 
8173 	if (*l > 0) {
8174 	    dgemm_("No transpose", "Transpose", m, k, l, &c_b13, &c__[(*n - *
8175 		    l + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b13, &
8176 		    work[work_offset], ldwork);
8177 	}
8178 
8179 /*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * T  or  W( 1:m, 1:k ) * T' */
8180 
8181 	dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b13, &t[t_offset]
8182 , ldt, &work[work_offset], ldwork);
8183 
8184 /*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */
8185 
8186 	i__1 = *k;
8187 	for (j = 1; j <= i__1; ++j) {
8188 	    i__2 = *m;
8189 	    for (i__ = 1; i__ <= i__2; ++i__) {
8190 		c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
8191 /* L50: */
8192 	    }
8193 /* L60: */
8194 	}
8195 
8196 /*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
8197 /*                            W( 1:m, 1:k ) * V( 1:k, 1:l ) */
8198 
8199 	if (*l > 0) {
8200 	    dgemm_("No transpose", "No transpose", m, l, k, &c_b23, &work[
8201 		    work_offset], ldwork, &v[v_offset], ldv, &c_b13, &c__[(*n
8202 		    - *l + 1) * c_dim1 + 1], ldc);
8203 	}
8204 
8205     }
8206 
8207     return 0;
8208 
8209 /*     End of DLARZB */
8210 
8211 } /* dlarzb_ */
8212 
dlarzt_(const char * direct,const char * storev,integer * n,integer * k,double * v,integer * ldv,double * tau,double * t,integer * ldt)8213 /* Subroutine */ int dlarzt_(const char *direct, const char *storev, integer *n, integer *
8214 	k, double *v, integer *ldv, double *tau, double *t,
8215 	integer *ldt)
8216 {
8217 	/* Table of constant values */
8218 	static double c_b8 = 0.;
8219 	static integer c__1 = 1;
8220 
8221     /* System generated locals */
8222     integer t_dim1, t_offset, v_dim1, v_offset, i__1;
8223     double d__1;
8224 
8225     /* Local variables */
8226     integer i__, j, info;
8227 
8228 /*  -- LAPACK routine (version 3.1) -- */
8229 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
8230 /*     November 2006 */
8231 
8232 /*     .. Scalar Arguments .. */
8233 /*     .. */
8234 /*     .. Array Arguments .. */
8235 /*     .. */
8236 
8237 /*  Purpose */
8238 /*  ======= */
8239 
8240 /*  DLARZT forms the triangular factor T of a real block reflector */
8241 /*  H of order > n, which is defined as a product of k elementary */
8242 /*  reflectors. */
8243 
8244 /*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; */
8245 
8246 /*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. */
8247 
8248 /*  If STOREV = 'C', the vector which defines the elementary reflector */
8249 /*  H(i) is stored in the i-th column of the array V, and */
8250 
8251 /*     H  =  I - V * T * V' */
8252 
8253 /*  If STOREV = 'R', the vector which defines the elementary reflector */
8254 /*  H(i) is stored in the i-th row of the array V, and */
8255 
8256 /*     H  =  I - V' * T * V */
8257 
8258 /*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */
8259 
8260 /*  Arguments */
8261 /*  ========= */
8262 
8263 /*  DIRECT  (input) CHARACTER*1 */
8264 /*          Specifies the order in which the elementary reflectors are */
8265 /*          multiplied to form the block reflector: */
8266 /*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */
8267 /*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */
8268 
8269 /*  STOREV  (input) CHARACTER*1 */
8270 /*          Specifies how the vectors which define the elementary */
8271 /*          reflectors are stored (see also Further Details): */
8272 /*          = 'C': columnwise                        (not supported yet) */
8273 /*          = 'R': rowwise */
8274 
8275 /*  N       (input) INTEGER */
8276 /*          The order of the block reflector H. N >= 0. */
8277 
8278 /*  K       (input) INTEGER */
8279 /*          The order of the triangular factor T (= the number of */
8280 /*          elementary reflectors). K >= 1. */
8281 
8282 /*  V       (input/output) DOUBLE PRECISION array, dimension */
8283 /*                               (LDV,K) if STOREV = 'C' */
8284 /*                               (LDV,N) if STOREV = 'R' */
8285 /*          The matrix V. See further details. */
8286 
8287 /*  LDV     (input) INTEGER */
8288 /*          The leading dimension of the array V. */
8289 /*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. */
8290 
8291 /*  TAU     (input) DOUBLE PRECISION array, dimension (K) */
8292 /*          TAU(i) must contain the scalar factor of the elementary */
8293 /*          reflector H(i). */
8294 
8295 /*  T       (output) DOUBLE PRECISION array, dimension (LDT,K) */
8296 /*          The k by k triangular factor T of the block reflector. */
8297 /*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is */
8298 /*          lower triangular. The rest of the array is not used. */
8299 
8300 /*  LDT     (input) INTEGER */
8301 /*          The leading dimension of the array T. LDT >= K. */
8302 
8303 /*  Further Details */
8304 /*  =============== */
8305 
8306 /*  Based on contributions by */
8307 /*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
8308 
8309 /*  The shape of the matrix V and the storage of the vectors which define */
8310 /*  the H(i) is best illustrated by the following example with n = 5 and */
8311 /*  k = 3. The elements equal to 1 are not stored; the corresponding */
8312 /*  array elements are modified but restored on exit. The rest of the */
8313 /*  array is not used. */
8314 
8315 /*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': */
8316 
8317 /*                                              ______V_____ */
8318 /*         ( v1 v2 v3 )                        /            \ */
8319 /*         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 ) */
8320 /*     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   ) */
8321 /*         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     ) */
8322 /*         ( v1 v2 v3 ) */
8323 /*            .  .  . */
8324 /*            .  .  . */
8325 /*            1  .  . */
8326 /*               1  . */
8327 /*                  1 */
8328 
8329 /*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': */
8330 
8331 /*                                                        ______V_____ */
8332 /*            1                                          /            \ */
8333 /*            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 ) */
8334 /*            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 ) */
8335 /*            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 ) */
8336 /*            .  .  . */
8337 /*         ( v1 v2 v3 ) */
8338 /*         ( v1 v2 v3 ) */
8339 /*     V = ( v1 v2 v3 ) */
8340 /*         ( v1 v2 v3 ) */
8341 /*         ( v1 v2 v3 ) */
8342 
8343 /*  ===================================================================== */
8344 
8345 /*     .. Parameters .. */
8346 /*     .. */
8347 /*     .. Local Scalars .. */
8348 /*     .. */
8349 /*     .. External Subroutines .. */
8350 /*     .. */
8351 /*     .. External Functions .. */
8352 /*     .. */
8353 /*     .. Executable Statements .. */
8354 
8355 /*     Check for currently supported options */
8356 
8357     /* Parameter adjustments */
8358     v_dim1 = *ldv;
8359     v_offset = 1 + v_dim1;
8360     v -= v_offset;
8361     --tau;
8362     t_dim1 = *ldt;
8363     t_offset = 1 + t_dim1;
8364     t -= t_offset;
8365 
8366     /* Function Body */
8367     info = 0;
8368     if (! lsame_(direct, "B")) {
8369 	info = -1;
8370     } else if (! lsame_(storev, "R")) {
8371 	info = -2;
8372     }
8373     if (info != 0) {
8374 	i__1 = -info;
8375 	xerbla_("DLARZT", &i__1);
8376 	return 0;
8377     }
8378 
8379     for (i__ = *k; i__ >= 1; --i__) {
8380 	if (tau[i__] == 0.) {
8381 
8382 /*           H(i)  =  I */
8383 
8384 	    i__1 = *k;
8385 	    for (j = i__; j <= i__1; ++j) {
8386 		t[j + i__ * t_dim1] = 0.;
8387 /* L10: */
8388 	    }
8389 	} else {
8390 
8391 /*           general case */
8392 
8393 	    if (i__ < *k) {
8394 
8395 /*              T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)' */
8396 
8397 		i__1 = *k - i__;
8398 		d__1 = -tau[i__];
8399 		dgemv_("No transpose", &i__1, n, &d__1, &v[i__ + 1 + v_dim1],
8400 			ldv, &v[i__ + v_dim1], ldv, &c_b8, &t[i__ + 1 + i__ *
8401 			t_dim1], &c__1);
8402 
8403 /*              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) */
8404 
8405 		i__1 = *k - i__;
8406 		dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__ + 1
8407 			+ (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1]
8408 , &c__1);
8409 	    }
8410 	    t[i__ + i__ * t_dim1] = tau[i__];
8411 	}
8412 /* L20: */
8413     }
8414     return 0;
8415 
8416 /*     End of DLARZT */
8417 
8418 } /* dlarzt_ */
8419 
dlas2_(double * f,double * g,double * h__,double * ssmin,double * ssmax)8420 /* Subroutine */ int dlas2_(double *f, double *g, double *h__,
8421 	double *ssmin, double *ssmax)
8422 {
8423     /* System generated locals */
8424     double d__1, d__2;
8425 
8426     /* Builtin functions
8427     double sqrt(double); */
8428 
8429     /* Local variables */
8430     double c__, fa, ga, ha, as, at, au, fhmn, fhmx;
8431 
8432 
8433 /*  -- LAPACK auxiliary routine (version 3.1) -- */
8434 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
8435 /*     November 2006 */
8436 
8437 /*     .. Scalar Arguments .. */
8438 /*     .. */
8439 
8440 /*  Purpose */
8441 /*  ======= */
8442 
8443 /*  DLAS2  computes the singular values of the 2-by-2 matrix */
8444 /*     [  F   G  ] */
8445 /*     [  0   H  ]. */
8446 /*  On return, SSMIN is the smaller singular value and SSMAX is the */
8447 /*  larger singular value. */
8448 
8449 /*  Arguments */
8450 /*  ========= */
8451 
8452 /*  F       (input) DOUBLE PRECISION */
8453 /*          The (1,1) element of the 2-by-2 matrix. */
8454 
8455 /*  G       (input) DOUBLE PRECISION */
8456 /*          The (1,2) element of the 2-by-2 matrix. */
8457 
8458 /*  H       (input) DOUBLE PRECISION */
8459 /*          The (2,2) element of the 2-by-2 matrix. */
8460 
8461 /*  SSMIN   (output) DOUBLE PRECISION */
8462 /*          The smaller singular value. */
8463 
8464 /*  SSMAX   (output) DOUBLE PRECISION */
8465 /*          The larger singular value. */
8466 
8467 /*  Further Details */
8468 /*  =============== */
8469 
8470 /*  Barring over/underflow, all output quantities are correct to within */
8471 /*  a few units in the last place (ulps), even in the absence of a guard */
8472 /*  digit in addition/subtraction. */
8473 
8474 /*  In IEEE arithmetic, the code works correctly if one matrix element is */
8475 /*  infinite. */
8476 
8477 /*  Overflow will not occur unless the largest singular value itself */
8478 /*  overflows, or is within a few ulps of overflow. (On machines with */
8479 /*  partial overflow, like the Cray, overflow may occur if the largest */
8480 /*  singular value is within a factor of 2 of overflow.) */
8481 
8482 /*  Underflow is harmless if underflow is gradual. Otherwise, results */
8483 /*  may correspond to a matrix modified by perturbations of size near */
8484 /*  the underflow threshold. */
8485 
8486 /*  ==================================================================== */
8487 
8488 /*     .. Parameters .. */
8489 /*     .. */
8490 /*     .. Local Scalars .. */
8491 /*     .. */
8492 /*     .. Intrinsic Functions .. */
8493 /*     .. */
8494 /*     .. Executable Statements .. */
8495 
8496     fa = abs(*f);
8497     ga = abs(*g);
8498     ha = abs(*h__);
8499     fhmn = std::min(fa,ha);
8500     fhmx = std::max(fa,ha);
8501     if (fhmn == 0.) {
8502 	*ssmin = 0.;
8503 	if (fhmx == 0.) {
8504 	    *ssmax = ga;
8505 	} else {
8506 /* Computing 2nd power */
8507 	    d__1 = std::min(fhmx,ga) / std::max(fhmx,ga);
8508 	    *ssmax = std::max(fhmx,ga) * sqrt(d__1 * d__1 + 1.);
8509 	}
8510     } else {
8511 	if (ga < fhmx) {
8512 	    as = fhmn / fhmx + 1.;
8513 	    at = (fhmx - fhmn) / fhmx;
8514 /* Computing 2nd power */
8515 	    d__1 = ga / fhmx;
8516 	    au = d__1 * d__1;
8517 	    c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au));
8518 	    *ssmin = fhmn * c__;
8519 	    *ssmax = fhmx / c__;
8520 	} else {
8521 	    au = fhmx / ga;
8522 	    if (au == 0.) {
8523 
8524 /*              Avoid possible harmful underflow if exponent range */
8525 /*              asymmetric (true SSMIN may not underflow even if */
8526 /*              AU underflows) */
8527 
8528 		*ssmin = fhmn * fhmx / ga;
8529 		*ssmax = ga;
8530 	    } else {
8531 		as = fhmn / fhmx + 1.;
8532 		at = (fhmx - fhmn) / fhmx;
8533 /* Computing 2nd power */
8534 		d__1 = as * au;
8535 /* Computing 2nd power */
8536 		d__2 = at * au;
8537 		c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.));
8538 		*ssmin = fhmn * c__ * au;
8539 		*ssmin += *ssmin;
8540 		*ssmax = ga / (c__ + c__);
8541 	    }
8542 	}
8543     }
8544     return 0;
8545 
8546 /*     End of DLAS2 */
8547 
8548 } /* dlas2_ */
8549 
dlascl_(const char * type__,integer * kl,integer * ku,double * cfrom,double * cto,integer * m,integer * n,double * a,integer * lda,integer * info)8550 /* Subroutine */ int dlascl_(const char *type__, integer *kl, integer *ku, double *cfrom, double *cto,
8551 	integer *m,	integer *n, double *a, integer *lda, integer *info)
8552 {
8553     /* System generated locals */
8554     integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
8555 
8556     /* Local variables */
8557     integer i__, j, k1, k2, k3, k4;
8558     double mul, cto1;
8559     bool done;
8560     double ctoc;
8561     integer itype;
8562     double cfrom1;
8563     double cfromc;
8564     double bignum, smlnum;
8565 
8566 
8567 /*  -- LAPACK auxiliary routine (version 3.2) -- */
8568 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
8569 /*     November 2006 */
8570 
8571 /*     .. Scalar Arguments .. */
8572 /*     .. */
8573 /*     .. Array Arguments .. */
8574 /*     .. */
8575 
8576 /*  Purpose */
8577 /*  ======= */
8578 
8579 /*  DLASCL multiplies the M by N real matrix A by the real scalar */
8580 /*  CTO/CFROM.  This is done without over/underflow as long as the final */
8581 /*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that */
8582 /*  A may be full, upper triangular, lower triangular, upper Hessenberg, */
8583 /*  or banded. */
8584 
8585 /*  Arguments */
8586 /*  ========= */
8587 
8588 /*  TYPE    (input) CHARACTER*1 */
8589 /*          TYPE indices the storage type of the input matrix. */
8590 /*          = 'G':  A is a full matrix. */
8591 /*          = 'L':  A is a lower triangular matrix. */
8592 /*          = 'U':  A is an upper triangular matrix. */
8593 /*          = 'H':  A is an upper Hessenberg matrix. */
8594 /*          = 'B':  A is a symmetric band matrix with lower bandwidth KL */
8595 /*                  and upper bandwidth KU and with the only the lower */
8596 /*                  half stored. */
8597 /*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL */
8598 /*                  and upper bandwidth KU and with the only the upper */
8599 /*                  half stored. */
8600 /*          = 'Z':  A is a band matrix with lower bandwidth KL and upper */
8601 /*                  bandwidth KU. */
8602 
8603 /*  KL      (input) INTEGER */
8604 /*          The lower bandwidth of A.  Referenced only if TYPE = 'B', */
8605 /*          'Q' or 'Z'. */
8606 
8607 /*  KU      (input) INTEGER */
8608 /*          The upper bandwidth of A.  Referenced only if TYPE = 'B', */
8609 /*          'Q' or 'Z'. */
8610 
8611 /*  CFROM   (input) DOUBLE PRECISION */
8612 /*  CTO     (input) DOUBLE PRECISION */
8613 /*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed */
8614 /*          without over/underflow if the final result CTO*A(I,J)/CFROM */
8615 /*          can be represented without over/underflow.  CFROM must be */
8616 /*          nonzero. */
8617 
8618 /*  M       (input) INTEGER */
8619 /*          The number of rows of the matrix A.  M >= 0. */
8620 
8621 /*  N       (input) INTEGER */
8622 /*          The number of columns of the matrix A.  N >= 0. */
8623 
8624 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
8625 /*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the */
8626 /*          storage type. */
8627 
8628 /*  LDA     (input) INTEGER */
8629 /*          The leading dimension of the array A.  LDA >= max(1,M). */
8630 
8631 /*  INFO    (output) INTEGER */
8632 /*          0  - successful exit */
8633 /*          <0 - if INFO = -i, the i-th argument had an illegal value. */
8634 
8635 /*  ===================================================================== */
8636 
8637 /*     .. Parameters .. */
8638 /*     .. */
8639 /*     .. Local Scalars .. */
8640 /*     .. */
8641 /*     .. External Functions .. */
8642 /*     .. */
8643 /*     .. Intrinsic Functions .. */
8644 /*     .. */
8645 /*     .. External Subroutines .. */
8646 /*     .. */
8647 /*     .. Executable Statements .. */
8648 
8649 /*     Test the input arguments */
8650 
8651     /* Parameter adjustments */
8652     a_dim1 = *lda;
8653     a_offset = 1 + a_dim1;
8654     a -= a_offset;
8655 
8656     /* Function Body */
8657     *info = 0;
8658 
8659     if (lsame_(type__, "G")) {
8660 	itype = 0;
8661     } else if (lsame_(type__, "L")) {
8662 	itype = 1;
8663     } else if (lsame_(type__, "U")) {
8664 	itype = 2;
8665     } else if (lsame_(type__, "H")) {
8666 	itype = 3;
8667     } else if (lsame_(type__, "B")) {
8668 	itype = 4;
8669     } else if (lsame_(type__, "Q")) {
8670 	itype = 5;
8671     } else if (lsame_(type__, "Z")) {
8672 	itype = 6;
8673     } else {
8674 	itype = -1;
8675     }
8676 
8677     if (itype == -1) {
8678 	*info = -1;
8679     } else if (*cfrom == 0. || disnan_(cfrom)) {
8680 	*info = -4;
8681     } else if (disnan_(cto)) {
8682 	*info = -5;
8683     } else if (*m < 0) {
8684 	*info = -6;
8685     } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) {
8686 	*info = -7;
8687     } else if (itype <= 3 && *lda <  std::max(1_integer,*m)) {
8688 	*info = -9;
8689     } else if (itype >= 4) {
8690 /* Computing MAX */
8691 	i__1 = *m - 1;
8692 	if (*kl < 0 || *kl >  std::max(i__1,0_integer)) {
8693 	    *info = -2;
8694 	} else /* if(complicated condition) */ {
8695 /* Computing MAX */
8696 	    i__1 = *n - 1;
8697 	    if (*ku < 0 || *ku >  std::max(i__1,0_integer) || (itype == 4 || itype == 5) &&
8698 		    *kl != *ku) {
8699 		*info = -3;
8700 	    } else if (itype == 4 && *lda < *kl + 1 || itype == 5 && *lda < *
8701 		    ku + 1 || itype == 6 && *lda < (*kl << 1) + *ku + 1) {
8702 		*info = -9;
8703 	    }
8704 	}
8705     }
8706 
8707     if (*info != 0) {
8708 	i__1 = -(*info);
8709 	xerbla_("DLASCL", &i__1);
8710 	return 0;
8711     }
8712 
8713 /*     Quick return if possible */
8714 
8715     if (*n == 0 || *m == 0) {
8716 	return 0;
8717     }
8718 
8719 /*     Get machine parameters */
8720 
8721     smlnum = dlamch_("S");
8722     bignum = 1. / smlnum;
8723 
8724     cfromc = *cfrom;
8725     ctoc = *cto;
8726 
8727 L10:
8728     cfrom1 = cfromc * smlnum;
8729     if (cfrom1 == cfromc) {
8730 /*        CFROMC is an inf.  Multiply by a correctly signed zero for */
8731 /*        finite CTOC, or a NaN if CTOC is infinite. */
8732 	mul = ctoc / cfromc;
8733 	done = true;
8734 	cto1 = ctoc;
8735     } else {
8736 	cto1 = ctoc / bignum;
8737 	if (cto1 == ctoc) {
8738 /*           CTOC is either 0 or an inf.  In both cases, CTOC itself */
8739 /*           serves as the correct multiplication factor. */
8740 	    mul = ctoc;
8741 	    done = true;
8742 	    cfromc = 1.;
8743 	} else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) {
8744 	    mul = smlnum;
8745 	    done = false;
8746 	    cfromc = cfrom1;
8747 	} else if (abs(cto1) > abs(cfromc)) {
8748 	    mul = bignum;
8749 	    done = false;
8750 	    ctoc = cto1;
8751 	} else {
8752 	    mul = ctoc / cfromc;
8753 	    done = true;
8754 	}
8755     }
8756 
8757     if (itype == 0) {
8758 
8759 /*        Full matrix */
8760 
8761 	i__1 = *n;
8762 	for (j = 1; j <= i__1; ++j) {
8763 	    i__2 = *m;
8764 	    for (i__ = 1; i__ <= i__2; ++i__) {
8765 		a[i__ + j * a_dim1] *= mul;
8766 /* L20: */
8767 	    }
8768 /* L30: */
8769 	}
8770 
8771     } else if (itype == 1) {
8772 
8773 /*        Lower triangular matrix */
8774 
8775 	i__1 = *n;
8776 	for (j = 1; j <= i__1; ++j) {
8777 	    i__2 = *m;
8778 	    for (i__ = j; i__ <= i__2; ++i__) {
8779 		a[i__ + j * a_dim1] *= mul;
8780 /* L40: */
8781 	    }
8782 /* L50: */
8783 	}
8784 
8785     } else if (itype == 2) {
8786 
8787 /*        Upper triangular matrix */
8788 
8789 	i__1 = *n;
8790 	for (j = 1; j <= i__1; ++j) {
8791 	    i__2 = std::min(j,*m);
8792 	    for (i__ = 1; i__ <= i__2; ++i__) {
8793 		a[i__ + j * a_dim1] *= mul;
8794 /* L60: */
8795 	    }
8796 /* L70: */
8797 	}
8798 
8799     } else if (itype == 3) {
8800 
8801 /*        Upper Hessenberg matrix */
8802 
8803 	i__1 = *n;
8804 	for (j = 1; j <= i__1; ++j) {
8805 /* Computing MIN */
8806 	    i__3 = j + 1;
8807 	    i__2 = std::min(i__3,*m);
8808 	    for (i__ = 1; i__ <= i__2; ++i__) {
8809 		a[i__ + j * a_dim1] *= mul;
8810 /* L80: */
8811 	    }
8812 /* L90: */
8813 	}
8814 
8815     } else if (itype == 4) {
8816 
8817 /*        Lower half of a symmetric band matrix */
8818 
8819 	k3 = *kl + 1;
8820 	k4 = *n + 1;
8821 	i__1 = *n;
8822 	for (j = 1; j <= i__1; ++j) {
8823 /* Computing MIN */
8824 	    i__3 = k3, i__4 = k4 - j;
8825 	    i__2 = std::min(i__3,i__4);
8826 	    for (i__ = 1; i__ <= i__2; ++i__) {
8827 		a[i__ + j * a_dim1] *= mul;
8828 /* L100: */
8829 	    }
8830 /* L110: */
8831 	}
8832 
8833     } else if (itype == 5) {
8834 
8835 /*        Upper half of a symmetric band matrix */
8836 
8837 	k1 = *ku + 2;
8838 	k3 = *ku + 1;
8839 	i__1 = *n;
8840 	for (j = 1; j <= i__1; ++j) {
8841 /* Computing MAX */
8842 	    i__2 = k1 - j;
8843 	    i__3 = k3;
8844 	    for (i__ = std::max(i__2,1_integer); i__ <= i__3; ++i__) {
8845 		a[i__ + j * a_dim1] *= mul;
8846 /* L120: */
8847 	    }
8848 /* L130: */
8849 	}
8850 
8851     } else if (itype == 6) {
8852 
8853 /*        Band matrix */
8854 
8855 	k1 = *kl + *ku + 2;
8856 	k2 = *kl + 1;
8857 	k3 = (*kl << 1) + *ku + 1;
8858 	k4 = *kl + *ku + 1 + *m;
8859 	i__1 = *n;
8860 	for (j = 1; j <= i__1; ++j) {
8861 /* Computing MAX */
8862 	    i__3 = k1 - j;
8863 /* Computing MIN */
8864 	    i__4 = k3, i__5 = k4 - j;
8865 	    i__2 = std::min(i__4,i__5);
8866 	    for (i__ = std::max(i__3,k2); i__ <= i__2; ++i__) {
8867 		a[i__ + j * a_dim1] *= mul;
8868 /* L140: */
8869 	    }
8870 /* L150: */
8871 	}
8872 
8873     }
8874 
8875     if (! done) {
8876 	goto L10;
8877     }
8878 
8879     return 0;
8880 
8881 /*     End of DLASCL */
8882 
8883 } /* dlascl_ */
8884 
dlascl2_(integer * m,integer * n,double * d__,double * x,integer * ldx)8885 int dlascl2_(integer *m, integer *n, double *d__, double *x, integer *ldx)
8886 {
8887     /* System generated locals */
8888     integer x_dim1, x_offset, i__1, i__2;
8889 
8890     /* Local variables */
8891     integer i__, j;
8892 
8893 
8894 /*     -- LAPACK routine (version 3.2.1)                               -- */
8895 /*     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
8896 /*     -- Jason Riedy of Univ. of California Berkeley.                 -- */
8897 /*     -- April 2009                                                   -- */
8898 
8899 /*     -- LAPACK is a software package provided by Univ. of Tennessee, -- */
8900 /*     -- Univ. of California Berkeley and NAG Ltd.                    -- */
8901 
8902 /*     .. */
8903 /*     .. Scalar Arguments .. */
8904 /*     .. */
8905 /*     .. Array Arguments .. */
8906 /*     .. */
8907 
8908 /*  Purpose */
8909 /*  ======= */
8910 
8911 /*  DLASCL2 performs a diagonal scaling on a vector: */
8912 /*    x <-- D * x */
8913 /*  where the diagonal matrix D is stored as a vector. */
8914 
8915 /*  Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS */
8916 /*  standard. */
8917 
8918 /*  Arguments */
8919 /*  ========= */
8920 
8921 /*     M       (input) INTEGER */
8922 /*     The number of rows of D and X. M >= 0. */
8923 
8924 /*     N       (input) INTEGER */
8925 /*     The number of columns of D and X. N >= 0. */
8926 
8927 /*     D       (input) DOUBLE PRECISION array, length M */
8928 /*     Diagonal matrix D, stored as a vector of length M. */
8929 
8930 /*     X       (input/output) DOUBLE PRECISION array, dimension (LDX,N) */
8931 /*     On entry, the vector X to be scaled by D. */
8932 /*     On exit, the scaled vector. */
8933 
8934 /*     LDX     (input) INTEGER */
8935 /*     The leading dimension of the vector X. LDX >= 0. */
8936 
8937 /*  ===================================================================== */
8938 
8939 /*     .. Local Scalars .. */
8940 /*     .. */
8941 /*     .. Executable Statements .. */
8942 
8943     /* Parameter adjustments */
8944     --d__;
8945     x_dim1 = *ldx;
8946     x_offset = 1 + x_dim1;
8947     x -= x_offset;
8948 
8949     /* Function Body */
8950     i__1 = *n;
8951     for (j = 1; j <= i__1; ++j) {
8952 	i__2 = *m;
8953 	for (i__ = 1; i__ <= i__2; ++i__) {
8954 	    x[i__ + j * x_dim1] *= d__[i__];
8955 	}
8956     }
8957     return 0;
8958 } /* dlascl2_ */
8959 
dlasd0_(integer * n,integer * sqre,double * d__,double * e,double * u,integer * ldu,double * vt,integer * ldvt,integer * smlsiz,integer * iwork,double * work,integer * info)8960 /* Subroutine */ int dlasd0_(integer *n, integer *sqre, double *d__,
8961 	double *e, double *u, integer *ldu, double *vt, integer *
8962 	ldvt, integer *smlsiz, integer *iwork, double *work, integer *
8963 	info)
8964 {
8965 	/* Table of constant values */
8966 	static integer c__0 = 0;
8967 	static integer c__2 = 2;
8968 
8969     /* System generated locals */
8970     integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
8971 
8972     /* Local variables */
8973     integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf, iwk,
8974 	    lvl, ndb1, nlp1, nrp1;
8975     double beta;
8976     integer idxq, nlvl;
8977     double alpha;
8978     integer inode, ndiml, idxqc, ndimr, itemp, sqrei;
8979 
8980 /*  -- LAPACK auxiliary routine (version 3.1) -- */
8981 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
8982 /*     November 2006 */
8983 
8984 /*     .. Scalar Arguments .. */
8985 /*     .. */
8986 /*     .. Array Arguments .. */
8987 /*     .. */
8988 
8989 /*  Purpose */
8990 /*  ======= */
8991 
8992 /*  Using a divide and conquer approach, DLASD0 computes the singular */
8993 /*  value decomposition (SVD) of a real upper bidiagonal N-by-M */
8994 /*  matrix B with diagonal D and offdiagonal E, where M = N + SQRE. */
8995 /*  The algorithm computes orthogonal matrices U and VT such that */
8996 /*  B = U * S * VT. The singular values S are overwritten on D. */
8997 
8998 /*  A related subroutine, DLASDA, computes only the singular values, */
8999 /*  and optionally, the singular vectors in compact form. */
9000 
9001 /*  Arguments */
9002 /*  ========= */
9003 
9004 /*  N      (input) INTEGER */
9005 /*         On entry, the row dimension of the upper bidiagonal matrix. */
9006 /*         This is also the dimension of the main diagonal array D. */
9007 
9008 /*  SQRE   (input) INTEGER */
9009 /*         Specifies the column dimension of the bidiagonal matrix. */
9010 /*         = 0: The bidiagonal matrix has column dimension M = N; */
9011 /*         = 1: The bidiagonal matrix has column dimension M = N+1; */
9012 
9013 /*  D      (input/output) DOUBLE PRECISION array, dimension (N) */
9014 /*         On entry D contains the main diagonal of the bidiagonal */
9015 /*         matrix. */
9016 /*         On exit D, if INFO = 0, contains its singular values. */
9017 
9018 /*  E      (input) DOUBLE PRECISION array, dimension (M-1) */
9019 /*         Contains the subdiagonal entries of the bidiagonal matrix. */
9020 /*         On exit, E has been destroyed. */
9021 
9022 /*  U      (output) DOUBLE PRECISION array, dimension at least (LDQ, N) */
9023 /*         On exit, U contains the left singular vectors. */
9024 
9025 /*  LDU    (input) INTEGER */
9026 /*         On entry, leading dimension of U. */
9027 
9028 /*  VT     (output) DOUBLE PRECISION array, dimension at least (LDVT, M) */
9029 /*         On exit, VT' contains the right singular vectors. */
9030 
9031 /*  LDVT   (input) INTEGER */
9032 /*         On entry, leading dimension of VT. */
9033 
9034 /*  SMLSIZ (input) INTEGER */
9035 /*         On entry, maximum size of the subproblems at the */
9036 /*         bottom of the computation tree. */
9037 
9038 /*  IWORK  (workspace) INTEGER work array. */
9039 /*         Dimension must be at least (8 * N) */
9040 
9041 /*  WORK   (workspace) DOUBLE PRECISION work array. */
9042 /*         Dimension must be at least (3 * M**2 + 2 * M) */
9043 
9044 /*  INFO   (output) INTEGER */
9045 /*          = 0:  successful exit. */
9046 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
9047 /*          > 0:  if INFO = 1, an singular value did not converge */
9048 
9049 /*  Further Details */
9050 /*  =============== */
9051 
9052 /*  Based on contributions by */
9053 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
9054 /*     California at Berkeley, USA */
9055 
9056 /*  ===================================================================== */
9057 
9058 /*     .. Local Scalars .. */
9059 /*     .. */
9060 /*     .. External Subroutines .. */
9061 /*     .. */
9062 /*     .. Executable Statements .. */
9063 
9064 /*     Test the input parameters. */
9065 
9066     /* Parameter adjustments */
9067     --d__;
9068     --e;
9069     u_dim1 = *ldu;
9070     u_offset = 1 + u_dim1;
9071     u -= u_offset;
9072     vt_dim1 = *ldvt;
9073     vt_offset = 1 + vt_dim1;
9074     vt -= vt_offset;
9075     --iwork;
9076     --work;
9077 
9078     /* Function Body */
9079     *info = 0;
9080 
9081     if (*n < 0) {
9082 	*info = -1;
9083     } else if (*sqre < 0 || *sqre > 1) {
9084 	*info = -2;
9085     }
9086 
9087     m = *n + *sqre;
9088 
9089     if (*ldu < *n) {
9090 	*info = -6;
9091     } else if (*ldvt < m) {
9092 	*info = -8;
9093     } else if (*smlsiz < 3) {
9094 	*info = -9;
9095     }
9096     if (*info != 0) {
9097 	i__1 = -(*info);
9098 	xerbla_("DLASD0", &i__1);
9099 	return 0;
9100     }
9101 
9102 /*     If the input matrix is too small, call DLASDQ to find the SVD. */
9103 
9104     if (*n <= *smlsiz) {
9105 	dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset],
9106 		ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info);
9107 	return 0;
9108     }
9109 
9110 /*     Set up the computation tree. */
9111 
9112     inode = 1;
9113     ndiml = inode + *n;
9114     ndimr = ndiml + *n;
9115     idxq = ndimr + *n;
9116     iwk = idxq + *n;
9117     dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
9118 	    smlsiz);
9119 
9120 /*     For the nodes on bottom level of the tree, solve */
9121 /*     their subproblems by DLASDQ. */
9122 
9123     ndb1 = (nd + 1) / 2;
9124     ncc = 0;
9125     i__1 = nd;
9126     for (i__ = ndb1; i__ <= i__1; ++i__) {
9127 
9128 /*     IC : center row of each node */
9129 /*     NL : number of rows of left  subproblem */
9130 /*     NR : number of rows of right subproblem */
9131 /*     NLF: starting row of the left   subproblem */
9132 /*     NRF: starting row of the right  subproblem */
9133 
9134 	i1 = i__ - 1;
9135 	ic = iwork[inode + i1];
9136 	nl = iwork[ndiml + i1];
9137 	nlp1 = nl + 1;
9138 	nr = iwork[ndimr + i1];
9139 	nrp1 = nr + 1;
9140 	nlf = ic - nl;
9141 	nrf = ic + 1;
9142 	sqrei = 1;
9143 	dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[
9144 		nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[
9145 		nlf + nlf * u_dim1], ldu, &work[1], info);
9146 	if (*info != 0) {
9147 	    return 0;
9148 	}
9149 	itemp = idxq + nlf - 2;
9150 	i__2 = nl;
9151 	for (j = 1; j <= i__2; ++j) {
9152 	    iwork[itemp + j] = j;
9153 /* L10: */
9154 	}
9155 	if (i__ == nd) {
9156 	    sqrei = *sqre;
9157 	} else {
9158 	    sqrei = 1;
9159 	}
9160 	nrp1 = nr + sqrei;
9161 	dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[
9162 		nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[
9163 		nrf + nrf * u_dim1], ldu, &work[1], info);
9164 	if (*info != 0) {
9165 	    return 0;
9166 	}
9167 	itemp = idxq + ic;
9168 	i__2 = nr;
9169 	for (j = 1; j <= i__2; ++j) {
9170 	    iwork[itemp + j - 1] = j;
9171 /* L20: */
9172 	}
9173 /* L30: */
9174     }
9175 
9176 /*     Now conquer each subproblem bottom-up. */
9177 
9178     for (lvl = nlvl; lvl >= 1; --lvl) {
9179 
9180 /*        Find the first node LF and last node LL on the */
9181 /*        current level LVL. */
9182 
9183 	if (lvl == 1) {
9184 	    lf = 1;
9185 	    ll = 1;
9186 	} else {
9187 	    i__1 = lvl - 1;
9188 	    lf = pow_ii(&c__2, &i__1);
9189 	    ll = (lf << 1) - 1;
9190 	}
9191 	i__1 = ll;
9192 	for (i__ = lf; i__ <= i__1; ++i__) {
9193 	    im1 = i__ - 1;
9194 	    ic = iwork[inode + im1];
9195 	    nl = iwork[ndiml + im1];
9196 	    nr = iwork[ndimr + im1];
9197 	    nlf = ic - nl;
9198 	    if (*sqre == 0 && i__ == ll) {
9199 		sqrei = *sqre;
9200 	    } else {
9201 		sqrei = 1;
9202 	    }
9203 	    idxqc = idxq + nlf - 1;
9204 	    alpha = d__[ic];
9205 	    beta = e[ic];
9206 	    dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf *
9207 		     u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[
9208 		    idxqc], &iwork[iwk], &work[1], info);
9209 	    if (*info != 0) {
9210 		return 0;
9211 	    }
9212 /* L40: */
9213 	}
9214 /* L50: */
9215     }
9216 
9217     return 0;
9218 
9219 /*     End of DLASD0 */
9220 
9221 } /* dlasd0_ */
9222 
dlasd1_(integer * nl,integer * nr,integer * sqre,double * d__,double * alpha,double * beta,double * u,integer * ldu,double * vt,integer * ldvt,integer * idxq,integer * iwork,double * work,integer * info)9223 /* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre,
9224 	double *d__, double *alpha, double *beta, double *u,
9225 	integer *ldu, double *vt, integer *ldvt, integer *idxq, integer *
9226 	iwork, double *work, integer *info)
9227 {
9228 	/* Table of constant values */
9229 	static integer c__0 = 0;
9230 	static double c_b7 = 1.;
9231 	static integer c__1 = 1;
9232 	static integer c_n1 = -1;
9233 
9234     /* System generated locals */
9235     integer u_dim1, u_offset, vt_dim1, vt_offset, i__1;
9236     double d__1, d__2;
9237 
9238     /* Local variables */
9239     integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2, idxc,
9240 	    idxp, ldvt2;
9241     integer isigma;
9242     double orgnrm;
9243     integer coltyp;
9244 
9245 
9246 /*  -- LAPACK auxiliary routine (version 3.1) -- */
9247 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
9248 /*     November 2006 */
9249 
9250 /*     .. Scalar Arguments .. */
9251 /*     .. */
9252 /*     .. Array Arguments .. */
9253 /*     .. */
9254 
9255 /*  Purpose */
9256 /*  ======= */
9257 
9258 /*  DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, */
9259 /*  where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. */
9260 
9261 /*  A related subroutine DLASD7 handles the case in which the singular */
9262 /*  values (and the singular vectors in factored form) are desired. */
9263 
9264 /*  DLASD1 computes the SVD as follows: */
9265 
9266 /*                ( D1(in)  0    0     0 ) */
9267 /*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in) */
9268 /*                (   0     0   D2(in) 0 ) */
9269 
9270 /*      = U(out) * ( D(out) 0) * VT(out) */
9271 
9272 /*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */
9273 /*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */
9274 /*  elsewhere; and the entry b is empty if SQRE = 0. */
9275 
9276 /*  The left singular vectors of the original matrix are stored in U, and */
9277 /*  the transpose of the right singular vectors are stored in VT, and the */
9278 /*  singular values are in D.  The algorithm consists of three stages: */
9279 
9280 /*     The first stage consists of deflating the size of the problem */
9281 /*     when there are multiple singular values or when there are zeros in */
9282 /*     the Z vector.  For each such occurrence the dimension of the */
9283 /*     secular equation problem is reduced by one.  This stage is */
9284 /*     performed by the routine DLASD2. */
9285 
9286 /*     The second stage consists of calculating the updated */
9287 /*     singular values. This is done by finding the square roots of the */
9288 /*     roots of the secular equation via the routine DLASD4 (as called */
9289 /*     by DLASD3). This routine also calculates the singular vectors of */
9290 /*     the current problem. */
9291 
9292 /*     The final stage consists of computing the updated singular vectors */
9293 /*     directly using the updated singular values.  The singular vectors */
9294 /*     for the current problem are multiplied with the singular vectors */
9295 /*     from the overall problem. */
9296 
9297 /*  Arguments */
9298 /*  ========= */
9299 
9300 /*  NL     (input) INTEGER */
9301 /*         The row dimension of the upper block.  NL >= 1. */
9302 
9303 /*  NR     (input) INTEGER */
9304 /*         The row dimension of the lower block.  NR >= 1. */
9305 
9306 /*  SQRE   (input) INTEGER */
9307 /*         = 0: the lower block is an NR-by-NR square matrix. */
9308 /*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
9309 
9310 /*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
9311 /*         and column dimension M = N + SQRE. */
9312 
9313 /*  D      (input/output) DOUBLE PRECISION array, */
9314 /*                        dimension (N = NL+NR+1). */
9315 /*         On entry D(1:NL,1:NL) contains the singular values of the */
9316 /*         upper block; and D(NL+2:N) contains the singular values of */
9317 /*         the lower block. On exit D(1:N) contains the singular values */
9318 /*         of the modified matrix. */
9319 
9320 /*  ALPHA  (input/output) DOUBLE PRECISION */
9321 /*         Contains the diagonal element associated with the added row. */
9322 
9323 /*  BETA   (input/output) DOUBLE PRECISION */
9324 /*         Contains the off-diagonal element associated with the added */
9325 /*         row. */
9326 
9327 /*  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N) */
9328 /*         On entry U(1:NL, 1:NL) contains the left singular vectors of */
9329 /*         the upper block; U(NL+2:N, NL+2:N) contains the left singular */
9330 /*         vectors of the lower block. On exit U contains the left */
9331 /*         singular vectors of the bidiagonal matrix. */
9332 
9333 /*  LDU    (input) INTEGER */
9334 /*         The leading dimension of the array U.  LDU >= max( 1, N ). */
9335 
9336 /*  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */
9337 /*         where M = N + SQRE. */
9338 /*         On entry VT(1:NL+1, 1:NL+1)' contains the right singular */
9339 /*         vectors of the upper block; VT(NL+2:M, NL+2:M)' contains */
9340 /*         the right singular vectors of the lower block. On exit */
9341 /*         VT' contains the right singular vectors of the */
9342 /*         bidiagonal matrix. */
9343 
9344 /*  LDVT   (input) INTEGER */
9345 /*         The leading dimension of the array VT.  LDVT >= max( 1, M ). */
9346 
9347 /*  IDXQ  (output) INTEGER array, dimension(N) */
9348 /*         This contains the permutation which will reintegrate the */
9349 /*         subproblem just solved back into sorted order, i.e. */
9350 /*         D( IDXQ( I = 1, N ) ) will be in ascending order. */
9351 
9352 /*  IWORK  (workspace) INTEGER array, dimension( 4 * N ) */
9353 
9354 /*  WORK   (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) */
9355 
9356 /*  INFO   (output) INTEGER */
9357 /*          = 0:  successful exit. */
9358 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
9359 /*          > 0:  if INFO = 1, an singular value did not converge */
9360 
9361 /*  Further Details */
9362 /*  =============== */
9363 
9364 /*  Based on contributions by */
9365 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
9366 /*     California at Berkeley, USA */
9367 
9368 /*  ===================================================================== */
9369 
9370 /*     .. Parameters .. */
9371 
9372 /*     .. */
9373 /*     .. Local Scalars .. */
9374 /*     .. */
9375 /*     .. External Subroutines .. */
9376 /*     .. */
9377 /*     .. Intrinsic Functions .. */
9378 /*     .. */
9379 /*     .. Executable Statements .. */
9380 
9381 /*     Test the input parameters. */
9382 
9383     /* Parameter adjustments */
9384     --d__;
9385     u_dim1 = *ldu;
9386     u_offset = 1 + u_dim1;
9387     u -= u_offset;
9388     vt_dim1 = *ldvt;
9389     vt_offset = 1 + vt_dim1;
9390     vt -= vt_offset;
9391     --idxq;
9392     --iwork;
9393     --work;
9394 
9395     /* Function Body */
9396     *info = 0;
9397 
9398     if (*nl < 1) {
9399 	*info = -1;
9400     } else if (*nr < 1) {
9401 	*info = -2;
9402     } else if (*sqre < 0 || *sqre > 1) {
9403 	*info = -3;
9404     }
9405     if (*info != 0) {
9406 	i__1 = -(*info);
9407 	xerbla_("DLASD1", &i__1);
9408 	return 0;
9409     }
9410 
9411     n = *nl + *nr + 1;
9412     m = n + *sqre;
9413 
9414 /*     The following values are for bookkeeping purposes only.  They are */
9415 /*     integer pointers which indicate the portion of the workspace */
9416 /*     used by a particular array in DLASD2 and DLASD3. */
9417 
9418     ldu2 = n;
9419     ldvt2 = m;
9420 
9421     iz = 1;
9422     isigma = iz + m;
9423     iu2 = isigma + n;
9424     ivt2 = iu2 + ldu2 * n;
9425     iq = ivt2 + ldvt2 * m;
9426 
9427     idx = 1;
9428     idxc = idx + n;
9429     coltyp = idxc + n;
9430     idxp = coltyp + n;
9431 
9432 /*     Scale. */
9433 
9434 /* Computing MAX */
9435     d__1 = abs(*alpha), d__2 = abs(*beta);
9436     orgnrm = std::max(d__1,d__2);
9437     d__[*nl + 1] = 0.;
9438     i__1 = n;
9439     for (i__ = 1; i__ <= i__1; ++i__) {
9440 	if ((d__1 = d__[i__], abs(d__1)) > orgnrm) {
9441 	    orgnrm = (d__1 = d__[i__], abs(d__1));
9442 	}
9443 /* L10: */
9444     }
9445     dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info);
9446     *alpha /= orgnrm;
9447     *beta /= orgnrm;
9448 
9449 /*     Deflate singular values. */
9450 
9451     dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset],
9452 	    ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, &
9453 	    work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], &
9454 	    idxq[1], &iwork[coltyp], info);
9455 
9456 /*     Solve Secular Equation and update singular vectors. */
9457 
9458     ldq = k;
9459     dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[
9460 	    u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[
9461 	    ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info);
9462     if (*info != 0) {
9463 	return 0;
9464     }
9465 
9466 /*     Unscale. */
9467 
9468     dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info);
9469 
9470 /*     Prepare the IDXQ sorting permutation. */
9471 
9472     n1 = k;
9473     n2 = n - k;
9474     dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
9475 
9476     return 0;
9477 
9478 /*     End of DLASD1 */
9479 
9480 } /* dlasd1_ */
9481 
dlasd2_(integer * nl,integer * nr,integer * sqre,integer * k,double * d__,double * z__,double * alpha,double * beta,double * u,integer * ldu,double * vt,integer * ldvt,double * dsigma,double * u2,integer * ldu2,double * vt2,integer * ldvt2,integer * idxp,integer * idx,integer * idxc,integer * idxq,integer * coltyp,integer * info)9482 /* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer
9483 	*k, double *d__, double *z__, double *alpha, double *
9484 	beta, double *u, integer *ldu, double *vt, integer *ldvt,
9485 	double *dsigma, double *u2, integer *ldu2, double *vt2,
9486 	integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *
9487 	idxq, integer *coltyp, integer *info)
9488 {
9489 	/* Table of constant values */
9490 	static integer c__1 = 1;
9491 	static double c_b30 = 0.;
9492 
9493     /* System generated locals */
9494     integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset,
9495 	    vt2_dim1, vt2_offset, i__1;
9496     double d__1, d__2;
9497 
9498     /* Local variables */
9499     double c__;
9500     integer i__, j, m, n;
9501     double s;
9502     integer k2;
9503     double z1;
9504     integer ct, jp;
9505     double eps, tau, tol;
9506     integer psm[4], nlp1, nlp2, idxi, idxj;
9507     integer ctot[4], idxjp;
9508     integer jprev;
9509     double hlftol;
9510 
9511 
9512 /*  -- LAPACK auxiliary routine (version 3.1) -- */
9513 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
9514 /*     November 2006 */
9515 
9516 /*     .. Scalar Arguments .. */
9517 /*     .. */
9518 /*     .. Array Arguments .. */
9519 /*     .. */
9520 
9521 /*  Purpose */
9522 /*  ======= */
9523 
9524 /*  DLASD2 merges the two sets of singular values together into a single */
9525 /*  sorted set.  Then it tries to deflate the size of the problem. */
9526 /*  There are two ways in which deflation can occur:  when two or more */
9527 /*  singular values are close together or if there is a tiny entry in the */
9528 /*  Z vector.  For each such occurrence the order of the related secular */
9529 /*  equation problem is reduced by one. */
9530 
9531 /*  DLASD2 is called from DLASD1. */
9532 
9533 /*  Arguments */
9534 /*  ========= */
9535 
9536 /*  NL     (input) INTEGER */
9537 /*         The row dimension of the upper block.  NL >= 1. */
9538 
9539 /*  NR     (input) INTEGER */
9540 /*         The row dimension of the lower block.  NR >= 1. */
9541 
9542 /*  SQRE   (input) INTEGER */
9543 /*         = 0: the lower block is an NR-by-NR square matrix. */
9544 /*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
9545 
9546 /*         The bidiagonal matrix has N = NL + NR + 1 rows and */
9547 /*         M = N + SQRE >= N columns. */
9548 
9549 /*  K      (output) INTEGER */
9550 /*         Contains the dimension of the non-deflated matrix, */
9551 /*         This is the order of the related secular equation. 1 <= K <=N. */
9552 
9553 /*  D      (input/output) DOUBLE PRECISION array, dimension(N) */
9554 /*         On entry D contains the singular values of the two submatrices */
9555 /*         to be combined.  On exit D contains the trailing (N-K) updated */
9556 /*         singular values (those which were deflated) sorted into */
9557 /*         increasing order. */
9558 
9559 /*  Z      (output) DOUBLE PRECISION array, dimension(N) */
9560 /*         On exit Z contains the updating row vector in the secular */
9561 /*         equation. */
9562 
9563 /*  ALPHA  (input) DOUBLE PRECISION */
9564 /*         Contains the diagonal element associated with the added row. */
9565 
9566 /*  BETA   (input) DOUBLE PRECISION */
9567 /*         Contains the off-diagonal element associated with the added */
9568 /*         row. */
9569 
9570 /*  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N) */
9571 /*         On entry U contains the left singular vectors of two */
9572 /*         submatrices in the two square blocks with corners at (1,1), */
9573 /*         (NL, NL), and (NL+2, NL+2), (N,N). */
9574 /*         On exit U contains the trailing (N-K) updated left singular */
9575 /*         vectors (those which were deflated) in its last N-K columns. */
9576 
9577 /*  LDU    (input) INTEGER */
9578 /*         The leading dimension of the array U.  LDU >= N. */
9579 
9580 /*  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M) */
9581 /*         On entry VT' contains the right singular vectors of two */
9582 /*         submatrices in the two square blocks with corners at (1,1), */
9583 /*         (NL+1, NL+1), and (NL+2, NL+2), (M,M). */
9584 /*         On exit VT' contains the trailing (N-K) updated right singular */
9585 /*         vectors (those which were deflated) in its last N-K columns. */
9586 /*         In case SQRE =1, the last row of VT spans the right null */
9587 /*         space. */
9588 
9589 /*  LDVT   (input) INTEGER */
9590 /*         The leading dimension of the array VT.  LDVT >= M. */
9591 
9592 /*  DSIGMA (output) DOUBLE PRECISION array, dimension (N) */
9593 /*         Contains a copy of the diagonal elements (K-1 singular values */
9594 /*         and one zero) in the secular equation. */
9595 
9596 /*  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N) */
9597 /*         Contains a copy of the first K-1 left singular vectors which */
9598 /*         will be used by DLASD3 in a matrix multiply (DGEMM) to solve */
9599 /*         for the new left singular vectors. U2 is arranged into four */
9600 /*         blocks. The first block contains a column with 1 at NL+1 and */
9601 /*         zero everywhere else; the second block contains non-zero */
9602 /*         entries only at and above NL; the third contains non-zero */
9603 /*         entries only below NL+1; and the fourth is dense. */
9604 
9605 /*  LDU2   (input) INTEGER */
9606 /*         The leading dimension of the array U2.  LDU2 >= N. */
9607 
9608 /*  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N) */
9609 /*         VT2' contains a copy of the first K right singular vectors */
9610 /*         which will be used by DLASD3 in a matrix multiply (DGEMM) to */
9611 /*         solve for the new right singular vectors. VT2 is arranged into */
9612 /*         three blocks. The first block contains a row that corresponds */
9613 /*         to the special 0 diagonal element in SIGMA; the second block */
9614 /*         contains non-zeros only at and before NL +1; the third block */
9615 /*         contains non-zeros only at and after  NL +2. */
9616 
9617 /*  LDVT2  (input) INTEGER */
9618 /*         The leading dimension of the array VT2.  LDVT2 >= M. */
9619 
9620 /*  IDXP   (workspace) INTEGER array dimension(N) */
9621 /*         This will contain the permutation used to place deflated */
9622 /*         values of D at the end of the array. On output IDXP(2:K) */
9623 /*         points to the nondeflated D-values and IDXP(K+1:N) */
9624 /*         points to the deflated singular values. */
9625 
9626 /*  IDX    (workspace) INTEGER array dimension(N) */
9627 /*         This will contain the permutation used to sort the contents of */
9628 /*         D into ascending order. */
9629 
9630 /*  IDXC   (output) INTEGER array dimension(N) */
9631 /*         This will contain the permutation used to arrange the columns */
9632 /*         of the deflated U matrix into three groups:  the first group */
9633 /*         contains non-zero entries only at and above NL, the second */
9634 /*         contains non-zero entries only below NL+2, and the third is */
9635 /*         dense. */
9636 
9637 /*  IDXQ   (input/output) INTEGER array dimension(N) */
9638 /*         This contains the permutation which separately sorts the two */
9639 /*         sub-problems in D into ascending order.  Note that entries in */
9640 /*         the first hlaf of this permutation must first be moved one */
9641 /*         position backward; and entries in the second half */
9642 /*         must first have NL+1 added to their values. */
9643 
9644 /*  COLTYP (workspace/output) INTEGER array dimension(N) */
9645 /*         As workspace, this will contain a label which will indicate */
9646 /*         which of the following types a column in the U2 matrix or a */
9647 /*         row in the VT2 matrix is: */
9648 /*         1 : non-zero in the upper half only */
9649 /*         2 : non-zero in the lower half only */
9650 /*         3 : dense */
9651 /*         4 : deflated */
9652 
9653 /*         On exit, it is an array of dimension 4, with COLTYP(I) being */
9654 /*         the dimension of the I-th type columns. */
9655 
9656 /*  INFO   (output) INTEGER */
9657 /*          = 0:  successful exit. */
9658 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
9659 
9660 /*  Further Details */
9661 /*  =============== */
9662 
9663 /*  Based on contributions by */
9664 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
9665 /*     California at Berkeley, USA */
9666 
9667 /*  ===================================================================== */
9668 
9669 /*     .. Parameters .. */
9670 /*     .. */
9671 /*     .. Local Arrays .. */
9672 /*     .. */
9673 /*     .. Local Scalars .. */
9674 /*     .. */
9675 /*     .. External Functions .. */
9676 /*     .. */
9677 /*     .. External Subroutines .. */
9678 /*     .. */
9679 /*     .. Intrinsic Functions .. */
9680 /*     .. */
9681 /*     .. Executable Statements .. */
9682 
9683 /*     Test the input parameters. */
9684 
9685     /* Parameter adjustments */
9686     --d__;
9687     --z__;
9688     u_dim1 = *ldu;
9689     u_offset = 1 + u_dim1;
9690     u -= u_offset;
9691     vt_dim1 = *ldvt;
9692     vt_offset = 1 + vt_dim1;
9693     vt -= vt_offset;
9694     --dsigma;
9695     u2_dim1 = *ldu2;
9696     u2_offset = 1 + u2_dim1;
9697     u2 -= u2_offset;
9698     vt2_dim1 = *ldvt2;
9699     vt2_offset = 1 + vt2_dim1;
9700     vt2 -= vt2_offset;
9701     --idxp;
9702     --idx;
9703     --idxc;
9704     --idxq;
9705     --coltyp;
9706 
9707     /* Function Body */
9708     *info = 0;
9709 
9710     if (*nl < 1) {
9711 	*info = -1;
9712     } else if (*nr < 1) {
9713 	*info = -2;
9714     } else if (*sqre != 1 && *sqre != 0) {
9715 	*info = -3;
9716     }
9717 
9718     n = *nl + *nr + 1;
9719     m = n + *sqre;
9720 
9721     if (*ldu < n) {
9722 	*info = -10;
9723     } else if (*ldvt < m) {
9724 	*info = -12;
9725     } else if (*ldu2 < n) {
9726 	*info = -15;
9727     } else if (*ldvt2 < m) {
9728 	*info = -17;
9729     }
9730     if (*info != 0) {
9731 	i__1 = -(*info);
9732 	xerbla_("DLASD2", &i__1);
9733 	return 0;
9734     }
9735 
9736     nlp1 = *nl + 1;
9737     nlp2 = *nl + 2;
9738 
9739 /*     Generate the first part of the vector Z; and move the singular */
9740 /*     values in the first part of D one position backward. */
9741 
9742     z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
9743     z__[1] = z1;
9744     for (i__ = *nl; i__ >= 1; --i__) {
9745 	z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
9746 	d__[i__ + 1] = d__[i__];
9747 	idxq[i__ + 1] = idxq[i__] + 1;
9748 /* L10: */
9749     }
9750 
9751 /*     Generate the second part of the vector Z. */
9752 
9753     i__1 = m;
9754     for (i__ = nlp2; i__ <= i__1; ++i__) {
9755 	z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
9756 /* L20: */
9757     }
9758 
9759 /*     Initialize some reference arrays. */
9760 
9761     i__1 = nlp1;
9762     for (i__ = 2; i__ <= i__1; ++i__) {
9763 	coltyp[i__] = 1;
9764 /* L30: */
9765     }
9766     i__1 = n;
9767     for (i__ = nlp2; i__ <= i__1; ++i__) {
9768 	coltyp[i__] = 2;
9769 /* L40: */
9770     }
9771 
9772 /*     Sort the singular values into increasing order */
9773 
9774     i__1 = n;
9775     for (i__ = nlp2; i__ <= i__1; ++i__) {
9776 	idxq[i__] += nlp1;
9777 /* L50: */
9778     }
9779 
9780 /*     DSIGMA, IDXC, IDXC, and the first column of U2 */
9781 /*     are used as storage space. */
9782 
9783     i__1 = n;
9784     for (i__ = 2; i__ <= i__1; ++i__) {
9785 	dsigma[i__] = d__[idxq[i__]];
9786 	u2[i__ + u2_dim1] = z__[idxq[i__]];
9787 	idxc[i__] = coltyp[idxq[i__]];
9788 /* L60: */
9789     }
9790 
9791     dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
9792 
9793     i__1 = n;
9794     for (i__ = 2; i__ <= i__1; ++i__) {
9795 	idxi = idx[i__] + 1;
9796 	d__[i__] = dsigma[idxi];
9797 	z__[i__] = u2[idxi + u2_dim1];
9798 	coltyp[i__] = idxc[idxi];
9799 /* L70: */
9800     }
9801 
9802 /*     Calculate the allowable deflation tolerance */
9803 
9804     eps = dlamch_("Epsilon");
9805 /* Computing MAX */
9806     d__1 = abs(*alpha), d__2 = abs(*beta);
9807     tol = std::max(d__1,d__2);
9808 /* Computing MAX */
9809     d__2 = (d__1 = d__[n], abs(d__1));
9810     tol = eps * 8. * std::max(d__2,tol);
9811 
9812 /*     There are 2 kinds of deflation -- first a value in the z-vector */
9813 /*     is small, second two (or more) singular values are very close */
9814 /*     together (their difference is small). */
9815 
9816 /*     If the value in the z-vector is small, we simply permute the */
9817 /*     array so that the corresponding singular value is moved to the */
9818 /*     end. */
9819 
9820 /*     If two values in the D-vector are close, we perform a two-sided */
9821 /*     rotation designed to make one of the corresponding z-vector */
9822 /*     entries zero, and then permute the array so that the deflated */
9823 /*     singular value is moved to the end. */
9824 
9825 /*     If there are multiple singular values then the problem deflates. */
9826 /*     Here the number of equal singular values are found.  As each equal */
9827 /*     singular value is found, an elementary reflector is computed to */
9828 /*     rotate the corresponding singular subspace so that the */
9829 /*     corresponding components of Z are zero in this new basis. */
9830 
9831     *k = 1;
9832     k2 = n + 1;
9833     i__1 = n;
9834     for (j = 2; j <= i__1; ++j) {
9835 	if ((d__1 = z__[j], abs(d__1)) <= tol) {
9836 
9837 /*           Deflate due to small z component. */
9838 
9839 	    --k2;
9840 	    idxp[k2] = j;
9841 	    coltyp[j] = 4;
9842 	    if (j == n) {
9843 		goto L120;
9844 	    }
9845 	} else {
9846 	    jprev = j;
9847 	    goto L90;
9848 	}
9849 /* L80: */
9850     }
9851 L90:
9852     j = jprev;
9853 L100:
9854     ++j;
9855     if (j > n) {
9856 	goto L110;
9857     }
9858     if ((d__1 = z__[j], abs(d__1)) <= tol) {
9859 
9860 /*        Deflate due to small z component. */
9861 
9862 	--k2;
9863 	idxp[k2] = j;
9864 	coltyp[j] = 4;
9865     } else {
9866 
9867 /*        Check if singular values are close enough to allow deflation. */
9868 
9869 	if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) {
9870 
9871 /*           Deflation is possible. */
9872 
9873 	    s = z__[jprev];
9874 	    c__ = z__[j];
9875 
9876 /*           Find sqrt(a**2+b**2) without overflow or */
9877 /*           destructive underflow. */
9878 
9879 	    tau = dlapy2_(&c__, &s);
9880 	    c__ /= tau;
9881 	    s = -s / tau;
9882 	    z__[j] = tau;
9883 	    z__[jprev] = 0.;
9884 
9885 /*           Apply back the Givens rotation to the left and right */
9886 /*           singular vector matrices. */
9887 
9888 	    idxjp = idxq[idx[jprev] + 1];
9889 	    idxj = idxq[idx[j] + 1];
9890 	    if (idxjp <= nlp1) {
9891 		--idxjp;
9892 	    }
9893 	    if (idxj <= nlp1) {
9894 		--idxj;
9895 	    }
9896 	    drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
9897 		    c__1, &c__, &s);
9898 	    drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
9899 		    c__, &s);
9900 	    if (coltyp[j] != coltyp[jprev]) {
9901 		coltyp[j] = 3;
9902 	    }
9903 	    coltyp[jprev] = 4;
9904 	    --k2;
9905 	    idxp[k2] = jprev;
9906 	    jprev = j;
9907 	} else {
9908 	    ++(*k);
9909 	    u2[*k + u2_dim1] = z__[jprev];
9910 	    dsigma[*k] = d__[jprev];
9911 	    idxp[*k] = jprev;
9912 	    jprev = j;
9913 	}
9914     }
9915     goto L100;
9916 L110:
9917 
9918 /*     Record the last singular value. */
9919 
9920     ++(*k);
9921     u2[*k + u2_dim1] = z__[jprev];
9922     dsigma[*k] = d__[jprev];
9923     idxp[*k] = jprev;
9924 
9925 L120:
9926 
9927 /*     Count up the total number of the various types of columns, then */
9928 /*     form a permutation which positions the four column types into */
9929 /*     four groups of uniform structure (although one or more of these */
9930 /*     groups may be empty). */
9931 
9932     for (j = 1; j <= 4; ++j) {
9933 	ctot[j - 1] = 0;
9934 /* L130: */
9935     }
9936     i__1 = n;
9937     for (j = 2; j <= i__1; ++j) {
9938 	ct = coltyp[j];
9939 	++ctot[ct - 1];
9940 /* L140: */
9941     }
9942 
9943 /*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
9944 
9945     psm[0] = 2;
9946     psm[1] = ctot[0] + 2;
9947     psm[2] = psm[1] + ctot[1];
9948     psm[3] = psm[2] + ctot[2];
9949 
9950 /*     Fill out the IDXC array so that the permutation which it induces */
9951 /*     will place all type-1 columns first, all type-2 columns next, */
9952 /*     then all type-3's, and finally all type-4's, starting from the */
9953 /*     second column. This applies similarly to the rows of VT. */
9954 
9955     i__1 = n;
9956     for (j = 2; j <= i__1; ++j) {
9957 	jp = idxp[j];
9958 	ct = coltyp[jp];
9959 	idxc[psm[ct - 1]] = j;
9960 	++psm[ct - 1];
9961 /* L150: */
9962     }
9963 
9964 /*     Sort the singular values and corresponding singular vectors into */
9965 /*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors */
9966 /*     which were not deflated go into the first K slots of DSIGMA, U2, */
9967 /*     and VT2 respectively, while those which were deflated go into the */
9968 /*     last N - K slots, except that the first column/row will be treated */
9969 /*     separately. */
9970 
9971     i__1 = n;
9972     for (j = 2; j <= i__1; ++j) {
9973 	jp = idxp[j];
9974 	dsigma[j] = d__[jp];
9975 	idxj = idxq[idx[idxp[idxc[j]]] + 1];
9976 	if (idxj <= nlp1) {
9977 	    --idxj;
9978 	}
9979 	dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
9980 	dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
9981 /* L160: */
9982     }
9983 
9984 /*     Determine DSIGMA(1), DSIGMA(2) and Z(1) */
9985 
9986     dsigma[1] = 0.;
9987     hlftol = tol / 2.;
9988     if (abs(dsigma[2]) <= hlftol) {
9989 	dsigma[2] = hlftol;
9990     }
9991     if (m > n) {
9992 	z__[1] = dlapy2_(&z1, &z__[m]);
9993 	if (z__[1] <= tol) {
9994 	    c__ = 1.;
9995 	    s = 0.;
9996 	    z__[1] = tol;
9997 	} else {
9998 	    c__ = z1 / z__[1];
9999 	    s = z__[m] / z__[1];
10000 	}
10001     } else {
10002 	if (abs(z1) <= tol) {
10003 	    z__[1] = tol;
10004 	} else {
10005 	    z__[1] = z1;
10006 	}
10007     }
10008 
10009 /*     Move the rest of the updating row to Z. */
10010 
10011     i__1 = *k - 1;
10012     dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
10013 
10014 /*     Determine the first column of U2, the first row of VT2 and the */
10015 /*     last row of VT. */
10016 
10017     dlaset_("A", &n, &c__1, &c_b30, &c_b30, &u2[u2_offset], ldu2);
10018     u2[nlp1 + u2_dim1] = 1.;
10019     if (m > n) {
10020 	i__1 = nlp1;
10021 	for (i__ = 1; i__ <= i__1; ++i__) {
10022 	    vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
10023 	    vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
10024 /* L170: */
10025 	}
10026 	i__1 = m;
10027 	for (i__ = nlp2; i__ <= i__1; ++i__) {
10028 	    vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
10029 	    vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
10030 /* L180: */
10031 	}
10032     } else {
10033 	dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
10034     }
10035     if (m > n) {
10036 	dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
10037     }
10038 
10039 /*     The deflated singular values and their corresponding vectors go */
10040 /*     into the back of D, U, and V respectively. */
10041 
10042     if (n > *k) {
10043 	i__1 = n - *k;
10044 	dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
10045 	i__1 = n - *k;
10046 	dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
10047 		 * u_dim1 + 1], ldu);
10048 	i__1 = n - *k;
10049 	dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 +
10050 		vt_dim1], ldvt);
10051     }
10052 
10053 /*     Copy CTOT into COLTYP for referencing in DLASD3. */
10054 
10055     for (j = 1; j <= 4; ++j) {
10056 	coltyp[j] = ctot[j - 1];
10057 /* L190: */
10058     }
10059 
10060     return 0;
10061 
10062 /*     End of DLASD2 */
10063 
10064 } /* dlasd2_ */
10065 
dlasd3_(integer * nl,integer * nr,integer * sqre,integer * k,double * d__,double * q,integer * ldq,double * dsigma,double * u,integer * ldu,double * u2,integer * ldu2,double * vt,integer * ldvt,double * vt2,integer * ldvt2,integer * idxc,integer * ctot,double * z__,integer * info)10066 /* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer
10067 	*k, double *d__, double *q, integer *ldq, double *dsigma,
10068 	double *u, integer *ldu, double *u2, integer *ldu2,
10069 	double *vt, integer *ldvt, double *vt2, integer *ldvt2,
10070 	integer *idxc, integer *ctot, double *z__, integer *info)
10071 {
10072 	/* Table of constant values */
10073 	static integer c__1 = 1;
10074 	static integer c__0 = 0;
10075 	static double c_b13 = 1.;
10076 	static double c_b26 = 0.;
10077 
10078     /* System generated locals */
10079     integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
10080 	    vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
10081     double d__1, d__2;
10082 
10083     /* Local variables */
10084     integer i__, j, m, n, jc;
10085     double rho;
10086     integer nlp1, nlp2, nrp1;
10087     double temp;
10088     integer ctemp;
10089     integer ktemp;
10090 
10091 /*  -- LAPACK auxiliary routine (version 3.1) -- */
10092 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
10093 /*     November 2006 */
10094 
10095 /*     .. Scalar Arguments .. */
10096 /*     .. */
10097 /*     .. Array Arguments .. */
10098 /*     .. */
10099 
10100 /*  Purpose */
10101 /*  ======= */
10102 
10103 /*  DLASD3 finds all the square roots of the roots of the secular */
10104 /*  equation, as defined by the values in D and Z.  It makes the */
10105 /*  appropriate calls to DLASD4 and then updates the singular */
10106 /*  vectors by matrix multiplication. */
10107 
10108 /*  This code makes very mild assumptions about floating point */
10109 /*  arithmetic. It will work on machines with a guard digit in */
10110 /*  add/subtract, or on those binary machines without guard digits */
10111 /*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */
10112 /*  It could conceivably fail on hexadecimal or decimal machines */
10113 /*  without guard digits, but we know of none. */
10114 
10115 /*  DLASD3 is called from DLASD1. */
10116 
10117 /*  Arguments */
10118 /*  ========= */
10119 
10120 /*  NL     (input) INTEGER */
10121 /*         The row dimension of the upper block.  NL >= 1. */
10122 
10123 /*  NR     (input) INTEGER */
10124 /*         The row dimension of the lower block.  NR >= 1. */
10125 
10126 /*  SQRE   (input) INTEGER */
10127 /*         = 0: the lower block is an NR-by-NR square matrix. */
10128 /*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
10129 
10130 /*         The bidiagonal matrix has N = NL + NR + 1 rows and */
10131 /*         M = N + SQRE >= N columns. */
10132 
10133 /*  K      (input) INTEGER */
10134 /*         The size of the secular equation, 1 =< K = < N. */
10135 
10136 /*  D      (output) DOUBLE PRECISION array, dimension(K) */
10137 /*         On exit the square roots of the roots of the secular equation, */
10138 /*         in ascending order. */
10139 
10140 /*  Q      (workspace) DOUBLE PRECISION array, */
10141 /*                     dimension at least (LDQ,K). */
10142 
10143 /*  LDQ    (input) INTEGER */
10144 /*         The leading dimension of the array Q.  LDQ >= K. */
10145 
10146 /*  DSIGMA (input) DOUBLE PRECISION array, dimension(K) */
10147 /*         The first K elements of this array contain the old roots */
10148 /*         of the deflated updating problem.  These are the poles */
10149 /*         of the secular equation. */
10150 
10151 /*  U      (output) DOUBLE PRECISION array, dimension (LDU, N) */
10152 /*         The last N - K columns of this matrix contain the deflated */
10153 /*         left singular vectors. */
10154 
10155 /*  LDU    (input) INTEGER */
10156 /*         The leading dimension of the array U.  LDU >= N. */
10157 
10158 /*  U2     (input/output) DOUBLE PRECISION array, dimension (LDU2, N) */
10159 /*         The first K columns of this matrix contain the non-deflated */
10160 /*         left singular vectors for the split problem. */
10161 
10162 /*  LDU2   (input) INTEGER */
10163 /*         The leading dimension of the array U2.  LDU2 >= N. */
10164 
10165 /*  VT     (output) DOUBLE PRECISION array, dimension (LDVT, M) */
10166 /*         The last M - K columns of VT' contain the deflated */
10167 /*         right singular vectors. */
10168 
10169 /*  LDVT   (input) INTEGER */
10170 /*         The leading dimension of the array VT.  LDVT >= N. */
10171 
10172 /*  VT2    (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) */
10173 /*         The first K columns of VT2' contain the non-deflated */
10174 /*         right singular vectors for the split problem. */
10175 
10176 /*  LDVT2  (input) INTEGER */
10177 /*         The leading dimension of the array VT2.  LDVT2 >= N. */
10178 
10179 /*  IDXC   (input) INTEGER array, dimension ( N ) */
10180 /*         The permutation used to arrange the columns of U (and rows of */
10181 /*         VT) into three groups:  the first group contains non-zero */
10182 /*         entries only at and above (or before) NL +1; the second */
10183 /*         contains non-zero entries only at and below (or after) NL+2; */
10184 /*         and the third is dense. The first column of U and the row of */
10185 /*         VT are treated separately, however. */
10186 
10187 /*         The rows of the singular vectors found by DLASD4 */
10188 /*         must be likewise permuted before the matrix multiplies can */
10189 /*         take place. */
10190 
10191 /*  CTOT   (input) INTEGER array, dimension ( 4 ) */
10192 /*         A count of the total number of the various types of columns */
10193 /*         in U (or rows in VT), as described in IDXC. The fourth column */
10194 /*         type is any column which has been deflated. */
10195 
10196 /*  Z      (input) DOUBLE PRECISION array, dimension (K) */
10197 /*         The first K elements of this array contain the components */
10198 /*         of the deflation-adjusted updating row vector. */
10199 
10200 /*  INFO   (output) INTEGER */
10201 /*         = 0:  successful exit. */
10202 /*         < 0:  if INFO = -i, the i-th argument had an illegal value. */
10203 /*         > 0:  if INFO = 1, an singular value did not converge */
10204 
10205 /*  Further Details */
10206 /*  =============== */
10207 
10208 /*  Based on contributions by */
10209 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
10210 /*     California at Berkeley, USA */
10211 
10212 /*  ===================================================================== */
10213 
10214 /*     .. Parameters .. */
10215 /*     .. */
10216 /*     .. Local Scalars .. */
10217 /*     .. */
10218 /*     .. External Functions .. */
10219 /*     .. */
10220 /*     .. External Subroutines .. */
10221 /*     .. */
10222 /*     .. Intrinsic Functions .. */
10223 /*     .. */
10224 /*     .. Executable Statements .. */
10225 
10226 /*     Test the input parameters. */
10227 
10228     /* Parameter adjustments */
10229     --d__;
10230     q_dim1 = *ldq;
10231     q_offset = 1 + q_dim1;
10232     q -= q_offset;
10233     --dsigma;
10234     u_dim1 = *ldu;
10235     u_offset = 1 + u_dim1;
10236     u -= u_offset;
10237     u2_dim1 = *ldu2;
10238     u2_offset = 1 + u2_dim1;
10239     u2 -= u2_offset;
10240     vt_dim1 = *ldvt;
10241     vt_offset = 1 + vt_dim1;
10242     vt -= vt_offset;
10243     vt2_dim1 = *ldvt2;
10244     vt2_offset = 1 + vt2_dim1;
10245     vt2 -= vt2_offset;
10246     --idxc;
10247     --ctot;
10248     --z__;
10249 
10250     /* Function Body */
10251     *info = 0;
10252 
10253     if (*nl < 1) {
10254 	*info = -1;
10255     } else if (*nr < 1) {
10256 	*info = -2;
10257     } else if (*sqre != 1 && *sqre != 0) {
10258 	*info = -3;
10259     }
10260 
10261     n = *nl + *nr + 1;
10262     m = n + *sqre;
10263     nlp1 = *nl + 1;
10264     nlp2 = *nl + 2;
10265 
10266     if (*k < 1 || *k > n) {
10267 	*info = -4;
10268     } else if (*ldq < *k) {
10269 	*info = -7;
10270     } else if (*ldu < n) {
10271 	*info = -10;
10272     } else if (*ldu2 < n) {
10273 	*info = -12;
10274     } else if (*ldvt < m) {
10275 	*info = -14;
10276     } else if (*ldvt2 < m) {
10277 	*info = -16;
10278     }
10279     if (*info != 0) {
10280 	i__1 = -(*info);
10281 	xerbla_("DLASD3", &i__1);
10282 	return 0;
10283     }
10284 
10285 /*     Quick return if possible */
10286 
10287     if (*k == 1) {
10288 	d__[1] = abs(z__[1]);
10289 	dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
10290 	if (z__[1] > 0.) {
10291 	    dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
10292 	} else {
10293 	    i__1 = n;
10294 	    for (i__ = 1; i__ <= i__1; ++i__) {
10295 		u[i__ + u_dim1] = -u2[i__ + u2_dim1];
10296 /* L10: */
10297 	    }
10298 	}
10299 	return 0;
10300     }
10301 
10302 /*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
10303 /*     be computed with high relative accuracy (barring over/underflow). */
10304 /*     This is a problem on machines without a guard digit in */
10305 /*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
10306 /*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
10307 /*     which on any of these machines zeros out the bottommost */
10308 /*     bit of DSIGMA(I) if it is 1; this makes the subsequent */
10309 /*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
10310 /*     occurs. On binary machines with a guard digit (almost all */
10311 /*     machines) it does not change DSIGMA(I) at all. On hexadecimal */
10312 /*     and decimal machines with a guard digit, it slightly */
10313 /*     changes the bottommost bits of DSIGMA(I). It does not account */
10314 /*     for hexadecimal or decimal machines without guard digits */
10315 /*     (we know of none). We use a subroutine call to compute */
10316 /*     2*DSIGMA(I) to prevent optimizing compilers from eliminating */
10317 /*     this code. */
10318 
10319     i__1 = *k;
10320     for (i__ = 1; i__ <= i__1; ++i__) {
10321 	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
10322 /* L20: */
10323     }
10324 
10325 /*     Keep a copy of Z. */
10326 
10327     dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
10328 
10329 /*     Normalize Z. */
10330 
10331     rho = dnrm2_(k, &z__[1], &c__1);
10332     dlascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
10333     rho *= rho;
10334 
10335 /*     Find the new singular values. */
10336 
10337     i__1 = *k;
10338     for (j = 1; j <= i__1; ++j) {
10339 	dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
10340 		 &vt[j * vt_dim1 + 1], info);
10341 
10342 /*        If the zero finder fails, the computation is terminated. */
10343 
10344 	if (*info != 0) {
10345 	    return 0;
10346 	}
10347 /* L30: */
10348     }
10349 
10350 /*     Compute updated Z. */
10351 
10352     i__1 = *k;
10353     for (i__ = 1; i__ <= i__1; ++i__) {
10354 	z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
10355 	i__2 = i__ - 1;
10356 	for (j = 1; j <= i__2; ++j) {
10357 	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
10358 		    i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
10359 /* L40: */
10360 	}
10361 	i__2 = *k - 1;
10362 	for (j = i__; j <= i__2; ++j) {
10363 	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
10364 		    i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
10365 /* L50: */
10366 	}
10367 	d__2 = sqrt((d__1 = z__[i__], abs(d__1)));
10368 	z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]);
10369 /* L60: */
10370     }
10371 
10372 /*     Compute left singular vectors of the modified diagonal matrix, */
10373 /*     and store related information for the right singular vectors. */
10374 
10375     i__1 = *k;
10376     for (i__ = 1; i__ <= i__1; ++i__) {
10377 	vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
10378 		vt_dim1 + 1];
10379 	u[i__ * u_dim1 + 1] = -1.;
10380 	i__2 = *k;
10381 	for (j = 2; j <= i__2; ++j) {
10382 	    vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
10383 		    * vt_dim1];
10384 	    u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
10385 /* L70: */
10386 	}
10387 	temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
10388 	q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
10389 	i__2 = *k;
10390 	for (j = 2; j <= i__2; ++j) {
10391 	    jc = idxc[j];
10392 	    q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
10393 /* L80: */
10394 	}
10395 /* L90: */
10396     }
10397 
10398 /*     Update the left singular vector matrix. */
10399 
10400     if (*k == 2) {
10401 	dgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset],
10402 		 ldq, &c_b26, &u[u_offset], ldu);
10403 	goto L100;
10404     }
10405     if (ctot[1] > 0) {
10406 	dgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1],
10407 		ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu);
10408 	if (ctot[3] > 0) {
10409 	    ktemp = ctot[1] + 2 + ctot[2];
10410 	    dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1]
10411 , ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1],
10412 		    ldu);
10413 	}
10414     } else if (ctot[3] > 0) {
10415 	ktemp = ctot[1] + 2 + ctot[2];
10416 	dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1],
10417 		ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu);
10418     } else {
10419 	dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
10420     }
10421     dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
10422     ktemp = ctot[1] + 2;
10423     ctemp = ctot[2] + ctot[3];
10424     dgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2,
10425 	     &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu);
10426 
10427 /*     Generate the right singular vectors. */
10428 
10429 L100:
10430     i__1 = *k;
10431     for (i__ = 1; i__ <= i__1; ++i__) {
10432 	temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
10433 	q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
10434 	i__2 = *k;
10435 	for (j = 2; j <= i__2; ++j) {
10436 	    jc = idxc[j];
10437 	    q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
10438 /* L110: */
10439 	}
10440 /* L120: */
10441     }
10442 
10443 /*     Update the right singular vector matrix. */
10444 
10445     if (*k == 2) {
10446 	dgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
10447 , ldvt2, &c_b26, &vt[vt_offset], ldvt);
10448 	return 0;
10449     }
10450     ktemp = ctot[1] + 1;
10451     dgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[
10452 	    vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt);
10453     ktemp = ctot[1] + 2 + ctot[2];
10454     if (ktemp <= *ldvt2) {
10455 	dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1],
10456 		ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1],
10457 		ldvt);
10458     }
10459 
10460     ktemp = ctot[1] + 1;
10461     nrp1 = *nr + *sqre;
10462     if (ktemp > 1) {
10463 	i__1 = *k;
10464 	for (i__ = 1; i__ <= i__1; ++i__) {
10465 	    q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
10466 /* L130: */
10467 	}
10468 	i__1 = m;
10469 	for (i__ = nlp2; i__ <= i__1; ++i__) {
10470 	    vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
10471 /* L140: */
10472 	}
10473     }
10474     ctemp = ctot[2] + 1 + ctot[3];
10475     dgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, &
10476 	    vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 +
10477 	    1], ldvt);
10478 
10479     return 0;
10480 
10481 /*     End of DLASD3 */
10482 
10483 } /* dlasd3_ */
10484 
dlasd4_(integer * n,integer * i__,double * d__,double * z__,double * delta,double * rho,double * sigma,double * work,integer * info)10485 /* Subroutine */ int dlasd4_(integer *n, integer *i__, double *d__,
10486 	double *z__, double *delta, double *rho, double *
10487 	sigma, double *work, integer *info)
10488 {
10489     /* System generated locals */
10490     integer i__1;
10491     double d__1;
10492 
10493     /* Builtin functions
10494     double sqrt(double); */
10495 
10496     /* Local variables */
10497     double a, b, c__;
10498     integer j;
10499     double w, dd[3];
10500     integer ii;
10501     double dw, zz[3];
10502     integer ip1;
10503     double eta, phi, eps, tau, psi;
10504     integer iim1, iip1;
10505     double dphi, dpsi;
10506     integer iter;
10507     double temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip;
10508     integer niter;
10509     double dtisq;
10510     bool swtch;
10511     double dtnsq;
10512     double delsq2, dtnsq1;
10513     bool swtch3;
10514    bool orgati;
10515     double erretm, dtipsq, rhoinv;
10516 
10517 
10518 /*  -- LAPACK auxiliary routine (version 3.1) -- */
10519 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
10520 /*     November 2006 */
10521 
10522 /*     .. Scalar Arguments .. */
10523 /*     .. */
10524 /*     .. Array Arguments .. */
10525 /*     .. */
10526 
10527 /*  Purpose */
10528 /*  ======= */
10529 
10530 /*  This subroutine computes the square root of the I-th updated */
10531 /*  eigenvalue of a positive symmetric rank-one modification to */
10532 /*  a positive diagonal matrix whose entries are given as the squares */
10533 /*  of the corresponding entries in the array d, and that */
10534 
10535 /*         0 <= D(i) < D(j)  for  i < j */
10536 
10537 /*  and that RHO > 0. This is arranged by the calling routine, and is */
10538 /*  no loss in generality.  The rank-one modified system is thus */
10539 
10540 /*         diag( D ) * diag( D ) +  RHO *  Z * Z_transpose. */
10541 
10542 /*  where we assume the Euclidean norm of Z is 1. */
10543 
10544 /*  The method consists of approximating the rational functions in the */
10545 /*  secular equation by simpler interpolating rational functions. */
10546 
10547 /*  Arguments */
10548 /*  ========= */
10549 
10550 /*  N      (input) INTEGER */
10551 /*         The length of all arrays. */
10552 
10553 /*  I      (input) INTEGER */
10554 /*         The index of the eigenvalue to be computed.  1 <= I <= N. */
10555 
10556 /*  D      (input) DOUBLE PRECISION array, dimension ( N ) */
10557 /*         The original eigenvalues.  It is assumed that they are in */
10558 /*         order, 0 <= D(I) < D(J)  for I < J. */
10559 
10560 /*  Z      (input) DOUBLE PRECISION array, dimension ( N ) */
10561 /*         The components of the updating vector. */
10562 
10563 /*  DELTA  (output) DOUBLE PRECISION array, dimension ( N ) */
10564 /*         If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th */
10565 /*         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA */
10566 /*         contains the information necessary to construct the */
10567 /*         (singular) eigenvectors. */
10568 
10569 /*  RHO    (input) DOUBLE PRECISION */
10570 /*         The scalar in the symmetric updating formula. */
10571 
10572 /*  SIGMA  (output) DOUBLE PRECISION */
10573 /*         The computed sigma_I, the I-th updated eigenvalue. */
10574 
10575 /*  WORK   (workspace) DOUBLE PRECISION array, dimension ( N ) */
10576 /*         If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th */
10577 /*         component.  If N = 1, then WORK( 1 ) = 1. */
10578 
10579 /*  INFO   (output) INTEGER */
10580 /*         = 0:  successful exit */
10581 /*         > 0:  if INFO = 1, the updating process failed. */
10582 
10583 /*  Internal Parameters */
10584 /*  =================== */
10585 
10586 /*  Logical variable ORGATI (origin-at-i?) is used for distinguishing */
10587 /*  whether D(i) or D(i+1) is treated as the origin. */
10588 
10589 /*            ORGATI = .true.    origin at i */
10590 /*            ORGATI = .false.   origin at i+1 */
10591 
10592 /*  Logical variable SWTCH3 (switch-for-3-poles?) is for noting */
10593 /*  if we are working with THREE poles! */
10594 
10595 /*  MAXIT is the maximum number of iterations allowed for each */
10596 /*  eigenvalue. */
10597 
10598 /*  Further Details */
10599 /*  =============== */
10600 
10601 /*  Based on contributions by */
10602 /*     Ren-Cang Li, Computer Science Division, University of California */
10603 /*     at Berkeley, USA */
10604 
10605 /*  ===================================================================== */
10606 
10607 /*     .. Parameters .. */
10608 /*     .. */
10609 /*     .. Local Scalars .. */
10610 /*     .. */
10611 /*     .. Local Arrays .. */
10612 /*     .. */
10613 /*     .. External Subroutines .. */
10614 /*     .. */
10615 /*     .. External Functions .. */
10616 /*     .. */
10617 /*     .. Intrinsic Functions .. */
10618 /*     .. */
10619 /*     .. Executable Statements .. */
10620 
10621 /*     Since this routine is called in an inner loop, we do no argument */
10622 /*     checking. */
10623 
10624 /*     Quick return for N=1 and 2. */
10625 
10626     /* Parameter adjustments */
10627     --work;
10628     --delta;
10629     --z__;
10630     --d__;
10631 
10632     /* Function Body */
10633     *info = 0;
10634     if (*n == 1) {
10635 
10636 /*        Presumably, I=1 upon entry */
10637 
10638 	*sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]);
10639 	delta[1] = 1.;
10640 	work[1] = 1.;
10641 	return 0;
10642     }
10643     if (*n == 2) {
10644 	dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]);
10645 	return 0;
10646     }
10647 
10648 /*     Compute machine epsilon */
10649 
10650     eps = dlamch_("Epsilon");
10651     rhoinv = 1. / *rho;
10652 
10653 /*     The case I = N */
10654 
10655     if (*i__ == *n) {
10656 
10657 /*        Initialize some basic variables */
10658 
10659 	ii = *n - 1;
10660 	niter = 1;
10661 
10662 /*        Calculate initial guess */
10663 
10664 	temp = *rho / 2.;
10665 
10666 /*        If ||Z||_2 is not one, then TEMP should be set to */
10667 /*        RHO * ||Z||_2^2 / TWO */
10668 
10669 	temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp));
10670 	i__1 = *n;
10671 	for (j = 1; j <= i__1; ++j) {
10672 	    work[j] = d__[j] + d__[*n] + temp1;
10673 	    delta[j] = d__[j] - d__[*n] - temp1;
10674 /* L10: */
10675 	}
10676 
10677 	psi = 0.;
10678 	i__1 = *n - 2;
10679 	for (j = 1; j <= i__1; ++j) {
10680 	    psi += z__[j] * z__[j] / (delta[j] * work[j]);
10681 /* L20: */
10682 	}
10683 
10684 	c__ = rhoinv + psi;
10685 	w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[*
10686 		n] / (delta[*n] * work[*n]);
10687 
10688 	if (w <= 0.) {
10689 	    temp1 = sqrt(d__[*n] * d__[*n] + *rho);
10690 	    temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[*
10691 		    n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] *
10692 		    z__[*n] / *rho;
10693 
10694 /*           The following TAU is to approximate */
10695 /*           SIGMA_n^2 - D( N )*D( N ) */
10696 
10697 	    if (c__ <= temp) {
10698 		tau = *rho;
10699 	    } else {
10700 		delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
10701 		a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*
10702 			n];
10703 		b = z__[*n] * z__[*n] * delsq;
10704 		if (a < 0.) {
10705 		    tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
10706 		} else {
10707 		    tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
10708 		}
10709 	    }
10710 
10711 /*           It can be proved that */
10712 /*               D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO */
10713 
10714 	} else {
10715 	    delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
10716 	    a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
10717 	    b = z__[*n] * z__[*n] * delsq;
10718 
10719 /*           The following TAU is to approximate */
10720 /*           SIGMA_n^2 - D( N )*D( N ) */
10721 
10722 	    if (a < 0.) {
10723 		tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
10724 	    } else {
10725 		tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
10726 	    }
10727 
10728 /*           It can be proved that */
10729 /*           D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 */
10730 
10731 	}
10732 
10733 /*        The following ETA is to approximate SIGMA_n - D( N ) */
10734 
10735 	eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau));
10736 
10737 	*sigma = d__[*n] + eta;
10738 	i__1 = *n;
10739 	for (j = 1; j <= i__1; ++j) {
10740 	    delta[j] = d__[j] - d__[*i__] - eta;
10741 	    work[j] = d__[j] + d__[*i__] + eta;
10742 /* L30: */
10743 	}
10744 
10745 /*        Evaluate PSI and the derivative DPSI */
10746 
10747 	dpsi = 0.;
10748 	psi = 0.;
10749 	erretm = 0.;
10750 	i__1 = ii;
10751 	for (j = 1; j <= i__1; ++j) {
10752 	    temp = z__[j] / (delta[j] * work[j]);
10753 	    psi += z__[j] * temp;
10754 	    dpsi += temp * temp;
10755 	    erretm += psi;
10756 /* L40: */
10757 	}
10758 	erretm = abs(erretm);
10759 
10760 /*        Evaluate PHI and the derivative DPHI */
10761 
10762 	temp = z__[*n] / (delta[*n] * work[*n]);
10763 	phi = z__[*n] * temp;
10764 	dphi = temp * temp;
10765 	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
10766 		+ dphi);
10767 
10768 	w = rhoinv + phi + psi;
10769 
10770 /*        Test for convergence */
10771 
10772 	if (abs(w) <= eps * erretm) {
10773 	    goto L240;
10774 	}
10775 
10776 /*        Calculate the new step */
10777 
10778 	++niter;
10779 	dtnsq1 = work[*n - 1] * delta[*n - 1];
10780 	dtnsq = work[*n] * delta[*n];
10781 	c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
10782 	a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi);
10783 	b = dtnsq * dtnsq1 * w;
10784 	if (c__ < 0.) {
10785 	    c__ = abs(c__);
10786 	}
10787 	if (c__ == 0.) {
10788 	    eta = *rho - *sigma * *sigma;
10789 	} else if (a >= 0.) {
10790 	    eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
10791 		    * 2.);
10792 	} else {
10793 	    eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
10794 		    );
10795 	}
10796 
10797 /*        Note, eta should be positive if w is negative, and */
10798 /*        eta should be negative otherwise. However, */
10799 /*        if for some reason caused by roundoff, eta*w > 0, */
10800 /*        we simply use one Newton step instead. This way */
10801 /*        will guarantee eta*w < 0. */
10802 
10803 	if (w * eta > 0.) {
10804 	    eta = -w / (dpsi + dphi);
10805 	}
10806 	temp = eta - dtnsq;
10807 	if (temp > *rho) {
10808 	    eta = *rho + dtnsq;
10809 	}
10810 
10811 	tau += eta;
10812 	eta /= *sigma + sqrt(eta + *sigma * *sigma);
10813 	i__1 = *n;
10814 	for (j = 1; j <= i__1; ++j) {
10815 	    delta[j] -= eta;
10816 	    work[j] += eta;
10817 /* L50: */
10818 	}
10819 
10820 	*sigma += eta;
10821 
10822 /*        Evaluate PSI and the derivative DPSI */
10823 
10824 	dpsi = 0.;
10825 	psi = 0.;
10826 	erretm = 0.;
10827 	i__1 = ii;
10828 	for (j = 1; j <= i__1; ++j) {
10829 	    temp = z__[j] / (work[j] * delta[j]);
10830 	    psi += z__[j] * temp;
10831 	    dpsi += temp * temp;
10832 	    erretm += psi;
10833 /* L60: */
10834 	}
10835 	erretm = abs(erretm);
10836 
10837 /*        Evaluate PHI and the derivative DPHI */
10838 
10839 	temp = z__[*n] / (work[*n] * delta[*n]);
10840 	phi = z__[*n] * temp;
10841 	dphi = temp * temp;
10842 	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
10843 		+ dphi);
10844 
10845 	w = rhoinv + phi + psi;
10846 
10847 /*        Main loop to update the values of the array   DELTA */
10848 
10849 	iter = niter + 1;
10850 
10851 	for (niter = iter; niter <= 20; ++niter) {
10852 
10853 /*           Test for convergence */
10854 
10855 	    if (abs(w) <= eps * erretm) {
10856 		goto L240;
10857 	    }
10858 
10859 /*           Calculate the new step */
10860 
10861 	    dtnsq1 = work[*n - 1] * delta[*n - 1];
10862 	    dtnsq = work[*n] * delta[*n];
10863 	    c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
10864 	    a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi);
10865 	    b = dtnsq1 * dtnsq * w;
10866 	    if (a >= 0.) {
10867 		eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
10868 			c__ * 2.);
10869 	    } else {
10870 		eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(
10871 			d__1))));
10872 	    }
10873 
10874 /*           Note, eta should be positive if w is negative, and */
10875 /*           eta should be negative otherwise. However, */
10876 /*           if for some reason caused by roundoff, eta*w > 0, */
10877 /*           we simply use one Newton step instead. This way */
10878 /*           will guarantee eta*w < 0. */
10879 
10880 	    if (w * eta > 0.) {
10881 		eta = -w / (dpsi + dphi);
10882 	    }
10883 	    temp = eta - dtnsq;
10884 	    if (temp <= 0.) {
10885 		eta /= 2.;
10886 	    }
10887 
10888 	    tau += eta;
10889 	    eta /= *sigma + sqrt(eta + *sigma * *sigma);
10890 	    i__1 = *n;
10891 	    for (j = 1; j <= i__1; ++j) {
10892 		delta[j] -= eta;
10893 		work[j] += eta;
10894 /* L70: */
10895 	    }
10896 
10897 	    *sigma += eta;
10898 
10899 /*           Evaluate PSI and the derivative DPSI */
10900 
10901 	    dpsi = 0.;
10902 	    psi = 0.;
10903 	    erretm = 0.;
10904 	    i__1 = ii;
10905 	    for (j = 1; j <= i__1; ++j) {
10906 		temp = z__[j] / (work[j] * delta[j]);
10907 		psi += z__[j] * temp;
10908 		dpsi += temp * temp;
10909 		erretm += psi;
10910 /* L80: */
10911 	    }
10912 	    erretm = abs(erretm);
10913 
10914 /*           Evaluate PHI and the derivative DPHI */
10915 
10916 	    temp = z__[*n] / (work[*n] * delta[*n]);
10917 	    phi = z__[*n] * temp;
10918 	    dphi = temp * temp;
10919 	    erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (
10920 		    dpsi + dphi);
10921 
10922 	    w = rhoinv + phi + psi;
10923 /* L90: */
10924 	}
10925 
10926 /*        Return with INFO = 1, NITER = MAXIT and not converged */
10927 
10928 	*info = 1;
10929 	goto L240;
10930 
10931 /*        End for the case I = N */
10932 
10933     } else {
10934 
10935 /*        The case for I < N */
10936 
10937 	niter = 1;
10938 	ip1 = *i__ + 1;
10939 
10940 /*        Calculate initial guess */
10941 
10942 	delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]);
10943 	delsq2 = delsq / 2.;
10944 	temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2));
10945 	i__1 = *n;
10946 	for (j = 1; j <= i__1; ++j) {
10947 	    work[j] = d__[j] + d__[*i__] + temp;
10948 	    delta[j] = d__[j] - d__[*i__] - temp;
10949 /* L100: */
10950 	}
10951 
10952 	psi = 0.;
10953 	i__1 = *i__ - 1;
10954 	for (j = 1; j <= i__1; ++j) {
10955 	    psi += z__[j] * z__[j] / (work[j] * delta[j]);
10956 /* L110: */
10957 	}
10958 
10959 	phi = 0.;
10960 	i__1 = *i__ + 2;
10961 	for (j = *n; j >= i__1; --j) {
10962 	    phi += z__[j] * z__[j] / (work[j] * delta[j]);
10963 /* L120: */
10964 	}
10965 	c__ = rhoinv + psi + phi;
10966 	w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[
10967 		ip1] * z__[ip1] / (work[ip1] * delta[ip1]);
10968 
10969 	if (w > 0.) {
10970 
10971 /*           d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 */
10972 
10973 /*           We choose d(i) as origin. */
10974 
10975 	    orgati = true;
10976 	    sg2lb = 0.;
10977 	    sg2ub = delsq2;
10978 	    a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
10979 	    b = z__[*i__] * z__[*i__] * delsq;
10980 	    if (a > 0.) {
10981 		tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
10982 			d__1))));
10983 	    } else {
10984 		tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
10985 			c__ * 2.);
10986 	    }
10987 
10988 /*           TAU now is an estimation of SIGMA^2 - D( I )^2. The */
10989 /*           following, however, is the corresponding estimation of */
10990 /*           SIGMA - D( I ). */
10991 
10992 	    eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau));
10993 	} else {
10994 
10995 /*           (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 */
10996 
10997 /*           We choose d(i+1) as origin. */
10998 
10999 	    orgati = false;
11000 	    sg2lb = -delsq2;
11001 	    sg2ub = 0.;
11002 	    a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
11003 	    b = z__[ip1] * z__[ip1] * delsq;
11004 	    if (a < 0.) {
11005 		tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(
11006 			d__1))));
11007 	    } else {
11008 		tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) /
11009 			(c__ * 2.);
11010 	    }
11011 
11012 /*           TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The */
11013 /*           following, however, is the corresponding estimation of */
11014 /*           SIGMA - D( IP1 ). */
11015 
11016 	    eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau,
11017 		    abs(d__1))));
11018 	}
11019 
11020 	if (orgati) {
11021 	    ii = *i__;
11022 	    *sigma = d__[*i__] + eta;
11023 	    i__1 = *n;
11024 	    for (j = 1; j <= i__1; ++j) {
11025 		work[j] = d__[j] + d__[*i__] + eta;
11026 		delta[j] = d__[j] - d__[*i__] - eta;
11027 /* L130: */
11028 	    }
11029 	} else {
11030 	    ii = *i__ + 1;
11031 	    *sigma = d__[ip1] + eta;
11032 	    i__1 = *n;
11033 	    for (j = 1; j <= i__1; ++j) {
11034 		work[j] = d__[j] + d__[ip1] + eta;
11035 		delta[j] = d__[j] - d__[ip1] - eta;
11036 /* L140: */
11037 	    }
11038 	}
11039 	iim1 = ii - 1;
11040 	iip1 = ii + 1;
11041 
11042 /*        Evaluate PSI and the derivative DPSI */
11043 
11044 	dpsi = 0.;
11045 	psi = 0.;
11046 	erretm = 0.;
11047 	i__1 = iim1;
11048 	for (j = 1; j <= i__1; ++j) {
11049 	    temp = z__[j] / (work[j] * delta[j]);
11050 	    psi += z__[j] * temp;
11051 	    dpsi += temp * temp;
11052 	    erretm += psi;
11053 /* L150: */
11054 	}
11055 	erretm = abs(erretm);
11056 
11057 /*        Evaluate PHI and the derivative DPHI */
11058 
11059 	dphi = 0.;
11060 	phi = 0.;
11061 	i__1 = iip1;
11062 	for (j = *n; j >= i__1; --j) {
11063 	    temp = z__[j] / (work[j] * delta[j]);
11064 	    phi += z__[j] * temp;
11065 	    dphi += temp * temp;
11066 	    erretm += phi;
11067 /* L160: */
11068 	}
11069 
11070 	w = rhoinv + phi + psi;
11071 
11072 /*        W is the value of the secular function with */
11073 /*        its ii-th element removed. */
11074 
11075 	swtch3 = false;
11076 	if (orgati) {
11077 	    if (w < 0.) {
11078 		swtch3 = true;
11079 	    }
11080 	} else {
11081 	    if (w > 0.) {
11082 		swtch3 = true;
11083 	    }
11084 	}
11085 	if (ii == 1 || ii == *n) {
11086 	    swtch3 = false;
11087 	}
11088 
11089 	temp = z__[ii] / (work[ii] * delta[ii]);
11090 	dw = dpsi + dphi + temp * temp;
11091 	temp = z__[ii] * temp;
11092 	w += temp;
11093 	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
11094 		abs(tau) * dw;
11095 
11096 /*        Test for convergence */
11097 
11098 	if (abs(w) <= eps * erretm) {
11099 	    goto L240;
11100 	}
11101 
11102 	if (w <= 0.) {
11103 	    sg2lb = std::max(sg2lb,tau);
11104 	} else {
11105 	    sg2ub = std::min(sg2ub,tau);
11106 	}
11107 
11108 /*        Calculate the new step */
11109 
11110 	++niter;
11111 	if (! swtch3) {
11112 	    dtipsq = work[ip1] * delta[ip1];
11113 	    dtisq = work[*i__] * delta[*i__];
11114 	    if (orgati) {
11115 /* Computing 2nd power */
11116 		d__1 = z__[*i__] / dtisq;
11117 		c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
11118 	    } else {
11119 /* Computing 2nd power */
11120 		d__1 = z__[ip1] / dtipsq;
11121 		c__ = w - dtisq * dw - delsq * (d__1 * d__1);
11122 	    }
11123 	    a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
11124 	    b = dtipsq * dtisq * w;
11125 	    if (c__ == 0.) {
11126 		if (a == 0.) {
11127 		    if (orgati) {
11128 			a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi +
11129 				dphi);
11130 		    } else {
11131 			a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi +
11132 				dphi);
11133 		    }
11134 		}
11135 		eta = b / a;
11136 	    } else if (a <= 0.) {
11137 		eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
11138 			c__ * 2.);
11139 	    } else {
11140 		eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
11141 			d__1))));
11142 	    }
11143 	} else {
11144 
11145 /*           Interpolation using THREE most relevant poles */
11146 
11147 	    dtiim = work[iim1] * delta[iim1];
11148 	    dtiip = work[iip1] * delta[iip1];
11149 	    temp = rhoinv + psi + phi;
11150 	    if (orgati) {
11151 		temp1 = z__[iim1] / dtiim;
11152 		temp1 *= temp1;
11153 		c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) *
11154 			 (d__[iim1] + d__[iip1]) * temp1;
11155 		zz[0] = z__[iim1] * z__[iim1];
11156 		if (dpsi < temp1) {
11157 		    zz[2] = dtiip * dtiip * dphi;
11158 		} else {
11159 		    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
11160 		}
11161 	    } else {
11162 		temp1 = z__[iip1] / dtiip;
11163 		temp1 *= temp1;
11164 		c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) *
11165 			 (d__[iim1] + d__[iip1]) * temp1;
11166 		if (dphi < temp1) {
11167 		    zz[0] = dtiim * dtiim * dpsi;
11168 		} else {
11169 		    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
11170 		}
11171 		zz[2] = z__[iip1] * z__[iip1];
11172 	    }
11173 	    zz[1] = z__[ii] * z__[ii];
11174 	    dd[0] = dtiim;
11175 	    dd[1] = delta[ii] * work[ii];
11176 	    dd[2] = dtiip;
11177 	    dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
11178 	    if (*info != 0) {
11179 		goto L240;
11180 	    }
11181 	}
11182 
11183 /*        Note, eta should be positive if w is negative, and */
11184 /*        eta should be negative otherwise. However, */
11185 /*        if for some reason caused by roundoff, eta*w > 0, */
11186 /*        we simply use one Newton step instead. This way */
11187 /*        will guarantee eta*w < 0. */
11188 
11189 	if (w * eta >= 0.) {
11190 	    eta = -w / dw;
11191 	}
11192 	if (orgati) {
11193 	    temp1 = work[*i__] * delta[*i__];
11194 	    temp = eta - temp1;
11195 	} else {
11196 	    temp1 = work[ip1] * delta[ip1];
11197 	    temp = eta - temp1;
11198 	}
11199 	if (temp > sg2ub || temp < sg2lb) {
11200 	    if (w < 0.) {
11201 		eta = (sg2ub - tau) / 2.;
11202 	    } else {
11203 		eta = (sg2lb - tau) / 2.;
11204 	    }
11205 	}
11206 
11207 	tau += eta;
11208 	eta /= *sigma + sqrt(*sigma * *sigma + eta);
11209 
11210 	prew = w;
11211 
11212 	*sigma += eta;
11213 	i__1 = *n;
11214 	for (j = 1; j <= i__1; ++j) {
11215 	    work[j] += eta;
11216 	    delta[j] -= eta;
11217 /* L170: */
11218 	}
11219 
11220 /*        Evaluate PSI and the derivative DPSI */
11221 
11222 	dpsi = 0.;
11223 	psi = 0.;
11224 	erretm = 0.;
11225 	i__1 = iim1;
11226 	for (j = 1; j <= i__1; ++j) {
11227 	    temp = z__[j] / (work[j] * delta[j]);
11228 	    psi += z__[j] * temp;
11229 	    dpsi += temp * temp;
11230 	    erretm += psi;
11231 /* L180: */
11232 	}
11233 	erretm = abs(erretm);
11234 
11235 /*        Evaluate PHI and the derivative DPHI */
11236 
11237 	dphi = 0.;
11238 	phi = 0.;
11239 	i__1 = iip1;
11240 	for (j = *n; j >= i__1; --j) {
11241 	    temp = z__[j] / (work[j] * delta[j]);
11242 	    phi += z__[j] * temp;
11243 	    dphi += temp * temp;
11244 	    erretm += phi;
11245 /* L190: */
11246 	}
11247 
11248 	temp = z__[ii] / (work[ii] * delta[ii]);
11249 	dw = dpsi + dphi + temp * temp;
11250 	temp = z__[ii] * temp;
11251 	w = rhoinv + phi + psi + temp;
11252 	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
11253 		abs(tau) * dw;
11254 
11255 	if (w <= 0.) {
11256 	    sg2lb = std::max(sg2lb,tau);
11257 	} else {
11258 	    sg2ub = std::min(sg2ub,tau);
11259 	}
11260 
11261 	swtch = false;
11262 	if (orgati) {
11263 	    if (-w > abs(prew) / 10.) {
11264 		swtch = true;
11265 	    }
11266 	} else {
11267 	    if (w > abs(prew) / 10.) {
11268 		swtch = true;
11269 	    }
11270 	}
11271 
11272 /*        Main loop to update the values of the array   DELTA and WORK */
11273 
11274 	iter = niter + 1;
11275 
11276 	for (niter = iter; niter <= 20; ++niter) {
11277 
11278 /*           Test for convergence */
11279 
11280 	    if (abs(w) <= eps * erretm) {
11281 		goto L240;
11282 	    }
11283 
11284 /*           Calculate the new step */
11285 
11286 	    if (! swtch3) {
11287 		dtipsq = work[ip1] * delta[ip1];
11288 		dtisq = work[*i__] * delta[*i__];
11289 		if (! swtch) {
11290 		    if (orgati) {
11291 /* Computing 2nd power */
11292 			d__1 = z__[*i__] / dtisq;
11293 			c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
11294 		    } else {
11295 /* Computing 2nd power */
11296 			d__1 = z__[ip1] / dtipsq;
11297 			c__ = w - dtisq * dw - delsq * (d__1 * d__1);
11298 		    }
11299 		} else {
11300 		    temp = z__[ii] / (work[ii] * delta[ii]);
11301 		    if (orgati) {
11302 			dpsi += temp * temp;
11303 		    } else {
11304 			dphi += temp * temp;
11305 		    }
11306 		    c__ = w - dtisq * dpsi - dtipsq * dphi;
11307 		}
11308 		a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
11309 		b = dtipsq * dtisq * w;
11310 		if (c__ == 0.) {
11311 		    if (a == 0.) {
11312 			if (! swtch) {
11313 			    if (orgati) {
11314 				a = z__[*i__] * z__[*i__] + dtipsq * dtipsq *
11315 					(dpsi + dphi);
11316 			    } else {
11317 				a = z__[ip1] * z__[ip1] + dtisq * dtisq * (
11318 					dpsi + dphi);
11319 			    }
11320 			} else {
11321 			    a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi;
11322 			}
11323 		    }
11324 		    eta = b / a;
11325 		} else if (a <= 0.) {
11326 		    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))))
11327 			     / (c__ * 2.);
11328 		} else {
11329 		    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__,
11330 			    abs(d__1))));
11331 		}
11332 	    } else {
11333 
11334 /*              Interpolation using THREE most relevant poles */
11335 
11336 		dtiim = work[iim1] * delta[iim1];
11337 		dtiip = work[iip1] * delta[iip1];
11338 		temp = rhoinv + psi + phi;
11339 		if (swtch) {
11340 		    c__ = temp - dtiim * dpsi - dtiip * dphi;
11341 		    zz[0] = dtiim * dtiim * dpsi;
11342 		    zz[2] = dtiip * dtiip * dphi;
11343 		} else {
11344 		    if (orgati) {
11345 			temp1 = z__[iim1] / dtiim;
11346 			temp1 *= temp1;
11347 			temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[
11348 				iip1]) * temp1;
11349 			c__ = temp - dtiip * (dpsi + dphi) - temp2;
11350 			zz[0] = z__[iim1] * z__[iim1];
11351 			if (dpsi < temp1) {
11352 			    zz[2] = dtiip * dtiip * dphi;
11353 			} else {
11354 			    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
11355 			}
11356 		    } else {
11357 			temp1 = z__[iip1] / dtiip;
11358 			temp1 *= temp1;
11359 			temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[
11360 				iip1]) * temp1;
11361 			c__ = temp - dtiim * (dpsi + dphi) - temp2;
11362 			if (dphi < temp1) {
11363 			    zz[0] = dtiim * dtiim * dpsi;
11364 			} else {
11365 			    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
11366 			}
11367 			zz[2] = z__[iip1] * z__[iip1];
11368 		    }
11369 		}
11370 		dd[0] = dtiim;
11371 		dd[1] = delta[ii] * work[ii];
11372 		dd[2] = dtiip;
11373 		dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
11374 		if (*info != 0) {
11375 		    goto L240;
11376 		}
11377 	    }
11378 
11379 /*           Note, eta should be positive if w is negative, and */
11380 /*           eta should be negative otherwise. However, */
11381 /*           if for some reason caused by roundoff, eta*w > 0, */
11382 /*           we simply use one Newton step instead. This way */
11383 /*           will guarantee eta*w < 0. */
11384 
11385 	    if (w * eta >= 0.) {
11386 		eta = -w / dw;
11387 	    }
11388 	    if (orgati) {
11389 		temp1 = work[*i__] * delta[*i__];
11390 		temp = eta - temp1;
11391 	    } else {
11392 		temp1 = work[ip1] * delta[ip1];
11393 		temp = eta - temp1;
11394 	    }
11395 	    if (temp > sg2ub || temp < sg2lb) {
11396 		if (w < 0.) {
11397 		    eta = (sg2ub - tau) / 2.;
11398 		} else {
11399 		    eta = (sg2lb - tau) / 2.;
11400 		}
11401 	    }
11402 
11403 	    tau += eta;
11404 	    eta /= *sigma + sqrt(*sigma * *sigma + eta);
11405 
11406 	    *sigma += eta;
11407 	    i__1 = *n;
11408 	    for (j = 1; j <= i__1; ++j) {
11409 		work[j] += eta;
11410 		delta[j] -= eta;
11411 /* L200: */
11412 	    }
11413 
11414 	    prew = w;
11415 
11416 /*           Evaluate PSI and the derivative DPSI */
11417 
11418 	    dpsi = 0.;
11419 	    psi = 0.;
11420 	    erretm = 0.;
11421 	    i__1 = iim1;
11422 	    for (j = 1; j <= i__1; ++j) {
11423 		temp = z__[j] / (work[j] * delta[j]);
11424 		psi += z__[j] * temp;
11425 		dpsi += temp * temp;
11426 		erretm += psi;
11427 /* L210: */
11428 	    }
11429 	    erretm = abs(erretm);
11430 
11431 /*           Evaluate PHI and the derivative DPHI */
11432 
11433 	    dphi = 0.;
11434 	    phi = 0.;
11435 	    i__1 = iip1;
11436 	    for (j = *n; j >= i__1; --j) {
11437 		temp = z__[j] / (work[j] * delta[j]);
11438 		phi += z__[j] * temp;
11439 		dphi += temp * temp;
11440 		erretm += phi;
11441 /* L220: */
11442 	    }
11443 
11444 	    temp = z__[ii] / (work[ii] * delta[ii]);
11445 	    dw = dpsi + dphi + temp * temp;
11446 	    temp = z__[ii] * temp;
11447 	    w = rhoinv + phi + psi + temp;
11448 	    erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.
11449 		    + abs(tau) * dw;
11450 	    if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
11451 		swtch = ! swtch;
11452 	    }
11453 
11454 	    if (w <= 0.) {
11455 		sg2lb = std::max(sg2lb,tau);
11456 	    } else {
11457 		sg2ub = std::min(sg2ub,tau);
11458 	    }
11459 
11460 /* L230: */
11461 	}
11462 
11463 /*        Return with INFO = 1, NITER = MAXIT and not converged */
11464 
11465 	*info = 1;
11466 
11467     }
11468 
11469 L240:
11470     return 0;
11471 
11472 /*     End of DLASD4 */
11473 
11474 } /* dlasd4_ */
11475 
dlasd5_(integer * i__,double * d__,double * z__,double * delta,double * rho,double * dsigma,double * work)11476 /* Subroutine */ int dlasd5_(integer *i__, double *d__, double *z__,
11477 	double *delta, double *rho, double *dsigma, double *
11478 	work)
11479 {
11480     /* System generated locals */
11481     double d__1;
11482 
11483     /* Builtin functions
11484     double sqrt(double); */
11485 
11486     /* Local variables */
11487     double b, c__, w, del, tau, delsq;
11488 
11489 
11490 /*  -- LAPACK auxiliary routine (version 3.1) -- */
11491 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
11492 /*     November 2006 */
11493 
11494 /*     .. Scalar Arguments .. */
11495 /*     .. */
11496 /*     .. Array Arguments .. */
11497 /*     .. */
11498 
11499 /*  Purpose */
11500 /*  ======= */
11501 
11502 /*  This subroutine computes the square root of the I-th eigenvalue */
11503 /*  of a positive symmetric rank-one modification of a 2-by-2 diagonal */
11504 /*  matrix */
11505 
11506 /*             diag( D ) * diag( D ) +  RHO *  Z * transpose(Z) . */
11507 
11508 /*  The diagonal entries in the array D are assumed to satisfy */
11509 
11510 /*             0 <= D(i) < D(j)  for  i < j . */
11511 
11512 /*  We also assume RHO > 0 and that the Euclidean norm of the vector */
11513 /*  Z is one. */
11514 
11515 /*  Arguments */
11516 /*  ========= */
11517 
11518 /*  I      (input) INTEGER */
11519 /*         The index of the eigenvalue to be computed.  I = 1 or I = 2. */
11520 
11521 /*  D      (input) DOUBLE PRECISION array, dimension ( 2 ) */
11522 /*         The original eigenvalues.  We assume 0 <= D(1) < D(2). */
11523 
11524 /*  Z      (input) DOUBLE PRECISION array, dimension ( 2 ) */
11525 /*         The components of the updating vector. */
11526 
11527 /*  DELTA  (output) DOUBLE PRECISION array, dimension ( 2 ) */
11528 /*         Contains (D(j) - sigma_I) in its  j-th component. */
11529 /*         The vector DELTA contains the information necessary */
11530 /*         to construct the eigenvectors. */
11531 
11532 /*  RHO    (input) DOUBLE PRECISION */
11533 /*         The scalar in the symmetric updating formula. */
11534 
11535 /*  DSIGMA (output) DOUBLE PRECISION */
11536 /*         The computed sigma_I, the I-th updated eigenvalue. */
11537 
11538 /*  WORK   (workspace) DOUBLE PRECISION array, dimension ( 2 ) */
11539 /*         WORK contains (D(j) + sigma_I) in its  j-th component. */
11540 
11541 /*  Further Details */
11542 /*  =============== */
11543 
11544 /*  Based on contributions by */
11545 /*     Ren-Cang Li, Computer Science Division, University of California */
11546 /*     at Berkeley, USA */
11547 
11548 /*  ===================================================================== */
11549 
11550 /*     .. Parameters .. */
11551 /*     .. */
11552 /*     .. Local Scalars .. */
11553 /*     .. */
11554 /*     .. Intrinsic Functions .. */
11555 /*     .. */
11556 /*     .. Executable Statements .. */
11557 
11558     /* Parameter adjustments */
11559     --work;
11560     --delta;
11561     --z__;
11562     --d__;
11563 
11564     /* Function Body */
11565     del = d__[2] - d__[1];
11566     delsq = del * (d__[2] + d__[1]);
11567     if (*i__ == 1) {
11568 	w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] *
11569 		z__[1] / (d__[1] * 3. + d__[2])) / del + 1.;
11570 	if (w > 0.) {
11571 	    b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
11572 	    c__ = *rho * z__[1] * z__[1] * delsq;
11573 
11574 /*           B > ZERO, always */
11575 
11576 /*           The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) */
11577 
11578 	    tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
11579 
11580 /*           The following TAU is DSIGMA - D( 1 ) */
11581 
11582 	    tau /= d__[1] + sqrt(d__[1] * d__[1] + tau);
11583 	    *dsigma = d__[1] + tau;
11584 	    delta[1] = -tau;
11585 	    delta[2] = del - tau;
11586 	    work[1] = d__[1] * 2. + tau;
11587 	    work[2] = d__[1] + tau + d__[2];
11588 /*           DELTA( 1 ) = -Z( 1 ) / TAU */
11589 /*           DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) */
11590 	} else {
11591 	    b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
11592 	    c__ = *rho * z__[2] * z__[2] * delsq;
11593 
11594 /*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
11595 
11596 	    if (b > 0.) {
11597 		tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
11598 	    } else {
11599 		tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
11600 	    }
11601 
11602 /*           The following TAU is DSIGMA - D( 2 ) */
11603 
11604 	    tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1)));
11605 	    *dsigma = d__[2] + tau;
11606 	    delta[1] = -(del + tau);
11607 	    delta[2] = -tau;
11608 	    work[1] = d__[1] + tau + d__[2];
11609 	    work[2] = d__[2] * 2. + tau;
11610 /*           DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */
11611 /*           DELTA( 2 ) = -Z( 2 ) / TAU */
11612 	}
11613 /*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */
11614 /*        DELTA( 1 ) = DELTA( 1 ) / TEMP */
11615 /*        DELTA( 2 ) = DELTA( 2 ) / TEMP */
11616     } else {
11617 
11618 /*        Now I=2 */
11619 
11620 	b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
11621 	c__ = *rho * z__[2] * z__[2] * delsq;
11622 
11623 /*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
11624 
11625 	if (b > 0.) {
11626 	    tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
11627 	} else {
11628 	    tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
11629 	}
11630 
11631 /*        The following TAU is DSIGMA - D( 2 ) */
11632 
11633 	tau /= d__[2] + sqrt(d__[2] * d__[2] + tau);
11634 	*dsigma = d__[2] + tau;
11635 	delta[1] = -(del + tau);
11636 	delta[2] = -tau;
11637 	work[1] = d__[1] + tau + d__[2];
11638 	work[2] = d__[2] * 2. + tau;
11639 /*        DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) */
11640 /*        DELTA( 2 ) = -Z( 2 ) / TAU */
11641 /*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) */
11642 /*        DELTA( 1 ) = DELTA( 1 ) / TEMP */
11643 /*        DELTA( 2 ) = DELTA( 2 ) / TEMP */
11644     }
11645     return 0;
11646 
11647 /*     End of DLASD5 */
11648 
11649 } /* dlasd5_ */
11650 
dlasd6_(integer * icompq,integer * nl,integer * nr,integer * sqre,double * d__,double * vf,double * vl,double * alpha,double * beta,integer * idxq,integer * perm,integer * givptr,integer * givcol,integer * ldgcol,double * givnum,integer * ldgnum,double * poles,double * difl,double * difr,double * z__,integer * k,double * c__,double * s,double * work,integer * iwork,integer * info)11651 /* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr,
11652 	integer *sqre, double *d__, double *vf, double *vl,
11653 	double *alpha, double *beta, integer *idxq, integer *perm,
11654 	integer *givptr, integer *givcol, integer *ldgcol, double *givnum,
11655 	integer *ldgnum, double *poles, double *difl, double *
11656 	difr, double *z__, integer *k, double *c__, double *s,
11657 	double *work, integer *iwork, integer *info)
11658 {
11659 	/* Table of constant values */
11660 	static integer c__0 = 0;
11661 	static double c_b7 = 1.;
11662 	static integer c__1 = 1;
11663 	static integer c_n1 = -1;
11664 
11665     /* System generated locals */
11666     integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset,
11667 	    poles_dim1, poles_offset, i__1;
11668     double d__1, d__2;
11669 
11670     /* Local variables */
11671     integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw;
11672     integer isigma;
11673     double orgnrm;
11674 
11675 
11676 /*  -- LAPACK auxiliary routine (version 3.1) -- */
11677 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
11678 /*     November 2006 */
11679 
11680 /*     .. Scalar Arguments .. */
11681 /*     .. */
11682 /*     .. Array Arguments .. */
11683 /*     .. */
11684 
11685 /*  Purpose */
11686 /*  ======= */
11687 
11688 /*  DLASD6 computes the SVD of an updated upper bidiagonal matrix B */
11689 /*  obtained by merging two smaller ones by appending a row. This */
11690 /*  routine is used only for the problem which requires all singular */
11691 /*  values and optionally singular vector matrices in factored form. */
11692 /*  B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. */
11693 /*  A related subroutine, DLASD1, handles the case in which all singular */
11694 /*  values and singular vectors of the bidiagonal matrix are desired. */
11695 
11696 /*  DLASD6 computes the SVD as follows: */
11697 
11698 /*                ( D1(in)  0    0     0 ) */
11699 /*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in) */
11700 /*                (   0     0   D2(in) 0 ) */
11701 
11702 /*      = U(out) * ( D(out) 0) * VT(out) */
11703 
11704 /*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M */
11705 /*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros */
11706 /*  elsewhere; and the entry b is empty if SQRE = 0. */
11707 
11708 /*  The singular values of B can be computed using D1, D2, the first */
11709 /*  components of all the right singular vectors of the lower block, and */
11710 /*  the last components of all the right singular vectors of the upper */
11711 /*  block. These components are stored and updated in VF and VL, */
11712 /*  respectively, in DLASD6. Hence U and VT are not explicitly */
11713 /*  referenced. */
11714 
11715 /*  The singular values are stored in D. The algorithm consists of two */
11716 /*  stages: */
11717 
11718 /*        The first stage consists of deflating the size of the problem */
11719 /*        when there are multiple singular values or if there is a zero */
11720 /*        in the Z vector. For each such occurrence the dimension of the */
11721 /*        secular equation problem is reduced by one. This stage is */
11722 /*        performed by the routine DLASD7. */
11723 
11724 /*        The second stage consists of calculating the updated */
11725 /*        singular values. This is done by finding the roots of the */
11726 /*        secular equation via the routine DLASD4 (as called by DLASD8). */
11727 /*        This routine also updates VF and VL and computes the distances */
11728 /*        between the updated singular values and the old singular */
11729 /*        values. */
11730 
11731 /*  DLASD6 is called from DLASDA. */
11732 
11733 /*  Arguments */
11734 /*  ========= */
11735 
11736 /*  ICOMPQ (input) INTEGER */
11737 /*         Specifies whether singular vectors are to be computed in */
11738 /*         factored form: */
11739 /*         = 0: Compute singular values only. */
11740 /*         = 1: Compute singular vectors in factored form as well. */
11741 
11742 /*  NL     (input) INTEGER */
11743 /*         The row dimension of the upper block.  NL >= 1. */
11744 
11745 /*  NR     (input) INTEGER */
11746 /*         The row dimension of the lower block.  NR >= 1. */
11747 
11748 /*  SQRE   (input) INTEGER */
11749 /*         = 0: the lower block is an NR-by-NR square matrix. */
11750 /*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
11751 
11752 /*         The bidiagonal matrix has row dimension N = NL + NR + 1, */
11753 /*         and column dimension M = N + SQRE. */
11754 
11755 /*  D      (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ). */
11756 /*         On entry D(1:NL,1:NL) contains the singular values of the */
11757 /*         upper block, and D(NL+2:N) contains the singular values */
11758 /*         of the lower block. On exit D(1:N) contains the singular */
11759 /*         values of the modified matrix. */
11760 
11761 /*  VF     (input/output) DOUBLE PRECISION array, dimension ( M ) */
11762 /*         On entry, VF(1:NL+1) contains the first components of all */
11763 /*         right singular vectors of the upper block; and VF(NL+2:M) */
11764 /*         contains the first components of all right singular vectors */
11765 /*         of the lower block. On exit, VF contains the first components */
11766 /*         of all right singular vectors of the bidiagonal matrix. */
11767 
11768 /*  VL     (input/output) DOUBLE PRECISION array, dimension ( M ) */
11769 /*         On entry, VL(1:NL+1) contains the  last components of all */
11770 /*         right singular vectors of the upper block; and VL(NL+2:M) */
11771 /*         contains the last components of all right singular vectors of */
11772 /*         the lower block. On exit, VL contains the last components of */
11773 /*         all right singular vectors of the bidiagonal matrix. */
11774 
11775 /*  ALPHA  (input/output) DOUBLE PRECISION */
11776 /*         Contains the diagonal element associated with the added row. */
11777 
11778 /*  BETA   (input/output) DOUBLE PRECISION */
11779 /*         Contains the off-diagonal element associated with the added */
11780 /*         row. */
11781 
11782 /*  IDXQ   (output) INTEGER array, dimension ( N ) */
11783 /*         This contains the permutation which will reintegrate the */
11784 /*         subproblem just solved back into sorted order, i.e. */
11785 /*         D( IDXQ( I = 1, N ) ) will be in ascending order. */
11786 
11787 /*  PERM   (output) INTEGER array, dimension ( N ) */
11788 /*         The permutations (from deflation and sorting) to be applied */
11789 /*         to each block. Not referenced if ICOMPQ = 0. */
11790 
11791 /*  GIVPTR (output) INTEGER */
11792 /*         The number of Givens rotations which took place in this */
11793 /*         subproblem. Not referenced if ICOMPQ = 0. */
11794 
11795 /*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */
11796 /*         Each pair of numbers indicates a pair of columns to take place */
11797 /*         in a Givens rotation. Not referenced if ICOMPQ = 0. */
11798 
11799 /*  LDGCOL (input) INTEGER */
11800 /*         leading dimension of GIVCOL, must be at least N. */
11801 
11802 /*  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */
11803 /*         Each number indicates the C or S value to be used in the */
11804 /*         corresponding Givens rotation. Not referenced if ICOMPQ = 0. */
11805 
11806 /*  LDGNUM (input) INTEGER */
11807 /*         The leading dimension of GIVNUM and POLES, must be at least N. */
11808 
11809 /*  POLES  (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */
11810 /*         On exit, POLES(1,*) is an array containing the new singular */
11811 /*         values obtained from solving the secular equation, and */
11812 /*         POLES(2,*) is an array containing the poles in the secular */
11813 /*         equation. Not referenced if ICOMPQ = 0. */
11814 
11815 /*  DIFL   (output) DOUBLE PRECISION array, dimension ( N ) */
11816 /*         On exit, DIFL(I) is the distance between I-th updated */
11817 /*         (undeflated) singular value and the I-th (undeflated) old */
11818 /*         singular value. */
11819 
11820 /*  DIFR   (output) DOUBLE PRECISION array, */
11821 /*                  dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and */
11822 /*                  dimension ( N ) if ICOMPQ = 0. */
11823 /*         On exit, DIFR(I, 1) is the distance between I-th updated */
11824 /*         (undeflated) singular value and the I+1-th (undeflated) old */
11825 /*         singular value. */
11826 
11827 /*         If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */
11828 /*         normalizing factors for the right singular vector matrix. */
11829 
11830 /*         See DLASD8 for details on DIFL and DIFR. */
11831 
11832 /*  Z      (output) DOUBLE PRECISION array, dimension ( M ) */
11833 /*         The first elements of this array contain the components */
11834 /*         of the deflation-adjusted updating row vector. */
11835 
11836 /*  K      (output) INTEGER */
11837 /*         Contains the dimension of the non-deflated matrix, */
11838 /*         This is the order of the related secular equation. 1 <= K <=N. */
11839 
11840 /*  C      (output) DOUBLE PRECISION */
11841 /*         C contains garbage if SQRE =0 and the C-value of a Givens */
11842 /*         rotation related to the right null space if SQRE = 1. */
11843 
11844 /*  S      (output) DOUBLE PRECISION */
11845 /*         S contains garbage if SQRE =0 and the S-value of a Givens */
11846 /*         rotation related to the right null space if SQRE = 1. */
11847 
11848 /*  WORK   (workspace) DOUBLE PRECISION array, dimension ( 4 * M ) */
11849 
11850 /*  IWORK  (workspace) INTEGER array, dimension ( 3 * N ) */
11851 
11852 /*  INFO   (output) INTEGER */
11853 /*          = 0:  successful exit. */
11854 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
11855 /*          > 0:  if INFO = 1, an singular value did not converge */
11856 
11857 /*  Further Details */
11858 /*  =============== */
11859 
11860 /*  Based on contributions by */
11861 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
11862 /*     California at Berkeley, USA */
11863 
11864 /*  ===================================================================== */
11865 
11866 /*     .. Parameters .. */
11867 /*     .. */
11868 /*     .. Local Scalars .. */
11869 /*     .. */
11870 /*     .. External Subroutines .. */
11871 /*     .. */
11872 /*     .. Intrinsic Functions .. */
11873 /*     .. */
11874 /*     .. Executable Statements .. */
11875 
11876 /*     Test the input parameters. */
11877 
11878     /* Parameter adjustments */
11879     --d__;
11880     --vf;
11881     --vl;
11882     --idxq;
11883     --perm;
11884     givcol_dim1 = *ldgcol;
11885     givcol_offset = 1 + givcol_dim1;
11886     givcol -= givcol_offset;
11887     poles_dim1 = *ldgnum;
11888     poles_offset = 1 + poles_dim1;
11889     poles -= poles_offset;
11890     givnum_dim1 = *ldgnum;
11891     givnum_offset = 1 + givnum_dim1;
11892     givnum -= givnum_offset;
11893     --difl;
11894     --difr;
11895     --z__;
11896     --work;
11897     --iwork;
11898 
11899     /* Function Body */
11900     *info = 0;
11901     n = *nl + *nr + 1;
11902     m = n + *sqre;
11903 
11904     if (*icompq < 0 || *icompq > 1) {
11905 	*info = -1;
11906     } else if (*nl < 1) {
11907 	*info = -2;
11908     } else if (*nr < 1) {
11909 	*info = -3;
11910     } else if (*sqre < 0 || *sqre > 1) {
11911 	*info = -4;
11912     } else if (*ldgcol < n) {
11913 	*info = -14;
11914     } else if (*ldgnum < n) {
11915 	*info = -16;
11916     }
11917     if (*info != 0) {
11918 	i__1 = -(*info);
11919 	xerbla_("DLASD6", &i__1);
11920 	return 0;
11921     }
11922 
11923 /*     The following values are for bookkeeping purposes only.  They are */
11924 /*     integer pointers which indicate the portion of the workspace */
11925 /*     used by a particular array in DLASD7 and DLASD8. */
11926 
11927     isigma = 1;
11928     iw = isigma + n;
11929     ivfw = iw + m;
11930     ivlw = ivfw + m;
11931 
11932     idx = 1;
11933     idxc = idx + n;
11934     idxp = idxc + n;
11935 
11936 /*     Scale. */
11937 
11938 /* Computing MAX */
11939     d__1 = abs(*alpha), d__2 = abs(*beta);
11940     orgnrm = std::max(d__1,d__2);
11941     d__[*nl + 1] = 0.;
11942     i__1 = n;
11943     for (i__ = 1; i__ <= i__1; ++i__) {
11944 	if ((d__1 = d__[i__], abs(d__1)) > orgnrm) {
11945 	    orgnrm = (d__1 = d__[i__], abs(d__1));
11946 	}
11947 /* L10: */
11948     }
11949     dlascl_("G", &c__0, &c__0, &orgnrm, &c_b7, &n, &c__1, &d__[1], &n, info);
11950     *alpha /= orgnrm;
11951     *beta /= orgnrm;
11952 
11953 /*     Sort and Deflate singular values. */
11954 
11955     dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], &
11956 	    work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], &
11957 	    iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[
11958 	    givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s,
11959 	    info);
11960 
11961 /*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */
11962 
11963     dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1],
11964 	    ldgnum, &work[isigma], &work[iw], info);
11965 
11966 /*     Save the poles if ICOMPQ = 1. */
11967 
11968     if (*icompq == 1) {
11969 	dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1);
11970 	dcopy_(k, &work[isigma], &c__1, &poles[(poles_dim1 << 1) + 1], &c__1);
11971     }
11972 
11973 /*     Unscale. */
11974 
11975     dlascl_("G", &c__0, &c__0, &c_b7, &orgnrm, &n, &c__1, &d__[1], &n, info);
11976 
11977 /*     Prepare the IDXQ sorting permutation. */
11978 
11979     n1 = *k;
11980     n2 = n - *k;
11981     dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
11982 
11983     return 0;
11984 
11985 /*     End of DLASD6 */
11986 
11987 } /* dlasd6_ */
11988 
dlasd7_(integer * icompq,integer * nl,integer * nr,integer * sqre,integer * k,double * d__,double * z__,double * zw,double * vf,double * vfw,double * vl,double * vlw,double * alpha,double * beta,double * dsigma,integer * idx,integer * idxp,integer * idxq,integer * perm,integer * givptr,integer * givcol,integer * ldgcol,double * givnum,integer * ldgnum,double * c__,double * s,integer * info)11989 /* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr,
11990 	integer *sqre, integer *k, double *d__, double *z__,
11991 	double *zw, double *vf, double *vfw, double *vl,
11992 	double *vlw, double *alpha, double *beta, double *
11993 	dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm,
11994 	integer *givptr, integer *givcol, integer *ldgcol, double *givnum,
11995 	integer *ldgnum, double *c__, double *s, integer *info)
11996 {
11997 	/* Table of constant values */
11998 	static integer c__1 = 1;
11999 
12000     /* System generated locals */
12001     integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1;
12002     double d__1, d__2;
12003 
12004     /* Local variables */
12005     integer i__, j, m, n, k2;
12006     double z1;
12007     integer jp;
12008     double eps, tau, tol;
12009     integer nlp1, nlp2, idxi, idxj;
12010     integer idxjp;
12011     integer jprev;
12012     double hlftol;
12013 
12014 
12015 /*  -- LAPACK auxiliary routine (version 3.1) -- */
12016 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
12017 /*     November 2006 */
12018 
12019 /*     .. Scalar Arguments .. */
12020 /*     .. */
12021 /*     .. Array Arguments .. */
12022 /*     .. */
12023 
12024 /*  Purpose */
12025 /*  ======= */
12026 
12027 /*  DLASD7 merges the two sets of singular values together into a single */
12028 /*  sorted set. Then it tries to deflate the size of the problem. There */
12029 /*  are two ways in which deflation can occur:  when two or more singular */
12030 /*  values are close together or if there is a tiny entry in the Z */
12031 /*  vector. For each such occurrence the order of the related */
12032 /*  secular equation problem is reduced by one. */
12033 
12034 /*  DLASD7 is called from DLASD6. */
12035 
12036 /*  Arguments */
12037 /*  ========= */
12038 
12039 /*  ICOMPQ  (input) INTEGER */
12040 /*          Specifies whether singular vectors are to be computed */
12041 /*          in compact form, as follows: */
12042 /*          = 0: Compute singular values only. */
12043 /*          = 1: Compute singular vectors of upper */
12044 /*               bidiagonal matrix in compact form. */
12045 
12046 /*  NL     (input) INTEGER */
12047 /*         The row dimension of the upper block. NL >= 1. */
12048 
12049 /*  NR     (input) INTEGER */
12050 /*         The row dimension of the lower block. NR >= 1. */
12051 
12052 /*  SQRE   (input) INTEGER */
12053 /*         = 0: the lower block is an NR-by-NR square matrix. */
12054 /*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
12055 
12056 /*         The bidiagonal matrix has */
12057 /*         N = NL + NR + 1 rows and */
12058 /*         M = N + SQRE >= N columns. */
12059 
12060 /*  K      (output) INTEGER */
12061 /*         Contains the dimension of the non-deflated matrix, this is */
12062 /*         the order of the related secular equation. 1 <= K <=N. */
12063 
12064 /*  D      (input/output) DOUBLE PRECISION array, dimension ( N ) */
12065 /*         On entry D contains the singular values of the two submatrices */
12066 /*         to be combined. On exit D contains the trailing (N-K) updated */
12067 /*         singular values (those which were deflated) sorted into */
12068 /*         increasing order. */
12069 
12070 /*  Z      (output) DOUBLE PRECISION array, dimension ( M ) */
12071 /*         On exit Z contains the updating row vector in the secular */
12072 /*         equation. */
12073 
12074 /*  ZW     (workspace) DOUBLE PRECISION array, dimension ( M ) */
12075 /*         Workspace for Z. */
12076 
12077 /*  VF     (input/output) DOUBLE PRECISION array, dimension ( M ) */
12078 /*         On entry, VF(1:NL+1) contains the first components of all */
12079 /*         right singular vectors of the upper block; and VF(NL+2:M) */
12080 /*         contains the first components of all right singular vectors */
12081 /*         of the lower block. On exit, VF contains the first components */
12082 /*         of all right singular vectors of the bidiagonal matrix. */
12083 
12084 /*  VFW    (workspace) DOUBLE PRECISION array, dimension ( M ) */
12085 /*         Workspace for VF. */
12086 
12087 /*  VL     (input/output) DOUBLE PRECISION array, dimension ( M ) */
12088 /*         On entry, VL(1:NL+1) contains the  last components of all */
12089 /*         right singular vectors of the upper block; and VL(NL+2:M) */
12090 /*         contains the last components of all right singular vectors */
12091 /*         of the lower block. On exit, VL contains the last components */
12092 /*         of all right singular vectors of the bidiagonal matrix. */
12093 
12094 /*  VLW    (workspace) DOUBLE PRECISION array, dimension ( M ) */
12095 /*         Workspace for VL. */
12096 
12097 /*  ALPHA  (input) DOUBLE PRECISION */
12098 /*         Contains the diagonal element associated with the added row. */
12099 
12100 /*  BETA   (input) DOUBLE PRECISION */
12101 /*         Contains the off-diagonal element associated with the added */
12102 /*         row. */
12103 
12104 /*  DSIGMA (output) DOUBLE PRECISION array, dimension ( N ) */
12105 /*         Contains a copy of the diagonal elements (K-1 singular values */
12106 /*         and one zero) in the secular equation. */
12107 
12108 /*  IDX    (workspace) INTEGER array, dimension ( N ) */
12109 /*         This will contain the permutation used to sort the contents of */
12110 /*         D into ascending order. */
12111 
12112 /*  IDXP   (workspace) INTEGER array, dimension ( N ) */
12113 /*         This will contain the permutation used to place deflated */
12114 /*         values of D at the end of the array. On output IDXP(2:K) */
12115 /*         points to the nondeflated D-values and IDXP(K+1:N) */
12116 /*         points to the deflated singular values. */
12117 
12118 /*  IDXQ   (input) INTEGER array, dimension ( N ) */
12119 /*         This contains the permutation which separately sorts the two */
12120 /*         sub-problems in D into ascending order.  Note that entries in */
12121 /*         the first half of this permutation must first be moved one */
12122 /*         position backward; and entries in the second half */
12123 /*         must first have NL+1 added to their values. */
12124 
12125 /*  PERM   (output) INTEGER array, dimension ( N ) */
12126 /*         The permutations (from deflation and sorting) to be applied */
12127 /*         to each singular block. Not referenced if ICOMPQ = 0. */
12128 
12129 /*  GIVPTR (output) INTEGER */
12130 /*         The number of Givens rotations which took place in this */
12131 /*         subproblem. Not referenced if ICOMPQ = 0. */
12132 
12133 /*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 ) */
12134 /*         Each pair of numbers indicates a pair of columns to take place */
12135 /*         in a Givens rotation. Not referenced if ICOMPQ = 0. */
12136 
12137 /*  LDGCOL (input) INTEGER */
12138 /*         The leading dimension of GIVCOL, must be at least N. */
12139 
12140 /*  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) */
12141 /*         Each number indicates the C or S value to be used in the */
12142 /*         corresponding Givens rotation. Not referenced if ICOMPQ = 0. */
12143 
12144 /*  LDGNUM (input) INTEGER */
12145 /*         The leading dimension of GIVNUM, must be at least N. */
12146 
12147 /*  C      (output) DOUBLE PRECISION */
12148 /*         C contains garbage if SQRE =0 and the C-value of a Givens */
12149 /*         rotation related to the right null space if SQRE = 1. */
12150 
12151 /*  S      (output) DOUBLE PRECISION */
12152 /*         S contains garbage if SQRE =0 and the S-value of a Givens */
12153 /*         rotation related to the right null space if SQRE = 1. */
12154 
12155 /*  INFO   (output) INTEGER */
12156 /*         = 0:  successful exit. */
12157 /*         < 0:  if INFO = -i, the i-th argument had an illegal value. */
12158 
12159 /*  Further Details */
12160 /*  =============== */
12161 
12162 /*  Based on contributions by */
12163 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
12164 /*     California at Berkeley, USA */
12165 
12166 /*  ===================================================================== */
12167 
12168 /*     .. Parameters .. */
12169 /*     .. */
12170 /*     .. Local Scalars .. */
12171 
12172 /*     .. */
12173 /*     .. External Subroutines .. */
12174 /*     .. */
12175 /*     .. External Functions .. */
12176 /*     .. */
12177 /*     .. Intrinsic Functions .. */
12178 /*     .. */
12179 /*     .. Executable Statements .. */
12180 
12181 /*     Test the input parameters. */
12182 
12183     /* Parameter adjustments */
12184     --d__;
12185     --z__;
12186     --zw;
12187     --vf;
12188     --vfw;
12189     --vl;
12190     --vlw;
12191     --dsigma;
12192     --idx;
12193     --idxp;
12194     --idxq;
12195     --perm;
12196     givcol_dim1 = *ldgcol;
12197     givcol_offset = 1 + givcol_dim1;
12198     givcol -= givcol_offset;
12199     givnum_dim1 = *ldgnum;
12200     givnum_offset = 1 + givnum_dim1;
12201     givnum -= givnum_offset;
12202 
12203     /* Function Body */
12204     *info = 0;
12205     n = *nl + *nr + 1;
12206     m = n + *sqre;
12207 
12208     if (*icompq < 0 || *icompq > 1) {
12209 	*info = -1;
12210     } else if (*nl < 1) {
12211 	*info = -2;
12212     } else if (*nr < 1) {
12213 	*info = -3;
12214     } else if (*sqre < 0 || *sqre > 1) {
12215 	*info = -4;
12216     } else if (*ldgcol < n) {
12217 	*info = -22;
12218     } else if (*ldgnum < n) {
12219 	*info = -24;
12220     }
12221     if (*info != 0) {
12222 	i__1 = -(*info);
12223 	xerbla_("DLASD7", &i__1);
12224 	return 0;
12225     }
12226 
12227     nlp1 = *nl + 1;
12228     nlp2 = *nl + 2;
12229     if (*icompq == 1) {
12230 	*givptr = 0;
12231     }
12232 
12233 /*     Generate the first part of the vector Z and move the singular */
12234 /*     values in the first part of D one position backward. */
12235 
12236     z1 = *alpha * vl[nlp1];
12237     vl[nlp1] = 0.;
12238     tau = vf[nlp1];
12239     for (i__ = *nl; i__ >= 1; --i__) {
12240 	z__[i__ + 1] = *alpha * vl[i__];
12241 	vl[i__] = 0.;
12242 	vf[i__ + 1] = vf[i__];
12243 	d__[i__ + 1] = d__[i__];
12244 	idxq[i__ + 1] = idxq[i__] + 1;
12245 /* L10: */
12246     }
12247     vf[1] = tau;
12248 
12249 /*     Generate the second part of the vector Z. */
12250 
12251     i__1 = m;
12252     for (i__ = nlp2; i__ <= i__1; ++i__) {
12253 	z__[i__] = *beta * vf[i__];
12254 	vf[i__] = 0.;
12255 /* L20: */
12256     }
12257 
12258 /*     Sort the singular values into increasing order */
12259 
12260     i__1 = n;
12261     for (i__ = nlp2; i__ <= i__1; ++i__) {
12262 	idxq[i__] += nlp1;
12263 /* L30: */
12264     }
12265 
12266 /*     DSIGMA, IDXC, IDXC, and ZW are used as storage space. */
12267 
12268     i__1 = n;
12269     for (i__ = 2; i__ <= i__1; ++i__) {
12270 	dsigma[i__] = d__[idxq[i__]];
12271 	zw[i__] = z__[idxq[i__]];
12272 	vfw[i__] = vf[idxq[i__]];
12273 	vlw[i__] = vl[idxq[i__]];
12274 /* L40: */
12275     }
12276 
12277     dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
12278 
12279     i__1 = n;
12280     for (i__ = 2; i__ <= i__1; ++i__) {
12281 	idxi = idx[i__] + 1;
12282 	d__[i__] = dsigma[idxi];
12283 	z__[i__] = zw[idxi];
12284 	vf[i__] = vfw[idxi];
12285 	vl[i__] = vlw[idxi];
12286 /* L50: */
12287     }
12288 
12289 /*     Calculate the allowable deflation tolerence */
12290 
12291     eps = dlamch_("Epsilon");
12292 /* Computing MAX */
12293     d__1 = abs(*alpha), d__2 = abs(*beta);
12294     tol = std::max(d__1,d__2);
12295 /* Computing MAX */
12296     d__2 = (d__1 = d__[n], abs(d__1));
12297     tol = eps * 64. * std::max(d__2,tol);
12298 
12299 /*     There are 2 kinds of deflation -- first a value in the z-vector */
12300 /*     is small, second two (or more) singular values are very close */
12301 /*     together (their difference is small). */
12302 
12303 /*     If the value in the z-vector is small, we simply permute the */
12304 /*     array so that the corresponding singular value is moved to the */
12305 /*     end. */
12306 
12307 /*     If two values in the D-vector are close, we perform a two-sided */
12308 /*     rotation designed to make one of the corresponding z-vector */
12309 /*     entries zero, and then permute the array so that the deflated */
12310 /*     singular value is moved to the end. */
12311 
12312 /*     If there are multiple singular values then the problem deflates. */
12313 /*     Here the number of equal singular values are found.  As each equal */
12314 /*     singular value is found, an elementary reflector is computed to */
12315 /*     rotate the corresponding singular subspace so that the */
12316 /*     corresponding components of Z are zero in this new basis. */
12317 
12318     *k = 1;
12319     k2 = n + 1;
12320     i__1 = n;
12321     for (j = 2; j <= i__1; ++j) {
12322 	if ((d__1 = z__[j], abs(d__1)) <= tol) {
12323 
12324 /*           Deflate due to small z component. */
12325 
12326 	    --k2;
12327 	    idxp[k2] = j;
12328 	    if (j == n) {
12329 		goto L100;
12330 	    }
12331 	} else {
12332 	    jprev = j;
12333 	    goto L70;
12334 	}
12335 /* L60: */
12336     }
12337 L70:
12338     j = jprev;
12339 L80:
12340     ++j;
12341     if (j > n) {
12342 	goto L90;
12343     }
12344     if ((d__1 = z__[j], abs(d__1)) <= tol) {
12345 
12346 /*        Deflate due to small z component. */
12347 
12348 	--k2;
12349 	idxp[k2] = j;
12350     } else {
12351 
12352 /*        Check if singular values are close enough to allow deflation. */
12353 
12354 	if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) {
12355 
12356 /*           Deflation is possible. */
12357 
12358 	    *s = z__[jprev];
12359 	    *c__ = z__[j];
12360 
12361 /*           Find sqrt(a**2+b**2) without overflow or */
12362 /*           destructive underflow. */
12363 
12364 	    tau = dlapy2_(c__, s);
12365 	    z__[j] = tau;
12366 	    z__[jprev] = 0.;
12367 	    *c__ /= tau;
12368 	    *s = -(*s) / tau;
12369 
12370 /*           Record the appropriate Givens rotation */
12371 
12372 	    if (*icompq == 1) {
12373 		++(*givptr);
12374 		idxjp = idxq[idx[jprev] + 1];
12375 		idxj = idxq[idx[j] + 1];
12376 		if (idxjp <= nlp1) {
12377 		    --idxjp;
12378 		}
12379 		if (idxj <= nlp1) {
12380 		    --idxj;
12381 		}
12382 		givcol[*givptr + (givcol_dim1 << 1)] = idxjp;
12383 		givcol[*givptr + givcol_dim1] = idxj;
12384 		givnum[*givptr + (givnum_dim1 << 1)] = *c__;
12385 		givnum[*givptr + givnum_dim1] = *s;
12386 	    }
12387 	    drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s);
12388 	    drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s);
12389 	    --k2;
12390 	    idxp[k2] = jprev;
12391 	    jprev = j;
12392 	} else {
12393 	    ++(*k);
12394 	    zw[*k] = z__[jprev];
12395 	    dsigma[*k] = d__[jprev];
12396 	    idxp[*k] = jprev;
12397 	    jprev = j;
12398 	}
12399     }
12400     goto L80;
12401 L90:
12402 
12403 /*     Record the last singular value. */
12404 
12405     ++(*k);
12406     zw[*k] = z__[jprev];
12407     dsigma[*k] = d__[jprev];
12408     idxp[*k] = jprev;
12409 
12410 L100:
12411 
12412 /*     Sort the singular values into DSIGMA. The singular values which */
12413 /*     were not deflated go into the first K slots of DSIGMA, except */
12414 /*     that DSIGMA(1) is treated separately. */
12415 
12416     i__1 = n;
12417     for (j = 2; j <= i__1; ++j) {
12418 	jp = idxp[j];
12419 	dsigma[j] = d__[jp];
12420 	vfw[j] = vf[jp];
12421 	vlw[j] = vl[jp];
12422 /* L110: */
12423     }
12424     if (*icompq == 1) {
12425 	i__1 = n;
12426 	for (j = 2; j <= i__1; ++j) {
12427 	    jp = idxp[j];
12428 	    perm[j] = idxq[idx[jp] + 1];
12429 	    if (perm[j] <= nlp1) {
12430 		--perm[j];
12431 	    }
12432 /* L120: */
12433 	}
12434     }
12435 
12436 /*     The deflated singular values go back into the last N - K slots of */
12437 /*     D. */
12438 
12439     i__1 = n - *k;
12440     dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
12441 
12442 /*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and */
12443 /*     VL(M). */
12444 
12445     dsigma[1] = 0.;
12446     hlftol = tol / 2.;
12447     if (abs(dsigma[2]) <= hlftol) {
12448 	dsigma[2] = hlftol;
12449     }
12450     if (m > n) {
12451 	z__[1] = dlapy2_(&z1, &z__[m]);
12452 	if (z__[1] <= tol) {
12453 	    *c__ = 1.;
12454 	    *s = 0.;
12455 	    z__[1] = tol;
12456 	} else {
12457 	    *c__ = z1 / z__[1];
12458 	    *s = -z__[m] / z__[1];
12459 	}
12460 	drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s);
12461 	drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s);
12462     } else {
12463 	if (abs(z1) <= tol) {
12464 	    z__[1] = tol;
12465 	} else {
12466 	    z__[1] = z1;
12467 	}
12468     }
12469 
12470 /*     Restore Z, VF, and VL. */
12471 
12472     i__1 = *k - 1;
12473     dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1);
12474     i__1 = n - 1;
12475     dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1);
12476     i__1 = n - 1;
12477     dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1);
12478 
12479     return 0;
12480 
12481 /*     End of DLASD7 */
12482 
12483 } /* dlasd7_ */
12484 
dlasd8_(integer * icompq,integer * k,double * d__,double * z__,double * vf,double * vl,double * difl,double * difr,integer * lddifr,double * dsigma,double * work,integer * info)12485 /* Subroutine */ int dlasd8_(integer *icompq, integer *k, double *d__,
12486 	double *z__, double *vf, double *vl, double *difl,
12487 	double *difr, integer *lddifr, double *dsigma, double *
12488 	work, integer *info)
12489 {
12490 	/* Table of constant values */
12491 	static integer c__1 = 1;
12492 	static integer c__0 = 0;
12493 	static double c_b8 = 1.;
12494 
12495     /* System generated locals */
12496     integer difr_dim1, difr_offset, i__1, i__2;
12497     double d__1, d__2;
12498 
12499     /* Local variables */
12500     integer i__, j;
12501     double dj, rho;
12502     integer iwk1, iwk2, iwk3;
12503 	double temp;
12504     integer iwk2i, iwk3i;
12505     double diflj, difrj, dsigj;
12506     double dsigjp;
12507 
12508 
12509 /*  -- LAPACK auxiliary routine (version 3.1) -- */
12510 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
12511 /*     November 2006 */
12512 
12513 /*     .. Scalar Arguments .. */
12514 /*     .. */
12515 /*     .. Array Arguments .. */
12516 /*     .. */
12517 
12518 /*  Purpose */
12519 /*  ======= */
12520 
12521 /*  DLASD8 finds the square roots of the roots of the secular equation, */
12522 /*  as defined by the values in DSIGMA and Z. It makes the appropriate */
12523 /*  calls to DLASD4, and stores, for each  element in D, the distance */
12524 /*  to its two nearest poles (elements in DSIGMA). It also updates */
12525 /*  the arrays VF and VL, the first and last components of all the */
12526 /*  right singular vectors of the original bidiagonal matrix. */
12527 
12528 /*  DLASD8 is called from DLASD6. */
12529 
12530 /*  Arguments */
12531 /*  ========= */
12532 
12533 /*  ICOMPQ  (input) INTEGER */
12534 /*          Specifies whether singular vectors are to be computed in */
12535 /*          factored form in the calling routine: */
12536 /*          = 0: Compute singular values only. */
12537 /*          = 1: Compute singular vectors in factored form as well. */
12538 
12539 /*  K       (input) INTEGER */
12540 /*          The number of terms in the rational function to be solved */
12541 /*          by DLASD4.  K >= 1. */
12542 
12543 /*  D       (output) DOUBLE PRECISION array, dimension ( K ) */
12544 /*          On output, D contains the updated singular values. */
12545 
12546 /*  Z       (input) DOUBLE PRECISION array, dimension ( K ) */
12547 /*          The first K elements of this array contain the components */
12548 /*          of the deflation-adjusted updating row vector. */
12549 
12550 /*  VF      (input/output) DOUBLE PRECISION array, dimension ( K ) */
12551 /*          On entry, VF contains  information passed through DBEDE8. */
12552 /*          On exit, VF contains the first K components of the first */
12553 /*          components of all right singular vectors of the bidiagonal */
12554 /*          matrix. */
12555 
12556 /*  VL      (input/output) DOUBLE PRECISION array, dimension ( K ) */
12557 /*          On entry, VL contains  information passed through DBEDE8. */
12558 /*          On exit, VL contains the first K components of the last */
12559 /*          components of all right singular vectors of the bidiagonal */
12560 /*          matrix. */
12561 
12562 /*  DIFL    (output) DOUBLE PRECISION array, dimension ( K ) */
12563 /*          On exit, DIFL(I) = D(I) - DSIGMA(I). */
12564 
12565 /*  DIFR    (output) DOUBLE PRECISION array, */
12566 /*                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */
12567 /*                   dimension ( K ) if ICOMPQ = 0. */
12568 /*          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */
12569 /*          defined and will not be referenced. */
12570 
12571 /*          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */
12572 /*          normalizing factors for the right singular vector matrix. */
12573 
12574 /*  LDDIFR  (input) INTEGER */
12575 /*          The leading dimension of DIFR, must be at least K. */
12576 
12577 /*  DSIGMA  (input) DOUBLE PRECISION array, dimension ( K ) */
12578 /*          The first K elements of this array contain the old roots */
12579 /*          of the deflated updating problem.  These are the poles */
12580 /*          of the secular equation. */
12581 
12582 /*  WORK    (workspace) DOUBLE PRECISION array, dimension at least 3 * K */
12583 
12584 /*  INFO    (output) INTEGER */
12585 /*          = 0:  successful exit. */
12586 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
12587 /*          > 0:  if INFO = 1, an singular value did not converge */
12588 
12589 /*  Further Details */
12590 /*  =============== */
12591 
12592 /*  Based on contributions by */
12593 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
12594 /*     California at Berkeley, USA */
12595 
12596 /*  ===================================================================== */
12597 
12598 /*     .. Parameters .. */
12599 /*     .. */
12600 /*     .. Local Scalars .. */
12601 /*     .. */
12602 /*     .. External Subroutines .. */
12603 /*     .. */
12604 /*     .. External Functions .. */
12605 /*     .. */
12606 /*     .. Intrinsic Functions .. */
12607 /*     .. */
12608 /*     .. Executable Statements .. */
12609 
12610 /*     Test the input parameters. */
12611 
12612     /* Parameter adjustments */
12613     --d__;
12614     --z__;
12615     --vf;
12616     --vl;
12617     --difl;
12618     difr_dim1 = *lddifr;
12619     difr_offset = 1 + difr_dim1;
12620     difr -= difr_offset;
12621     --dsigma;
12622     --work;
12623 
12624     /* Function Body */
12625     *info = 0;
12626 
12627     if (*icompq < 0 || *icompq > 1) {
12628 	*info = -1;
12629     } else if (*k < 1) {
12630 	*info = -2;
12631     } else if (*lddifr < *k) {
12632 	*info = -9;
12633     }
12634     if (*info != 0) {
12635 	i__1 = -(*info);
12636 	xerbla_("DLASD8", &i__1);
12637 	return 0;
12638     }
12639 
12640 /*     Quick return if possible */
12641 
12642     if (*k == 1) {
12643 	d__[1] = abs(z__[1]);
12644 	difl[1] = d__[1];
12645 	if (*icompq == 1) {
12646 	    difl[2] = 1.;
12647 	    difr[(difr_dim1 << 1) + 1] = 1.;
12648 	}
12649 	return 0;
12650     }
12651 
12652 /*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
12653 /*     be computed with high relative accuracy (barring over/underflow). */
12654 /*     This is a problem on machines without a guard digit in */
12655 /*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
12656 /*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
12657 /*     which on any of these machines zeros out the bottommost */
12658 /*     bit of DSIGMA(I) if it is 1; this makes the subsequent */
12659 /*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
12660 /*     occurs. On binary machines with a guard digit (almost all */
12661 /*     machines) it does not change DSIGMA(I) at all. On hexadecimal */
12662 /*     and decimal machines with a guard digit, it slightly */
12663 /*     changes the bottommost bits of DSIGMA(I). It does not account */
12664 /*     for hexadecimal or decimal machines without guard digits */
12665 /*     (we know of none). We use a subroutine call to compute */
12666 /*     2*DSIGMA(I) to prevent optimizing compilers from eliminating */
12667 /*     this code. */
12668 
12669     i__1 = *k;
12670     for (i__ = 1; i__ <= i__1; ++i__) {
12671 	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
12672 /* L10: */
12673     }
12674 
12675 /*     Book keeping. */
12676 
12677     iwk1 = 1;
12678     iwk2 = iwk1 + *k;
12679     iwk3 = iwk2 + *k;
12680     iwk2i = iwk2 - 1;
12681     iwk3i = iwk3 - 1;
12682 
12683 /*     Normalize Z. */
12684 
12685     rho = dnrm2_(k, &z__[1], &c__1);
12686     dlascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info);
12687     rho *= rho;
12688 
12689 /*     Initialize WORK(IWK3). */
12690 
12691     dlaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k);
12692 
12693 /*     Compute the updated singular values, the arrays DIFL, DIFR, */
12694 /*     and the updated Z. */
12695 
12696     i__1 = *k;
12697     for (j = 1; j <= i__1; ++j) {
12698 	dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[
12699 		iwk2], info);
12700 
12701 /*        If the root finder fails, the computation is terminated. */
12702 
12703 	if (*info != 0) {
12704 	    return 0;
12705 	}
12706 	work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
12707 	difl[j] = -work[j];
12708 	difr[j + difr_dim1] = -work[j + 1];
12709 	i__2 = j - 1;
12710 	for (i__ = 1; i__ <= i__2; ++i__) {
12711 	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
12712 		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
12713 		    j]);
12714 /* L20: */
12715 	}
12716 	i__2 = *k;
12717 	for (i__ = j + 1; i__ <= i__2; ++i__) {
12718 	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
12719 		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
12720 		    j]);
12721 /* L30: */
12722 	}
12723 /* L40: */
12724     }
12725 
12726 /*     Compute updated Z. */
12727 
12728     i__1 = *k;
12729     for (i__ = 1; i__ <= i__1; ++i__) {
12730 	d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1)));
12731 	z__[i__] = d_sign(&d__2, &z__[i__]);
12732 /* L50: */
12733     }
12734 
12735 /*     Update VF and VL. */
12736 
12737     i__1 = *k;
12738     for (j = 1; j <= i__1; ++j) {
12739 	diflj = difl[j];
12740 	dj = d__[j];
12741 	dsigj = -dsigma[j];
12742 	if (j < *k) {
12743 	    difrj = -difr[j + difr_dim1];
12744 	    dsigjp = -dsigma[j + 1];
12745 	}
12746 	work[j] = -z__[j] / diflj / (dsigma[j] + dj);
12747 	i__2 = j - 1;
12748 	for (i__ = 1; i__ <= i__2; ++i__) {
12749 	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / (
12750 		    dsigma[i__] + dj);
12751 /* L60: */
12752 	}
12753 	i__2 = *k;
12754 	for (i__ = j + 1; i__ <= i__2; ++i__) {
12755 	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) /
12756 		    (dsigma[i__] + dj);
12757 /* L70: */
12758 	}
12759 	temp = dnrm2_(k, &work[1], &c__1);
12760 	work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
12761 	work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
12762 	if (*icompq == 1) {
12763 	    difr[j + (difr_dim1 << 1)] = temp;
12764 	}
12765 /* L80: */
12766     }
12767 
12768     dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
12769     dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);
12770 
12771     return 0;
12772 
12773 /*     End of DLASD8 */
12774 
12775 } /* dlasd8_ */
12776 
dlasda_(integer * icompq,integer * smlsiz,integer * n,integer * sqre,double * d__,double * e,double * u,integer * ldu,double * vt,integer * k,double * difl,double * difr,double * z__,double * poles,integer * givptr,integer * givcol,integer * ldgcol,integer * perm,double * givnum,double * c__,double * s,double * work,integer * iwork,integer * info)12777 /* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n,
12778 	integer *sqre, double *d__, double *e, double *u, integer
12779 	*ldu, double *vt, integer *k, double *difl, double *difr,
12780 	double *z__, double *poles, integer *givptr, integer *givcol,
12781 	integer *ldgcol, integer *perm, double *givnum, double *c__,
12782 	double *s, double *work, integer *iwork, integer *info)
12783 {
12784 	/* Table of constant values */
12785 	static integer c__0 = 0;
12786 	static double c_b11 = 0.;
12787 	static double c_b12 = 1.;
12788 	static integer c__1 = 1;
12789 	static integer c__2 = 2;
12790 
12791     /* System generated locals */
12792     integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1,
12793 	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset,
12794 	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset,
12795 	    z_dim1, z_offset, i__1, i__2;
12796 
12797     /* Local variables */
12798     integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc, nlf, nrf,
12799 	     vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1;
12800     double beta;
12801     integer idxq, nlvl;
12802     double alpha;
12803     integer inode, ndiml, ndimr, idxqi, itemp;
12804     integer sqrei;
12805     integer nwork1, nwork2;
12806 	integer smlszp;
12807 
12808 
12809 /*  -- LAPACK auxiliary routine (version 3.1) -- */
12810 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
12811 /*     November 2006 */
12812 
12813 /*     .. Scalar Arguments .. */
12814 /*     .. */
12815 /*     .. Array Arguments .. */
12816 /*     .. */
12817 
12818 /*  Purpose */
12819 /*  ======= */
12820 
12821 /*  Using a divide and conquer approach, DLASDA computes the singular */
12822 /*  value decomposition (SVD) of a real upper bidiagonal N-by-M matrix */
12823 /*  B with diagonal D and offdiagonal E, where M = N + SQRE. The */
12824 /*  algorithm computes the singular values in the SVD B = U * S * VT. */
12825 /*  The orthogonal matrices U and VT are optionally computed in */
12826 /*  compact form. */
12827 
12828 /*  A related subroutine, DLASD0, computes the singular values and */
12829 /*  the singular vectors in explicit form. */
12830 
12831 /*  Arguments */
12832 /*  ========= */
12833 
12834 /*  ICOMPQ (input) INTEGER */
12835 /*         Specifies whether singular vectors are to be computed */
12836 /*         in compact form, as follows */
12837 /*         = 0: Compute singular values only. */
12838 /*         = 1: Compute singular vectors of upper bidiagonal */
12839 /*              matrix in compact form. */
12840 
12841 /*  SMLSIZ (input) INTEGER */
12842 /*         The maximum size of the subproblems at the bottom of the */
12843 /*         computation tree. */
12844 
12845 /*  N      (input) INTEGER */
12846 /*         The row dimension of the upper bidiagonal matrix. This is */
12847 /*         also the dimension of the main diagonal array D. */
12848 
12849 /*  SQRE   (input) INTEGER */
12850 /*         Specifies the column dimension of the bidiagonal matrix. */
12851 /*         = 0: The bidiagonal matrix has column dimension M = N; */
12852 /*         = 1: The bidiagonal matrix has column dimension M = N + 1. */
12853 
12854 /*  D      (input/output) DOUBLE PRECISION array, dimension ( N ) */
12855 /*         On entry D contains the main diagonal of the bidiagonal */
12856 /*         matrix. On exit D, if INFO = 0, contains its singular values. */
12857 
12858 /*  E      (input) DOUBLE PRECISION array, dimension ( M-1 ) */
12859 /*         Contains the subdiagonal entries of the bidiagonal matrix. */
12860 /*         On exit, E has been destroyed. */
12861 
12862 /*  U      (output) DOUBLE PRECISION array, */
12863 /*         dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced */
12864 /*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left */
12865 /*         singular vector matrices of all subproblems at the bottom */
12866 /*         level. */
12867 
12868 /*  LDU    (input) INTEGER, LDU = > N. */
12869 /*         The leading dimension of arrays U, VT, DIFL, DIFR, POLES, */
12870 /*         GIVNUM, and Z. */
12871 
12872 /*  VT     (output) DOUBLE PRECISION array, */
12873 /*         dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced */
12874 /*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right */
12875 /*         singular vector matrices of all subproblems at the bottom */
12876 /*         level. */
12877 
12878 /*  K      (output) INTEGER array, */
12879 /*         dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. */
12880 /*         If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th */
12881 /*         secular equation on the computation tree. */
12882 
12883 /*  DIFL   (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ), */
12884 /*         where NLVL = floor(log_2 (N/SMLSIZ))). */
12885 
12886 /*  DIFR   (output) DOUBLE PRECISION array, */
12887 /*                  dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and */
12888 /*                  dimension ( N ) if ICOMPQ = 0. */
12889 /*         If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) */
12890 /*         record distances between singular values on the I-th */
12891 /*         level and singular values on the (I -1)-th level, and */
12892 /*         DIFR(1:N, 2 * I ) contains the normalizing factors for */
12893 /*         the right singular vector matrix. See DLASD8 for details. */
12894 
12895 /*  Z      (output) DOUBLE PRECISION array, */
12896 /*                  dimension ( LDU, NLVL ) if ICOMPQ = 1 and */
12897 /*                  dimension ( N ) if ICOMPQ = 0. */
12898 /*         The first K elements of Z(1, I) contain the components of */
12899 /*         the deflation-adjusted updating row vector for subproblems */
12900 /*         on the I-th level. */
12901 
12902 /*  POLES  (output) DOUBLE PRECISION array, */
12903 /*         dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced */
12904 /*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and */
12905 /*         POLES(1, 2*I) contain  the new and old singular values */
12906 /*         involved in the secular equations on the I-th level. */
12907 
12908 /*  GIVPTR (output) INTEGER array, */
12909 /*         dimension ( N ) if ICOMPQ = 1, and not referenced if */
12910 /*         ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records */
12911 /*         the number of Givens rotations performed on the I-th */
12912 /*         problem on the computation tree. */
12913 
12914 /*  GIVCOL (output) INTEGER array, */
12915 /*         dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not */
12916 /*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */
12917 /*         GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations */
12918 /*         of Givens rotations performed on the I-th level on the */
12919 /*         computation tree. */
12920 
12921 /*  LDGCOL (input) INTEGER, LDGCOL = > N. */
12922 /*         The leading dimension of arrays GIVCOL and PERM. */
12923 
12924 /*  PERM   (output) INTEGER array, */
12925 /*         dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced */
12926 /*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records */
12927 /*         permutations done on the I-th level of the computation tree. */
12928 
12929 /*  GIVNUM (output) DOUBLE PRECISION array, */
12930 /*         dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not */
12931 /*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, */
12932 /*         GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- */
12933 /*         values of Givens rotations performed on the I-th level on */
12934 /*         the computation tree. */
12935 
12936 /*  C      (output) DOUBLE PRECISION array, */
12937 /*         dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. */
12938 /*         If ICOMPQ = 1 and the I-th subproblem is not square, on exit, */
12939 /*         C( I ) contains the C-value of a Givens rotation related to */
12940 /*         the right null space of the I-th subproblem. */
12941 
12942 /*  S      (output) DOUBLE PRECISION array, dimension ( N ) if */
12943 /*         ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 */
12944 /*         and the I-th subproblem is not square, on exit, S( I ) */
12945 /*         contains the S-value of a Givens rotation related to */
12946 /*         the right null space of the I-th subproblem. */
12947 
12948 /*  WORK   (workspace) DOUBLE PRECISION array, dimension */
12949 /*         (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). */
12950 
12951 /*  IWORK  (workspace) INTEGER array. */
12952 /*         Dimension must be at least (7 * N). */
12953 
12954 /*  INFO   (output) INTEGER */
12955 /*          = 0:  successful exit. */
12956 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
12957 /*          > 0:  if INFO = 1, an singular value did not converge */
12958 
12959 /*  Further Details */
12960 /*  =============== */
12961 
12962 /*  Based on contributions by */
12963 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
12964 /*     California at Berkeley, USA */
12965 
12966 /*  ===================================================================== */
12967 
12968 /*     .. Parameters .. */
12969 /*     .. */
12970 /*     .. Local Scalars .. */
12971 /*     .. */
12972 /*     .. External Subroutines .. */
12973 /*     .. */
12974 /*     .. Executable Statements .. */
12975 
12976 /*     Test the input parameters. */
12977 
12978     /* Parameter adjustments */
12979     --d__;
12980     --e;
12981     givnum_dim1 = *ldu;
12982     givnum_offset = 1 + givnum_dim1;
12983     givnum -= givnum_offset;
12984     poles_dim1 = *ldu;
12985     poles_offset = 1 + poles_dim1;
12986     poles -= poles_offset;
12987     z_dim1 = *ldu;
12988     z_offset = 1 + z_dim1;
12989     z__ -= z_offset;
12990     difr_dim1 = *ldu;
12991     difr_offset = 1 + difr_dim1;
12992     difr -= difr_offset;
12993     difl_dim1 = *ldu;
12994     difl_offset = 1 + difl_dim1;
12995     difl -= difl_offset;
12996     vt_dim1 = *ldu;
12997     vt_offset = 1 + vt_dim1;
12998     vt -= vt_offset;
12999     u_dim1 = *ldu;
13000     u_offset = 1 + u_dim1;
13001     u -= u_offset;
13002     --k;
13003     --givptr;
13004     perm_dim1 = *ldgcol;
13005     perm_offset = 1 + perm_dim1;
13006     perm -= perm_offset;
13007     givcol_dim1 = *ldgcol;
13008     givcol_offset = 1 + givcol_dim1;
13009     givcol -= givcol_offset;
13010     --c__;
13011     --s;
13012     --work;
13013     --iwork;
13014 
13015     /* Function Body */
13016     *info = 0;
13017 
13018     if (*icompq < 0 || *icompq > 1) {
13019 	*info = -1;
13020     } else if (*smlsiz < 3) {
13021 	*info = -2;
13022     } else if (*n < 0) {
13023 	*info = -3;
13024     } else if (*sqre < 0 || *sqre > 1) {
13025 	*info = -4;
13026     } else if (*ldu < *n + *sqre) {
13027 	*info = -8;
13028     } else if (*ldgcol < *n) {
13029 	*info = -17;
13030     }
13031     if (*info != 0) {
13032 	i__1 = -(*info);
13033 	xerbla_("DLASDA", &i__1);
13034 	return 0;
13035     }
13036 
13037     m = *n + *sqre;
13038 
13039 /*     If the input matrix is too small, call DLASDQ to find the SVD. */
13040 
13041     if (*n <= *smlsiz) {
13042 	if (*icompq == 0) {
13043 	    dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
13044 		    vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, &
13045 		    work[1], info);
13046 	} else {
13047 	    dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
13048 , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1],
13049 		    info);
13050 	}
13051 	return 0;
13052     }
13053 
13054 /*     Book-keeping and  set up the computation tree. */
13055 
13056     inode = 1;
13057     ndiml = inode + *n;
13058     ndimr = ndiml + *n;
13059     idxq = ndimr + *n;
13060     iwk = idxq + *n;
13061 
13062     ncc = 0;
13063     nru = 0;
13064 
13065     smlszp = *smlsiz + 1;
13066     vf = 1;
13067     vl = vf + m;
13068     nwork1 = vl + m;
13069     nwork2 = nwork1 + smlszp * smlszp;
13070 
13071     dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
13072 	    smlsiz);
13073 
13074 /*     for the nodes on bottom level of the tree, solve */
13075 /*     their subproblems by DLASDQ. */
13076 
13077     ndb1 = (nd + 1) / 2;
13078     i__1 = nd;
13079     for (i__ = ndb1; i__ <= i__1; ++i__) {
13080 
13081 /*        IC : center row of each node */
13082 /*        NL : number of rows of left  subproblem */
13083 /*        NR : number of rows of right subproblem */
13084 /*        NLF: starting row of the left   subproblem */
13085 /*        NRF: starting row of the right  subproblem */
13086 
13087 	i1 = i__ - 1;
13088 	ic = iwork[inode + i1];
13089 	nl = iwork[ndiml + i1];
13090 	nlp1 = nl + 1;
13091 	nr = iwork[ndimr + i1];
13092 	nlf = ic - nl;
13093 	nrf = ic + 1;
13094 	idxqi = idxq + nlf - 2;
13095 	vfi = vf + nlf - 1;
13096 	vli = vl + nlf - 1;
13097 	sqrei = 1;
13098 	if (*icompq == 0) {
13099 	    dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &work[nwork1], &smlszp);
13100 	    dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], &
13101 		    work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2],
13102 		    &nl, &work[nwork2], info);
13103 	    itemp = nwork1 + nl * smlszp;
13104 	    dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1);
13105 	    dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1);
13106 	} else {
13107 	    dlaset_("A", &nl, &nl, &c_b11, &c_b12, &u[nlf + u_dim1], ldu);
13108 	    dlaset_("A", &nlp1, &nlp1, &c_b11, &c_b12, &vt[nlf + vt_dim1],
13109 		    ldu);
13110 	    dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &
13111 		    vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf +
13112 		    u_dim1], ldu, &work[nwork1], info);
13113 	    dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1);
13114 	    dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1)
13115 		    ;
13116 	}
13117 	if (*info != 0) {
13118 	    return 0;
13119 	}
13120 	i__2 = nl;
13121 	for (j = 1; j <= i__2; ++j) {
13122 	    iwork[idxqi + j] = j;
13123 /* L10: */
13124 	}
13125 	if (i__ == nd && *sqre == 0) {
13126 	    sqrei = 0;
13127 	} else {
13128 	    sqrei = 1;
13129 	}
13130 	idxqi += nlp1;
13131 	vfi += nlp1;
13132 	vli += nlp1;
13133 	nrp1 = nr + sqrei;
13134 	if (*icompq == 0) {
13135 	    dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &work[nwork1], &smlszp);
13136 	    dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], &
13137 		    work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2],
13138 		    &nr, &work[nwork2], info);
13139 	    itemp = nwork1 + (nrp1 - 1) * smlszp;
13140 	    dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1);
13141 	    dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1);
13142 	} else {
13143 	    dlaset_("A", &nr, &nr, &c_b11, &c_b12, &u[nrf + u_dim1], ldu);
13144 	    dlaset_("A", &nrp1, &nrp1, &c_b11, &c_b12, &vt[nrf + vt_dim1],
13145 		    ldu);
13146 	    dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &
13147 		    vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf +
13148 		    u_dim1], ldu, &work[nwork1], info);
13149 	    dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1);
13150 	    dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1)
13151 		    ;
13152 	}
13153 	if (*info != 0) {
13154 	    return 0;
13155 	}
13156 	i__2 = nr;
13157 	for (j = 1; j <= i__2; ++j) {
13158 	    iwork[idxqi + j] = j;
13159 /* L20: */
13160 	}
13161 /* L30: */
13162     }
13163 
13164 /*     Now conquer each subproblem bottom-up. */
13165 
13166     j = pow_ii(&c__2, &nlvl);
13167     for (lvl = nlvl; lvl >= 1; --lvl) {
13168 	lvl2 = (lvl << 1) - 1;
13169 
13170 /*        Find the first node LF and last node LL on */
13171 /*        the current level LVL. */
13172 
13173 	if (lvl == 1) {
13174 	    lf = 1;
13175 	    ll = 1;
13176 	} else {
13177 	    i__1 = lvl - 1;
13178 	    lf = pow_ii(&c__2, &i__1);
13179 	    ll = (lf << 1) - 1;
13180 	}
13181 	i__1 = ll;
13182 	for (i__ = lf; i__ <= i__1; ++i__) {
13183 	    im1 = i__ - 1;
13184 	    ic = iwork[inode + im1];
13185 	    nl = iwork[ndiml + im1];
13186 	    nr = iwork[ndimr + im1];
13187 	    nlf = ic - nl;
13188 	    nrf = ic + 1;
13189 	    if (i__ == ll) {
13190 		sqrei = *sqre;
13191 	    } else {
13192 		sqrei = 1;
13193 	    }
13194 	    vfi = vf + nlf - 1;
13195 	    vli = vl + nlf - 1;
13196 	    idxqi = idxq + nlf - 1;
13197 	    alpha = d__[ic];
13198 	    beta = e[ic];
13199 	    if (*icompq == 0) {
13200 		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
13201 			work[vli], &alpha, &beta, &iwork[idxqi], &perm[
13202 			perm_offset], &givptr[1], &givcol[givcol_offset],
13203 			ldgcol, &givnum[givnum_offset], ldu, &poles[
13204 			poles_offset], &difl[difl_offset], &difr[difr_offset],
13205 			 &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1],
13206 			 &iwork[iwk], info);
13207 	    } else {
13208 		--j;
13209 		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
13210 			work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf +
13211 			lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 *
13212 			givcol_dim1], ldgcol, &givnum[nlf + lvl2 *
13213 			givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], &
13214 			difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 *
13215 			difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j],
13216 			&s[j], &work[nwork1], &iwork[iwk], info);
13217 	    }
13218 	    if (*info != 0) {
13219 		return 0;
13220 	    }
13221 /* L40: */
13222 	}
13223 /* L50: */
13224     }
13225 
13226     return 0;
13227 
13228 /*     End of DLASDA */
13229 
13230 } /* dlasda_ */
13231 
dlasdq_(const char * uplo,integer * sqre,integer * n,integer * ncvt,integer * nru,integer * ncc,double * d__,double * e,double * vt,integer * ldvt,double * u,integer * ldu,double * c__,integer * ldc,double * work,integer * info)13232 /* Subroutine */ int dlasdq_(const char *uplo, integer *sqre, integer *n, integer *
13233 	ncvt, integer *nru, integer *ncc, double *d__, double *e,
13234 	double *vt, integer *ldvt, double *u, integer *ldu,
13235 	double *c__, integer *ldc, double *work, integer *info)
13236 {
13237 	/* Table of constant values */
13238 	static integer c__1 = 1;
13239 
13240     /* System generated locals */
13241     integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
13242 	    i__2;
13243 
13244     /* Local variables */
13245     integer i__, j;
13246     double r__, cs, sn;
13247     integer np1, isub;
13248     double smin;
13249     integer sqre1;
13250     integer iuplo;
13251     bool rotate;
13252 
13253 
13254 /*  -- LAPACK auxiliary routine (version 3.1) -- */
13255 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
13256 /*     November 2006 */
13257 
13258 /*     .. Scalar Arguments .. */
13259 /*     .. */
13260 /*     .. Array Arguments .. */
13261 /*     .. */
13262 
13263 /*  Purpose */
13264 /*  ======= */
13265 
13266 /*  DLASDQ computes the singular value decomposition (SVD) of a real */
13267 /*  (upper or lower) bidiagonal matrix with diagonal D and offdiagonal */
13268 /*  E, accumulating the transformations if desired. Letting B denote */
13269 /*  the input bidiagonal matrix, the algorithm computes orthogonal */
13270 /*  matrices Q and P such that B = Q * S * P' (P' denotes the transpose */
13271 /*  of P). The singular values S are overwritten on D. */
13272 
13273 /*  The input matrix U  is changed to U  * Q  if desired. */
13274 /*  The input matrix VT is changed to P' * VT if desired. */
13275 /*  The input matrix C  is changed to Q' * C  if desired. */
13276 
13277 /*  See "Computing  Small Singular Values of Bidiagonal Matrices With */
13278 /*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, */
13279 /*  LAPACK Working Note #3, for a detailed description of the algorithm. */
13280 
13281 /*  Arguments */
13282 /*  ========= */
13283 
13284 /*  UPLO  (input) CHARACTER*1 */
13285 /*        On entry, UPLO specifies whether the input bidiagonal matrix */
13286 /*        is upper or lower bidiagonal, and wether it is square are */
13287 /*        not. */
13288 /*           UPLO = 'U' or 'u'   B is upper bidiagonal. */
13289 /*           UPLO = 'L' or 'l'   B is lower bidiagonal. */
13290 
13291 /*  SQRE  (input) INTEGER */
13292 /*        = 0: then the input matrix is N-by-N. */
13293 /*        = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and */
13294 /*             (N+1)-by-N if UPLU = 'L'. */
13295 
13296 /*        The bidiagonal matrix has */
13297 /*        N = NL + NR + 1 rows and */
13298 /*        M = N + SQRE >= N columns. */
13299 
13300 /*  N     (input) INTEGER */
13301 /*        On entry, N specifies the number of rows and columns */
13302 /*        in the matrix. N must be at least 0. */
13303 
13304 /*  NCVT  (input) INTEGER */
13305 /*        On entry, NCVT specifies the number of columns of */
13306 /*        the matrix VT. NCVT must be at least 0. */
13307 
13308 /*  NRU   (input) INTEGER */
13309 /*        On entry, NRU specifies the number of rows of */
13310 /*        the matrix U. NRU must be at least 0. */
13311 
13312 /*  NCC   (input) INTEGER */
13313 /*        On entry, NCC specifies the number of columns of */
13314 /*        the matrix C. NCC must be at least 0. */
13315 
13316 /*  D     (input/output) DOUBLE PRECISION array, dimension (N) */
13317 /*        On entry, D contains the diagonal entries of the */
13318 /*        bidiagonal matrix whose SVD is desired. On normal exit, */
13319 /*        D contains the singular values in ascending order. */
13320 
13321 /*  E     (input/output) DOUBLE PRECISION array. */
13322 /*        dimension is (N-1) if SQRE = 0 and N if SQRE = 1. */
13323 /*        On entry, the entries of E contain the offdiagonal entries */
13324 /*        of the bidiagonal matrix whose SVD is desired. On normal */
13325 /*        exit, E will contain 0. If the algorithm does not converge, */
13326 /*        D and E will contain the diagonal and superdiagonal entries */
13327 /*        of a bidiagonal matrix orthogonally equivalent to the one */
13328 /*        given as input. */
13329 
13330 /*  VT    (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) */
13331 /*        On entry, contains a matrix which on exit has been */
13332 /*        premultiplied by P', dimension N-by-NCVT if SQRE = 0 */
13333 /*        and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). */
13334 
13335 /*  LDVT  (input) INTEGER */
13336 /*        On entry, LDVT specifies the leading dimension of VT as */
13337 /*        declared in the calling (sub) program. LDVT must be at */
13338 /*        least 1. If NCVT is nonzero LDVT must also be at least N. */
13339 
13340 /*  U     (input/output) DOUBLE PRECISION array, dimension (LDU, N) */
13341 /*        On entry, contains a  matrix which on exit has been */
13342 /*        postmultiplied by Q, dimension NRU-by-N if SQRE = 0 */
13343 /*        and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). */
13344 
13345 /*  LDU   (input) INTEGER */
13346 /*        On entry, LDU  specifies the leading dimension of U as */
13347 /*        declared in the calling (sub) program. LDU must be at */
13348 /*        least max( 1, NRU ) . */
13349 
13350 /*  C     (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) */
13351 /*        On entry, contains an N-by-NCC matrix which on exit */
13352 /*        has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0 */
13353 /*        and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). */
13354 
13355 /*  LDC   (input) INTEGER */
13356 /*        On entry, LDC  specifies the leading dimension of C as */
13357 /*        declared in the calling (sub) program. LDC must be at */
13358 /*        least 1. If NCC is nonzero, LDC must also be at least N. */
13359 
13360 /*  WORK  (workspace) DOUBLE PRECISION array, dimension (4*N) */
13361 /*        Workspace. Only referenced if one of NCVT, NRU, or NCC is */
13362 /*        nonzero, and if N is at least 2. */
13363 
13364 /*  INFO  (output) INTEGER */
13365 /*        On exit, a value of 0 indicates a successful exit. */
13366 /*        If INFO < 0, argument number -INFO is illegal. */
13367 /*        If INFO > 0, the algorithm did not converge, and INFO */
13368 /*        specifies how many superdiagonals did not converge. */
13369 
13370 /*  Further Details */
13371 /*  =============== */
13372 
13373 /*  Based on contributions by */
13374 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
13375 /*     California at Berkeley, USA */
13376 
13377 /*  ===================================================================== */
13378 
13379 /*     .. Parameters .. */
13380 /*     .. */
13381 /*     .. Local Scalars .. */
13382 /*     .. */
13383 /*     .. External Subroutines .. */
13384 /*     .. */
13385 /*     .. External Functions .. */
13386 /*     .. */
13387 /*     .. Intrinsic Functions .. */
13388 /*     .. */
13389 /*     .. Executable Statements .. */
13390 
13391 /*     Test the input parameters. */
13392 
13393     /* Parameter adjustments */
13394     --d__;
13395     --e;
13396     vt_dim1 = *ldvt;
13397     vt_offset = 1 + vt_dim1;
13398     vt -= vt_offset;
13399     u_dim1 = *ldu;
13400     u_offset = 1 + u_dim1;
13401     u -= u_offset;
13402     c_dim1 = *ldc;
13403     c_offset = 1 + c_dim1;
13404     c__ -= c_offset;
13405     --work;
13406 
13407     /* Function Body */
13408     *info = 0;
13409     iuplo = 0;
13410     if (lsame_(uplo, "U")) {
13411 	iuplo = 1;
13412     }
13413     if (lsame_(uplo, "L")) {
13414 	iuplo = 2;
13415     }
13416     if (iuplo == 0) {
13417 	*info = -1;
13418     } else if (*sqre < 0 || *sqre > 1) {
13419 	*info = -2;
13420     } else if (*n < 0) {
13421 	*info = -3;
13422     } else if (*ncvt < 0) {
13423 	*info = -4;
13424     } else if (*nru < 0) {
13425 	*info = -5;
13426     } else if (*ncc < 0) {
13427 	*info = -6;
13428     } else if (*ncvt == 0 && *ldvt < 1 || *ncvt > 0 && *ldvt < std::max(1_integer,*n)) {
13429 	*info = -10;
13430     } else if (*ldu < std::max(1_integer,*nru)) {
13431 	*info = -12;
13432     } else if (*ncc == 0 && *ldc < 1 || *ncc > 0 && *ldc < std::max(1_integer,*n)) {
13433 	*info = -14;
13434     }
13435     if (*info != 0) {
13436 	i__1 = -(*info);
13437 	xerbla_("DLASDQ", &i__1);
13438 	return 0;
13439     }
13440     if (*n == 0) {
13441 	return 0;
13442     }
13443 
13444 /*     ROTATE is true if any singular vectors desired, false otherwise */
13445 
13446     rotate = *ncvt > 0 || *nru > 0 || *ncc > 0;
13447     np1 = *n + 1;
13448     sqre1 = *sqre;
13449 
13450 /*     If matrix non-square upper bidiagonal, rotate to be lower */
13451 /*     bidiagonal.  The rotations are on the right. */
13452 
13453     if (iuplo == 1 && sqre1 == 1) {
13454 	i__1 = *n - 1;
13455 	for (i__ = 1; i__ <= i__1; ++i__) {
13456 	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
13457 	    d__[i__] = r__;
13458 	    e[i__] = sn * d__[i__ + 1];
13459 	    d__[i__ + 1] = cs * d__[i__ + 1];
13460 	    if (rotate) {
13461 		work[i__] = cs;
13462 		work[*n + i__] = sn;
13463 	    }
13464 /* L10: */
13465 	}
13466 	dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
13467 	d__[*n] = r__;
13468 	e[*n] = 0.;
13469 	if (rotate) {
13470 	    work[*n] = cs;
13471 	    work[*n + *n] = sn;
13472 	}
13473 	iuplo = 2;
13474 	sqre1 = 0;
13475 
13476 /*        Update singular vectors if desired. */
13477 
13478 	if (*ncvt > 0) {
13479 	    dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[
13480 		    vt_offset], ldvt);
13481 	}
13482     }
13483 
13484 /*     If matrix lower bidiagonal, rotate to be upper bidiagonal */
13485 /*     by applying Givens rotations on the left. */
13486 
13487     if (iuplo == 2) {
13488 	i__1 = *n - 1;
13489 	for (i__ = 1; i__ <= i__1; ++i__) {
13490 	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
13491 	    d__[i__] = r__;
13492 	    e[i__] = sn * d__[i__ + 1];
13493 	    d__[i__ + 1] = cs * d__[i__ + 1];
13494 	    if (rotate) {
13495 		work[i__] = cs;
13496 		work[*n + i__] = sn;
13497 	    }
13498 /* L20: */
13499 	}
13500 
13501 /*        If matrix (N+1)-by-N lower bidiagonal, one additional */
13502 /*        rotation is needed. */
13503 
13504 	if (sqre1 == 1) {
13505 	    dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
13506 	    d__[*n] = r__;
13507 	    if (rotate) {
13508 		work[*n] = cs;
13509 		work[*n + *n] = sn;
13510 	    }
13511 	}
13512 
13513 /*        Update singular vectors if desired. */
13514 
13515 	if (*nru > 0) {
13516 	    if (sqre1 == 0) {
13517 		dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[
13518 			u_offset], ldu);
13519 	    } else {
13520 		dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[
13521 			u_offset], ldu);
13522 	    }
13523 	}
13524 	if (*ncc > 0) {
13525 	    if (sqre1 == 0) {
13526 		dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[
13527 			c_offset], ldc);
13528 	    } else {
13529 		dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[
13530 			c_offset], ldc);
13531 	    }
13532 	}
13533     }
13534 
13535 /*     Call DBDSQR to compute the SVD of the reduced real */
13536 /*     N-by-N upper bidiagonal matrix. */
13537 
13538     dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[
13539 	    u_offset], ldu, &c__[c_offset], ldc, &work[1], info);
13540 
13541 /*     Sort the singular values into ascending order (insertion sort on */
13542 /*     singular values, but only one transposition per singular vector) */
13543 
13544     i__1 = *n;
13545     for (i__ = 1; i__ <= i__1; ++i__) {
13546 
13547 /*        Scan for smallest D(I). */
13548 
13549 	isub = i__;
13550 	smin = d__[i__];
13551 	i__2 = *n;
13552 	for (j = i__ + 1; j <= i__2; ++j) {
13553 	    if (d__[j] < smin) {
13554 		isub = j;
13555 		smin = d__[j];
13556 	    }
13557 /* L30: */
13558 	}
13559 	if (isub != i__) {
13560 
13561 /*           Swap singular values and vectors. */
13562 
13563 	    d__[isub] = d__[i__];
13564 	    d__[i__] = smin;
13565 	    if (*ncvt > 0) {
13566 		dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1],
13567 			ldvt);
13568 	    }
13569 	    if (*nru > 0) {
13570 		dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1]
13571 , &c__1);
13572 	    }
13573 	    if (*ncc > 0) {
13574 		dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc)
13575 			;
13576 	    }
13577 	}
13578 /* L40: */
13579     }
13580 
13581     return 0;
13582 
13583 /*     End of DLASDQ */
13584 
13585 } /* dlasdq_ */
13586 
dlasdt_(integer * n,integer * lvl,integer * nd,integer * inode,integer * ndiml,integer * ndimr,integer * msub)13587 /* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer *
13588 	inode, integer *ndiml, integer *ndimr, integer *msub)
13589 {
13590     /* System generated locals */
13591     integer i__1, i__2;
13592 
13593     /* Local variables */
13594     integer i__, il, ir, maxn;
13595     double temp;
13596     integer nlvl, llst, ncrnt;
13597 
13598 
13599 /*  -- LAPACK auxiliary routine (version 3.1) -- */
13600 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
13601 /*     November 2006 */
13602 
13603 /*     .. Scalar Arguments .. */
13604 /*     .. */
13605 /*     .. Array Arguments .. */
13606 /*     .. */
13607 
13608 /*  Purpose */
13609 /*  ======= */
13610 
13611 /*  DLASDT creates a tree of subproblems for bidiagonal divide and */
13612 /*  conquer. */
13613 
13614 /*  Arguments */
13615 /*  ========= */
13616 
13617 /*   N      (input) INTEGER */
13618 /*          On entry, the number of diagonal elements of the */
13619 /*          bidiagonal matrix. */
13620 
13621 /*   LVL    (output) INTEGER */
13622 /*          On exit, the number of levels on the computation tree. */
13623 
13624 /*   ND     (output) INTEGER */
13625 /*          On exit, the number of nodes on the tree. */
13626 
13627 /*   INODE  (output) INTEGER array, dimension ( N ) */
13628 /*          On exit, centers of subproblems. */
13629 
13630 /*   NDIML  (output) INTEGER array, dimension ( N ) */
13631 /*          On exit, row dimensions of left children. */
13632 
13633 /*   NDIMR  (output) INTEGER array, dimension ( N ) */
13634 /*          On exit, row dimensions of right children. */
13635 
13636 /*   MSUB   (input) INTEGER. */
13637 /*          On entry, the maximum row dimension each subproblem at the */
13638 /*          bottom of the tree can be of. */
13639 
13640 /*  Further Details */
13641 /*  =============== */
13642 
13643 /*  Based on contributions by */
13644 /*     Ming Gu and Huan Ren, Computer Science Division, University of */
13645 /*     California at Berkeley, USA */
13646 
13647 /*  ===================================================================== */
13648 
13649 /*     .. Parameters .. */
13650 /*     .. */
13651 /*     .. Local Scalars .. */
13652 /*     .. */
13653 /*     .. Intrinsic Functions .. */
13654 /*     .. */
13655 /*     .. Executable Statements .. */
13656 
13657 /*     Find the number of levels on the tree. */
13658 
13659     /* Parameter adjustments */
13660     --ndimr;
13661     --ndiml;
13662     --inode;
13663 
13664     /* Function Body */
13665     maxn = std::max(1_integer,*n);
13666     temp = log((double) maxn / (double) (*msub + 1)) / log(2.);
13667     *lvl = (integer) temp + 1;
13668 
13669     i__ = *n / 2;
13670     inode[1] = i__ + 1;
13671     ndiml[1] = i__;
13672     ndimr[1] = *n - i__ - 1;
13673     il = 0;
13674     ir = 1;
13675     llst = 1;
13676     i__1 = *lvl - 1;
13677     for (nlvl = 1; nlvl <= i__1; ++nlvl) {
13678 
13679 /*        Constructing the tree at (NLVL+1)-st level. The number of */
13680 /*        nodes created on this level is LLST * 2. */
13681 
13682 	i__2 = llst - 1;
13683 	for (i__ = 0; i__ <= i__2; ++i__) {
13684 	    il += 2;
13685 	    ir += 2;
13686 	    ncrnt = llst + i__;
13687 	    ndiml[il] = ndiml[ncrnt] / 2;
13688 	    ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1;
13689 	    inode[il] = inode[ncrnt] - ndimr[il] - 1;
13690 	    ndiml[ir] = ndimr[ncrnt] / 2;
13691 	    ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1;
13692 	    inode[ir] = inode[ncrnt] + ndiml[ir] + 1;
13693 /* L10: */
13694 	}
13695 	llst  <<=  1;
13696 /* L20: */
13697     }
13698     *nd = (llst << 1) - 1;
13699 
13700     return 0;
13701 
13702 /*     End of DLASDT */
13703 
13704 } /* dlasdt_ */
13705 
dlaset_(const char * uplo,integer * m,integer * n,double * alpha,double * beta,double * a,integer * lda)13706 /* Subroutine */ int dlaset_(const char *uplo, integer *m, integer *n, double *
13707 	alpha, double *beta, double *a, integer *lda)
13708 {
13709     /* System generated locals */
13710     integer a_dim1, a_offset, i__1, i__2, i__3;
13711 
13712     /* Local variables */
13713     integer i__, j;
13714 
13715 
13716 
13717 /*  -- LAPACK auxiliary routine (version 3.1) -- */
13718 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
13719 /*     November 2006 */
13720 
13721 /*     .. Scalar Arguments .. */
13722 /*     .. */
13723 /*     .. Array Arguments .. */
13724 /*     .. */
13725 
13726 /*  Purpose */
13727 /*  ======= */
13728 
13729 /*  DLASET initializes an m-by-n matrix A to BETA on the diagonal and */
13730 /*  ALPHA on the offdiagonals. */
13731 
13732 /*  Arguments */
13733 /*  ========= */
13734 
13735 /*  UPLO    (input) CHARACTER*1 */
13736 /*          Specifies the part of the matrix A to be set. */
13737 /*          = 'U':      Upper triangular part is set; the strictly lower */
13738 /*                      triangular part of A is not changed. */
13739 /*          = 'L':      Lower triangular part is set; the strictly upper */
13740 /*                      triangular part of A is not changed. */
13741 /*          Otherwise:  All of the matrix A is set. */
13742 
13743 /*  M       (input) INTEGER */
13744 /*          The number of rows of the matrix A.  M >= 0. */
13745 
13746 /*  N       (input) INTEGER */
13747 /*          The number of columns of the matrix A.  N >= 0. */
13748 
13749 /*  ALPHA   (input) DOUBLE PRECISION */
13750 /*          The constant to which the offdiagonal elements are to be set. */
13751 
13752 /*  BETA    (input) DOUBLE PRECISION */
13753 /*          The constant to which the diagonal elements are to be set. */
13754 
13755 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
13756 /*          On exit, the leading m-by-n submatrix of A is set as follows: */
13757 
13758 /*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, */
13759 /*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, */
13760 /*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, */
13761 
13762 /*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). */
13763 
13764 /*  LDA     (input) INTEGER */
13765 /*          The leading dimension of the array A.  LDA >= max(1,M). */
13766 
13767 /* ===================================================================== */
13768 
13769 /*     .. Local Scalars .. */
13770 /*     .. */
13771 /*     .. External Functions .. */
13772 /*     .. */
13773 /*     .. Intrinsic Functions .. */
13774 /*     .. */
13775 /*     .. Executable Statements .. */
13776 
13777     /* Parameter adjustments */
13778     a_dim1 = *lda;
13779     a_offset = 1 + a_dim1;
13780     a -= a_offset;
13781 
13782     /* Function Body */
13783     if (lsame_(uplo, "U")) {
13784 
13785 /*        Set the strictly upper triangular or trapezoidal part of the */
13786 /*        array to ALPHA. */
13787 
13788 	i__1 = *n;
13789 	for (j = 2; j <= i__1; ++j) {
13790 /* Computing MIN */
13791 	    i__3 = j - 1;
13792 	    i__2 = std::min(i__3,*m);
13793 	    for (i__ = 1; i__ <= i__2; ++i__) {
13794 		a[i__ + j * a_dim1] = *alpha;
13795 /* L10: */
13796 	    }
13797 /* L20: */
13798 	}
13799 
13800     } else if (lsame_(uplo, "L")) {
13801 
13802 /*        Set the strictly lower triangular or trapezoidal part of the */
13803 /*        array to ALPHA. */
13804 
13805 	i__1 = std::min(*m,*n);
13806 	for (j = 1; j <= i__1; ++j) {
13807 	    i__2 = *m;
13808 	    for (i__ = j + 1; i__ <= i__2; ++i__) {
13809 		a[i__ + j * a_dim1] = *alpha;
13810 /* L30: */
13811 	    }
13812 /* L40: */
13813 	}
13814 
13815     } else {
13816 
13817 /*        Set the leading m-by-n submatrix to ALPHA. */
13818 
13819 	i__1 = *n;
13820 	for (j = 1; j <= i__1; ++j) {
13821 	    i__2 = *m;
13822 	    for (i__ = 1; i__ <= i__2; ++i__) {
13823 		a[i__ + j * a_dim1] = *alpha;
13824 /* L50: */
13825 	    }
13826 /* L60: */
13827 	}
13828     }
13829 
13830 /*     Set the first min(M,N) diagonal elements to BETA. */
13831 
13832     i__1 = std::min(*m,*n);
13833     for (i__ = 1; i__ <= i__1; ++i__) {
13834 	a[i__ + i__ * a_dim1] = *beta;
13835 /* L70: */
13836     }
13837 
13838     return 0;
13839 
13840 /*     End of DLASET */
13841 
13842 } /* dlaset_ */
13843 
dlasq1_(integer * n,double * d__,double * e,double * work,integer * info)13844 /* Subroutine */ int dlasq1_(integer *n, double *d__, double *e,
13845 	double *work, integer *info)
13846 {
13847 	/* Table of constant values */
13848 	static integer c__1 = 1;
13849 	static integer c__2 = 2;
13850 	static integer c__0 = 0;
13851 
13852     /* System generated locals */
13853     integer i__1, i__2;
13854     double d__1, d__2, d__3;
13855 
13856     /* Local variables */
13857     integer i__;
13858     double eps;
13859     double scale;
13860     integer iinfo;
13861     double sigmn;
13862     double sigmx;
13863     double safmin;
13864 
13865 
13866 /*  -- LAPACK routine (version 3.1) -- */
13867 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
13868 /*     November 2006 */
13869 
13870 /*     .. Scalar Arguments .. */
13871 /*     .. */
13872 /*     .. Array Arguments .. */
13873 /*     .. */
13874 
13875 /*  Purpose */
13876 /*  ======= */
13877 
13878 /*  DLASQ1 computes the singular values of a real N-by-N bidiagonal */
13879 /*  matrix with diagonal D and off-diagonal E. The singular values */
13880 /*  are computed to high relative accuracy, in the absence of */
13881 /*  denormalization, underflow and overflow. The algorithm was first */
13882 /*  presented in */
13883 
13884 /*  "Accurate singular values and differential qd algorithms" by K. V. */
13885 /*  Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, */
13886 /*  1994, */
13887 
13888 /*  and the present implementation is described in "An implementation of */
13889 /*  the dqds Algorithm (Positive Case)", LAPACK Working Note. */
13890 
13891 /*  Arguments */
13892 /*  ========= */
13893 
13894 /*  N     (input) INTEGER */
13895 /*        The number of rows and columns in the matrix. N >= 0. */
13896 
13897 /*  D     (input/output) DOUBLE PRECISION array, dimension (N) */
13898 /*        On entry, D contains the diagonal elements of the */
13899 /*        bidiagonal matrix whose SVD is desired. On normal exit, */
13900 /*        D contains the singular values in decreasing order. */
13901 
13902 /*  E     (input/output) DOUBLE PRECISION array, dimension (N) */
13903 /*        On entry, elements E(1:N-1) contain the off-diagonal elements */
13904 /*        of the bidiagonal matrix whose SVD is desired. */
13905 /*        On exit, E is overwritten. */
13906 
13907 /*  WORK  (workspace) DOUBLE PRECISION array, dimension (4*N) */
13908 
13909 /*  INFO  (output) INTEGER */
13910 /*        = 0: successful exit */
13911 /*        < 0: if INFO = -i, the i-th argument had an illegal value */
13912 /*        > 0: the algorithm failed */
13913 /*             = 1, a split was marked by a positive value in E */
13914 /*             = 2, current block of Z not diagonalized after 30*N */
13915 /*                  iterations (in inner while loop) */
13916 /*             = 3, termination criterion of outer while loop not met */
13917 /*                  (program created more than N unreduced blocks) */
13918 
13919 /*  ===================================================================== */
13920 
13921 /*     .. Parameters .. */
13922 /*     .. */
13923 /*     .. Local Scalars .. */
13924 /*     .. */
13925 /*     .. External Subroutines .. */
13926 /*     .. */
13927 /*     .. External Functions .. */
13928 /*     .. */
13929 /*     .. Intrinsic Functions .. */
13930 /*     .. */
13931 /*     .. Executable Statements .. */
13932 
13933     /* Parameter adjustments */
13934     --work;
13935     --e;
13936     --d__;
13937 
13938     /* Function Body */
13939     *info = 0;
13940     if (*n < 0) {
13941 	*info = -2;
13942 	i__1 = -(*info);
13943 	xerbla_("DLASQ1", &i__1);
13944 	return 0;
13945     } else if (*n == 0) {
13946 	return 0;
13947     } else if (*n == 1) {
13948 	d__[1] = abs(d__[1]);
13949 	return 0;
13950     } else if (*n == 2) {
13951 	dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx);
13952 	d__[1] = sigmx;
13953 	d__[2] = sigmn;
13954 	return 0;
13955     }
13956 
13957 /*     Estimate the largest singular value. */
13958 
13959     sigmx = 0.;
13960     i__1 = *n - 1;
13961     for (i__ = 1; i__ <= i__1; ++i__) {
13962 	d__[i__] = (d__1 = d__[i__], abs(d__1));
13963 /* Computing MAX */
13964 	d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1));
13965 	sigmx = std::max(d__2,d__3);
13966 /* L10: */
13967     }
13968     d__[*n] = (d__1 = d__[*n], abs(d__1));
13969 
13970 /*     Early return if SIGMX is zero (matrix is already diagonal). */
13971 
13972     if (sigmx == 0.) {
13973 	dlasrt_("D", n, &d__[1], &iinfo);
13974 	return 0;
13975     }
13976 
13977     i__1 = *n;
13978     for (i__ = 1; i__ <= i__1; ++i__) {
13979 /* Computing MAX */
13980 	d__1 = sigmx, d__2 = d__[i__];
13981 	sigmx = std::max(d__1,d__2);
13982 /* L20: */
13983     }
13984 
13985 /*     Copy D and E into WORK (in the Z format) and scale (squaring the */
13986 /*     input data makes scaling by a power of the radix pointless). */
13987 
13988     eps = dlamch_("Precision");
13989     safmin = dlamch_("Safe minimum");
13990     scale = sqrt(eps / safmin);
13991     dcopy_(n, &d__[1], &c__1, &work[1], &c__2);
13992     i__1 = *n - 1;
13993     dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2);
13994     i__1 = (*n << 1) - 1;
13995     i__2 = (*n << 1) - 1;
13996     dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2,
13997 	    &iinfo);
13998 
13999 /*     Compute the q's and e's. */
14000 
14001     i__1 = (*n << 1) - 1;
14002     for (i__ = 1; i__ <= i__1; ++i__) {
14003 /* Computing 2nd power */
14004 	d__1 = work[i__];
14005 	work[i__] = d__1 * d__1;
14006 /* L30: */
14007     }
14008     work[*n * 2] = 0.;
14009 
14010     dlasq2_(n, &work[1], info);
14011 
14012     if (*info == 0) {
14013 	i__1 = *n;
14014 	for (i__ = 1; i__ <= i__1; ++i__) {
14015 	    d__[i__] = sqrt(work[i__]);
14016 /* L40: */
14017 	}
14018 	dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, &
14019 		iinfo);
14020     }
14021 
14022     return 0;
14023 
14024 /*     End of DLASQ1 */
14025 
14026 } /* dlasq1_ */
14027 
dlasq2_(integer * n,double * z__,integer * info)14028 /* Subroutine */ int dlasq2_(integer *n, double *z__, integer *info)
14029 {
14030 	/* Table of constant values */
14031 	static integer c__1 = 1;
14032 	static integer c__2 = 2;
14033 	static integer c__10 = 10;
14034 	static integer c__3 = 3;
14035 	static integer c__4 = 4;
14036 	static integer c__11 = 11;
14037 
14038     /* System generated locals */
14039     integer i__1, i__2, i__3;
14040     double d__1, d__2;
14041 
14042     /* Local variables */
14043     double d__, e, g;
14044     integer k;
14045     double s, t;
14046     integer i0, i4, n0;
14047     double dn;
14048     integer pp;
14049     double dn1, dn2, dee, eps, tau, tol;
14050     integer ipn4;
14051     double tol2;
14052     bool ieee;
14053     integer nbig;
14054     double dmin__, emin, emax;
14055     integer kmin, ndiv, iter;
14056     double qmin, temp, qmax, zmax;
14057     integer splt;
14058     double dmin1, dmin2;
14059     integer nfail;
14060     double desig, trace, sigma;
14061     integer iinfo, ttype;
14062     double deemin;
14063     integer iwhila, iwhilb;
14064     double oldemn, safmin;
14065 
14066 
14067 /*  -- LAPACK routine (version 3.2)                                    -- */
14068 
14069 /*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
14070 /*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
14071 /*  -- Berkeley                                                        -- */
14072 /*  -- November 2008                                                   -- */
14073 
14074 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
14075 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
14076 
14077 /*     .. Scalar Arguments .. */
14078 /*     .. */
14079 /*     .. Array Arguments .. */
14080 /*     .. */
14081 
14082 /*  Purpose */
14083 /*  ======= */
14084 
14085 /*  DLASQ2 computes all the eigenvalues of the symmetric positive */
14086 /*  definite tridiagonal matrix associated with the qd array Z to high */
14087 /*  relative accuracy are computed to high relative accuracy, in the */
14088 /*  absence of denormalization, underflow and overflow. */
14089 
14090 /*  To see the relation of Z to the tridiagonal matrix, let L be a */
14091 /*  unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and */
14092 /*  let U be an upper bidiagonal matrix with 1's above and diagonal */
14093 /*  Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the */
14094 /*  symmetric tridiagonal to which it is similar. */
14095 
14096 /*  Note : DLASQ2 defines a logical variable, IEEE, which is true */
14097 /*  on machines which follow ieee-754 floating-point standard in their */
14098 /*  handling of infinities and NaNs, and false otherwise. This variable */
14099 /*  is passed to DLASQ3. */
14100 
14101 /*  Arguments */
14102 /*  ========= */
14103 
14104 /*  N     (input) INTEGER */
14105 /*        The number of rows and columns in the matrix. N >= 0. */
14106 
14107 /*  Z     (input/output) DOUBLE PRECISION array, dimension ( 4*N ) */
14108 /*        On entry Z holds the qd array. On exit, entries 1 to N hold */
14109 /*        the eigenvalues in decreasing order, Z( 2*N+1 ) holds the */
14110 /*        trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If */
14111 /*        N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) */
14112 /*        holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of */
14113 /*        shifts that failed. */
14114 
14115 /*  INFO  (output) INTEGER */
14116 /*        = 0: successful exit */
14117 /*        < 0: if the i-th argument is a scalar and had an illegal */
14118 /*             value, then INFO = -i, if the i-th argument is an */
14119 /*             array and the j-entry had an illegal value, then */
14120 /*             INFO = -(i*100+j) */
14121 /*        > 0: the algorithm failed */
14122 /*              = 1, a split was marked by a positive value in E */
14123 /*              = 2, current block of Z not diagonalized after 30*N */
14124 /*                   iterations (in inner while loop) */
14125 /*              = 3, termination criterion of outer while loop not met */
14126 /*                   (program created more than N unreduced blocks) */
14127 
14128 /*  Further Details */
14129 /*  =============== */
14130 /*  Local Variables: I0:N0 defines a current unreduced segment of Z. */
14131 /*  The shifts are accumulated in SIGMA. Iteration count is in ITER. */
14132 /*  Ping-pong is controlled by PP (alternates between 0 and 1). */
14133 
14134 /*  ===================================================================== */
14135 
14136 /*     .. Parameters .. */
14137 /*     .. */
14138 /*     .. Local Scalars .. */
14139 /*     .. */
14140 /*     .. External Subroutines .. */
14141 /*     .. */
14142 /*     .. External Functions .. */
14143 /*     .. */
14144 /*     .. Intrinsic Functions .. */
14145 /*     .. */
14146 /*     .. Executable Statements .. */
14147 
14148 /*     Test the input arguments. */
14149 /*     (in case DLASQ2 is not called by DLASQ1) */
14150 
14151     /* Parameter adjustments */
14152     --z__;
14153 
14154     /* Function Body */
14155     *info = 0;
14156     eps = dlamch_("Precision");
14157     safmin = dlamch_("Safe minimum");
14158     tol = eps * 100.;
14159 /* Computing 2nd power */
14160     d__1 = tol;
14161     tol2 = d__1 * d__1;
14162 
14163     if (*n < 0) {
14164 	*info = -1;
14165 	xerbla_("DLASQ2", &c__1);
14166 	return 0;
14167     } else if (*n == 0) {
14168 	return 0;
14169     } else if (*n == 1) {
14170 
14171 /*        1-by-1 case. */
14172 
14173 	if (z__[1] < 0.) {
14174 	    *info = -201;
14175 	    xerbla_("DLASQ2", &c__2);
14176 	}
14177 	return 0;
14178     } else if (*n == 2) {
14179 
14180 /*        2-by-2 case. */
14181 
14182 	if (z__[2] < 0. || z__[3] < 0.) {
14183 	    *info = -2;
14184 	    xerbla_("DLASQ2", &c__2);
14185 	    return 0;
14186 	} else if (z__[3] > z__[1]) {
14187 	    d__ = z__[3];
14188 	    z__[3] = z__[1];
14189 	    z__[1] = d__;
14190 	}
14191 	z__[5] = z__[1] + z__[2] + z__[3];
14192 	if (z__[2] > z__[3] * tol2) {
14193 	    t = (z__[1] - z__[3] + z__[2]) * .5;
14194 	    s = z__[3] * (z__[2] / t);
14195 	    if (s <= t) {
14196 		s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
14197 	    } else {
14198 		s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
14199 	    }
14200 	    t = z__[1] + (s + z__[2]);
14201 	    z__[3] *= z__[1] / t;
14202 	    z__[1] = t;
14203 	}
14204 	z__[2] = z__[3];
14205 	z__[6] = z__[2] + z__[1];
14206 	return 0;
14207     }
14208 
14209 /*     Check for negative data and compute sums of q's and e's. */
14210 
14211     z__[*n * 2] = 0.;
14212     emin = z__[2];
14213     qmax = 0.;
14214     zmax = 0.;
14215     d__ = 0.;
14216     e = 0.;
14217 
14218     i__1 = *n - 1 << 1;
14219     for (k = 1; k <= i__1; k += 2) {
14220 	if (z__[k] < 0.) {
14221 	    *info = -(k + 200);
14222 	    xerbla_("DLASQ2", &c__2);
14223 	    return 0;
14224 	} else if (z__[k + 1] < 0.) {
14225 	    *info = -(k + 201);
14226 	    xerbla_("DLASQ2", &c__2);
14227 	    return 0;
14228 	}
14229 	d__ += z__[k];
14230 	e += z__[k + 1];
14231 /* Computing MAX */
14232 	d__1 = qmax, d__2 = z__[k];
14233 	qmax = std::max (d__1,d__2);
14234 /* Computing MIN */
14235 	d__1 = emin, d__2 = z__[k + 1];
14236 	emin = std::min(d__1,d__2);
14237 /* Computing MAX */
14238 	d__1 = std::max (qmax,zmax), d__2 = z__[k + 1];
14239 	zmax = std::max (d__1,d__2);
14240 /* L10: */
14241     }
14242     if (z__[(*n << 1) - 1] < 0.) {
14243 	*info = -((*n << 1) + 199);
14244 	xerbla_("DLASQ2", &c__2);
14245 	return 0;
14246     }
14247     d__ += z__[(*n << 1) - 1];
14248 /* Computing MAX */
14249     d__1 = qmax, d__2 = z__[(*n << 1) - 1];
14250     qmax = std::max (d__1,d__2);
14251     zmax = std::max (qmax,zmax);
14252 
14253 /*     Check for diagonality. */
14254 
14255     if (e == 0.) {
14256 	i__1 = *n;
14257 	for (k = 2; k <= i__1; ++k) {
14258 	    z__[k] = z__[(k << 1) - 1];
14259 /* L20: */
14260 	}
14261 	dlasrt_("D", n, &z__[1], &iinfo);
14262 	z__[(*n << 1) - 1] = d__;
14263 	return 0;
14264     }
14265 
14266     trace = d__ + e;
14267 
14268 /*     Check for zero data. */
14269 
14270     if (trace == 0.) {
14271 	z__[(*n << 1) - 1] = 0.;
14272 	return 0;
14273     }
14274 
14275 /*     Check whether the machine is IEEE conformable. */
14276 
14277     ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2,
14278 	     &c__3, &c__4) == 1;
14279 
14280 /*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
14281 
14282     for (k = *n << 1; k >= 2; k += -2) {
14283 	z__[k * 2] = 0.;
14284 	z__[(k << 1) - 1] = z__[k];
14285 	z__[(k << 1) - 2] = 0.;
14286 	z__[(k << 1) - 3] = z__[k - 1];
14287 /* L30: */
14288     }
14289 
14290     i0 = 1;
14291     n0 = *n;
14292 
14293 /*     Reverse the qd-array, if warranted. */
14294 
14295     if (z__[(i0 << 2) - 3] * 1.5 < z__[(n0 << 2) - 3]) {
14296 	ipn4 = i0 + n0 << 2;
14297 	i__1 = i0 + n0 - 1 << 1;
14298 	for (i4 = i0 << 2; i4 <= i__1; i4 += 4) {
14299 	    temp = z__[i4 - 3];
14300 	    z__[i4 - 3] = z__[ipn4 - i4 - 3];
14301 	    z__[ipn4 - i4 - 3] = temp;
14302 	    temp = z__[i4 - 1];
14303 	    z__[i4 - 1] = z__[ipn4 - i4 - 5];
14304 	    z__[ipn4 - i4 - 5] = temp;
14305 /* L40: */
14306 	}
14307     }
14308 
14309 /*     Initial split checking via dqd and Li's test. */
14310 
14311     pp = 0;
14312 
14313     for (k = 1; k <= 2; ++k) {
14314 
14315 	d__ = z__[(n0 << 2) + pp - 3];
14316 	i__1 = (i0 << 2) + pp;
14317 	for (i4 = (n0 - 1 << 2) + pp; i4 >= i__1; i4 += -4) {
14318 	    if (z__[i4 - 1] <= tol2 * d__) {
14319 		z__[i4 - 1] = -0.;
14320 		d__ = z__[i4 - 3];
14321 	    } else {
14322 		d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
14323 	    }
14324 /* L50: */
14325 	}
14326 
14327 /*        dqd maps Z to ZZ plus Li's test. */
14328 
14329 	emin = z__[(i0 << 2) + pp + 1];
14330 	d__ = z__[(i0 << 2) + pp - 3];
14331 	i__1 = (n0 - 1 << 2) + pp;
14332 	for (i4 = (i0 << 2) + pp; i4 <= i__1; i4 += 4) {
14333 	    z__[i4 - (pp << 1) - 2] = d__ + z__[i4 - 1];
14334 	    if (z__[i4 - 1] <= tol2 * d__) {
14335 		z__[i4 - 1] = -0.;
14336 		z__[i4 - (pp << 1) - 2] = d__;
14337 		z__[i4 - (pp << 1)] = 0.;
14338 		d__ = z__[i4 + 1];
14339 	    } else if (safmin * z__[i4 + 1] < z__[i4 - (pp << 1) - 2] &&
14340 		    safmin * z__[i4 - (pp << 1) - 2] < z__[i4 + 1]) {
14341 		temp = z__[i4 + 1] / z__[i4 - (pp << 1) - 2];
14342 		z__[i4 - (pp << 1)] = z__[i4 - 1] * temp;
14343 		d__ *= temp;
14344 	    } else {
14345 		z__[i4 - (pp << 1)] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4 - (
14346 			pp << 1) - 2]);
14347 		d__ = z__[i4 + 1] * (d__ / z__[i4 - (pp << 1) - 2]);
14348 	    }
14349 /* Computing MIN */
14350 	    d__1 = emin, d__2 = z__[i4 - (pp << 1)];
14351 	    emin = std::min(d__1,d__2);
14352 /* L60: */
14353 	}
14354 	z__[(n0 << 2) - pp - 2] = d__;
14355 
14356 /*        Now find qmax. */
14357 
14358 	qmax = z__[(i0 << 2) - pp - 2];
14359 	i__1 = (n0 << 2) - pp - 2;
14360 	for (i4 = (i0 << 2) - pp + 2; i4 <= i__1; i4 += 4) {
14361 /* Computing MAX */
14362 	    d__1 = qmax, d__2 = z__[i4];
14363 	    qmax = std::max (d__1,d__2);
14364 /* L70: */
14365 	}
14366 
14367 /*        Prepare for the next iteration on K. */
14368 
14369 	pp = 1 - pp;
14370 /* L80: */
14371     }
14372 
14373 /*     Initialise variables to pass to DLASQ3. */
14374 
14375     ttype = 0;
14376     dmin1 = 0.;
14377     dmin2 = 0.;
14378     dn = 0.;
14379     dn1 = 0.;
14380     dn2 = 0.;
14381     g = 0.;
14382     tau = 0.;
14383 
14384     iter = 2;
14385     nfail = 0;
14386     ndiv = n0 - i0 << 1;
14387 
14388     i__1 = *n + 1;
14389     for (iwhila = 1; iwhila <= i__1; ++iwhila) {
14390 	if (n0 < 1) {
14391 	    goto L170;
14392 	}
14393 
14394 /*        While array unfinished do */
14395 
14396 /*        E(N0) holds the value of SIGMA when submatrix in I0:N0 */
14397 /*        splits from the rest of the array, but is negated. */
14398 
14399 	desig = 0.;
14400 	if (n0 == *n) {
14401 	    sigma = 0.;
14402 	} else {
14403 	    sigma = -z__[(n0 << 2) - 1];
14404 	}
14405 	if (sigma < 0.) {
14406 	    *info = 1;
14407 	    return 0;
14408 	}
14409 
14410 /*        Find last unreduced submatrix's top index I0, find QMAX and */
14411 /*        EMIN. Find Gershgorin-type bound if Q's much greater than E's. */
14412 
14413 	emax = 0.;
14414 	if (n0 > i0) {
14415 	    emin = (d__1 = z__[(n0 << 2) - 5], abs(d__1));
14416 	} else {
14417 	    emin = 0.;
14418 	}
14419 	qmin = z__[(n0 << 2) - 3];
14420 	qmax = qmin;
14421 	for (i4 = n0 << 2; i4 >= 8; i4 += -4) {
14422 	    if (z__[i4 - 5] <= 0.) {
14423 		goto L100;
14424 	    }
14425 	    if (qmin >= emax * 4.) {
14426 /* Computing MIN */
14427 		d__1 = qmin, d__2 = z__[i4 - 3];
14428 		qmin = std::min(d__1,d__2);
14429 /* Computing MAX */
14430 		d__1 = emax, d__2 = z__[i4 - 5];
14431 		emax = std::max (d__1,d__2);
14432 	    }
14433 /* Computing MAX */
14434 	    d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
14435 	    qmax = std::max (d__1,d__2);
14436 /* Computing MIN */
14437 	    d__1 = emin, d__2 = z__[i4 - 5];
14438 	    emin = std::min(d__1,d__2);
14439 /* L90: */
14440 	}
14441 	i4 = 4;
14442 
14443 L100:
14444 	i0 = i4 / 4;
14445 	pp = 0;
14446 
14447 	if (n0 - i0 > 1) {
14448 	    dee = z__[(i0 << 2) - 3];
14449 	    deemin = dee;
14450 	    kmin = i0;
14451 	    i__2 = (n0 << 2) - 3;
14452 	    for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) {
14453 		dee = z__[i4] * (dee / (dee + z__[i4 - 2]));
14454 		if (dee <= deemin) {
14455 		    deemin = dee;
14456 		    kmin = (i4 + 3) / 4;
14457 		}
14458 /* L110: */
14459 	    }
14460 	    if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] *
14461 		    .5) {
14462 		ipn4 = i0 + n0 << 2;
14463 		pp = 2;
14464 		i__2 = i0 + n0 - 1 << 1;
14465 		for (i4 = i0 << 2; i4 <= i__2; i4 += 4) {
14466 		    temp = z__[i4 - 3];
14467 		    z__[i4 - 3] = z__[ipn4 - i4 - 3];
14468 		    z__[ipn4 - i4 - 3] = temp;
14469 		    temp = z__[i4 - 2];
14470 		    z__[i4 - 2] = z__[ipn4 - i4 - 2];
14471 		    z__[ipn4 - i4 - 2] = temp;
14472 		    temp = z__[i4 - 1];
14473 		    z__[i4 - 1] = z__[ipn4 - i4 - 5];
14474 		    z__[ipn4 - i4 - 5] = temp;
14475 		    temp = z__[i4];
14476 		    z__[i4] = z__[ipn4 - i4 - 4];
14477 		    z__[ipn4 - i4 - 4] = temp;
14478 /* L120: */
14479 		}
14480 	    }
14481 	}
14482 
14483 /*        Put -(initial shift) into DMIN. */
14484 
14485 /* Computing MAX */
14486 	d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
14487 	dmin__ = -std::max (d__1,d__2);
14488 
14489 /*        Now I0:N0 is unreduced. */
14490 /*        PP = 0 for ping, PP = 1 for pong. */
14491 /*        PP = 2 indicates that flipping was applied to the Z array and */
14492 /*               and that the tests for deflation upon entry in DLASQ3 */
14493 /*               should not be performed. */
14494 
14495 	nbig = (n0 - i0 + 1) * 30;
14496 	i__2 = nbig;
14497 	for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
14498 	    if (i0 > n0) {
14499 		goto L150;
14500 	    }
14501 
14502 /*           While submatrix unfinished take a good dqds step. */
14503 
14504 	    dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
14505 		    nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, &
14506 		    dn1, &dn2, &g, &tau);
14507 
14508 	    pp = 1 - pp;
14509 
14510 /*           When EMIN is very small check for splits. */
14511 
14512 	    if (pp == 0 && n0 - i0 >= 3) {
14513 		if (z__[n0 * 4] <= tol2 * qmax || z__[(n0 << 2) - 1] <= tol2 *
14514 			 sigma) {
14515 		    splt = i0 - 1;
14516 		    qmax = z__[(i0 << 2) - 3];
14517 		    emin = z__[(i0 << 2) - 1];
14518 		    oldemn = z__[i0 * 4];
14519 		    i__3 = n0 - 3 << 2;
14520 		    for (i4 = i0 << 2; i4 <= i__3; i4 += 4) {
14521 			if (z__[i4] <= tol2 * z__[i4 - 3] || z__[i4 - 1] <=
14522 				tol2 * sigma) {
14523 			    z__[i4 - 1] = -sigma;
14524 			    splt = i4 / 4;
14525 			    qmax = 0.;
14526 			    emin = z__[i4 + 3];
14527 			    oldemn = z__[i4 + 4];
14528 			} else {
14529 /* Computing MAX */
14530 			    d__1 = qmax, d__2 = z__[i4 + 1];
14531 			    qmax = std::max (d__1,d__2);
14532 /* Computing MIN */
14533 			    d__1 = emin, d__2 = z__[i4 - 1];
14534 			    emin = std::min(d__1,d__2);
14535 /* Computing MIN */
14536 			    d__1 = oldemn, d__2 = z__[i4];
14537 			    oldemn = std::min(d__1,d__2);
14538 			}
14539 /* L130: */
14540 		    }
14541 		    z__[(n0 << 2) - 1] = emin;
14542 		    z__[n0 * 4] = oldemn;
14543 		    i0 = splt + 1;
14544 		}
14545 	    }
14546 
14547 /* L140: */
14548 	}
14549 
14550 	*info = 2;
14551 	return 0;
14552 
14553 /*        end IWHILB */
14554 
14555 L150:
14556 
14557 /* L160: */
14558 	;
14559     }
14560 
14561     *info = 3;
14562     return 0;
14563 
14564 /*     end IWHILA */
14565 
14566 L170:
14567 
14568 /*     Move q's to the front. */
14569 
14570     i__1 = *n;
14571     for (k = 2; k <= i__1; ++k) {
14572 	z__[k] = z__[(k << 2) - 3];
14573 /* L180: */
14574     }
14575 
14576 /*     Sort and compute sum of eigenvalues. */
14577 
14578     dlasrt_("D", n, &z__[1], &iinfo);
14579 
14580     e = 0.;
14581     for (k = *n; k >= 1; --k) {
14582 	e += z__[k];
14583 /* L190: */
14584     }
14585 
14586 /*     Store trace, sum(eigenvalues) and information on performance. */
14587 
14588     z__[(*n << 1) + 1] = trace;
14589     z__[(*n << 1) + 2] = e;
14590     z__[(*n << 1) + 3] = (double) iter;
14591 /* Computing 2nd power */
14592     i__1 = *n;
14593     z__[(*n << 1) + 4] = (double) ndiv / (double) (i__1 * i__1);
14594     z__[(*n << 1) + 5] = nfail * 100. / (double) iter;
14595     return 0;
14596 
14597 /*     End of DLASQ2 */
14598 
14599 } /* dlasq2_ */
14600 
dlasq3_(integer * i0,integer * n0,double * z__,integer * pp,double * dmin__,double * sigma,double * desig,double * qmax,integer * nfail,integer * iter,integer * ndiv,bool * ieee,integer * ttype,double * dmin1,double * dmin2,double * dn,double * dn1,double * dn2,double * g,double * tau)14601 /* Subroutine */ int dlasq3_(integer *i0, integer *n0, double *z__, integer *pp, double *dmin__, double *sigma,
14602 	double *desig, double *qmax, integer *nfail, integer *iter, integer *ndiv, bool *ieee,
14603 	integer *ttype, double *dmin1, double *dmin2, double *dn, double *dn1, double *dn2,
14604 	double *g, double *tau)
14605 {
14606     /* System generated locals */
14607     integer i__1;
14608     double d__1, d__2;
14609 
14610     /* Local variables */
14611     double s, t;
14612     integer j4, nn;
14613     double eps, tol;
14614     integer n0in, ipn4;
14615     double tol2, temp;
14616 
14617 
14618 /*  -- LAPACK routine (version 3.2)                                    -- */
14619 
14620 /*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
14621 /*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
14622 /*  -- Berkeley                                                        -- */
14623 /*  -- November 2008                                                   -- */
14624 
14625 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
14626 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
14627 
14628 /*     .. Scalar Arguments .. */
14629 /*     .. */
14630 /*     .. Array Arguments .. */
14631 /*     .. */
14632 
14633 /*  Purpose */
14634 /*  ======= */
14635 
14636 /*  DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
14637 /*  In case of failure it changes shifts, and tries again until output */
14638 /*  is positive. */
14639 
14640 /*  Arguments */
14641 /*  ========= */
14642 
14643 /*  I0     (input) INTEGER */
14644 /*         First index. */
14645 
14646 /*  N0     (input) INTEGER */
14647 /*         Last index. */
14648 
14649 /*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N ) */
14650 /*         Z holds the qd array. */
14651 
14652 /*  PP     (input/output) INTEGER */
14653 /*         PP=0 for ping, PP=1 for pong. */
14654 /*         PP=2 indicates that flipping was applied to the Z array */
14655 /*         and that the initial tests for deflation should not be */
14656 /*         performed. */
14657 
14658 /*  DMIN   (output) DOUBLE PRECISION */
14659 /*         Minimum value of d. */
14660 
14661 /*  SIGMA  (output) DOUBLE PRECISION */
14662 /*         Sum of shifts used in current segment. */
14663 
14664 /*  DESIG  (input/output) DOUBLE PRECISION */
14665 /*         Lower order part of SIGMA */
14666 
14667 /*  QMAX   (input) DOUBLE PRECISION */
14668 /*         Maximum value of q. */
14669 
14670 /*  NFAIL  (output) INTEGER */
14671 /*         Number of times shift was too big. */
14672 
14673 /*  ITER   (output) INTEGER */
14674 /*         Number of iterations. */
14675 
14676 /*  NDIV   (output) INTEGER */
14677 /*         Number of divisions. */
14678 
14679 /*  IEEE   (input) LOGICAL */
14680 /*         Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
14681 
14682 /*  TTYPE  (input/output) INTEGER */
14683 /*         Shift type. */
14684 
14685 /*  DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
14686 /*         These are passed as arguments in order to save their values */
14687 /*         between calls to DLASQ3. */
14688 
14689 /*  ===================================================================== */
14690 
14691 /*     .. Parameters .. */
14692 /*     .. */
14693 /*     .. Local Scalars .. */
14694 /*     .. */
14695 /*     .. External Subroutines .. */
14696 /*     .. */
14697 /*     .. External Function .. */
14698 /*     .. */
14699 /*     .. Intrinsic Functions .. */
14700 /*     .. */
14701 /*     .. Executable Statements .. */
14702 
14703     /* Parameter adjustments */
14704     --z__;
14705 
14706     /* Function Body */
14707     n0in = *n0;
14708     eps = dlamch_("Precision");
14709     tol = eps * 100.;
14710 /* Computing 2nd power */
14711     d__1 = tol;
14712     tol2 = d__1 * d__1;
14713 
14714 /*     Check for deflation. */
14715 
14716 L10:
14717 
14718     if (*n0 < *i0) {
14719 	return 0;
14720     }
14721     if (*n0 == *i0) {
14722 	goto L20;
14723     }
14724     nn = (*n0 << 2) + *pp;
14725     if (*n0 == *i0 + 1) {
14726 	goto L40;
14727     }
14728 
14729 /*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
14730 
14731     if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
14732 	    4] > tol2 * z__[nn - 7]) {
14733 	goto L30;
14734     }
14735 
14736 L20:
14737 
14738     z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
14739     --(*n0);
14740     goto L10;
14741 
14742 /*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
14743 
14744 L30:
14745 
14746     if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
14747 	    nn - 11]) {
14748 	goto L50;
14749     }
14750 
14751 L40:
14752 
14753     if (z__[nn - 3] > z__[nn - 7]) {
14754 	s = z__[nn - 3];
14755 	z__[nn - 3] = z__[nn - 7];
14756 	z__[nn - 7] = s;
14757     }
14758     if (z__[nn - 5] > z__[nn - 3] * tol2) {
14759 	t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
14760 	s = z__[nn - 3] * (z__[nn - 5] / t);
14761 	if (s <= t) {
14762 	    s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.)));
14763 	} else {
14764 	    s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
14765 	}
14766 	t = z__[nn - 7] + (s + z__[nn - 5]);
14767 	z__[nn - 3] *= z__[nn - 7] / t;
14768 	z__[nn - 7] = t;
14769     }
14770     z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
14771     z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
14772     *n0 += -2;
14773     goto L10;
14774 
14775 L50:
14776     if (*pp == 2) {
14777 	*pp = 0;
14778     }
14779 
14780 /*     Reverse the qd-array, if warranted. */
14781 
14782     if (*dmin__ <= 0. || *n0 < n0in) {
14783 	if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
14784 	    ipn4 = *i0 + *n0 << 2;
14785 	    i__1 = *i0 + *n0 - 1 << 1;
14786 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
14787 		temp = z__[j4 - 3];
14788 		z__[j4 - 3] = z__[ipn4 - j4 - 3];
14789 		z__[ipn4 - j4 - 3] = temp;
14790 		temp = z__[j4 - 2];
14791 		z__[j4 - 2] = z__[ipn4 - j4 - 2];
14792 		z__[ipn4 - j4 - 2] = temp;
14793 		temp = z__[j4 - 1];
14794 		z__[j4 - 1] = z__[ipn4 - j4 - 5];
14795 		z__[ipn4 - j4 - 5] = temp;
14796 		temp = z__[j4];
14797 		z__[j4] = z__[ipn4 - j4 - 4];
14798 		z__[ipn4 - j4 - 4] = temp;
14799 /* L60: */
14800 	    }
14801 	    if (*n0 - *i0 <= 4) {
14802 		z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
14803 		z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
14804 	    }
14805 /* Computing MIN */
14806 	    d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
14807 	    *dmin2 = std::min(d__1,d__2);
14808 /* Computing MIN */
14809 	    d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
14810 		    , d__1 = std::min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
14811 	    z__[(*n0 << 2) + *pp - 1] = std::min(d__1,d__2);
14812 /* Computing MIN */
14813 	    d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
14814 		     std::min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
14815 	    z__[(*n0 << 2) - *pp] = std::min(d__1,d__2);
14816 /* Computing MAX */
14817 	    d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = std::max(d__1,
14818 		    d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
14819 	    *qmax = std::max(d__1,d__2);
14820 	    *dmin__ = -0.;
14821 	}
14822     }
14823 
14824 /*     Choose a shift. */
14825 
14826     dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, tau, ttype, g);
14827 
14828 /*     Call dqds until DMIN > 0. */
14829 
14830 L70:
14831 
14832     dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
14833 	    ieee);
14834 
14835     *ndiv += *n0 - *i0 + 2;
14836     ++(*iter);
14837 
14838 /*     Check status. */
14839 
14840     if (*dmin__ >= 0. && *dmin1 > 0.) {
14841 
14842 /*        Success. */
14843 
14844 	goto L90;
14845 
14846     } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol
14847 	    * (*sigma + *dn1) && abs(*dn) < tol * *sigma) {
14848 
14849 /*        Convergence hidden by negative DN. */
14850 
14851 	z__[(*n0 - 1 << 2) - *pp + 2] = 0.;
14852 	*dmin__ = 0.;
14853 	goto L90;
14854     } else if (*dmin__ < 0.) {
14855 
14856 /*        TAU too big. Select new TAU and try again. */
14857 
14858 	++(*nfail);
14859 	if (*ttype < -22) {
14860 
14861 /*           Failed twice. Play it safe. */
14862 
14863 	    *tau = 0.;
14864 	} else if (*dmin1 > 0.) {
14865 
14866 /*           Late failure. Gives excellent shift. */
14867 
14868 	    *tau = (*tau + *dmin__) * (1. - eps * 2.);
14869 	    *ttype += -11;
14870 	} else {
14871 
14872 /*           Early failure. Divide by 4. */
14873 
14874 	    *tau *= .25;
14875 	    *ttype += -12;
14876 	}
14877 	goto L70;
14878     } else if (disnan_(dmin__)) {
14879 
14880 /*        NaN. */
14881 
14882 	if (*tau == 0.) {
14883 	    goto L80;
14884 	} else {
14885 	    *tau = 0.;
14886 	    goto L70;
14887 	}
14888     } else {
14889 
14890 /*        Possible underflow. Play it safe. */
14891 
14892 	goto L80;
14893     }
14894 
14895 /*     Risk of underflow. */
14896 
14897 L80:
14898     dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
14899     *ndiv += *n0 - *i0 + 2;
14900     ++(*iter);
14901     *tau = 0.;
14902 
14903 L90:
14904     if (*tau < *sigma) {
14905 	*desig += *tau;
14906 	t = *sigma + *desig;
14907 	*desig -= t - *sigma;
14908     } else {
14909 	t = *sigma + *tau;
14910 	*desig = *sigma - (t - *tau) + *desig;
14911     }
14912     *sigma = t;
14913 
14914     return 0;
14915 
14916 /*     End of DLASQ3 */
14917 
14918 } /* dlasq3_ */
14919 
dlasq4_(integer * i0,integer * n0,double * z__,integer * pp,integer * n0in,double * dmin__,double * dmin1,double * dmin2,double * dn,double * dn1,double * dn2,double * tau,integer * ttype,double * g)14920 /* Subroutine */ int dlasq4_(integer *i0, integer *n0, double *z__,
14921 	integer *pp, integer *n0in, double *dmin__, double *dmin1,
14922 	double *dmin2, double *dn, double *dn1, double *dn2,
14923 	double *tau, integer *ttype, double *g)
14924 {
14925     /* System generated locals */
14926     integer i__1;
14927     double d__1, d__2;
14928 
14929     /* Local variables */
14930     double s, a2, b1, b2;
14931     integer i4, nn, np;
14932     double gam, gap1, gap2;
14933 
14934 
14935 /*  -- LAPACK routine (version 3.2)                                    -- */
14936 
14937 /*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
14938 /*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
14939 /*  -- Berkeley                                                        -- */
14940 /*  -- November 2008                                                   -- */
14941 
14942 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
14943 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
14944 
14945 /*     .. Scalar Arguments .. */
14946 /*     .. */
14947 /*     .. Array Arguments .. */
14948 /*     .. */
14949 
14950 /*  Purpose */
14951 /*  ======= */
14952 
14953 /*  DLASQ4 computes an approximation TAU to the smallest eigenvalue */
14954 /*  using values of d from the previous transform. */
14955 
14956 /*  I0    (input) INTEGER */
14957 /*        First index. */
14958 
14959 /*  N0    (input) INTEGER */
14960 /*        Last index. */
14961 
14962 /*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
14963 /*        Z holds the qd array. */
14964 
14965 /*  PP    (input) INTEGER */
14966 /*        PP=0 for ping, PP=1 for pong. */
14967 
14968 /*  NOIN  (input) INTEGER */
14969 /*        The value of N0 at start of EIGTEST. */
14970 
14971 /*  DMIN  (input) DOUBLE PRECISION */
14972 /*        Minimum value of d. */
14973 
14974 /*  DMIN1 (input) DOUBLE PRECISION */
14975 /*        Minimum value of d, excluding D( N0 ). */
14976 
14977 /*  DMIN2 (input) DOUBLE PRECISION */
14978 /*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
14979 
14980 /*  DN    (input) DOUBLE PRECISION */
14981 /*        d(N) */
14982 
14983 /*  DN1   (input) DOUBLE PRECISION */
14984 /*        d(N-1) */
14985 
14986 /*  DN2   (input) DOUBLE PRECISION */
14987 /*        d(N-2) */
14988 
14989 /*  TAU   (output) DOUBLE PRECISION */
14990 /*        This is the shift. */
14991 
14992 /*  TTYPE (output) INTEGER */
14993 /*        Shift type. */
14994 
14995 /*  G     (input/output) REAL */
14996 /*        G is passed as an argument in order to save its value between */
14997 /*        calls to DLASQ4. */
14998 
14999 /*  Further Details */
15000 /*  =============== */
15001 /*  CNST1 = 9/16 */
15002 
15003 /*  ===================================================================== */
15004 
15005 /*     .. Parameters .. */
15006 /*     .. */
15007 /*     .. Local Scalars .. */
15008 /*     .. */
15009 /*     .. Intrinsic Functions .. */
15010 /*     .. */
15011 /*     .. Executable Statements .. */
15012 
15013 /*     A negative DMIN forces the shift to take that absolute value */
15014 /*     TTYPE records the type of shift. */
15015 
15016     /* Parameter adjustments */
15017     --z__;
15018 
15019     /* Function Body */
15020     if (*dmin__ <= 0.) {
15021 	*tau = -(*dmin__);
15022 	*ttype = -1;
15023 	return 0;
15024     }
15025 
15026     nn = (*n0 << 2) + *pp;
15027     if (*n0in == *n0) {
15028 
15029 /*        No eigenvalues deflated. */
15030 
15031 	if (*dmin__ == *dn || *dmin__ == *dn1) {
15032 
15033 	    b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
15034 	    b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
15035 	    a2 = z__[nn - 7] + z__[nn - 5];
15036 
15037 /*           Cases 2 and 3. */
15038 
15039 	    if (*dmin__ == *dn && *dmin1 == *dn1) {
15040 		gap2 = *dmin2 - a2 - *dmin2 * .25;
15041 		if (gap2 > 0. && gap2 > b2) {
15042 		    gap1 = a2 - *dn - b2 / gap2 * b2;
15043 		} else {
15044 		    gap1 = a2 - *dn - (b1 + b2);
15045 		}
15046 		if (gap1 > 0. && gap1 > b1) {
15047 /* Computing MAX */
15048 		    d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
15049 		    s = std::max(d__1,d__2);
15050 		    *ttype = -2;
15051 		} else {
15052 		    s = 0.;
15053 		    if (*dn > b1) {
15054 			s = *dn - b1;
15055 		    }
15056 		    if (a2 > b1 + b2) {
15057 /* Computing MIN */
15058 			d__1 = s, d__2 = a2 - (b1 + b2);
15059 			s = std::min(d__1,d__2);
15060 		    }
15061 /* Computing MAX */
15062 		    d__1 = s, d__2 = *dmin__ * .333;
15063 		    s = std::max(d__1,d__2);
15064 		    *ttype = -3;
15065 		}
15066 	    } else {
15067 
15068 /*              Case 4. */
15069 
15070 		*ttype = -4;
15071 		s = *dmin__ * .25;
15072 		if (*dmin__ == *dn) {
15073 		    gam = *dn;
15074 		    a2 = 0.;
15075 		    if (z__[nn - 5] > z__[nn - 7]) {
15076 			return 0;
15077 		    }
15078 		    b2 = z__[nn - 5] / z__[nn - 7];
15079 		    np = nn - 9;
15080 		} else {
15081 		    np = nn - (*pp << 1);
15082 		    b2 = z__[np - 2];
15083 		    gam = *dn1;
15084 		    if (z__[np - 4] > z__[np - 2]) {
15085 			return 0;
15086 		    }
15087 		    a2 = z__[np - 4] / z__[np - 2];
15088 		    if (z__[nn - 9] > z__[nn - 11]) {
15089 			return 0;
15090 		    }
15091 		    b2 = z__[nn - 9] / z__[nn - 11];
15092 		    np = nn - 13;
15093 		}
15094 
15095 /*              Approximate contribution to norm squared from I < NN-1. */
15096 
15097 		a2 += b2;
15098 		i__1 = (*i0 << 2) - 1 + *pp;
15099 		for (i4 = np; i4 >= i__1; i4 += -4) {
15100 		    if (b2 == 0.) {
15101 			goto L20;
15102 		    }
15103 		    b1 = b2;
15104 		    if (z__[i4] > z__[i4 - 2]) {
15105 			return 0;
15106 		    }
15107 		    b2 *= z__[i4] / z__[i4 - 2];
15108 		    a2 += b2;
15109 		    if (std::max(b2,b1) * 100. < a2 || .563 < a2) {
15110 			goto L20;
15111 		    }
15112 /* L10: */
15113 		}
15114 L20:
15115 		a2 *= 1.05;
15116 
15117 /*              Rayleigh quotient residual bound. */
15118 
15119 		if (a2 < .563) {
15120 		    s = gam * (1. - sqrt(a2)) / (a2 + 1.);
15121 		}
15122 	    }
15123 	} else if (*dmin__ == *dn2) {
15124 
15125 /*           Case 5. */
15126 
15127 	    *ttype = -5;
15128 	    s = *dmin__ * .25;
15129 
15130 /*           Compute contribution to norm squared from I > NN-2. */
15131 
15132 	    np = nn - (*pp << 1);
15133 	    b1 = z__[np - 2];
15134 	    b2 = z__[np - 6];
15135 	    gam = *dn2;
15136 	    if (z__[np - 8] > b2 || z__[np - 4] > b1) {
15137 		return 0;
15138 	    }
15139 	    a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
15140 
15141 /*           Approximate contribution to norm squared from I < NN-2. */
15142 
15143 	    if (*n0 - *i0 > 2) {
15144 		b2 = z__[nn - 13] / z__[nn - 15];
15145 		a2 += b2;
15146 		i__1 = (*i0 << 2) - 1 + *pp;
15147 		for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
15148 		    if (b2 == 0.) {
15149 			goto L40;
15150 		    }
15151 		    b1 = b2;
15152 		    if (z__[i4] > z__[i4 - 2]) {
15153 			return 0;
15154 		    }
15155 		    b2 *= z__[i4] / z__[i4 - 2];
15156 		    a2 += b2;
15157 		    if (std::max(b2,b1) * 100. < a2 || .563 < a2) {
15158 			goto L40;
15159 		    }
15160 /* L30: */
15161 		}
15162 L40:
15163 		a2 *= 1.05;
15164 	    }
15165 
15166 	    if (a2 < .563) {
15167 		s = gam * (1. - sqrt(a2)) / (a2 + 1.);
15168 	    }
15169 	} else {
15170 
15171 /*           Case 6, no information to guide us. */
15172 
15173 	    if (*ttype == -6) {
15174 		*g += (1. - *g) * .333;
15175 	    } else if (*ttype == -18) {
15176 		*g = .083250000000000005;
15177 	    } else {
15178 		*g = .25;
15179 	    }
15180 	    s = *g * *dmin__;
15181 	    *ttype = -6;
15182 	}
15183 
15184     } else if (*n0in == *n0 + 1) {
15185 
15186 /*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
15187 
15188 	if (*dmin1 == *dn1 && *dmin2 == *dn2) {
15189 
15190 /*           Cases 7 and 8. */
15191 
15192 	    *ttype = -7;
15193 	    s = *dmin1 * .333;
15194 	    if (z__[nn - 5] > z__[nn - 7]) {
15195 		return 0;
15196 	    }
15197 	    b1 = z__[nn - 5] / z__[nn - 7];
15198 	    b2 = b1;
15199 	    if (b2 == 0.) {
15200 		goto L60;
15201 	    }
15202 	    i__1 = (*i0 << 2) - 1 + *pp;
15203 	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
15204 		a2 = b1;
15205 		if (z__[i4] > z__[i4 - 2]) {
15206 		    return 0;
15207 		}
15208 		b1 *= z__[i4] / z__[i4 - 2];
15209 		b2 += b1;
15210 		if (std::max(b1,a2) * 100. < b2) {
15211 		    goto L60;
15212 		}
15213 /* L50: */
15214 	    }
15215 L60:
15216 	    b2 = sqrt(b2 * 1.05);
15217 /* Computing 2nd power */
15218 	    d__1 = b2;
15219 	    a2 = *dmin1 / (d__1 * d__1 + 1.);
15220 	    gap2 = *dmin2 * .5 - a2;
15221 	    if (gap2 > 0. && gap2 > b2 * a2) {
15222 /* Computing MAX */
15223 		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
15224 		s = std::max(d__1,d__2);
15225 	    } else {
15226 /* Computing MAX */
15227 		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
15228 		s = std::max(d__1,d__2);
15229 		*ttype = -8;
15230 	    }
15231 	} else {
15232 
15233 /*           Case 9. */
15234 
15235 	    s = *dmin1 * .25;
15236 	    if (*dmin1 == *dn1) {
15237 		s = *dmin1 * .5;
15238 	    }
15239 	    *ttype = -9;
15240 	}
15241 
15242     } else if (*n0in == *n0 + 2) {
15243 
15244 /*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
15245 
15246 /*        Cases 10 and 11. */
15247 
15248 	if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
15249 	    *ttype = -10;
15250 	    s = *dmin2 * .333;
15251 	    if (z__[nn - 5] > z__[nn - 7]) {
15252 		return 0;
15253 	    }
15254 	    b1 = z__[nn - 5] / z__[nn - 7];
15255 	    b2 = b1;
15256 	    if (b2 == 0.) {
15257 		goto L80;
15258 	    }
15259 	    i__1 = (*i0 << 2) - 1 + *pp;
15260 	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
15261 		if (z__[i4] > z__[i4 - 2]) {
15262 		    return 0;
15263 		}
15264 		b1 *= z__[i4] / z__[i4 - 2];
15265 		b2 += b1;
15266 		if (b1 * 100. < b2) {
15267 		    goto L80;
15268 		}
15269 /* L70: */
15270 	    }
15271 L80:
15272 	    b2 = sqrt(b2 * 1.05);
15273 /* Computing 2nd power */
15274 	    d__1 = b2;
15275 	    a2 = *dmin2 / (d__1 * d__1 + 1.);
15276 	    gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
15277 		    nn - 9]) - a2;
15278 	    if (gap2 > 0. && gap2 > b2 * a2) {
15279 /* Computing MAX */
15280 		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
15281 		s = std::max(d__1,d__2);
15282 	    } else {
15283 /* Computing MAX */
15284 		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
15285 		s = std::max(d__1,d__2);
15286 	    }
15287 	} else {
15288 	    s = *dmin2 * .25;
15289 	    *ttype = -11;
15290 	}
15291     } else if (*n0in > *n0 + 2) {
15292 
15293 /*        Case 12, more than two eigenvalues deflated. No information. */
15294 
15295 	s = 0.;
15296 	*ttype = -12;
15297     }
15298 
15299     *tau = s;
15300     return 0;
15301 
15302 /*     End of DLASQ4 */
15303 
15304 } /* dlasq4_ */
15305 
dlasq5_(integer * i0,integer * n0,double * z__,integer * pp,double * tau,double * dmin__,double * dmin1,double * dmin2,double * dn,double * dnm1,double * dnm2,bool * ieee)15306 /* Subroutine */ int dlasq5_(integer *i0, integer *n0, double *z__,
15307 	integer *pp, double *tau, double *dmin__, double *dmin1,
15308 	double *dmin2, double *dn, double *dnm1, double *dnm2,
15309 	bool *ieee)
15310 {
15311     /* System generated locals */
15312     integer i__1;
15313     double d__1, d__2;
15314 
15315     /* Local variables */
15316     double d__;
15317     integer j4, j4p2;
15318     double emin, temp;
15319 
15320 
15321 /*  -- LAPACK auxiliary routine (version 3.1) -- */
15322 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
15323 /*     November 2006 */
15324 
15325 /*     .. Scalar Arguments .. */
15326 /*     .. */
15327 /*     .. Array Arguments .. */
15328 /*     .. */
15329 
15330 /*  Purpose */
15331 /*  ======= */
15332 
15333 /*  DLASQ5 computes one dqds transform in ping-pong form, one */
15334 /*  version for IEEE machines another for non IEEE machines. */
15335 
15336 /*  Arguments */
15337 /*  ========= */
15338 
15339 /*  I0    (input) INTEGER */
15340 /*        First index. */
15341 
15342 /*  N0    (input) INTEGER */
15343 /*        Last index. */
15344 
15345 /*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
15346 /*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
15347 /*        an extra argument. */
15348 
15349 /*  PP    (input) INTEGER */
15350 /*        PP=0 for ping, PP=1 for pong. */
15351 
15352 /*  TAU   (input) DOUBLE PRECISION */
15353 /*        This is the shift. */
15354 
15355 /*  DMIN  (output) DOUBLE PRECISION */
15356 /*        Minimum value of d. */
15357 
15358 /*  DMIN1 (output) DOUBLE PRECISION */
15359 /*        Minimum value of d, excluding D( N0 ). */
15360 
15361 /*  DMIN2 (output) DOUBLE PRECISION */
15362 /*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
15363 
15364 /*  DN    (output) DOUBLE PRECISION */
15365 /*        d(N0), the last value of d. */
15366 
15367 /*  DNM1  (output) DOUBLE PRECISION */
15368 /*        d(N0-1). */
15369 
15370 /*  DNM2  (output) DOUBLE PRECISION */
15371 /*        d(N0-2). */
15372 
15373 /*  IEEE  (input) LOGICAL */
15374 /*        Flag for IEEE or non IEEE arithmetic. */
15375 
15376 /*  ===================================================================== */
15377 
15378 /*     .. Parameter .. */
15379 /*     .. */
15380 /*     .. Local Scalars .. */
15381 /*     .. */
15382 /*     .. Intrinsic Functions .. */
15383 /*     .. */
15384 /*     .. Executable Statements .. */
15385 
15386     /* Parameter adjustments */
15387     --z__;
15388 
15389     /* Function Body */
15390     if (*n0 - *i0 - 1 <= 0) {
15391 	return 0;
15392     }
15393 
15394     j4 = (*i0 << 2) + *pp - 3;
15395     emin = z__[j4 + 4];
15396     d__ = z__[j4] - *tau;
15397     *dmin__ = d__;
15398     *dmin1 = -z__[j4];
15399 
15400     if (*ieee) {
15401 
15402 /*        Code for IEEE arithmetic. */
15403 
15404 	if (*pp == 0) {
15405 	    i__1 = *n0 - 3 << 2;
15406 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15407 		z__[j4 - 2] = d__ + z__[j4 - 1];
15408 		temp = z__[j4 + 1] / z__[j4 - 2];
15409 		d__ = d__ * temp - *tau;
15410 		*dmin__ = std::min(*dmin__,d__);
15411 		z__[j4] = z__[j4 - 1] * temp;
15412 /* Computing MIN */
15413 		d__1 = z__[j4];
15414 		emin = std::min(d__1,emin);
15415 /* L10: */
15416 	    }
15417 	} else {
15418 	    i__1 = *n0 - 3 << 2;
15419 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15420 		z__[j4 - 3] = d__ + z__[j4];
15421 		temp = z__[j4 + 2] / z__[j4 - 3];
15422 		d__ = d__ * temp - *tau;
15423 		*dmin__ = std::min(*dmin__,d__);
15424 		z__[j4 - 1] = z__[j4] * temp;
15425 /* Computing MIN */
15426 		d__1 = z__[j4 - 1];
15427 		emin = std::min(d__1,emin);
15428 /* L20: */
15429 	    }
15430 	}
15431 
15432 /*        Unroll last two steps. */
15433 
15434 	*dnm2 = d__;
15435 	*dmin2 = *dmin__;
15436 	j4 = (*n0 - 2 << 2) - *pp;
15437 	j4p2 = j4 + (*pp << 1) - 1;
15438 	z__[j4 - 2] = *dnm2 + z__[j4p2];
15439 	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15440 	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
15441 	*dmin__ = std::min(*dmin__,*dnm1);
15442 
15443 	*dmin1 = *dmin__;
15444 	j4 += 4;
15445 	j4p2 = j4 + (*pp << 1) - 1;
15446 	z__[j4 - 2] = *dnm1 + z__[j4p2];
15447 	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15448 	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
15449 	*dmin__ = std::min(*dmin__,*dn);
15450 
15451     } else {
15452 
15453 /*        Code for non IEEE arithmetic. */
15454 
15455 	if (*pp == 0) {
15456 	    i__1 = *n0 - 3 << 2;
15457 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15458 		z__[j4 - 2] = d__ + z__[j4 - 1];
15459 		if (d__ < 0.) {
15460 		    return 0;
15461 		} else {
15462 		    z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
15463 		    d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
15464 		}
15465 		*dmin__ = std::min(*dmin__,d__);
15466 /* Computing MIN */
15467 		d__1 = emin, d__2 = z__[j4];
15468 		emin = std::min(d__1,d__2);
15469 /* L30: */
15470 	    }
15471 	} else {
15472 	    i__1 = *n0 - 3 << 2;
15473 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15474 		z__[j4 - 3] = d__ + z__[j4];
15475 		if (d__ < 0.) {
15476 		    return 0;
15477 		} else {
15478 		    z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
15479 		    d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
15480 		}
15481 		*dmin__ = std::min(*dmin__,d__);
15482 /* Computing MIN */
15483 		d__1 = emin, d__2 = z__[j4 - 1];
15484 		emin = std::min(d__1,d__2);
15485 /* L40: */
15486 	    }
15487 	}
15488 
15489 /*        Unroll last two steps. */
15490 
15491 	*dnm2 = d__;
15492 	*dmin2 = *dmin__;
15493 	j4 = (*n0 - 2 << 2) - *pp;
15494 	j4p2 = j4 + (*pp << 1) - 1;
15495 	z__[j4 - 2] = *dnm2 + z__[j4p2];
15496 	if (*dnm2 < 0.) {
15497 	    return 0;
15498 	} else {
15499 	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15500 	    *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
15501 	}
15502 	*dmin__ = std::min(*dmin__,*dnm1);
15503 
15504 	*dmin1 = *dmin__;
15505 	j4 += 4;
15506 	j4p2 = j4 + (*pp << 1) - 1;
15507 	z__[j4 - 2] = *dnm1 + z__[j4p2];
15508 	if (*dnm1 < 0.) {
15509 	    return 0;
15510 	} else {
15511 	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15512 	    *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
15513 	}
15514 	*dmin__ = std::min(*dmin__,*dn);
15515 
15516     }
15517 
15518     z__[j4 + 2] = *dn;
15519     z__[(*n0 << 2) - *pp] = emin;
15520     return 0;
15521 
15522 /*     End of DLASQ5 */
15523 
15524 } /* dlasq5_ */
15525 
dlasq6_(integer * i0,integer * n0,double * z__,integer * pp,double * dmin__,double * dmin1,double * dmin2,double * dn,double * dnm1,double * dnm2)15526 /* Subroutine */ int dlasq6_(integer *i0, integer *n0, double *z__,
15527 	integer *pp, double *dmin__, double *dmin1, double *dmin2,
15528 	double *dn, double *dnm1, double *dnm2)
15529 {
15530     /* System generated locals */
15531     integer i__1;
15532     double d__1, d__2;
15533 
15534     /* Local variables */
15535     double d__;
15536     integer j4, j4p2;
15537     double emin, temp;
15538 
15539     double safmin;
15540 
15541 
15542 /*  -- LAPACK auxiliary routine (version 3.1) -- */
15543 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
15544 /*     November 2006 */
15545 
15546 /*     .. Scalar Arguments .. */
15547 /*     .. */
15548 /*     .. Array Arguments .. */
15549 /*     .. */
15550 
15551 /*  Purpose */
15552 /*  ======= */
15553 
15554 /*  DLASQ6 computes one dqd (shift equal to zero) transform in */
15555 /*  ping-pong form, with protection against underflow and overflow. */
15556 
15557 /*  Arguments */
15558 /*  ========= */
15559 
15560 /*  I0    (input) INTEGER */
15561 /*        First index. */
15562 
15563 /*  N0    (input) INTEGER */
15564 /*        Last index. */
15565 
15566 /*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
15567 /*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
15568 /*        an extra argument. */
15569 
15570 /*  PP    (input) INTEGER */
15571 /*        PP=0 for ping, PP=1 for pong. */
15572 
15573 /*  DMIN  (output) DOUBLE PRECISION */
15574 /*        Minimum value of d. */
15575 
15576 /*  DMIN1 (output) DOUBLE PRECISION */
15577 /*        Minimum value of d, excluding D( N0 ). */
15578 
15579 /*  DMIN2 (output) DOUBLE PRECISION */
15580 /*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
15581 
15582 /*  DN    (output) DOUBLE PRECISION */
15583 /*        d(N0), the last value of d. */
15584 
15585 /*  DNM1  (output) DOUBLE PRECISION */
15586 /*        d(N0-1). */
15587 
15588 /*  DNM2  (output) DOUBLE PRECISION */
15589 /*        d(N0-2). */
15590 
15591 /*  ===================================================================== */
15592 
15593 /*     .. Parameter .. */
15594 /*     .. */
15595 /*     .. Local Scalars .. */
15596 /*     .. */
15597 /*     .. External Function .. */
15598 /*     .. */
15599 /*     .. Intrinsic Functions .. */
15600 /*     .. */
15601 /*     .. Executable Statements .. */
15602 
15603     /* Parameter adjustments */
15604     --z__;
15605 
15606     /* Function Body */
15607     if (*n0 - *i0 - 1 <= 0) {
15608 	return 0;
15609     }
15610 
15611     safmin = dlamch_("Safe minimum");
15612     j4 = (*i0 << 2) + *pp - 3;
15613     emin = z__[j4 + 4];
15614     d__ = z__[j4];
15615     *dmin__ = d__;
15616 
15617     if (*pp == 0) {
15618 	i__1 = *n0 - 3 << 2;
15619 	for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15620 	    z__[j4 - 2] = d__ + z__[j4 - 1];
15621 	    if (z__[j4 - 2] == 0.) {
15622 		z__[j4] = 0.;
15623 		d__ = z__[j4 + 1];
15624 		*dmin__ = d__;
15625 		emin = 0.;
15626 	    } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4
15627 		    - 2] < z__[j4 + 1]) {
15628 		temp = z__[j4 + 1] / z__[j4 - 2];
15629 		z__[j4] = z__[j4 - 1] * temp;
15630 		d__ *= temp;
15631 	    } else {
15632 		z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
15633 		d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
15634 	    }
15635 	    *dmin__ = std::min(*dmin__,d__);
15636 /* Computing MIN */
15637 	    d__1 = emin, d__2 = z__[j4];
15638 	    emin = std::min(d__1,d__2);
15639 /* L10: */
15640 	}
15641     } else {
15642 	i__1 = *n0 - 3 << 2;
15643 	for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
15644 	    z__[j4 - 3] = d__ + z__[j4];
15645 	    if (z__[j4 - 3] == 0.) {
15646 		z__[j4 - 1] = 0.;
15647 		d__ = z__[j4 + 2];
15648 		*dmin__ = d__;
15649 		emin = 0.;
15650 	    } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4
15651 		    - 3] < z__[j4 + 2]) {
15652 		temp = z__[j4 + 2] / z__[j4 - 3];
15653 		z__[j4 - 1] = z__[j4] * temp;
15654 		d__ *= temp;
15655 	    } else {
15656 		z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
15657 		d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
15658 	    }
15659 	    *dmin__ = std::min(*dmin__,d__);
15660 /* Computing MIN */
15661 	    d__1 = emin, d__2 = z__[j4 - 1];
15662 	    emin = std::min(d__1,d__2);
15663 /* L20: */
15664 	}
15665     }
15666 
15667 /*     Unroll last two steps. */
15668 
15669     *dnm2 = d__;
15670     *dmin2 = *dmin__;
15671     j4 = (*n0 - 2 << 2) - *pp;
15672     j4p2 = j4 + (*pp << 1) - 1;
15673     z__[j4 - 2] = *dnm2 + z__[j4p2];
15674     if (z__[j4 - 2] == 0.) {
15675 	z__[j4] = 0.;
15676 	*dnm1 = z__[j4p2 + 2];
15677 	*dmin__ = *dnm1;
15678 	emin = 0.;
15679     } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
15680 	    z__[j4p2 + 2]) {
15681 	temp = z__[j4p2 + 2] / z__[j4 - 2];
15682 	z__[j4] = z__[j4p2] * temp;
15683 	*dnm1 = *dnm2 * temp;
15684     } else {
15685 	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15686 	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
15687     }
15688     *dmin__ = std::min(*dmin__,*dnm1);
15689 
15690     *dmin1 = *dmin__;
15691     j4 += 4;
15692     j4p2 = j4 + (*pp << 1) - 1;
15693     z__[j4 - 2] = *dnm1 + z__[j4p2];
15694     if (z__[j4 - 2] == 0.) {
15695 	z__[j4] = 0.;
15696 	*dn = z__[j4p2 + 2];
15697 	*dmin__ = *dn;
15698 	emin = 0.;
15699     } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
15700 	    z__[j4p2 + 2]) {
15701 	temp = z__[j4p2 + 2] / z__[j4 - 2];
15702 	z__[j4] = z__[j4p2] * temp;
15703 	*dn = *dnm1 * temp;
15704     } else {
15705 	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
15706 	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
15707     }
15708     *dmin__ = std::min(*dmin__,*dn);
15709 
15710     z__[j4 + 2] = *dn;
15711     z__[(*n0 << 2) - *pp] = emin;
15712     return 0;
15713 
15714 /*     End of DLASQ6 */
15715 
15716 } /* dlasq6_ */
15717 
dlasr_(const char * side,const char * pivot,const char * direct,integer * m,integer * n,double * c__,double * s,double * a,integer * lda)15718 /* Subroutine */ int dlasr_(const char *side, const char *pivot, const char *direct, integer *m,
15719 	integer *n, double *c__, double *s, double *a, integer *lda)
15720 {
15721     /* System generated locals */
15722     integer a_dim1, a_offset, i__1, i__2;
15723 
15724     /* Local variables */
15725     integer i__, j, info;
15726     double temp;
15727 
15728     double ctemp, stemp;
15729 
15730 
15731 
15732 /*  -- LAPACK auxiliary routine (version 3.1) -- */
15733 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
15734 /*     November 2006 */
15735 
15736 /*     .. Scalar Arguments .. */
15737 /*     .. */
15738 /*     .. Array Arguments .. */
15739 /*     .. */
15740 
15741 /*  Purpose */
15742 /*  ======= */
15743 
15744 /*  DLASR applies a sequence of plane rotations to a real matrix A, */
15745 /*  from either the left or the right. */
15746 
15747 /*  When SIDE = 'L', the transformation takes the form */
15748 
15749 /*     A := P*A */
15750 
15751 /*  and when SIDE = 'R', the transformation takes the form */
15752 
15753 /*     A := A*P**T */
15754 
15755 /*  where P is an orthogonal matrix consisting of a sequence of z plane */
15756 /*  rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', */
15757 /*  and P**T is the transpose of P. */
15758 
15759 /*  When DIRECT = 'F' (Forward sequence), then */
15760 
15761 /*     P = P(z-1) * ... * P(2) * P(1) */
15762 
15763 /*  and when DIRECT = 'B' (Backward sequence), then */
15764 
15765 /*     P = P(1) * P(2) * ... * P(z-1) */
15766 
15767 /*  where P(k) is a plane rotation matrix defined by the 2-by-2 rotation */
15768 
15769 /*     R(k) = (  c(k)  s(k) ) */
15770 /*          = ( -s(k)  c(k) ). */
15771 
15772 /*  When PIVOT = 'V' (Variable pivot), the rotation is performed */
15773 /*  for the plane (k,k+1), i.e., P(k) has the form */
15774 
15775 /*     P(k) = (  1                                            ) */
15776 /*            (       ...                                     ) */
15777 /*            (              1                                ) */
15778 /*            (                   c(k)  s(k)                  ) */
15779 /*            (                  -s(k)  c(k)                  ) */
15780 /*            (                                1              ) */
15781 /*            (                                     ...       ) */
15782 /*            (                                            1  ) */
15783 
15784 /*  where R(k) appears as a rank-2 modification to the identity matrix in */
15785 /*  rows and columns k and k+1. */
15786 
15787 /*  When PIVOT = 'T' (Top pivot), the rotation is performed for the */
15788 /*  plane (1,k+1), so P(k) has the form */
15789 
15790 /*     P(k) = (  c(k)                    s(k)                 ) */
15791 /*            (         1                                     ) */
15792 /*            (              ...                              ) */
15793 /*            (                     1                         ) */
15794 /*            ( -s(k)                    c(k)                 ) */
15795 /*            (                                 1             ) */
15796 /*            (                                      ...      ) */
15797 /*            (                                             1 ) */
15798 
15799 /*  where R(k) appears in rows and columns 1 and k+1. */
15800 
15801 /*  Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is */
15802 /*  performed for the plane (k,z), giving P(k) the form */
15803 
15804 /*     P(k) = ( 1                                             ) */
15805 /*            (      ...                                      ) */
15806 /*            (             1                                 ) */
15807 /*            (                  c(k)                    s(k) ) */
15808 /*            (                         1                     ) */
15809 /*            (                              ...              ) */
15810 /*            (                                     1         ) */
15811 /*            (                 -s(k)                    c(k) ) */
15812 
15813 /*  where R(k) appears in rows and columns k and z.  The rotations are */
15814 /*  performed without ever forming P(k) explicitly. */
15815 
15816 /*  Arguments */
15817 /*  ========= */
15818 
15819 /*  SIDE    (input) CHARACTER*1 */
15820 /*          Specifies whether the plane rotation matrix P is applied to */
15821 /*          A on the left or the right. */
15822 /*          = 'L':  Left, compute A := P*A */
15823 /*          = 'R':  Right, compute A:= A*P**T */
15824 
15825 /*  PIVOT   (input) CHARACTER*1 */
15826 /*          Specifies the plane for which P(k) is a plane rotation */
15827 /*          matrix. */
15828 /*          = 'V':  Variable pivot, the plane (k,k+1) */
15829 /*          = 'T':  Top pivot, the plane (1,k+1) */
15830 /*          = 'B':  Bottom pivot, the plane (k,z) */
15831 
15832 /*  DIRECT  (input) CHARACTER*1 */
15833 /*          Specifies whether P is a forward or backward sequence of */
15834 /*          plane rotations. */
15835 /*          = 'F':  Forward, P = P(z-1)*...*P(2)*P(1) */
15836 /*          = 'B':  Backward, P = P(1)*P(2)*...*P(z-1) */
15837 
15838 /*  M       (input) INTEGER */
15839 /*          The number of rows of the matrix A.  If m <= 1, an immediate */
15840 /*          return is effected. */
15841 
15842 /*  N       (input) INTEGER */
15843 /*          The number of columns of the matrix A.  If n <= 1, an */
15844 /*          immediate return is effected. */
15845 
15846 /*  C       (input) DOUBLE PRECISION array, dimension */
15847 /*                  (M-1) if SIDE = 'L' */
15848 /*                  (N-1) if SIDE = 'R' */
15849 /*          The cosines c(k) of the plane rotations. */
15850 
15851 /*  S       (input) DOUBLE PRECISION array, dimension */
15852 /*                  (M-1) if SIDE = 'L' */
15853 /*                  (N-1) if SIDE = 'R' */
15854 /*          The sines s(k) of the plane rotations.  The 2-by-2 plane */
15855 /*          rotation part of the matrix P(k), R(k), has the form */
15856 /*          R(k) = (  c(k)  s(k) ) */
15857 /*                 ( -s(k)  c(k) ). */
15858 
15859 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
15860 /*          The M-by-N matrix A.  On exit, A is overwritten by P*A if */
15861 /*          SIDE = 'R' or by A*P**T if SIDE = 'L'. */
15862 
15863 /*  LDA     (input) INTEGER */
15864 /*          The leading dimension of the array A.  LDA >= max(1,M). */
15865 
15866 /*  ===================================================================== */
15867 
15868 /*     .. Parameters .. */
15869 /*     .. */
15870 /*     .. Local Scalars .. */
15871 /*     .. */
15872 /*     .. External Functions .. */
15873 /*     .. */
15874 /*     .. External Subroutines .. */
15875 /*     .. */
15876 /*     .. Intrinsic Functions .. */
15877 /*     .. */
15878 /*     .. Executable Statements .. */
15879 
15880 /*     Test the input parameters */
15881 
15882     /* Parameter adjustments */
15883     --c__;
15884     --s;
15885     a_dim1 = *lda;
15886     a_offset = 1 + a_dim1;
15887     a -= a_offset;
15888 
15889     /* Function Body */
15890     info = 0;
15891     if (! (lsame_(side, "L") || lsame_(side, "R"))) {
15892 	info = 1;
15893     } else if (! (lsame_(pivot, "V") || lsame_(pivot,
15894 	    "T") || lsame_(pivot, "B"))) {
15895 	info = 2;
15896     } else if (! (lsame_(direct, "F") || lsame_(direct,
15897 	    "B"))) {
15898 	info = 3;
15899     } else if (*m < 0) {
15900 	info = 4;
15901     } else if (*n < 0) {
15902 	info = 5;
15903     } else if (*lda < std::max(1_integer,*m)) {
15904 	info = 9;
15905     }
15906     if (info != 0) {
15907 	xerbla_("DLASR ", &info);
15908 	return 0;
15909     }
15910 
15911 /*     Quick return if possible */
15912 
15913     if (*m == 0 || *n == 0) {
15914 	return 0;
15915     }
15916     if (lsame_(side, "L")) {
15917 
15918 /*        Form  P * A */
15919 
15920 	if (lsame_(pivot, "V")) {
15921 	    if (lsame_(direct, "F")) {
15922 		i__1 = *m - 1;
15923 		for (j = 1; j <= i__1; ++j) {
15924 		    ctemp = c__[j];
15925 		    stemp = s[j];
15926 		    if (ctemp != 1. || stemp != 0.) {
15927 			i__2 = *n;
15928 			for (i__ = 1; i__ <= i__2; ++i__) {
15929 			    temp = a[j + 1 + i__ * a_dim1];
15930 			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
15931 				    a[j + i__ * a_dim1];
15932 			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
15933 				    + i__ * a_dim1];
15934 /* L10: */
15935 			}
15936 		    }
15937 /* L20: */
15938 		}
15939 	    } else if (lsame_(direct, "B")) {
15940 		for (j = *m - 1; j >= 1; --j) {
15941 		    ctemp = c__[j];
15942 		    stemp = s[j];
15943 		    if (ctemp != 1. || stemp != 0.) {
15944 			i__1 = *n;
15945 			for (i__ = 1; i__ <= i__1; ++i__) {
15946 			    temp = a[j + 1 + i__ * a_dim1];
15947 			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
15948 				    a[j + i__ * a_dim1];
15949 			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
15950 				    + i__ * a_dim1];
15951 /* L30: */
15952 			}
15953 		    }
15954 /* L40: */
15955 		}
15956 	    }
15957 	} else if (lsame_(pivot, "T")) {
15958 	    if (lsame_(direct, "F")) {
15959 		i__1 = *m;
15960 		for (j = 2; j <= i__1; ++j) {
15961 		    ctemp = c__[j - 1];
15962 		    stemp = s[j - 1];
15963 		    if (ctemp != 1. || stemp != 0.) {
15964 			i__2 = *n;
15965 			for (i__ = 1; i__ <= i__2; ++i__) {
15966 			    temp = a[j + i__ * a_dim1];
15967 			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
15968 				    i__ * a_dim1 + 1];
15969 			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
15970 				    i__ * a_dim1 + 1];
15971 /* L50: */
15972 			}
15973 		    }
15974 /* L60: */
15975 		}
15976 	    } else if (lsame_(direct, "B")) {
15977 		for (j = *m; j >= 2; --j) {
15978 		    ctemp = c__[j - 1];
15979 		    stemp = s[j - 1];
15980 		    if (ctemp != 1. || stemp != 0.) {
15981 			i__1 = *n;
15982 			for (i__ = 1; i__ <= i__1; ++i__) {
15983 			    temp = a[j + i__ * a_dim1];
15984 			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
15985 				    i__ * a_dim1 + 1];
15986 			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
15987 				    i__ * a_dim1 + 1];
15988 /* L70: */
15989 			}
15990 		    }
15991 /* L80: */
15992 		}
15993 	    }
15994 	} else if (lsame_(pivot, "B")) {
15995 	    if (lsame_(direct, "F")) {
15996 		i__1 = *m - 1;
15997 		for (j = 1; j <= i__1; ++j) {
15998 		    ctemp = c__[j];
15999 		    stemp = s[j];
16000 		    if (ctemp != 1. || stemp != 0.) {
16001 			i__2 = *n;
16002 			for (i__ = 1; i__ <= i__2; ++i__) {
16003 			    temp = a[j + i__ * a_dim1];
16004 			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
16005 				     + ctemp * temp;
16006 			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
16007 				    a_dim1] - stemp * temp;
16008 /* L90: */
16009 			}
16010 		    }
16011 /* L100: */
16012 		}
16013 	    } else if (lsame_(direct, "B")) {
16014 		for (j = *m - 1; j >= 1; --j) {
16015 		    ctemp = c__[j];
16016 		    stemp = s[j];
16017 		    if (ctemp != 1. || stemp != 0.) {
16018 			i__1 = *n;
16019 			for (i__ = 1; i__ <= i__1; ++i__) {
16020 			    temp = a[j + i__ * a_dim1];
16021 			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
16022 				     + ctemp * temp;
16023 			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
16024 				    a_dim1] - stemp * temp;
16025 /* L110: */
16026 			}
16027 		    }
16028 /* L120: */
16029 		}
16030 	    }
16031 	}
16032     } else if (lsame_(side, "R")) {
16033 
16034 /*        Form A * P' */
16035 
16036 	if (lsame_(pivot, "V")) {
16037 	    if (lsame_(direct, "F")) {
16038 		i__1 = *n - 1;
16039 		for (j = 1; j <= i__1; ++j) {
16040 		    ctemp = c__[j];
16041 		    stemp = s[j];
16042 		    if (ctemp != 1. || stemp != 0.) {
16043 			i__2 = *m;
16044 			for (i__ = 1; i__ <= i__2; ++i__) {
16045 			    temp = a[i__ + (j + 1) * a_dim1];
16046 			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
16047 				     a[i__ + j * a_dim1];
16048 			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
16049 				    i__ + j * a_dim1];
16050 /* L130: */
16051 			}
16052 		    }
16053 /* L140: */
16054 		}
16055 	    } else if (lsame_(direct, "B")) {
16056 		for (j = *n - 1; j >= 1; --j) {
16057 		    ctemp = c__[j];
16058 		    stemp = s[j];
16059 		    if (ctemp != 1. || stemp != 0.) {
16060 			i__1 = *m;
16061 			for (i__ = 1; i__ <= i__1; ++i__) {
16062 			    temp = a[i__ + (j + 1) * a_dim1];
16063 			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
16064 				     a[i__ + j * a_dim1];
16065 			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
16066 				    i__ + j * a_dim1];
16067 /* L150: */
16068 			}
16069 		    }
16070 /* L160: */
16071 		}
16072 	    }
16073 	} else if (lsame_(pivot, "T")) {
16074 	    if (lsame_(direct, "F")) {
16075 		i__1 = *n;
16076 		for (j = 2; j <= i__1; ++j) {
16077 		    ctemp = c__[j - 1];
16078 		    stemp = s[j - 1];
16079 		    if (ctemp != 1. || stemp != 0.) {
16080 			i__2 = *m;
16081 			for (i__ = 1; i__ <= i__2; ++i__) {
16082 			    temp = a[i__ + j * a_dim1];
16083 			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
16084 				    i__ + a_dim1];
16085 			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
16086 				    a_dim1];
16087 /* L170: */
16088 			}
16089 		    }
16090 /* L180: */
16091 		}
16092 	    } else if (lsame_(direct, "B")) {
16093 		for (j = *n; j >= 2; --j) {
16094 		    ctemp = c__[j - 1];
16095 		    stemp = s[j - 1];
16096 		    if (ctemp != 1. || stemp != 0.) {
16097 			i__1 = *m;
16098 			for (i__ = 1; i__ <= i__1; ++i__) {
16099 			    temp = a[i__ + j * a_dim1];
16100 			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
16101 				    i__ + a_dim1];
16102 			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
16103 				    a_dim1];
16104 /* L190: */
16105 			}
16106 		    }
16107 /* L200: */
16108 		}
16109 	    }
16110 	} else if (lsame_(pivot, "B")) {
16111 	    if (lsame_(direct, "F")) {
16112 		i__1 = *n - 1;
16113 		for (j = 1; j <= i__1; ++j) {
16114 		    ctemp = c__[j];
16115 		    stemp = s[j];
16116 		    if (ctemp != 1. || stemp != 0.) {
16117 			i__2 = *m;
16118 			for (i__ = 1; i__ <= i__2; ++i__) {
16119 			    temp = a[i__ + j * a_dim1];
16120 			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
16121 				     + ctemp * temp;
16122 			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
16123 				    a_dim1] - stemp * temp;
16124 /* L210: */
16125 			}
16126 		    }
16127 /* L220: */
16128 		}
16129 	    } else if (lsame_(direct, "B")) {
16130 		for (j = *n - 1; j >= 1; --j) {
16131 		    ctemp = c__[j];
16132 		    stemp = s[j];
16133 		    if (ctemp != 1. || stemp != 0.) {
16134 			i__1 = *m;
16135 			for (i__ = 1; i__ <= i__1; ++i__) {
16136 			    temp = a[i__ + j * a_dim1];
16137 			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
16138 				     + ctemp * temp;
16139 			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
16140 				    a_dim1] - stemp * temp;
16141 /* L230: */
16142 			}
16143 		    }
16144 /* L240: */
16145 		}
16146 	    }
16147 	}
16148     }
16149 
16150     return 0;
16151 
16152 /*     End of DLASR */
16153 
16154 } /* dlasr_ */
16155 
dlasrt_(const char * id,integer * n,double * d__,integer * info)16156 /* Subroutine */ int dlasrt_(const char *id, integer *n, double *d__, integer *info)
16157 {
16158     /* System generated locals */
16159     integer i__1, i__2;
16160 
16161     /* Local variables */
16162     integer i__, j;
16163     double d1, d2, d3;
16164     integer dir;
16165     double tmp;
16166     integer endd;
16167 
16168     integer stack[64]	/* was [2][32] */;
16169     double dmnmx;
16170     integer start;
16171 
16172     integer stkpnt;
16173 
16174 
16175 /*  -- LAPACK routine (version 3.1) -- */
16176 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
16177 /*     November 2006 */
16178 
16179 /*     .. Scalar Arguments .. */
16180 /*     .. */
16181 /*     .. Array Arguments .. */
16182 /*     .. */
16183 
16184 /*  Purpose */
16185 /*  ======= */
16186 
16187 /*  Sort the numbers in D in increasing order (if ID = 'I') or */
16188 /*  in decreasing order (if ID = 'D' ). */
16189 
16190 /*  Use Quick Sort, reverting to Insertion sort on arrays of */
16191 /*  size <= 20. Dimension of STACK limits N to about 2**32. */
16192 
16193 /*  Arguments */
16194 /*  ========= */
16195 
16196 /*  ID      (input) CHARACTER*1 */
16197 /*          = 'I': sort D in increasing order; */
16198 /*          = 'D': sort D in decreasing order. */
16199 
16200 /*  N       (input) INTEGER */
16201 /*          The length of the array D. */
16202 
16203 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
16204 /*          On entry, the array to be sorted. */
16205 /*          On exit, D has been sorted into increasing order */
16206 /*          (D(1) <= ... <= D(N) ) or into decreasing order */
16207 /*          (D(1) >= ... >= D(N) ), depending on ID. */
16208 
16209 /*  INFO    (output) INTEGER */
16210 /*          = 0:  successful exit */
16211 /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
16212 
16213 /*  ===================================================================== */
16214 
16215 /*     .. Parameters .. */
16216 /*     .. */
16217 /*     .. Local Scalars .. */
16218 /*     .. */
16219 /*     .. Local Arrays .. */
16220 /*     .. */
16221 /*     .. External Functions .. */
16222 /*     .. */
16223 /*     .. External Subroutines .. */
16224 /*     .. */
16225 /*     .. Executable Statements .. */
16226 
16227 /*     Test the input paramters. */
16228 
16229     /* Parameter adjustments */
16230     --d__;
16231 
16232     /* Function Body */
16233     *info = 0;
16234     dir = -1;
16235     if (lsame_(id, "D")) {
16236 	dir = 0;
16237     } else if (lsame_(id, "I")) {
16238 	dir = 1;
16239     }
16240     if (dir == -1) {
16241 	*info = -1;
16242     } else if (*n < 0) {
16243 	*info = -2;
16244     }
16245     if (*info != 0) {
16246 	i__1 = -(*info);
16247 	xerbla_("DLASRT", &i__1);
16248 	return 0;
16249     }
16250 
16251 /*     Quick return if possible */
16252 
16253     if (*n <= 1) {
16254 	return 0;
16255     }
16256 
16257     stkpnt = 1;
16258     stack[0] = 1;
16259     stack[1] = *n;
16260 L10:
16261     start = stack[(stkpnt << 1) - 2];
16262     endd = stack[(stkpnt << 1) - 1];
16263     --stkpnt;
16264     if (endd - start <= 20 && endd - start > 0) {
16265 
16266 /*        Do Insertion sort on D( START:ENDD ) */
16267 
16268 	if (dir == 0) {
16269 
16270 /*           Sort into decreasing order */
16271 
16272 	    i__1 = endd;
16273 	    for (i__ = start + 1; i__ <= i__1; ++i__) {
16274 		i__2 = start + 1;
16275 		for (j = i__; j >= i__2; --j) {
16276 		    if (d__[j] > d__[j - 1]) {
16277 			dmnmx = d__[j];
16278 			d__[j] = d__[j - 1];
16279 			d__[j - 1] = dmnmx;
16280 		    } else {
16281 			goto L30;
16282 		    }
16283 /* L20: */
16284 		}
16285 L30:
16286 		;
16287 	    }
16288 
16289 	} else {
16290 
16291 /*           Sort into increasing order */
16292 
16293 	    i__1 = endd;
16294 	    for (i__ = start + 1; i__ <= i__1; ++i__) {
16295 		i__2 = start + 1;
16296 		for (j = i__; j >= i__2; --j) {
16297 		    if (d__[j] < d__[j - 1]) {
16298 			dmnmx = d__[j];
16299 			d__[j] = d__[j - 1];
16300 			d__[j - 1] = dmnmx;
16301 		    } else {
16302 			goto L50;
16303 		    }
16304 /* L40: */
16305 		}
16306 L50:
16307 		;
16308 	    }
16309 
16310 	}
16311 
16312     } else if (endd - start > 20) {
16313 
16314 /*        Partition D( START:ENDD ) and stack parts, largest one first */
16315 
16316 /*        Choose partition entry as median of 3 */
16317 
16318 	d1 = d__[start];
16319 	d2 = d__[endd];
16320 	i__ = (start + endd) / 2;
16321 	d3 = d__[i__];
16322 	if (d1 < d2) {
16323 	    if (d3 < d1) {
16324 		dmnmx = d1;
16325 	    } else if (d3 < d2) {
16326 		dmnmx = d3;
16327 	    } else {
16328 		dmnmx = d2;
16329 	    }
16330 	} else {
16331 	    if (d3 < d2) {
16332 		dmnmx = d2;
16333 	    } else if (d3 < d1) {
16334 		dmnmx = d3;
16335 	    } else {
16336 		dmnmx = d1;
16337 	    }
16338 	}
16339 
16340 	if (dir == 0) {
16341 
16342 /*           Sort into decreasing order */
16343 
16344 	    i__ = start - 1;
16345 	    j = endd + 1;
16346 L60:
16347 L70:
16348 	    --j;
16349 	    if (d__[j] < dmnmx) {
16350 		goto L70;
16351 	    }
16352 L80:
16353 	    ++i__;
16354 	    if (d__[i__] > dmnmx) {
16355 		goto L80;
16356 	    }
16357 	    if (i__ < j) {
16358 		tmp = d__[i__];
16359 		d__[i__] = d__[j];
16360 		d__[j] = tmp;
16361 		goto L60;
16362 	    }
16363 	    if (j - start > endd - j - 1) {
16364 		++stkpnt;
16365 		stack[(stkpnt << 1) - 2] = start;
16366 		stack[(stkpnt << 1) - 1] = j;
16367 		++stkpnt;
16368 		stack[(stkpnt << 1) - 2] = j + 1;
16369 		stack[(stkpnt << 1) - 1] = endd;
16370 	    } else {
16371 		++stkpnt;
16372 		stack[(stkpnt << 1) - 2] = j + 1;
16373 		stack[(stkpnt << 1) - 1] = endd;
16374 		++stkpnt;
16375 		stack[(stkpnt << 1) - 2] = start;
16376 		stack[(stkpnt << 1) - 1] = j;
16377 	    }
16378 	} else {
16379 
16380 /*           Sort into increasing order */
16381 
16382 	    i__ = start - 1;
16383 	    j = endd + 1;
16384 L90:
16385 L100:
16386 	    --j;
16387 	    if (d__[j] > dmnmx) {
16388 		goto L100;
16389 	    }
16390 L110:
16391 	    ++i__;
16392 	    if (d__[i__] < dmnmx) {
16393 		goto L110;
16394 	    }
16395 	    if (i__ < j) {
16396 		tmp = d__[i__];
16397 		d__[i__] = d__[j];
16398 		d__[j] = tmp;
16399 		goto L90;
16400 	    }
16401 	    if (j - start > endd - j - 1) {
16402 		++stkpnt;
16403 		stack[(stkpnt << 1) - 2] = start;
16404 		stack[(stkpnt << 1) - 1] = j;
16405 		++stkpnt;
16406 		stack[(stkpnt << 1) - 2] = j + 1;
16407 		stack[(stkpnt << 1) - 1] = endd;
16408 	    } else {
16409 		++stkpnt;
16410 		stack[(stkpnt << 1) - 2] = j + 1;
16411 		stack[(stkpnt << 1) - 1] = endd;
16412 		++stkpnt;
16413 		stack[(stkpnt << 1) - 2] = start;
16414 		stack[(stkpnt << 1) - 1] = j;
16415 	    }
16416 	}
16417     }
16418     if (stkpnt > 0) {
16419 	goto L10;
16420     }
16421     return 0;
16422 
16423 /*     End of DLASRT */
16424 
16425 } /* dlasrt_ */
16426 
dlassq_(integer * n,double * x,integer * incx,double * scale,double * sumsq)16427 /* Subroutine */ int dlassq_(integer *n, double *x, integer *incx,
16428 	double *scale, double *sumsq)
16429 {
16430     /* System generated locals */
16431     integer i__1, i__2;
16432     double d__1;
16433 
16434     /* Local variables */
16435     integer ix;
16436     double absxi;
16437 
16438 
16439 /*  -- LAPACK auxiliary routine (version 3.1) -- */
16440 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
16441 /*     November 2006 */
16442 
16443 /*     .. Scalar Arguments .. */
16444 /*     .. */
16445 /*     .. Array Arguments .. */
16446 /*     .. */
16447 
16448 /*  Purpose */
16449 /*  ======= */
16450 
16451 /*  DLASSQ  returns the values  scl  and  smsq  such that */
16452 
16453 /*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, */
16454 
16455 /*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is */
16456 /*  assumed to be non-negative and  scl  returns the value */
16457 
16458 /*     scl = max( scale, abs( x( i ) ) ). */
16459 
16460 /*  scale and sumsq must be supplied in SCALE and SUMSQ and */
16461 /*  scl and smsq are overwritten on SCALE and SUMSQ respectively. */
16462 
16463 /*  The routine makes only one pass through the vector x. */
16464 
16465 /*  Arguments */
16466 /*  ========= */
16467 
16468 /*  N       (input) INTEGER */
16469 /*          The number of elements to be used from the vector X. */
16470 
16471 /*  X       (input) DOUBLE PRECISION array, dimension (N) */
16472 /*          The vector for which a scaled sum of squares is computed. */
16473 /*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. */
16474 
16475 /*  INCX    (input) INTEGER */
16476 /*          The increment between successive values of the vector X. */
16477 /*          INCX > 0. */
16478 
16479 /*  SCALE   (input/output) DOUBLE PRECISION */
16480 /*          On entry, the value  scale  in the equation above. */
16481 /*          On exit, SCALE is overwritten with  scl , the scaling factor */
16482 /*          for the sum of squares. */
16483 
16484 /*  SUMSQ   (input/output) DOUBLE PRECISION */
16485 /*          On entry, the value  sumsq  in the equation above. */
16486 /*          On exit, SUMSQ is overwritten with  smsq , the basic sum of */
16487 /*          squares from which  scl  has been factored out. */
16488 
16489 /* ===================================================================== */
16490 
16491 /*     .. Parameters .. */
16492 /*     .. */
16493 /*     .. Local Scalars .. */
16494 /*     .. */
16495 /*     .. Intrinsic Functions .. */
16496 /*     .. */
16497 /*     .. Executable Statements .. */
16498 
16499     /* Parameter adjustments */
16500     --x;
16501 
16502     /* Function Body */
16503     if (*n > 0) {
16504 	i__1 = (*n - 1) * *incx + 1;
16505 	i__2 = *incx;
16506 	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
16507 	    if (x[ix] != 0.) {
16508 		absxi = (d__1 = x[ix], abs(d__1));
16509 		if (*scale < absxi) {
16510 /* Computing 2nd power */
16511 		    d__1 = *scale / absxi;
16512 		    *sumsq = *sumsq * (d__1 * d__1) + 1;
16513 		    *scale = absxi;
16514 		} else {
16515 /* Computing 2nd power */
16516 		    d__1 = absxi / *scale;
16517 		    *sumsq += d__1 * d__1;
16518 		}
16519 	    }
16520 /* L10: */
16521 	}
16522     }
16523     return 0;
16524 
16525 /*     End of DLASSQ */
16526 
16527 } /* dlassq_ */
16528 
dlasv2_(double * f,double * g,double * h__,double * ssmin,double * ssmax,double * snr,double * csr,double * snl,double * csl)16529 /* Subroutine */ int dlasv2_(double *f, double *g, double *h__,
16530 	double *ssmin, double *ssmax, double *snr, double *
16531 	csr, double *snl, double *csl)
16532 {
16533 	/* Table of constant values */
16534 	static double c_b3 = 2.;
16535 	static double c_b4 = 1.;
16536 
16537     /* System generated locals */
16538     double d__1;
16539 
16540     /* Local variables */
16541     double a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt,
16542 	    crt, slt, srt;
16543     integer pmax;
16544     double temp;
16545     bool swap;
16546     double tsign;
16547 
16548     bool gasmal;
16549 
16550 
16551 /*  -- LAPACK auxiliary routine (version 3.1) -- */
16552 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
16553 /*     November 2006 */
16554 
16555 /*     .. Scalar Arguments .. */
16556 /*     .. */
16557 
16558 /*  Purpose */
16559 /*  ======= */
16560 
16561 /*  DLASV2 computes the singular value decomposition of a 2-by-2 */
16562 /*  triangular matrix */
16563 /*     [  F   G  ] */
16564 /*     [  0   H  ]. */
16565 /*  On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the */
16566 /*  smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and */
16567 /*  right singular vectors for abs(SSMAX), giving the decomposition */
16568 
16569 /*     [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ] */
16570 /*     [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ]. */
16571 
16572 /*  Arguments */
16573 /*  ========= */
16574 
16575 /*  F       (input) DOUBLE PRECISION */
16576 /*          The (1,1) element of the 2-by-2 matrix. */
16577 
16578 /*  G       (input) DOUBLE PRECISION */
16579 /*          The (1,2) element of the 2-by-2 matrix. */
16580 
16581 /*  H       (input) DOUBLE PRECISION */
16582 /*          The (2,2) element of the 2-by-2 matrix. */
16583 
16584 /*  SSMIN   (output) DOUBLE PRECISION */
16585 /*          abs(SSMIN) is the smaller singular value. */
16586 
16587 /*  SSMAX   (output) DOUBLE PRECISION */
16588 /*          abs(SSMAX) is the larger singular value. */
16589 
16590 /*  SNL     (output) DOUBLE PRECISION */
16591 /*  CSL     (output) DOUBLE PRECISION */
16592 /*          The vector (CSL, SNL) is a unit left singular vector for the */
16593 /*          singular value abs(SSMAX). */
16594 
16595 /*  SNR     (output) DOUBLE PRECISION */
16596 /*  CSR     (output) DOUBLE PRECISION */
16597 /*          The vector (CSR, SNR) is a unit right singular vector for the */
16598 /*          singular value abs(SSMAX). */
16599 
16600 /*  Further Details */
16601 /*  =============== */
16602 
16603 /*  Any input parameter may be aliased with any output parameter. */
16604 
16605 /*  Barring over/underflow and assuming a guard digit in subtraction, all */
16606 /*  output quantities are correct to within a few units in the last */
16607 /*  place (ulps). */
16608 
16609 /*  In IEEE arithmetic, the code works correctly if one matrix element is */
16610 /*  infinite. */
16611 
16612 /*  Overflow will not occur unless the largest singular value itself */
16613 /*  overflows or is within a few ulps of overflow. (On machines with */
16614 /*  partial overflow, like the Cray, overflow may occur if the largest */
16615 /*  singular value is within a factor of 2 of overflow.) */
16616 
16617 /*  Underflow is harmless if underflow is gradual. Otherwise, results */
16618 /*  may correspond to a matrix modified by perturbations of size near */
16619 /*  the underflow threshold. */
16620 
16621 /* ===================================================================== */
16622 
16623 /*     .. Parameters .. */
16624 /*     .. */
16625 /*     .. Local Scalars .. */
16626 /*     .. */
16627 /*     .. Intrinsic Functions .. */
16628 /*     .. */
16629 /*     .. External Functions .. */
16630 /*     .. */
16631 /*     .. Executable Statements .. */
16632 
16633     ft = *f;
16634     fa = abs(ft);
16635     ht = *h__;
16636     ha = abs(*h__);
16637 
16638 /*     PMAX points to the maximum absolute element of matrix */
16639 /*       PMAX = 1 if F largest in absolute values */
16640 /*       PMAX = 2 if G largest in absolute values */
16641 /*       PMAX = 3 if H largest in absolute values */
16642 
16643     pmax = 1;
16644     swap = ha > fa;
16645     if (swap) {
16646 	pmax = 3;
16647 	temp = ft;
16648 	ft = ht;
16649 	ht = temp;
16650 	temp = fa;
16651 	fa = ha;
16652 	ha = temp;
16653 
16654 /*        Now FA .ge. HA */
16655 
16656     }
16657     gt = *g;
16658     ga = abs(gt);
16659     if (ga == 0.) {
16660 
16661 /*        Diagonal matrix */
16662 
16663 	*ssmin = ha;
16664 	*ssmax = fa;
16665 	clt = 1.;
16666 	crt = 1.;
16667 	slt = 0.;
16668 	srt = 0.;
16669     } else {
16670 	gasmal = true;
16671 	if (ga > fa) {
16672 	    pmax = 2;
16673 	    if (fa / ga < dlamch_("EPS")) {
16674 
16675 /*              Case of very large GA */
16676 
16677 		gasmal = false;
16678 		*ssmax = ga;
16679 		if (ha > 1.) {
16680 		    *ssmin = fa / (ga / ha);
16681 		} else {
16682 		    *ssmin = fa / ga * ha;
16683 		}
16684 		clt = 1.;
16685 		slt = ht / gt;
16686 		srt = 1.;
16687 		crt = ft / gt;
16688 	    }
16689 	}
16690 	if (gasmal) {
16691 
16692 /*           Normal case */
16693 
16694 	    d__ = fa - ha;
16695 	    if (d__ == fa) {
16696 
16697 /*              Copes with infinite F or H */
16698 
16699 		l = 1.;
16700 	    } else {
16701 		l = d__ / fa;
16702 	    }
16703 
16704 /*           Note that 0 .le. L .le. 1 */
16705 
16706 	    m = gt / ft;
16707 
16708 /*           Note that abs(M) .le. 1/macheps */
16709 
16710 	    t = 2. - l;
16711 
16712 /*           Note that T .ge. 1 */
16713 
16714 	    mm = m * m;
16715 	    tt = t * t;
16716 	    s = sqrt(tt + mm);
16717 
16718 /*           Note that 1 .le. S .le. 1 + 1/macheps */
16719 
16720 	    if (l == 0.) {
16721 		r__ = abs(m);
16722 	    } else {
16723 		r__ = sqrt(l * l + mm);
16724 	    }
16725 
16726 /*           Note that 0 .le. R .le. 1 + 1/macheps */
16727 
16728 	    a = (s + r__) * .5;
16729 
16730 /*           Note that 1 .le. A .le. 1 + abs(M) */
16731 
16732 	    *ssmin = ha / a;
16733 	    *ssmax = fa * a;
16734 	    if (mm == 0.) {
16735 
16736 /*              Note that M is very tiny */
16737 
16738 		if (l == 0.) {
16739 		    t = d_sign(&c_b3, &ft) * d_sign(&c_b4, &gt);
16740 		} else {
16741 		    t = gt / d_sign(&d__, &ft) + m / t;
16742 		}
16743 	    } else {
16744 		t = (m / (s + t) + m / (r__ + l)) * (a + 1.);
16745 	    }
16746 	    l = sqrt(t * t + 4.);
16747 	    crt = 2. / l;
16748 	    srt = t / l;
16749 	    clt = (crt + srt * m) / a;
16750 	    slt = ht / ft * srt / a;
16751 	}
16752     }
16753     if (swap) {
16754 	*csl = srt;
16755 	*snl = crt;
16756 	*csr = slt;
16757 	*snr = clt;
16758     } else {
16759 	*csl = clt;
16760 	*snl = slt;
16761 	*csr = crt;
16762 	*snr = srt;
16763     }
16764 
16765 /*     Correct signs of SSMAX and SSMIN */
16766 
16767     if (pmax == 1) {
16768 	tsign = d_sign(&c_b4, csr) * d_sign(&c_b4, csl) * d_sign(&c_b4, f);
16769     }
16770     if (pmax == 2) {
16771 	tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, csl) * d_sign(&c_b4, g);
16772     }
16773     if (pmax == 3) {
16774 	tsign = d_sign(&c_b4, snr) * d_sign(&c_b4, snl) * d_sign(&c_b4, h__);
16775     }
16776     *ssmax = d_sign(ssmax, &tsign);
16777     d__1 = tsign * d_sign(&c_b4, f) * d_sign(&c_b4, h__);
16778     *ssmin = d_sign(ssmin, &d__1);
16779     return 0;
16780 
16781 /*     End of DLASV2 */
16782 
16783 } /* dlasv2_ */
16784 
dlaswp_(integer * n,double * a,integer * lda,integer * k1,integer * k2,integer * ipiv,integer * incx)16785 /* Subroutine */ int dlaswp_(integer *n, double *a, integer *lda, integer
16786 	*k1, integer *k2, integer *ipiv, integer *incx)
16787 {
16788     /* System generated locals */
16789     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
16790 
16791     /* Local variables */
16792     integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc;
16793     double temp;
16794 
16795 
16796 /*  -- LAPACK auxiliary routine (version 3.1) -- */
16797 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
16798 /*     November 2006 */
16799 
16800 /*     .. Scalar Arguments .. */
16801 /*     .. */
16802 /*     .. Array Arguments .. */
16803 /*     .. */
16804 
16805 /*  Purpose */
16806 /*  ======= */
16807 
16808 /*  DLASWP performs a series of row interchanges on the matrix A. */
16809 /*  One row interchange is initiated for each of rows K1 through K2 of A. */
16810 
16811 /*  Arguments */
16812 /*  ========= */
16813 
16814 /*  N       (input) INTEGER */
16815 /*          The number of columns of the matrix A. */
16816 
16817 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
16818 /*          On entry, the matrix of column dimension N to which the row */
16819 /*          interchanges will be applied. */
16820 /*          On exit, the permuted matrix. */
16821 
16822 /*  LDA     (input) INTEGER */
16823 /*          The leading dimension of the array A. */
16824 
16825 /*  K1      (input) INTEGER */
16826 /*          The first element of IPIV for which a row interchange will */
16827 /*          be done. */
16828 
16829 /*  K2      (input) INTEGER */
16830 /*          The last element of IPIV for which a row interchange will */
16831 /*          be done. */
16832 
16833 /*  IPIV    (input) INTEGER array, dimension (K2*abs(INCX)) */
16834 /*          The vector of pivot indices.  Only the elements in positions */
16835 /*          K1 through K2 of IPIV are accessed. */
16836 /*          IPIV(K) = L implies rows K and L are to be interchanged. */
16837 
16838 /*  INCX    (input) INTEGER */
16839 /*          The increment between successive values of IPIV.  If IPIV */
16840 /*          is negative, the pivots are applied in reverse order. */
16841 
16842 /*  Further Details */
16843 /*  =============== */
16844 
16845 /*  Modified by */
16846 /*   R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
16847 
16848 /* ===================================================================== */
16849 
16850 /*     .. Local Scalars .. */
16851 /*     .. */
16852 /*     .. Executable Statements .. */
16853 
16854 /*     Interchange row I with row IPIV(I) for each of rows K1 through K2. */
16855 
16856     /* Parameter adjustments */
16857     a_dim1 = *lda;
16858     a_offset = 1 + a_dim1;
16859     a -= a_offset;
16860     --ipiv;
16861 
16862     /* Function Body */
16863     if (*incx > 0) {
16864 	ix0 = *k1;
16865 	i1 = *k1;
16866 	i2 = *k2;
16867 	inc = 1;
16868     } else if (*incx < 0) {
16869 	ix0 = (1 - *k2) * *incx + 1;
16870 	i1 = *k2;
16871 	i2 = *k1;
16872 	inc = -1;
16873     } else {
16874 	return 0;
16875     }
16876 
16877     n32 = *n / 32 << 5;
16878     if (n32 != 0) {
16879 	i__1 = n32;
16880 	for (j = 1; j <= i__1; j += 32) {
16881 	    ix = ix0;
16882 	    i__2 = i2;
16883 	    i__3 = inc;
16884 	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3)
16885 		    {
16886 		ip = ipiv[ix];
16887 		if (ip != i__) {
16888 		    i__4 = j + 31;
16889 		    for (k = j; k <= i__4; ++k) {
16890 			temp = a[i__ + k * a_dim1];
16891 			a[i__ + k * a_dim1] = a[ip + k * a_dim1];
16892 			a[ip + k * a_dim1] = temp;
16893 /* L10: */
16894 		    }
16895 		}
16896 		ix += *incx;
16897 /* L20: */
16898 	    }
16899 /* L30: */
16900 	}
16901     }
16902     if (n32 != *n) {
16903 	++n32;
16904 	ix = ix0;
16905 	i__1 = i2;
16906 	i__3 = inc;
16907 	for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) {
16908 	    ip = ipiv[ix];
16909 	    if (ip != i__) {
16910 		i__2 = *n;
16911 		for (k = n32; k <= i__2; ++k) {
16912 		    temp = a[i__ + k * a_dim1];
16913 		    a[i__ + k * a_dim1] = a[ip + k * a_dim1];
16914 		    a[ip + k * a_dim1] = temp;
16915 /* L40: */
16916 		}
16917 	    }
16918 	    ix += *incx;
16919 /* L50: */
16920 	}
16921     }
16922 
16923     return 0;
16924 
16925 /*     End of DLASWP */
16926 
16927 } /* dlaswp_ */
16928 
dlasy2_(bool * ltranl,bool * ltranr,integer * isgn,integer * n1,integer * n2,double * tl,integer * ldtl,double * tr,integer * ldtr,double * b,integer * ldb,double * scale,double * x,integer * ldx,double * xnorm,integer * info)16929 /* Subroutine */ int dlasy2_(bool *ltranl, bool *ltranr, integer *isgn,
16930 	integer *n1, integer *n2, double *tl, integer *ldtl, double *
16931 	tr, integer *ldtr, double *b, integer *ldb, double *scale,
16932 	double *x, integer *ldx, double *xnorm, integer *info)
16933 {
16934 	/* Table of constant values */
16935 	static integer c__4 = 4;
16936 	static integer c__1 = 1;
16937 	static integer c__16 = 16;
16938 	static integer c__0 = 0;
16939 
16940     /* Initialized data */
16941 
16942     static integer locu12[4] = { 3,4,1,2 };
16943     static integer locl21[4] = { 2,1,4,3 };
16944     static integer locu22[4] = { 4,3,2,1 };
16945     static bool xswpiv[4] = { false,false,true,true };
16946     static bool bswpiv[4] = { false,true,false,true };
16947 
16948     /* System generated locals */
16949     integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1,
16950 	    x_offset;
16951     double d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8;
16952 
16953     /* Local variables */
16954     integer i__, j, k;
16955     double x2[2], l21, u11, u12;
16956     integer ip, jp;
16957     double u22, t16[16]	/* was [4][4] */, gam, bet, eps, sgn, tmp[4],
16958 	    tau1, btmp[4], smin;
16959     integer ipiv;
16960     double temp;
16961     integer jpiv[4];
16962     double xmax;
16963     integer ipsv, jpsv;
16964     bool bswap;
16965     bool xswap;
16966     double smlnum;
16967 
16968 
16969 /*  -- LAPACK auxiliary routine (version 3.1) -- */
16970 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
16971 /*     November 2006 */
16972 
16973 /*     .. Scalar Arguments .. */
16974 /*     .. */
16975 /*     .. Array Arguments .. */
16976 /*     .. */
16977 
16978 /*  Purpose */
16979 /*  ======= */
16980 
16981 /*  DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in */
16982 
16983 /*         op(TL)*X + ISGN*X*op(TR) = SCALE*B, */
16984 
16985 /*  where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or */
16986 /*  -1.  op(T) = T or T', where T' denotes the transpose of T. */
16987 
16988 /*  Arguments */
16989 /*  ========= */
16990 
16991 /*  LTRANL  (input) LOGICAL */
16992 /*          On entry, LTRANL specifies the op(TL): */
16993 /*             = .FALSE., op(TL) = TL, */
16994 /*             = .TRUE., op(TL) = TL'. */
16995 
16996 /*  LTRANR  (input) LOGICAL */
16997 /*          On entry, LTRANR specifies the op(TR): */
16998 /*            = .FALSE., op(TR) = TR, */
16999 /*            = .TRUE., op(TR) = TR'. */
17000 
17001 /*  ISGN    (input) INTEGER */
17002 /*          On entry, ISGN specifies the sign of the equation */
17003 /*          as described before. ISGN may only be 1 or -1. */
17004 
17005 /*  N1      (input) INTEGER */
17006 /*          On entry, N1 specifies the order of matrix TL. */
17007 /*          N1 may only be 0, 1 or 2. */
17008 
17009 /*  N2      (input) INTEGER */
17010 /*          On entry, N2 specifies the order of matrix TR. */
17011 /*          N2 may only be 0, 1 or 2. */
17012 
17013 /*  TL      (input) DOUBLE PRECISION array, dimension (LDTL,2) */
17014 /*          On entry, TL contains an N1 by N1 matrix. */
17015 
17016 /*  LDTL    (input) INTEGER */
17017 /*          The leading dimension of the matrix TL. LDTL >= max(1,N1). */
17018 
17019 /*  TR      (input) DOUBLE PRECISION array, dimension (LDTR,2) */
17020 /*          On entry, TR contains an N2 by N2 matrix. */
17021 
17022 /*  LDTR    (input) INTEGER */
17023 /*          The leading dimension of the matrix TR. LDTR >= max(1,N2). */
17024 
17025 /*  B       (input) DOUBLE PRECISION array, dimension (LDB,2) */
17026 /*          On entry, the N1 by N2 matrix B contains the right-hand */
17027 /*          side of the equation. */
17028 
17029 /*  LDB     (input) INTEGER */
17030 /*          The leading dimension of the matrix B. LDB >= max(1,N1). */
17031 
17032 /*  SCALE   (output) DOUBLE PRECISION */
17033 /*          On exit, SCALE contains the scale factor. SCALE is chosen */
17034 /*          less than or equal to 1 to prevent the solution overflowing. */
17035 
17036 /*  X       (output) DOUBLE PRECISION array, dimension (LDX,2) */
17037 /*          On exit, X contains the N1 by N2 solution. */
17038 
17039 /*  LDX     (input) INTEGER */
17040 /*          The leading dimension of the matrix X. LDX >= max(1,N1). */
17041 
17042 /*  XNORM   (output) DOUBLE PRECISION */
17043 /*          On exit, XNORM is the infinity-norm of the solution. */
17044 
17045 /*  INFO    (output) INTEGER */
17046 /*          On exit, INFO is set to */
17047 /*             0: successful exit. */
17048 /*             1: TL and TR have too close eigenvalues, so TL or */
17049 /*                TR is perturbed to get a nonsingular equation. */
17050 /*          NOTE: In the interests of speed, this routine does not */
17051 /*                check the inputs for errors. */
17052 
17053 /* ===================================================================== */
17054 
17055 /*     .. Parameters .. */
17056 /*     .. */
17057 /*     .. Local Scalars .. */
17058 /*     .. */
17059 /*     .. Local Arrays .. */
17060 /*     .. */
17061 /*     .. External Functions .. */
17062 /*     .. */
17063 /*     .. External Subroutines .. */
17064 /*     .. */
17065 /*     .. Intrinsic Functions .. */
17066 /*     .. */
17067 /*     .. Data statements .. */
17068     /* Parameter adjustments */
17069     tl_dim1 = *ldtl;
17070     tl_offset = 1 + tl_dim1;
17071     tl -= tl_offset;
17072     tr_dim1 = *ldtr;
17073     tr_offset = 1 + tr_dim1;
17074     tr -= tr_offset;
17075     b_dim1 = *ldb;
17076     b_offset = 1 + b_dim1;
17077     b -= b_offset;
17078     x_dim1 = *ldx;
17079     x_offset = 1 + x_dim1;
17080     x -= x_offset;
17081 
17082     /* Function Body */
17083 /*     .. */
17084 /*     .. Executable Statements .. */
17085 
17086 /*     Do not check the input parameters for errors */
17087 
17088     *info = 0;
17089 
17090 /*     Quick return if possible */
17091 
17092     if (*n1 == 0 || *n2 == 0) {
17093 	return 0;
17094     }
17095 
17096 /*     Set constants to control overflow */
17097 
17098     eps = dlamch_("P");
17099     smlnum = dlamch_("S") / eps;
17100     sgn = (double) (*isgn);
17101 
17102     k = *n1 + *n1 + *n2 - 2;
17103     switch (k) {
17104 	case 1:  goto L10;
17105 	case 2:  goto L20;
17106 	case 3:  goto L30;
17107 	case 4:  goto L50;
17108     }
17109 
17110 /*     1 by 1: TL11*X + SGN*X*TR11 = B11 */
17111 
17112 L10:
17113     tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1];
17114     bet = abs(tau1);
17115     if (bet <= smlnum) {
17116 	tau1 = smlnum;
17117 	bet = smlnum;
17118 	*info = 1;
17119     }
17120 
17121     *scale = 1.;
17122     gam = (d__1 = b[b_dim1 + 1], abs(d__1));
17123     if (smlnum * gam > bet) {
17124 	*scale = 1. / gam;
17125     }
17126 
17127     x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1;
17128     *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1));
17129     return 0;
17130 
17131 /*     1 by 2: */
17132 /*     TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12]  = [B11 B12] */
17133 /*                                       [TR21 TR22] */
17134 
17135 L20:
17136 
17137 /* Computing MAX */
17138 /* Computing MAX */
17139     d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1]
17140 	    , abs(d__2)), d__7 = std::max(d__7,d__8), d__8 = (d__3 = tr[(tr_dim1 <<
17141 	     1) + 1], abs(d__3)), d__7 = std::max(d__7,d__8), d__8 = (d__4 = tr[
17142 	    tr_dim1 + 2], abs(d__4)), d__7 = std::max(d__7,d__8), d__8 = (d__5 =
17143 	    tr[(tr_dim1 << 1) + 2], abs(d__5));
17144     d__6 = eps * std::max(d__7,d__8);
17145     smin = std::max(d__6,smlnum);
17146     tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1];
17147     tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2];
17148     if (*ltranr) {
17149 	tmp[1] = sgn * tr[tr_dim1 + 2];
17150 	tmp[2] = sgn * tr[(tr_dim1 << 1) + 1];
17151     } else {
17152 	tmp[1] = sgn * tr[(tr_dim1 << 1) + 1];
17153 	tmp[2] = sgn * tr[tr_dim1 + 2];
17154     }
17155     btmp[0] = b[b_dim1 + 1];
17156     btmp[1] = b[(b_dim1 << 1) + 1];
17157     goto L40;
17158 
17159 /*     2 by 1: */
17160 /*          op[TL11 TL12]*[X11] + ISGN* [X11]*TR11  = [B11] */
17161 /*            [TL21 TL22] [X21]         [X21]         [B21] */
17162 
17163 L30:
17164 /* Computing MAX */
17165 /* Computing MAX */
17166     d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1]
17167 	    , abs(d__2)), d__7 = std::max(d__7,d__8), d__8 = (d__3 = tl[(tl_dim1 <<
17168 	     1) + 1], abs(d__3)), d__7 = std::max(d__7,d__8), d__8 = (d__4 = tl[
17169 	    tl_dim1 + 2], abs(d__4)), d__7 = std::max(d__7,d__8), d__8 = (d__5 =
17170 	    tl[(tl_dim1 << 1) + 2], abs(d__5));
17171     d__6 = eps * std::max(d__7,d__8);
17172     smin = std::max(d__6,smlnum);
17173     tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1];
17174     tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1];
17175     if (*ltranl) {
17176 	tmp[1] = tl[(tl_dim1 << 1) + 1];
17177 	tmp[2] = tl[tl_dim1 + 2];
17178     } else {
17179 	tmp[1] = tl[tl_dim1 + 2];
17180 	tmp[2] = tl[(tl_dim1 << 1) + 1];
17181     }
17182     btmp[0] = b[b_dim1 + 1];
17183     btmp[1] = b[b_dim1 + 2];
17184 L40:
17185 
17186 /*     Solve 2 by 2 system using complete pivoting. */
17187 /*     Set pivots less than SMIN to SMIN. */
17188 
17189     ipiv = idamax_(&c__4, tmp, &c__1);
17190     u11 = tmp[ipiv - 1];
17191     if (abs(u11) <= smin) {
17192 	*info = 1;
17193 	u11 = smin;
17194     }
17195     u12 = tmp[locu12[ipiv - 1] - 1];
17196     l21 = tmp[locl21[ipiv - 1] - 1] / u11;
17197     u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21;
17198     xswap = xswpiv[ipiv - 1];
17199     bswap = bswpiv[ipiv - 1];
17200     if (abs(u22) <= smin) {
17201 	*info = 1;
17202 	u22 = smin;
17203     }
17204     if (bswap) {
17205 	temp = btmp[1];
17206 	btmp[1] = btmp[0] - l21 * temp;
17207 	btmp[0] = temp;
17208     } else {
17209 	btmp[1] -= l21 * btmp[0];
17210     }
17211     *scale = 1.;
17212     if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. * abs(btmp[0]) >
17213 	    abs(u11)) {
17214 /* Computing MAX */
17215 	d__1 = abs(btmp[0]), d__2 = abs(btmp[1]);
17216 	*scale = .5 / std::max(d__1,d__2);
17217 	btmp[0] *= *scale;
17218 	btmp[1] *= *scale;
17219     }
17220     x2[1] = btmp[1] / u22;
17221     x2[0] = btmp[0] / u11 - u12 / u11 * x2[1];
17222     if (xswap) {
17223 	temp = x2[1];
17224 	x2[1] = x2[0];
17225 	x2[0] = temp;
17226     }
17227     x[x_dim1 + 1] = x2[0];
17228     if (*n1 == 1) {
17229 	x[(x_dim1 << 1) + 1] = x2[1];
17230 	*xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1)
17231 		+ 1], abs(d__2));
17232     } else {
17233 	x[x_dim1 + 2] = x2[1];
17234 /* Computing MAX */
17235 	d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2]
17236 		, abs(d__2));
17237 	*xnorm = std::max(d__3,d__4);
17238     }
17239     return 0;
17240 
17241 /*     2 by 2: */
17242 /*     op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] */
17243 /*       [TL21 TL22] [X21 X22]        [X21 X22]   [TR21 TR22]   [B21 B22] */
17244 
17245 /*     Solve equivalent 4 by 4 system using complete pivoting. */
17246 /*     Set pivots less than SMIN to SMIN. */
17247 
17248 L50:
17249 /* Computing MAX */
17250     d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 <<
17251 	    1) + 1], abs(d__2)), d__5 = std::max(d__5,d__6), d__6 = (d__3 = tr[
17252 	    tr_dim1 + 2], abs(d__3)), d__5 = std::max(d__5,d__6), d__6 = (d__4 =
17253 	    tr[(tr_dim1 << 1) + 2], abs(d__4));
17254     smin = std::max(d__5,d__6);
17255 /* Computing MAX */
17256     d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = std::max(d__5,
17257 	    d__6), d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 =
17258 	    std::max(d__5,d__6), d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 =
17259 	     std::max(d__5,d__6), d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4))
17260 	    ;
17261     smin = std::max(d__5,d__6);
17262 /* Computing MAX */
17263     d__1 = eps * smin;
17264     smin = std::max(d__1,smlnum);
17265     btmp[0] = 0.;
17266     dcopy_(&c__16, btmp, &c__0, t16, &c__1);
17267     t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1];
17268     t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1];
17269     t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2];
17270     t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2];
17271     if (*ltranl) {
17272 	t16[4] = tl[tl_dim1 + 2];
17273 	t16[1] = tl[(tl_dim1 << 1) + 1];
17274 	t16[14] = tl[tl_dim1 + 2];
17275 	t16[11] = tl[(tl_dim1 << 1) + 1];
17276     } else {
17277 	t16[4] = tl[(tl_dim1 << 1) + 1];
17278 	t16[1] = tl[tl_dim1 + 2];
17279 	t16[14] = tl[(tl_dim1 << 1) + 1];
17280 	t16[11] = tl[tl_dim1 + 2];
17281     }
17282     if (*ltranr) {
17283 	t16[8] = sgn * tr[(tr_dim1 << 1) + 1];
17284 	t16[13] = sgn * tr[(tr_dim1 << 1) + 1];
17285 	t16[2] = sgn * tr[tr_dim1 + 2];
17286 	t16[7] = sgn * tr[tr_dim1 + 2];
17287     } else {
17288 	t16[8] = sgn * tr[tr_dim1 + 2];
17289 	t16[13] = sgn * tr[tr_dim1 + 2];
17290 	t16[2] = sgn * tr[(tr_dim1 << 1) + 1];
17291 	t16[7] = sgn * tr[(tr_dim1 << 1) + 1];
17292     }
17293     btmp[0] = b[b_dim1 + 1];
17294     btmp[1] = b[b_dim1 + 2];
17295     btmp[2] = b[(b_dim1 << 1) + 1];
17296     btmp[3] = b[(b_dim1 << 1) + 2];
17297 
17298 /*     Perform elimination */
17299 
17300     for (i__ = 1; i__ <= 3; ++i__) {
17301 	xmax = 0.;
17302 	for (ip = i__; ip <= 4; ++ip) {
17303 	    for (jp = i__; jp <= 4; ++jp) {
17304 		if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) {
17305 		    xmax = (d__1 = t16[ip + (jp << 2) - 5], abs(d__1));
17306 		    ipsv = ip;
17307 		    jpsv = jp;
17308 		}
17309 /* L60: */
17310 	    }
17311 /* L70: */
17312 	}
17313 	if (ipsv != i__) {
17314 	    dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4);
17315 	    temp = btmp[i__ - 1];
17316 	    btmp[i__ - 1] = btmp[ipsv - 1];
17317 	    btmp[ipsv - 1] = temp;
17318 	}
17319 	if (jpsv != i__) {
17320 	    dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4],
17321 		    &c__1);
17322 	}
17323 	jpiv[i__ - 1] = jpsv;
17324 	if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) {
17325 	    *info = 1;
17326 	    t16[i__ + (i__ << 2) - 5] = smin;
17327 	}
17328 	for (j = i__ + 1; j <= 4; ++j) {
17329 	    t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5];
17330 	    btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1];
17331 	    for (k = i__ + 1; k <= 4; ++k) {
17332 		t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + (
17333 			k << 2) - 5];
17334 /* L80: */
17335 	    }
17336 /* L90: */
17337 	}
17338 /* L100: */
17339     }
17340     if (abs(t16[15]) < smin) {
17341 	t16[15] = smin;
17342     }
17343     *scale = 1.;
17344     if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1])
17345 	     > abs(t16[5]) || smlnum * 8. * abs(btmp[2]) > abs(t16[10]) ||
17346 	    smlnum * 8. * abs(btmp[3]) > abs(t16[15])) {
17347 /* Computing MAX */
17348 	d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = std::max(d__1,d__2), d__2
17349 		= abs(btmp[2]), d__1 = std::max(d__1,d__2), d__2 = abs(btmp[3]);
17350 	*scale = .125 / std::max(d__1,d__2);
17351 	btmp[0] *= *scale;
17352 	btmp[1] *= *scale;
17353 	btmp[2] *= *scale;
17354 	btmp[3] *= *scale;
17355     }
17356     for (i__ = 1; i__ <= 4; ++i__) {
17357 	k = 5 - i__;
17358 	temp = 1. / t16[k + (k << 2) - 5];
17359 	tmp[k - 1] = btmp[k - 1] * temp;
17360 	for (j = k + 1; j <= 4; ++j) {
17361 	    tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1];
17362 /* L110: */
17363 	}
17364 /* L120: */
17365     }
17366     for (i__ = 1; i__ <= 3; ++i__) {
17367 	if (jpiv[4 - i__ - 1] != 4 - i__) {
17368 	    temp = tmp[4 - i__ - 1];
17369 	    tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1];
17370 	    tmp[jpiv[4 - i__ - 1] - 1] = temp;
17371 	}
17372 /* L130: */
17373     }
17374     x[x_dim1 + 1] = tmp[0];
17375     x[x_dim1 + 2] = tmp[1];
17376     x[(x_dim1 << 1) + 1] = tmp[2];
17377     x[(x_dim1 << 1) + 2] = tmp[3];
17378 /* Computing MAX */
17379     d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]);
17380     *xnorm = std::max(d__1,d__2);
17381     return 0;
17382 
17383 /*     End of DLASY2 */
17384 
17385 } /* dlasy2_ */
17386 
dlasyf_(const char * uplo,integer * n,integer * nb,integer * kb,double * a,integer * lda,integer * ipiv,double * w,integer * ldw,integer * info)17387 /* Subroutine */ int dlasyf_(const char *uplo, integer *n, integer *nb, integer *kb,
17388 	double *a, integer *lda, integer *ipiv, double *w, integer *ldw, integer *info)
17389 {
17390 	/* Table of constant values */
17391 	static integer c__1 = 1;
17392 	static double c_b8 = -1.;
17393 	static double c_b9 = 1.;
17394 
17395     /* System generated locals */
17396     integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
17397     double d__1, d__2, d__3;
17398 
17399     /* Local variables */
17400     integer j, k;
17401     double t, r1, d11, d21, d22;
17402     integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax;
17403     double alpha;
17404     integer kstep;
17405     double absakk;
17406     double colmax, rowmax;
17407 
17408 
17409 /*  -- LAPACK routine (version 3.1) -- */
17410 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
17411 /*     November 2006 */
17412 
17413 /*     .. Scalar Arguments .. */
17414 /*     .. */
17415 /*     .. Array Arguments .. */
17416 /*     .. */
17417 
17418 /*  Purpose */
17419 /*  ======= */
17420 
17421 /*  DLASYF computes a partial factorization of a real symmetric matrix A */
17422 /*  using the Bunch-Kaufman diagonal pivoting method. The partial */
17423 /*  factorization has the form: */
17424 
17425 /*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or: */
17426 /*        ( 0  U22 ) (  0   D  ) ( U12' U22' ) */
17427 
17428 /*  A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L' */
17429 /*        ( L21  I ) (  0  A22 ) (  0    I   ) */
17430 
17431 /*  where the order of D is at most NB. The actual order is returned in */
17432 /*  the argument KB, and is either NB or NB-1, or N if N <= NB. */
17433 
17434 /*  DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code */
17435 /*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or */
17436 /*  A22 (if UPLO = 'L'). */
17437 
17438 /*  Arguments */
17439 /*  ========= */
17440 
17441 /*  UPLO    (input) CHARACTER*1 */
17442 /*          Specifies whether the upper or lower triangular part of the */
17443 /*          symmetric matrix A is stored: */
17444 /*          = 'U':  Upper triangular */
17445 /*          = 'L':  Lower triangular */
17446 
17447 /*  N       (input) INTEGER */
17448 /*          The order of the matrix A.  N >= 0. */
17449 
17450 /*  NB      (input) INTEGER */
17451 /*          The maximum number of columns of the matrix A that should be */
17452 /*          factored.  NB should be at least 2 to allow for 2-by-2 pivot */
17453 /*          blocks. */
17454 
17455 /*  KB      (output) INTEGER */
17456 /*          The number of columns of A that were actually factored. */
17457 /*          KB is either NB-1 or NB, or N if N <= NB. */
17458 
17459 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
17460 /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
17461 /*          n-by-n upper triangular part of A contains the upper */
17462 /*          triangular part of the matrix A, and the strictly lower */
17463 /*          triangular part of A is not referenced.  If UPLO = 'L', the */
17464 /*          leading n-by-n lower triangular part of A contains the lower */
17465 /*          triangular part of the matrix A, and the strictly upper */
17466 /*          triangular part of A is not referenced. */
17467 /*          On exit, A contains details of the partial factorization. */
17468 
17469 /*  LDA     (input) INTEGER */
17470 /*          The leading dimension of the array A.  LDA >= max(1,N). */
17471 
17472 /*  IPIV    (output) INTEGER array, dimension (N) */
17473 /*          Details of the interchanges and the block structure of D. */
17474 /*          If UPLO = 'U', only the last KB elements of IPIV are set; */
17475 /*          if UPLO = 'L', only the first KB elements are set. */
17476 
17477 /*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were */
17478 /*          interchanged and D(k,k) is a 1-by-1 diagonal block. */
17479 /*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and */
17480 /*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) */
17481 /*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) = */
17482 /*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were */
17483 /*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. */
17484 
17485 /*  W       (workspace) DOUBLE PRECISION array, dimension (LDW,NB) */
17486 
17487 /*  LDW     (input) INTEGER */
17488 /*          The leading dimension of the array W.  LDW >= max(1,N). */
17489 
17490 /*  INFO    (output) INTEGER */
17491 /*          = 0: successful exit */
17492 /*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization */
17493 /*               has been completed, but the block diagonal matrix D is */
17494 /*               exactly singular. */
17495 
17496 /*  ===================================================================== */
17497 
17498 /*     .. Parameters .. */
17499 /*     .. */
17500 /*     .. Local Scalars .. */
17501 /*     .. */
17502 /*     .. External Functions .. */
17503 /*     .. */
17504 /*     .. External Subroutines .. */
17505 /*     .. */
17506 /*     .. Intrinsic Functions .. */
17507 /*     .. */
17508 /*     .. Executable Statements .. */
17509 
17510     /* Parameter adjustments */
17511     a_dim1 = *lda;
17512     a_offset = 1 + a_dim1;
17513     a -= a_offset;
17514     --ipiv;
17515     w_dim1 = *ldw;
17516     w_offset = 1 + w_dim1;
17517     w -= w_offset;
17518 
17519     /* Function Body */
17520     *info = 0;
17521 
17522 /*     Initialize ALPHA for use in choosing pivot block size. */
17523 
17524     alpha = (sqrt(17.) + 1.) / 8.;
17525 
17526     if (lsame_(uplo, "U")) {
17527 
17528 /*        Factorize the trailing columns of A using the upper triangle */
17529 /*        of A and working backwards, and compute the matrix W = U12*D */
17530 /*        for use in updating A11 */
17531 
17532 /*        K is the main loop index, decreasing from N in steps of 1 or 2 */
17533 
17534 /*        KW is the column of W which corresponds to column K of A */
17535 
17536 	k = *n;
17537 L10:
17538 	kw = *nb + k - *n;
17539 
17540 /*        Exit from loop */
17541 
17542 	if (k <= *n - *nb + 1 && *nb < *n || k < 1) {
17543 	    goto L30;
17544 	}
17545 
17546 /*        Copy column K of A to column KW of W and update it */
17547 
17548 	dcopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
17549 	if (k < *n) {
17550 	    i__1 = *n - k;
17551 	    dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) * a_dim1 + 1],
17552 		     lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b9, &w[kw *
17553 		    w_dim1 + 1], &c__1);
17554 	}
17555 
17556 	kstep = 1;
17557 
17558 /*        Determine rows and columns to be interchanged and whether */
17559 /*        a 1-by-1 or 2-by-2 pivot block will be used */
17560 
17561 	absakk = (d__1 = w[k + kw * w_dim1], abs(d__1));
17562 
17563 /*        IMAX is the row-index of the largest off-diagonal element in */
17564 /*        column K, and COLMAX is its absolute value */
17565 
17566 	if (k > 1) {
17567 	    i__1 = k - 1;
17568 	    imax = idamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
17569 	    colmax = (d__1 = w[imax + kw * w_dim1], abs(d__1));
17570 	} else {
17571 	    colmax = 0.;
17572 	}
17573 
17574 	if (std::max(absakk,colmax) == 0.) {
17575 
17576 /*           Column K is zero: set INFO and continue */
17577 
17578 	    if (*info == 0) {
17579 		*info = k;
17580 	    }
17581 	    kp = k;
17582 	} else {
17583 	    if (absakk >= alpha * colmax) {
17584 
17585 /*              no interchange, use 1-by-1 pivot block */
17586 
17587 		kp = k;
17588 	    } else {
17589 
17590 /*              Copy column IMAX to column KW-1 of W and update it */
17591 
17592 		dcopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
17593 			w_dim1 + 1], &c__1);
17594 		i__1 = k - imax;
17595 		dcopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
17596 			1 + (kw - 1) * w_dim1], &c__1);
17597 		if (k < *n) {
17598 		    i__1 = *n - k;
17599 		    dgemv_("No transpose", &k, &i__1, &c_b8, &a[(k + 1) *
17600 			    a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
17601 			    ldw, &c_b9, &w[(kw - 1) * w_dim1 + 1], &c__1);
17602 		}
17603 
17604 /*              JMAX is the column-index of the largest off-diagonal */
17605 /*              element in row IMAX, and ROWMAX is its absolute value */
17606 
17607 		i__1 = k - imax;
17608 		jmax = imax + idamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1],
17609 			 &c__1);
17610 		rowmax = (d__1 = w[jmax + (kw - 1) * w_dim1], abs(d__1));
17611 		if (imax > 1) {
17612 		    i__1 = imax - 1;
17613 		    jmax = idamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
17614 /* Computing MAX */
17615 		    d__2 = rowmax, d__3 = (d__1 = w[jmax + (kw - 1) * w_dim1],
17616 			     abs(d__1));
17617 		    rowmax = std::max(d__2,d__3);
17618 		}
17619 
17620 		if (absakk >= alpha * colmax * (colmax / rowmax)) {
17621 
17622 /*                 no interchange, use 1-by-1 pivot block */
17623 
17624 		    kp = k;
17625 		} else if ((d__1 = w[imax + (kw - 1) * w_dim1], abs(d__1)) >=
17626 			alpha * rowmax) {
17627 
17628 /*                 interchange rows and columns K and IMAX, use 1-by-1 */
17629 /*                 pivot block */
17630 
17631 		    kp = imax;
17632 
17633 /*                 copy column KW-1 of W to column KW */
17634 
17635 		    dcopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
17636 			    w_dim1 + 1], &c__1);
17637 		} else {
17638 
17639 /*                 interchange rows and columns K-1 and IMAX, use 2-by-2 */
17640 /*                 pivot block */
17641 
17642 		    kp = imax;
17643 		    kstep = 2;
17644 		}
17645 	    }
17646 
17647 	    kk = k - kstep + 1;
17648 	    kkw = *nb + kk - *n;
17649 
17650 /*           Updated column KP is already stored in column KKW of W */
17651 
17652 	    if (kp != kk) {
17653 
17654 /*              Copy non-updated column KK to column KP */
17655 
17656 		a[kp + k * a_dim1] = a[kk + k * a_dim1];
17657 		i__1 = k - 1 - kp;
17658 		dcopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
17659 			1) * a_dim1], lda);
17660 		dcopy_(&kp, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &
17661 			c__1);
17662 
17663 /*              Interchange rows KK and KP in last KK columns of A and W */
17664 
17665 		i__1 = *n - kk + 1;
17666 		dswap_(&i__1, &a[kk + kk * a_dim1], lda, &a[kp + kk * a_dim1],
17667 			 lda);
17668 		i__1 = *n - kk + 1;
17669 		dswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
17670 			w_dim1], ldw);
17671 	    }
17672 
17673 	    if (kstep == 1) {
17674 
17675 /*              1-by-1 pivot block D(k): column KW of W now holds */
17676 
17677 /*              W(k) = U(k)*D(k) */
17678 
17679 /*              where U(k) is the k-th column of U */
17680 
17681 /*              Store U(k) in column k of A */
17682 
17683 		dcopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
17684 			c__1);
17685 		r1 = 1. / a[k + k * a_dim1];
17686 		i__1 = k - 1;
17687 		dscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
17688 	    } else {
17689 
17690 /*              2-by-2 pivot block D(k): columns KW and KW-1 of W now */
17691 /*              hold */
17692 
17693 /*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) */
17694 
17695 /*              where U(k) and U(k-1) are the k-th and (k-1)-th columns */
17696 /*              of U */
17697 
17698 		if (k > 2) {
17699 
17700 /*                 Store U(k) and U(k-1) in columns k and k-1 of A */
17701 
17702 		    d21 = w[k - 1 + kw * w_dim1];
17703 		    d11 = w[k + kw * w_dim1] / d21;
17704 		    d22 = w[k - 1 + (kw - 1) * w_dim1] / d21;
17705 		    t = 1. / (d11 * d22 - 1.);
17706 		    d21 = t / d21;
17707 		    i__1 = k - 2;
17708 		    for (j = 1; j <= i__1; ++j) {
17709 			a[j + (k - 1) * a_dim1] = d21 * (d11 * w[j + (kw - 1)
17710 				* w_dim1] - w[j + kw * w_dim1]);
17711 			a[j + k * a_dim1] = d21 * (d22 * w[j + kw * w_dim1] -
17712 				w[j + (kw - 1) * w_dim1]);
17713 /* L20: */
17714 		    }
17715 		}
17716 
17717 /*              Copy D(k) to A */
17718 
17719 		a[k - 1 + (k - 1) * a_dim1] = w[k - 1 + (kw - 1) * w_dim1];
17720 		a[k - 1 + k * a_dim1] = w[k - 1 + kw * w_dim1];
17721 		a[k + k * a_dim1] = w[k + kw * w_dim1];
17722 	    }
17723 	}
17724 
17725 /*        Store details of the interchanges in IPIV */
17726 
17727 	if (kstep == 1) {
17728 	    ipiv[k] = kp;
17729 	} else {
17730 	    ipiv[k] = -kp;
17731 	    ipiv[k - 1] = -kp;
17732 	}
17733 
17734 /*        Decrease K and return to the start of the main loop */
17735 
17736 	k -= kstep;
17737 	goto L10;
17738 
17739 L30:
17740 
17741 /*        Update the upper triangle of A11 (= A(1:k,1:k)) as */
17742 
17743 /*        A11 := A11 - U12*D*U12' = A11 - U12*W' */
17744 
17745 /*        computing blocks of NB columns at a time */
17746 
17747 	i__1 = -(*nb);
17748 	for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j +=
17749 		i__1) {
17750 /* Computing MIN */
17751 	    i__2 = *nb, i__3 = k - j + 1;
17752 	    jb = std::min(i__2,i__3);
17753 
17754 /*           Update the upper triangle of the diagonal block */
17755 
17756 	    i__2 = j + jb - 1;
17757 	    for (jj = j; jj <= i__2; ++jj) {
17758 		i__3 = jj - j + 1;
17759 		i__4 = *n - k;
17760 		dgemv_("No transpose", &i__3, &i__4, &c_b8, &a[j + (k + 1) *
17761 			a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b9,
17762 			&a[j + jj * a_dim1], &c__1);
17763 /* L40: */
17764 	    }
17765 
17766 /*           Update the rectangular superdiagonal block */
17767 
17768 	    i__2 = j - 1;
17769 	    i__3 = *n - k;
17770 	    dgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &a[(
17771 		    k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw,
17772 		     &c_b9, &a[j * a_dim1 + 1], lda);
17773 /* L50: */
17774 	}
17775 
17776 /*        Put U12 in standard form by partially undoing the interchanges */
17777 /*        in columns k+1:n */
17778 
17779 	j = k + 1;
17780 L60:
17781 	jj = j;
17782 	jp = ipiv[j];
17783 	if (jp < 0) {
17784 	    jp = -jp;
17785 	    ++j;
17786 	}
17787 	++j;
17788 	if (jp != jj && j <= *n) {
17789 	    i__1 = *n - j + 1;
17790 	    dswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
17791 	}
17792 	if (j <= *n) {
17793 	    goto L60;
17794 	}
17795 
17796 /*        Set KB to the number of columns factorized */
17797 
17798 	*kb = *n - k;
17799 
17800     } else {
17801 
17802 /*        Factorize the leading columns of A using the lower triangle */
17803 /*        of A and working forwards, and compute the matrix W = L21*D */
17804 /*        for use in updating A22 */
17805 
17806 /*        K is the main loop index, increasing from 1 in steps of 1 or 2 */
17807 
17808 	k = 1;
17809 L70:
17810 
17811 /*        Exit from loop */
17812 
17813 	if (k >= *nb && *nb < *n || k > *n) {
17814 	    goto L90;
17815 	}
17816 
17817 /*        Copy column K of A to column K of W and update it */
17818 
17819 	i__1 = *n - k + 1;
17820 	dcopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
17821 	i__1 = *n - k + 1;
17822 	i__2 = k - 1;
17823 	dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1], lda, &w[k
17824 		+ w_dim1], ldw, &c_b9, &w[k + k * w_dim1], &c__1);
17825 
17826 	kstep = 1;
17827 
17828 /*        Determine rows and columns to be interchanged and whether */
17829 /*        a 1-by-1 or 2-by-2 pivot block will be used */
17830 
17831 	absakk = (d__1 = w[k + k * w_dim1], abs(d__1));
17832 
17833 /*        IMAX is the row-index of the largest off-diagonal element in */
17834 /*        column K, and COLMAX is its absolute value */
17835 
17836 	if (k < *n) {
17837 	    i__1 = *n - k;
17838 	    imax = k + idamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
17839 	    colmax = (d__1 = w[imax + k * w_dim1], abs(d__1));
17840 	} else {
17841 	    colmax = 0.;
17842 	}
17843 
17844 	if (std::max(absakk,colmax) == 0.) {
17845 
17846 /*           Column K is zero: set INFO and continue */
17847 
17848 	    if (*info == 0) {
17849 		*info = k;
17850 	    }
17851 	    kp = k;
17852 	} else {
17853 	    if (absakk >= alpha * colmax) {
17854 
17855 /*              no interchange, use 1-by-1 pivot block */
17856 
17857 		kp = k;
17858 	    } else {
17859 
17860 /*              Copy column IMAX to column K+1 of W and update it */
17861 
17862 		i__1 = imax - k;
17863 		dcopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
17864 			w_dim1], &c__1);
17865 		i__1 = *n - imax + 1;
17866 		dcopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k +
17867 			1) * w_dim1], &c__1);
17868 		i__1 = *n - k + 1;
17869 		i__2 = k - 1;
17870 		dgemv_("No transpose", &i__1, &i__2, &c_b8, &a[k + a_dim1],
17871 			lda, &w[imax + w_dim1], ldw, &c_b9, &w[k + (k + 1) *
17872 			w_dim1], &c__1);
17873 
17874 /*              JMAX is the column-index of the largest off-diagonal */
17875 /*              element in row IMAX, and ROWMAX is its absolute value */
17876 
17877 		i__1 = imax - k;
17878 		jmax = k - 1 + idamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1)
17879 			;
17880 		rowmax = (d__1 = w[jmax + (k + 1) * w_dim1], abs(d__1));
17881 		if (imax < *n) {
17882 		    i__1 = *n - imax;
17883 		    jmax = imax + idamax_(&i__1, &w[imax + 1 + (k + 1) *
17884 			    w_dim1], &c__1);
17885 /* Computing MAX */
17886 		    d__2 = rowmax, d__3 = (d__1 = w[jmax + (k + 1) * w_dim1],
17887 			    abs(d__1));
17888 		    rowmax = std::max(d__2,d__3);
17889 		}
17890 
17891 		if (absakk >= alpha * colmax * (colmax / rowmax)) {
17892 
17893 /*                 no interchange, use 1-by-1 pivot block */
17894 
17895 		    kp = k;
17896 		} else if ((d__1 = w[imax + (k + 1) * w_dim1], abs(d__1)) >=
17897 			alpha * rowmax) {
17898 
17899 /*                 interchange rows and columns K and IMAX, use 1-by-1 */
17900 /*                 pivot block */
17901 
17902 		    kp = imax;
17903 
17904 /*                 copy column K+1 of W to column K */
17905 
17906 		    i__1 = *n - k + 1;
17907 		    dcopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
17908 			    w_dim1], &c__1);
17909 		} else {
17910 
17911 /*                 interchange rows and columns K+1 and IMAX, use 2-by-2 */
17912 /*                 pivot block */
17913 
17914 		    kp = imax;
17915 		    kstep = 2;
17916 		}
17917 	    }
17918 
17919 	    kk = k + kstep - 1;
17920 
17921 /*           Updated column KP is already stored in column KK of W */
17922 
17923 	    if (kp != kk) {
17924 
17925 /*              Copy non-updated column KK to column KP */
17926 
17927 		a[kp + k * a_dim1] = a[kk + k * a_dim1];
17928 		i__1 = kp - k - 1;
17929 		dcopy_(&i__1, &a[k + 1 + kk * a_dim1], &c__1, &a[kp + (k + 1)
17930 			* a_dim1], lda);
17931 		i__1 = *n - kp + 1;
17932 		dcopy_(&i__1, &a[kp + kk * a_dim1], &c__1, &a[kp + kp *
17933 			a_dim1], &c__1);
17934 
17935 /*              Interchange rows KK and KP in first KK columns of A and W */
17936 
17937 		dswap_(&kk, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
17938 		dswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
17939 	    }
17940 
17941 	    if (kstep == 1) {
17942 
17943 /*              1-by-1 pivot block D(k): column k of W now holds */
17944 
17945 /*              W(k) = L(k)*D(k) */
17946 
17947 /*              where L(k) is the k-th column of L */
17948 
17949 /*              Store L(k) in column k of A */
17950 
17951 		i__1 = *n - k + 1;
17952 		dcopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
17953 			c__1);
17954 		if (k < *n) {
17955 		    r1 = 1. / a[k + k * a_dim1];
17956 		    i__1 = *n - k;
17957 		    dscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
17958 		}
17959 	    } else {
17960 
17961 /*              2-by-2 pivot block D(k): columns k and k+1 of W now hold */
17962 
17963 /*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */
17964 
17965 /*              where L(k) and L(k+1) are the k-th and (k+1)-th columns */
17966 /*              of L */
17967 
17968 		if (k < *n - 1) {
17969 
17970 /*                 Store L(k) and L(k+1) in columns k and k+1 of A */
17971 
17972 		    d21 = w[k + 1 + k * w_dim1];
17973 		    d11 = w[k + 1 + (k + 1) * w_dim1] / d21;
17974 		    d22 = w[k + k * w_dim1] / d21;
17975 		    t = 1. / (d11 * d22 - 1.);
17976 		    d21 = t / d21;
17977 		    i__1 = *n;
17978 		    for (j = k + 2; j <= i__1; ++j) {
17979 			a[j + k * a_dim1] = d21 * (d11 * w[j + k * w_dim1] -
17980 				w[j + (k + 1) * w_dim1]);
17981 			a[j + (k + 1) * a_dim1] = d21 * (d22 * w[j + (k + 1) *
17982 				 w_dim1] - w[j + k * w_dim1]);
17983 /* L80: */
17984 		    }
17985 		}
17986 
17987 /*              Copy D(k) to A */
17988 
17989 		a[k + k * a_dim1] = w[k + k * w_dim1];
17990 		a[k + 1 + k * a_dim1] = w[k + 1 + k * w_dim1];
17991 		a[k + 1 + (k + 1) * a_dim1] = w[k + 1 + (k + 1) * w_dim1];
17992 	    }
17993 	}
17994 
17995 /*        Store details of the interchanges in IPIV */
17996 
17997 	if (kstep == 1) {
17998 	    ipiv[k] = kp;
17999 	} else {
18000 	    ipiv[k] = -kp;
18001 	    ipiv[k + 1] = -kp;
18002 	}
18003 
18004 /*        Increase K and return to the start of the main loop */
18005 
18006 	k += kstep;
18007 	goto L70;
18008 
18009 L90:
18010 
18011 /*        Update the lower triangle of A22 (= A(k:n,k:n)) as */
18012 
18013 /*        A22 := A22 - L21*D*L21' = A22 - L21*W' */
18014 
18015 /*        computing blocks of NB columns at a time */
18016 
18017 	i__1 = *n;
18018 	i__2 = *nb;
18019 	for (j = k; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
18020 /* Computing MIN */
18021 	    i__3 = *nb, i__4 = *n - j + 1;
18022 	    jb = std::min(i__3,i__4);
18023 
18024 /*           Update the lower triangle of the diagonal block */
18025 
18026 	    i__3 = j + jb - 1;
18027 	    for (jj = j; jj <= i__3; ++jj) {
18028 		i__4 = j + jb - jj;
18029 		i__5 = k - 1;
18030 		dgemv_("No transpose", &i__4, &i__5, &c_b8, &a[jj + a_dim1],
18031 			lda, &w[jj + w_dim1], ldw, &c_b9, &a[jj + jj * a_dim1]
18032 , &c__1);
18033 /* L100: */
18034 	    }
18035 
18036 /*           Update the rectangular subdiagonal block */
18037 
18038 	    if (j + jb <= *n) {
18039 		i__3 = *n - j - jb + 1;
18040 		i__4 = k - 1;
18041 		dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8,
18042 			&a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b9,
18043 			&a[j + jb + j * a_dim1], lda);
18044 	    }
18045 /* L110: */
18046 	}
18047 
18048 /*        Put L21 in standard form by partially undoing the interchanges */
18049 /*        in columns 1:k-1 */
18050 
18051 	j = k - 1;
18052 L120:
18053 	jj = j;
18054 	jp = ipiv[j];
18055 	if (jp < 0) {
18056 	    jp = -jp;
18057 	    --j;
18058 	}
18059 	--j;
18060 	if (jp != jj && j >= 1) {
18061 	    dswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
18062 	}
18063 	if (j >= 1) {
18064 	    goto L120;
18065 	}
18066 
18067 /*        Set KB to the number of columns factorized */
18068 
18069 	*kb = k - 1;
18070 
18071     }
18072     return 0;
18073 
18074 /*     End of DLASYF */
18075 
18076 } /* dlasyf_ */
18077 
dlat2s_(const char * uplo,integer * n,double * a,integer * lda,float * sa,integer * ldsa,integer * info)18078 int dlat2s_(const char *uplo, integer *n, double *a, integer *lda, float *sa, integer *ldsa, integer *info)
18079 {
18080     /* System generated locals */
18081     integer sa_dim1, sa_offset, a_dim1, a_offset, i__1, i__2;
18082 
18083     /* Local variables */
18084     integer i__, j;
18085     double rmax;
18086     bool upper;
18087 
18088 
18089 
18090 /*  -- LAPACK PROTOTYPE auxiliary routine (version 3.1.2) -- */
18091 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
18092 /*     May 2007 */
18093 
18094 /*     .. Scalar Arguments .. */
18095 /*     .. */
18096 /*     .. Array Arguments .. */
18097 /*     .. */
18098 
18099 /*  Purpose */
18100 /*  ======= */
18101 
18102 /*  DLAT2S converts a DOUBLE PRECISION triangular matrix, SA, to a SINGLE */
18103 /*  PRECISION triangular matrix, A. */
18104 
18105 /*  RMAX is the overflow for the SINGLE PRECISION arithmetic */
18106 /*  DLAS2S checks that all the entries of A are between -RMAX and */
18107 /*  RMAX. If not the convertion is aborted and a flag is raised. */
18108 
18109 /*  This is an auxiliary routine so there is no argument checking. */
18110 
18111 /*  Arguments */
18112 /*  ========= */
18113 
18114 /*  UPLO    (input) CHARACTER*1 */
18115 /*          = 'U':  A is upper triangular; */
18116 /*          = 'L':  A is lower triangular. */
18117 
18118 /*  N       (input) INTEGER */
18119 /*          The number of rows and columns of the matrix A.  N >= 0. */
18120 
18121 /*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
18122 /*          On entry, the N-by-N triangular coefficient matrix A. */
18123 
18124 /*  LDA     (input) INTEGER */
18125 /*          The leading dimension of the array A.  LDA >= max(1,N). */
18126 
18127 /*  SA      (output) REAL array, dimension (LDSA,N) */
18128 /*          Only the UPLO part of SA is referenced.  On exit, if INFO=0, */
18129 /*          the N-by-N coefficient matrix SA; if INFO>0, the content of */
18130 /*          the UPLO part of SA is unspecified. */
18131 
18132 /*  LDSA    (input) INTEGER */
18133 /*          The leading dimension of the array SA.  LDSA >= max(1,M). */
18134 
18135 /*  INFO    (output) INTEGER */
18136 /*          = 0:  successful exit. */
18137 /*          = 1:  an entry of the matrix A is greater than the SINGLE */
18138 /*                PRECISION overflow threshold, in this case, the content */
18139 /*                of the UPLO part of SA in exit is unspecified. */
18140 
18141 /*  ========= */
18142 
18143 /*     .. Local Scalars .. */
18144 /*     .. */
18145 /*     .. External Functions .. */
18146 /*     .. */
18147 /*     .. Executable Statements .. */
18148 
18149     /* Parameter adjustments */
18150     a_dim1 = *lda;
18151     a_offset = 1 + a_dim1;
18152     a -= a_offset;
18153     sa_dim1 = *ldsa;
18154     sa_offset = 1 + sa_dim1;
18155     sa -= sa_offset;
18156 
18157     /* Function Body */
18158     rmax = slamch_("O");
18159     upper = lsame_(uplo, "U");
18160     if (upper) {
18161 	i__1 = *n;
18162 	for (j = 1; j <= i__1; ++j) {
18163 	    i__2 = j;
18164 	    for (i__ = 1; i__ <= i__2; ++i__) {
18165 		if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax)
18166 			 {
18167 		    *info = 1;
18168 		    goto L50;
18169 		}
18170 		sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1];
18171 /* L10: */
18172 	    }
18173 /* L20: */
18174 	}
18175     } else {
18176 	i__1 = *n;
18177 	for (j = 1; j <= i__1; ++j) {
18178 	    i__2 = *n;
18179 	    for (i__ = j; i__ <= i__2; ++i__) {
18180 		if (a[i__ + j * a_dim1] < -rmax || a[i__ + j * a_dim1] > rmax)
18181 			 {
18182 		    *info = 1;
18183 		    goto L50;
18184 		}
18185 		sa[i__ + j * sa_dim1] = a[i__ + j * a_dim1];
18186 /* L30: */
18187 	    }
18188 /* L40: */
18189 	}
18190     }
18191 L50:
18192 
18193     return 0;
18194 
18195 /*     End of DLAT2S */
18196 
18197 } /* dlat2s_ */
18198 
dlatbs_(const char * uplo,const char * trans,const char * diag,const char * normin,integer * n,integer * kd,double * ab,integer * ldab,double * x,double * scale,double * cnorm,integer * info)18199 /* Subroutine */ int dlatbs_(const char *uplo, const char *trans, const char *diag, const char *
18200 	normin, integer *n, integer *kd, double *ab, integer *ldab,
18201 	double *x, double *scale, double *cnorm, integer *info)
18202 {
18203 	/* Table of constant values */
18204 	static integer c__1 = 1;
18205 	static double c_b36 = .5;
18206 
18207     /* System generated locals */
18208     integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
18209     double d__1, d__2, d__3;
18210 
18211     /* Local variables */
18212     integer i__, j;
18213     double xj, rec, tjj;
18214     integer jinc, jlen;
18215     double xbnd;
18216     integer imax;
18217     double tmax, tjjs, xmax, grow, sumj;
18218     integer maind;
18219     double tscal, uscal;
18220     integer jlast;
18221     bool upper;
18222     double bignum;
18223     bool notran;
18224     integer jfirst;
18225     double smlnum;
18226     bool nounit;
18227 
18228 
18229 /*  -- LAPACK auxiliary routine (version 3.1) -- */
18230 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
18231 /*     November 2006 */
18232 
18233 /*     .. Scalar Arguments .. */
18234 /*     .. */
18235 /*     .. Array Arguments .. */
18236 /*     .. */
18237 
18238 /*  Purpose */
18239 /*  ======= */
18240 
18241 /*  DLATBS solves one of the triangular systems */
18242 
18243 /*     A *x = s*b  or  A'*x = s*b */
18244 
18245 /*  with scaling to prevent overflow, where A is an upper or lower */
18246 /*  triangular band matrix.  Here A' denotes the transpose of A, x and b */
18247 /*  are n-element vectors, and s is a scaling factor, usually less than */
18248 /*  or equal to 1, chosen so that the components of x will be less than */
18249 /*  the overflow threshold.  If the unscaled problem will not cause */
18250 /*  overflow, the Level 2 BLAS routine DTBSV is called.  If the matrix A */
18251 /*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a */
18252 /*  non-trivial solution to A*x = 0 is returned. */
18253 
18254 /*  Arguments */
18255 /*  ========= */
18256 
18257 /*  UPLO    (input) CHARACTER*1 */
18258 /*          Specifies whether the matrix A is upper or lower triangular. */
18259 /*          = 'U':  Upper triangular */
18260 /*          = 'L':  Lower triangular */
18261 
18262 /*  TRANS   (input) CHARACTER*1 */
18263 /*          Specifies the operation applied to A. */
18264 /*          = 'N':  Solve A * x = s*b  (No transpose) */
18265 /*          = 'T':  Solve A'* x = s*b  (Transpose) */
18266 /*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose) */
18267 
18268 /*  DIAG    (input) CHARACTER*1 */
18269 /*          Specifies whether or not the matrix A is unit triangular. */
18270 /*          = 'N':  Non-unit triangular */
18271 /*          = 'U':  Unit triangular */
18272 
18273 /*  NORMIN  (input) CHARACTER*1 */
18274 /*          Specifies whether CNORM has been set or not. */
18275 /*          = 'Y':  CNORM contains the column norms on entry */
18276 /*          = 'N':  CNORM is not set on entry.  On exit, the norms will */
18277 /*                  be computed and stored in CNORM. */
18278 
18279 /*  N       (input) INTEGER */
18280 /*          The order of the matrix A.  N >= 0. */
18281 
18282 /*  KD      (input) INTEGER */
18283 /*          The number of subdiagonals or superdiagonals in the */
18284 /*          triangular matrix A.  KD >= 0. */
18285 
18286 /*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N) */
18287 /*          The upper or lower triangular band matrix A, stored in the */
18288 /*          first KD+1 rows of the array. The j-th column of A is stored */
18289 /*          in the j-th column of the array AB as follows: */
18290 /*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
18291 /*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */
18292 
18293 /*  LDAB    (input) INTEGER */
18294 /*          The leading dimension of the array AB.  LDAB >= KD+1. */
18295 
18296 /*  X       (input/output) DOUBLE PRECISION array, dimension (N) */
18297 /*          On entry, the right hand side b of the triangular system. */
18298 /*          On exit, X is overwritten by the solution vector x. */
18299 
18300 /*  SCALE   (output) DOUBLE PRECISION */
18301 /*          The scaling factor s for the triangular system */
18302 /*             A * x = s*b  or  A'* x = s*b. */
18303 /*          If SCALE = 0, the matrix A is singular or badly scaled, and */
18304 /*          the vector x is an exact or approximate solution to A*x = 0. */
18305 
18306 /*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N) */
18307 
18308 /*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */
18309 /*          contains the norm of the off-diagonal part of the j-th column */
18310 /*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal */
18311 /*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */
18312 /*          must be greater than or equal to the 1-norm. */
18313 
18314 /*          If NORMIN = 'N', CNORM is an output argument and CNORM(j) */
18315 /*          returns the 1-norm of the offdiagonal part of the j-th column */
18316 /*          of A. */
18317 
18318 /*  INFO    (output) INTEGER */
18319 /*          = 0:  successful exit */
18320 /*          < 0:  if INFO = -k, the k-th argument had an illegal value */
18321 
18322 /*  Further Details */
18323 /*  ======= ======= */
18324 
18325 /*  A rough bound on x is computed; if that is less than overflow, DTBSV */
18326 /*  is called, otherwise, specific code is used which checks for possible */
18327 /*  overflow or divide-by-zero at every operation. */
18328 
18329 /*  A columnwise scheme is used for solving A*x = b.  The basic algorithm */
18330 /*  if A is lower triangular is */
18331 
18332 /*       x[1:n] := b[1:n] */
18333 /*       for j = 1, ..., n */
18334 /*            x(j) := x(j) / A(j,j) */
18335 /*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */
18336 /*       end */
18337 
18338 /*  Define bounds on the components of x after j iterations of the loop: */
18339 /*     M(j) = bound on x[1:j] */
18340 /*     G(j) = bound on x[j+1:n] */
18341 /*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */
18342 
18343 /*  Then for iteration j+1 we have */
18344 /*     M(j+1) <= G(j) / | A(j+1,j+1) | */
18345 /*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */
18346 /*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */
18347 
18348 /*  where CNORM(j+1) is greater than or equal to the infinity-norm of */
18349 /*  column j+1 of A, not counting the diagonal.  Hence */
18350 
18351 /*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */
18352 /*                  1<=i<=j */
18353 /*  and */
18354 
18355 /*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */
18356 /*                                   1<=i< j */
18357 
18358 /*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the */
18359 /*  reciprocal of the largest M(j), j=1,..,n, is larger than */
18360 /*  max(underflow, 1/overflow). */
18361 
18362 /*  The bound on x(j) is also used to determine when a step in the */
18363 /*  columnwise method can be performed without fear of overflow.  If */
18364 /*  the computed bound is greater than a large constant, x is scaled to */
18365 /*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to */
18366 /*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */
18367 
18368 /*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic */
18369 /*  algorithm for A upper triangular is */
18370 
18371 /*       for j = 1, ..., n */
18372 /*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */
18373 /*       end */
18374 
18375 /*  We simultaneously compute two bounds */
18376 /*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */
18377 /*       M(j) = bound on x(i), 1<=i<=j */
18378 
18379 /*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */
18380 /*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */
18381 /*  Then the bound on x(j) is */
18382 
18383 /*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */
18384 
18385 /*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */
18386 /*                      1<=i<=j */
18387 
18388 /*  and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater */
18389 /*  than max(underflow, 1/overflow). */
18390 
18391 /*  ===================================================================== */
18392 
18393 /*     .. Parameters .. */
18394 /*     .. */
18395 /*     .. Local Scalars .. */
18396 /*     .. */
18397 /*     .. External Functions .. */
18398 /*     .. */
18399 /*     .. External Subroutines .. */
18400 /*     .. */
18401 /*     .. Intrinsic Functions .. */
18402 /*     .. */
18403 /*     .. Executable Statements .. */
18404 
18405     /* Parameter adjustments */
18406     ab_dim1 = *ldab;
18407     ab_offset = 1 + ab_dim1;
18408     ab -= ab_offset;
18409     --x;
18410     --cnorm;
18411 
18412     /* Function Body */
18413     *info = 0;
18414     upper = lsame_(uplo, "U");
18415     notran = lsame_(trans, "N");
18416     nounit = lsame_(diag, "N");
18417 
18418 /*     Test the input parameters. */
18419 
18420     if (! upper && ! lsame_(uplo, "L")) {
18421 	*info = -1;
18422     } else if (! notran && ! lsame_(trans, "T") && !
18423 	    lsame_(trans, "C")) {
18424 	*info = -2;
18425     } else if (! nounit && ! lsame_(diag, "U")) {
18426 	*info = -3;
18427     } else if (! lsame_(normin, "Y") && ! lsame_(normin,
18428 	     "N")) {
18429 	*info = -4;
18430     } else if (*n < 0) {
18431 	*info = -5;
18432     } else if (*kd < 0) {
18433 	*info = -6;
18434     } else if (*ldab < *kd + 1) {
18435 	*info = -8;
18436     }
18437     if (*info != 0) {
18438 	i__1 = -(*info);
18439 	xerbla_("DLATBS", &i__1);
18440 	return 0;
18441     }
18442 
18443 /*     Quick return if possible */
18444 
18445     if (*n == 0) {
18446 	return 0;
18447     }
18448 
18449 /*     Determine machine dependent parameters to control overflow. */
18450 
18451     smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
18452     bignum = 1. / smlnum;
18453     *scale = 1.;
18454 
18455     if (lsame_(normin, "N")) {
18456 
18457 /*        Compute the 1-norm of each column, not including the diagonal. */
18458 
18459 	if (upper) {
18460 
18461 /*           A is upper triangular. */
18462 
18463 	    i__1 = *n;
18464 	    for (j = 1; j <= i__1; ++j) {
18465 /* Computing MIN */
18466 		i__2 = *kd, i__3 = j - 1;
18467 		jlen = std::min(i__2,i__3);
18468 		cnorm[j] = dasum_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], &
18469 			c__1);
18470 /* L10: */
18471 	    }
18472 	} else {
18473 
18474 /*           A is lower triangular. */
18475 
18476 	    i__1 = *n;
18477 	    for (j = 1; j <= i__1; ++j) {
18478 /* Computing MIN */
18479 		i__2 = *kd, i__3 = *n - j;
18480 		jlen = std::min(i__2,i__3);
18481 		if (jlen > 0) {
18482 		    cnorm[j] = dasum_(&jlen, &ab[j * ab_dim1 + 2], &c__1);
18483 		} else {
18484 		    cnorm[j] = 0.;
18485 		}
18486 /* L20: */
18487 	    }
18488 	}
18489     }
18490 
18491 /*     Scale the column norms by TSCAL if the maximum element in CNORM is */
18492 /*     greater than BIGNUM. */
18493 
18494     imax = idamax_(n, &cnorm[1], &c__1);
18495     tmax = cnorm[imax];
18496     if (tmax <= bignum) {
18497 	tscal = 1.;
18498     } else {
18499 	tscal = 1. / (smlnum * tmax);
18500 	dscal_(n, &tscal, &cnorm[1], &c__1);
18501     }
18502 
18503 /*     Compute a bound on the computed solution vector to see if the */
18504 /*     Level 2 BLAS routine DTBSV can be used. */
18505 
18506     j = idamax_(n, &x[1], &c__1);
18507     xmax = (d__1 = x[j], abs(d__1));
18508     xbnd = xmax;
18509     if (notran) {
18510 
18511 /*        Compute the growth in A * x = b. */
18512 
18513 	if (upper) {
18514 	    jfirst = *n;
18515 	    jlast = 1;
18516 	    jinc = -1;
18517 	    maind = *kd + 1;
18518 	} else {
18519 	    jfirst = 1;
18520 	    jlast = *n;
18521 	    jinc = 1;
18522 	    maind = 1;
18523 	}
18524 
18525 	if (tscal != 1.) {
18526 	    grow = 0.;
18527 	    goto L50;
18528 	}
18529 
18530 	if (nounit) {
18531 
18532 /*           A is non-unit triangular. */
18533 
18534 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
18535 /*           Initially, G(0) = max{x(i), i=1,...,n}. */
18536 
18537 	    grow = 1. / std::max(xbnd,smlnum);
18538 	    xbnd = grow;
18539 	    i__1 = jlast;
18540 	    i__2 = jinc;
18541 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
18542 
18543 /*              Exit the loop if the growth factor is too small. */
18544 
18545 		if (grow <= smlnum) {
18546 		    goto L50;
18547 		}
18548 
18549 /*              M(j) = G(j-1) / abs(A(j,j)) */
18550 
18551 		tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1));
18552 /* Computing MIN */
18553 		d__1 = xbnd, d__2 = std::min(1.,tjj) * grow;
18554 		xbnd = std::min(d__1,d__2);
18555 		if (tjj + cnorm[j] >= smlnum) {
18556 
18557 /*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
18558 
18559 		    grow *= tjj / (tjj + cnorm[j]);
18560 		} else {
18561 
18562 /*                 G(j) could overflow, set GROW to 0. */
18563 
18564 		    grow = 0.;
18565 		}
18566 /* L30: */
18567 	    }
18568 	    grow = xbnd;
18569 	} else {
18570 
18571 /*           A is unit triangular. */
18572 
18573 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
18574 
18575 /* Computing MIN */
18576 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
18577 	    grow = std::min(d__1,d__2);
18578 	    i__2 = jlast;
18579 	    i__1 = jinc;
18580 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
18581 
18582 /*              Exit the loop if the growth factor is too small. */
18583 
18584 		if (grow <= smlnum) {
18585 		    goto L50;
18586 		}
18587 
18588 /*              G(j) = G(j-1)*( 1 + CNORM(j) ) */
18589 
18590 		grow *= 1. / (cnorm[j] + 1.);
18591 /* L40: */
18592 	    }
18593 	}
18594 L50:
18595 
18596 	;
18597     } else {
18598 
18599 /*        Compute the growth in A' * x = b. */
18600 
18601 	if (upper) {
18602 	    jfirst = 1;
18603 	    jlast = *n;
18604 	    jinc = 1;
18605 	    maind = *kd + 1;
18606 	} else {
18607 	    jfirst = *n;
18608 	    jlast = 1;
18609 	    jinc = -1;
18610 	    maind = 1;
18611 	}
18612 
18613 	if (tscal != 1.) {
18614 	    grow = 0.;
18615 	    goto L80;
18616 	}
18617 
18618 	if (nounit) {
18619 
18620 /*           A is non-unit triangular. */
18621 
18622 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
18623 /*           Initially, M(0) = max{x(i), i=1,...,n}. */
18624 
18625 	    grow = 1. / std::max(xbnd,smlnum);
18626 	    xbnd = grow;
18627 	    i__1 = jlast;
18628 	    i__2 = jinc;
18629 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
18630 
18631 /*              Exit the loop if the growth factor is too small. */
18632 
18633 		if (grow <= smlnum) {
18634 		    goto L80;
18635 		}
18636 
18637 /*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
18638 
18639 		xj = cnorm[j] + 1.;
18640 /* Computing MIN */
18641 		d__1 = grow, d__2 = xbnd / xj;
18642 		grow = std::min(d__1,d__2);
18643 
18644 /*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
18645 
18646 		tjj = (d__1 = ab[maind + j * ab_dim1], abs(d__1));
18647 		if (xj > tjj) {
18648 		    xbnd *= tjj / xj;
18649 		}
18650 /* L60: */
18651 	    }
18652 	    grow = std::min(grow,xbnd);
18653 	} else {
18654 
18655 /*           A is unit triangular. */
18656 
18657 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
18658 
18659 /* Computing MIN */
18660 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
18661 	    grow = std::min(d__1,d__2);
18662 	    i__2 = jlast;
18663 	    i__1 = jinc;
18664 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
18665 
18666 /*              Exit the loop if the growth factor is too small. */
18667 
18668 		if (grow <= smlnum) {
18669 		    goto L80;
18670 		}
18671 
18672 /*              G(j) = ( 1 + CNORM(j) )*G(j-1) */
18673 
18674 		xj = cnorm[j] + 1.;
18675 		grow /= xj;
18676 /* L70: */
18677 	    }
18678 	}
18679 L80:
18680 	;
18681     }
18682 
18683     if (grow * tscal > smlnum) {
18684 
18685 /*        Use the Level 2 BLAS solve if the reciprocal of the bound on */
18686 /*        elements of X is not too small. */
18687 
18688 	dtbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1);
18689     } else {
18690 
18691 /*        Use a Level 1 BLAS solve, scaling intermediate results. */
18692 
18693 	if (xmax > bignum) {
18694 
18695 /*           Scale X so that its components are less than or equal to */
18696 /*           BIGNUM in absolute value. */
18697 
18698 	    *scale = bignum / xmax;
18699 	    dscal_(n, scale, &x[1], &c__1);
18700 	    xmax = bignum;
18701 	}
18702 
18703 	if (notran) {
18704 
18705 /*           Solve A * x = b */
18706 
18707 	    i__1 = jlast;
18708 	    i__2 = jinc;
18709 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
18710 
18711 /*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
18712 
18713 		xj = (d__1 = x[j], abs(d__1));
18714 		if (nounit) {
18715 		    tjjs = ab[maind + j * ab_dim1] * tscal;
18716 		} else {
18717 		    tjjs = tscal;
18718 		    if (tscal == 1.) {
18719 			goto L100;
18720 		    }
18721 		}
18722 		tjj = abs(tjjs);
18723 		if (tjj > smlnum) {
18724 
18725 /*                    abs(A(j,j)) > SMLNUM: */
18726 
18727 		    if (tjj < 1.) {
18728 			if (xj > tjj * bignum) {
18729 
18730 /*                          Scale x by 1/b(j). */
18731 
18732 			    rec = 1. / xj;
18733 			    dscal_(n, &rec, &x[1], &c__1);
18734 			    *scale *= rec;
18735 			    xmax *= rec;
18736 			}
18737 		    }
18738 		    x[j] /= tjjs;
18739 		    xj = (d__1 = x[j], abs(d__1));
18740 		} else if (tjj > 0.) {
18741 
18742 /*                    0 < abs(A(j,j)) <= SMLNUM: */
18743 
18744 		    if (xj > tjj * bignum) {
18745 
18746 /*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
18747 /*                       to avoid overflow when dividing by A(j,j). */
18748 
18749 			rec = tjj * bignum / xj;
18750 			if (cnorm[j] > 1.) {
18751 
18752 /*                          Scale by 1/CNORM(j) to avoid overflow when */
18753 /*                          multiplying x(j) times column j. */
18754 
18755 			    rec /= cnorm[j];
18756 			}
18757 			dscal_(n, &rec, &x[1], &c__1);
18758 			*scale *= rec;
18759 			xmax *= rec;
18760 		    }
18761 		    x[j] /= tjjs;
18762 		    xj = (d__1 = x[j], abs(d__1));
18763 		} else {
18764 
18765 /*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
18766 /*                    scale = 0, and compute a solution to A*x = 0. */
18767 
18768 		    i__3 = *n;
18769 		    for (i__ = 1; i__ <= i__3; ++i__) {
18770 			x[i__] = 0.;
18771 /* L90: */
18772 		    }
18773 		    x[j] = 1.;
18774 		    xj = 1.;
18775 		    *scale = 0.;
18776 		    xmax = 0.;
18777 		}
18778 L100:
18779 
18780 /*              Scale x if necessary to avoid overflow when adding a */
18781 /*              multiple of column j of A. */
18782 
18783 		if (xj > 1.) {
18784 		    rec = 1. / xj;
18785 		    if (cnorm[j] > (bignum - xmax) * rec) {
18786 
18787 /*                    Scale x by 1/(2*abs(x(j))). */
18788 
18789 			rec *= .5;
18790 			dscal_(n, &rec, &x[1], &c__1);
18791 			*scale *= rec;
18792 		    }
18793 		} else if (xj * cnorm[j] > bignum - xmax) {
18794 
18795 /*                 Scale x by 1/2. */
18796 
18797 		    dscal_(n, &c_b36, &x[1], &c__1);
18798 		    *scale *= .5;
18799 		}
18800 
18801 		if (upper) {
18802 		    if (j > 1) {
18803 
18804 /*                    Compute the update */
18805 /*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - */
18806 /*                                             x(j)* A(max(1,j-kd):j-1,j) */
18807 
18808 /* Computing MIN */
18809 			i__3 = *kd, i__4 = j - 1;
18810 			jlen = std::min(i__3,i__4);
18811 			d__1 = -x[j] * tscal;
18812 			daxpy_(&jlen, &d__1, &ab[*kd + 1 - jlen + j * ab_dim1]
18813 , &c__1, &x[j - jlen], &c__1);
18814 			i__3 = j - 1;
18815 			i__ = idamax_(&i__3, &x[1], &c__1);
18816 			xmax = (d__1 = x[i__], abs(d__1));
18817 		    }
18818 		} else if (j < *n) {
18819 
18820 /*                 Compute the update */
18821 /*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - */
18822 /*                                          x(j) * A(j+1:min(j+kd,n),j) */
18823 
18824 /* Computing MIN */
18825 		    i__3 = *kd, i__4 = *n - j;
18826 		    jlen = std::min(i__3,i__4);
18827 		    if (jlen > 0) {
18828 			d__1 = -x[j] * tscal;
18829 			daxpy_(&jlen, &d__1, &ab[j * ab_dim1 + 2], &c__1, &x[
18830 				j + 1], &c__1);
18831 		    }
18832 		    i__3 = *n - j;
18833 		    i__ = j + idamax_(&i__3, &x[j + 1], &c__1);
18834 		    xmax = (d__1 = x[i__], abs(d__1));
18835 		}
18836 /* L110: */
18837 	    }
18838 
18839 	} else {
18840 
18841 /*           Solve A' * x = b */
18842 
18843 	    i__2 = jlast;
18844 	    i__1 = jinc;
18845 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
18846 
18847 /*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
18848 /*                                    k<>j */
18849 
18850 		xj = (d__1 = x[j], abs(d__1));
18851 		uscal = tscal;
18852 		rec = 1. / std::max(xmax,1.);
18853 		if (cnorm[j] > (bignum - xj) * rec) {
18854 
18855 /*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
18856 
18857 		    rec *= .5;
18858 		    if (nounit) {
18859 			tjjs = ab[maind + j * ab_dim1] * tscal;
18860 		    } else {
18861 			tjjs = tscal;
18862 		    }
18863 		    tjj = abs(tjjs);
18864 		    if (tjj > 1.) {
18865 
18866 /*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */
18867 
18868 /* Computing MIN */
18869 			d__1 = 1., d__2 = rec * tjj;
18870 			rec = std::min(d__1,d__2);
18871 			uscal /= tjjs;
18872 		    }
18873 		    if (rec < 1.) {
18874 			dscal_(n, &rec, &x[1], &c__1);
18875 			*scale *= rec;
18876 			xmax *= rec;
18877 		    }
18878 		}
18879 
18880 		sumj = 0.;
18881 		if (uscal == 1.) {
18882 
18883 /*                 If the scaling needed for A in the dot product is 1, */
18884 /*                 call DDOT to perform the dot product. */
18885 
18886 		    if (upper) {
18887 /* Computing MIN */
18888 			i__3 = *kd, i__4 = j - 1;
18889 			jlen = std::min(i__3,i__4);
18890 			sumj = ddot_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1],
18891 				 &c__1, &x[j - jlen], &c__1);
18892 		    } else {
18893 /* Computing MIN */
18894 			i__3 = *kd, i__4 = *n - j;
18895 			jlen = std::min(i__3,i__4);
18896 			if (jlen > 0) {
18897 			    sumj = ddot_(&jlen, &ab[j * ab_dim1 + 2], &c__1, &
18898 				    x[j + 1], &c__1);
18899 			}
18900 		    }
18901 		} else {
18902 
18903 /*                 Otherwise, use in-line code for the dot product. */
18904 
18905 		    if (upper) {
18906 /* Computing MIN */
18907 			i__3 = *kd, i__4 = j - 1;
18908 			jlen = std::min(i__3,i__4);
18909 			i__3 = jlen;
18910 			for (i__ = 1; i__ <= i__3; ++i__) {
18911 			    sumj += ab[*kd + i__ - jlen + j * ab_dim1] *
18912 				    uscal * x[j - jlen - 1 + i__];
18913 /* L120: */
18914 			}
18915 		    } else {
18916 /* Computing MIN */
18917 			i__3 = *kd, i__4 = *n - j;
18918 			jlen = std::min(i__3,i__4);
18919 			i__3 = jlen;
18920 			for (i__ = 1; i__ <= i__3; ++i__) {
18921 			    sumj += ab[i__ + 1 + j * ab_dim1] * uscal * x[j +
18922 				    i__];
18923 /* L130: */
18924 			}
18925 		    }
18926 		}
18927 
18928 		if (uscal == tscal) {
18929 
18930 /*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
18931 /*                 was not used to scale the dotproduct. */
18932 
18933 		    x[j] -= sumj;
18934 		    xj = (d__1 = x[j], abs(d__1));
18935 		    if (nounit) {
18936 
18937 /*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
18938 
18939 			tjjs = ab[maind + j * ab_dim1] * tscal;
18940 		    } else {
18941 			tjjs = tscal;
18942 			if (tscal == 1.) {
18943 			    goto L150;
18944 			}
18945 		    }
18946 		    tjj = abs(tjjs);
18947 		    if (tjj > smlnum) {
18948 
18949 /*                       abs(A(j,j)) > SMLNUM: */
18950 
18951 			if (tjj < 1.) {
18952 			    if (xj > tjj * bignum) {
18953 
18954 /*                             Scale X by 1/abs(x(j)). */
18955 
18956 				rec = 1. / xj;
18957 				dscal_(n, &rec, &x[1], &c__1);
18958 				*scale *= rec;
18959 				xmax *= rec;
18960 			    }
18961 			}
18962 			x[j] /= tjjs;
18963 		    } else if (tjj > 0.) {
18964 
18965 /*                       0 < abs(A(j,j)) <= SMLNUM: */
18966 
18967 			if (xj > tjj * bignum) {
18968 
18969 /*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
18970 
18971 			    rec = tjj * bignum / xj;
18972 			    dscal_(n, &rec, &x[1], &c__1);
18973 			    *scale *= rec;
18974 			    xmax *= rec;
18975 			}
18976 			x[j] /= tjjs;
18977 		    } else {
18978 
18979 /*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
18980 /*                       scale = 0, and compute a solution to A'*x = 0. */
18981 
18982 			i__3 = *n;
18983 			for (i__ = 1; i__ <= i__3; ++i__) {
18984 			    x[i__] = 0.;
18985 /* L140: */
18986 			}
18987 			x[j] = 1.;
18988 			*scale = 0.;
18989 			xmax = 0.;
18990 		    }
18991 L150:
18992 		    ;
18993 		} else {
18994 
18995 /*                 Compute x(j) := x(j) / A(j,j) - sumj if the dot */
18996 /*                 product has already been divided by 1/A(j,j). */
18997 
18998 		    x[j] = x[j] / tjjs - sumj;
18999 		}
19000 /* Computing MAX */
19001 		d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1));
19002 		xmax = std::max(d__2,d__3);
19003 /* L160: */
19004 	    }
19005 	}
19006 	*scale /= tscal;
19007     }
19008 
19009 /*     Scale the column norms by 1/TSCAL for return. */
19010 
19011     if (tscal != 1.) {
19012 	d__1 = 1. / tscal;
19013 	dscal_(n, &d__1, &cnorm[1], &c__1);
19014     }
19015 
19016     return 0;
19017 
19018 /*     End of DLATBS */
19019 
19020 } /* dlatbs_ */
19021 
dlatdf_(integer * ijob,integer * n,double * z__,integer * ldz,double * rhs,double * rdsum,double * rdscal,integer * ipiv,integer * jpiv)19022 /* Subroutine */ int dlatdf_(integer *ijob, integer *n, double *z__,
19023 	integer *ldz, double *rhs, double *rdsum, double *rdscal,
19024 	integer *ipiv, integer *jpiv)
19025 {
19026 	/* Table of constant values */
19027 	static integer c__1 = 1;
19028 	static integer c_n1 = -1;
19029 	static double c_b23 = 1.;
19030 	static double c_b37 = -1.;
19031 
19032     /* System generated locals */
19033     integer z_dim1, z_offset, i__1, i__2;
19034     double d__1;
19035 
19036     /* Local variables */
19037     integer i__, j, k;
19038     double bm, bp, xm[8], xp[8];
19039     integer info;
19040     double temp, work[32];
19041     double pmone;
19042     double sminu;
19043     integer iwork[8];
19044     double splus;
19045 
19046 /*  -- LAPACK auxiliary routine (version 3.1) -- */
19047 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
19048 /*     November 2006 */
19049 
19050 /*     .. Scalar Arguments .. */
19051 /*     .. */
19052 /*     .. Array Arguments .. */
19053 /*     .. */
19054 
19055 /*  Purpose */
19056 /*  ======= */
19057 
19058 /*  DLATDF uses the LU factorization of the n-by-n matrix Z computed by */
19059 /*  DGETC2 and computes a contribution to the reciprocal Dif-estimate */
19060 /*  by solving Z * x = b for x, and choosing the r.h.s. b such that */
19061 /*  the norm of x is as large as possible. On entry RHS = b holds the */
19062 /*  contribution from earlier solved sub-systems, and on return RHS = x. */
19063 
19064 /*  The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, */
19065 /*  where P and Q are permutation matrices. L is lower triangular with */
19066 /*  unit diagonal elements and U is upper triangular. */
19067 
19068 /*  Arguments */
19069 /*  ========= */
19070 
19071 /*  IJOB    (input) INTEGER */
19072 /*          IJOB = 2: First compute an approximative null-vector e */
19073 /*              of Z using DGECON, e is normalized and solve for */
19074 /*              Zx = +-e - f with the sign giving the greater value */
19075 /*              of 2-norm(x). About 5 times as expensive as Default. */
19076 /*          IJOB .ne. 2: Local look ahead strategy where all entries of */
19077 /*              the r.h.s. b is choosen as either +1 or -1 (Default). */
19078 
19079 /*  N       (input) INTEGER */
19080 /*          The number of columns of the matrix Z. */
19081 
19082 /*  Z       (input) DOUBLE PRECISION array, dimension (LDZ, N) */
19083 /*          On entry, the LU part of the factorization of the n-by-n */
19084 /*          matrix Z computed by DGETC2:  Z = P * L * U * Q */
19085 
19086 /*  LDZ     (input) INTEGER */
19087 /*          The leading dimension of the array Z.  LDA >= max(1, N). */
19088 
19089 /*  RHS     (input/output) DOUBLE PRECISION array, dimension N. */
19090 /*          On entry, RHS contains contributions from other subsystems. */
19091 /*          On exit, RHS contains the solution of the subsystem with */
19092 /*          entries acoording to the value of IJOB (see above). */
19093 
19094 /*  RDSUM   (input/output) DOUBLE PRECISION */
19095 /*          On entry, the sum of squares of computed contributions to */
19096 /*          the Dif-estimate under computation by DTGSYL, where the */
19097 /*          scaling factor RDSCAL (see below) has been factored out. */
19098 /*          On exit, the corresponding sum of squares updated with the */
19099 /*          contributions from the current sub-system. */
19100 /*          If TRANS = 'T' RDSUM is not touched. */
19101 /*          NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. */
19102 
19103 /*  RDSCAL  (input/output) DOUBLE PRECISION */
19104 /*          On entry, scaling factor used to prevent overflow in RDSUM. */
19105 /*          On exit, RDSCAL is updated w.r.t. the current contributions */
19106 /*          in RDSUM. */
19107 /*          If TRANS = 'T', RDSCAL is not touched. */
19108 /*          NOTE: RDSCAL only makes sense when DTGSY2 is called by */
19109 /*                DTGSYL. */
19110 
19111 /*  IPIV    (input) INTEGER array, dimension (N). */
19112 /*          The pivot indices; for 1 <= i <= N, row i of the */
19113 /*          matrix has been interchanged with row IPIV(i). */
19114 
19115 /*  JPIV    (input) INTEGER array, dimension (N). */
19116 /*          The pivot indices; for 1 <= j <= N, column j of the */
19117 /*          matrix has been interchanged with column JPIV(j). */
19118 
19119 /*  Further Details */
19120 /*  =============== */
19121 
19122 /*  Based on contributions by */
19123 /*     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
19124 /*     Umea University, S-901 87 Umea, Sweden. */
19125 
19126 /*  This routine is a further developed implementation of algorithm */
19127 /*  BSOLVE in [1] using complete pivoting in the LU factorization. */
19128 
19129 /*  [1] Bo Kagstrom and Lars Westin, */
19130 /*      Generalized Schur Methods with Condition Estimators for */
19131 /*      Solving the Generalized Sylvester Equation, IEEE Transactions */
19132 /*      on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. */
19133 
19134 /*  [2] Peter Poromaa, */
19135 /*      On Efficient and Robust Estimators for the Separation */
19136 /*      between two Regular Matrix Pairs with Applications in */
19137 /*      Condition Estimation. Report IMINF-95.05, Departement of */
19138 /*      Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. */
19139 
19140 /*  ===================================================================== */
19141 
19142 /*     .. Parameters .. */
19143 /*     .. */
19144 /*     .. Local Scalars .. */
19145 /*     .. */
19146 /*     .. Local Arrays .. */
19147 /*     .. */
19148 /*     .. External Subroutines .. */
19149 /*     .. */
19150 /*     .. External Functions .. */
19151 /*     .. */
19152 /*     .. Intrinsic Functions .. */
19153 /*     .. */
19154 /*     .. Executable Statements .. */
19155 
19156     /* Parameter adjustments */
19157     z_dim1 = *ldz;
19158     z_offset = 1 + z_dim1;
19159     z__ -= z_offset;
19160     --rhs;
19161     --ipiv;
19162     --jpiv;
19163 
19164     /* Function Body */
19165     if (*ijob != 2) {
19166 
19167 /*        Apply permutations IPIV to RHS */
19168 
19169 	i__1 = *n - 1;
19170 	dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1);
19171 
19172 /*        Solve for L-part choosing RHS either to +1 or -1. */
19173 
19174 	pmone = -1.;
19175 
19176 	i__1 = *n - 1;
19177 	for (j = 1; j <= i__1; ++j) {
19178 	    bp = rhs[j] + 1.;
19179 	    bm = rhs[j] - 1.;
19180 	    splus = 1.;
19181 
19182 /*           Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and */
19183 /*           SMIN computed more efficiently than in BSOLVE [1]. */
19184 
19185 	    i__2 = *n - j;
19186 	    splus += ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &z__[j + 1
19187 		    + j * z_dim1], &c__1);
19188 	    i__2 = *n - j;
19189 	    sminu = ddot_(&i__2, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1],
19190 		     &c__1);
19191 	    splus *= rhs[j];
19192 	    if (splus > sminu) {
19193 		rhs[j] = bp;
19194 	    } else if (sminu > splus) {
19195 		rhs[j] = bm;
19196 	    } else {
19197 
19198 /*              In this case the updating sums are equal and we can */
19199 /*              choose RHS(J) +1 or -1. The first time this happens */
19200 /*              we choose -1, thereafter +1. This is a simple way to */
19201 /*              get good estimates of matrices like Byers well-known */
19202 /*              example (see [1]). (Not done in BSOLVE.) */
19203 
19204 		rhs[j] += pmone;
19205 		pmone = 1.;
19206 	    }
19207 
19208 /*           Compute the remaining r.h.s. */
19209 
19210 	    temp = -rhs[j];
19211 	    i__2 = *n - j;
19212 	    daxpy_(&i__2, &temp, &z__[j + 1 + j * z_dim1], &c__1, &rhs[j + 1],
19213 		     &c__1);
19214 
19215 /* L10: */
19216 	}
19217 
19218 /*        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done */
19219 /*        in BSOLVE and will hopefully give us a better estimate because */
19220 /*        any ill-conditioning of the original matrix is transfered to U */
19221 /*        and not to L. U(N, N) is an approximation to sigma_min(LU). */
19222 
19223 	i__1 = *n - 1;
19224 	dcopy_(&i__1, &rhs[1], &c__1, xp, &c__1);
19225 	xp[*n - 1] = rhs[*n] + 1.;
19226 	rhs[*n] += -1.;
19227 	splus = 0.;
19228 	sminu = 0.;
19229 	for (i__ = *n; i__ >= 1; --i__) {
19230 	    temp = 1. / z__[i__ + i__ * z_dim1];
19231 	    xp[i__ - 1] *= temp;
19232 	    rhs[i__] *= temp;
19233 	    i__1 = *n;
19234 	    for (k = i__ + 1; k <= i__1; ++k) {
19235 		xp[i__ - 1] -= xp[k - 1] * (z__[i__ + k * z_dim1] * temp);
19236 		rhs[i__] -= rhs[k] * (z__[i__ + k * z_dim1] * temp);
19237 /* L20: */
19238 	    }
19239 	    splus += (d__1 = xp[i__ - 1], abs(d__1));
19240 	    sminu += (d__1 = rhs[i__], abs(d__1));
19241 /* L30: */
19242 	}
19243 	if (splus > sminu) {
19244 	    dcopy_(n, xp, &c__1, &rhs[1], &c__1);
19245 	}
19246 
19247 /*        Apply the permutations JPIV to the computed solution (RHS) */
19248 
19249 	i__1 = *n - 1;
19250 	dlaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1);
19251 
19252 /*        Compute the sum of squares */
19253 
19254 	dlassq_(n, &rhs[1], &c__1, rdscal, rdsum);
19255 
19256     } else {
19257 
19258 /*        IJOB = 2, Compute approximate nullvector XM of Z */
19259 
19260 	dgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, &
19261 		info);
19262 	dcopy_(n, &work[*n], &c__1, xm, &c__1);
19263 
19264 /*        Compute RHS */
19265 
19266 	i__1 = *n - 1;
19267 	dlaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1);
19268 	temp = 1. / sqrt(ddot_(n, xm, &c__1, xm, &c__1));
19269 	dscal_(n, &temp, xm, &c__1);
19270 	dcopy_(n, xm, &c__1, xp, &c__1);
19271 	daxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1);
19272 	daxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1);
19273 	dgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp);
19274 	dgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp);
19275 	if (dasum_(n, xp, &c__1) > dasum_(n, &rhs[1], &c__1)) {
19276 	    dcopy_(n, xp, &c__1, &rhs[1], &c__1);
19277 	}
19278 
19279 /*        Compute the sum of squares */
19280 
19281 	dlassq_(n, &rhs[1], &c__1, rdscal, rdsum);
19282 
19283     }
19284 
19285     return 0;
19286 
19287 /*     End of DLATDF */
19288 
19289 } /* dlatdf_ */
19290 
dlatps_(const char * uplo,const char * trans,const char * diag,const char * normin,integer * n,double * ap,double * x,double * scale,double * cnorm,integer * info)19291 /* Subroutine */ int dlatps_(const char *uplo, const char *trans, const char *diag, const char *
19292 	normin, integer *n, double *ap, double *x, double *scale,
19293 	double *cnorm, integer *info)
19294 {
19295 	/* Table of constant values */
19296 	static integer c__1 = 1;
19297 	static double c_b36 = .5;
19298 
19299     /* System generated locals */
19300     integer i__1, i__2, i__3;
19301     double d__1, d__2, d__3;
19302 
19303     /* Local variables */
19304     integer i__, j, ip;
19305     double xj, rec, tjj;
19306     integer jinc, jlen;
19307     double xbnd;
19308     integer imax;
19309     double tmax, tjjs, xmax, grow, sumj;
19310     double tscal, uscal;
19311     integer jlast;
19312     bool upper;
19313     double bignum;
19314     bool notran;
19315     integer jfirst;
19316     double smlnum;
19317     bool nounit;
19318 
19319 
19320 /*  -- LAPACK auxiliary routine (version 3.1) -- */
19321 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
19322 /*     November 2006 */
19323 
19324 /*     .. Scalar Arguments .. */
19325 /*     .. */
19326 /*     .. Array Arguments .. */
19327 /*     .. */
19328 
19329 /*  Purpose */
19330 /*  ======= */
19331 
19332 /*  DLATPS solves one of the triangular systems */
19333 
19334 /*     A *x = s*b  or  A'*x = s*b */
19335 
19336 /*  with scaling to prevent overflow, where A is an upper or lower */
19337 /*  triangular matrix stored in packed form.  Here A' denotes the */
19338 /*  transpose of A, x and b are n-element vectors, and s is a scaling */
19339 /*  factor, usually less than or equal to 1, chosen so that the */
19340 /*  components of x will be less than the overflow threshold.  If the */
19341 /*  unscaled problem will not cause overflow, the Level 2 BLAS routine */
19342 /*  DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), */
19343 /*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned. */
19344 
19345 /*  Arguments */
19346 /*  ========= */
19347 
19348 /*  UPLO    (input) CHARACTER*1 */
19349 /*          Specifies whether the matrix A is upper or lower triangular. */
19350 /*          = 'U':  Upper triangular */
19351 /*          = 'L':  Lower triangular */
19352 
19353 /*  TRANS   (input) CHARACTER*1 */
19354 /*          Specifies the operation applied to A. */
19355 /*          = 'N':  Solve A * x = s*b  (No transpose) */
19356 /*          = 'T':  Solve A'* x = s*b  (Transpose) */
19357 /*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose) */
19358 
19359 /*  DIAG    (input) CHARACTER*1 */
19360 /*          Specifies whether or not the matrix A is unit triangular. */
19361 /*          = 'N':  Non-unit triangular */
19362 /*          = 'U':  Unit triangular */
19363 
19364 /*  NORMIN  (input) CHARACTER*1 */
19365 /*          Specifies whether CNORM has been set or not. */
19366 /*          = 'Y':  CNORM contains the column norms on entry */
19367 /*          = 'N':  CNORM is not set on entry.  On exit, the norms will */
19368 /*                  be computed and stored in CNORM. */
19369 
19370 /*  N       (input) INTEGER */
19371 /*          The order of the matrix A.  N >= 0. */
19372 
19373 /*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) */
19374 /*          The upper or lower triangular matrix A, packed columnwise in */
19375 /*          a linear array.  The j-th column of A is stored in the array */
19376 /*          AP as follows: */
19377 /*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */
19378 /*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */
19379 
19380 /*  X       (input/output) DOUBLE PRECISION array, dimension (N) */
19381 /*          On entry, the right hand side b of the triangular system. */
19382 /*          On exit, X is overwritten by the solution vector x. */
19383 
19384 /*  SCALE   (output) DOUBLE PRECISION */
19385 /*          The scaling factor s for the triangular system */
19386 /*             A * x = s*b  or  A'* x = s*b. */
19387 /*          If SCALE = 0, the matrix A is singular or badly scaled, and */
19388 /*          the vector x is an exact or approximate solution to A*x = 0. */
19389 
19390 /*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N) */
19391 
19392 /*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */
19393 /*          contains the norm of the off-diagonal part of the j-th column */
19394 /*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal */
19395 /*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */
19396 /*          must be greater than or equal to the 1-norm. */
19397 
19398 /*          If NORMIN = 'N', CNORM is an output argument and CNORM(j) */
19399 /*          returns the 1-norm of the offdiagonal part of the j-th column */
19400 /*          of A. */
19401 
19402 /*  INFO    (output) INTEGER */
19403 /*          = 0:  successful exit */
19404 /*          < 0:  if INFO = -k, the k-th argument had an illegal value */
19405 
19406 /*  Further Details */
19407 /*  ======= ======= */
19408 
19409 /*  A rough bound on x is computed; if that is less than overflow, DTPSV */
19410 /*  is called, otherwise, specific code is used which checks for possible */
19411 /*  overflow or divide-by-zero at every operation. */
19412 
19413 /*  A columnwise scheme is used for solving A*x = b.  The basic algorithm */
19414 /*  if A is lower triangular is */
19415 
19416 /*       x[1:n] := b[1:n] */
19417 /*       for j = 1, ..., n */
19418 /*            x(j) := x(j) / A(j,j) */
19419 /*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */
19420 /*       end */
19421 
19422 /*  Define bounds on the components of x after j iterations of the loop: */
19423 /*     M(j) = bound on x[1:j] */
19424 /*     G(j) = bound on x[j+1:n] */
19425 /*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */
19426 
19427 /*  Then for iteration j+1 we have */
19428 /*     M(j+1) <= G(j) / | A(j+1,j+1) | */
19429 /*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */
19430 /*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */
19431 
19432 /*  where CNORM(j+1) is greater than or equal to the infinity-norm of */
19433 /*  column j+1 of A, not counting the diagonal.  Hence */
19434 
19435 /*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */
19436 /*                  1<=i<=j */
19437 /*  and */
19438 
19439 /*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */
19440 /*                                   1<=i< j */
19441 
19442 /*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the */
19443 /*  reciprocal of the largest M(j), j=1,..,n, is larger than */
19444 /*  max(underflow, 1/overflow). */
19445 
19446 /*  The bound on x(j) is also used to determine when a step in the */
19447 /*  columnwise method can be performed without fear of overflow.  If */
19448 /*  the computed bound is greater than a large constant, x is scaled to */
19449 /*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to */
19450 /*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */
19451 
19452 /*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic */
19453 /*  algorithm for A upper triangular is */
19454 
19455 /*       for j = 1, ..., n */
19456 /*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */
19457 /*       end */
19458 
19459 /*  We simultaneously compute two bounds */
19460 /*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */
19461 /*       M(j) = bound on x(i), 1<=i<=j */
19462 
19463 /*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */
19464 /*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */
19465 /*  Then the bound on x(j) is */
19466 
19467 /*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */
19468 
19469 /*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */
19470 /*                      1<=i<=j */
19471 
19472 /*  and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater */
19473 /*  than max(underflow, 1/overflow). */
19474 
19475 /*  ===================================================================== */
19476 
19477 /*     .. Parameters .. */
19478 /*     .. */
19479 /*     .. Local Scalars .. */
19480 /*     .. */
19481 /*     .. External Functions .. */
19482 /*     .. */
19483 /*     .. External Subroutines .. */
19484 /*     .. */
19485 /*     .. Intrinsic Functions .. */
19486 /*     .. */
19487 /*     .. Executable Statements .. */
19488 
19489     /* Parameter adjustments */
19490     --cnorm;
19491     --x;
19492     --ap;
19493 
19494     /* Function Body */
19495     *info = 0;
19496     upper = lsame_(uplo, "U");
19497     notran = lsame_(trans, "N");
19498     nounit = lsame_(diag, "N");
19499 
19500 /*     Test the input parameters. */
19501 
19502     if (! upper && ! lsame_(uplo, "L")) {
19503 	*info = -1;
19504     } else if (! notran && ! lsame_(trans, "T") && !
19505 	    lsame_(trans, "C")) {
19506 	*info = -2;
19507     } else if (! nounit && ! lsame_(diag, "U")) {
19508 	*info = -3;
19509     } else if (! lsame_(normin, "Y") && ! lsame_(normin,
19510 	     "N")) {
19511 	*info = -4;
19512     } else if (*n < 0) {
19513 	*info = -5;
19514     }
19515     if (*info != 0) {
19516 	i__1 = -(*info);
19517 	xerbla_("DLATPS", &i__1);
19518 	return 0;
19519     }
19520 
19521 /*     Quick return if possible */
19522 
19523     if (*n == 0) {
19524 	return 0;
19525     }
19526 
19527 /*     Determine machine dependent parameters to control overflow. */
19528 
19529     smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
19530     bignum = 1. / smlnum;
19531     *scale = 1.;
19532 
19533     if (lsame_(normin, "N")) {
19534 
19535 /*        Compute the 1-norm of each column, not including the diagonal. */
19536 
19537 	if (upper) {
19538 
19539 /*           A is upper triangular. */
19540 
19541 	    ip = 1;
19542 	    i__1 = *n;
19543 	    for (j = 1; j <= i__1; ++j) {
19544 		i__2 = j - 1;
19545 		cnorm[j] = dasum_(&i__2, &ap[ip], &c__1);
19546 		ip += j;
19547 /* L10: */
19548 	    }
19549 	} else {
19550 
19551 /*           A is lower triangular. */
19552 
19553 	    ip = 1;
19554 	    i__1 = *n - 1;
19555 	    for (j = 1; j <= i__1; ++j) {
19556 		i__2 = *n - j;
19557 		cnorm[j] = dasum_(&i__2, &ap[ip + 1], &c__1);
19558 		ip = ip + *n - j + 1;
19559 /* L20: */
19560 	    }
19561 	    cnorm[*n] = 0.;
19562 	}
19563     }
19564 
19565 /*     Scale the column norms by TSCAL if the maximum element in CNORM is */
19566 /*     greater than BIGNUM. */
19567 
19568     imax = idamax_(n, &cnorm[1], &c__1);
19569     tmax = cnorm[imax];
19570     if (tmax <= bignum) {
19571 	tscal = 1.;
19572     } else {
19573 	tscal = 1. / (smlnum * tmax);
19574 	dscal_(n, &tscal, &cnorm[1], &c__1);
19575     }
19576 
19577 /*     Compute a bound on the computed solution vector to see if the */
19578 /*     Level 2 BLAS routine DTPSV can be used. */
19579 
19580     j = idamax_(n, &x[1], &c__1);
19581     xmax = (d__1 = x[j], abs(d__1));
19582     xbnd = xmax;
19583     if (notran) {
19584 
19585 /*        Compute the growth in A * x = b. */
19586 
19587 	if (upper) {
19588 	    jfirst = *n;
19589 	    jlast = 1;
19590 	    jinc = -1;
19591 	} else {
19592 	    jfirst = 1;
19593 	    jlast = *n;
19594 	    jinc = 1;
19595 	}
19596 
19597 	if (tscal != 1.) {
19598 	    grow = 0.;
19599 	    goto L50;
19600 	}
19601 
19602 	if (nounit) {
19603 
19604 /*           A is non-unit triangular. */
19605 
19606 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
19607 /*           Initially, G(0) = max{x(i), i=1,...,n}. */
19608 
19609 	    grow = 1. / std::max(xbnd,smlnum);
19610 	    xbnd = grow;
19611 	    ip = jfirst * (jfirst + 1) / 2;
19612 	    jlen = *n;
19613 	    i__1 = jlast;
19614 	    i__2 = jinc;
19615 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
19616 
19617 /*              Exit the loop if the growth factor is too small. */
19618 
19619 		if (grow <= smlnum) {
19620 		    goto L50;
19621 		}
19622 
19623 /*              M(j) = G(j-1) / abs(A(j,j)) */
19624 
19625 		tjj = (d__1 = ap[ip], abs(d__1));
19626 /* Computing MIN */
19627 		d__1 = xbnd, d__2 = std::min(1.,tjj) * grow;
19628 		xbnd = std::min(d__1,d__2);
19629 		if (tjj + cnorm[j] >= smlnum) {
19630 
19631 /*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
19632 
19633 		    grow *= tjj / (tjj + cnorm[j]);
19634 		} else {
19635 
19636 /*                 G(j) could overflow, set GROW to 0. */
19637 
19638 		    grow = 0.;
19639 		}
19640 		ip += jinc * jlen;
19641 		--jlen;
19642 /* L30: */
19643 	    }
19644 	    grow = xbnd;
19645 	} else {
19646 
19647 /*           A is unit triangular. */
19648 
19649 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
19650 
19651 /* Computing MIN */
19652 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
19653 	    grow = std::min(d__1,d__2);
19654 	    i__2 = jlast;
19655 	    i__1 = jinc;
19656 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
19657 
19658 /*              Exit the loop if the growth factor is too small. */
19659 
19660 		if (grow <= smlnum) {
19661 		    goto L50;
19662 		}
19663 
19664 /*              G(j) = G(j-1)*( 1 + CNORM(j) ) */
19665 
19666 		grow *= 1. / (cnorm[j] + 1.);
19667 /* L40: */
19668 	    }
19669 	}
19670 L50:
19671 
19672 	;
19673     } else {
19674 
19675 /*        Compute the growth in A' * x = b. */
19676 
19677 	if (upper) {
19678 	    jfirst = 1;
19679 	    jlast = *n;
19680 	    jinc = 1;
19681 	} else {
19682 	    jfirst = *n;
19683 	    jlast = 1;
19684 	    jinc = -1;
19685 	}
19686 
19687 	if (tscal != 1.) {
19688 	    grow = 0.;
19689 	    goto L80;
19690 	}
19691 
19692 	if (nounit) {
19693 
19694 /*           A is non-unit triangular. */
19695 
19696 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
19697 /*           Initially, M(0) = max{x(i), i=1,...,n}. */
19698 
19699 	    grow = 1. / std::max(xbnd,smlnum);
19700 	    xbnd = grow;
19701 	    ip = jfirst * (jfirst + 1) / 2;
19702 	    jlen = 1;
19703 	    i__1 = jlast;
19704 	    i__2 = jinc;
19705 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
19706 
19707 /*              Exit the loop if the growth factor is too small. */
19708 
19709 		if (grow <= smlnum) {
19710 		    goto L80;
19711 		}
19712 
19713 /*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
19714 
19715 		xj = cnorm[j] + 1.;
19716 /* Computing MIN */
19717 		d__1 = grow, d__2 = xbnd / xj;
19718 		grow = std::min(d__1,d__2);
19719 
19720 /*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
19721 
19722 		tjj = (d__1 = ap[ip], abs(d__1));
19723 		if (xj > tjj) {
19724 		    xbnd *= tjj / xj;
19725 		}
19726 		++jlen;
19727 		ip += jinc * jlen;
19728 /* L60: */
19729 	    }
19730 	    grow = std::min(grow,xbnd);
19731 	} else {
19732 
19733 /*           A is unit triangular. */
19734 
19735 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
19736 
19737 /* Computing MIN */
19738 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
19739 	    grow = std::min(d__1,d__2);
19740 	    i__2 = jlast;
19741 	    i__1 = jinc;
19742 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
19743 
19744 /*              Exit the loop if the growth factor is too small. */
19745 
19746 		if (grow <= smlnum) {
19747 		    goto L80;
19748 		}
19749 
19750 /*              G(j) = ( 1 + CNORM(j) )*G(j-1) */
19751 
19752 		xj = cnorm[j] + 1.;
19753 		grow /= xj;
19754 /* L70: */
19755 	    }
19756 	}
19757 L80:
19758 	;
19759     }
19760 
19761     if (grow * tscal > smlnum) {
19762 
19763 /*        Use the Level 2 BLAS solve if the reciprocal of the bound on */
19764 /*        elements of X is not too small. */
19765 
19766 	dtpsv_(uplo, trans, diag, n, &ap[1], &x[1], &c__1);
19767     } else {
19768 
19769 /*        Use a Level 1 BLAS solve, scaling intermediate results. */
19770 
19771 	if (xmax > bignum) {
19772 
19773 /*           Scale X so that its components are less than or equal to */
19774 /*           BIGNUM in absolute value. */
19775 
19776 	    *scale = bignum / xmax;
19777 	    dscal_(n, scale, &x[1], &c__1);
19778 	    xmax = bignum;
19779 	}
19780 
19781 	if (notran) {
19782 
19783 /*           Solve A * x = b */
19784 
19785 	    ip = jfirst * (jfirst + 1) / 2;
19786 	    i__1 = jlast;
19787 	    i__2 = jinc;
19788 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
19789 
19790 /*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
19791 
19792 		xj = (d__1 = x[j], abs(d__1));
19793 		if (nounit) {
19794 		    tjjs = ap[ip] * tscal;
19795 		} else {
19796 		    tjjs = tscal;
19797 		    if (tscal == 1.) {
19798 			goto L100;
19799 		    }
19800 		}
19801 		tjj = abs(tjjs);
19802 		if (tjj > smlnum) {
19803 
19804 /*                    abs(A(j,j)) > SMLNUM: */
19805 
19806 		    if (tjj < 1.) {
19807 			if (xj > tjj * bignum) {
19808 
19809 /*                          Scale x by 1/b(j). */
19810 
19811 			    rec = 1. / xj;
19812 			    dscal_(n, &rec, &x[1], &c__1);
19813 			    *scale *= rec;
19814 			    xmax *= rec;
19815 			}
19816 		    }
19817 		    x[j] /= tjjs;
19818 		    xj = (d__1 = x[j], abs(d__1));
19819 		} else if (tjj > 0.) {
19820 
19821 /*                    0 < abs(A(j,j)) <= SMLNUM: */
19822 
19823 		    if (xj > tjj * bignum) {
19824 
19825 /*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
19826 /*                       to avoid overflow when dividing by A(j,j). */
19827 
19828 			rec = tjj * bignum / xj;
19829 			if (cnorm[j] > 1.) {
19830 
19831 /*                          Scale by 1/CNORM(j) to avoid overflow when */
19832 /*                          multiplying x(j) times column j. */
19833 
19834 			    rec /= cnorm[j];
19835 			}
19836 			dscal_(n, &rec, &x[1], &c__1);
19837 			*scale *= rec;
19838 			xmax *= rec;
19839 		    }
19840 		    x[j] /= tjjs;
19841 		    xj = (d__1 = x[j], abs(d__1));
19842 		} else {
19843 
19844 /*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
19845 /*                    scale = 0, and compute a solution to A*x = 0. */
19846 
19847 		    i__3 = *n;
19848 		    for (i__ = 1; i__ <= i__3; ++i__) {
19849 			x[i__] = 0.;
19850 /* L90: */
19851 		    }
19852 		    x[j] = 1.;
19853 		    xj = 1.;
19854 		    *scale = 0.;
19855 		    xmax = 0.;
19856 		}
19857 L100:
19858 
19859 /*              Scale x if necessary to avoid overflow when adding a */
19860 /*              multiple of column j of A. */
19861 
19862 		if (xj > 1.) {
19863 		    rec = 1. / xj;
19864 		    if (cnorm[j] > (bignum - xmax) * rec) {
19865 
19866 /*                    Scale x by 1/(2*abs(x(j))). */
19867 
19868 			rec *= .5;
19869 			dscal_(n, &rec, &x[1], &c__1);
19870 			*scale *= rec;
19871 		    }
19872 		} else if (xj * cnorm[j] > bignum - xmax) {
19873 
19874 /*                 Scale x by 1/2. */
19875 
19876 		    dscal_(n, &c_b36, &x[1], &c__1);
19877 		    *scale *= .5;
19878 		}
19879 
19880 		if (upper) {
19881 		    if (j > 1) {
19882 
19883 /*                    Compute the update */
19884 /*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
19885 
19886 			i__3 = j - 1;
19887 			d__1 = -x[j] * tscal;
19888 			daxpy_(&i__3, &d__1, &ap[ip - j + 1], &c__1, &x[1], &
19889 				c__1);
19890 			i__3 = j - 1;
19891 			i__ = idamax_(&i__3, &x[1], &c__1);
19892 			xmax = (d__1 = x[i__], abs(d__1));
19893 		    }
19894 		    ip -= j;
19895 		} else {
19896 		    if (j < *n) {
19897 
19898 /*                    Compute the update */
19899 /*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
19900 
19901 			i__3 = *n - j;
19902 			d__1 = -x[j] * tscal;
19903 			daxpy_(&i__3, &d__1, &ap[ip + 1], &c__1, &x[j + 1], &
19904 				c__1);
19905 			i__3 = *n - j;
19906 			i__ = j + idamax_(&i__3, &x[j + 1], &c__1);
19907 			xmax = (d__1 = x[i__], abs(d__1));
19908 		    }
19909 		    ip = ip + *n - j + 1;
19910 		}
19911 /* L110: */
19912 	    }
19913 
19914 	} else {
19915 
19916 /*           Solve A' * x = b */
19917 
19918 	    ip = jfirst * (jfirst + 1) / 2;
19919 	    jlen = 1;
19920 	    i__2 = jlast;
19921 	    i__1 = jinc;
19922 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
19923 
19924 /*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
19925 /*                                    k<>j */
19926 
19927 		xj = (d__1 = x[j], abs(d__1));
19928 		uscal = tscal;
19929 		rec = 1. / std::max(xmax,1.);
19930 		if (cnorm[j] > (bignum - xj) * rec) {
19931 
19932 /*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
19933 
19934 		    rec *= .5;
19935 		    if (nounit) {
19936 			tjjs = ap[ip] * tscal;
19937 		    } else {
19938 			tjjs = tscal;
19939 		    }
19940 		    tjj = abs(tjjs);
19941 		    if (tjj > 1.) {
19942 
19943 /*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */
19944 
19945 /* Computing MIN */
19946 			d__1 = 1., d__2 = rec * tjj;
19947 			rec = std::min(d__1,d__2);
19948 			uscal /= tjjs;
19949 		    }
19950 		    if (rec < 1.) {
19951 			dscal_(n, &rec, &x[1], &c__1);
19952 			*scale *= rec;
19953 			xmax *= rec;
19954 		    }
19955 		}
19956 
19957 		sumj = 0.;
19958 		if (uscal == 1.) {
19959 
19960 /*                 If the scaling needed for A in the dot product is 1, */
19961 /*                 call DDOT to perform the dot product. */
19962 
19963 		    if (upper) {
19964 			i__3 = j - 1;
19965 			sumj = ddot_(&i__3, &ap[ip - j + 1], &c__1, &x[1], &
19966 				c__1);
19967 		    } else if (j < *n) {
19968 			i__3 = *n - j;
19969 			sumj = ddot_(&i__3, &ap[ip + 1], &c__1, &x[j + 1], &
19970 				c__1);
19971 		    }
19972 		} else {
19973 
19974 /*                 Otherwise, use in-line code for the dot product. */
19975 
19976 		    if (upper) {
19977 			i__3 = j - 1;
19978 			for (i__ = 1; i__ <= i__3; ++i__) {
19979 			    sumj += ap[ip - j + i__] * uscal * x[i__];
19980 /* L120: */
19981 			}
19982 		    } else if (j < *n) {
19983 			i__3 = *n - j;
19984 			for (i__ = 1; i__ <= i__3; ++i__) {
19985 			    sumj += ap[ip + i__] * uscal * x[j + i__];
19986 /* L130: */
19987 			}
19988 		    }
19989 		}
19990 
19991 		if (uscal == tscal) {
19992 
19993 /*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
19994 /*                 was not used to scale the dotproduct. */
19995 
19996 		    x[j] -= sumj;
19997 		    xj = (d__1 = x[j], abs(d__1));
19998 		    if (nounit) {
19999 
20000 /*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
20001 
20002 			tjjs = ap[ip] * tscal;
20003 		    } else {
20004 			tjjs = tscal;
20005 			if (tscal == 1.) {
20006 			    goto L150;
20007 			}
20008 		    }
20009 		    tjj = abs(tjjs);
20010 		    if (tjj > smlnum) {
20011 
20012 /*                       abs(A(j,j)) > SMLNUM: */
20013 
20014 			if (tjj < 1.) {
20015 			    if (xj > tjj * bignum) {
20016 
20017 /*                             Scale X by 1/abs(x(j)). */
20018 
20019 				rec = 1. / xj;
20020 				dscal_(n, &rec, &x[1], &c__1);
20021 				*scale *= rec;
20022 				xmax *= rec;
20023 			    }
20024 			}
20025 			x[j] /= tjjs;
20026 		    } else if (tjj > 0.) {
20027 
20028 /*                       0 < abs(A(j,j)) <= SMLNUM: */
20029 
20030 			if (xj > tjj * bignum) {
20031 
20032 /*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
20033 
20034 			    rec = tjj * bignum / xj;
20035 			    dscal_(n, &rec, &x[1], &c__1);
20036 			    *scale *= rec;
20037 			    xmax *= rec;
20038 			}
20039 			x[j] /= tjjs;
20040 		    } else {
20041 
20042 /*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
20043 /*                       scale = 0, and compute a solution to A'*x = 0. */
20044 
20045 			i__3 = *n;
20046 			for (i__ = 1; i__ <= i__3; ++i__) {
20047 			    x[i__] = 0.;
20048 /* L140: */
20049 			}
20050 			x[j] = 1.;
20051 			*scale = 0.;
20052 			xmax = 0.;
20053 		    }
20054 L150:
20055 		    ;
20056 		} else {
20057 
20058 /*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot */
20059 /*                 product has already been divided by 1/A(j,j). */
20060 
20061 		    x[j] = x[j] / tjjs - sumj;
20062 		}
20063 /* Computing MAX */
20064 		d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1));
20065 		xmax = std::max(d__2,d__3);
20066 		++jlen;
20067 		ip += jinc * jlen;
20068 /* L160: */
20069 	    }
20070 	}
20071 	*scale /= tscal;
20072     }
20073 
20074 /*     Scale the column norms by 1/TSCAL for return. */
20075 
20076     if (tscal != 1.) {
20077 	d__1 = 1. / tscal;
20078 	dscal_(n, &d__1, &cnorm[1], &c__1);
20079     }
20080 
20081     return 0;
20082 
20083 /*     End of DLATPS */
20084 
20085 } /* dlatps_ */
20086 
dlatrd_(const char * uplo,integer * n,integer * nb,double * a,integer * lda,double * e,double * tau,double * w,integer * ldw)20087 /* Subroutine */ int dlatrd_(const char *uplo, integer *n, integer *nb, double *
20088 	a, integer *lda, double *e, double *tau, double *w,
20089 	integer *ldw)
20090 {
20091 	/* Table of constant values */
20092 	static double c_b5 = -1.;
20093 	static double c_b6 = 1.;
20094 	static integer c__1 = 1;
20095 	static double c_b16 = 0.;
20096 
20097     /* System generated locals */
20098     integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
20099 
20100     /* Local variables */
20101     integer i__, iw;
20102     double alpha;
20103 
20104 /*  -- LAPACK auxiliary routine (version 3.1) -- */
20105 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
20106 /*     November 2006 */
20107 
20108 /*     .. Scalar Arguments .. */
20109 /*     .. */
20110 /*     .. Array Arguments .. */
20111 /*     .. */
20112 
20113 /*  Purpose */
20114 /*  ======= */
20115 
20116 /*  DLATRD reduces NB rows and columns of a real symmetric matrix A to */
20117 /*  symmetric tridiagonal form by an orthogonal similarity */
20118 /*  transformation Q' * A * Q, and returns the matrices V and W which are */
20119 /*  needed to apply the transformation to the unreduced part of A. */
20120 
20121 /*  If UPLO = 'U', DLATRD reduces the last NB rows and columns of a */
20122 /*  matrix, of which the upper triangle is supplied; */
20123 /*  if UPLO = 'L', DLATRD reduces the first NB rows and columns of a */
20124 /*  matrix, of which the lower triangle is supplied. */
20125 
20126 /*  This is an auxiliary routine called by DSYTRD. */
20127 
20128 /*  Arguments */
20129 /*  ========= */
20130 
20131 /*  UPLO    (input) CHARACTER*1 */
20132 /*          Specifies whether the upper or lower triangular part of the */
20133 /*          symmetric matrix A is stored: */
20134 /*          = 'U': Upper triangular */
20135 /*          = 'L': Lower triangular */
20136 
20137 /*  N       (input) INTEGER */
20138 /*          The order of the matrix A. */
20139 
20140 /*  NB      (input) INTEGER */
20141 /*          The number of rows and columns to be reduced. */
20142 
20143 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
20144 /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
20145 /*          n-by-n upper triangular part of A contains the upper */
20146 /*          triangular part of the matrix A, and the strictly lower */
20147 /*          triangular part of A is not referenced.  If UPLO = 'L', the */
20148 /*          leading n-by-n lower triangular part of A contains the lower */
20149 /*          triangular part of the matrix A, and the strictly upper */
20150 /*          triangular part of A is not referenced. */
20151 /*          On exit: */
20152 /*          if UPLO = 'U', the last NB columns have been reduced to */
20153 /*            tridiagonal form, with the diagonal elements overwriting */
20154 /*            the diagonal elements of A; the elements above the diagonal */
20155 /*            with the array TAU, represent the orthogonal matrix Q as a */
20156 /*            product of elementary reflectors; */
20157 /*          if UPLO = 'L', the first NB columns have been reduced to */
20158 /*            tridiagonal form, with the diagonal elements overwriting */
20159 /*            the diagonal elements of A; the elements below the diagonal */
20160 /*            with the array TAU, represent the  orthogonal matrix Q as a */
20161 /*            product of elementary reflectors. */
20162 /*          See Further Details. */
20163 
20164 /*  LDA     (input) INTEGER */
20165 /*          The leading dimension of the array A.  LDA >= (1,N). */
20166 
20167 /*  E       (output) DOUBLE PRECISION array, dimension (N-1) */
20168 /*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal */
20169 /*          elements of the last NB columns of the reduced matrix; */
20170 /*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of */
20171 /*          the first NB columns of the reduced matrix. */
20172 
20173 /*  TAU     (output) DOUBLE PRECISION array, dimension (N-1) */
20174 /*          The scalar factors of the elementary reflectors, stored in */
20175 /*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. */
20176 /*          See Further Details. */
20177 
20178 /*  W       (output) DOUBLE PRECISION array, dimension (LDW,NB) */
20179 /*          The n-by-nb matrix W required to update the unreduced part */
20180 /*          of A. */
20181 
20182 /*  LDW     (input) INTEGER */
20183 /*          The leading dimension of the array W. LDW >= max(1,N). */
20184 
20185 /*  Further Details */
20186 /*  =============== */
20187 
20188 /*  If UPLO = 'U', the matrix Q is represented as a product of elementary */
20189 /*  reflectors */
20190 
20191 /*     Q = H(n) H(n-1) . . . H(n-nb+1). */
20192 
20193 /*  Each H(i) has the form */
20194 
20195 /*     H(i) = I - tau * v * v' */
20196 
20197 /*  where tau is a real scalar, and v is a real vector with */
20198 /*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), */
20199 /*  and tau in TAU(i-1). */
20200 
20201 /*  If UPLO = 'L', the matrix Q is represented as a product of elementary */
20202 /*  reflectors */
20203 
20204 /*     Q = H(1) H(2) . . . H(nb). */
20205 
20206 /*  Each H(i) has the form */
20207 
20208 /*     H(i) = I - tau * v * v' */
20209 
20210 /*  where tau is a real scalar, and v is a real vector with */
20211 /*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), */
20212 /*  and tau in TAU(i). */
20213 
20214 /*  The elements of the vectors v together form the n-by-nb matrix V */
20215 /*  which is needed, with W, to apply the transformation to the unreduced */
20216 /*  part of the matrix, using a symmetric rank-2k update of the form: */
20217 /*  A := A - V*W' - W*V'. */
20218 
20219 /*  The contents of A on exit are illustrated by the following examples */
20220 /*  with n = 5 and nb = 2: */
20221 
20222 /*  if UPLO = 'U':                       if UPLO = 'L': */
20223 
20224 /*    (  a   a   a   v4  v5 )              (  d                  ) */
20225 /*    (      a   a   v4  v5 )              (  1   d              ) */
20226 /*    (          a   1   v5 )              (  v1  1   a          ) */
20227 /*    (              d   1  )              (  v1  v2  a   a      ) */
20228 /*    (                  d  )              (  v1  v2  a   a   a  ) */
20229 
20230 /*  where d denotes a diagonal element of the reduced matrix, a denotes */
20231 /*  an element of the original matrix that is unchanged, and vi denotes */
20232 /*  an element of the vector defining H(i). */
20233 
20234 /*  ===================================================================== */
20235 
20236 /*     .. Parameters .. */
20237 /*     .. */
20238 /*     .. Local Scalars .. */
20239 /*     .. */
20240 /*     .. External Subroutines .. */
20241 /*     .. */
20242 /*     .. External Functions .. */
20243 /*     .. */
20244 /*     .. Intrinsic Functions .. */
20245 /*     .. */
20246 /*     .. Executable Statements .. */
20247 
20248 /*     Quick return if possible */
20249 
20250     /* Parameter adjustments */
20251     a_dim1 = *lda;
20252     a_offset = 1 + a_dim1;
20253     a -= a_offset;
20254     --e;
20255     --tau;
20256     w_dim1 = *ldw;
20257     w_offset = 1 + w_dim1;
20258     w -= w_offset;
20259 
20260     /* Function Body */
20261     if (*n <= 0) {
20262 	return 0;
20263     }
20264 
20265     if (lsame_(uplo, "U")) {
20266 
20267 /*        Reduce last NB columns of upper triangle */
20268 
20269 	i__1 = *n - *nb + 1;
20270 	for (i__ = *n; i__ >= i__1; --i__) {
20271 	    iw = i__ - *n + *nb;
20272 	    if (i__ < *n) {
20273 
20274 /*              Update A(1:i,i) */
20275 
20276 		i__2 = *n - i__;
20277 		dgemv_("No transpose", &i__, &i__2, &c_b5, &a[(i__ + 1) *
20278 			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
20279 			c_b6, &a[i__ * a_dim1 + 1], &c__1);
20280 		i__2 = *n - i__;
20281 		dgemv_("No transpose", &i__, &i__2, &c_b5, &w[(iw + 1) *
20282 			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
20283 			c_b6, &a[i__ * a_dim1 + 1], &c__1);
20284 	    }
20285 	    if (i__ > 1) {
20286 
20287 /*              Generate elementary reflector H(i) to annihilate */
20288 /*              A(1:i-2,i) */
20289 
20290 		i__2 = i__ - 1;
20291 		dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 +
20292 			1], &c__1, &tau[i__ - 1]);
20293 		e[i__ - 1] = a[i__ - 1 + i__ * a_dim1];
20294 		a[i__ - 1 + i__ * a_dim1] = 1.;
20295 
20296 /*              Compute W(1:i-1,i) */
20297 
20298 		i__2 = i__ - 1;
20299 		dsymv_("Upper", &i__2, &c_b6, &a[a_offset], lda, &a[i__ *
20300 			a_dim1 + 1], &c__1, &c_b16, &w[iw * w_dim1 + 1], &
20301 			c__1);
20302 		if (i__ < *n) {
20303 		    i__2 = i__ - 1;
20304 		    i__3 = *n - i__;
20305 		    dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[(iw + 1) *
20306 			    w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, &
20307 			    c_b16, &w[i__ + 1 + iw * w_dim1], &c__1);
20308 		    i__2 = i__ - 1;
20309 		    i__3 = *n - i__;
20310 		    dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[(i__ + 1) *
20311 			     a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], &
20312 			    c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1);
20313 		    i__2 = i__ - 1;
20314 		    i__3 = *n - i__;
20315 		    dgemv_("Transpose", &i__2, &i__3, &c_b6, &a[(i__ + 1) *
20316 			    a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, &
20317 			    c_b16, &w[i__ + 1 + iw * w_dim1], &c__1);
20318 		    i__2 = i__ - 1;
20319 		    i__3 = *n - i__;
20320 		    dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[(iw + 1) *
20321 			    w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
20322 			    c__1, &c_b6, &w[iw * w_dim1 + 1], &c__1);
20323 		}
20324 		i__2 = i__ - 1;
20325 		dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
20326 		i__2 = i__ - 1;
20327 		alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1],
20328 			 &c__1, &a[i__ * a_dim1 + 1], &c__1);
20329 		i__2 = i__ - 1;
20330 		daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
20331 			w_dim1 + 1], &c__1);
20332 	    }
20333 
20334 /* L10: */
20335 	}
20336     } else {
20337 
20338 /*        Reduce first NB columns of lower triangle */
20339 
20340 	i__1 = *nb;
20341 	for (i__ = 1; i__ <= i__1; ++i__) {
20342 
20343 /*           Update A(i:n,i) */
20344 
20345 	    i__2 = *n - i__ + 1;
20346 	    i__3 = i__ - 1;
20347 	    dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + a_dim1], lda,
20348 		     &w[i__ + w_dim1], ldw, &c_b6, &a[i__ + i__ * a_dim1], &
20349 		    c__1);
20350 	    i__2 = *n - i__ + 1;
20351 	    i__3 = i__ - 1;
20352 	    dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + w_dim1], ldw,
20353 		     &a[i__ + a_dim1], lda, &c_b6, &a[i__ + i__ * a_dim1], &
20354 		    c__1);
20355 	    if (i__ < *n) {
20356 
20357 /*              Generate elementary reflector H(i) to annihilate */
20358 /*              A(i+2:n,i) */
20359 
20360 		i__2 = *n - i__;
20361 /* Computing MIN */
20362 		i__3 = i__ + 2;
20363 		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[std::min(i__3, *n)+
20364 			i__ * a_dim1], &c__1, &tau[i__]);
20365 		e[i__] = a[i__ + 1 + i__ * a_dim1];
20366 		a[i__ + 1 + i__ * a_dim1] = 1.;
20367 
20368 /*              Compute W(i+1:n,i) */
20369 
20370 		i__2 = *n - i__;
20371 		dsymv_("Lower", &i__2, &c_b6, &a[i__ + 1 + (i__ + 1) * a_dim1]
20372 , lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
20373 			i__ + 1 + i__ * w_dim1], &c__1);
20374 		i__2 = *n - i__;
20375 		i__3 = i__ - 1;
20376 		dgemv_("Transpose", &i__2, &i__3, &c_b6, &w[i__ + 1 + w_dim1],
20377 			 ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
20378 			i__ * w_dim1 + 1], &c__1);
20379 		i__2 = *n - i__;
20380 		i__3 = i__ - 1;
20381 		dgemv_("No transpose", &i__2, &i__3, &c_b5, &a[i__ + 1 +
20382 			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[
20383 			i__ + 1 + i__ * w_dim1], &c__1);
20384 		i__2 = *n - i__;
20385 		i__3 = i__ - 1;
20386 		dgemv_("Transpose", &i__2, &i__3, &c_b6, &a[i__ + 1 + a_dim1],
20387 			 lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b16, &w[
20388 			i__ * w_dim1 + 1], &c__1);
20389 		i__2 = *n - i__;
20390 		i__3 = i__ - 1;
20391 		dgemv_("No transpose", &i__2, &i__3, &c_b5, &w[i__ + 1 +
20392 			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b6, &w[
20393 			i__ + 1 + i__ * w_dim1], &c__1);
20394 		i__2 = *n - i__;
20395 		dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
20396 		i__2 = *n - i__;
20397 		alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ *
20398 			w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
20399 		i__2 = *n - i__;
20400 		daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
20401 			i__ + 1 + i__ * w_dim1], &c__1);
20402 	    }
20403 
20404 /* L20: */
20405 	}
20406     }
20407 
20408     return 0;
20409 
20410 /*     End of DLATRD */
20411 
20412 } /* dlatrd_ */
20413 
dlatrs_(const char * uplo,const char * trans,const char * diag,const char * normin,integer * n,double * a,integer * lda,double * x,double * scale,double * cnorm,integer * info)20414 /* Subroutine */ int dlatrs_(const char *uplo, const char *trans, const char *diag, const char *
20415 	normin, integer *n, double *a, integer *lda, double *x,
20416 	double *scale, double *cnorm, integer *info)
20417 {
20418 	/* Table of constant values */
20419 	static integer c__1 = 1;
20420 	static double c_b36 = .5;
20421 
20422     /* System generated locals */
20423     integer a_dim1, a_offset, i__1, i__2, i__3;
20424     double d__1, d__2, d__3;
20425 
20426     /* Local variables */
20427     integer i__, j;
20428     double xj, rec, tjj;
20429     integer jinc;
20430     double xbnd;
20431     integer imax;
20432     double tmax, tjjs, xmax, grow, sumj;
20433     double tscal, uscal;
20434     integer jlast;
20435     bool upper;
20436     double bignum;
20437     bool notran;
20438     integer jfirst;
20439     double smlnum;
20440     bool nounit;
20441 
20442 
20443 /*  -- LAPACK auxiliary routine (version 3.1) -- */
20444 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
20445 /*     November 2006 */
20446 
20447 /*     .. Scalar Arguments .. */
20448 /*     .. */
20449 /*     .. Array Arguments .. */
20450 /*     .. */
20451 
20452 /*  Purpose */
20453 /*  ======= */
20454 
20455 /*  DLATRS solves one of the triangular systems */
20456 
20457 /*     A *x = s*b  or  A'*x = s*b */
20458 
20459 /*  with scaling to prevent overflow.  Here A is an upper or lower */
20460 /*  triangular matrix, A' denotes the transpose of A, x and b are */
20461 /*  n-element vectors, and s is a scaling factor, usually less than */
20462 /*  or equal to 1, chosen so that the components of x will be less than */
20463 /*  the overflow threshold.  If the unscaled problem will not cause */
20464 /*  overflow, the Level 2 BLAS routine DTRSV is called.  If the matrix A */
20465 /*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a */
20466 /*  non-trivial solution to A*x = 0 is returned. */
20467 
20468 /*  Arguments */
20469 /*  ========= */
20470 
20471 /*  UPLO    (input) CHARACTER*1 */
20472 /*          Specifies whether the matrix A is upper or lower triangular. */
20473 /*          = 'U':  Upper triangular */
20474 /*          = 'L':  Lower triangular */
20475 
20476 /*  TRANS   (input) CHARACTER*1 */
20477 /*          Specifies the operation applied to A. */
20478 /*          = 'N':  Solve A * x = s*b  (No transpose) */
20479 /*          = 'T':  Solve A'* x = s*b  (Transpose) */
20480 /*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose) */
20481 
20482 /*  DIAG    (input) CHARACTER*1 */
20483 /*          Specifies whether or not the matrix A is unit triangular. */
20484 /*          = 'N':  Non-unit triangular */
20485 /*          = 'U':  Unit triangular */
20486 
20487 /*  NORMIN  (input) CHARACTER*1 */
20488 /*          Specifies whether CNORM has been set or not. */
20489 /*          = 'Y':  CNORM contains the column norms on entry */
20490 /*          = 'N':  CNORM is not set on entry.  On exit, the norms will */
20491 /*                  be computed and stored in CNORM. */
20492 
20493 /*  N       (input) INTEGER */
20494 /*          The order of the matrix A.  N >= 0. */
20495 
20496 /*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
20497 /*          The triangular matrix A.  If UPLO = 'U', the leading n by n */
20498 /*          upper triangular part of the array A contains the upper */
20499 /*          triangular matrix, and the strictly lower triangular part of */
20500 /*          A is not referenced.  If UPLO = 'L', the leading n by n lower */
20501 /*          triangular part of the array A contains the lower triangular */
20502 /*          matrix, and the strictly upper triangular part of A is not */
20503 /*          referenced.  If DIAG = 'U', the diagonal elements of A are */
20504 /*          also not referenced and are assumed to be 1. */
20505 
20506 /*  LDA     (input) INTEGER */
20507 /*          The leading dimension of the array A.  LDA >= max (1,N). */
20508 
20509 /*  X       (input/output) DOUBLE PRECISION array, dimension (N) */
20510 /*          On entry, the right hand side b of the triangular system. */
20511 /*          On exit, X is overwritten by the solution vector x. */
20512 
20513 /*  SCALE   (output) DOUBLE PRECISION */
20514 /*          The scaling factor s for the triangular system */
20515 /*             A * x = s*b  or  A'* x = s*b. */
20516 /*          If SCALE = 0, the matrix A is singular or badly scaled, and */
20517 /*          the vector x is an exact or approximate solution to A*x = 0. */
20518 
20519 /*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N) */
20520 
20521 /*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */
20522 /*          contains the norm of the off-diagonal part of the j-th column */
20523 /*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal */
20524 /*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */
20525 /*          must be greater than or equal to the 1-norm. */
20526 
20527 /*          If NORMIN = 'N', CNORM is an output argument and CNORM(j) */
20528 /*          returns the 1-norm of the offdiagonal part of the j-th column */
20529 /*          of A. */
20530 
20531 /*  INFO    (output) INTEGER */
20532 /*          = 0:  successful exit */
20533 /*          < 0:  if INFO = -k, the k-th argument had an illegal value */
20534 
20535 /*  Further Details */
20536 /*  ======= ======= */
20537 
20538 /*  A rough bound on x is computed; if that is less than overflow, DTRSV */
20539 /*  is called, otherwise, specific code is used which checks for possible */
20540 /*  overflow or divide-by-zero at every operation. */
20541 
20542 /*  A columnwise scheme is used for solving A*x = b.  The basic algorithm */
20543 /*  if A is lower triangular is */
20544 
20545 /*       x[1:n] := b[1:n] */
20546 /*       for j = 1, ..., n */
20547 /*            x(j) := x(j) / A(j,j) */
20548 /*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */
20549 /*       end */
20550 
20551 /*  Define bounds on the components of x after j iterations of the loop: */
20552 /*     M(j) = bound on x[1:j] */
20553 /*     G(j) = bound on x[j+1:n] */
20554 /*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */
20555 
20556 /*  Then for iteration j+1 we have */
20557 /*     M(j+1) <= G(j) / | A(j+1,j+1) | */
20558 /*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */
20559 /*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */
20560 
20561 /*  where CNORM(j+1) is greater than or equal to the infinity-norm of */
20562 /*  column j+1 of A, not counting the diagonal.  Hence */
20563 
20564 /*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */
20565 /*                  1<=i<=j */
20566 /*  and */
20567 
20568 /*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */
20569 /*                                   1<=i< j */
20570 
20571 /*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the */
20572 /*  reciprocal of the largest M(j), j=1,..,n, is larger than */
20573 /*  max(underflow, 1/overflow). */
20574 
20575 /*  The bound on x(j) is also used to determine when a step in the */
20576 /*  columnwise method can be performed without fear of overflow.  If */
20577 /*  the computed bound is greater than a large constant, x is scaled to */
20578 /*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to */
20579 /*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */
20580 
20581 /*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic */
20582 /*  algorithm for A upper triangular is */
20583 
20584 /*       for j = 1, ..., n */
20585 /*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */
20586 /*       end */
20587 
20588 /*  We simultaneously compute two bounds */
20589 /*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */
20590 /*       M(j) = bound on x(i), 1<=i<=j */
20591 
20592 /*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */
20593 /*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */
20594 /*  Then the bound on x(j) is */
20595 
20596 /*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */
20597 
20598 /*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */
20599 /*                      1<=i<=j */
20600 
20601 /*  and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater */
20602 /*  than max(underflow, 1/overflow). */
20603 
20604 /*  ===================================================================== */
20605 
20606 /*     .. Parameters .. */
20607 /*     .. */
20608 /*     .. Local Scalars .. */
20609 /*     .. */
20610 /*     .. External Functions .. */
20611 /*     .. */
20612 /*     .. External Subroutines .. */
20613 /*     .. */
20614 /*     .. Intrinsic Functions .. */
20615 /*     .. */
20616 /*     .. Executable Statements .. */
20617 
20618     /* Parameter adjustments */
20619     a_dim1 = *lda;
20620     a_offset = 1 + a_dim1;
20621     a -= a_offset;
20622     --x;
20623     --cnorm;
20624 
20625     /* Function Body */
20626     *info = 0;
20627     upper = lsame_(uplo, "U");
20628     notran = lsame_(trans, "N");
20629     nounit = lsame_(diag, "N");
20630 
20631 /*     Test the input parameters. */
20632 
20633     if (! upper && ! lsame_(uplo, "L")) {
20634 	*info = -1;
20635     } else if (! notran && ! lsame_(trans, "T") && !
20636 	    lsame_(trans, "C")) {
20637 	*info = -2;
20638     } else if (! nounit && ! lsame_(diag, "U")) {
20639 	*info = -3;
20640     } else if (! lsame_(normin, "Y") && ! lsame_(normin,
20641 	     "N")) {
20642 	*info = -4;
20643     } else if (*n < 0) {
20644 	*info = -5;
20645     } else if (*lda < std::max(1_integer,*n)) {
20646 	*info = -7;
20647     }
20648     if (*info != 0) {
20649 	i__1 = -(*info);
20650 	xerbla_("DLATRS", &i__1);
20651 	return 0;
20652     }
20653 
20654 /*     Quick return if possible */
20655 
20656     if (*n == 0) {
20657 	return 0;
20658     }
20659 
20660 /*     Determine machine dependent parameters to control overflow. */
20661 
20662     smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
20663     bignum = 1. / smlnum;
20664     *scale = 1.;
20665 
20666     if (lsame_(normin, "N")) {
20667 
20668 /*        Compute the 1-norm of each column, not including the diagonal. */
20669 
20670 	if (upper) {
20671 
20672 /*           A is upper triangular. */
20673 
20674 	    i__1 = *n;
20675 	    for (j = 1; j <= i__1; ++j) {
20676 		i__2 = j - 1;
20677 		cnorm[j] = dasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
20678 /* L10: */
20679 	    }
20680 	} else {
20681 
20682 /*           A is lower triangular. */
20683 
20684 	    i__1 = *n - 1;
20685 	    for (j = 1; j <= i__1; ++j) {
20686 		i__2 = *n - j;
20687 		cnorm[j] = dasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
20688 /* L20: */
20689 	    }
20690 	    cnorm[*n] = 0.;
20691 	}
20692     }
20693 
20694 /*     Scale the column norms by TSCAL if the maximum element in CNORM is */
20695 /*     greater than BIGNUM. */
20696 
20697     imax = idamax_(n, &cnorm[1], &c__1);
20698     tmax = cnorm[imax];
20699     if (tmax <= bignum) {
20700 	tscal = 1.;
20701     } else {
20702 	tscal = 1. / (smlnum * tmax);
20703 	dscal_(n, &tscal, &cnorm[1], &c__1);
20704     }
20705 
20706 /*     Compute a bound on the computed solution vector to see if the */
20707 /*     Level 2 BLAS routine DTRSV can be used. */
20708 
20709     j = idamax_(n, &x[1], &c__1);
20710     xmax = (d__1 = x[j], abs(d__1));
20711     xbnd = xmax;
20712     if (notran) {
20713 
20714 /*        Compute the growth in A * x = b. */
20715 
20716 	if (upper) {
20717 	    jfirst = *n;
20718 	    jlast = 1;
20719 	    jinc = -1;
20720 	} else {
20721 	    jfirst = 1;
20722 	    jlast = *n;
20723 	    jinc = 1;
20724 	}
20725 
20726 	if (tscal != 1.) {
20727 	    grow = 0.;
20728 	    goto L50;
20729 	}
20730 
20731 	if (nounit) {
20732 
20733 /*           A is non-unit triangular. */
20734 
20735 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
20736 /*           Initially, G(0) = max{x(i), i=1,...,n}. */
20737 
20738 	    grow = 1. / std::max(xbnd,smlnum);
20739 	    xbnd = grow;
20740 	    i__1 = jlast;
20741 	    i__2 = jinc;
20742 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
20743 
20744 /*              Exit the loop if the growth factor is too small. */
20745 
20746 		if (grow <= smlnum) {
20747 		    goto L50;
20748 		}
20749 
20750 /*              M(j) = G(j-1) / abs(A(j,j)) */
20751 
20752 		tjj = (d__1 = a[j + j * a_dim1], abs(d__1));
20753 /* Computing MIN */
20754 		d__1 = xbnd, d__2 = std::min(1.,tjj) * grow;
20755 		xbnd = std::min(d__1,d__2);
20756 		if (tjj + cnorm[j] >= smlnum) {
20757 
20758 /*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
20759 
20760 		    grow *= tjj / (tjj + cnorm[j]);
20761 		} else {
20762 
20763 /*                 G(j) could overflow, set GROW to 0. */
20764 
20765 		    grow = 0.;
20766 		}
20767 /* L30: */
20768 	    }
20769 	    grow = xbnd;
20770 	} else {
20771 
20772 /*           A is unit triangular. */
20773 
20774 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
20775 
20776 /* Computing MIN */
20777 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
20778 	    grow = std::min(d__1,d__2);
20779 	    i__2 = jlast;
20780 	    i__1 = jinc;
20781 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
20782 
20783 /*              Exit the loop if the growth factor is too small. */
20784 
20785 		if (grow <= smlnum) {
20786 		    goto L50;
20787 		}
20788 
20789 /*              G(j) = G(j-1)*( 1 + CNORM(j) ) */
20790 
20791 		grow *= 1. / (cnorm[j] + 1.);
20792 /* L40: */
20793 	    }
20794 	}
20795 L50:
20796 
20797 	;
20798     } else {
20799 
20800 /*        Compute the growth in A' * x = b. */
20801 
20802 	if (upper) {
20803 	    jfirst = 1;
20804 	    jlast = *n;
20805 	    jinc = 1;
20806 	} else {
20807 	    jfirst = *n;
20808 	    jlast = 1;
20809 	    jinc = -1;
20810 	}
20811 
20812 	if (tscal != 1.) {
20813 	    grow = 0.;
20814 	    goto L80;
20815 	}
20816 
20817 	if (nounit) {
20818 
20819 /*           A is non-unit triangular. */
20820 
20821 /*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
20822 /*           Initially, M(0) = max{x(i), i=1,...,n}. */
20823 
20824 	    grow = 1. / std::max(xbnd,smlnum);
20825 	    xbnd = grow;
20826 	    i__1 = jlast;
20827 	    i__2 = jinc;
20828 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
20829 
20830 /*              Exit the loop if the growth factor is too small. */
20831 
20832 		if (grow <= smlnum) {
20833 		    goto L80;
20834 		}
20835 
20836 /*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
20837 
20838 		xj = cnorm[j] + 1.;
20839 /* Computing MIN */
20840 		d__1 = grow, d__2 = xbnd / xj;
20841 		grow = std::min(d__1,d__2);
20842 
20843 /*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
20844 
20845 		tjj = (d__1 = a[j + j * a_dim1], abs(d__1));
20846 		if (xj > tjj) {
20847 		    xbnd *= tjj / xj;
20848 		}
20849 /* L60: */
20850 	    }
20851 	    grow = std::min(grow,xbnd);
20852 	} else {
20853 
20854 /*           A is unit triangular. */
20855 
20856 /*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */
20857 
20858 /* Computing MIN */
20859 	    d__1 = 1., d__2 = 1. / std::max(xbnd,smlnum);
20860 	    grow = std::min(d__1,d__2);
20861 	    i__2 = jlast;
20862 	    i__1 = jinc;
20863 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
20864 
20865 /*              Exit the loop if the growth factor is too small. */
20866 
20867 		if (grow <= smlnum) {
20868 		    goto L80;
20869 		}
20870 
20871 /*              G(j) = ( 1 + CNORM(j) )*G(j-1) */
20872 
20873 		xj = cnorm[j] + 1.;
20874 		grow /= xj;
20875 /* L70: */
20876 	    }
20877 	}
20878 L80:
20879 	;
20880     }
20881 
20882     if (grow * tscal > smlnum) {
20883 
20884 /*        Use the Level 2 BLAS solve if the reciprocal of the bound on */
20885 /*        elements of X is not too small. */
20886 
20887 	dtrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
20888     } else {
20889 
20890 /*        Use a Level 1 BLAS solve, scaling intermediate results. */
20891 
20892 	if (xmax > bignum) {
20893 
20894 /*           Scale X so that its components are less than or equal to */
20895 /*           BIGNUM in absolute value. */
20896 
20897 	    *scale = bignum / xmax;
20898 	    dscal_(n, scale, &x[1], &c__1);
20899 	    xmax = bignum;
20900 	}
20901 
20902 	if (notran) {
20903 
20904 /*           Solve A * x = b */
20905 
20906 	    i__1 = jlast;
20907 	    i__2 = jinc;
20908 	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
20909 
20910 /*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
20911 
20912 		xj = (d__1 = x[j], abs(d__1));
20913 		if (nounit) {
20914 		    tjjs = a[j + j * a_dim1] * tscal;
20915 		} else {
20916 		    tjjs = tscal;
20917 		    if (tscal == 1.) {
20918 			goto L100;
20919 		    }
20920 		}
20921 		tjj = abs(tjjs);
20922 		if (tjj > smlnum) {
20923 
20924 /*                    abs(A(j,j)) > SMLNUM: */
20925 
20926 		    if (tjj < 1.) {
20927 			if (xj > tjj * bignum) {
20928 
20929 /*                          Scale x by 1/b(j). */
20930 
20931 			    rec = 1. / xj;
20932 			    dscal_(n, &rec, &x[1], &c__1);
20933 			    *scale *= rec;
20934 			    xmax *= rec;
20935 			}
20936 		    }
20937 		    x[j] /= tjjs;
20938 		    xj = (d__1 = x[j], abs(d__1));
20939 		} else if (tjj > 0.) {
20940 
20941 /*                    0 < abs(A(j,j)) <= SMLNUM: */
20942 
20943 		    if (xj > tjj * bignum) {
20944 
20945 /*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
20946 /*                       to avoid overflow when dividing by A(j,j). */
20947 
20948 			rec = tjj * bignum / xj;
20949 			if (cnorm[j] > 1.) {
20950 
20951 /*                          Scale by 1/CNORM(j) to avoid overflow when */
20952 /*                          multiplying x(j) times column j. */
20953 
20954 			    rec /= cnorm[j];
20955 			}
20956 			dscal_(n, &rec, &x[1], &c__1);
20957 			*scale *= rec;
20958 			xmax *= rec;
20959 		    }
20960 		    x[j] /= tjjs;
20961 		    xj = (d__1 = x[j], abs(d__1));
20962 		} else {
20963 
20964 /*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
20965 /*                    scale = 0, and compute a solution to A*x = 0. */
20966 
20967 		    i__3 = *n;
20968 		    for (i__ = 1; i__ <= i__3; ++i__) {
20969 			x[i__] = 0.;
20970 /* L90: */
20971 		    }
20972 		    x[j] = 1.;
20973 		    xj = 1.;
20974 		    *scale = 0.;
20975 		    xmax = 0.;
20976 		}
20977 L100:
20978 
20979 /*              Scale x if necessary to avoid overflow when adding a */
20980 /*              multiple of column j of A. */
20981 
20982 		if (xj > 1.) {
20983 		    rec = 1. / xj;
20984 		    if (cnorm[j] > (bignum - xmax) * rec) {
20985 
20986 /*                    Scale x by 1/(2*abs(x(j))). */
20987 
20988 			rec *= .5;
20989 			dscal_(n, &rec, &x[1], &c__1);
20990 			*scale *= rec;
20991 		    }
20992 		} else if (xj * cnorm[j] > bignum - xmax) {
20993 
20994 /*                 Scale x by 1/2. */
20995 
20996 		    dscal_(n, &c_b36, &x[1], &c__1);
20997 		    *scale *= .5;
20998 		}
20999 
21000 		if (upper) {
21001 		    if (j > 1) {
21002 
21003 /*                    Compute the update */
21004 /*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
21005 
21006 			i__3 = j - 1;
21007 			d__1 = -x[j] * tscal;
21008 			daxpy_(&i__3, &d__1, &a[j * a_dim1 + 1], &c__1, &x[1],
21009 				 &c__1);
21010 			i__3 = j - 1;
21011 			i__ = idamax_(&i__3, &x[1], &c__1);
21012 			xmax = (d__1 = x[i__], abs(d__1));
21013 		    }
21014 		} else {
21015 		    if (j < *n) {
21016 
21017 /*                    Compute the update */
21018 /*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
21019 
21020 			i__3 = *n - j;
21021 			d__1 = -x[j] * tscal;
21022 			daxpy_(&i__3, &d__1, &a[j + 1 + j * a_dim1], &c__1, &
21023 				x[j + 1], &c__1);
21024 			i__3 = *n - j;
21025 			i__ = j + idamax_(&i__3, &x[j + 1], &c__1);
21026 			xmax = (d__1 = x[i__], abs(d__1));
21027 		    }
21028 		}
21029 /* L110: */
21030 	    }
21031 
21032 	} else {
21033 
21034 /*           Solve A' * x = b */
21035 
21036 	    i__2 = jlast;
21037 	    i__1 = jinc;
21038 	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
21039 
21040 /*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
21041 /*                                    k<>j */
21042 
21043 		xj = (d__1 = x[j], abs(d__1));
21044 		uscal = tscal;
21045 		rec = 1. / std::max(xmax,1.);
21046 		if (cnorm[j] > (bignum - xj) * rec) {
21047 
21048 /*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
21049 
21050 		    rec *= .5;
21051 		    if (nounit) {
21052 			tjjs = a[j + j * a_dim1] * tscal;
21053 		    } else {
21054 			tjjs = tscal;
21055 		    }
21056 		    tjj = abs(tjjs);
21057 		    if (tjj > 1.) {
21058 
21059 /*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */
21060 
21061 /* Computing MIN */
21062 			d__1 = 1., d__2 = rec * tjj;
21063 			rec = std::min(d__1,d__2);
21064 			uscal /= tjjs;
21065 		    }
21066 		    if (rec < 1.) {
21067 			dscal_(n, &rec, &x[1], &c__1);
21068 			*scale *= rec;
21069 			xmax *= rec;
21070 		    }
21071 		}
21072 
21073 		sumj = 0.;
21074 		if (uscal == 1.) {
21075 
21076 /*                 If the scaling needed for A in the dot product is 1, */
21077 /*                 call DDOT to perform the dot product. */
21078 
21079 		    if (upper) {
21080 			i__3 = j - 1;
21081 			sumj = ddot_(&i__3, &a[j * a_dim1 + 1], &c__1, &x[1],
21082 				&c__1);
21083 		    } else if (j < *n) {
21084 			i__3 = *n - j;
21085 			sumj = ddot_(&i__3, &a[j + 1 + j * a_dim1], &c__1, &x[
21086 				j + 1], &c__1);
21087 		    }
21088 		} else {
21089 
21090 /*                 Otherwise, use in-line code for the dot product. */
21091 
21092 		    if (upper) {
21093 			i__3 = j - 1;
21094 			for (i__ = 1; i__ <= i__3; ++i__) {
21095 			    sumj += a[i__ + j * a_dim1] * uscal * x[i__];
21096 /* L120: */
21097 			}
21098 		    } else if (j < *n) {
21099 			i__3 = *n;
21100 			for (i__ = j + 1; i__ <= i__3; ++i__) {
21101 			    sumj += a[i__ + j * a_dim1] * uscal * x[i__];
21102 /* L130: */
21103 			}
21104 		    }
21105 		}
21106 
21107 		if (uscal == tscal) {
21108 
21109 /*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
21110 /*                 was not used to scale the dotproduct. */
21111 
21112 		    x[j] -= sumj;
21113 		    xj = (d__1 = x[j], abs(d__1));
21114 		    if (nounit) {
21115 			tjjs = a[j + j * a_dim1] * tscal;
21116 		    } else {
21117 			tjjs = tscal;
21118 			if (tscal == 1.) {
21119 			    goto L150;
21120 			}
21121 		    }
21122 
21123 /*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
21124 
21125 		    tjj = abs(tjjs);
21126 		    if (tjj > smlnum) {
21127 
21128 /*                       abs(A(j,j)) > SMLNUM: */
21129 
21130 			if (tjj < 1.) {
21131 			    if (xj > tjj * bignum) {
21132 
21133 /*                             Scale X by 1/abs(x(j)). */
21134 
21135 				rec = 1. / xj;
21136 				dscal_(n, &rec, &x[1], &c__1);
21137 				*scale *= rec;
21138 				xmax *= rec;
21139 			    }
21140 			}
21141 			x[j] /= tjjs;
21142 		    } else if (tjj > 0.) {
21143 
21144 /*                       0 < abs(A(j,j)) <= SMLNUM: */
21145 
21146 			if (xj > tjj * bignum) {
21147 
21148 /*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
21149 
21150 			    rec = tjj * bignum / xj;
21151 			    dscal_(n, &rec, &x[1], &c__1);
21152 			    *scale *= rec;
21153 			    xmax *= rec;
21154 			}
21155 			x[j] /= tjjs;
21156 		    } else {
21157 
21158 /*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
21159 /*                       scale = 0, and compute a solution to A'*x = 0. */
21160 
21161 			i__3 = *n;
21162 			for (i__ = 1; i__ <= i__3; ++i__) {
21163 			    x[i__] = 0.;
21164 /* L140: */
21165 			}
21166 			x[j] = 1.;
21167 			*scale = 0.;
21168 			xmax = 0.;
21169 		    }
21170 L150:
21171 		    ;
21172 		} else {
21173 
21174 /*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot */
21175 /*                 product has already been divided by 1/A(j,j). */
21176 
21177 		    x[j] = x[j] / tjjs - sumj;
21178 		}
21179 /* Computing MAX */
21180 		d__2 = xmax, d__3 = (d__1 = x[j], abs(d__1));
21181 		xmax = std::max(d__2,d__3);
21182 /* L160: */
21183 	    }
21184 	}
21185 	*scale /= tscal;
21186     }
21187 
21188 /*     Scale the column norms by 1/TSCAL for return. */
21189 
21190     if (tscal != 1.) {
21191 	d__1 = 1. / tscal;
21192 	dscal_(n, &d__1, &cnorm[1], &c__1);
21193     }
21194 
21195     return 0;
21196 
21197 /*     End of DLATRS */
21198 
21199 } /* dlatrs_ */
21200 
dlatrz_(integer * m,integer * n,integer * l,double * a,integer * lda,double * tau,double * work)21201 /* Subroutine */ int dlatrz_(integer *m, integer *n, integer *l, double *a, integer *lda, double *tau, double *work)
21202 {
21203     /* System generated locals */
21204     integer a_dim1, a_offset, i__1, i__2;
21205 
21206     /* Local variables */
21207     integer i__;
21208 
21209 /*  -- LAPACK routine (version 3.2) -- */
21210 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
21211 /*     November 2006 */
21212 
21213 /*     .. Scalar Arguments .. */
21214 /*     .. */
21215 /*     .. Array Arguments .. */
21216 /*     .. */
21217 
21218 /*  Purpose */
21219 /*  ======= */
21220 
21221 /*  DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix */
21222 /*  [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z, by means */
21223 /*  of orthogonal transformations.  Z is an (M+L)-by-(M+L) orthogonal */
21224 /*  matrix and, R and A1 are M-by-M upper triangular matrices. */
21225 
21226 /*  Arguments */
21227 /*  ========= */
21228 
21229 /*  M       (input) INTEGER */
21230 /*          The number of rows of the matrix A.  M >= 0. */
21231 
21232 /*  N       (input) INTEGER */
21233 /*          The number of columns of the matrix A.  N >= 0. */
21234 
21235 /*  L       (input) INTEGER */
21236 /*          The number of columns of the matrix A containing the */
21237 /*          meaningful part of the Householder vectors. N-M >= L >= 0. */
21238 
21239 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
21240 /*          On entry, the leading M-by-N upper trapezoidal part of the */
21241 /*          array A must contain the matrix to be factorized. */
21242 /*          On exit, the leading M-by-M upper triangular part of A */
21243 /*          contains the upper triangular matrix R, and elements N-L+1 to */
21244 /*          N of the first M rows of A, with the array TAU, represent the */
21245 /*          orthogonal matrix Z as a product of M elementary reflectors. */
21246 
21247 /*  LDA     (input) INTEGER */
21248 /*          The leading dimension of the array A.  LDA >= max(1,M). */
21249 
21250 /*  TAU     (output) DOUBLE PRECISION array, dimension (M) */
21251 /*          The scalar factors of the elementary reflectors. */
21252 
21253 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (M) */
21254 
21255 /*  Further Details */
21256 /*  =============== */
21257 
21258 /*  Based on contributions by */
21259 /*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
21260 
21261 /*  The factorization is obtained by Householder's method.  The kth */
21262 /*  transformation matrix, Z( k ), which is used to introduce zeros into */
21263 /*  the ( m - k + 1 )th row of A, is given in the form */
21264 
21265 /*     Z( k ) = ( I     0   ), */
21266 /*              ( 0  T( k ) ) */
21267 
21268 /*  where */
21269 
21270 /*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ), */
21271 /*                                                 (   0    ) */
21272 /*                                                 ( z( k ) ) */
21273 
21274 /*  tau is a scalar and z( k ) is an l element vector. tau and z( k ) */
21275 /*  are chosen to annihilate the elements of the kth row of A2. */
21276 
21277 /*  The scalar tau is returned in the kth element of TAU and the vector */
21278 /*  u( k ) in the kth row of A2, such that the elements of z( k ) are */
21279 /*  in  a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in */
21280 /*  the upper triangular part of A1. */
21281 
21282 /*  Z is given by */
21283 
21284 /*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ). */
21285 
21286 /*  ===================================================================== */
21287 
21288 /*     .. Parameters .. */
21289 /*     .. */
21290 /*     .. Local Scalars .. */
21291 /*     .. */
21292 /*     .. External Subroutines .. */
21293 /*     .. */
21294 /*     .. Executable Statements .. */
21295 
21296 /*     Test the input arguments */
21297 
21298 /*     Quick return if possible */
21299 
21300     /* Parameter adjustments */
21301     a_dim1 = *lda;
21302     a_offset = 1 + a_dim1;
21303     a -= a_offset;
21304     --tau;
21305     --work;
21306 
21307     /* Function Body */
21308     if (*m == 0) {
21309 	return 0;
21310     } else if (*m == *n) {
21311 	i__1 = *n;
21312 	for (i__ = 1; i__ <= i__1; ++i__) {
21313 	    tau[i__] = 0.;
21314 /* L10: */
21315 	}
21316 	return 0;
21317     }
21318 
21319     for (i__ = *m; i__ >= 1; --i__) {
21320 
21321 /*        Generate elementary reflector H(i) to annihilate */
21322 /*        [ A(i,i) A(i,n-l+1:n) ] */
21323 
21324 	i__1 = *l + 1;
21325 	dlarfp_(&i__1, &a[i__ + i__ * a_dim1], &a[i__ + (*n - *l + 1) *
21326 		a_dim1], lda, &tau[i__]);
21327 
21328 /*        Apply H(i) to A(1:i-1,i:n) from the right */
21329 
21330 	i__1 = i__ - 1;
21331 	i__2 = *n - i__ + 1;
21332 	dlarz_("Right", &i__1, &i__2, l, &a[i__ + (*n - *l + 1) * a_dim1],
21333 		lda, &tau[i__], &a[i__ * a_dim1 + 1], lda, &work[1]);
21334 
21335 /* L20: */
21336     }
21337 
21338     return 0;
21339 
21340 /*     End of DLATRZ */
21341 
21342 } /* dlatrz_ */
21343 
dlatzm_(const char * side,integer * m,integer * n,double * v,integer * incv,double * tau,double * c1,double * c2,integer * ldc,double * work)21344 /* Subroutine */ int dlatzm_(const char *side, integer *m, integer *n, double *
21345 	v, integer *incv, double *tau, double *c1, double *c2,
21346 	integer *ldc, double *work)
21347 {
21348 	/* Table of constant values */
21349 	static integer c__1 = 1;
21350 	static double c_b5 = 1.;
21351 
21352     /* System generated locals */
21353     integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1;
21354     double d__1;
21355 
21356 /*  -- LAPACK routine (version 3.1) -- */
21357 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
21358 /*     November 2006 */
21359 
21360 /*     .. Scalar Arguments .. */
21361 /*     .. */
21362 /*     .. Array Arguments .. */
21363 /*     .. */
21364 
21365 /*  Purpose */
21366 /*  ======= */
21367 
21368 /*  This routine is deprecated and has been replaced by routine DORMRZ. */
21369 
21370 /*  DLATZM applies a Householder matrix generated by DTZRQF to a matrix. */
21371 
21372 /*  Let P = I - tau*u*u',   u = ( 1 ), */
21373 /*                              ( v ) */
21374 /*  where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if */
21375 /*  SIDE = 'R'. */
21376 
21377 /*  If SIDE equals 'L', let */
21378 /*         C = [ C1 ] 1 */
21379 /*             [ C2 ] m-1 */
21380 /*               n */
21381 /*  Then C is overwritten by P*C. */
21382 
21383 /*  If SIDE equals 'R', let */
21384 /*         C = [ C1, C2 ] m */
21385 /*                1  n-1 */
21386 /*  Then C is overwritten by C*P. */
21387 
21388 /*  Arguments */
21389 /*  ========= */
21390 
21391 /*  SIDE    (input) CHARACTER*1 */
21392 /*          = 'L': form P * C */
21393 /*          = 'R': form C * P */
21394 
21395 /*  M       (input) INTEGER */
21396 /*          The number of rows of the matrix C. */
21397 
21398 /*  N       (input) INTEGER */
21399 /*          The number of columns of the matrix C. */
21400 
21401 /*  V       (input) DOUBLE PRECISION array, dimension */
21402 /*                  (1 + (M-1)*abs(INCV)) if SIDE = 'L' */
21403 /*                  (1 + (N-1)*abs(INCV)) if SIDE = 'R' */
21404 /*          The vector v in the representation of P. V is not used */
21405 /*          if TAU = 0. */
21406 
21407 /*  INCV    (input) INTEGER */
21408 /*          The increment between elements of v. INCV <> 0 */
21409 
21410 /*  TAU     (input) DOUBLE PRECISION */
21411 /*          The value tau in the representation of P. */
21412 
21413 /*  C1      (input/output) DOUBLE PRECISION array, dimension */
21414 /*                         (LDC,N) if SIDE = 'L' */
21415 /*                         (M,1)   if SIDE = 'R' */
21416 /*          On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 */
21417 /*          if SIDE = 'R'. */
21418 
21419 /*          On exit, the first row of P*C if SIDE = 'L', or the first */
21420 /*          column of C*P if SIDE = 'R'. */
21421 
21422 /*  C2      (input/output) DOUBLE PRECISION array, dimension */
21423 /*                         (LDC, N)   if SIDE = 'L' */
21424 /*                         (LDC, N-1) if SIDE = 'R' */
21425 /*          On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the */
21426 /*          m x (n - 1) matrix C2 if SIDE = 'R'. */
21427 
21428 /*          On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P */
21429 /*          if SIDE = 'R'. */
21430 
21431 /*  LDC     (input) INTEGER */
21432 /*          The leading dimension of the arrays C1 and C2. LDC >= (1,M). */
21433 
21434 /*  WORK    (workspace) DOUBLE PRECISION array, dimension */
21435 /*                      (N) if SIDE = 'L' */
21436 /*                      (M) if SIDE = 'R' */
21437 
21438 /*  ===================================================================== */
21439 
21440 /*     .. Parameters .. */
21441 /*     .. */
21442 /*     .. External Subroutines .. */
21443 /*     .. */
21444 /*     .. External Functions .. */
21445 /*     .. */
21446 /*     .. Intrinsic Functions .. */
21447 /*     .. */
21448 /*     .. Executable Statements .. */
21449 
21450     /* Parameter adjustments */
21451     --v;
21452     c2_dim1 = *ldc;
21453     c2_offset = 1 + c2_dim1;
21454     c2 -= c2_offset;
21455     c1_dim1 = *ldc;
21456     c1_offset = 1 + c1_dim1;
21457     c1 -= c1_offset;
21458     --work;
21459 
21460     /* Function Body */
21461     if (std::min(*m,*n) == 0 || *tau == 0.) {
21462 	return 0;
21463     }
21464 
21465     if (lsame_(side, "L")) {
21466 
21467 /*        w := C1 + v' * C2 */
21468 
21469 	dcopy_(n, &c1[c1_offset], ldc, &work[1], &c__1);
21470 	i__1 = *m - 1;
21471 	dgemv_("Transpose", &i__1, n, &c_b5, &c2[c2_offset], ldc, &v[1], incv,
21472 		 &c_b5, &work[1], &c__1);
21473 
21474 /*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w' */
21475 /*        [ C2 ]    [ C2 ]        [ v ] */
21476 
21477 	d__1 = -(*tau);
21478 	daxpy_(n, &d__1, &work[1], &c__1, &c1[c1_offset], ldc);
21479 	i__1 = *m - 1;
21480 	d__1 = -(*tau);
21481 	dger_(&i__1, n, &d__1, &v[1], incv, &work[1], &c__1, &c2[c2_offset],
21482 		ldc);
21483 
21484     } else if (lsame_(side, "R")) {
21485 
21486 /*        w := C1 + C2 * v */
21487 
21488 	dcopy_(m, &c1[c1_offset], &c__1, &work[1], &c__1);
21489 	i__1 = *n - 1;
21490 	dgemv_("No transpose", m, &i__1, &c_b5, &c2[c2_offset], ldc, &v[1],
21491 		incv, &c_b5, &work[1], &c__1);
21492 
21493 /*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */
21494 
21495 	d__1 = -(*tau);
21496 	daxpy_(m, &d__1, &work[1], &c__1, &c1[c1_offset], &c__1);
21497 	i__1 = *n - 1;
21498 	d__1 = -(*tau);
21499 	dger_(m, &i__1, &d__1, &work[1], &c__1, &v[1], incv, &c2[c2_offset],
21500 		ldc);
21501     }
21502 
21503     return 0;
21504 
21505 /*     End of DLATZM */
21506 
21507 } /* dlatzm_ */
21508 
dlauu2_(const char * uplo,integer * n,double * a,integer * lda,integer * info)21509 /* Subroutine */ int dlauu2_(const char *uplo, integer *n, double *a, integer *
21510 	lda, integer *info)
21511 {
21512 	/* Table of constant values */
21513 	static double c_b7 = 1.;
21514 	static integer c__1 = 1;
21515 
21516     /* System generated locals */
21517     integer a_dim1, a_offset, i__1, i__2, i__3;
21518 
21519     /* Local variables */
21520     integer i__;
21521     double aii;
21522     bool upper;
21523 
21524 
21525 /*  -- LAPACK auxiliary routine (version 3.1) -- */
21526 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
21527 /*     November 2006 */
21528 
21529 /*     .. Scalar Arguments .. */
21530 /*     .. */
21531 /*     .. Array Arguments .. */
21532 /*     .. */
21533 
21534 /*  Purpose */
21535 /*  ======= */
21536 
21537 /*  DLAUU2 computes the product U * U' or L' * L, where the triangular */
21538 /*  factor U or L is stored in the upper or lower triangular part of */
21539 /*  the array A. */
21540 
21541 /*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
21542 /*  overwriting the factor U in A. */
21543 /*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
21544 /*  overwriting the factor L in A. */
21545 
21546 /*  This is the unblocked form of the algorithm, calling Level 2 BLAS. */
21547 
21548 /*  Arguments */
21549 /*  ========= */
21550 
21551 /*  UPLO    (input) CHARACTER*1 */
21552 /*          Specifies whether the triangular factor stored in the array A */
21553 /*          is upper or lower triangular: */
21554 /*          = 'U':  Upper triangular */
21555 /*          = 'L':  Lower triangular */
21556 
21557 /*  N       (input) INTEGER */
21558 /*          The order of the triangular factor U or L.  N >= 0. */
21559 
21560 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
21561 /*          On entry, the triangular factor U or L. */
21562 /*          On exit, if UPLO = 'U', the upper triangle of A is */
21563 /*          overwritten with the upper triangle of the product U * U'; */
21564 /*          if UPLO = 'L', the lower triangle of A is overwritten with */
21565 /*          the lower triangle of the product L' * L. */
21566 
21567 /*  LDA     (input) INTEGER */
21568 /*          The leading dimension of the array A.  LDA >= max(1,N). */
21569 
21570 /*  INFO    (output) INTEGER */
21571 /*          = 0: successful exit */
21572 /*          < 0: if INFO = -k, the k-th argument had an illegal value */
21573 
21574 /*  ===================================================================== */
21575 
21576 /*     .. Parameters .. */
21577 /*     .. */
21578 /*     .. Local Scalars .. */
21579 /*     .. */
21580 /*     .. External Functions .. */
21581 /*     .. */
21582 /*     .. External Subroutines .. */
21583 /*     .. */
21584 /*     .. Intrinsic Functions .. */
21585 /*     .. */
21586 /*     .. Executable Statements .. */
21587 
21588 /*     Test the input parameters. */
21589 
21590     /* Parameter adjustments */
21591     a_dim1 = *lda;
21592     a_offset = 1 + a_dim1;
21593     a -= a_offset;
21594 
21595     /* Function Body */
21596     *info = 0;
21597     upper = lsame_(uplo, "U");
21598     if (! upper && ! lsame_(uplo, "L")) {
21599 	*info = -1;
21600     } else if (*n < 0) {
21601 	*info = -2;
21602     } else if (*lda < std::max(1_integer,*n)) {
21603 	*info = -4;
21604     }
21605     if (*info != 0) {
21606 	i__1 = -(*info);
21607 	xerbla_("DLAUU2", &i__1);
21608 	return 0;
21609     }
21610 
21611 /*     Quick return if possible */
21612 
21613     if (*n == 0) {
21614 	return 0;
21615     }
21616 
21617     if (upper) {
21618 
21619 /*        Compute the product U * U'. */
21620 
21621 	i__1 = *n;
21622 	for (i__ = 1; i__ <= i__1; ++i__) {
21623 	    aii = a[i__ + i__ * a_dim1];
21624 	    if (i__ < *n) {
21625 		i__2 = *n - i__ + 1;
21626 		a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1],
21627 			lda, &a[i__ + i__ * a_dim1], lda);
21628 		i__2 = i__ - 1;
21629 		i__3 = *n - i__;
21630 		dgemv_("No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) *
21631 			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
21632 			aii, &a[i__ * a_dim1 + 1], &c__1);
21633 	    } else {
21634 		dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
21635 	    }
21636 /* L10: */
21637 	}
21638 
21639     } else {
21640 
21641 /*        Compute the product L' * L. */
21642 
21643 	i__1 = *n;
21644 	for (i__ = 1; i__ <= i__1; ++i__) {
21645 	    aii = a[i__ + i__ * a_dim1];
21646 	    if (i__ < *n) {
21647 		i__2 = *n - i__ + 1;
21648 		a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], &
21649 			c__1, &a[i__ + i__ * a_dim1], &c__1);
21650 		i__2 = *n - i__;
21651 		i__3 = i__ - 1;
21652 		dgemv_("Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1],
21653 			 lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__
21654 			+ a_dim1], lda);
21655 	    } else {
21656 		dscal_(&i__, &aii, &a[i__ + a_dim1], lda);
21657 	    }
21658 /* L20: */
21659 	}
21660     }
21661 
21662     return 0;
21663 
21664 /*     End of DLAUU2 */
21665 
21666 } /* dlauu2_ */
21667 
dlauum_(const char * uplo,integer * n,double * a,integer * lda,integer * info)21668 /* Subroutine */ int dlauum_(const char *uplo, integer *n, double *a, integer *
21669 	lda, integer *info)
21670 {
21671 	/* Table of constant values */
21672 	static integer c__1 = 1;
21673 	static integer c_n1 = -1;
21674 	static double c_b15 = 1.;
21675 
21676     /* System generated locals */
21677     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
21678 
21679     /* Local variables */
21680     integer i__, ib, nb;
21681     bool upper;
21682 
21683 
21684 /*  -- LAPACK auxiliary routine (version 3.1) -- */
21685 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
21686 /*     November 2006 */
21687 
21688 /*     .. Scalar Arguments .. */
21689 /*     .. */
21690 /*     .. Array Arguments .. */
21691 /*     .. */
21692 
21693 /*  Purpose */
21694 /*  ======= */
21695 
21696 /*  DLAUUM computes the product U * U' or L' * L, where the triangular */
21697 /*  factor U or L is stored in the upper or lower triangular part of */
21698 /*  the array A. */
21699 
21700 /*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
21701 /*  overwriting the factor U in A. */
21702 /*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
21703 /*  overwriting the factor L in A. */
21704 
21705 /*  This is the blocked form of the algorithm, calling Level 3 BLAS. */
21706 
21707 /*  Arguments */
21708 /*  ========= */
21709 
21710 /*  UPLO    (input) CHARACTER*1 */
21711 /*          Specifies whether the triangular factor stored in the array A */
21712 /*          is upper or lower triangular: */
21713 /*          = 'U':  Upper triangular */
21714 /*          = 'L':  Lower triangular */
21715 
21716 /*  N       (input) INTEGER */
21717 /*          The order of the triangular factor U or L.  N >= 0. */
21718 
21719 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
21720 /*          On entry, the triangular factor U or L. */
21721 /*          On exit, if UPLO = 'U', the upper triangle of A is */
21722 /*          overwritten with the upper triangle of the product U * U'; */
21723 /*          if UPLO = 'L', the lower triangle of A is overwritten with */
21724 /*          the lower triangle of the product L' * L. */
21725 
21726 /*  LDA     (input) INTEGER */
21727 /*          The leading dimension of the array A.  LDA >= max(1,N). */
21728 
21729 /*  INFO    (output) INTEGER */
21730 /*          = 0: successful exit */
21731 /*          < 0: if INFO = -k, the k-th argument had an illegal value */
21732 
21733 /*  ===================================================================== */
21734 
21735 /*     .. Parameters .. */
21736 /*     .. */
21737 /*     .. Local Scalars .. */
21738 /*     .. */
21739 /*     .. External Functions .. */
21740 /*     .. */
21741 /*     .. External Subroutines .. */
21742 /*     .. */
21743 /*     .. Intrinsic Functions .. */
21744 /*     .. */
21745 /*     .. Executable Statements .. */
21746 
21747 /*     Test the input parameters. */
21748 
21749     /* Parameter adjustments */
21750     a_dim1 = *lda;
21751     a_offset = 1 + a_dim1;
21752     a -= a_offset;
21753 
21754     /* Function Body */
21755     *info = 0;
21756     upper = lsame_(uplo, "U");
21757     if (! upper && ! lsame_(uplo, "L")) {
21758 	*info = -1;
21759     } else if (*n < 0) {
21760 	*info = -2;
21761     } else if (*lda < std::max(1_integer,*n)) {
21762 	*info = -4;
21763     }
21764     if (*info != 0) {
21765 	i__1 = -(*info);
21766 	xerbla_("DLAUUM", &i__1);
21767 	return 0;
21768     }
21769 
21770 /*     Quick return if possible */
21771 
21772     if (*n == 0) {
21773 	return 0;
21774     }
21775 
21776 /*     Determine the block size for this environment. */
21777 
21778     nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1);
21779 
21780     if (nb <= 1 || nb >= *n) {
21781 
21782 /*        Use unblocked code */
21783 
21784 	dlauu2_(uplo, n, &a[a_offset], lda, info);
21785     } else {
21786 
21787 /*        Use blocked code */
21788 
21789 	if (upper) {
21790 
21791 /*           Compute the product U * U'. */
21792 
21793 	    i__1 = *n;
21794 	    i__2 = nb;
21795 	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
21796 /* Computing MIN */
21797 		i__3 = nb, i__4 = *n - i__ + 1;
21798 		ib = std::min(i__3,i__4);
21799 		i__3 = i__ - 1;
21800 		dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib,
21801 			&c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1
21802 			+ 1], lda)
21803 			;
21804 		dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
21805 		if (i__ + ib <= *n) {
21806 		    i__3 = i__ - 1;
21807 		    i__4 = *n - i__ - ib + 1;
21808 		    dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
21809 			    c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ +
21810 			    (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ *
21811 			    a_dim1 + 1], lda);
21812 		    i__3 = *n - i__ - ib + 1;
21813 		    dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[
21814 			    i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ +
21815 			    i__ * a_dim1], lda);
21816 		}
21817 /* L10: */
21818 	    }
21819 	} else {
21820 
21821 /*           Compute the product L' * L. */
21822 
21823 	    i__2 = *n;
21824 	    i__1 = nb;
21825 	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
21826 /* Computing MIN */
21827 		i__3 = nb, i__4 = *n - i__ + 1;
21828 		ib = std::min(i__3,i__4);
21829 		i__3 = i__ - 1;
21830 		dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
21831 			c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1],
21832 			lda);
21833 		dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
21834 		if (i__ + ib <= *n) {
21835 		    i__3 = i__ - 1;
21836 		    i__4 = *n - i__ - ib + 1;
21837 		    dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
21838 			    c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ +
21839 			    ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda);
21840 		    i__3 = *n - i__ - ib + 1;
21841 		    dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ +
21842 			    ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ *
21843 			    a_dim1], lda);
21844 		}
21845 /* L20: */
21846 	    }
21847 	}
21848     }
21849 
21850     return 0;
21851 
21852 /*     End of DLAUUM */
21853 
21854 } /* dlauum_ */
21855 
dlazq3_(integer * i0,integer * n0,double * z__,integer * pp,double * dmin__,double * sigma,double * desig,double * qmax,integer * nfail,integer * iter,integer * ndiv,bool * ieee,integer * ttype,double * dmin1,double * dmin2,double * dn,double * dn1,double * dn2,double * tau)21856 /* Subroutine */ int dlazq3_(integer *i0, integer *n0, double *z__,
21857 	integer *pp, double *dmin__, double *sigma, double *desig,
21858 	double *qmax, integer *nfail, integer *iter, integer *ndiv,
21859 	bool *ieee, integer *ttype, double *dmin1, double *dmin2,
21860 	double *dn, double *dn1, double *dn2, double *tau)
21861 {
21862     /* System generated locals */
21863     integer i__1;
21864     double d__1, d__2;
21865 
21866     /* Local variables */
21867     double g, s, t;
21868     integer j4, nn;
21869     double eps, tol;
21870     integer n0in, ipn4;
21871     double tol2, temp;
21872     double safmin;
21873 
21874 
21875 /*  -- LAPACK auxiliary routine (version 3.1) -- */
21876 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
21877 /*     November 2006 */
21878 
21879 /*     .. Scalar Arguments .. */
21880 /*     .. */
21881 /*     .. Array Arguments .. */
21882 /*     .. */
21883 
21884 /*  Purpose */
21885 /*  ======= */
21886 
21887 /*  DLAZQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
21888 /*  In case of failure it changes shifts, and tries again until output */
21889 /*  is positive. */
21890 
21891 /*  Arguments */
21892 /*  ========= */
21893 
21894 /*  I0     (input) INTEGER */
21895 /*         First index. */
21896 
21897 /*  N0     (input) INTEGER */
21898 /*         Last index. */
21899 
21900 /*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N ) */
21901 /*         Z holds the qd array. */
21902 
21903 /*  PP     (input) INTEGER */
21904 /*         PP=0 for ping, PP=1 for pong. */
21905 
21906 /*  DMIN   (output) DOUBLE PRECISION */
21907 /*         Minimum value of d. */
21908 
21909 /*  SIGMA  (output) DOUBLE PRECISION */
21910 /*         Sum of shifts used in current segment. */
21911 
21912 /*  DESIG  (input/output) DOUBLE PRECISION */
21913 /*         Lower order part of SIGMA */
21914 
21915 /*  QMAX   (input) DOUBLE PRECISION */
21916 /*         Maximum value of q. */
21917 
21918 /*  NFAIL  (output) INTEGER */
21919 /*         Number of times shift was too big. */
21920 
21921 /*  ITER   (output) INTEGER */
21922 /*         Number of iterations. */
21923 
21924 /*  NDIV   (output) INTEGER */
21925 /*         Number of divisions. */
21926 
21927 /*  IEEE   (input) LOGICAL */
21928 /*         Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
21929 
21930 /*  TTYPE  (input/output) INTEGER */
21931 /*         Shift type.  TTYPE is passed as an argument in order to save */
21932 /*         its value between calls to DLAZQ3 */
21933 
21934 /*  DMIN1  (input/output) REAL */
21935 /*  DMIN2  (input/output) REAL */
21936 /*  DN     (input/output) REAL */
21937 /*  DN1    (input/output) REAL */
21938 /*  DN2    (input/output) REAL */
21939 /*  TAU    (input/output) REAL */
21940 /*         These are passed as arguments in order to save their values */
21941 /*         between calls to DLAZQ3 */
21942 
21943 /*  This is a thread safe version of DLASQ3, which passes TTYPE, DMIN1, */
21944 /*  DMIN2, DN, DN1. DN2 and TAU through the argument list in place of */
21945 /*  declaring them in a SAVE statment. */
21946 
21947 /*  ===================================================================== */
21948 
21949 /*     .. Parameters .. */
21950 /*     .. */
21951 /*     .. Local Scalars .. */
21952 /*     .. */
21953 /*     .. External Subroutines .. */
21954 /*     .. */
21955 /*     .. External Function .. */
21956 /*     .. */
21957 /*     .. Intrinsic Functions .. */
21958 /*     .. */
21959 /*     .. Executable Statements .. */
21960 
21961     /* Parameter adjustments */
21962     --z__;
21963 
21964     /* Function Body */
21965     n0in = *n0;
21966     eps = dlamch_("Precision");
21967     safmin = dlamch_("Safe minimum");
21968     tol = eps * 100.;
21969 /* Computing 2nd power */
21970     d__1 = tol;
21971     tol2 = d__1 * d__1;
21972     g = 0.;
21973 
21974 /*     Check for deflation. */
21975 
21976 L10:
21977 
21978     if (*n0 < *i0) {
21979 	return 0;
21980     }
21981     if (*n0 == *i0) {
21982 	goto L20;
21983     }
21984     nn = (*n0 << 2) + *pp;
21985     if (*n0 == *i0 + 1) {
21986 	goto L40;
21987     }
21988 
21989 /*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
21990 
21991     if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
21992 	    4] > tol2 * z__[nn - 7]) {
21993 	goto L30;
21994     }
21995 
21996 L20:
21997 
21998     z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
21999     --(*n0);
22000     goto L10;
22001 
22002 /*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
22003 
22004 L30:
22005 
22006     if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
22007 	    nn - 11]) {
22008 	goto L50;
22009     }
22010 
22011 L40:
22012 
22013     if (z__[nn - 3] > z__[nn - 7]) {
22014 	s = z__[nn - 3];
22015 	z__[nn - 3] = z__[nn - 7];
22016 	z__[nn - 7] = s;
22017     }
22018     if (z__[nn - 5] > z__[nn - 3] * tol2) {
22019 	t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
22020 	s = z__[nn - 3] * (z__[nn - 5] / t);
22021 	if (s <= t) {
22022 	    s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.)));
22023 	} else {
22024 	    s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
22025 	}
22026 	t = z__[nn - 7] + (s + z__[nn - 5]);
22027 	z__[nn - 3] *= z__[nn - 7] / t;
22028 	z__[nn - 7] = t;
22029     }
22030     z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
22031     z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
22032     *n0 += -2;
22033     goto L10;
22034 
22035 L50:
22036 
22037 /*     Reverse the qd-array, if warranted. */
22038 
22039     if (*dmin__ <= 0. || *n0 < n0in) {
22040 	if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
22041 	    ipn4 = *i0 + *n0 << 2;
22042 	    i__1 = *i0 + *n0 - 1 << 1;
22043 	    for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
22044 		temp = z__[j4 - 3];
22045 		z__[j4 - 3] = z__[ipn4 - j4 - 3];
22046 		z__[ipn4 - j4 - 3] = temp;
22047 		temp = z__[j4 - 2];
22048 		z__[j4 - 2] = z__[ipn4 - j4 - 2];
22049 		z__[ipn4 - j4 - 2] = temp;
22050 		temp = z__[j4 - 1];
22051 		z__[j4 - 1] = z__[ipn4 - j4 - 5];
22052 		z__[ipn4 - j4 - 5] = temp;
22053 		temp = z__[j4];
22054 		z__[j4] = z__[ipn4 - j4 - 4];
22055 		z__[ipn4 - j4 - 4] = temp;
22056 /* L60: */
22057 	    }
22058 	    if (*n0 - *i0 <= 4) {
22059 		z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
22060 		z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
22061 	    }
22062 /* Computing MIN */
22063 	    d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
22064 	    *dmin2 = std::min(d__1,d__2);
22065 /* Computing MIN */
22066 	    d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
22067 		    , d__1 = std::min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
22068 	    z__[(*n0 << 2) + *pp - 1] = std::min(d__1,d__2);
22069 /* Computing MIN */
22070 	    d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
22071 		     std::min(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
22072 	    z__[(*n0 << 2) - *pp] = std::min(d__1,d__2);
22073 /* Computing MAX */
22074 	    d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = std::max(d__1,
22075 		    d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
22076 	    *qmax = std::max(d__1,d__2);
22077 	    *dmin__ = -0.;
22078 	}
22079     }
22080 
22081 /* Computing MIN */
22082     d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 =
22083 	     std::min(d__1,d__2), d__2 = *dmin2 + z__[(*n0 << 2) - *pp];
22084     if (*dmin__ < 0. || safmin * *qmax < std::min(d__1,d__2)) {
22085 
22086 /*        Choose a shift. */
22087 
22088 	dlazq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1,
22089 		dn2, tau, ttype, &g);
22090 
22091 /*        Call dqds until DMIN > 0. */
22092 
22093 L80:
22094 
22095 	dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
22096 		ieee);
22097 
22098 	*ndiv += *n0 - *i0 + 2;
22099 	++(*iter);
22100 
22101 /*        Check status. */
22102 
22103 	if (*dmin__ >= 0. && *dmin1 > 0.) {
22104 
22105 /*           Success. */
22106 
22107 	    goto L100;
22108 
22109 	} else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] <
22110 		tol * (*sigma + *dn1) && abs(*dn) < tol * *sigma) {
22111 
22112 /*           Convergence hidden by negative DN. */
22113 
22114 	    z__[(*n0 - 1 << 2) - *pp + 2] = 0.;
22115 	    *dmin__ = 0.;
22116 	    goto L100;
22117 	} else if (*dmin__ < 0.) {
22118 
22119 /*           TAU too big. Select new TAU and try again. */
22120 
22121 	    ++(*nfail);
22122 	    if (*ttype < -22) {
22123 
22124 /*              Failed twice. Play it safe. */
22125 
22126 		*tau = 0.;
22127 	    } else if (*dmin1 > 0.) {
22128 
22129 /*              Late failure. Gives excellent shift. */
22130 
22131 		*tau = (*tau + *dmin__) * (1. - eps * 2.);
22132 		*ttype += -11;
22133 	    } else {
22134 
22135 /*              Early failure. Divide by 4. */
22136 
22137 		*tau *= .25;
22138 		*ttype += -12;
22139 	    }
22140 	    goto L80;
22141 	} else if (*dmin__ != *dmin__) {
22142 
22143 /*           NaN. */
22144 
22145 	    *tau = 0.;
22146 	    goto L80;
22147 	} else {
22148 
22149 /*           Possible underflow. Play it safe. */
22150 
22151 	    goto L90;
22152 	}
22153     }
22154 
22155 /*     Risk of underflow. */
22156 
22157 L90:
22158     dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
22159     *ndiv += *n0 - *i0 + 2;
22160     ++(*iter);
22161     *tau = 0.;
22162 
22163 L100:
22164     if (*tau < *sigma) {
22165 	*desig += *tau;
22166 	t = *sigma + *desig;
22167 	*desig -= t - *sigma;
22168     } else {
22169 	t = *sigma + *tau;
22170 	*desig = *sigma - (t - *tau) + *desig;
22171     }
22172     *sigma = t;
22173 
22174     return 0;
22175 
22176 /*     End of DLAZQ3 */
22177 
22178 } /* dlazq3_ */
22179 
dlazq4_(integer * i0,integer * n0,double * z__,integer * pp,integer * n0in,double * dmin__,double * dmin1,double * dmin2,double * dn,double * dn1,double * dn2,double * tau,integer * ttype,double * g)22180 /* Subroutine */ int dlazq4_(integer *i0, integer *n0, double *z__,
22181 	integer *pp, integer *n0in, double *dmin__, double *dmin1,
22182 	double *dmin2, double *dn, double *dn1, double *dn2,
22183 	double *tau, integer *ttype, double *g)
22184 {
22185     /* System generated locals */
22186     integer i__1;
22187     double d__1, d__2;
22188 
22189     /* Local variables */
22190     double s, a2, b1, b2;
22191     integer i4, nn, np;
22192     double gam, gap1, gap2;
22193 
22194 
22195 /*  -- LAPACK auxiliary routine (version 3.1) -- */
22196 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
22197 /*     November 2006 */
22198 
22199 /*     .. Scalar Arguments .. */
22200 /*     .. */
22201 /*     .. Array Arguments .. */
22202 /*     .. */
22203 
22204 /*  Purpose */
22205 /*  ======= */
22206 
22207 /*  DLAZQ4 computes an approximation TAU to the smallest eigenvalue */
22208 /*  using values of d from the previous transform. */
22209 
22210 /*  I0    (input) INTEGER */
22211 /*        First index. */
22212 
22213 /*  N0    (input) INTEGER */
22214 /*        Last index. */
22215 
22216 /*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
22217 /*        Z holds the qd array. */
22218 
22219 /*  PP    (input) INTEGER */
22220 /*        PP=0 for ping, PP=1 for pong. */
22221 
22222 /*  N0IN  (input) INTEGER */
22223 /*        The value of N0 at start of EIGTEST. */
22224 
22225 /*  DMIN  (input) DOUBLE PRECISION */
22226 /*        Minimum value of d. */
22227 
22228 /*  DMIN1 (input) DOUBLE PRECISION */
22229 /*        Minimum value of d, excluding D( N0 ). */
22230 
22231 /*  DMIN2 (input) DOUBLE PRECISION */
22232 /*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
22233 
22234 /*  DN    (input) DOUBLE PRECISION */
22235 /*        d(N) */
22236 
22237 /*  DN1   (input) DOUBLE PRECISION */
22238 /*        d(N-1) */
22239 
22240 /*  DN2   (input) DOUBLE PRECISION */
22241 /*        d(N-2) */
22242 
22243 /*  TAU   (output) DOUBLE PRECISION */
22244 /*        This is the shift. */
22245 
22246 /*  TTYPE (output) INTEGER */
22247 /*        Shift type. */
22248 
22249 /*  G     (input/output) DOUBLE PRECISION */
22250 /*        G is passed as an argument in order to save its value between */
22251 /*        calls to DLAZQ4 */
22252 
22253 /*  Further Details */
22254 /*  =============== */
22255 /*  CNST1 = 9/16 */
22256 
22257 /*  This is a thread safe version of DLASQ4, which passes G through the */
22258 /*  argument list in place of declaring G in a SAVE statment. */
22259 
22260 /*  ===================================================================== */
22261 
22262 /*     .. Parameters .. */
22263 /*     .. */
22264 /*     .. Local Scalars .. */
22265 /*     .. */
22266 /*     .. Intrinsic Functions .. */
22267 /*     .. */
22268 /*     .. Executable Statements .. */
22269 
22270 /*     A negative DMIN forces the shift to take that absolute value */
22271 /*     TTYPE records the type of shift. */
22272 
22273     /* Parameter adjustments */
22274     --z__;
22275 
22276     /* Function Body */
22277     if (*dmin__ <= 0.) {
22278 	*tau = -(*dmin__);
22279 	*ttype = -1;
22280 	return 0;
22281     }
22282 
22283     nn = (*n0 << 2) + *pp;
22284     if (*n0in == *n0) {
22285 
22286 /*        No eigenvalues deflated. */
22287 
22288 	if (*dmin__ == *dn || *dmin__ == *dn1) {
22289 
22290 	    b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
22291 	    b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
22292 	    a2 = z__[nn - 7] + z__[nn - 5];
22293 
22294 /*           Cases 2 and 3. */
22295 
22296 	    if (*dmin__ == *dn && *dmin1 == *dn1) {
22297 		gap2 = *dmin2 - a2 - *dmin2 * .25;
22298 		if (gap2 > 0. && gap2 > b2) {
22299 		    gap1 = a2 - *dn - b2 / gap2 * b2;
22300 		} else {
22301 		    gap1 = a2 - *dn - (b1 + b2);
22302 		}
22303 		if (gap1 > 0. && gap1 > b1) {
22304 /* Computing MAX */
22305 		    d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
22306 		    s = std::max(d__1,d__2);
22307 		    *ttype = -2;
22308 		} else {
22309 		    s = 0.;
22310 		    if (*dn > b1) {
22311 			s = *dn - b1;
22312 		    }
22313 		    if (a2 > b1 + b2) {
22314 /* Computing MIN */
22315 			d__1 = s, d__2 = a2 - (b1 + b2);
22316 			s = std::min(d__1,d__2);
22317 		    }
22318 /* Computing MAX */
22319 		    d__1 = s, d__2 = *dmin__ * .333;
22320 		    s = std::max(d__1,d__2);
22321 		    *ttype = -3;
22322 		}
22323 	    } else {
22324 
22325 /*              Case 4. */
22326 
22327 		*ttype = -4;
22328 		s = *dmin__ * .25;
22329 		if (*dmin__ == *dn) {
22330 		    gam = *dn;
22331 		    a2 = 0.;
22332 		    if (z__[nn - 5] > z__[nn - 7]) {
22333 			return 0;
22334 		    }
22335 		    b2 = z__[nn - 5] / z__[nn - 7];
22336 		    np = nn - 9;
22337 		} else {
22338 		    np = nn - (*pp << 1);
22339 		    b2 = z__[np - 2];
22340 		    gam = *dn1;
22341 		    if (z__[np - 4] > z__[np - 2]) {
22342 			return 0;
22343 		    }
22344 		    a2 = z__[np - 4] / z__[np - 2];
22345 		    if (z__[nn - 9] > z__[nn - 11]) {
22346 			return 0;
22347 		    }
22348 		    b2 = z__[nn - 9] / z__[nn - 11];
22349 		    np = nn - 13;
22350 		}
22351 
22352 /*              Approximate contribution to norm squared from I < NN-1. */
22353 
22354 		a2 += b2;
22355 		i__1 = (*i0 << 2) - 1 + *pp;
22356 		for (i4 = np; i4 >= i__1; i4 += -4) {
22357 		    if (b2 == 0.) {
22358 			goto L20;
22359 		    }
22360 		    b1 = b2;
22361 		    if (z__[i4] > z__[i4 - 2]) {
22362 			return 0;
22363 		    }
22364 		    b2 *= z__[i4] / z__[i4 - 2];
22365 		    a2 += b2;
22366 		    if (std::max(b2,b1) * 100. < a2 || .563 < a2) {
22367 			goto L20;
22368 		    }
22369 /* L10: */
22370 		}
22371 L20:
22372 		a2 *= 1.05;
22373 
22374 /*              Rayleigh quotient residual bound. */
22375 
22376 		if (a2 < .563) {
22377 		    s = gam * (1. - sqrt(a2)) / (a2 + 1.);
22378 		}
22379 	    }
22380 	} else if (*dmin__ == *dn2) {
22381 
22382 /*           Case 5. */
22383 
22384 	    *ttype = -5;
22385 	    s = *dmin__ * .25;
22386 
22387 /*           Compute contribution to norm squared from I > NN-2. */
22388 
22389 	    np = nn - (*pp << 1);
22390 	    b1 = z__[np - 2];
22391 	    b2 = z__[np - 6];
22392 	    gam = *dn2;
22393 	    if (z__[np - 8] > b2 || z__[np - 4] > b1) {
22394 		return 0;
22395 	    }
22396 	    a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
22397 
22398 /*           Approximate contribution to norm squared from I < NN-2. */
22399 
22400 	    if (*n0 - *i0 > 2) {
22401 		b2 = z__[nn - 13] / z__[nn - 15];
22402 		a2 += b2;
22403 		i__1 = (*i0 << 2) - 1 + *pp;
22404 		for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
22405 		    if (b2 == 0.) {
22406 			goto L40;
22407 		    }
22408 		    b1 = b2;
22409 		    if (z__[i4] > z__[i4 - 2]) {
22410 			return 0;
22411 		    }
22412 		    b2 *= z__[i4] / z__[i4 - 2];
22413 		    a2 += b2;
22414 		    if (std::max(b2,b1) * 100. < a2 || .563 < a2) {
22415 			goto L40;
22416 		    }
22417 /* L30: */
22418 		}
22419 L40:
22420 		a2 *= 1.05;
22421 	    }
22422 
22423 	    if (a2 < .563) {
22424 		s = gam * (1. - sqrt(a2)) / (a2 + 1.);
22425 	    }
22426 	} else {
22427 
22428 /*           Case 6, no information to guide us. */
22429 
22430 	    if (*ttype == -6) {
22431 		*g += (1. - *g) * .333;
22432 	    } else if (*ttype == -18) {
22433 		*g = .083250000000000005;
22434 	    } else {
22435 		*g = .25;
22436 	    }
22437 	    s = *g * *dmin__;
22438 	    *ttype = -6;
22439 	}
22440 
22441     } else if (*n0in == *n0 + 1) {
22442 
22443 /*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
22444 
22445 	if (*dmin1 == *dn1 && *dmin2 == *dn2) {
22446 
22447 /*           Cases 7 and 8. */
22448 
22449 	    *ttype = -7;
22450 	    s = *dmin1 * .333;
22451 	    if (z__[nn - 5] > z__[nn - 7]) {
22452 		return 0;
22453 	    }
22454 	    b1 = z__[nn - 5] / z__[nn - 7];
22455 	    b2 = b1;
22456 	    if (b2 == 0.) {
22457 		goto L60;
22458 	    }
22459 	    i__1 = (*i0 << 2) - 1 + *pp;
22460 	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
22461 		a2 = b1;
22462 		if (z__[i4] > z__[i4 - 2]) {
22463 		    return 0;
22464 		}
22465 		b1 *= z__[i4] / z__[i4 - 2];
22466 		b2 += b1;
22467 		if (std::max(b1,a2) * 100. < b2) {
22468 		    goto L60;
22469 		}
22470 /* L50: */
22471 	    }
22472 L60:
22473 	    b2 = sqrt(b2 * 1.05);
22474 /* Computing 2nd power */
22475 	    d__1 = b2;
22476 	    a2 = *dmin1 / (d__1 * d__1 + 1.);
22477 	    gap2 = *dmin2 * .5 - a2;
22478 	    if (gap2 > 0. && gap2 > b2 * a2) {
22479 /* Computing MAX */
22480 		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
22481 		s = std::max(d__1,d__2);
22482 	    } else {
22483 /* Computing MAX */
22484 		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
22485 		s = std::max(d__1,d__2);
22486 		*ttype = -8;
22487 	    }
22488 	} else {
22489 
22490 /*           Case 9. */
22491 
22492 	    s = *dmin1 * .25;
22493 	    if (*dmin1 == *dn1) {
22494 		s = *dmin1 * .5;
22495 	    }
22496 	    *ttype = -9;
22497 	}
22498 
22499     } else if (*n0in == *n0 + 2) {
22500 
22501 /*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
22502 
22503 /*        Cases 10 and 11. */
22504 
22505 	if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
22506 	    *ttype = -10;
22507 	    s = *dmin2 * .333;
22508 	    if (z__[nn - 5] > z__[nn - 7]) {
22509 		return 0;
22510 	    }
22511 	    b1 = z__[nn - 5] / z__[nn - 7];
22512 	    b2 = b1;
22513 	    if (b2 == 0.) {
22514 		goto L80;
22515 	    }
22516 	    i__1 = (*i0 << 2) - 1 + *pp;
22517 	    for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
22518 		if (z__[i4] > z__[i4 - 2]) {
22519 		    return 0;
22520 		}
22521 		b1 *= z__[i4] / z__[i4 - 2];
22522 		b2 += b1;
22523 		if (b1 * 100. < b2) {
22524 		    goto L80;
22525 		}
22526 /* L70: */
22527 	    }
22528 L80:
22529 	    b2 = sqrt(b2 * 1.05);
22530 /* Computing 2nd power */
22531 	    d__1 = b2;
22532 	    a2 = *dmin2 / (d__1 * d__1 + 1.);
22533 	    gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
22534 		    nn - 9]) - a2;
22535 	    if (gap2 > 0. && gap2 > b2 * a2) {
22536 /* Computing MAX */
22537 		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
22538 		s = std::max(d__1,d__2);
22539 	    } else {
22540 /* Computing MAX */
22541 		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
22542 		s = std::max(d__1,d__2);
22543 	    }
22544 	} else {
22545 	    s = *dmin2 * .25;
22546 	    *ttype = -11;
22547 	}
22548     } else if (*n0in > *n0 + 2) {
22549 
22550 /*        Case 12, more than two eigenvalues deflated. No information. */
22551 
22552 	s = 0.;
22553 	*ttype = -12;
22554     }
22555 
22556     *tau = s;
22557     return 0;
22558 
22559 /*     End of DLAZQ4 */
22560 
22561 } /* dlazq4_ */
22562