1*> \brief \b SLAED2 used by SSTEDC. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLAED2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaed2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaed2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaed2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
22*                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
23*
24*       .. Scalar Arguments ..
25*       INTEGER            INFO, K, LDQ, N, N1
26*       REAL               RHO
27*       ..
28*       .. Array Arguments ..
29*       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
30*      $                   INDXQ( * )
31*       REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
32*      $                   W( * ), Z( * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*> SLAED2 merges the two sets of eigenvalues together into a single
42*> sorted set.  Then it tries to deflate the size of the problem.
43*> There are two ways in which deflation can occur:  when two or more
44*> eigenvalues are close together or if there is a tiny entry in the
45*> Z vector.  For each such occurrence the order of the related secular
46*> equation problem is reduced by one.
47*> \endverbatim
48*
49*  Arguments:
50*  ==========
51*
52*> \param[out] K
53*> \verbatim
54*>          K is INTEGER
55*>         The number of non-deflated eigenvalues, and the order of the
56*>         related secular equation. 0 <= K <=N.
57*> \endverbatim
58*>
59*> \param[in] N
60*> \verbatim
61*>          N is INTEGER
62*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
63*> \endverbatim
64*>
65*> \param[in] N1
66*> \verbatim
67*>          N1 is INTEGER
68*>         The location of the last eigenvalue in the leading sub-matrix.
69*>         min(1,N) <= N1 <= N/2.
70*> \endverbatim
71*>
72*> \param[in,out] D
73*> \verbatim
74*>          D is REAL array, dimension (N)
75*>         On entry, D contains the eigenvalues of the two submatrices to
76*>         be combined.
77*>         On exit, D contains the trailing (N-K) updated eigenvalues
78*>         (those which were deflated) sorted into increasing order.
79*> \endverbatim
80*>
81*> \param[in,out] Q
82*> \verbatim
83*>          Q is REAL array, dimension (LDQ, N)
84*>         On entry, Q contains the eigenvectors of two submatrices in
85*>         the two square blocks with corners at (1,1), (N1,N1)
86*>         and (N1+1, N1+1), (N,N).
87*>         On exit, Q contains the trailing (N-K) updated eigenvectors
88*>         (those which were deflated) in its last N-K columns.
89*> \endverbatim
90*>
91*> \param[in] LDQ
92*> \verbatim
93*>          LDQ is INTEGER
94*>         The leading dimension of the array Q.  LDQ >= max(1,N).
95*> \endverbatim
96*>
97*> \param[in,out] INDXQ
98*> \verbatim
99*>          INDXQ is INTEGER array, dimension (N)
100*>         The permutation which separately sorts the two sub-problems
101*>         in D into ascending order.  Note that elements in the second
102*>         half of this permutation must first have N1 added to their
103*>         values. Destroyed on exit.
104*> \endverbatim
105*>
106*> \param[in,out] RHO
107*> \verbatim
108*>          RHO is REAL
109*>         On entry, the off-diagonal element associated with the rank-1
110*>         cut which originally split the two submatrices which are now
111*>         being recombined.
112*>         On exit, RHO has been modified to the value required by
113*>         SLAED3.
114*> \endverbatim
115*>
116*> \param[in] Z
117*> \verbatim
118*>          Z is REAL array, dimension (N)
119*>         On entry, Z contains the updating vector (the last
120*>         row of the first sub-eigenvector matrix and the first row of
121*>         the second sub-eigenvector matrix).
122*>         On exit, the contents of Z have been destroyed by the updating
123*>         process.
124*> \endverbatim
125*>
126*> \param[out] DLAMDA
127*> \verbatim
128*>          DLAMDA is REAL array, dimension (N)
129*>         A copy of the first K eigenvalues which will be used by
130*>         SLAED3 to form the secular equation.
131*> \endverbatim
132*>
133*> \param[out] W
134*> \verbatim
135*>          W is REAL array, dimension (N)
136*>         The first k values of the final deflation-altered z-vector
137*>         which will be passed to SLAED3.
138*> \endverbatim
139*>
140*> \param[out] Q2
141*> \verbatim
142*>          Q2 is REAL array, dimension (N1**2+(N-N1)**2)
143*>         A copy of the first K eigenvectors which will be used by
144*>         SLAED3 in a matrix multiply (SGEMM) to solve for the new
145*>         eigenvectors.
146*> \endverbatim
147*>
148*> \param[out] INDX
149*> \verbatim
150*>          INDX is INTEGER array, dimension (N)
151*>         The permutation used to sort the contents of DLAMDA into
152*>         ascending order.
153*> \endverbatim
154*>
155*> \param[out] INDXC
156*> \verbatim
157*>          INDXC is INTEGER array, dimension (N)
158*>         The permutation used to arrange the columns of the deflated
159*>         Q matrix into three groups:  the first group contains non-zero
160*>         elements only at and above N1, the second contains
161*>         non-zero elements only below N1, and the third is dense.
162*> \endverbatim
163*>
164*> \param[out] INDXP
165*> \verbatim
166*>          INDXP is INTEGER array, dimension (N)
167*>         The permutation used to place deflated values of D at the end
168*>         of the array.  INDXP(1:K) points to the nondeflated D-values
169*>         and INDXP(K+1:N) points to the deflated eigenvalues.
170*> \endverbatim
171*>
172*> \param[out] COLTYP
173*> \verbatim
174*>          COLTYP is INTEGER array, dimension (N)
175*>         During execution, a label which will indicate which of the
176*>         following types a column in the Q2 matrix is:
177*>         1 : non-zero in the upper half only;
178*>         2 : dense;
179*>         3 : non-zero in the lower half only;
180*>         4 : deflated.
181*>         On exit, COLTYP(i) is the number of columns of type i,
182*>         for i=1 to 4 only.
183*> \endverbatim
184*>
185*> \param[out] INFO
186*> \verbatim
187*>          INFO is INTEGER
188*>          = 0:  successful exit.
189*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
190*> \endverbatim
191*
192*  Authors:
193*  ========
194*
195*> \author Univ. of Tennessee
196*> \author Univ. of California Berkeley
197*> \author Univ. of Colorado Denver
198*> \author NAG Ltd.
199*
200*> \ingroup auxOTHERcomputational
201*
202*> \par Contributors:
203*  ==================
204*>
205*> Jeff Rutter, Computer Science Division, University of California
206*> at Berkeley, USA \n
207*>  Modified by Francoise Tisseur, University of Tennessee
208*>
209*  =====================================================================
210      SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
211     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
212*
213*  -- LAPACK computational routine --
214*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
215*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
216*
217*     .. Scalar Arguments ..
218      INTEGER            INFO, K, LDQ, N, N1
219      REAL               RHO
220*     ..
221*     .. Array Arguments ..
222      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
223     $                   INDXQ( * )
224      REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
225     $                   W( * ), Z( * )
226*     ..
227*
228*  =====================================================================
229*
230*     .. Parameters ..
231      REAL               MONE, ZERO, ONE, TWO, EIGHT
232      PARAMETER          ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0,
233     $                   TWO = 2.0E0, EIGHT = 8.0E0 )
234*     ..
235*     .. Local Arrays ..
236      INTEGER            CTOT( 4 ), PSM( 4 )
237*     ..
238*     .. Local Scalars ..
239      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
240     $                   N2, NJ, PJ
241      REAL               C, EPS, S, T, TAU, TOL
242*     ..
243*     .. External Functions ..
244      INTEGER            ISAMAX
245      REAL               SLAMCH, SLAPY2
246      EXTERNAL           ISAMAX, SLAMCH, SLAPY2
247*     ..
248*     .. External Subroutines ..
249      EXTERNAL           SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA
250*     ..
251*     .. Intrinsic Functions ..
252      INTRINSIC          ABS, MAX, MIN, SQRT
253*     ..
254*     .. Executable Statements ..
255*
256*     Test the input parameters.
257*
258      INFO = 0
259*
260      IF( N.LT.0 ) THEN
261         INFO = -2
262      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
263         INFO = -6
264      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
265         INFO = -3
266      END IF
267      IF( INFO.NE.0 ) THEN
268         CALL XERBLA( 'SLAED2', -INFO )
269         RETURN
270      END IF
271*
272*     Quick return if possible
273*
274      IF( N.EQ.0 )
275     $   RETURN
276*
277      N2 = N - N1
278      N1P1 = N1 + 1
279*
280      IF( RHO.LT.ZERO ) THEN
281         CALL SSCAL( N2, MONE, Z( N1P1 ), 1 )
282      END IF
283*
284*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
285*     two normalized vectors, norm2(z) = sqrt(2).
286*
287      T = ONE / SQRT( TWO )
288      CALL SSCAL( N, T, Z, 1 )
289*
290*     RHO = ABS( norm(z)**2 * RHO )
291*
292      RHO = ABS( TWO*RHO )
293*
294*     Sort the eigenvalues into increasing order
295*
296      DO 10 I = N1P1, N
297         INDXQ( I ) = INDXQ( I ) + N1
298   10 CONTINUE
299*
300*     re-integrate the deflated parts from the last pass
301*
302      DO 20 I = 1, N
303         DLAMDA( I ) = D( INDXQ( I ) )
304   20 CONTINUE
305      CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
306      DO 30 I = 1, N
307         INDX( I ) = INDXQ( INDXC( I ) )
308   30 CONTINUE
309*
310*     Calculate the allowable deflation tolerance
311*
312      IMAX = ISAMAX( N, Z, 1 )
313      JMAX = ISAMAX( N, D, 1 )
314      EPS = SLAMCH( 'Epsilon' )
315      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
316*
317*     If the rank-1 modifier is small enough, no more needs to be done
318*     except to reorganize Q so that its columns correspond with the
319*     elements in D.
320*
321      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
322         K = 0
323         IQ2 = 1
324         DO 40 J = 1, N
325            I = INDX( J )
326            CALL SCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
327            DLAMDA( J ) = D( I )
328            IQ2 = IQ2 + N
329   40    CONTINUE
330         CALL SLACPY( 'A', N, N, Q2, N, Q, LDQ )
331         CALL SCOPY( N, DLAMDA, 1, D, 1 )
332         GO TO 190
333      END IF
334*
335*     If there are multiple eigenvalues then the problem deflates.  Here
336*     the number of equal eigenvalues are found.  As each equal
337*     eigenvalue is found, an elementary reflector is computed to rotate
338*     the corresponding eigensubspace so that the corresponding
339*     components of Z are zero in this new basis.
340*
341      DO 50 I = 1, N1
342         COLTYP( I ) = 1
343   50 CONTINUE
344      DO 60 I = N1P1, N
345         COLTYP( I ) = 3
346   60 CONTINUE
347*
348*
349      K = 0
350      K2 = N + 1
351      DO 70 J = 1, N
352         NJ = INDX( J )
353         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
354*
355*           Deflate due to small z component.
356*
357            K2 = K2 - 1
358            COLTYP( NJ ) = 4
359            INDXP( K2 ) = NJ
360            IF( J.EQ.N )
361     $         GO TO 100
362         ELSE
363            PJ = NJ
364            GO TO 80
365         END IF
366   70 CONTINUE
367   80 CONTINUE
368      J = J + 1
369      NJ = INDX( J )
370      IF( J.GT.N )
371     $   GO TO 100
372      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
373*
374*        Deflate due to small z component.
375*
376         K2 = K2 - 1
377         COLTYP( NJ ) = 4
378         INDXP( K2 ) = NJ
379      ELSE
380*
381*        Check if eigenvalues are close enough to allow deflation.
382*
383         S = Z( PJ )
384         C = Z( NJ )
385*
386*        Find sqrt(a**2+b**2) without overflow or
387*        destructive underflow.
388*
389         TAU = SLAPY2( C, S )
390         T = D( NJ ) - D( PJ )
391         C = C / TAU
392         S = -S / TAU
393         IF( ABS( T*C*S ).LE.TOL ) THEN
394*
395*           Deflation is possible.
396*
397            Z( NJ ) = TAU
398            Z( PJ ) = ZERO
399            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
400     $         COLTYP( NJ ) = 2
401            COLTYP( PJ ) = 4
402            CALL SROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
403            T = D( PJ )*C**2 + D( NJ )*S**2
404            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
405            D( PJ ) = T
406            K2 = K2 - 1
407            I = 1
408   90       CONTINUE
409            IF( K2+I.LE.N ) THEN
410               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
411                  INDXP( K2+I-1 ) = INDXP( K2+I )
412                  INDXP( K2+I ) = PJ
413                  I = I + 1
414                  GO TO 90
415               ELSE
416                  INDXP( K2+I-1 ) = PJ
417               END IF
418            ELSE
419               INDXP( K2+I-1 ) = PJ
420            END IF
421            PJ = NJ
422         ELSE
423            K = K + 1
424            DLAMDA( K ) = D( PJ )
425            W( K ) = Z( PJ )
426            INDXP( K ) = PJ
427            PJ = NJ
428         END IF
429      END IF
430      GO TO 80
431  100 CONTINUE
432*
433*     Record the last eigenvalue.
434*
435      K = K + 1
436      DLAMDA( K ) = D( PJ )
437      W( K ) = Z( PJ )
438      INDXP( K ) = PJ
439*
440*     Count up the total number of the various types of columns, then
441*     form a permutation which positions the four column types into
442*     four uniform groups (although one or more of these groups may be
443*     empty).
444*
445      DO 110 J = 1, 4
446         CTOT( J ) = 0
447  110 CONTINUE
448      DO 120 J = 1, N
449         CT = COLTYP( J )
450         CTOT( CT ) = CTOT( CT ) + 1
451  120 CONTINUE
452*
453*     PSM(*) = Position in SubMatrix (of types 1 through 4)
454*
455      PSM( 1 ) = 1
456      PSM( 2 ) = 1 + CTOT( 1 )
457      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
458      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
459      K = N - CTOT( 4 )
460*
461*     Fill out the INDXC array so that the permutation which it induces
462*     will place all type-1 columns first, all type-2 columns next,
463*     then all type-3's, and finally all type-4's.
464*
465      DO 130 J = 1, N
466         JS = INDXP( J )
467         CT = COLTYP( JS )
468         INDX( PSM( CT ) ) = JS
469         INDXC( PSM( CT ) ) = J
470         PSM( CT ) = PSM( CT ) + 1
471  130 CONTINUE
472*
473*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
474*     and Q2 respectively.  The eigenvalues/vectors which were not
475*     deflated go into the first K slots of DLAMDA and Q2 respectively,
476*     while those which were deflated go into the last N - K slots.
477*
478      I = 1
479      IQ1 = 1
480      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
481      DO 140 J = 1, CTOT( 1 )
482         JS = INDX( I )
483         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
484         Z( I ) = D( JS )
485         I = I + 1
486         IQ1 = IQ1 + N1
487  140 CONTINUE
488*
489      DO 150 J = 1, CTOT( 2 )
490         JS = INDX( I )
491         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
492         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
493         Z( I ) = D( JS )
494         I = I + 1
495         IQ1 = IQ1 + N1
496         IQ2 = IQ2 + N2
497  150 CONTINUE
498*
499      DO 160 J = 1, CTOT( 3 )
500         JS = INDX( I )
501         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
502         Z( I ) = D( JS )
503         I = I + 1
504         IQ2 = IQ2 + N2
505  160 CONTINUE
506*
507      IQ1 = IQ2
508      DO 170 J = 1, CTOT( 4 )
509         JS = INDX( I )
510         CALL SCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
511         IQ2 = IQ2 + N
512         Z( I ) = D( JS )
513         I = I + 1
514  170 CONTINUE
515*
516*     The deflated eigenvalues and their corresponding vectors go back
517*     into the last N - K slots of D and Q respectively.
518*
519      IF( K.LT.N ) THEN
520         CALL SLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
521     $                Q( 1, K+1 ), LDQ )
522         CALL SCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
523      END IF
524*
525*     Copy CTOT into COLTYP for referencing in SLAED3.
526*
527      DO 180 J = 1, 4
528         COLTYP( J ) = CTOT( J )
529  180 CONTINUE
530*
531  190 CONTINUE
532      RETURN
533*
534*     End of SLAED2
535*
536      END
537