1*> \brief \b SLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLAED2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaed2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaed2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaed2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
22*                          Q2, INDX, INDXC, INDXP, COLTYP, INFO )
23*
24*       .. Scalar Arguments ..
25*       INTEGER            INFO, K, LDQ, N, N1
26*       REAL               RHO
27*       ..
28*       .. Array Arguments ..
29*       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
30*      $                   INDXQ( * )
31*       REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
32*      $                   W( * ), Z( * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*> SLAED2 merges the two sets of eigenvalues together into a single
42*> sorted set.  Then it tries to deflate the size of the problem.
43*> There are two ways in which deflation can occur:  when two or more
44*> eigenvalues are close together or if there is a tiny entry in the
45*> Z vector.  For each such occurrence the order of the related secular
46*> equation problem is reduced by one.
47*> \endverbatim
48*
49*  Arguments:
50*  ==========
51*
52*> \param[out] K
53*> \verbatim
54*>          K is INTEGER
55*>         The number of non-deflated eigenvalues, and the order of the
56*>         related secular equation. 0 <= K <=N.
57*> \endverbatim
58*>
59*> \param[in] N
60*> \verbatim
61*>          N is INTEGER
62*>         The dimension of the symmetric tridiagonal matrix.  N >= 0.
63*> \endverbatim
64*>
65*> \param[in] N1
66*> \verbatim
67*>          N1 is INTEGER
68*>         The location of the last eigenvalue in the leading sub-matrix.
69*>         min(1,N) <= N1 <= N/2.
70*> \endverbatim
71*>
72*> \param[in,out] D
73*> \verbatim
74*>          D is REAL array, dimension (N)
75*>         On entry, D contains the eigenvalues of the two submatrices to
76*>         be combined.
77*>         On exit, D contains the trailing (N-K) updated eigenvalues
78*>         (those which were deflated) sorted into increasing order.
79*> \endverbatim
80*>
81*> \param[in,out] Q
82*> \verbatim
83*>          Q is REAL array, dimension (LDQ, N)
84*>         On entry, Q contains the eigenvectors of two submatrices in
85*>         the two square blocks with corners at (1,1), (N1,N1)
86*>         and (N1+1, N1+1), (N,N).
87*>         On exit, Q contains the trailing (N-K) updated eigenvectors
88*>         (those which were deflated) in its last N-K columns.
89*> \endverbatim
90*>
91*> \param[in] LDQ
92*> \verbatim
93*>          LDQ is INTEGER
94*>         The leading dimension of the array Q.  LDQ >= max(1,N).
95*> \endverbatim
96*>
97*> \param[in,out] INDXQ
98*> \verbatim
99*>          INDXQ is INTEGER array, dimension (N)
100*>         The permutation which separately sorts the two sub-problems
101*>         in D into ascending order.  Note that elements in the second
102*>         half of this permutation must first have N1 added to their
103*>         values. Destroyed on exit.
104*> \endverbatim
105*>
106*> \param[in,out] RHO
107*> \verbatim
108*>          RHO is REAL
109*>         On entry, the off-diagonal element associated with the rank-1
110*>         cut which originally split the two submatrices which are now
111*>         being recombined.
112*>         On exit, RHO has been modified to the value required by
113*>         SLAED3.
114*> \endverbatim
115*>
116*> \param[in] Z
117*> \verbatim
118*>          Z is REAL array, dimension (N)
119*>         On entry, Z contains the updating vector (the last
120*>         row of the first sub-eigenvector matrix and the first row of
121*>         the second sub-eigenvector matrix).
122*>         On exit, the contents of Z have been destroyed by the updating
123*>         process.
124*> \endverbatim
125*>
126*> \param[out] DLAMDA
127*> \verbatim
128*>          DLAMDA is REAL array, dimension (N)
129*>         A copy of the first K eigenvalues which will be used by
130*>         SLAED3 to form the secular equation.
131*> \endverbatim
132*>
133*> \param[out] W
134*> \verbatim
135*>          W is REAL array, dimension (N)
136*>         The first k values of the final deflation-altered z-vector
137*>         which will be passed to SLAED3.
138*> \endverbatim
139*>
140*> \param[out] Q2
141*> \verbatim
142*>          Q2 is REAL array, dimension (N1**2+(N-N1)**2)
143*>         A copy of the first K eigenvectors which will be used by
144*>         SLAED3 in a matrix multiply (SGEMM) to solve for the new
145*>         eigenvectors.
146*> \endverbatim
147*>
148*> \param[out] INDX
149*> \verbatim
150*>          INDX is INTEGER array, dimension (N)
151*>         The permutation used to sort the contents of DLAMDA into
152*>         ascending order.
153*> \endverbatim
154*>
155*> \param[out] INDXC
156*> \verbatim
157*>          INDXC is INTEGER array, dimension (N)
158*>         The permutation used to arrange the columns of the deflated
159*>         Q matrix into three groups:  the first group contains non-zero
160*>         elements only at and above N1, the second contains
161*>         non-zero elements only below N1, and the third is dense.
162*> \endverbatim
163*>
164*> \param[out] INDXP
165*> \verbatim
166*>          INDXP is INTEGER array, dimension (N)
167*>         The permutation used to place deflated values of D at the end
168*>         of the array.  INDXP(1:K) points to the nondeflated D-values
169*>         and INDXP(K+1:N) points to the deflated eigenvalues.
170*> \endverbatim
171*>
172*> \param[out] COLTYP
173*> \verbatim
174*>          COLTYP is INTEGER array, dimension (N)
175*>         During execution, a label which will indicate which of the
176*>         following types a column in the Q2 matrix is:
177*>         1 : non-zero in the upper half only;
178*>         2 : dense;
179*>         3 : non-zero in the lower half only;
180*>         4 : deflated.
181*>         On exit, COLTYP(i) is the number of columns of type i,
182*>         for i=1 to 4 only.
183*> \endverbatim
184*>
185*> \param[out] INFO
186*> \verbatim
187*>          INFO is INTEGER
188*>          = 0:  successful exit.
189*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
190*> \endverbatim
191*
192*  Authors:
193*  ========
194*
195*> \author Univ. of Tennessee
196*> \author Univ. of California Berkeley
197*> \author Univ. of Colorado Denver
198*> \author NAG Ltd.
199*
200*> \date September 2012
201*
202*> \ingroup auxOTHERcomputational
203*
204*> \par Contributors:
205*  ==================
206*>
207*> Jeff Rutter, Computer Science Division, University of California
208*> at Berkeley, USA \n
209*>  Modified by Francoise Tisseur, University of Tennessee
210*>
211*  =====================================================================
212      SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
213     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
214*
215*  -- LAPACK computational routine (version 3.4.2) --
216*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
217*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
218*     September 2012
219*
220*     .. Scalar Arguments ..
221      INTEGER            INFO, K, LDQ, N, N1
222      REAL               RHO
223*     ..
224*     .. Array Arguments ..
225      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
226     $                   INDXQ( * )
227      REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
228     $                   W( * ), Z( * )
229*     ..
230*
231*  =====================================================================
232*
233*     .. Parameters ..
234      REAL               MONE, ZERO, ONE, TWO, EIGHT
235      PARAMETER          ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0,
236     $                   TWO = 2.0E0, EIGHT = 8.0E0 )
237*     ..
238*     .. Local Arrays ..
239      INTEGER            CTOT( 4 ), PSM( 4 )
240*     ..
241*     .. Local Scalars ..
242      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
243     $                   N2, NJ, PJ
244      REAL               C, EPS, S, T, TAU, TOL
245*     ..
246*     .. External Functions ..
247      INTEGER            ISAMAX
248      REAL               SLAMCH, SLAPY2
249      EXTERNAL           ISAMAX, SLAMCH, SLAPY2
250*     ..
251*     .. External Subroutines ..
252      EXTERNAL           SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA
253*     ..
254*     .. Intrinsic Functions ..
255      INTRINSIC          ABS, MAX, MIN, SQRT
256*     ..
257*     .. Executable Statements ..
258*
259*     Test the input parameters.
260*
261      INFO = 0
262*
263      IF( N.LT.0 ) THEN
264         INFO = -2
265      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
266         INFO = -6
267      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
268         INFO = -3
269      END IF
270      IF( INFO.NE.0 ) THEN
271         CALL XERBLA( 'SLAED2', -INFO )
272         RETURN
273      END IF
274*
275*     Quick return if possible
276*
277      IF( N.EQ.0 )
278     $   RETURN
279*
280      N2 = N - N1
281      N1P1 = N1 + 1
282*
283      IF( RHO.LT.ZERO ) THEN
284         CALL SSCAL( N2, MONE, Z( N1P1 ), 1 )
285      END IF
286*
287*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
288*     two normalized vectors, norm2(z) = sqrt(2).
289*
290      T = ONE / SQRT( TWO )
291      CALL SSCAL( N, T, Z, 1 )
292*
293*     RHO = ABS( norm(z)**2 * RHO )
294*
295      RHO = ABS( TWO*RHO )
296*
297*     Sort the eigenvalues into increasing order
298*
299      DO 10 I = N1P1, N
300         INDXQ( I ) = INDXQ( I ) + N1
301   10 CONTINUE
302*
303*     re-integrate the deflated parts from the last pass
304*
305      DO 20 I = 1, N
306         DLAMDA( I ) = D( INDXQ( I ) )
307   20 CONTINUE
308      CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
309      DO 30 I = 1, N
310         INDX( I ) = INDXQ( INDXC( I ) )
311   30 CONTINUE
312*
313*     Calculate the allowable deflation tolerance
314*
315      IMAX = ISAMAX( N, Z, 1 )
316      JMAX = ISAMAX( N, D, 1 )
317      EPS = SLAMCH( 'Epsilon' )
318      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
319*
320*     If the rank-1 modifier is small enough, no more needs to be done
321*     except to reorganize Q so that its columns correspond with the
322*     elements in D.
323*
324      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
325         K = 0
326         IQ2 = 1
327         DO 40 J = 1, N
328            I = INDX( J )
329            CALL SCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
330            DLAMDA( J ) = D( I )
331            IQ2 = IQ2 + N
332   40    CONTINUE
333         CALL SLACPY( 'A', N, N, Q2, N, Q, LDQ )
334         CALL SCOPY( N, DLAMDA, 1, D, 1 )
335         GO TO 190
336      END IF
337*
338*     If there are multiple eigenvalues then the problem deflates.  Here
339*     the number of equal eigenvalues are found.  As each equal
340*     eigenvalue is found, an elementary reflector is computed to rotate
341*     the corresponding eigensubspace so that the corresponding
342*     components of Z are zero in this new basis.
343*
344      DO 50 I = 1, N1
345         COLTYP( I ) = 1
346   50 CONTINUE
347      DO 60 I = N1P1, N
348         COLTYP( I ) = 3
349   60 CONTINUE
350*
351*
352      K = 0
353      K2 = N + 1
354      DO 70 J = 1, N
355         NJ = INDX( J )
356         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
357*
358*           Deflate due to small z component.
359*
360            K2 = K2 - 1
361            COLTYP( NJ ) = 4
362            INDXP( K2 ) = NJ
363            IF( J.EQ.N )
364     $         GO TO 100
365         ELSE
366            PJ = NJ
367            GO TO 80
368         END IF
369   70 CONTINUE
370   80 CONTINUE
371      J = J + 1
372      NJ = INDX( J )
373      IF( J.GT.N )
374     $   GO TO 100
375      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
376*
377*        Deflate due to small z component.
378*
379         K2 = K2 - 1
380         COLTYP( NJ ) = 4
381         INDXP( K2 ) = NJ
382      ELSE
383*
384*        Check if eigenvalues are close enough to allow deflation.
385*
386         S = Z( PJ )
387         C = Z( NJ )
388*
389*        Find sqrt(a**2+b**2) without overflow or
390*        destructive underflow.
391*
392         TAU = SLAPY2( C, S )
393         T = D( NJ ) - D( PJ )
394         C = C / TAU
395         S = -S / TAU
396         IF( ABS( T*C*S ).LE.TOL ) THEN
397*
398*           Deflation is possible.
399*
400            Z( NJ ) = TAU
401            Z( PJ ) = ZERO
402            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
403     $         COLTYP( NJ ) = 2
404            COLTYP( PJ ) = 4
405            CALL SROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
406            T = D( PJ )*C**2 + D( NJ )*S**2
407            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
408            D( PJ ) = T
409            K2 = K2 - 1
410            I = 1
411   90       CONTINUE
412            IF( K2+I.LE.N ) THEN
413               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
414                  INDXP( K2+I-1 ) = INDXP( K2+I )
415                  INDXP( K2+I ) = PJ
416                  I = I + 1
417                  GO TO 90
418               ELSE
419                  INDXP( K2+I-1 ) = PJ
420               END IF
421            ELSE
422               INDXP( K2+I-1 ) = PJ
423            END IF
424            PJ = NJ
425         ELSE
426            K = K + 1
427            DLAMDA( K ) = D( PJ )
428            W( K ) = Z( PJ )
429            INDXP( K ) = PJ
430            PJ = NJ
431         END IF
432      END IF
433      GO TO 80
434  100 CONTINUE
435*
436*     Record the last eigenvalue.
437*
438      K = K + 1
439      DLAMDA( K ) = D( PJ )
440      W( K ) = Z( PJ )
441      INDXP( K ) = PJ
442*
443*     Count up the total number of the various types of columns, then
444*     form a permutation which positions the four column types into
445*     four uniform groups (although one or more of these groups may be
446*     empty).
447*
448      DO 110 J = 1, 4
449         CTOT( J ) = 0
450  110 CONTINUE
451      DO 120 J = 1, N
452         CT = COLTYP( J )
453         CTOT( CT ) = CTOT( CT ) + 1
454  120 CONTINUE
455*
456*     PSM(*) = Position in SubMatrix (of types 1 through 4)
457*
458      PSM( 1 ) = 1
459      PSM( 2 ) = 1 + CTOT( 1 )
460      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
461      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
462      K = N - CTOT( 4 )
463*
464*     Fill out the INDXC array so that the permutation which it induces
465*     will place all type-1 columns first, all type-2 columns next,
466*     then all type-3's, and finally all type-4's.
467*
468      DO 130 J = 1, N
469         JS = INDXP( J )
470         CT = COLTYP( JS )
471         INDX( PSM( CT ) ) = JS
472         INDXC( PSM( CT ) ) = J
473         PSM( CT ) = PSM( CT ) + 1
474  130 CONTINUE
475*
476*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
477*     and Q2 respectively.  The eigenvalues/vectors which were not
478*     deflated go into the first K slots of DLAMDA and Q2 respectively,
479*     while those which were deflated go into the last N - K slots.
480*
481      I = 1
482      IQ1 = 1
483      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
484      DO 140 J = 1, CTOT( 1 )
485         JS = INDX( I )
486         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
487         Z( I ) = D( JS )
488         I = I + 1
489         IQ1 = IQ1 + N1
490  140 CONTINUE
491*
492      DO 150 J = 1, CTOT( 2 )
493         JS = INDX( I )
494         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
495         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
496         Z( I ) = D( JS )
497         I = I + 1
498         IQ1 = IQ1 + N1
499         IQ2 = IQ2 + N2
500  150 CONTINUE
501*
502      DO 160 J = 1, CTOT( 3 )
503         JS = INDX( I )
504         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
505         Z( I ) = D( JS )
506         I = I + 1
507         IQ2 = IQ2 + N2
508  160 CONTINUE
509*
510      IQ1 = IQ2
511      DO 170 J = 1, CTOT( 4 )
512         JS = INDX( I )
513         CALL SCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
514         IQ2 = IQ2 + N
515         Z( I ) = D( JS )
516         I = I + 1
517  170 CONTINUE
518*
519*     The deflated eigenvalues and their corresponding vectors go back
520*     into the last N - K slots of D and Q respectively.
521*
522      IF( K.LT.N ) THEN
523         CALL SLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N,
524     $                Q( 1, K+1 ), LDQ )
525         CALL SCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
526      END IF
527*
528*     Copy CTOT into COLTYP for referencing in SLAED3.
529*
530      DO 180 J = 1, 4
531         COLTYP( J ) = CTOT( J )
532  180 CONTINUE
533*
534  190 CONTINUE
535      RETURN
536*
537*     End of SLAED2
538*
539      END
540