1*> \brief \b ZLAQR2 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation).
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZLAQR2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
22*                          IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
23*                          NV, WV, LDWV, WORK, LWORK )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
27*      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
28*       LOGICAL            WANTT, WANTZ
29*       ..
30*       .. Array Arguments ..
31*       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
32*      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*>    ZLAQR2 is identical to ZLAQR3 except that it avoids
42*>    recursion by calling ZLAHQR instead of ZLAQR4.
43*>
44*>    Aggressive early deflation:
45*>
46*>    ZLAQR2 accepts as input an upper Hessenberg matrix
47*>    H and performs an unitary similarity transformation
48*>    designed to detect and deflate fully converged eigenvalues from
49*>    a trailing principal submatrix.  On output H has been over-
50*>    written by a new Hessenberg matrix that is a perturbation of
51*>    an unitary similarity transformation of H.  It is to be
52*>    hoped that the final version of H has many zero subdiagonal
53*>    entries.
54*>
55*> \endverbatim
56*
57*  Arguments:
58*  ==========
59*
60*> \param[in] WANTT
61*> \verbatim
62*>          WANTT is LOGICAL
63*>          If .TRUE., then the Hessenberg matrix H is fully updated
64*>          so that the triangular Schur factor may be
65*>          computed (in cooperation with the calling subroutine).
66*>          If .FALSE., then only enough of H is updated to preserve
67*>          the eigenvalues.
68*> \endverbatim
69*>
70*> \param[in] WANTZ
71*> \verbatim
72*>          WANTZ is LOGICAL
73*>          If .TRUE., then the unitary matrix Z is updated so
74*>          so that the unitary Schur factor may be computed
75*>          (in cooperation with the calling subroutine).
76*>          If .FALSE., then Z is not referenced.
77*> \endverbatim
78*>
79*> \param[in] N
80*> \verbatim
81*>          N is INTEGER
82*>          The order of the matrix H and (if WANTZ is .TRUE.) the
83*>          order of the unitary matrix Z.
84*> \endverbatim
85*>
86*> \param[in] KTOP
87*> \verbatim
88*>          KTOP is INTEGER
89*>          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
90*>          KBOT and KTOP together determine an isolated block
91*>          along the diagonal of the Hessenberg matrix.
92*> \endverbatim
93*>
94*> \param[in] KBOT
95*> \verbatim
96*>          KBOT is INTEGER
97*>          It is assumed without a check that either
98*>          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
99*>          determine an isolated block along the diagonal of the
100*>          Hessenberg matrix.
101*> \endverbatim
102*>
103*> \param[in] NW
104*> \verbatim
105*>          NW is INTEGER
106*>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
107*> \endverbatim
108*>
109*> \param[in,out] H
110*> \verbatim
111*>          H is COMPLEX*16 array, dimension (LDH,N)
112*>          On input the initial N-by-N section of H stores the
113*>          Hessenberg matrix undergoing aggressive early deflation.
114*>          On output H has been transformed by a unitary
115*>          similarity transformation, perturbed, and the returned
116*>          to Hessenberg form that (it is to be hoped) has some
117*>          zero subdiagonal entries.
118*> \endverbatim
119*>
120*> \param[in] LDH
121*> \verbatim
122*>          LDH is integer
123*>          Leading dimension of H just as declared in the calling
124*>          subroutine.  N .LE. LDH
125*> \endverbatim
126*>
127*> \param[in] ILOZ
128*> \verbatim
129*>          ILOZ is INTEGER
130*> \endverbatim
131*>
132*> \param[in] IHIZ
133*> \verbatim
134*>          IHIZ is INTEGER
135*>          Specify the rows of Z to which transformations must be
136*>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
137*> \endverbatim
138*>
139*> \param[in,out] Z
140*> \verbatim
141*>          Z is COMPLEX*16 array, dimension (LDZ,N)
142*>          IF WANTZ is .TRUE., then on output, the unitary
143*>          similarity transformation mentioned above has been
144*>          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
145*>          If WANTZ is .FALSE., then Z is unreferenced.
146*> \endverbatim
147*>
148*> \param[in] LDZ
149*> \verbatim
150*>          LDZ is integer
151*>          The leading dimension of Z just as declared in the
152*>          calling subroutine.  1 .LE. LDZ.
153*> \endverbatim
154*>
155*> \param[out] NS
156*> \verbatim
157*>          NS is integer
158*>          The number of unconverged (ie approximate) eigenvalues
159*>          returned in SR and SI that may be used as shifts by the
160*>          calling subroutine.
161*> \endverbatim
162*>
163*> \param[out] ND
164*> \verbatim
165*>          ND is integer
166*>          The number of converged eigenvalues uncovered by this
167*>          subroutine.
168*> \endverbatim
169*>
170*> \param[out] SH
171*> \verbatim
172*>          SH is COMPLEX*16 array, dimension KBOT
173*>          On output, approximate eigenvalues that may
174*>          be used for shifts are stored in SH(KBOT-ND-NS+1)
175*>          through SR(KBOT-ND).  Converged eigenvalues are
176*>          stored in SH(KBOT-ND+1) through SH(KBOT).
177*> \endverbatim
178*>
179*> \param[out] V
180*> \verbatim
181*>          V is COMPLEX*16 array, dimension (LDV,NW)
182*>          An NW-by-NW work array.
183*> \endverbatim
184*>
185*> \param[in] LDV
186*> \verbatim
187*>          LDV is integer scalar
188*>          The leading dimension of V just as declared in the
189*>          calling subroutine.  NW .LE. LDV
190*> \endverbatim
191*>
192*> \param[in] NH
193*> \verbatim
194*>          NH is integer scalar
195*>          The number of columns of T.  NH.GE.NW.
196*> \endverbatim
197*>
198*> \param[out] T
199*> \verbatim
200*>          T is COMPLEX*16 array, dimension (LDT,NW)
201*> \endverbatim
202*>
203*> \param[in] LDT
204*> \verbatim
205*>          LDT is integer
206*>          The leading dimension of T just as declared in the
207*>          calling subroutine.  NW .LE. LDT
208*> \endverbatim
209*>
210*> \param[in] NV
211*> \verbatim
212*>          NV is integer
213*>          The number of rows of work array WV available for
214*>          workspace.  NV.GE.NW.
215*> \endverbatim
216*>
217*> \param[out] WV
218*> \verbatim
219*>          WV is COMPLEX*16 array, dimension (LDWV,NW)
220*> \endverbatim
221*>
222*> \param[in] LDWV
223*> \verbatim
224*>          LDWV is integer
225*>          The leading dimension of W just as declared in the
226*>          calling subroutine.  NW .LE. LDV
227*> \endverbatim
228*>
229*> \param[out] WORK
230*> \verbatim
231*>          WORK is COMPLEX*16 array, dimension LWORK.
232*>          On exit, WORK(1) is set to an estimate of the optimal value
233*>          of LWORK for the given values of N, NW, KTOP and KBOT.
234*> \endverbatim
235*>
236*> \param[in] LWORK
237*> \verbatim
238*>          LWORK is integer
239*>          The dimension of the work array WORK.  LWORK = 2*NW
240*>          suffices, but greater efficiency may result from larger
241*>          values of LWORK.
242*>
243*>          If LWORK = -1, then a workspace query is assumed; ZLAQR2
244*>          only estimates the optimal workspace size for the given
245*>          values of N, NW, KTOP and KBOT.  The estimate is returned
246*>          in WORK(1).  No error message related to LWORK is issued
247*>          by XERBLA.  Neither H nor Z are accessed.
248*> \endverbatim
249*
250*  Authors:
251*  ========
252*
253*> \author Univ. of Tennessee
254*> \author Univ. of California Berkeley
255*> \author Univ. of Colorado Denver
256*> \author NAG Ltd.
257*
258*> \date September 2012
259*
260*> \ingroup complex16OTHERauxiliary
261*
262*> \par Contributors:
263*  ==================
264*>
265*>       Karen Braman and Ralph Byers, Department of Mathematics,
266*>       University of Kansas, USA
267*>
268*  =====================================================================
269      SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
270     $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
271     $                   NV, WV, LDWV, WORK, LWORK )
272*
273*  -- LAPACK auxiliary routine (version 3.4.2) --
274*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
275*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
276*     September 2012
277*
278*     .. Scalar Arguments ..
279      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
280     $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
281      LOGICAL            WANTT, WANTZ
282*     ..
283*     .. Array Arguments ..
284      COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
285     $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
286*     ..
287*
288*  ================================================================
289*
290*     .. Parameters ..
291      COMPLEX*16         ZERO, ONE
292      PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
293     $                   ONE = ( 1.0d0, 0.0d0 ) )
294      DOUBLE PRECISION   RZERO, RONE
295      PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
296*     ..
297*     .. Local Scalars ..
298      COMPLEX*16         BETA, CDUM, S, TAU
299      DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
300      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
301     $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
302*     ..
303*     .. External Functions ..
304      DOUBLE PRECISION   DLAMCH
305      EXTERNAL           DLAMCH
306*     ..
307*     .. External Subroutines ..
308      EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
309     $                   ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
310*     ..
311*     .. Intrinsic Functions ..
312      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
313*     ..
314*     .. Statement Functions ..
315      DOUBLE PRECISION   CABS1
316*     ..
317*     .. Statement Function definitions ..
318      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
319*     ..
320*     .. Executable Statements ..
321*
322*     ==== Estimate optimal workspace. ====
323*
324      JW = MIN( NW, KBOT-KTOP+1 )
325      IF( JW.LE.2 ) THEN
326         LWKOPT = 1
327      ELSE
328*
329*        ==== Workspace query call to ZGEHRD ====
330*
331         CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
332         LWK1 = INT( WORK( 1 ) )
333*
334*        ==== Workspace query call to ZUNMHR ====
335*
336         CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
337     $                WORK, -1, INFO )
338         LWK2 = INT( WORK( 1 ) )
339*
340*        ==== Optimal workspace ====
341*
342         LWKOPT = JW + MAX( LWK1, LWK2 )
343      END IF
344*
345*     ==== Quick return in case of workspace query. ====
346*
347      IF( LWORK.EQ.-1 ) THEN
348         WORK( 1 ) = DCMPLX( LWKOPT, 0 )
349         RETURN
350      END IF
351*
352*     ==== Nothing to do ...
353*     ... for an empty active block ... ====
354      NS = 0
355      ND = 0
356      WORK( 1 ) = ONE
357      IF( KTOP.GT.KBOT )
358     $   RETURN
359*     ... nor for an empty deflation window. ====
360      IF( NW.LT.1 )
361     $   RETURN
362*
363*     ==== Machine constants ====
364*
365      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
366      SAFMAX = RONE / SAFMIN
367      CALL DLABAD( SAFMIN, SAFMAX )
368      ULP = DLAMCH( 'PRECISION' )
369      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
370*
371*     ==== Setup deflation window ====
372*
373      JW = MIN( NW, KBOT-KTOP+1 )
374      KWTOP = KBOT - JW + 1
375      IF( KWTOP.EQ.KTOP ) THEN
376         S = ZERO
377      ELSE
378         S = H( KWTOP, KWTOP-1 )
379      END IF
380*
381      IF( KBOT.EQ.KWTOP ) THEN
382*
383*        ==== 1-by-1 deflation window: not much to do ====
384*
385         SH( KWTOP ) = H( KWTOP, KWTOP )
386         NS = 1
387         ND = 0
388         IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
389     $       KWTOP ) ) ) ) THEN
390            NS = 0
391            ND = 1
392            IF( KWTOP.GT.KTOP )
393     $         H( KWTOP, KWTOP-1 ) = ZERO
394         END IF
395         WORK( 1 ) = ONE
396         RETURN
397      END IF
398*
399*     ==== Convert to spike-triangular form.  (In case of a
400*     .    rare QR failure, this routine continues to do
401*     .    aggressive early deflation using that part of
402*     .    the deflation window that converged using INFQR
403*     .    here and there to keep track.) ====
404*
405      CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
406      CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
407*
408      CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
409      CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
410     $             JW, V, LDV, INFQR )
411*
412*     ==== Deflation detection loop ====
413*
414      NS = JW
415      ILST = INFQR + 1
416      DO 10 KNT = INFQR + 1, JW
417*
418*        ==== Small spike tip deflation test ====
419*
420         FOO = CABS1( T( NS, NS ) )
421         IF( FOO.EQ.RZERO )
422     $      FOO = CABS1( S )
423         IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
424     $        THEN
425*
426*           ==== One more converged eigenvalue ====
427*
428            NS = NS - 1
429         ELSE
430*
431*           ==== One undeflatable eigenvalue.  Move it up out of the
432*           .    way.   (ZTREXC can not fail in this case.) ====
433*
434            IFST = NS
435            CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
436            ILST = ILST + 1
437         END IF
438   10 CONTINUE
439*
440*        ==== Return to Hessenberg form ====
441*
442      IF( NS.EQ.0 )
443     $   S = ZERO
444*
445      IF( NS.LT.JW ) THEN
446*
447*        ==== sorting the diagonal of T improves accuracy for
448*        .    graded matrices.  ====
449*
450         DO 30 I = INFQR + 1, NS
451            IFST = I
452            DO 20 J = I + 1, NS
453               IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
454     $            IFST = J
455   20       CONTINUE
456            ILST = I
457            IF( IFST.NE.ILST )
458     $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
459   30    CONTINUE
460      END IF
461*
462*     ==== Restore shift/eigenvalue array from T ====
463*
464      DO 40 I = INFQR + 1, JW
465         SH( KWTOP+I-1 ) = T( I, I )
466   40 CONTINUE
467*
468*
469      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
470         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
471*
472*           ==== Reflect spike back into lower triangle ====
473*
474            CALL ZCOPY( NS, V, LDV, WORK, 1 )
475            DO 50 I = 1, NS
476               WORK( I ) = DCONJG( WORK( I ) )
477   50       CONTINUE
478            BETA = WORK( 1 )
479            CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
480            WORK( 1 ) = ONE
481*
482            CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
483*
484            CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
485     $                  WORK( JW+1 ) )
486            CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
487     $                  WORK( JW+1 ) )
488            CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
489     $                  WORK( JW+1 ) )
490*
491            CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
492     $                   LWORK-JW, INFO )
493         END IF
494*
495*        ==== Copy updated reduced window into place ====
496*
497         IF( KWTOP.GT.1 )
498     $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
499         CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
500         CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
501     $               LDH+1 )
502*
503*        ==== Accumulate orthogonal matrix in order update
504*        .    H and Z, if requested.  ====
505*
506         IF( NS.GT.1 .AND. S.NE.ZERO )
507     $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
508     $                   WORK( JW+1 ), LWORK-JW, INFO )
509*
510*        ==== Update vertical slab in H ====
511*
512         IF( WANTT ) THEN
513            LTOP = 1
514         ELSE
515            LTOP = KTOP
516         END IF
517         DO 60 KROW = LTOP, KWTOP - 1, NV
518            KLN = MIN( NV, KWTOP-KROW )
519            CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
520     $                  LDH, V, LDV, ZERO, WV, LDWV )
521            CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
522   60    CONTINUE
523*
524*        ==== Update horizontal slab in H ====
525*
526         IF( WANTT ) THEN
527            DO 70 KCOL = KBOT + 1, N, NH
528               KLN = MIN( NH, N-KCOL+1 )
529               CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
530     $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
531               CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
532     $                      LDH )
533   70       CONTINUE
534         END IF
535*
536*        ==== Update vertical slab in Z ====
537*
538         IF( WANTZ ) THEN
539            DO 80 KROW = ILOZ, IHIZ, NV
540               KLN = MIN( NV, IHIZ-KROW+1 )
541               CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
542     $                     LDZ, V, LDV, ZERO, WV, LDWV )
543               CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
544     $                      LDZ )
545   80       CONTINUE
546         END IF
547      END IF
548*
549*     ==== Return the number of deflations ... ====
550*
551      ND = JW - NS
552*
553*     ==== ... and the number of shifts. (Subtracting
554*     .    INFQR from the spike length takes care
555*     .    of the case of a rare QR failure while
556*     .    calculating eigenvalues of the deflation
557*     .    window.)  ====
558*
559      NS = NS - INFQR
560*
561*      ==== Return optimal workspace. ====
562*
563      WORK( 1 ) = DCMPLX( LWKOPT, 0 )
564*
565*     ==== End of ZLAQR2 ====
566*
567      END
568c $Id$
569