1*> \brief \b ZLAQR3 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation).
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZLAQR3 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr3.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr3.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr3.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
22*                          IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
23*                          NV, WV, LDWV, WORK, LWORK )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
27*      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
28*       LOGICAL            WANTT, WANTZ
29*       ..
30*       .. Array Arguments ..
31*       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
32*      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*>    Aggressive early deflation:
42*>
43*>    ZLAQR3 accepts as input an upper Hessenberg matrix
44*>    H and performs an unitary similarity transformation
45*>    designed to detect and deflate fully converged eigenvalues from
46*>    a trailing principal submatrix.  On output H has been over-
47*>    written by a new Hessenberg matrix that is a perturbation of
48*>    an unitary similarity transformation of H.  It is to be
49*>    hoped that the final version of H has many zero subdiagonal
50*>    entries.
51*>
52*> \endverbatim
53*
54*  Arguments:
55*  ==========
56*
57*> \param[in] WANTT
58*> \verbatim
59*>          WANTT is LOGICAL
60*>          If .TRUE., then the Hessenberg matrix H is fully updated
61*>          so that the triangular Schur factor may be
62*>          computed (in cooperation with the calling subroutine).
63*>          If .FALSE., then only enough of H is updated to preserve
64*>          the eigenvalues.
65*> \endverbatim
66*>
67*> \param[in] WANTZ
68*> \verbatim
69*>          WANTZ is LOGICAL
70*>          If .TRUE., then the unitary matrix Z is updated so
71*>          so that the unitary Schur factor may be computed
72*>          (in cooperation with the calling subroutine).
73*>          If .FALSE., then Z is not referenced.
74*> \endverbatim
75*>
76*> \param[in] N
77*> \verbatim
78*>          N is INTEGER
79*>          The order of the matrix H and (if WANTZ is .TRUE.) the
80*>          order of the unitary matrix Z.
81*> \endverbatim
82*>
83*> \param[in] KTOP
84*> \verbatim
85*>          KTOP is INTEGER
86*>          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
87*>          KBOT and KTOP together determine an isolated block
88*>          along the diagonal of the Hessenberg matrix.
89*> \endverbatim
90*>
91*> \param[in] KBOT
92*> \verbatim
93*>          KBOT is INTEGER
94*>          It is assumed without a check that either
95*>          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
96*>          determine an isolated block along the diagonal of the
97*>          Hessenberg matrix.
98*> \endverbatim
99*>
100*> \param[in] NW
101*> \verbatim
102*>          NW is INTEGER
103*>          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
104*> \endverbatim
105*>
106*> \param[in,out] H
107*> \verbatim
108*>          H is COMPLEX*16 array, dimension (LDH,N)
109*>          On input the initial N-by-N section of H stores the
110*>          Hessenberg matrix undergoing aggressive early deflation.
111*>          On output H has been transformed by a unitary
112*>          similarity transformation, perturbed, and the returned
113*>          to Hessenberg form that (it is to be hoped) has some
114*>          zero subdiagonal entries.
115*> \endverbatim
116*>
117*> \param[in] LDH
118*> \verbatim
119*>          LDH is integer
120*>          Leading dimension of H just as declared in the calling
121*>          subroutine.  N .LE. LDH
122*> \endverbatim
123*>
124*> \param[in] ILOZ
125*> \verbatim
126*>          ILOZ is INTEGER
127*> \endverbatim
128*>
129*> \param[in] IHIZ
130*> \verbatim
131*>          IHIZ is INTEGER
132*>          Specify the rows of Z to which transformations must be
133*>          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
134*> \endverbatim
135*>
136*> \param[in,out] Z
137*> \verbatim
138*>          Z is COMPLEX*16 array, dimension (LDZ,N)
139*>          IF WANTZ is .TRUE., then on output, the unitary
140*>          similarity transformation mentioned above has been
141*>          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
142*>          If WANTZ is .FALSE., then Z is unreferenced.
143*> \endverbatim
144*>
145*> \param[in] LDZ
146*> \verbatim
147*>          LDZ is integer
148*>          The leading dimension of Z just as declared in the
149*>          calling subroutine.  1 .LE. LDZ.
150*> \endverbatim
151*>
152*> \param[out] NS
153*> \verbatim
154*>          NS is integer
155*>          The number of unconverged (ie approximate) eigenvalues
156*>          returned in SR and SI that may be used as shifts by the
157*>          calling subroutine.
158*> \endverbatim
159*>
160*> \param[out] ND
161*> \verbatim
162*>          ND is integer
163*>          The number of converged eigenvalues uncovered by this
164*>          subroutine.
165*> \endverbatim
166*>
167*> \param[out] SH
168*> \verbatim
169*>          SH is COMPLEX*16 array, dimension KBOT
170*>          On output, approximate eigenvalues that may
171*>          be used for shifts are stored in SH(KBOT-ND-NS+1)
172*>          through SR(KBOT-ND).  Converged eigenvalues are
173*>          stored in SH(KBOT-ND+1) through SH(KBOT).
174*> \endverbatim
175*>
176*> \param[out] V
177*> \verbatim
178*>          V is COMPLEX*16 array, dimension (LDV,NW)
179*>          An NW-by-NW work array.
180*> \endverbatim
181*>
182*> \param[in] LDV
183*> \verbatim
184*>          LDV is integer scalar
185*>          The leading dimension of V just as declared in the
186*>          calling subroutine.  NW .LE. LDV
187*> \endverbatim
188*>
189*> \param[in] NH
190*> \verbatim
191*>          NH is integer scalar
192*>          The number of columns of T.  NH.GE.NW.
193*> \endverbatim
194*>
195*> \param[out] T
196*> \verbatim
197*>          T is COMPLEX*16 array, dimension (LDT,NW)
198*> \endverbatim
199*>
200*> \param[in] LDT
201*> \verbatim
202*>          LDT is integer
203*>          The leading dimension of T just as declared in the
204*>          calling subroutine.  NW .LE. LDT
205*> \endverbatim
206*>
207*> \param[in] NV
208*> \verbatim
209*>          NV is integer
210*>          The number of rows of work array WV available for
211*>          workspace.  NV.GE.NW.
212*> \endverbatim
213*>
214*> \param[out] WV
215*> \verbatim
216*>          WV is COMPLEX*16 array, dimension (LDWV,NW)
217*> \endverbatim
218*>
219*> \param[in] LDWV
220*> \verbatim
221*>          LDWV is integer
222*>          The leading dimension of W just as declared in the
223*>          calling subroutine.  NW .LE. LDV
224*> \endverbatim
225*>
226*> \param[out] WORK
227*> \verbatim
228*>          WORK is COMPLEX*16 array, dimension LWORK.
229*>          On exit, WORK(1) is set to an estimate of the optimal value
230*>          of LWORK for the given values of N, NW, KTOP and KBOT.
231*> \endverbatim
232*>
233*> \param[in] LWORK
234*> \verbatim
235*>          LWORK is integer
236*>          The dimension of the work array WORK.  LWORK = 2*NW
237*>          suffices, but greater efficiency may result from larger
238*>          values of LWORK.
239*>
240*>          If LWORK = -1, then a workspace query is assumed; ZLAQR3
241*>          only estimates the optimal workspace size for the given
242*>          values of N, NW, KTOP and KBOT.  The estimate is returned
243*>          in WORK(1).  No error message related to LWORK is issued
244*>          by XERBLA.  Neither H nor Z are accessed.
245*> \endverbatim
246*
247*  Authors:
248*  ========
249*
250*> \author Univ. of Tennessee
251*> \author Univ. of California Berkeley
252*> \author Univ. of Colorado Denver
253*> \author NAG Ltd.
254*
255*> \date September 2012
256*
257*> \ingroup complex16OTHERauxiliary
258*
259*> \par Contributors:
260*  ==================
261*>
262*>       Karen Braman and Ralph Byers, Department of Mathematics,
263*>       University of Kansas, USA
264*>
265*  =====================================================================
266      SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
267     $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
268     $                   NV, WV, LDWV, WORK, LWORK )
269*
270*  -- LAPACK auxiliary routine (version 3.4.2) --
271*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
272*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
273*     September 2012
274*
275*     .. Scalar Arguments ..
276      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
277     $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
278      LOGICAL            WANTT, WANTZ
279*     ..
280*     .. Array Arguments ..
281      COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
282     $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
283*     ..
284*
285*  ================================================================
286*
287*     .. Parameters ..
288      COMPLEX*16         ZERO, ONE
289      PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
290     $                   ONE = ( 1.0d0, 0.0d0 ) )
291      DOUBLE PRECISION   RZERO, RONE
292      PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
293*     ..
294*     .. Local Scalars ..
295      COMPLEX*16         BETA, CDUM, S, TAU
296      DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
297      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
298     $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
299     $                   LWKOPT, NMIN
300*     ..
301*     .. External Functions ..
302      DOUBLE PRECISION   DLAMCH
303      INTEGER            ILAENV
304      EXTERNAL           DLAMCH, ILAENV
305*     ..
306*     .. External Subroutines ..
307      EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
308     $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
309*     ..
310*     .. Intrinsic Functions ..
311      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
312*     ..
313*     .. Statement Functions ..
314      DOUBLE PRECISION   CABS1
315*     ..
316*     .. Statement Function definitions ..
317      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
318*     ..
319*     .. Executable Statements ..
320*
321*     ==== Estimate optimal workspace. ====
322*
323      JW = MIN( NW, KBOT-KTOP+1 )
324      IF( JW.LE.2 ) THEN
325         LWKOPT = 1
326      ELSE
327*
328*        ==== Workspace query call to ZGEHRD ====
329*
330         CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
331         LWK1 = INT( WORK( 1 ) )
332*
333*        ==== Workspace query call to ZUNMHR ====
334*
335         CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
336     $                WORK, -1, INFO )
337         LWK2 = INT( WORK( 1 ) )
338*
339*        ==== Workspace query call to ZLAQR4 ====
340*
341         CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,
342     $                LDV, WORK, -1, INFQR )
343         LWK3 = INT( WORK( 1 ) )
344*
345*        ==== Optimal workspace ====
346*
347         LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
348      END IF
349*
350*     ==== Quick return in case of workspace query. ====
351*
352      IF( LWORK.EQ.-1 ) THEN
353         WORK( 1 ) = DCMPLX( LWKOPT, 0 )
354         RETURN
355      END IF
356*
357*     ==== Nothing to do ...
358*     ... for an empty active block ... ====
359      NS = 0
360      ND = 0
361      WORK( 1 ) = ONE
362      IF( KTOP.GT.KBOT )
363     $   RETURN
364*     ... nor for an empty deflation window. ====
365      IF( NW.LT.1 )
366     $   RETURN
367*
368*     ==== Machine constants ====
369*
370      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
371      SAFMAX = RONE / SAFMIN
372      CALL DLABAD( SAFMIN, SAFMAX )
373      ULP = DLAMCH( 'PRECISION' )
374      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
375*
376*     ==== Setup deflation window ====
377*
378      JW = MIN( NW, KBOT-KTOP+1 )
379      KWTOP = KBOT - JW + 1
380      IF( KWTOP.EQ.KTOP ) THEN
381         S = ZERO
382      ELSE
383         S = H( KWTOP, KWTOP-1 )
384      END IF
385*
386      IF( KBOT.EQ.KWTOP ) THEN
387*
388*        ==== 1-by-1 deflation window: not much to do ====
389*
390         SH( KWTOP ) = H( KWTOP, KWTOP )
391         NS = 1
392         ND = 0
393         IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
394     $       KWTOP ) ) ) ) THEN
395            NS = 0
396            ND = 1
397            IF( KWTOP.GT.KTOP )
398     $         H( KWTOP, KWTOP-1 ) = ZERO
399         END IF
400         WORK( 1 ) = ONE
401         RETURN
402      END IF
403*
404*     ==== Convert to spike-triangular form.  (In case of a
405*     .    rare QR failure, this routine continues to do
406*     .    aggressive early deflation using that part of
407*     .    the deflation window that converged using INFQR
408*     .    here and there to keep track.) ====
409*
410      CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
411      CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
412*
413      CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
414      NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )
415      IF( JW.GT.NMIN ) THEN
416         CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
417     $                JW, V, LDV, WORK, LWORK, INFQR )
418      ELSE
419         CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
420     $                JW, V, LDV, INFQR )
421      END IF
422*
423*     ==== Deflation detection loop ====
424*
425      NS = JW
426      ILST = INFQR + 1
427      DO 10 KNT = INFQR + 1, JW
428*
429*        ==== Small spike tip deflation test ====
430*
431         FOO = CABS1( T( NS, NS ) )
432         IF( FOO.EQ.RZERO )
433     $      FOO = CABS1( S )
434         IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
435     $        THEN
436*
437*           ==== One more converged eigenvalue ====
438*
439            NS = NS - 1
440         ELSE
441*
442*           ==== One undeflatable eigenvalue.  Move it up out of the
443*           .    way.   (ZTREXC can not fail in this case.) ====
444*
445            IFST = NS
446            CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
447            ILST = ILST + 1
448         END IF
449   10 CONTINUE
450*
451*        ==== Return to Hessenberg form ====
452*
453      IF( NS.EQ.0 )
454     $   S = ZERO
455*
456      IF( NS.LT.JW ) THEN
457*
458*        ==== sorting the diagonal of T improves accuracy for
459*        .    graded matrices.  ====
460*
461         DO 30 I = INFQR + 1, NS
462            IFST = I
463            DO 20 J = I + 1, NS
464               IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
465     $            IFST = J
466   20       CONTINUE
467            ILST = I
468            IF( IFST.NE.ILST )
469     $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
470   30    CONTINUE
471      END IF
472*
473*     ==== Restore shift/eigenvalue array from T ====
474*
475      DO 40 I = INFQR + 1, JW
476         SH( KWTOP+I-1 ) = T( I, I )
477   40 CONTINUE
478*
479*
480      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
481         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
482*
483*           ==== Reflect spike back into lower triangle ====
484*
485            CALL ZCOPY( NS, V, LDV, WORK, 1 )
486            DO 50 I = 1, NS
487               WORK( I ) = DCONJG( WORK( I ) )
488   50       CONTINUE
489            BETA = WORK( 1 )
490            CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
491            WORK( 1 ) = ONE
492*
493            CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
494*
495            CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
496     $                  WORK( JW+1 ) )
497            CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
498     $                  WORK( JW+1 ) )
499            CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
500     $                  WORK( JW+1 ) )
501*
502            CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
503     $                   LWORK-JW, INFO )
504         END IF
505*
506*        ==== Copy updated reduced window into place ====
507*
508         IF( KWTOP.GT.1 )
509     $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
510         CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
511         CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
512     $               LDH+1 )
513*
514*        ==== Accumulate orthogonal matrix in order update
515*        .    H and Z, if requested.  ====
516*
517         IF( NS.GT.1 .AND. S.NE.ZERO )
518     $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
519     $                   WORK( JW+1 ), LWORK-JW, INFO )
520*
521*        ==== Update vertical slab in H ====
522*
523         IF( WANTT ) THEN
524            LTOP = 1
525         ELSE
526            LTOP = KTOP
527         END IF
528         DO 60 KROW = LTOP, KWTOP - 1, NV
529            KLN = MIN( NV, KWTOP-KROW )
530            CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
531     $                  LDH, V, LDV, ZERO, WV, LDWV )
532            CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
533   60    CONTINUE
534*
535*        ==== Update horizontal slab in H ====
536*
537         IF( WANTT ) THEN
538            DO 70 KCOL = KBOT + 1, N, NH
539               KLN = MIN( NH, N-KCOL+1 )
540               CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
541     $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
542               CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
543     $                      LDH )
544   70       CONTINUE
545         END IF
546*
547*        ==== Update vertical slab in Z ====
548*
549         IF( WANTZ ) THEN
550            DO 80 KROW = ILOZ, IHIZ, NV
551               KLN = MIN( NV, IHIZ-KROW+1 )
552               CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
553     $                     LDZ, V, LDV, ZERO, WV, LDWV )
554               CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
555     $                      LDZ )
556   80       CONTINUE
557         END IF
558      END IF
559*
560*     ==== Return the number of deflations ... ====
561*
562      ND = JW - NS
563*
564*     ==== ... and the number of shifts. (Subtracting
565*     .    INFQR from the spike length takes care
566*     .    of the case of a rare QR failure while
567*     .    calculating eigenvalues of the deflation
568*     .    window.)  ====
569*
570      NS = NS - INFQR
571*
572*      ==== Return optimal workspace. ====
573*
574      WORK( 1 ) = DCMPLX( LWKOPT, 0 )
575*
576*     ==== End of ZLAQR3 ====
577*
578      END
579