1*> \brief \b CLAQR2 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation).
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download CLAQR2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/claqr2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/claqr2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/claqr2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
22*                          IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
23*                          NV, WV, LDWV, WORK, LWORK )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
27*      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
28*       LOGICAL            WANTT, WANTZ
29*       ..
30*       .. Array Arguments ..
31*       COMPLEX            H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
32*      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*>    CLAQR2 is identical to CLAQR3 except that it avoids
42*>    recursion by calling CLAHQR instead of CLAQR4.
43*>
44*>    Aggressive early deflation:
45*>
46*>    This subroutine accepts as input an upper Hessenberg matrix
47*>    H and performs an unitary similarity transformation
48*>    designed to detect and deflate fully converged eigenvalues from
49*>    a trailing principal submatrix.  On output H has been over-
50*>    written by a new Hessenberg matrix that is a perturbation of
51*>    an unitary similarity transformation of H.  It is to be
52*>    hoped that the final version of H has many zero subdiagonal
53*>    entries.
54*> \endverbatim
55*
56*  Arguments:
57*  ==========
58*
59*> \param[in] WANTT
60*> \verbatim
61*>          WANTT is LOGICAL
62*>          If .TRUE., then the Hessenberg matrix H is fully updated
63*>          so that the triangular Schur factor may be
64*>          computed (in cooperation with the calling subroutine).
65*>          If .FALSE., then only enough of H is updated to preserve
66*>          the eigenvalues.
67*> \endverbatim
68*>
69*> \param[in] WANTZ
70*> \verbatim
71*>          WANTZ is LOGICAL
72*>          If .TRUE., then the unitary matrix Z is updated so
73*>          so that the unitary Schur factor may be computed
74*>          (in cooperation with the calling subroutine).
75*>          If .FALSE., then Z is not referenced.
76*> \endverbatim
77*>
78*> \param[in] N
79*> \verbatim
80*>          N is INTEGER
81*>          The order of the matrix H and (if WANTZ is .TRUE.) the
82*>          order of the unitary matrix Z.
83*> \endverbatim
84*>
85*> \param[in] KTOP
86*> \verbatim
87*>          KTOP is INTEGER
88*>          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
89*>          KBOT and KTOP together determine an isolated block
90*>          along the diagonal of the Hessenberg matrix.
91*> \endverbatim
92*>
93*> \param[in] KBOT
94*> \verbatim
95*>          KBOT is INTEGER
96*>          It is assumed without a check that either
97*>          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
98*>          determine an isolated block along the diagonal of the
99*>          Hessenberg matrix.
100*> \endverbatim
101*>
102*> \param[in] NW
103*> \verbatim
104*>          NW is INTEGER
105*>          Deflation window size.  1 <= NW <= (KBOT-KTOP+1).
106*> \endverbatim
107*>
108*> \param[in,out] H
109*> \verbatim
110*>          H is COMPLEX array, dimension (LDH,N)
111*>          On input the initial N-by-N section of H stores the
112*>          Hessenberg matrix undergoing aggressive early deflation.
113*>          On output H has been transformed by a unitary
114*>          similarity transformation, perturbed, and the returned
115*>          to Hessenberg form that (it is to be hoped) has some
116*>          zero subdiagonal entries.
117*> \endverbatim
118*>
119*> \param[in] LDH
120*> \verbatim
121*>          LDH is INTEGER
122*>          Leading dimension of H just as declared in the calling
123*>          subroutine.  N <= LDH
124*> \endverbatim
125*>
126*> \param[in] ILOZ
127*> \verbatim
128*>          ILOZ is INTEGER
129*> \endverbatim
130*>
131*> \param[in] IHIZ
132*> \verbatim
133*>          IHIZ is INTEGER
134*>          Specify the rows of Z to which transformations must be
135*>          applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N.
136*> \endverbatim
137*>
138*> \param[in,out] Z
139*> \verbatim
140*>          Z is COMPLEX array, dimension (LDZ,N)
141*>          IF WANTZ is .TRUE., then on output, the unitary
142*>          similarity transformation mentioned above has been
143*>          accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
144*>          If WANTZ is .FALSE., then Z is unreferenced.
145*> \endverbatim
146*>
147*> \param[in] LDZ
148*> \verbatim
149*>          LDZ is INTEGER
150*>          The leading dimension of Z just as declared in the
151*>          calling subroutine.  1 <= LDZ.
152*> \endverbatim
153*>
154*> \param[out] NS
155*> \verbatim
156*>          NS is INTEGER
157*>          The number of unconverged (ie approximate) eigenvalues
158*>          returned in SR and SI that may be used as shifts by the
159*>          calling subroutine.
160*> \endverbatim
161*>
162*> \param[out] ND
163*> \verbatim
164*>          ND is INTEGER
165*>          The number of converged eigenvalues uncovered by this
166*>          subroutine.
167*> \endverbatim
168*>
169*> \param[out] SH
170*> \verbatim
171*>          SH is COMPLEX array, dimension (KBOT)
172*>          On output, approximate eigenvalues that may
173*>          be used for shifts are stored in SH(KBOT-ND-NS+1)
174*>          through SR(KBOT-ND).  Converged eigenvalues are
175*>          stored in SH(KBOT-ND+1) through SH(KBOT).
176*> \endverbatim
177*>
178*> \param[out] V
179*> \verbatim
180*>          V is COMPLEX array, dimension (LDV,NW)
181*>          An NW-by-NW work array.
182*> \endverbatim
183*>
184*> \param[in] LDV
185*> \verbatim
186*>          LDV is INTEGER
187*>          The leading dimension of V just as declared in the
188*>          calling subroutine.  NW <= LDV
189*> \endverbatim
190*>
191*> \param[in] NH
192*> \verbatim
193*>          NH is INTEGER
194*>          The number of columns of T.  NH >= NW.
195*> \endverbatim
196*>
197*> \param[out] T
198*> \verbatim
199*>          T is COMPLEX array, dimension (LDT,NW)
200*> \endverbatim
201*>
202*> \param[in] LDT
203*> \verbatim
204*>          LDT is INTEGER
205*>          The leading dimension of T just as declared in the
206*>          calling subroutine.  NW <= LDT
207*> \endverbatim
208*>
209*> \param[in] NV
210*> \verbatim
211*>          NV is INTEGER
212*>          The number of rows of work array WV available for
213*>          workspace.  NV >= NW.
214*> \endverbatim
215*>
216*> \param[out] WV
217*> \verbatim
218*>          WV is COMPLEX array, dimension (LDWV,NW)
219*> \endverbatim
220*>
221*> \param[in] LDWV
222*> \verbatim
223*>          LDWV is INTEGER
224*>          The leading dimension of W just as declared in the
225*>          calling subroutine.  NW <= LDV
226*> \endverbatim
227*>
228*> \param[out] WORK
229*> \verbatim
230*>          WORK is COMPLEX array, dimension (LWORK)
231*>          On exit, WORK(1) is set to an estimate of the optimal value
232*>          of LWORK for the given values of N, NW, KTOP and KBOT.
233*> \endverbatim
234*>
235*> \param[in] LWORK
236*> \verbatim
237*>          LWORK is INTEGER
238*>          The dimension of the work array WORK.  LWORK = 2*NW
239*>          suffices, but greater efficiency may result from larger
240*>          values of LWORK.
241*>
242*>          If LWORK = -1, then a workspace query is assumed; CLAQR2
243*>          only estimates the optimal workspace size for the given
244*>          values of N, NW, KTOP and KBOT.  The estimate is returned
245*>          in WORK(1).  No error message related to LWORK is issued
246*>          by XERBLA.  Neither H nor Z are accessed.
247*> \endverbatim
248*
249*  Authors:
250*  ========
251*
252*> \author Univ. of Tennessee
253*> \author Univ. of California Berkeley
254*> \author Univ. of Colorado Denver
255*> \author NAG Ltd.
256*
257*> \date June 2017
258*
259*> \ingroup complexOTHERauxiliary
260*
261*> \par Contributors:
262*  ==================
263*>
264*>       Karen Braman and Ralph Byers, Department of Mathematics,
265*>       University of Kansas, USA
266*>
267*  =====================================================================
268      SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
269     $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
270     $                   NV, WV, LDWV, WORK, LWORK )
271*
272*  -- LAPACK auxiliary routine (version 3.7.1) --
273*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
274*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
275*     June 2017
276*
277*     .. Scalar Arguments ..
278      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
279     $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
280      LOGICAL            WANTT, WANTZ
281*     ..
282*     .. Array Arguments ..
283      COMPLEX            H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
284     $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
285*     ..
286*
287*  ================================================================
288*
289*     .. Parameters ..
290      COMPLEX            ZERO, ONE
291      PARAMETER          ( ZERO = ( 0.0e0, 0.0e0 ),
292     $                   ONE = ( 1.0e0, 0.0e0 ) )
293      REAL               RZERO, RONE
294      PARAMETER          ( RZERO = 0.0e0, RONE = 1.0e0 )
295*     ..
296*     .. Local Scalars ..
297      COMPLEX            BETA, CDUM, S, TAU
298      REAL               FOO, SAFMAX, SAFMIN, SMLNUM, ULP
299      INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
300     $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT
301*     ..
302*     .. External Functions ..
303      REAL               SLAMCH
304      EXTERNAL           SLAMCH
305*     ..
306*     .. External Subroutines ..
307      EXTERNAL           CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF,
308     $                   CLARFG, CLASET, CTREXC, CUNMHR, SLABAD
309*     ..
310*     .. Intrinsic Functions ..
311      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL
312*     ..
313*     .. Statement Functions ..
314      REAL               CABS1
315*     ..
316*     .. Statement Function definitions ..
317      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
318*     ..
319*     .. Executable Statements ..
320*
321*     ==== Estimate optimal workspace. ====
322*
323      JW = MIN( NW, KBOT-KTOP+1 )
324      IF( JW.LE.2 ) THEN
325         LWKOPT = 1
326      ELSE
327*
328*        ==== Workspace query call to CGEHRD ====
329*
330         CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
331         LWK1 = INT( WORK( 1 ) )
332*
333*        ==== Workspace query call to CUNMHR ====
334*
335         CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
336     $                WORK, -1, INFO )
337         LWK2 = INT( WORK( 1 ) )
338*
339*        ==== Optimal workspace ====
340*
341         LWKOPT = JW + MAX( LWK1, LWK2 )
342      END IF
343*
344*     ==== Quick return in case of workspace query. ====
345*
346      IF( LWORK.EQ.-1 ) THEN
347         WORK( 1 ) = CMPLX( LWKOPT, 0 )
348         RETURN
349      END IF
350*
351*     ==== Nothing to do ...
352*     ... for an empty active block ... ====
353      NS = 0
354      ND = 0
355      WORK( 1 ) = ONE
356      IF( KTOP.GT.KBOT )
357     $   RETURN
358*     ... nor for an empty deflation window. ====
359      IF( NW.LT.1 )
360     $   RETURN
361*
362*     ==== Machine constants ====
363*
364      SAFMIN = SLAMCH( 'SAFE MINIMUM' )
365      SAFMAX = RONE / SAFMIN
366      CALL SLABAD( SAFMIN, SAFMAX )
367      ULP = SLAMCH( 'PRECISION' )
368      SMLNUM = SAFMIN*( REAL( N ) / ULP )
369*
370*     ==== Setup deflation window ====
371*
372      JW = MIN( NW, KBOT-KTOP+1 )
373      KWTOP = KBOT - JW + 1
374      IF( KWTOP.EQ.KTOP ) THEN
375         S = ZERO
376      ELSE
377         S = H( KWTOP, KWTOP-1 )
378      END IF
379*
380      IF( KBOT.EQ.KWTOP ) THEN
381*
382*        ==== 1-by-1 deflation window: not much to do ====
383*
384         SH( KWTOP ) = H( KWTOP, KWTOP )
385         NS = 1
386         ND = 0
387         IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
388     $       KWTOP ) ) ) ) THEN
389            NS = 0
390            ND = 1
391            IF( KWTOP.GT.KTOP )
392     $         H( KWTOP, KWTOP-1 ) = ZERO
393         END IF
394         WORK( 1 ) = ONE
395         RETURN
396      END IF
397*
398*     ==== Convert to spike-triangular form.  (In case of a
399*     .    rare QR failure, this routine continues to do
400*     .    aggressive early deflation using that part of
401*     .    the deflation window that converged using INFQR
402*     .    here and there to keep track.) ====
403*
404      CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
405      CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
406*
407      CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
408      CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
409     $             JW, V, LDV, INFQR )
410*
411*     ==== Deflation detection loop ====
412*
413      NS = JW
414      ILST = INFQR + 1
415      DO 10 KNT = INFQR + 1, JW
416*
417*        ==== Small spike tip deflation test ====
418*
419         FOO = CABS1( T( NS, NS ) )
420         IF( FOO.EQ.RZERO )
421     $      FOO = CABS1( S )
422         IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
423     $        THEN
424*
425*           ==== One more converged eigenvalue ====
426*
427            NS = NS - 1
428         ELSE
429*
430*           ==== One undeflatable eigenvalue.  Move it up out of the
431*           .    way.   (CTREXC can not fail in this case.) ====
432*
433            IFST = NS
434            CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
435            ILST = ILST + 1
436         END IF
437   10 CONTINUE
438*
439*        ==== Return to Hessenberg form ====
440*
441      IF( NS.EQ.0 )
442     $   S = ZERO
443*
444      IF( NS.LT.JW ) THEN
445*
446*        ==== sorting the diagonal of T improves accuracy for
447*        .    graded matrices.  ====
448*
449         DO 30 I = INFQR + 1, NS
450            IFST = I
451            DO 20 J = I + 1, NS
452               IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
453     $            IFST = J
454   20       CONTINUE
455            ILST = I
456            IF( IFST.NE.ILST )
457     $         CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
458   30    CONTINUE
459      END IF
460*
461*     ==== Restore shift/eigenvalue array from T ====
462*
463      DO 40 I = INFQR + 1, JW
464         SH( KWTOP+I-1 ) = T( I, I )
465   40 CONTINUE
466*
467*
468      IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
469         IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
470*
471*           ==== Reflect spike back into lower triangle ====
472*
473            CALL CCOPY( NS, V, LDV, WORK, 1 )
474            DO 50 I = 1, NS
475               WORK( I ) = CONJG( WORK( I ) )
476   50       CONTINUE
477            BETA = WORK( 1 )
478            CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU )
479            WORK( 1 ) = ONE
480*
481            CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
482*
483            CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT,
484     $                  WORK( JW+1 ) )
485            CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
486     $                  WORK( JW+1 ) )
487            CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
488     $                  WORK( JW+1 ) )
489*
490            CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
491     $                   LWORK-JW, INFO )
492         END IF
493*
494*        ==== Copy updated reduced window into place ====
495*
496         IF( KWTOP.GT.1 )
497     $      H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) )
498         CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
499         CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
500     $               LDH+1 )
501*
502*        ==== Accumulate orthogonal matrix in order update
503*        .    H and Z, if requested.  ====
504*
505         IF( NS.GT.1 .AND. S.NE.ZERO )
506     $      CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
507     $                   WORK( JW+1 ), LWORK-JW, INFO )
508*
509*        ==== Update vertical slab in H ====
510*
511         IF( WANTT ) THEN
512            LTOP = 1
513         ELSE
514            LTOP = KTOP
515         END IF
516         DO 60 KROW = LTOP, KWTOP - 1, NV
517            KLN = MIN( NV, KWTOP-KROW )
518            CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
519     $                  LDH, V, LDV, ZERO, WV, LDWV )
520            CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
521   60    CONTINUE
522*
523*        ==== Update horizontal slab in H ====
524*
525         IF( WANTT ) THEN
526            DO 70 KCOL = KBOT + 1, N, NH
527               KLN = MIN( NH, N-KCOL+1 )
528               CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
529     $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
530               CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
531     $                      LDH )
532   70       CONTINUE
533         END IF
534*
535*        ==== Update vertical slab in Z ====
536*
537         IF( WANTZ ) THEN
538            DO 80 KROW = ILOZ, IHIZ, NV
539               KLN = MIN( NV, IHIZ-KROW+1 )
540               CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
541     $                     LDZ, V, LDV, ZERO, WV, LDWV )
542               CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
543     $                      LDZ )
544   80       CONTINUE
545         END IF
546      END IF
547*
548*     ==== Return the number of deflations ... ====
549*
550      ND = JW - NS
551*
552*     ==== ... and the number of shifts. (Subtracting
553*     .    INFQR from the spike length takes care
554*     .    of the case of a rare QR failure while
555*     .    calculating eigenvalues of the deflation
556*     .    window.)  ====
557*
558      NS = NS - INFQR
559*
560*      ==== Return optimal workspace. ====
561*
562      WORK( 1 ) = CMPLX( LWKOPT, 0 )
563*
564*     ==== End of CLAQR2 ====
565*
566      END
567