1*> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22*                          H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23*                          WV, LDWV, NH, WH, LDWH )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27*      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28*       LOGICAL            WANTT, WANTZ
29*       ..
30*       .. Array Arguments ..
31*       COMPLEX*16         H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32*      $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33*       ..
34*
35*
36*> \par Purpose:
37*  =============
38*>
39*> \verbatim
40*>
41*>    ZLAQR5, called by ZLAQR0, performs a
42*>    single small-bulge multi-shift QR sweep.
43*> \endverbatim
44*
45*  Arguments:
46*  ==========
47*
48*> \param[in] WANTT
49*> \verbatim
50*>          WANTT is LOGICAL
51*>             WANTT = .true. if the triangular Schur factor
52*>             is being computed.  WANTT is set to .false. otherwise.
53*> \endverbatim
54*>
55*> \param[in] WANTZ
56*> \verbatim
57*>          WANTZ is LOGICAL
58*>             WANTZ = .true. if the unitary Schur factor is being
59*>             computed.  WANTZ is set to .false. otherwise.
60*> \endverbatim
61*>
62*> \param[in] KACC22
63*> \verbatim
64*>          KACC22 is INTEGER with value 0, 1, or 2.
65*>             Specifies the computation mode of far-from-diagonal
66*>             orthogonal updates.
67*>        = 0: ZLAQR5 does not accumulate reflections and does not
68*>             use matrix-matrix multiply to update far-from-diagonal
69*>             matrix entries.
70*>        = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71*>             multiply to update the far-from-diagonal matrix entries.
72*>        = 2: Same as KACC22 = 1. This option used to enable exploiting
73*>             the 2-by-2 structure during matrix multiplications, but
74*>             this is no longer supported.
75*> \endverbatim
76*>
77*> \param[in] N
78*> \verbatim
79*>          N is INTEGER
80*>             N is the order of the Hessenberg matrix H upon which this
81*>             subroutine operates.
82*> \endverbatim
83*>
84*> \param[in] KTOP
85*> \verbatim
86*>          KTOP is INTEGER
87*> \endverbatim
88*>
89*> \param[in] KBOT
90*> \verbatim
91*>          KBOT is INTEGER
92*>             These are the first and last rows and columns of an
93*>             isolated diagonal block upon which the QR sweep is to be
94*>             applied. It is assumed without a check that
95*>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
96*>             and
97*>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
98*> \endverbatim
99*>
100*> \param[in] NSHFTS
101*> \verbatim
102*>          NSHFTS is INTEGER
103*>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
104*>             must be positive and even.
105*> \endverbatim
106*>
107*> \param[in,out] S
108*> \verbatim
109*>          S is COMPLEX*16 array, dimension (NSHFTS)
110*>             S contains the shifts of origin that define the multi-
111*>             shift QR sweep.  On output S may be reordered.
112*> \endverbatim
113*>
114*> \param[in,out] H
115*> \verbatim
116*>          H is COMPLEX*16 array, dimension (LDH,N)
117*>             On input H contains a Hessenberg matrix.  On output a
118*>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
119*>             to the isolated diagonal block in rows and columns KTOP
120*>             through KBOT.
121*> \endverbatim
122*>
123*> \param[in] LDH
124*> \verbatim
125*>          LDH is INTEGER
126*>             LDH is the leading dimension of H just as declared in the
127*>             calling procedure.  LDH >= MAX(1,N).
128*> \endverbatim
129*>
130*> \param[in] ILOZ
131*> \verbatim
132*>          ILOZ is INTEGER
133*> \endverbatim
134*>
135*> \param[in] IHIZ
136*> \verbatim
137*>          IHIZ is INTEGER
138*>             Specify the rows of Z to which transformations must be
139*>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
140*> \endverbatim
141*>
142*> \param[in,out] Z
143*> \verbatim
144*>          Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
145*>             If WANTZ = .TRUE., then the QR Sweep unitary
146*>             similarity transformation is accumulated into
147*>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
148*>             If WANTZ = .FALSE., then Z is unreferenced.
149*> \endverbatim
150*>
151*> \param[in] LDZ
152*> \verbatim
153*>          LDZ is INTEGER
154*>             LDA is the leading dimension of Z just as declared in
155*>             the calling procedure. LDZ >= N.
156*> \endverbatim
157*>
158*> \param[out] V
159*> \verbatim
160*>          V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
161*> \endverbatim
162*>
163*> \param[in] LDV
164*> \verbatim
165*>          LDV is INTEGER
166*>             LDV is the leading dimension of V as declared in the
167*>             calling procedure.  LDV >= 3.
168*> \endverbatim
169*>
170*> \param[out] U
171*> \verbatim
172*>          U is COMPLEX*16 array, dimension (LDU,2*NSHFTS)
173*> \endverbatim
174*>
175*> \param[in] LDU
176*> \verbatim
177*>          LDU is INTEGER
178*>             LDU is the leading dimension of U just as declared in the
179*>             in the calling subroutine.  LDU >= 2*NSHFTS.
180*> \endverbatim
181*>
182*> \param[in] NV
183*> \verbatim
184*>          NV is INTEGER
185*>             NV is the number of rows in WV agailable for workspace.
186*>             NV >= 1.
187*> \endverbatim
188*>
189*> \param[out] WV
190*> \verbatim
191*>          WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS)
192*> \endverbatim
193*>
194*> \param[in] LDWV
195*> \verbatim
196*>          LDWV is INTEGER
197*>             LDWV is the leading dimension of WV as declared in the
198*>             in the calling subroutine.  LDWV >= NV.
199*> \endverbatim
200*
201*> \param[in] NH
202*> \verbatim
203*>          NH is INTEGER
204*>             NH is the number of columns in array WH available for
205*>             workspace. NH >= 1.
206*> \endverbatim
207*>
208*> \param[out] WH
209*> \verbatim
210*>          WH is COMPLEX*16 array, dimension (LDWH,NH)
211*> \endverbatim
212*>
213*> \param[in] LDWH
214*> \verbatim
215*>          LDWH is INTEGER
216*>             Leading dimension of WH just as declared in the
217*>             calling procedure.  LDWH >= 2*NSHFTS.
218*> \endverbatim
219*>
220*  Authors:
221*  ========
222*
223*> \author Univ. of Tennessee
224*> \author Univ. of California Berkeley
225*> \author Univ. of Colorado Denver
226*> \author NAG Ltd.
227*
228*> \ingroup complex16OTHERauxiliary
229*
230*> \par Contributors:
231*  ==================
232*>
233*>       Karen Braman and Ralph Byers, Department of Mathematics,
234*>       University of Kansas, USA
235*>
236*>       Lars Karlsson, Daniel Kressner, and Bruno Lang
237*>
238*>       Thijs Steel, Department of Computer science,
239*>       KU Leuven, Belgium
240*
241*> \par References:
242*  ================
243*>
244*>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245*>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246*>       Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247*>       929--947, 2002.
248*>
249*>       Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
250*>       chains of bulges in multishift QR algorithms.
251*>       ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
252*>
253*  =====================================================================
254      SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
255     $                   H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
256     $                   WV, LDWV, NH, WH, LDWH )
257      IMPLICIT NONE
258*
259*  -- LAPACK auxiliary routine --
260*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
261*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
262*
263*     .. Scalar Arguments ..
264      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
265     $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
266      LOGICAL            WANTT, WANTZ
267*     ..
268*     .. Array Arguments ..
269      COMPLEX*16         H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
270     $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
271*     ..
272*
273*  ================================================================
274*     .. Parameters ..
275      COMPLEX*16         ZERO, ONE
276      PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
277     $                   ONE = ( 1.0d0, 0.0d0 ) )
278      DOUBLE PRECISION   RZERO, RONE
279      PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
280*     ..
281*     .. Local Scalars ..
282      COMPLEX*16         ALPHA, BETA, CDUM, REFSUM
283      DOUBLE PRECISION   H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
284     $                   SMLNUM, TST1, TST2, ULP
285      INTEGER            I2, I4, INCOL, J, JBOT, JCOL, JLEN,
286     $                   JROW, JTOP, K, K1, KDU, KMS, KRCOL,
287     $                   M, M22, MBOT, MTOP, NBMPS, NDCOL,
288     $                   NS, NU
289      LOGICAL            ACCUM, BMP22
290*     ..
291*     .. External Functions ..
292      DOUBLE PRECISION   DLAMCH
293      EXTERNAL           DLAMCH
294*     ..
295*     .. Intrinsic Functions ..
296*
297      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD
298*     ..
299*     .. Local Arrays ..
300      COMPLEX*16         VT( 3 )
301*     ..
302*     .. External Subroutines ..
303      EXTERNAL           DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET,
304     $                   ZTRMM
305*     ..
306*     .. Statement Functions ..
307      DOUBLE PRECISION   CABS1
308*     ..
309*     .. Statement Function definitions ..
310      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
311*     ..
312*     .. Executable Statements ..
313*
314*     ==== If there are no shifts, then there is nothing to do. ====
315*
316      IF( NSHFTS.LT.2 )
317     $   RETURN
318*
319*     ==== If the active block is empty or 1-by-1, then there
320*     .    is nothing to do. ====
321*
322      IF( KTOP.GE.KBOT )
323     $   RETURN
324*
325*     ==== NSHFTS is supposed to be even, but if it is odd,
326*     .    then simply reduce it by one.  ====
327*
328      NS = NSHFTS - MOD( NSHFTS, 2 )
329*
330*     ==== Machine constants for deflation ====
331*
332      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
333      SAFMAX = RONE / SAFMIN
334      CALL DLABAD( SAFMIN, SAFMAX )
335      ULP = DLAMCH( 'PRECISION' )
336      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
337*
338*     ==== Use accumulated reflections to update far-from-diagonal
339*     .    entries ? ====
340*
341      ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
342*
343*     ==== clear trash ====
344*
345      IF( KTOP+2.LE.KBOT )
346     $   H( KTOP+2, KTOP ) = ZERO
347*
348*     ==== NBMPS = number of 2-shift bulges in the chain ====
349*
350      NBMPS = NS / 2
351*
352*     ==== KDU = width of slab ====
353*
354      KDU = 4*NBMPS
355*
356*     ==== Create and chase chains of NBMPS bulges ====
357*
358      DO 180 INCOL = KTOP - 2*NBMPS + 1, KBOT - 2, 2*NBMPS
359*
360*        JTOP = Index from which updates from the right start.
361*
362         IF( ACCUM ) THEN
363            JTOP = MAX( KTOP, INCOL )
364         ELSE IF( WANTT ) THEN
365            JTOP = 1
366         ELSE
367            JTOP = KTOP
368         END IF
369*
370         NDCOL = INCOL + KDU
371         IF( ACCUM )
372     $      CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
373*
374*        ==== Near-the-diagonal bulge chase.  The following loop
375*        .    performs the near-the-diagonal part of a small bulge
376*        .    multi-shift QR sweep.  Each 4*NBMPS column diagonal
377*        .    chunk extends from column INCOL to column NDCOL
378*        .    (including both column INCOL and column NDCOL). The
379*        .    following loop chases a 2*NBMPS+1 column long chain of
380*        .    NBMPS bulges 2*NBMPS columns to the right.  (INCOL
381*        .    may be less than KTOP and and NDCOL may be greater than
382*        .    KBOT indicating phantom columns from which to chase
383*        .    bulges before they are actually introduced or to which
384*        .    to chase bulges beyond column KBOT.)  ====
385*
386         DO 145 KRCOL = INCOL, MIN( INCOL+2*NBMPS-1, KBOT-2 )
387*
388*           ==== Bulges number MTOP to MBOT are active double implicit
389*           .    shift bulges.  There may or may not also be small
390*           .    2-by-2 bulge, if there is room.  The inactive bulges
391*           .    (if any) must wait until the active bulges have moved
392*           .    down the diagonal to make room.  The phantom matrix
393*           .    paradigm described above helps keep track.  ====
394*
395            MTOP = MAX( 1, ( KTOP-KRCOL ) / 2+1 )
396            MBOT = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 2 )
397            M22 = MBOT + 1
398            BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+2*( M22-1 ) ).EQ.
399     $              ( KBOT-2 )
400*
401*           ==== Generate reflections to chase the chain right
402*           .    one column.  (The minimum value of K is KTOP-1.) ====
403*
404            IF ( BMP22 ) THEN
405*
406*              ==== Special case: 2-by-2 reflection at bottom treated
407*              .    separately ====
408*
409               K = KRCOL + 2*( M22-1 )
410               IF( K.EQ.KTOP-1 ) THEN
411                  CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
412     $                         S( 2*M22 ), V( 1, M22 ) )
413                  BETA = V( 1, M22 )
414                  CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
415               ELSE
416                  BETA = H( K+1, K )
417                  V( 2, M22 ) = H( K+2, K )
418                  CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
419                  H( K+1, K ) = BETA
420                  H( K+2, K ) = ZERO
421               END IF
422
423*
424*              ==== Perform update from right within
425*              .    computational window. ====
426*
427               DO 30 J = JTOP, MIN( KBOT, K+3 )
428                  REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
429     $                     H( J, K+2 ) )
430                  H( J, K+1 ) = H( J, K+1 ) - REFSUM
431                  H( J, K+2 ) = H( J, K+2 ) -
432     $                          REFSUM*DCONJG( V( 2, M22 ) )
433   30          CONTINUE
434*
435*              ==== Perform update from left within
436*              .    computational window. ====
437*
438               IF( ACCUM ) THEN
439                  JBOT = MIN( NDCOL, KBOT )
440               ELSE IF( WANTT ) THEN
441                  JBOT = N
442               ELSE
443                  JBOT = KBOT
444               END IF
445               DO 40 J = K+1, JBOT
446                  REFSUM = DCONJG( V( 1, M22 ) )*
447     $                     ( H( K+1, J )+DCONJG( V( 2, M22 ) )*
448     $                     H( K+2, J ) )
449                  H( K+1, J ) = H( K+1, J ) - REFSUM
450                  H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
451   40          CONTINUE
452*
453*              ==== The following convergence test requires that
454*              .    the tradition small-compared-to-nearby-diagonals
455*              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
456*              .    criteria both be satisfied.  The latter improves
457*              .    accuracy in some examples. Falling back on an
458*              .    alternate convergence criterion when TST1 or TST2
459*              .    is zero (as done here) is traditional but probably
460*              .    unnecessary. ====
461*
462               IF( K.GE.KTOP ) THEN
463                  IF( H( K+1, K ).NE.ZERO ) THEN
464                     TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
465                     IF( TST1.EQ.RZERO ) THEN
466                        IF( K.GE.KTOP+1 )
467     $                     TST1 = TST1 + CABS1( H( K, K-1 ) )
468                        IF( K.GE.KTOP+2 )
469     $                     TST1 = TST1 + CABS1( H( K, K-2 ) )
470                        IF( K.GE.KTOP+3 )
471     $                     TST1 = TST1 + CABS1( H( K, K-3 ) )
472                        IF( K.LE.KBOT-2 )
473     $                     TST1 = TST1 + CABS1( H( K+2, K+1 ) )
474                        IF( K.LE.KBOT-3 )
475     $                     TST1 = TST1 + CABS1( H( K+3, K+1 ) )
476                        IF( K.LE.KBOT-4 )
477     $                     TST1 = TST1 + CABS1( H( K+4, K+1 ) )
478                     END IF
479                     IF( CABS1( H( K+1, K ) )
480     $                   .LE.MAX( SMLNUM, ULP*TST1 ) ) THEN
481                        H12 = MAX( CABS1( H( K+1, K ) ),
482     $                     CABS1( H( K, K+1 ) ) )
483                        H21 = MIN( CABS1( H( K+1, K ) ),
484     $                     CABS1( H( K, K+1 ) ) )
485                        H11 = MAX( CABS1( H( K+1, K+1 ) ),
486     $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
487                        H22 = MIN( CABS1( H( K+1, K+1 ) ),
488     $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
489                        SCL = H11 + H12
490                        TST2 = H22*( H11 / SCL )
491*
492                        IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
493     $                      MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
494                     END IF
495                  END IF
496               END IF
497*
498*              ==== Accumulate orthogonal transformations. ====
499*
500               IF( ACCUM ) THEN
501                  KMS = K - INCOL
502                  DO 50 J = MAX( 1, KTOP-INCOL ), KDU
503                     REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
504     $                        V( 2, M22 )*U( J, KMS+2 ) )
505                     U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
506                     U( J, KMS+2 ) = U( J, KMS+2 ) -
507     $                               REFSUM*DCONJG( V( 2, M22 ) )
508  50                 CONTINUE
509               ELSE IF( WANTZ ) THEN
510                  DO 60 J = ILOZ, IHIZ
511                     REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
512     $                        Z( J, K+2 ) )
513                     Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
514                     Z( J, K+2 ) = Z( J, K+2 ) -
515     $                             REFSUM*DCONJG( V( 2, M22 ) )
516  60              CONTINUE
517               END IF
518            END IF
519*
520*           ==== Normal case: Chain of 3-by-3 reflections ====
521*
522            DO 80 M = MBOT, MTOP, -1
523               K = KRCOL + 2*( M-1 )
524               IF( K.EQ.KTOP-1 ) THEN
525                  CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
526     $                         S( 2*M ), V( 1, M ) )
527                  ALPHA = V( 1, M )
528                  CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
529               ELSE
530*
531*                 ==== Perform delayed transformation of row below
532*                 .    Mth bulge. Exploit fact that first two elements
533*                 .    of row are actually zero. ====
534*
535                  REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 )
536                  H( K+3, K   ) = -REFSUM
537                  H( K+3, K+1 ) = -REFSUM*DCONJG( V( 2, M ) )
538                  H( K+3, K+2 ) = H( K+3, K+2 ) -
539     $                            REFSUM*DCONJG( V( 3, M ) )
540*
541*                 ==== Calculate reflection to move
542*                 .    Mth bulge one step. ====
543*
544                  BETA      = H( K+1, K )
545                  V( 2, M ) = H( K+2, K )
546                  V( 3, M ) = H( K+3, K )
547                  CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
548*
549*                 ==== A Bulge may collapse because of vigilant
550*                 .    deflation or destructive underflow.  In the
551*                 .    underflow case, try the two-small-subdiagonals
552*                 .    trick to try to reinflate the bulge.  ====
553*
554                  IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
555     $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
556*
557*                    ==== Typical case: not collapsed (yet). ====
558*
559                     H( K+1, K ) = BETA
560                     H( K+2, K ) = ZERO
561                     H( K+3, K ) = ZERO
562                  ELSE
563*
564*                    ==== Atypical case: collapsed.  Attempt to
565*                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
566*                    .    If the fill resulting from the new
567*                    .    reflector is too large, then abandon it.
568*                    .    Otherwise, use the new one. ====
569*
570                     CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
571     $                            S( 2*M ), VT )
572                     ALPHA = VT( 1 )
573                     CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
574                     REFSUM = DCONJG( VT( 1 ) )*
575     $                        ( H( K+1, K )+DCONJG( VT( 2 ) )*
576     $                        H( K+2, K ) )
577*
578                     IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
579     $                   CABS1( REFSUM*VT( 3 ) ).GT.ULP*
580     $                   ( CABS1( H( K, K ) )+CABS1( H( K+1,
581     $                   K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
582*
583*                       ==== Starting a new bulge here would
584*                       .    create non-negligible fill.  Use
585*                       .    the old one with trepidation. ====
586*
587                        H( K+1, K ) = BETA
588                        H( K+2, K ) = ZERO
589                        H( K+3, K ) = ZERO
590                     ELSE
591*
592*                       ==== Starting a new bulge here would
593*                       .    create only negligible fill.
594*                       .    Replace the old reflector with
595*                       .    the new one. ====
596*
597                        H( K+1, K ) = H( K+1, K ) - REFSUM
598                        H( K+2, K ) = ZERO
599                        H( K+3, K ) = ZERO
600                        V( 1, M ) = VT( 1 )
601                        V( 2, M ) = VT( 2 )
602                        V( 3, M ) = VT( 3 )
603                     END IF
604                  END IF
605               END IF
606*
607*              ====  Apply reflection from the right and
608*              .     the first column of update from the left.
609*              .     These updates are required for the vigilant
610*              .     deflation check. We still delay most of the
611*              .     updates from the left for efficiency. ====
612*
613               DO 70 J = JTOP, MIN( KBOT, K+3 )
614                  REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
615     $                     H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
616                  H( J, K+1 ) = H( J, K+1 ) - REFSUM
617                  H( J, K+2 ) = H( J, K+2 ) -
618     $                          REFSUM*DCONJG( V( 2, M ) )
619                  H( J, K+3 ) = H( J, K+3 ) -
620     $                          REFSUM*DCONJG( V( 3, M ) )
621   70          CONTINUE
622*
623*              ==== Perform update from left for subsequent
624*              .    column. ====
625*
626               REFSUM =  DCONJG( V( 1, M ) )*( H( K+1, K+1 )
627     $                  +DCONJG( V( 2, M ) )*H( K+2, K+1 )
628     $                  +DCONJG( V( 3, M ) )*H( K+3, K+1 ) )
629               H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM
630               H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M )
631               H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M )
632*
633*              ==== The following convergence test requires that
634*              .    the tradition small-compared-to-nearby-diagonals
635*              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
636*              .    criteria both be satisfied.  The latter improves
637*              .    accuracy in some examples. Falling back on an
638*              .    alternate convergence criterion when TST1 or TST2
639*              .    is zero (as done here) is traditional but probably
640*              .    unnecessary. ====
641*
642               IF( K.LT.KTOP)
643     $              CYCLE
644               IF( H( K+1, K ).NE.ZERO ) THEN
645                  TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
646                  IF( TST1.EQ.RZERO ) THEN
647                     IF( K.GE.KTOP+1 )
648     $                  TST1 = TST1 + CABS1( H( K, K-1 ) )
649                     IF( K.GE.KTOP+2 )
650     $                  TST1 = TST1 + CABS1( H( K, K-2 ) )
651                     IF( K.GE.KTOP+3 )
652     $                  TST1 = TST1 + CABS1( H( K, K-3 ) )
653                     IF( K.LE.KBOT-2 )
654     $                  TST1 = TST1 + CABS1( H( K+2, K+1 ) )
655                     IF( K.LE.KBOT-3 )
656     $                  TST1 = TST1 + CABS1( H( K+3, K+1 ) )
657                     IF( K.LE.KBOT-4 )
658     $                  TST1 = TST1 + CABS1( H( K+4, K+1 ) )
659                  END IF
660                  IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
661     $                 THEN
662                     H12 = MAX( CABS1( H( K+1, K ) ),
663     $                     CABS1( H( K, K+1 ) ) )
664                     H21 = MIN( CABS1( H( K+1, K ) ),
665     $                     CABS1( H( K, K+1 ) ) )
666                     H11 = MAX( CABS1( H( K+1, K+1 ) ),
667     $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
668                     H22 = MIN( CABS1( H( K+1, K+1 ) ),
669     $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
670                     SCL = H11 + H12
671                     TST2 = H22*( H11 / SCL )
672*
673                     IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
674     $                   MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
675                  END IF
676               END IF
677   80       CONTINUE
678*
679*           ==== Multiply H by reflections from the left ====
680*
681            IF( ACCUM ) THEN
682               JBOT = MIN( NDCOL, KBOT )
683            ELSE IF( WANTT ) THEN
684               JBOT = N
685            ELSE
686               JBOT = KBOT
687            END IF
688*
689            DO 100 M = MBOT, MTOP, -1
690               K = KRCOL + 2*( M-1 )
691               DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT
692                  REFSUM = DCONJG( V( 1, M ) )*
693     $                     ( H( K+1, J )+DCONJG( V( 2, M ) )*
694     $                     H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) )
695                  H( K+1, J ) = H( K+1, J ) - REFSUM
696                  H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
697                  H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
698   90          CONTINUE
699  100       CONTINUE
700*
701*           ==== Accumulate orthogonal transformations. ====
702*
703            IF( ACCUM ) THEN
704*
705*              ==== Accumulate U. (If needed, update Z later
706*              .    with an efficient matrix-matrix
707*              .    multiply.) ====
708*
709               DO 120 M = MBOT, MTOP, -1
710                  K = KRCOL + 2*( M-1 )
711                  KMS = K - INCOL
712                  I2 = MAX( 1, KTOP-INCOL )
713                  I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 )
714                  I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 )
715                  DO 110 J = I2, I4
716                     REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
717     $                        U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
718                     U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
719                     U( J, KMS+2 ) = U( J, KMS+2 ) -
720     $                               REFSUM*DCONJG( V( 2, M ) )
721                     U( J, KMS+3 ) = U( J, KMS+3 ) -
722     $                               REFSUM*DCONJG( V( 3, M ) )
723  110             CONTINUE
724  120          CONTINUE
725            ELSE IF( WANTZ ) THEN
726*
727*              ==== U is not accumulated, so update Z
728*              .    now by multiplying by reflections
729*              .    from the right. ====
730*
731               DO 140 M = MBOT, MTOP, -1
732                  K = KRCOL + 2*( M-1 )
733                  DO 130 J = ILOZ, IHIZ
734                     REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
735     $                        Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
736                     Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
737                     Z( J, K+2 ) = Z( J, K+2 ) -
738     $                             REFSUM*DCONJG( V( 2, M ) )
739                     Z( J, K+3 ) = Z( J, K+3 ) -
740     $                             REFSUM*DCONJG( V( 3, M ) )
741  130             CONTINUE
742  140          CONTINUE
743            END IF
744*
745*           ==== End of near-the-diagonal bulge chase. ====
746*
747  145    CONTINUE
748*
749*        ==== Use U (if accumulated) to update far-from-diagonal
750*        .    entries in H.  If required, use U to update Z as
751*        .    well. ====
752*
753         IF( ACCUM ) THEN
754            IF( WANTT ) THEN
755               JTOP = 1
756               JBOT = N
757            ELSE
758               JTOP = KTOP
759               JBOT = KBOT
760            END IF
761            K1 = MAX( 1, KTOP-INCOL )
762            NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
763*
764*           ==== Horizontal Multiply ====
765*
766            DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
767               JLEN = MIN( NH, JBOT-JCOL+1 )
768               CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
769     $                     LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
770     $                     LDWH )
771               CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH,
772     $                      H( INCOL+K1, JCOL ), LDH )
773  150       CONTINUE
774*
775*           ==== Vertical multiply ====
776*
777            DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
778               JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
779               CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
780     $                     H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
781     $                     LDU, ZERO, WV, LDWV )
782               CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
783     $                      H( JROW, INCOL+K1 ), LDH )
784  160       CONTINUE
785*
786*           ==== Z multiply (also vertical) ====
787*
788            IF( WANTZ ) THEN
789               DO 170 JROW = ILOZ, IHIZ, NV
790                  JLEN = MIN( NV, IHIZ-JROW+1 )
791                  CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE,
792     $                        Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
793     $                        LDU, ZERO, WV, LDWV )
794                  CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV,
795     $                         Z( JROW, INCOL+K1 ), LDZ )
796  170          CONTINUE
797            END IF
798         END IF
799  180 CONTINUE
800*
801*     ==== End of ZLAQR5 ====
802*
803      END
804