1*> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3*  =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6*            http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18*  Definition:
19*  ===========
20*
21*       SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22*                          SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23*                          LDU, NV, WV, LDWV, NH, WH, LDWH )
24*
25*       .. Scalar Arguments ..
26*       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27*      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28*       LOGICAL            WANTT, WANTZ
29*       ..
30*       .. Array Arguments ..
31*       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32*      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33*      $                   Z( LDZ, * )
34*       ..
35*
36*
37*> \par Purpose:
38*  =============
39*>
40*> \verbatim
41*>
42*>    DLAQR5, called by DLAQR0, performs a
43*>    single small-bulge multi-shift QR sweep.
44*> \endverbatim
45*
46*  Arguments:
47*  ==========
48*
49*> \param[in] WANTT
50*> \verbatim
51*>          WANTT is logical scalar
52*>             WANTT = .true. if the quasi-triangular Schur factor
53*>             is being computed.  WANTT is set to .false. otherwise.
54*> \endverbatim
55*>
56*> \param[in] WANTZ
57*> \verbatim
58*>          WANTZ is logical scalar
59*>             WANTZ = .true. if the orthogonal Schur factor is being
60*>             computed.  WANTZ is set to .false. otherwise.
61*> \endverbatim
62*>
63*> \param[in] KACC22
64*> \verbatim
65*>          KACC22 is integer with value 0, 1, or 2.
66*>             Specifies the computation mode of far-from-diagonal
67*>             orthogonal updates.
68*>        = 0: DLAQR5 does not accumulate reflections and does not
69*>             use matrix-matrix multiply to update far-from-diagonal
70*>             matrix entries.
71*>        = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72*>             multiply to update the far-from-diagonal matrix entries.
73*>        = 2: DLAQR5 accumulates reflections, uses matrix-matrix
74*>             multiply to update the far-from-diagonal matrix entries,
75*>             and takes advantage of 2-by-2 block structure during
76*>             matrix multiplies.
77*> \endverbatim
78*>
79*> \param[in] N
80*> \verbatim
81*>          N is integer scalar
82*>             N is the order of the Hessenberg matrix H upon which this
83*>             subroutine operates.
84*> \endverbatim
85*>
86*> \param[in] KTOP
87*> \verbatim
88*>          KTOP is integer scalar
89*> \endverbatim
90*>
91*> \param[in] KBOT
92*> \verbatim
93*>          KBOT is integer scalar
94*>             These are the first and last rows and columns of an
95*>             isolated diagonal block upon which the QR sweep is to be
96*>             applied. It is assumed without a check that
97*>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
98*>             and
99*>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
100*> \endverbatim
101*>
102*> \param[in] NSHFTS
103*> \verbatim
104*>          NSHFTS is integer scalar
105*>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
106*>             must be positive and even.
107*> \endverbatim
108*>
109*> \param[in,out] SR
110*> \verbatim
111*>          SR is DOUBLE PRECISION array of size (NSHFTS)
112*> \endverbatim
113*>
114*> \param[in,out] SI
115*> \verbatim
116*>          SI is DOUBLE PRECISION array of size (NSHFTS)
117*>             SR contains the real parts and SI contains the imaginary
118*>             parts of the NSHFTS shifts of origin that define the
119*>             multi-shift QR sweep.  On output SR and SI may be
120*>             reordered.
121*> \endverbatim
122*>
123*> \param[in,out] H
124*> \verbatim
125*>          H is DOUBLE PRECISION array of size (LDH,N)
126*>             On input H contains a Hessenberg matrix.  On output a
127*>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
128*>             to the isolated diagonal block in rows and columns KTOP
129*>             through KBOT.
130*> \endverbatim
131*>
132*> \param[in] LDH
133*> \verbatim
134*>          LDH is integer scalar
135*>             LDH is the leading dimension of H just as declared in the
136*>             calling procedure.  LDH.GE.MAX(1,N).
137*> \endverbatim
138*>
139*> \param[in] ILOZ
140*> \verbatim
141*>          ILOZ is INTEGER
142*> \endverbatim
143*>
144*> \param[in] IHIZ
145*> \verbatim
146*>          IHIZ is INTEGER
147*>             Specify the rows of Z to which transformations must be
148*>             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
149*> \endverbatim
150*>
151*> \param[in,out] Z
152*> \verbatim
153*>          Z is DOUBLE PRECISION array of size (LDZ,IHI)
154*>             If WANTZ = .TRUE., then the QR Sweep orthogonal
155*>             similarity transformation is accumulated into
156*>             Z(ILOZ:IHIZ,ILO:IHI) from the right.
157*>             If WANTZ = .FALSE., then Z is unreferenced.
158*> \endverbatim
159*>
160*> \param[in] LDZ
161*> \verbatim
162*>          LDZ is integer scalar
163*>             LDA is the leading dimension of Z just as declared in
164*>             the calling procedure. LDZ.GE.N.
165*> \endverbatim
166*>
167*> \param[out] V
168*> \verbatim
169*>          V is DOUBLE PRECISION array of size (LDV,NSHFTS/2)
170*> \endverbatim
171*>
172*> \param[in] LDV
173*> \verbatim
174*>          LDV is integer scalar
175*>             LDV is the leading dimension of V as declared in the
176*>             calling procedure.  LDV.GE.3.
177*> \endverbatim
178*>
179*> \param[out] U
180*> \verbatim
181*>          U is DOUBLE PRECISION array of size
182*>             (LDU,3*NSHFTS-3)
183*> \endverbatim
184*>
185*> \param[in] LDU
186*> \verbatim
187*>          LDU is integer scalar
188*>             LDU is the leading dimension of U just as declared in the
189*>             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
190*> \endverbatim
191*>
192*> \param[in] NH
193*> \verbatim
194*>          NH is integer scalar
195*>             NH is the number of columns in array WH available for
196*>             workspace. NH.GE.1.
197*> \endverbatim
198*>
199*> \param[out] WH
200*> \verbatim
201*>          WH is DOUBLE PRECISION array of size (LDWH,NH)
202*> \endverbatim
203*>
204*> \param[in] LDWH
205*> \verbatim
206*>          LDWH is integer scalar
207*>             Leading dimension of WH just as declared in the
208*>             calling procedure.  LDWH.GE.3*NSHFTS-3.
209*> \endverbatim
210*>
211*> \param[in] NV
212*> \verbatim
213*>          NV is integer scalar
214*>             NV is the number of rows in WV agailable for workspace.
215*>             NV.GE.1.
216*> \endverbatim
217*>
218*> \param[out] WV
219*> \verbatim
220*>          WV is DOUBLE PRECISION array of size
221*>             (LDWV,3*NSHFTS-3)
222*> \endverbatim
223*>
224*> \param[in] LDWV
225*> \verbatim
226*>          LDWV is integer scalar
227*>             LDWV is the leading dimension of WV as declared in the
228*>             in the calling subroutine.  LDWV.GE.NV.
229*> \endverbatim
230*
231*  Authors:
232*  ========
233*
234*> \author Univ. of Tennessee
235*> \author Univ. of California Berkeley
236*> \author Univ. of Colorado Denver
237*> \author NAG Ltd.
238*
239*> \date September 2012
240*
241*> \ingroup doubleOTHERauxiliary
242*
243*> \par Contributors:
244*  ==================
245*>
246*>       Karen Braman and Ralph Byers, Department of Mathematics,
247*>       University of Kansas, USA
248*
249*> \par References:
250*  ================
251*>
252*>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253*>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254*>       Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255*>       929--947, 2002.
256*>
257*  =====================================================================
258      SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
259     $                   SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
260     $                   LDU, NV, WV, LDWV, NH, WH, LDWH )
261*
262*  -- LAPACK auxiliary routine (version 3.4.2) --
263*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
264*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
265*     September 2012
266*
267*     .. Scalar Arguments ..
268      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
269     $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
270      LOGICAL            WANTT, WANTZ
271*     ..
272*     .. Array Arguments ..
273      DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
274     $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
275     $                   Z( LDZ, * )
276*     ..
277*
278*  ================================================================
279*     .. Parameters ..
280      DOUBLE PRECISION   ZERO, ONE
281      PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
282*     ..
283*     .. Local Scalars ..
284      DOUBLE PRECISION   ALPHA, BETA, H11, H12, H21, H22, REFSUM,
285     $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
286     $                   ULP
287      INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
288     $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
289     $                   M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
290     $                   NS, NU
291      LOGICAL            ACCUM, BLK22, BMP22
292*     ..
293*     .. External Functions ..
294      DOUBLE PRECISION   DLAMCH
295      EXTERNAL           DLAMCH
296*     ..
297*     .. Intrinsic Functions ..
298*
299      INTRINSIC          ABS, DBLE, MAX, MIN, MOD
300*     ..
301*     .. Local Arrays ..
302      DOUBLE PRECISION   VT( 3 )
303*     ..
304*     .. External Subroutines ..
305      EXTERNAL           DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
306     $                   DTRMM
307*     ..
308*     .. Executable Statements ..
309*
310*     ==== If there are no shifts, then there is nothing to do. ====
311*
312      IF( NSHFTS.LT.2 )
313     $   RETURN
314*
315*     ==== If the active block is empty or 1-by-1, then there
316*     .    is nothing to do. ====
317*
318      IF( KTOP.GE.KBOT )
319     $   RETURN
320*
321*     ==== Shuffle shifts into pairs of real shifts and pairs
322*     .    of complex conjugate shifts assuming complex
323*     .    conjugate shifts are already adjacent to one
324*     .    another. ====
325*
326      DO 10 I = 1, NSHFTS - 2, 2
327         IF( SI( I ).NE.-SI( I+1 ) ) THEN
328*
329            SWAP = SR( I )
330            SR( I ) = SR( I+1 )
331            SR( I+1 ) = SR( I+2 )
332            SR( I+2 ) = SWAP
333*
334            SWAP = SI( I )
335            SI( I ) = SI( I+1 )
336            SI( I+1 ) = SI( I+2 )
337            SI( I+2 ) = SWAP
338         END IF
339   10 CONTINUE
340*
341*     ==== NSHFTS is supposed to be even, but if it is odd,
342*     .    then simply reduce it by one.  The shuffle above
343*     .    ensures that the dropped shift is real and that
344*     .    the remaining shifts are paired. ====
345*
346      NS = NSHFTS - MOD( NSHFTS, 2 )
347*
348*     ==== Machine constants for deflation ====
349*
350      SAFMIN = DLAMCH( 'SAFE MINIMUM' )
351      SAFMAX = ONE / SAFMIN
352      CALL DLABAD( SAFMIN, SAFMAX )
353      ULP = DLAMCH( 'PRECISION' )
354      SMLNUM = SAFMIN*( DBLE( N ) / ULP )
355*
356*     ==== Use accumulated reflections to update far-from-diagonal
357*     .    entries ? ====
358*
359      ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
360*
361*     ==== If so, exploit the 2-by-2 block structure? ====
362*
363      BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
364*
365*     ==== clear trash ====
366*
367      IF( KTOP+2.LE.KBOT )
368     $   H( KTOP+2, KTOP ) = ZERO
369*
370*     ==== NBMPS = number of 2-shift bulges in the chain ====
371*
372      NBMPS = NS / 2
373*
374*     ==== KDU = width of slab ====
375*
376      KDU = 6*NBMPS - 3
377*
378*     ==== Create and chase chains of NBMPS bulges ====
379*
380      DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
381         NDCOL = INCOL + KDU
382         IF( ACCUM )
383     $      CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
384*
385*        ==== Near-the-diagonal bulge chase.  The following loop
386*        .    performs the near-the-diagonal part of a small bulge
387*        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal
388*        .    chunk extends from column INCOL to column NDCOL
389*        .    (including both column INCOL and column NDCOL). The
390*        .    following loop chases a 3*NBMPS column long chain of
391*        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL
392*        .    may be less than KTOP and and NDCOL may be greater than
393*        .    KBOT indicating phantom columns from which to chase
394*        .    bulges before they are actually introduced or to which
395*        .    to chase bulges beyond column KBOT.)  ====
396*
397         DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
398*
399*           ==== Bulges number MTOP to MBOT are active double implicit
400*           .    shift bulges.  There may or may not also be small
401*           .    2-by-2 bulge, if there is room.  The inactive bulges
402*           .    (if any) must wait until the active bulges have moved
403*           .    down the diagonal to make room.  The phantom matrix
404*           .    paradigm described above helps keep track.  ====
405*
406            MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
407            MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
408            M22 = MBOT + 1
409            BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
410     $              ( KBOT-2 )
411*
412*           ==== Generate reflections to chase the chain right
413*           .    one column.  (The minimum value of K is KTOP-1.) ====
414*
415            DO 20 M = MTOP, MBOT
416               K = KRCOL + 3*( M-1 )
417               IF( K.EQ.KTOP-1 ) THEN
418                  CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
419     $                         SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
420     $                         V( 1, M ) )
421                  ALPHA = V( 1, M )
422                  CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
423               ELSE
424                  BETA = H( K+1, K )
425                  V( 2, M ) = H( K+2, K )
426                  V( 3, M ) = H( K+3, K )
427                  CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
428*
429*                 ==== A Bulge may collapse because of vigilant
430*                 .    deflation or destructive underflow.  In the
431*                 .    underflow case, try the two-small-subdiagonals
432*                 .    trick to try to reinflate the bulge.  ====
433*
434                  IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
435     $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
436*
437*                    ==== Typical case: not collapsed (yet). ====
438*
439                     H( K+1, K ) = BETA
440                     H( K+2, K ) = ZERO
441                     H( K+3, K ) = ZERO
442                  ELSE
443*
444*                    ==== Atypical case: collapsed.  Attempt to
445*                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
446*                    .    If the fill resulting from the new
447*                    .    reflector is too large, then abandon it.
448*                    .    Otherwise, use the new one. ====
449*
450                     CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
451     $                            SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
452     $                            VT )
453                     ALPHA = VT( 1 )
454                     CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
455                     REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
456     $                        H( K+2, K ) )
457*
458                     IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
459     $                   ABS( REFSUM*VT( 3 ) ).GT.ULP*
460     $                   ( ABS( H( K, K ) )+ABS( H( K+1,
461     $                   K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
462*
463*                       ==== Starting a new bulge here would
464*                       .    create non-negligible fill.  Use
465*                       .    the old one with trepidation. ====
466*
467                        H( K+1, K ) = BETA
468                        H( K+2, K ) = ZERO
469                        H( K+3, K ) = ZERO
470                     ELSE
471*
472*                       ==== Stating a new bulge here would
473*                       .    create only negligible fill.
474*                       .    Replace the old reflector with
475*                       .    the new one. ====
476*
477                        H( K+1, K ) = H( K+1, K ) - REFSUM
478                        H( K+2, K ) = ZERO
479                        H( K+3, K ) = ZERO
480                        V( 1, M ) = VT( 1 )
481                        V( 2, M ) = VT( 2 )
482                        V( 3, M ) = VT( 3 )
483                     END IF
484                  END IF
485               END IF
486   20       CONTINUE
487*
488*           ==== Generate a 2-by-2 reflection, if needed. ====
489*
490            K = KRCOL + 3*( M22-1 )
491            IF( BMP22 ) THEN
492               IF( K.EQ.KTOP-1 ) THEN
493                  CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
494     $                         SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
495     $                         V( 1, M22 ) )
496                  BETA = V( 1, M22 )
497                  CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
498               ELSE
499                  BETA = H( K+1, K )
500                  V( 2, M22 ) = H( K+2, K )
501                  CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
502                  H( K+1, K ) = BETA
503                  H( K+2, K ) = ZERO
504               END IF
505            END IF
506*
507*           ==== Multiply H by reflections from the left ====
508*
509            IF( ACCUM ) THEN
510               JBOT = MIN( NDCOL, KBOT )
511            ELSE IF( WANTT ) THEN
512               JBOT = N
513            ELSE
514               JBOT = KBOT
515            END IF
516            DO 40 J = MAX( KTOP, KRCOL ), JBOT
517               MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
518               DO 30 M = MTOP, MEND
519                  K = KRCOL + 3*( M-1 )
520                  REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
521     $                     H( K+2, J )+V( 3, M )*H( K+3, J ) )
522                  H( K+1, J ) = H( K+1, J ) - REFSUM
523                  H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
524                  H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
525   30          CONTINUE
526   40       CONTINUE
527            IF( BMP22 ) THEN
528               K = KRCOL + 3*( M22-1 )
529               DO 50 J = MAX( K+1, KTOP ), JBOT
530                  REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
531     $                     H( K+2, J ) )
532                  H( K+1, J ) = H( K+1, J ) - REFSUM
533                  H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
534   50          CONTINUE
535            END IF
536*
537*           ==== Multiply H by reflections from the right.
538*           .    Delay filling in the last row until the
539*           .    vigilant deflation check is complete. ====
540*
541            IF( ACCUM ) THEN
542               JTOP = MAX( KTOP, INCOL )
543            ELSE IF( WANTT ) THEN
544               JTOP = 1
545            ELSE
546               JTOP = KTOP
547            END IF
548            DO 90 M = MTOP, MBOT
549               IF( V( 1, M ).NE.ZERO ) THEN
550                  K = KRCOL + 3*( M-1 )
551                  DO 60 J = JTOP, MIN( KBOT, K+3 )
552                     REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
553     $                        H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
554                     H( J, K+1 ) = H( J, K+1 ) - REFSUM
555                     H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
556                     H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
557   60             CONTINUE
558*
559                  IF( ACCUM ) THEN
560*
561*                    ==== Accumulate U. (If necessary, update Z later
562*                    .    with with an efficient matrix-matrix
563*                    .    multiply.) ====
564*
565                     KMS = K - INCOL
566                     DO 70 J = MAX( 1, KTOP-INCOL ), KDU
567                        REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
568     $                           U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
569                        U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
570                        U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
571                        U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
572   70                CONTINUE
573                  ELSE IF( WANTZ ) THEN
574*
575*                    ==== U is not accumulated, so update Z
576*                    .    now by multiplying by reflections
577*                    .    from the right. ====
578*
579                     DO 80 J = ILOZ, IHIZ
580                        REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
581     $                           Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
582                        Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
583                        Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
584                        Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
585   80                CONTINUE
586                  END IF
587               END IF
588   90       CONTINUE
589*
590*           ==== Special case: 2-by-2 reflection (if needed) ====
591*
592            K = KRCOL + 3*( M22-1 )
593            IF( BMP22 ) THEN
594               IF ( V( 1, M22 ).NE.ZERO ) THEN
595                  DO 100 J = JTOP, MIN( KBOT, K+3 )
596                     REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
597     $                        H( J, K+2 ) )
598                     H( J, K+1 ) = H( J, K+1 ) - REFSUM
599                     H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
600  100             CONTINUE
601*
602                  IF( ACCUM ) THEN
603                     KMS = K - INCOL
604                     DO 110 J = MAX( 1, KTOP-INCOL ), KDU
605                        REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
606     $                           V( 2, M22 )*U( J, KMS+2 ) )
607                        U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
608                        U( J, KMS+2 ) = U( J, KMS+2 ) -
609     $                                  REFSUM*V( 2, M22 )
610  110             CONTINUE
611                  ELSE IF( WANTZ ) THEN
612                     DO 120 J = ILOZ, IHIZ
613                        REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
614     $                           Z( J, K+2 ) )
615                        Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
616                        Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
617  120                CONTINUE
618                  END IF
619               END IF
620            END IF
621*
622*           ==== Vigilant deflation check ====
623*
624            MSTART = MTOP
625            IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
626     $         MSTART = MSTART + 1
627            MEND = MBOT
628            IF( BMP22 )
629     $         MEND = MEND + 1
630            IF( KRCOL.EQ.KBOT-2 )
631     $         MEND = MEND + 1
632            DO 130 M = MSTART, MEND
633               K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
634*
635*              ==== The following convergence test requires that
636*              .    the tradition small-compared-to-nearby-diagonals
637*              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
638*              .    criteria both be satisfied.  The latter improves
639*              .    accuracy in some examples. Falling back on an
640*              .    alternate convergence criterion when TST1 or TST2
641*              .    is zero (as done here) is traditional but probably
642*              .    unnecessary. ====
643*
644               IF( H( K+1, K ).NE.ZERO ) THEN
645                  TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
646                  IF( TST1.EQ.ZERO ) THEN
647                     IF( K.GE.KTOP+1 )
648     $                  TST1 = TST1 + ABS( H( K, K-1 ) )
649                     IF( K.GE.KTOP+2 )
650     $                  TST1 = TST1 + ABS( H( K, K-2 ) )
651                     IF( K.GE.KTOP+3 )
652     $                  TST1 = TST1 + ABS( H( K, K-3 ) )
653                     IF( K.LE.KBOT-2 )
654     $                  TST1 = TST1 + ABS( H( K+2, K+1 ) )
655                     IF( K.LE.KBOT-3 )
656     $                  TST1 = TST1 + ABS( H( K+3, K+1 ) )
657                     IF( K.LE.KBOT-4 )
658     $                  TST1 = TST1 + ABS( H( K+4, K+1 ) )
659                  END IF
660                  IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
661     $                 THEN
662                     H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
663                     H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
664                     H11 = MAX( ABS( H( K+1, K+1 ) ),
665     $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
666                     H22 = MIN( ABS( H( K+1, K+1 ) ),
667     $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
668                     SCL = H11 + H12
669                     TST2 = H22*( H11 / SCL )
670*
671                     IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
672     $                   MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
673                  END IF
674               END IF
675  130       CONTINUE
676*
677*           ==== Fill in the last row of each bulge. ====
678*
679            MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
680            DO 140 M = MTOP, MEND
681               K = KRCOL + 3*( M-1 )
682               REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
683               H( K+4, K+1 ) = -REFSUM
684               H( K+4, K+2 ) = -REFSUM*V( 2, M )
685               H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
686  140       CONTINUE
687*
688*           ==== End of near-the-diagonal bulge chase. ====
689*
690  150    CONTINUE
691*
692*        ==== Use U (if accumulated) to update far-from-diagonal
693*        .    entries in H.  If required, use U to update Z as
694*        .    well. ====
695*
696         IF( ACCUM ) THEN
697            IF( WANTT ) THEN
698               JTOP = 1
699               JBOT = N
700            ELSE
701               JTOP = KTOP
702               JBOT = KBOT
703            END IF
704            IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
705     $          ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
706*
707*              ==== Updates not exploiting the 2-by-2 block
708*              .    structure of U.  K1 and NU keep track of
709*              .    the location and size of U in the special
710*              .    cases of introducing bulges and chasing
711*              .    bulges off the bottom.  In these special
712*              .    cases and in case the number of shifts
713*              .    is NS = 2, there is no 2-by-2 block
714*              .    structure to exploit.  ====
715*
716               K1 = MAX( 1, KTOP-INCOL )
717               NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
718*
719*              ==== Horizontal Multiply ====
720*
721               DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
722                  JLEN = MIN( NH, JBOT-JCOL+1 )
723                  CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
724     $                        LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
725     $                        LDWH )
726                  CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
727     $                         H( INCOL+K1, JCOL ), LDH )
728  160          CONTINUE
729*
730*              ==== Vertical multiply ====
731*
732               DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
733                  JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
734                  CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
735     $                        H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
736     $                        LDU, ZERO, WV, LDWV )
737                  CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
738     $                         H( JROW, INCOL+K1 ), LDH )
739  170          CONTINUE
740*
741*              ==== Z multiply (also vertical) ====
742*
743               IF( WANTZ ) THEN
744                  DO 180 JROW = ILOZ, IHIZ, NV
745                     JLEN = MIN( NV, IHIZ-JROW+1 )
746                     CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
747     $                           Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
748     $                           LDU, ZERO, WV, LDWV )
749                     CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
750     $                            Z( JROW, INCOL+K1 ), LDZ )
751  180             CONTINUE
752               END IF
753            ELSE
754*
755*              ==== Updates exploiting U's 2-by-2 block structure.
756*              .    (I2, I4, J2, J4 are the last rows and columns
757*              .    of the blocks.) ====
758*
759               I2 = ( KDU+1 ) / 2
760               I4 = KDU
761               J2 = I4 - I2
762               J4 = KDU
763*
764*              ==== KZS and KNZ deal with the band of zeros
765*              .    along the diagonal of one of the triangular
766*              .    blocks. ====
767*
768               KZS = ( J4-J2 ) - ( NS+1 )
769               KNZ = NS + 1
770*
771*              ==== Horizontal multiply ====
772*
773               DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
774                  JLEN = MIN( NH, JBOT-JCOL+1 )
775*
776*                 ==== Copy bottom of H to top+KZS of scratch ====
777*                  (The first KZS rows get multiplied by zero.) ====
778*
779                  CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
780     $                         LDH, WH( KZS+1, 1 ), LDWH )
781*
782*                 ==== Multiply by U21**T ====
783*
784                  CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
785                  CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
786     $                        U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
787     $                        LDWH )
788*
789*                 ==== Multiply top of H by U11**T ====
790*
791                  CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
792     $                        H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
793*
794*                 ==== Copy top of H to bottom of WH ====
795*
796                  CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
797     $                         WH( I2+1, 1 ), LDWH )
798*
799*                 ==== Multiply by U21**T ====
800*
801                  CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
802     $                        U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
803*
804*                 ==== Multiply by U22 ====
805*
806                  CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
807     $                        U( J2+1, I2+1 ), LDU,
808     $                        H( INCOL+1+J2, JCOL ), LDH, ONE,
809     $                        WH( I2+1, 1 ), LDWH )
810*
811*                 ==== Copy it back ====
812*
813                  CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
814     $                         H( INCOL+1, JCOL ), LDH )
815  190          CONTINUE
816*
817*              ==== Vertical multiply ====
818*
819               DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
820                  JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
821*
822*                 ==== Copy right of H to scratch (the first KZS
823*                 .    columns get multiplied by zero) ====
824*
825                  CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
826     $                         LDH, WV( 1, 1+KZS ), LDWV )
827*
828*                 ==== Multiply by U21 ====
829*
830                  CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
831                  CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
832     $                        U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
833     $                        LDWV )
834*
835*                 ==== Multiply by U11 ====
836*
837                  CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
838     $                        H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
839     $                        LDWV )
840*
841*                 ==== Copy left of H to right of scratch ====
842*
843                  CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
844     $                         WV( 1, 1+I2 ), LDWV )
845*
846*                 ==== Multiply by U21 ====
847*
848                  CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
849     $                        U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
850*
851*                 ==== Multiply by U22 ====
852*
853                  CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
854     $                        H( JROW, INCOL+1+J2 ), LDH,
855     $                        U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
856     $                        LDWV )
857*
858*                 ==== Copy it back ====
859*
860                  CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
861     $                         H( JROW, INCOL+1 ), LDH )
862  200          CONTINUE
863*
864*              ==== Multiply Z (also vertical) ====
865*
866               IF( WANTZ ) THEN
867                  DO 210 JROW = ILOZ, IHIZ, NV
868                     JLEN = MIN( NV, IHIZ-JROW+1 )
869*
870*                    ==== Copy right of Z to left of scratch (first
871*                    .     KZS columns get multiplied by zero) ====
872*
873                     CALL DLACPY( 'ALL', JLEN, KNZ,
874     $                            Z( JROW, INCOL+1+J2 ), LDZ,
875     $                            WV( 1, 1+KZS ), LDWV )
876*
877*                    ==== Multiply by U12 ====
878*
879                     CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
880     $                            LDWV )
881                     CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
882     $                           U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
883     $                           LDWV )
884*
885*                    ==== Multiply by U11 ====
886*
887                     CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
888     $                           Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
889     $                           WV, LDWV )
890*
891*                    ==== Copy left of Z to right of scratch ====
892*
893                     CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
894     $                            LDZ, WV( 1, 1+I2 ), LDWV )
895*
896*                    ==== Multiply by U21 ====
897*
898                     CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
899     $                           U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
900     $                           LDWV )
901*
902*                    ==== Multiply by U22 ====
903*
904                     CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
905     $                           Z( JROW, INCOL+1+J2 ), LDZ,
906     $                           U( J2+1, I2+1 ), LDU, ONE,
907     $                           WV( 1, 1+I2 ), LDWV )
908*
909*                    ==== Copy the result back to Z ====
910*
911                     CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
912     $                            Z( JROW, INCOL+1 ), LDZ )
913  210             CONTINUE
914               END IF
915            END IF
916         END IF
917  220 CONTINUE
918*
919*     ==== End of DLAQR5 ====
920*
921      END
922