1*> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep. 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download DLAQR5 + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 22* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 23* LDU, NV, WV, LDWV, NH, WH, LDWH ) 24* 25* .. Scalar Arguments .. 26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 28* LOGICAL WANTT, WANTZ 29* .. 30* .. Array Arguments .. 31* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 32* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 33* $ Z( LDZ, * ) 34* .. 35* 36* 37*> \par Purpose: 38* ============= 39*> 40*> \verbatim 41*> 42*> DLAQR5, called by DLAQR0, performs a 43*> single small-bulge multi-shift QR sweep. 44*> \endverbatim 45* 46* Arguments: 47* ========== 48* 49*> \param[in] WANTT 50*> \verbatim 51*> WANTT is logical scalar 52*> WANTT = .true. if the quasi-triangular Schur factor 53*> is being computed. WANTT is set to .false. otherwise. 54*> \endverbatim 55*> 56*> \param[in] WANTZ 57*> \verbatim 58*> WANTZ is logical scalar 59*> WANTZ = .true. if the orthogonal Schur factor is being 60*> computed. WANTZ is set to .false. otherwise. 61*> \endverbatim 62*> 63*> \param[in] KACC22 64*> \verbatim 65*> KACC22 is integer with value 0, 1, or 2. 66*> Specifies the computation mode of far-from-diagonal 67*> orthogonal updates. 68*> = 0: DLAQR5 does not accumulate reflections and does not 69*> use matrix-matrix multiply to update far-from-diagonal 70*> matrix entries. 71*> = 1: DLAQR5 accumulates reflections and uses matrix-matrix 72*> multiply to update the far-from-diagonal matrix entries. 73*> = 2: DLAQR5 accumulates reflections, uses matrix-matrix 74*> multiply to update the far-from-diagonal matrix entries, 75*> and takes advantage of 2-by-2 block structure during 76*> matrix multiplies. 77*> \endverbatim 78*> 79*> \param[in] N 80*> \verbatim 81*> N is integer scalar 82*> N is the order of the Hessenberg matrix H upon which this 83*> subroutine operates. 84*> \endverbatim 85*> 86*> \param[in] KTOP 87*> \verbatim 88*> KTOP is integer scalar 89*> \endverbatim 90*> 91*> \param[in] KBOT 92*> \verbatim 93*> KBOT is integer scalar 94*> These are the first and last rows and columns of an 95*> isolated diagonal block upon which the QR sweep is to be 96*> applied. It is assumed without a check that 97*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 98*> and 99*> either KBOT = N or H(KBOT+1,KBOT) = 0. 100*> \endverbatim 101*> 102*> \param[in] NSHFTS 103*> \verbatim 104*> NSHFTS is integer scalar 105*> NSHFTS gives the number of simultaneous shifts. NSHFTS 106*> must be positive and even. 107*> \endverbatim 108*> 109*> \param[in,out] SR 110*> \verbatim 111*> SR is DOUBLE PRECISION array of size (NSHFTS) 112*> \endverbatim 113*> 114*> \param[in,out] SI 115*> \verbatim 116*> SI is DOUBLE PRECISION array of size (NSHFTS) 117*> SR contains the real parts and SI contains the imaginary 118*> parts of the NSHFTS shifts of origin that define the 119*> multi-shift QR sweep. On output SR and SI may be 120*> reordered. 121*> \endverbatim 122*> 123*> \param[in,out] H 124*> \verbatim 125*> H is DOUBLE PRECISION array of size (LDH,N) 126*> On input H contains a Hessenberg matrix. On output a 127*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied 128*> to the isolated diagonal block in rows and columns KTOP 129*> through KBOT. 130*> \endverbatim 131*> 132*> \param[in] LDH 133*> \verbatim 134*> LDH is integer scalar 135*> LDH is the leading dimension of H just as declared in the 136*> calling procedure. LDH.GE.MAX(1,N). 137*> \endverbatim 138*> 139*> \param[in] ILOZ 140*> \verbatim 141*> ILOZ is INTEGER 142*> \endverbatim 143*> 144*> \param[in] IHIZ 145*> \verbatim 146*> IHIZ is INTEGER 147*> Specify the rows of Z to which transformations must be 148*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N 149*> \endverbatim 150*> 151*> \param[in,out] Z 152*> \verbatim 153*> Z is DOUBLE PRECISION array of size (LDZ,IHI) 154*> If WANTZ = .TRUE., then the QR Sweep orthogonal 155*> similarity transformation is accumulated into 156*> Z(ILOZ:IHIZ,ILO:IHI) from the right. 157*> If WANTZ = .FALSE., then Z is unreferenced. 158*> \endverbatim 159*> 160*> \param[in] LDZ 161*> \verbatim 162*> LDZ is integer scalar 163*> LDA is the leading dimension of Z just as declared in 164*> the calling procedure. LDZ.GE.N. 165*> \endverbatim 166*> 167*> \param[out] V 168*> \verbatim 169*> V is DOUBLE PRECISION array of size (LDV,NSHFTS/2) 170*> \endverbatim 171*> 172*> \param[in] LDV 173*> \verbatim 174*> LDV is integer scalar 175*> LDV is the leading dimension of V as declared in the 176*> calling procedure. LDV.GE.3. 177*> \endverbatim 178*> 179*> \param[out] U 180*> \verbatim 181*> U is DOUBLE PRECISION array of size 182*> (LDU,3*NSHFTS-3) 183*> \endverbatim 184*> 185*> \param[in] LDU 186*> \verbatim 187*> LDU is integer scalar 188*> LDU is the leading dimension of U just as declared in the 189*> in the calling subroutine. LDU.GE.3*NSHFTS-3. 190*> \endverbatim 191*> 192*> \param[in] NH 193*> \verbatim 194*> NH is integer scalar 195*> NH is the number of columns in array WH available for 196*> workspace. NH.GE.1. 197*> \endverbatim 198*> 199*> \param[out] WH 200*> \verbatim 201*> WH is DOUBLE PRECISION array of size (LDWH,NH) 202*> \endverbatim 203*> 204*> \param[in] LDWH 205*> \verbatim 206*> LDWH is integer scalar 207*> Leading dimension of WH just as declared in the 208*> calling procedure. LDWH.GE.3*NSHFTS-3. 209*> \endverbatim 210*> 211*> \param[in] NV 212*> \verbatim 213*> NV is integer scalar 214*> NV is the number of rows in WV agailable for workspace. 215*> NV.GE.1. 216*> \endverbatim 217*> 218*> \param[out] WV 219*> \verbatim 220*> WV is DOUBLE PRECISION array of size 221*> (LDWV,3*NSHFTS-3) 222*> \endverbatim 223*> 224*> \param[in] LDWV 225*> \verbatim 226*> LDWV is integer scalar 227*> LDWV is the leading dimension of WV as declared in the 228*> in the calling subroutine. LDWV.GE.NV. 229*> \endverbatim 230* 231* Authors: 232* ======== 233* 234*> \author Univ. of Tennessee 235*> \author Univ. of California Berkeley 236*> \author Univ. of Colorado Denver 237*> \author NAG Ltd. 238* 239*> \date September 2012 240* 241*> \ingroup doubleOTHERauxiliary 242* 243*> \par Contributors: 244* ================== 245*> 246*> Karen Braman and Ralph Byers, Department of Mathematics, 247*> University of Kansas, USA 248* 249*> \par References: 250* ================ 251*> 252*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR 253*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 254*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages 255*> 929--947, 2002. 256*> 257* ===================================================================== 258 SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 259 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 260 $ LDU, NV, WV, LDWV, NH, WH, LDWH ) 261* 262* -- LAPACK auxiliary routine (version 3.4.2) -- 263* -- LAPACK is a software package provided by Univ. of Tennessee, -- 264* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 265* September 2012 266* 267* .. Scalar Arguments .. 268 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 269 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 270 LOGICAL WANTT, WANTZ 271* .. 272* .. Array Arguments .. 273 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 274 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 275 $ Z( LDZ, * ) 276* .. 277* 278* ================================================================ 279* .. Parameters .. 280 DOUBLE PRECISION ZERO, ONE 281 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) 282* .. 283* .. Local Scalars .. 284 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM, 285 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, 286 $ ULP 287 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, 288 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, 289 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, 290 $ NS, NU 291 LOGICAL ACCUM, BLK22, BMP22 292* .. 293* .. External Functions .. 294 DOUBLE PRECISION DLAMCH 295 EXTERNAL DLAMCH 296* .. 297* .. Intrinsic Functions .. 298* 299 INTRINSIC ABS, DBLE, MAX, MIN, MOD 300* .. 301* .. Local Arrays .. 302 DOUBLE PRECISION VT( 3 ) 303* .. 304* .. External Subroutines .. 305 EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET, 306 $ DTRMM 307* .. 308* .. Executable Statements .. 309* 310* ==== If there are no shifts, then there is nothing to do. ==== 311* 312 IF( NSHFTS.LT.2 ) 313 $ RETURN 314* 315* ==== If the active block is empty or 1-by-1, then there 316* . is nothing to do. ==== 317* 318 IF( KTOP.GE.KBOT ) 319 $ RETURN 320* 321* ==== Shuffle shifts into pairs of real shifts and pairs 322* . of complex conjugate shifts assuming complex 323* . conjugate shifts are already adjacent to one 324* . another. ==== 325* 326 DO 10 I = 1, NSHFTS - 2, 2 327 IF( SI( I ).NE.-SI( I+1 ) ) THEN 328* 329 SWAP = SR( I ) 330 SR( I ) = SR( I+1 ) 331 SR( I+1 ) = SR( I+2 ) 332 SR( I+2 ) = SWAP 333* 334 SWAP = SI( I ) 335 SI( I ) = SI( I+1 ) 336 SI( I+1 ) = SI( I+2 ) 337 SI( I+2 ) = SWAP 338 END IF 339 10 CONTINUE 340* 341* ==== NSHFTS is supposed to be even, but if it is odd, 342* . then simply reduce it by one. The shuffle above 343* . ensures that the dropped shift is real and that 344* . the remaining shifts are paired. ==== 345* 346 NS = NSHFTS - MOD( NSHFTS, 2 ) 347* 348* ==== Machine constants for deflation ==== 349* 350 SAFMIN = DLAMCH( 'SAFE MINIMUM' ) 351 SAFMAX = ONE / SAFMIN 352 CALL DLABAD( SAFMIN, SAFMAX ) 353 ULP = DLAMCH( 'PRECISION' ) 354 SMLNUM = SAFMIN*( DBLE( N ) / ULP ) 355* 356* ==== Use accumulated reflections to update far-from-diagonal 357* . entries ? ==== 358* 359 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) 360* 361* ==== If so, exploit the 2-by-2 block structure? ==== 362* 363 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) 364* 365* ==== clear trash ==== 366* 367 IF( KTOP+2.LE.KBOT ) 368 $ H( KTOP+2, KTOP ) = ZERO 369* 370* ==== NBMPS = number of 2-shift bulges in the chain ==== 371* 372 NBMPS = NS / 2 373* 374* ==== KDU = width of slab ==== 375* 376 KDU = 6*NBMPS - 3 377* 378* ==== Create and chase chains of NBMPS bulges ==== 379* 380 DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 381 NDCOL = INCOL + KDU 382 IF( ACCUM ) 383 $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) 384* 385* ==== Near-the-diagonal bulge chase. The following loop 386* . performs the near-the-diagonal part of a small bulge 387* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal 388* . chunk extends from column INCOL to column NDCOL 389* . (including both column INCOL and column NDCOL). The 390* . following loop chases a 3*NBMPS column long chain of 391* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL 392* . may be less than KTOP and and NDCOL may be greater than 393* . KBOT indicating phantom columns from which to chase 394* . bulges before they are actually introduced or to which 395* . to chase bulges beyond column KBOT.) ==== 396* 397 DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) 398* 399* ==== Bulges number MTOP to MBOT are active double implicit 400* . shift bulges. There may or may not also be small 401* . 2-by-2 bulge, if there is room. The inactive bulges 402* . (if any) must wait until the active bulges have moved 403* . down the diagonal to make room. The phantom matrix 404* . paradigm described above helps keep track. ==== 405* 406 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) 407 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) 408 M22 = MBOT + 1 409 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. 410 $ ( KBOT-2 ) 411* 412* ==== Generate reflections to chase the chain right 413* . one column. (The minimum value of K is KTOP-1.) ==== 414* 415 DO 20 M = MTOP, MBOT 416 K = KRCOL + 3*( M-1 ) 417 IF( K.EQ.KTOP-1 ) THEN 418 CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), 419 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 420 $ V( 1, M ) ) 421 ALPHA = V( 1, M ) 422 CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) 423 ELSE 424 BETA = H( K+1, K ) 425 V( 2, M ) = H( K+2, K ) 426 V( 3, M ) = H( K+3, K ) 427 CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) 428* 429* ==== A Bulge may collapse because of vigilant 430* . deflation or destructive underflow. In the 431* . underflow case, try the two-small-subdiagonals 432* . trick to try to reinflate the bulge. ==== 433* 434 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. 435 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN 436* 437* ==== Typical case: not collapsed (yet). ==== 438* 439 H( K+1, K ) = BETA 440 H( K+2, K ) = ZERO 441 H( K+3, K ) = ZERO 442 ELSE 443* 444* ==== Atypical case: collapsed. Attempt to 445* . reintroduce ignoring H(K+1,K) and H(K+2,K). 446* . If the fill resulting from the new 447* . reflector is too large, then abandon it. 448* . Otherwise, use the new one. ==== 449* 450 CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), 451 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 452 $ VT ) 453 ALPHA = VT( 1 ) 454 CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) 455 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* 456 $ H( K+2, K ) ) 457* 458 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ 459 $ ABS( REFSUM*VT( 3 ) ).GT.ULP* 460 $ ( ABS( H( K, K ) )+ABS( H( K+1, 461 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN 462* 463* ==== Starting a new bulge here would 464* . create non-negligible fill. Use 465* . the old one with trepidation. ==== 466* 467 H( K+1, K ) = BETA 468 H( K+2, K ) = ZERO 469 H( K+3, K ) = ZERO 470 ELSE 471* 472* ==== Stating a new bulge here would 473* . create only negligible fill. 474* . Replace the old reflector with 475* . the new one. ==== 476* 477 H( K+1, K ) = H( K+1, K ) - REFSUM 478 H( K+2, K ) = ZERO 479 H( K+3, K ) = ZERO 480 V( 1, M ) = VT( 1 ) 481 V( 2, M ) = VT( 2 ) 482 V( 3, M ) = VT( 3 ) 483 END IF 484 END IF 485 END IF 486 20 CONTINUE 487* 488* ==== Generate a 2-by-2 reflection, if needed. ==== 489* 490 K = KRCOL + 3*( M22-1 ) 491 IF( BMP22 ) THEN 492 IF( K.EQ.KTOP-1 ) THEN 493 CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), 494 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), 495 $ V( 1, M22 ) ) 496 BETA = V( 1, M22 ) 497 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 498 ELSE 499 BETA = H( K+1, K ) 500 V( 2, M22 ) = H( K+2, K ) 501 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 502 H( K+1, K ) = BETA 503 H( K+2, K ) = ZERO 504 END IF 505 END IF 506* 507* ==== Multiply H by reflections from the left ==== 508* 509 IF( ACCUM ) THEN 510 JBOT = MIN( NDCOL, KBOT ) 511 ELSE IF( WANTT ) THEN 512 JBOT = N 513 ELSE 514 JBOT = KBOT 515 END IF 516 DO 40 J = MAX( KTOP, KRCOL ), JBOT 517 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) 518 DO 30 M = MTOP, MEND 519 K = KRCOL + 3*( M-1 ) 520 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* 521 $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) 522 H( K+1, J ) = H( K+1, J ) - REFSUM 523 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) 524 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) 525 30 CONTINUE 526 40 CONTINUE 527 IF( BMP22 ) THEN 528 K = KRCOL + 3*( M22-1 ) 529 DO 50 J = MAX( K+1, KTOP ), JBOT 530 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* 531 $ H( K+2, J ) ) 532 H( K+1, J ) = H( K+1, J ) - REFSUM 533 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) 534 50 CONTINUE 535 END IF 536* 537* ==== Multiply H by reflections from the right. 538* . Delay filling in the last row until the 539* . vigilant deflation check is complete. ==== 540* 541 IF( ACCUM ) THEN 542 JTOP = MAX( KTOP, INCOL ) 543 ELSE IF( WANTT ) THEN 544 JTOP = 1 545 ELSE 546 JTOP = KTOP 547 END IF 548 DO 90 M = MTOP, MBOT 549 IF( V( 1, M ).NE.ZERO ) THEN 550 K = KRCOL + 3*( M-1 ) 551 DO 60 J = JTOP, MIN( KBOT, K+3 ) 552 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* 553 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) 554 H( J, K+1 ) = H( J, K+1 ) - REFSUM 555 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) 556 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) 557 60 CONTINUE 558* 559 IF( ACCUM ) THEN 560* 561* ==== Accumulate U. (If necessary, update Z later 562* . with with an efficient matrix-matrix 563* . multiply.) ==== 564* 565 KMS = K - INCOL 566 DO 70 J = MAX( 1, KTOP-INCOL ), KDU 567 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* 568 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) 569 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 570 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) 571 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) 572 70 CONTINUE 573 ELSE IF( WANTZ ) THEN 574* 575* ==== U is not accumulated, so update Z 576* . now by multiplying by reflections 577* . from the right. ==== 578* 579 DO 80 J = ILOZ, IHIZ 580 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* 581 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) 582 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 583 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) 584 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) 585 80 CONTINUE 586 END IF 587 END IF 588 90 CONTINUE 589* 590* ==== Special case: 2-by-2 reflection (if needed) ==== 591* 592 K = KRCOL + 3*( M22-1 ) 593 IF( BMP22 ) THEN 594 IF ( V( 1, M22 ).NE.ZERO ) THEN 595 DO 100 J = JTOP, MIN( KBOT, K+3 ) 596 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* 597 $ H( J, K+2 ) ) 598 H( J, K+1 ) = H( J, K+1 ) - REFSUM 599 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) 600 100 CONTINUE 601* 602 IF( ACCUM ) THEN 603 KMS = K - INCOL 604 DO 110 J = MAX( 1, KTOP-INCOL ), KDU 605 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ 606 $ V( 2, M22 )*U( J, KMS+2 ) ) 607 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 608 U( J, KMS+2 ) = U( J, KMS+2 ) - 609 $ REFSUM*V( 2, M22 ) 610 110 CONTINUE 611 ELSE IF( WANTZ ) THEN 612 DO 120 J = ILOZ, IHIZ 613 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* 614 $ Z( J, K+2 ) ) 615 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 616 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) 617 120 CONTINUE 618 END IF 619 END IF 620 END IF 621* 622* ==== Vigilant deflation check ==== 623* 624 MSTART = MTOP 625 IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) 626 $ MSTART = MSTART + 1 627 MEND = MBOT 628 IF( BMP22 ) 629 $ MEND = MEND + 1 630 IF( KRCOL.EQ.KBOT-2 ) 631 $ MEND = MEND + 1 632 DO 130 M = MSTART, MEND 633 K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) 634* 635* ==== The following convergence test requires that 636* . the tradition small-compared-to-nearby-diagonals 637* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 638* . criteria both be satisfied. The latter improves 639* . accuracy in some examples. Falling back on an 640* . alternate convergence criterion when TST1 or TST2 641* . is zero (as done here) is traditional but probably 642* . unnecessary. ==== 643* 644 IF( H( K+1, K ).NE.ZERO ) THEN 645 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) 646 IF( TST1.EQ.ZERO ) THEN 647 IF( K.GE.KTOP+1 ) 648 $ TST1 = TST1 + ABS( H( K, K-1 ) ) 649 IF( K.GE.KTOP+2 ) 650 $ TST1 = TST1 + ABS( H( K, K-2 ) ) 651 IF( K.GE.KTOP+3 ) 652 $ TST1 = TST1 + ABS( H( K, K-3 ) ) 653 IF( K.LE.KBOT-2 ) 654 $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) 655 IF( K.LE.KBOT-3 ) 656 $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) 657 IF( K.LE.KBOT-4 ) 658 $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) 659 END IF 660 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 661 $ THEN 662 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 663 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 664 H11 = MAX( ABS( H( K+1, K+1 ) ), 665 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 666 H22 = MIN( ABS( H( K+1, K+1 ) ), 667 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 668 SCL = H11 + H12 669 TST2 = H22*( H11 / SCL ) 670* 671 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. 672 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO 673 END IF 674 END IF 675 130 CONTINUE 676* 677* ==== Fill in the last row of each bulge. ==== 678* 679 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) 680 DO 140 M = MTOP, MEND 681 K = KRCOL + 3*( M-1 ) 682 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) 683 H( K+4, K+1 ) = -REFSUM 684 H( K+4, K+2 ) = -REFSUM*V( 2, M ) 685 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M ) 686 140 CONTINUE 687* 688* ==== End of near-the-diagonal bulge chase. ==== 689* 690 150 CONTINUE 691* 692* ==== Use U (if accumulated) to update far-from-diagonal 693* . entries in H. If required, use U to update Z as 694* . well. ==== 695* 696 IF( ACCUM ) THEN 697 IF( WANTT ) THEN 698 JTOP = 1 699 JBOT = N 700 ELSE 701 JTOP = KTOP 702 JBOT = KBOT 703 END IF 704 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. 705 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN 706* 707* ==== Updates not exploiting the 2-by-2 block 708* . structure of U. K1 and NU keep track of 709* . the location and size of U in the special 710* . cases of introducing bulges and chasing 711* . bulges off the bottom. In these special 712* . cases and in case the number of shifts 713* . is NS = 2, there is no 2-by-2 block 714* . structure to exploit. ==== 715* 716 K1 = MAX( 1, KTOP-INCOL ) 717 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 718* 719* ==== Horizontal Multiply ==== 720* 721 DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 722 JLEN = MIN( NH, JBOT-JCOL+1 ) 723 CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), 724 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, 725 $ LDWH ) 726 CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH, 727 $ H( INCOL+K1, JCOL ), LDH ) 728 160 CONTINUE 729* 730* ==== Vertical multiply ==== 731* 732 DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV 733 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) 734 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, 735 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), 736 $ LDU, ZERO, WV, LDWV ) 737 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, 738 $ H( JROW, INCOL+K1 ), LDH ) 739 170 CONTINUE 740* 741* ==== Z multiply (also vertical) ==== 742* 743 IF( WANTZ ) THEN 744 DO 180 JROW = ILOZ, IHIZ, NV 745 JLEN = MIN( NV, IHIZ-JROW+1 ) 746 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, 747 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), 748 $ LDU, ZERO, WV, LDWV ) 749 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, 750 $ Z( JROW, INCOL+K1 ), LDZ ) 751 180 CONTINUE 752 END IF 753 ELSE 754* 755* ==== Updates exploiting U's 2-by-2 block structure. 756* . (I2, I4, J2, J4 are the last rows and columns 757* . of the blocks.) ==== 758* 759 I2 = ( KDU+1 ) / 2 760 I4 = KDU 761 J2 = I4 - I2 762 J4 = KDU 763* 764* ==== KZS and KNZ deal with the band of zeros 765* . along the diagonal of one of the triangular 766* . blocks. ==== 767* 768 KZS = ( J4-J2 ) - ( NS+1 ) 769 KNZ = NS + 1 770* 771* ==== Horizontal multiply ==== 772* 773 DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 774 JLEN = MIN( NH, JBOT-JCOL+1 ) 775* 776* ==== Copy bottom of H to top+KZS of scratch ==== 777* (The first KZS rows get multiplied by zero.) ==== 778* 779 CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), 780 $ LDH, WH( KZS+1, 1 ), LDWH ) 781* 782* ==== Multiply by U21**T ==== 783* 784 CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) 785 CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, 786 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), 787 $ LDWH ) 788* 789* ==== Multiply top of H by U11**T ==== 790* 791 CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, 792 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) 793* 794* ==== Copy top of H to bottom of WH ==== 795* 796 CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, 797 $ WH( I2+1, 1 ), LDWH ) 798* 799* ==== Multiply by U21**T ==== 800* 801 CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, 802 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) 803* 804* ==== Multiply by U22 ==== 805* 806 CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, 807 $ U( J2+1, I2+1 ), LDU, 808 $ H( INCOL+1+J2, JCOL ), LDH, ONE, 809 $ WH( I2+1, 1 ), LDWH ) 810* 811* ==== Copy it back ==== 812* 813 CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH, 814 $ H( INCOL+1, JCOL ), LDH ) 815 190 CONTINUE 816* 817* ==== Vertical multiply ==== 818* 819 DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV 820 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) 821* 822* ==== Copy right of H to scratch (the first KZS 823* . columns get multiplied by zero) ==== 824* 825 CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), 826 $ LDH, WV( 1, 1+KZS ), LDWV ) 827* 828* ==== Multiply by U21 ==== 829* 830 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) 831 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 832 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 833 $ LDWV ) 834* 835* ==== Multiply by U11 ==== 836* 837 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, 838 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, 839 $ LDWV ) 840* 841* ==== Copy left of H to right of scratch ==== 842* 843 CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, 844 $ WV( 1, 1+I2 ), LDWV ) 845* 846* ==== Multiply by U21 ==== 847* 848 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 849 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) 850* 851* ==== Multiply by U22 ==== 852* 853 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 854 $ H( JROW, INCOL+1+J2 ), LDH, 855 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), 856 $ LDWV ) 857* 858* ==== Copy it back ==== 859* 860 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, 861 $ H( JROW, INCOL+1 ), LDH ) 862 200 CONTINUE 863* 864* ==== Multiply Z (also vertical) ==== 865* 866 IF( WANTZ ) THEN 867 DO 210 JROW = ILOZ, IHIZ, NV 868 JLEN = MIN( NV, IHIZ-JROW+1 ) 869* 870* ==== Copy right of Z to left of scratch (first 871* . KZS columns get multiplied by zero) ==== 872* 873 CALL DLACPY( 'ALL', JLEN, KNZ, 874 $ Z( JROW, INCOL+1+J2 ), LDZ, 875 $ WV( 1, 1+KZS ), LDWV ) 876* 877* ==== Multiply by U12 ==== 878* 879 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, 880 $ LDWV ) 881 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 882 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 883 $ LDWV ) 884* 885* ==== Multiply by U11 ==== 886* 887 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, 888 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, 889 $ WV, LDWV ) 890* 891* ==== Copy left of Z to right of scratch ==== 892* 893 CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), 894 $ LDZ, WV( 1, 1+I2 ), LDWV ) 895* 896* ==== Multiply by U21 ==== 897* 898 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 899 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), 900 $ LDWV ) 901* 902* ==== Multiply by U22 ==== 903* 904 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 905 $ Z( JROW, INCOL+1+J2 ), LDZ, 906 $ U( J2+1, I2+1 ), LDU, ONE, 907 $ WV( 1, 1+I2 ), LDWV ) 908* 909* ==== Copy the result back to Z ==== 910* 911 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, 912 $ Z( JROW, INCOL+1 ), LDZ ) 913 210 CONTINUE 914 END IF 915 END IF 916 END IF 917 220 CONTINUE 918* 919* ==== End of DLAQR5 ==== 920* 921 END 922