1*> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep. 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download SLAQR5 + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 22* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 23* LDU, NV, WV, LDWV, NH, WH, LDWH ) 24* 25* .. Scalar Arguments .. 26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 28* LOGICAL WANTT, WANTZ 29* .. 30* .. Array Arguments .. 31* REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 32* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 33* $ Z( LDZ, * ) 34* .. 35* 36* 37*> \par Purpose: 38* ============= 39*> 40*> \verbatim 41*> 42*> SLAQR5, called by SLAQR0, performs a 43*> single small-bulge multi-shift QR sweep. 44*> \endverbatim 45* 46* Arguments: 47* ========== 48* 49*> \param[in] WANTT 50*> \verbatim 51*> WANTT is LOGICAL 52*> WANTT = .true. if the quasi-triangular Schur factor 53*> is being computed. WANTT is set to .false. otherwise. 54*> \endverbatim 55*> 56*> \param[in] WANTZ 57*> \verbatim 58*> WANTZ is LOGICAL 59*> WANTZ = .true. if the orthogonal Schur factor is being 60*> computed. WANTZ is set to .false. otherwise. 61*> \endverbatim 62*> 63*> \param[in] KACC22 64*> \verbatim 65*> KACC22 is INTEGER with value 0, 1, or 2. 66*> Specifies the computation mode of far-from-diagonal 67*> orthogonal updates. 68*> = 0: SLAQR5 does not accumulate reflections and does not 69*> use matrix-matrix multiply to update far-from-diagonal 70*> matrix entries. 71*> = 1: SLAQR5 accumulates reflections and uses matrix-matrix 72*> multiply to update the far-from-diagonal matrix entries. 73*> = 2: Same as KACC22 = 1. This option used to enable exploiting 74*> the 2-by-2 structure during matrix multiplications, but 75*> this is no longer supported. 76*> \endverbatim 77*> 78*> \param[in] N 79*> \verbatim 80*> N is INTEGER 81*> N is the order of the Hessenberg matrix H upon which this 82*> subroutine operates. 83*> \endverbatim 84*> 85*> \param[in] KTOP 86*> \verbatim 87*> KTOP is INTEGER 88*> \endverbatim 89*> 90*> \param[in] KBOT 91*> \verbatim 92*> KBOT is INTEGER 93*> These are the first and last rows and columns of an 94*> isolated diagonal block upon which the QR sweep is to be 95*> applied. It is assumed without a check that 96*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 97*> and 98*> either KBOT = N or H(KBOT+1,KBOT) = 0. 99*> \endverbatim 100*> 101*> \param[in] NSHFTS 102*> \verbatim 103*> NSHFTS is INTEGER 104*> NSHFTS gives the number of simultaneous shifts. NSHFTS 105*> must be positive and even. 106*> \endverbatim 107*> 108*> \param[in,out] SR 109*> \verbatim 110*> SR is REAL array, dimension (NSHFTS) 111*> \endverbatim 112*> 113*> \param[in,out] SI 114*> \verbatim 115*> SI is REAL array, dimension (NSHFTS) 116*> SR contains the real parts and SI contains the imaginary 117*> parts of the NSHFTS shifts of origin that define the 118*> multi-shift QR sweep. On output SR and SI may be 119*> reordered. 120*> \endverbatim 121*> 122*> \param[in,out] H 123*> \verbatim 124*> H is REAL array, dimension (LDH,N) 125*> On input H contains a Hessenberg matrix. On output a 126*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied 127*> to the isolated diagonal block in rows and columns KTOP 128*> through KBOT. 129*> \endverbatim 130*> 131*> \param[in] LDH 132*> \verbatim 133*> LDH is INTEGER 134*> LDH is the leading dimension of H just as declared in the 135*> calling procedure. LDH >= MAX(1,N). 136*> \endverbatim 137*> 138*> \param[in] ILOZ 139*> \verbatim 140*> ILOZ is INTEGER 141*> \endverbatim 142*> 143*> \param[in] IHIZ 144*> \verbatim 145*> IHIZ is INTEGER 146*> Specify the rows of Z to which transformations must be 147*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N 148*> \endverbatim 149*> 150*> \param[in,out] Z 151*> \verbatim 152*> Z is REAL array, dimension (LDZ,IHIZ) 153*> If WANTZ = .TRUE., then the QR Sweep orthogonal 154*> similarity transformation is accumulated into 155*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. 156*> If WANTZ = .FALSE., then Z is unreferenced. 157*> \endverbatim 158*> 159*> \param[in] LDZ 160*> \verbatim 161*> LDZ is INTEGER 162*> LDA is the leading dimension of Z just as declared in 163*> the calling procedure. LDZ >= N. 164*> \endverbatim 165*> 166*> \param[out] V 167*> \verbatim 168*> V is REAL array, dimension (LDV,NSHFTS/2) 169*> \endverbatim 170*> 171*> \param[in] LDV 172*> \verbatim 173*> LDV is INTEGER 174*> LDV is the leading dimension of V as declared in the 175*> calling procedure. LDV >= 3. 176*> \endverbatim 177*> 178*> \param[out] U 179*> \verbatim 180*> U is REAL array, dimension (LDU,2*NSHFTS) 181*> \endverbatim 182*> 183*> \param[in] LDU 184*> \verbatim 185*> LDU is INTEGER 186*> LDU is the leading dimension of U just as declared in the 187*> in the calling subroutine. LDU >= 2*NSHFTS. 188*> \endverbatim 189*> 190*> \param[in] NV 191*> \verbatim 192*> NV is INTEGER 193*> NV is the number of rows in WV agailable for workspace. 194*> NV >= 1. 195*> \endverbatim 196*> 197*> \param[out] WV 198*> \verbatim 199*> WV is REAL array, dimension (LDWV,2*NSHFTS) 200*> \endverbatim 201*> 202*> \param[in] LDWV 203*> \verbatim 204*> LDWV is INTEGER 205*> LDWV is the leading dimension of WV as declared in the 206*> in the calling subroutine. LDWV >= NV. 207*> \endverbatim 208* 209*> \param[in] NH 210*> \verbatim 211*> NH is INTEGER 212*> NH is the number of columns in array WH available for 213*> workspace. NH >= 1. 214*> \endverbatim 215*> 216*> \param[out] WH 217*> \verbatim 218*> WH is REAL array, dimension (LDWH,NH) 219*> \endverbatim 220*> 221*> \param[in] LDWH 222*> \verbatim 223*> LDWH is INTEGER 224*> Leading dimension of WH just as declared in the 225*> calling procedure. LDWH >= 2*NSHFTS. 226*> \endverbatim 227*> 228* Authors: 229* ======== 230* 231*> \author Univ. of Tennessee 232*> \author Univ. of California Berkeley 233*> \author Univ. of Colorado Denver 234*> \author NAG Ltd. 235* 236*> \ingroup realOTHERauxiliary 237* 238*> \par Contributors: 239* ================== 240*> 241*> Karen Braman and Ralph Byers, Department of Mathematics, 242*> University of Kansas, USA 243*> 244*> Lars Karlsson, Daniel Kressner, and Bruno Lang 245*> 246*> Thijs Steel, Department of Computer science, 247*> KU Leuven, Belgium 248* 249*> \par References: 250* ================ 251*> 252*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR 253*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 254*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages 255*> 929--947, 2002. 256*> 257*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed 258*> chains of bulges in multishift QR algorithms. 259*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014). 260*> 261* ===================================================================== 262 SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, 263 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, 264 $ LDU, NV, WV, LDWV, NH, WH, LDWH ) 265 IMPLICIT NONE 266* 267* -- LAPACK auxiliary routine -- 268* -- LAPACK is a software package provided by Univ. of Tennessee, -- 269* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 270* 271* .. Scalar Arguments .. 272 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 273 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 274 LOGICAL WANTT, WANTZ 275* .. 276* .. Array Arguments .. 277 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), 278 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), 279 $ Z( LDZ, * ) 280* .. 281* 282* ================================================================ 283* .. Parameters .. 284 REAL ZERO, ONE 285 PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) 286* .. 287* .. Local Scalars .. 288 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM, 289 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, 290 $ ULP 291 INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN, 292 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL, 293 $ M, M22, MBOT, MTOP, NBMPS, NDCOL, 294 $ NS, NU 295 LOGICAL ACCUM, BMP22 296* .. 297* .. External Functions .. 298 REAL SLAMCH 299 EXTERNAL SLAMCH 300* .. 301* .. Intrinsic Functions .. 302* 303 INTRINSIC ABS, MAX, MIN, MOD, REAL 304* .. 305* .. Local Arrays .. 306 REAL VT( 3 ) 307* .. 308* .. External Subroutines .. 309 EXTERNAL SGEMM, SLABAD, SLACPY, SLAQR1, SLARFG, SLASET, 310 $ STRMM 311* .. 312* .. Executable Statements .. 313* 314* ==== If there are no shifts, then there is nothing to do. ==== 315* 316 IF( NSHFTS.LT.2 ) 317 $ RETURN 318* 319* ==== If the active block is empty or 1-by-1, then there 320* . is nothing to do. ==== 321* 322 IF( KTOP.GE.KBOT ) 323 $ RETURN 324* 325* ==== Shuffle shifts into pairs of real shifts and pairs 326* . of complex conjugate shifts assuming complex 327* . conjugate shifts are already adjacent to one 328* . another. ==== 329* 330 DO 10 I = 1, NSHFTS - 2, 2 331 IF( SI( I ).NE.-SI( I+1 ) ) THEN 332* 333 SWAP = SR( I ) 334 SR( I ) = SR( I+1 ) 335 SR( I+1 ) = SR( I+2 ) 336 SR( I+2 ) = SWAP 337* 338 SWAP = SI( I ) 339 SI( I ) = SI( I+1 ) 340 SI( I+1 ) = SI( I+2 ) 341 SI( I+2 ) = SWAP 342 END IF 343 10 CONTINUE 344* 345* ==== NSHFTS is supposed to be even, but if it is odd, 346* . then simply reduce it by one. The shuffle above 347* . ensures that the dropped shift is real and that 348* . the remaining shifts are paired. ==== 349* 350 NS = NSHFTS - MOD( NSHFTS, 2 ) 351* 352* ==== Machine constants for deflation ==== 353* 354 SAFMIN = SLAMCH( 'SAFE MINIMUM' ) 355 SAFMAX = ONE / SAFMIN 356 CALL SLABAD( SAFMIN, SAFMAX ) 357 ULP = SLAMCH( 'PRECISION' ) 358 SMLNUM = SAFMIN*( REAL( N ) / ULP ) 359* 360* ==== Use accumulated reflections to update far-from-diagonal 361* . entries ? ==== 362* 363 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) 364* 365* ==== clear trash ==== 366* 367 IF( KTOP+2.LE.KBOT ) 368 $ H( KTOP+2, KTOP ) = ZERO 369* 370* ==== NBMPS = number of 2-shift bulges in the chain ==== 371* 372 NBMPS = NS / 2 373* 374* ==== KDU = width of slab ==== 375* 376 KDU = 4*NBMPS 377* 378* ==== Create and chase chains of NBMPS bulges ==== 379* 380 DO 180 INCOL = KTOP - 2*NBMPS + 1, KBOT - 2, 2*NBMPS 381* 382* JTOP = Index from which updates from the right start. 383* 384 IF( ACCUM ) THEN 385 JTOP = MAX( KTOP, INCOL ) 386 ELSE IF( WANTT ) THEN 387 JTOP = 1 388 ELSE 389 JTOP = KTOP 390 END IF 391* 392 NDCOL = INCOL + KDU 393 IF( ACCUM ) 394 $ CALL SLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) 395* 396* ==== Near-the-diagonal bulge chase. The following loop 397* . performs the near-the-diagonal part of a small bulge 398* . multi-shift QR sweep. Each 4*NBMPS column diagonal 399* . chunk extends from column INCOL to column NDCOL 400* . (including both column INCOL and column NDCOL). The 401* . following loop chases a 2*NBMPS+1 column long chain of 402* . NBMPS bulges 2*NBMPS-1 columns to the right. (INCOL 403* . may be less than KTOP and and NDCOL may be greater than 404* . KBOT indicating phantom columns from which to chase 405* . bulges before they are actually introduced or to which 406* . to chase bulges beyond column KBOT.) ==== 407* 408 DO 145 KRCOL = INCOL, MIN( INCOL+2*NBMPS-1, KBOT-2 ) 409* 410* ==== Bulges number MTOP to MBOT are active double implicit 411* . shift bulges. There may or may not also be small 412* . 2-by-2 bulge, if there is room. The inactive bulges 413* . (if any) must wait until the active bulges have moved 414* . down the diagonal to make room. The phantom matrix 415* . paradigm described above helps keep track. ==== 416* 417 MTOP = MAX( 1, ( KTOP-KRCOL ) / 2+1 ) 418 MBOT = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 2 ) 419 M22 = MBOT + 1 420 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+2*( M22-1 ) ).EQ. 421 $ ( KBOT-2 ) 422* 423* ==== Generate reflections to chase the chain right 424* . one column. (The minimum value of K is KTOP-1.) ==== 425* 426 IF ( BMP22 ) THEN 427* 428* ==== Special case: 2-by-2 reflection at bottom treated 429* . separately ==== 430* 431 K = KRCOL + 2*( M22-1 ) 432 IF( K.EQ.KTOP-1 ) THEN 433 CALL SLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), 434 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), 435 $ V( 1, M22 ) ) 436 BETA = V( 1, M22 ) 437 CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 438 ELSE 439 BETA = H( K+1, K ) 440 V( 2, M22 ) = H( K+2, K ) 441 CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 442 H( K+1, K ) = BETA 443 H( K+2, K ) = ZERO 444 END IF 445 446* 447* ==== Perform update from right within 448* . computational window. ==== 449* 450 DO 30 J = JTOP, MIN( KBOT, K+3 ) 451 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* 452 $ H( J, K+2 ) ) 453 H( J, K+1 ) = H( J, K+1 ) - REFSUM 454 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) 455 30 CONTINUE 456* 457* ==== Perform update from left within 458* . computational window. ==== 459* 460 IF( ACCUM ) THEN 461 JBOT = MIN( NDCOL, KBOT ) 462 ELSE IF( WANTT ) THEN 463 JBOT = N 464 ELSE 465 JBOT = KBOT 466 END IF 467 DO 40 J = K+1, JBOT 468 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* 469 $ H( K+2, J ) ) 470 H( K+1, J ) = H( K+1, J ) - REFSUM 471 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) 472 40 CONTINUE 473* 474* ==== The following convergence test requires that 475* . the tradition small-compared-to-nearby-diagonals 476* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 477* . criteria both be satisfied. The latter improves 478* . accuracy in some examples. Falling back on an 479* . alternate convergence criterion when TST1 or TST2 480* . is zero (as done here) is traditional but probably 481* . unnecessary. ==== 482* 483 IF( K.GE.KTOP ) THEN 484 IF( H( K+1, K ).NE.ZERO ) THEN 485 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) 486 IF( TST1.EQ.ZERO ) THEN 487 IF( K.GE.KTOP+1 ) 488 $ TST1 = TST1 + ABS( H( K, K-1 ) ) 489 IF( K.GE.KTOP+2 ) 490 $ TST1 = TST1 + ABS( H( K, K-2 ) ) 491 IF( K.GE.KTOP+3 ) 492 $ TST1 = TST1 + ABS( H( K, K-3 ) ) 493 IF( K.LE.KBOT-2 ) 494 $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) 495 IF( K.LE.KBOT-3 ) 496 $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) 497 IF( K.LE.KBOT-4 ) 498 $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) 499 END IF 500 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 501 $ THEN 502 H12 = MAX( ABS( H( K+1, K ) ), 503 $ ABS( H( K, K+1 ) ) ) 504 H21 = MIN( ABS( H( K+1, K ) ), 505 $ ABS( H( K, K+1 ) ) ) 506 H11 = MAX( ABS( H( K+1, K+1 ) ), 507 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 508 H22 = MIN( ABS( H( K+1, K+1 ) ), 509 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 510 SCL = H11 + H12 511 TST2 = H22*( H11 / SCL ) 512* 513 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. 514 $ MAX( SMLNUM, ULP*TST2 ) ) THEN 515 H( K+1, K ) = ZERO 516 END IF 517 END IF 518 END IF 519 END IF 520* 521* ==== Accumulate orthogonal transformations. ==== 522* 523 IF( ACCUM ) THEN 524 KMS = K - INCOL 525 DO 50 J = MAX( 1, KTOP-INCOL ), KDU 526 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ 527 $ V( 2, M22 )*U( J, KMS+2 ) ) 528 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 529 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M22 ) 530 50 CONTINUE 531 ELSE IF( WANTZ ) THEN 532 DO 60 J = ILOZ, IHIZ 533 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* 534 $ Z( J, K+2 ) ) 535 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 536 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) 537 60 CONTINUE 538 END IF 539 END IF 540* 541* ==== Normal case: Chain of 3-by-3 reflections ==== 542* 543 DO 80 M = MBOT, MTOP, -1 544 K = KRCOL + 2*( M-1 ) 545 IF( K.EQ.KTOP-1 ) THEN 546 CALL SLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), 547 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 548 $ V( 1, M ) ) 549 ALPHA = V( 1, M ) 550 CALL SLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) 551 ELSE 552* 553* ==== Perform delayed transformation of row below 554* . Mth bulge. Exploit fact that first two elements 555* . of row are actually zero. ==== 556* 557 REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) 558 H( K+3, K ) = -REFSUM 559 H( K+3, K+1 ) = -REFSUM*V( 2, M ) 560 H( K+3, K+2 ) = H( K+3, K+2 ) - REFSUM*V( 3, M ) 561* 562* ==== Calculate reflection to move 563* . Mth bulge one step. ==== 564* 565 BETA = H( K+1, K ) 566 V( 2, M ) = H( K+2, K ) 567 V( 3, M ) = H( K+3, K ) 568 CALL SLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) 569* 570* ==== A Bulge may collapse because of vigilant 571* . deflation or destructive underflow. In the 572* . underflow case, try the two-small-subdiagonals 573* . trick to try to reinflate the bulge. ==== 574* 575 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. 576 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN 577* 578* ==== Typical case: not collapsed (yet). ==== 579* 580 H( K+1, K ) = BETA 581 H( K+2, K ) = ZERO 582 H( K+3, K ) = ZERO 583 ELSE 584* 585* ==== Atypical case: collapsed. Attempt to 586* . reintroduce ignoring H(K+1,K) and H(K+2,K). 587* . If the fill resulting from the new 588* . reflector is too large, then abandon it. 589* . Otherwise, use the new one. ==== 590* 591 CALL SLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), 592 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), 593 $ VT ) 594 ALPHA = VT( 1 ) 595 CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) 596 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* 597 $ H( K+2, K ) ) 598* 599 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ 600 $ ABS( REFSUM*VT( 3 ) ).GT.ULP* 601 $ ( ABS( H( K, K ) )+ABS( H( K+1, 602 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN 603* 604* ==== Starting a new bulge here would 605* . create non-negligible fill. Use 606* . the old one with trepidation. ==== 607* 608 H( K+1, K ) = BETA 609 H( K+2, K ) = ZERO 610 H( K+3, K ) = ZERO 611 ELSE 612* 613* ==== Starting a new bulge here would 614* . create only negligible fill. 615* . Replace the old reflector with 616* . the new one. ==== 617* 618 H( K+1, K ) = H( K+1, K ) - REFSUM 619 H( K+2, K ) = ZERO 620 H( K+3, K ) = ZERO 621 V( 1, M ) = VT( 1 ) 622 V( 2, M ) = VT( 2 ) 623 V( 3, M ) = VT( 3 ) 624 END IF 625 END IF 626 END IF 627* 628* ==== Apply reflection from the right and 629* . the first column of update from the left. 630* . These updates are required for the vigilant 631* . deflation check. We still delay most of the 632* . updates from the left for efficiency. ==== 633* 634 DO 70 J = JTOP, MIN( KBOT, K+3 ) 635 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* 636 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) 637 H( J, K+1 ) = H( J, K+1 ) - REFSUM 638 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) 639 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) 640 70 CONTINUE 641* 642* ==== Perform update from left for subsequent 643* . column. ==== 644* 645 REFSUM = V( 1, M )*( H( K+1, K+1 )+V( 2, M )* 646 $ H( K+2, K+1 )+V( 3, M )*H( K+3, K+1 ) ) 647 H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM 648 H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) 649 H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) 650* 651* ==== The following convergence test requires that 652* . the tradition small-compared-to-nearby-diagonals 653* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 654* . criteria both be satisfied. The latter improves 655* . accuracy in some examples. Falling back on an 656* . alternate convergence criterion when TST1 or TST2 657* . is zero (as done here) is traditional but probably 658* . unnecessary. ==== 659* 660 IF( K.LT.KTOP) 661 $ CYCLE 662 IF( H( K+1, K ).NE.ZERO ) THEN 663 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) 664 IF( TST1.EQ.ZERO ) THEN 665 IF( K.GE.KTOP+1 ) 666 $ TST1 = TST1 + ABS( H( K, K-1 ) ) 667 IF( K.GE.KTOP+2 ) 668 $ TST1 = TST1 + ABS( H( K, K-2 ) ) 669 IF( K.GE.KTOP+3 ) 670 $ TST1 = TST1 + ABS( H( K, K-3 ) ) 671 IF( K.LE.KBOT-2 ) 672 $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) 673 IF( K.LE.KBOT-3 ) 674 $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) 675 IF( K.LE.KBOT-4 ) 676 $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) 677 END IF 678 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 679 $ THEN 680 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 681 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) 682 H11 = MAX( ABS( H( K+1, K+1 ) ), 683 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 684 H22 = MIN( ABS( H( K+1, K+1 ) ), 685 $ ABS( H( K, K )-H( K+1, K+1 ) ) ) 686 SCL = H11 + H12 687 TST2 = H22*( H11 / SCL ) 688* 689 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. 690 $ MAX( SMLNUM, ULP*TST2 ) ) THEN 691 H( K+1, K ) = ZERO 692 END IF 693 END IF 694 END IF 695 80 CONTINUE 696* 697* ==== Multiply H by reflections from the left ==== 698* 699 IF( ACCUM ) THEN 700 JBOT = MIN( NDCOL, KBOT ) 701 ELSE IF( WANTT ) THEN 702 JBOT = N 703 ELSE 704 JBOT = KBOT 705 END IF 706* 707 DO 100 M = MBOT, MTOP, -1 708 K = KRCOL + 2*( M-1 ) 709 DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT 710 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* 711 $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) 712 H( K+1, J ) = H( K+1, J ) - REFSUM 713 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) 714 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) 715 90 CONTINUE 716 100 CONTINUE 717* 718* ==== Accumulate orthogonal transformations. ==== 719* 720 IF( ACCUM ) THEN 721* 722* ==== Accumulate U. (If needed, update Z later 723* . with an efficient matrix-matrix 724* . multiply.) ==== 725* 726 DO 120 M = MBOT, MTOP, -1 727 K = KRCOL + 2*( M-1 ) 728 KMS = K - INCOL 729 I2 = MAX( 1, KTOP-INCOL ) 730 I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) 731 I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) 732 DO 110 J = I2, I4 733 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* 734 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) 735 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 736 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) 737 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) 738 110 CONTINUE 739 120 CONTINUE 740 ELSE IF( WANTZ ) THEN 741* 742* ==== U is not accumulated, so update Z 743* . now by multiplying by reflections 744* . from the right. ==== 745* 746 DO 140 M = MBOT, MTOP, -1 747 K = KRCOL + 2*( M-1 ) 748 DO 130 J = ILOZ, IHIZ 749 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* 750 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) 751 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 752 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) 753 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) 754 130 CONTINUE 755 140 CONTINUE 756 END IF 757* 758* ==== End of near-the-diagonal bulge chase. ==== 759* 760 145 CONTINUE 761* 762* ==== Use U (if accumulated) to update far-from-diagonal 763* . entries in H. If required, use U to update Z as 764* . well. ==== 765* 766 IF( ACCUM ) THEN 767 IF( WANTT ) THEN 768 JTOP = 1 769 JBOT = N 770 ELSE 771 JTOP = KTOP 772 JBOT = KBOT 773 END IF 774 K1 = MAX( 1, KTOP-INCOL ) 775 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 776* 777* ==== Horizontal Multiply ==== 778* 779 DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 780 JLEN = MIN( NH, JBOT-JCOL+1 ) 781 CALL SGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), 782 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, 783 $ LDWH ) 784 CALL SLACPY( 'ALL', NU, JLEN, WH, LDWH, 785 $ H( INCOL+K1, JCOL ), LDH ) 786 150 CONTINUE 787* 788* ==== Vertical multiply ==== 789* 790 DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV 791 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) 792 CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, 793 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), 794 $ LDU, ZERO, WV, LDWV ) 795 CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, 796 $ H( JROW, INCOL+K1 ), LDH ) 797 160 CONTINUE 798* 799* ==== Z multiply (also vertical) ==== 800* 801 IF( WANTZ ) THEN 802 DO 170 JROW = ILOZ, IHIZ, NV 803 JLEN = MIN( NV, IHIZ-JROW+1 ) 804 CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, 805 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), 806 $ LDU, ZERO, WV, LDWV ) 807 CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, 808 $ Z( JROW, INCOL+K1 ), LDZ ) 809 170 CONTINUE 810 END IF 811 END IF 812 180 CONTINUE 813* 814* ==== End of SLAQR5 ==== 815* 816 END 817