1*> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep. 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download ZLAQR5 + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, 22* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, 23* WV, LDWV, NH, WH, LDWH ) 24* 25* .. Scalar Arguments .. 26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 28* LOGICAL WANTT, WANTZ 29* .. 30* .. Array Arguments .. 31* COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), 32* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) 33* .. 34* 35* 36*> \par Purpose: 37* ============= 38*> 39*> \verbatim 40*> 41*> ZLAQR5, called by ZLAQR0, performs a 42*> single small-bulge multi-shift QR sweep. 43*> \endverbatim 44* 45* Arguments: 46* ========== 47* 48*> \param[in] WANTT 49*> \verbatim 50*> WANTT is LOGICAL 51*> WANTT = .true. if the triangular Schur factor 52*> is being computed. WANTT is set to .false. otherwise. 53*> \endverbatim 54*> 55*> \param[in] WANTZ 56*> \verbatim 57*> WANTZ is LOGICAL 58*> WANTZ = .true. if the unitary Schur factor is being 59*> computed. WANTZ is set to .false. otherwise. 60*> \endverbatim 61*> 62*> \param[in] KACC22 63*> \verbatim 64*> KACC22 is INTEGER with value 0, 1, or 2. 65*> Specifies the computation mode of far-from-diagonal 66*> orthogonal updates. 67*> = 0: ZLAQR5 does not accumulate reflections and does not 68*> use matrix-matrix multiply to update far-from-diagonal 69*> matrix entries. 70*> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix 71*> multiply to update the far-from-diagonal matrix entries. 72*> = 2: Same as KACC22 = 1. This option used to enable exploiting 73*> the 2-by-2 structure during matrix multiplications, but 74*> this is no longer supported. 75*> \endverbatim 76*> 77*> \param[in] N 78*> \verbatim 79*> N is INTEGER 80*> N is the order of the Hessenberg matrix H upon which this 81*> subroutine operates. 82*> \endverbatim 83*> 84*> \param[in] KTOP 85*> \verbatim 86*> KTOP is INTEGER 87*> \endverbatim 88*> 89*> \param[in] KBOT 90*> \verbatim 91*> KBOT is INTEGER 92*> These are the first and last rows and columns of an 93*> isolated diagonal block upon which the QR sweep is to be 94*> applied. It is assumed without a check that 95*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 96*> and 97*> either KBOT = N or H(KBOT+1,KBOT) = 0. 98*> \endverbatim 99*> 100*> \param[in] NSHFTS 101*> \verbatim 102*> NSHFTS is INTEGER 103*> NSHFTS gives the number of simultaneous shifts. NSHFTS 104*> must be positive and even. 105*> \endverbatim 106*> 107*> \param[in,out] S 108*> \verbatim 109*> S is COMPLEX*16 array, dimension (NSHFTS) 110*> S contains the shifts of origin that define the multi- 111*> shift QR sweep. On output S may be reordered. 112*> \endverbatim 113*> 114*> \param[in,out] H 115*> \verbatim 116*> H is COMPLEX*16 array, dimension (LDH,N) 117*> On input H contains a Hessenberg matrix. On output a 118*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied 119*> to the isolated diagonal block in rows and columns KTOP 120*> through KBOT. 121*> \endverbatim 122*> 123*> \param[in] LDH 124*> \verbatim 125*> LDH is INTEGER 126*> LDH is the leading dimension of H just as declared in the 127*> calling procedure. LDH >= MAX(1,N). 128*> \endverbatim 129*> 130*> \param[in] ILOZ 131*> \verbatim 132*> ILOZ is INTEGER 133*> \endverbatim 134*> 135*> \param[in] IHIZ 136*> \verbatim 137*> IHIZ is INTEGER 138*> Specify the rows of Z to which transformations must be 139*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N 140*> \endverbatim 141*> 142*> \param[in,out] Z 143*> \verbatim 144*> Z is COMPLEX*16 array, dimension (LDZ,IHIZ) 145*> If WANTZ = .TRUE., then the QR Sweep unitary 146*> similarity transformation is accumulated into 147*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. 148*> If WANTZ = .FALSE., then Z is unreferenced. 149*> \endverbatim 150*> 151*> \param[in] LDZ 152*> \verbatim 153*> LDZ is INTEGER 154*> LDA is the leading dimension of Z just as declared in 155*> the calling procedure. LDZ >= N. 156*> \endverbatim 157*> 158*> \param[out] V 159*> \verbatim 160*> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2) 161*> \endverbatim 162*> 163*> \param[in] LDV 164*> \verbatim 165*> LDV is INTEGER 166*> LDV is the leading dimension of V as declared in the 167*> calling procedure. LDV >= 3. 168*> \endverbatim 169*> 170*> \param[out] U 171*> \verbatim 172*> U is COMPLEX*16 array, dimension (LDU,2*NSHFTS) 173*> \endverbatim 174*> 175*> \param[in] LDU 176*> \verbatim 177*> LDU is INTEGER 178*> LDU is the leading dimension of U just as declared in the 179*> in the calling subroutine. LDU >= 2*NSHFTS. 180*> \endverbatim 181*> 182*> \param[in] NV 183*> \verbatim 184*> NV is INTEGER 185*> NV is the number of rows in WV agailable for workspace. 186*> NV >= 1. 187*> \endverbatim 188*> 189*> \param[out] WV 190*> \verbatim 191*> WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS) 192*> \endverbatim 193*> 194*> \param[in] LDWV 195*> \verbatim 196*> LDWV is INTEGER 197*> LDWV is the leading dimension of WV as declared in the 198*> in the calling subroutine. LDWV >= NV. 199*> \endverbatim 200* 201*> \param[in] NH 202*> \verbatim 203*> NH is INTEGER 204*> NH is the number of columns in array WH available for 205*> workspace. NH >= 1. 206*> \endverbatim 207*> 208*> \param[out] WH 209*> \verbatim 210*> WH is COMPLEX*16 array, dimension (LDWH,NH) 211*> \endverbatim 212*> 213*> \param[in] LDWH 214*> \verbatim 215*> LDWH is INTEGER 216*> Leading dimension of WH just as declared in the 217*> calling procedure. LDWH >= 2*NSHFTS. 218*> \endverbatim 219*> 220* Authors: 221* ======== 222* 223*> \author Univ. of Tennessee 224*> \author Univ. of California Berkeley 225*> \author Univ. of Colorado Denver 226*> \author NAG Ltd. 227* 228*> \ingroup complex16OTHERauxiliary 229* 230*> \par Contributors: 231* ================== 232*> 233*> Karen Braman and Ralph Byers, Department of Mathematics, 234*> University of Kansas, USA 235*> 236*> Lars Karlsson, Daniel Kressner, and Bruno Lang 237*> 238*> Thijs Steel, Department of Computer science, 239*> KU Leuven, Belgium 240* 241*> \par References: 242* ================ 243*> 244*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR 245*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 246*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages 247*> 929--947, 2002. 248*> 249*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed 250*> chains of bulges in multishift QR algorithms. 251*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014). 252*> 253* ===================================================================== 254 SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, 255 $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, 256 $ WV, LDWV, NH, WH, LDWH ) 257 IMPLICIT NONE 258* 259* -- LAPACK auxiliary routine -- 260* -- LAPACK is a software package provided by Univ. of Tennessee, -- 261* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 262* 263* .. Scalar Arguments .. 264 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 265 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 266 LOGICAL WANTT, WANTZ 267* .. 268* .. Array Arguments .. 269 COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), 270 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) 271* .. 272* 273* ================================================================ 274* .. Parameters .. 275 COMPLEX*16 ZERO, ONE 276 PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), 277 $ ONE = ( 1.0d0, 0.0d0 ) ) 278 DOUBLE PRECISION RZERO, RONE 279 PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) 280* .. 281* .. Local Scalars .. 282 COMPLEX*16 ALPHA, BETA, CDUM, REFSUM 283 DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, 284 $ SMLNUM, TST1, TST2, ULP 285 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN, 286 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL, 287 $ M, M22, MBOT, MTOP, NBMPS, NDCOL, 288 $ NS, NU 289 LOGICAL ACCUM, BMP22 290* .. 291* .. External Functions .. 292 DOUBLE PRECISION DLAMCH 293 EXTERNAL DLAMCH 294* .. 295* .. Intrinsic Functions .. 296* 297 INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD 298* .. 299* .. Local Arrays .. 300 COMPLEX*16 VT( 3 ) 301* .. 302* .. External Subroutines .. 303 EXTERNAL DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET, 304 $ ZTRMM 305* .. 306* .. Statement Functions .. 307 DOUBLE PRECISION CABS1 308* .. 309* .. Statement Function definitions .. 310 CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) 311* .. 312* .. Executable Statements .. 313* 314* ==== If there are no shifts, then there is nothing to do. ==== 315* 316 IF( NSHFTS.LT.2 ) 317 $ RETURN 318* 319* ==== If the active block is empty or 1-by-1, then there 320* . is nothing to do. ==== 321* 322 IF( KTOP.GE.KBOT ) 323 $ RETURN 324* 325* ==== NSHFTS is supposed to be even, but if it is odd, 326* . then simply reduce it by one. ==== 327* 328 NS = NSHFTS - MOD( NSHFTS, 2 ) 329* 330* ==== Machine constants for deflation ==== 331* 332 SAFMIN = DLAMCH( 'SAFE MINIMUM' ) 333 SAFMAX = RONE / SAFMIN 334 CALL DLABAD( SAFMIN, SAFMAX ) 335 ULP = DLAMCH( 'PRECISION' ) 336 SMLNUM = SAFMIN*( DBLE( N ) / ULP ) 337* 338* ==== Use accumulated reflections to update far-from-diagonal 339* . entries ? ==== 340* 341 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) 342* 343* ==== clear trash ==== 344* 345 IF( KTOP+2.LE.KBOT ) 346 $ H( KTOP+2, KTOP ) = ZERO 347* 348* ==== NBMPS = number of 2-shift bulges in the chain ==== 349* 350 NBMPS = NS / 2 351* 352* ==== KDU = width of slab ==== 353* 354 KDU = 4*NBMPS 355* 356* ==== Create and chase chains of NBMPS bulges ==== 357* 358 DO 180 INCOL = KTOP - 2*NBMPS + 1, KBOT - 2, 2*NBMPS 359* 360* JTOP = Index from which updates from the right start. 361* 362 IF( ACCUM ) THEN 363 JTOP = MAX( KTOP, INCOL ) 364 ELSE IF( WANTT ) THEN 365 JTOP = 1 366 ELSE 367 JTOP = KTOP 368 END IF 369* 370 NDCOL = INCOL + KDU 371 IF( ACCUM ) 372 $ CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) 373* 374* ==== Near-the-diagonal bulge chase. The following loop 375* . performs the near-the-diagonal part of a small bulge 376* . multi-shift QR sweep. Each 4*NBMPS column diagonal 377* . chunk extends from column INCOL to column NDCOL 378* . (including both column INCOL and column NDCOL). The 379* . following loop chases a 2*NBMPS+1 column long chain of 380* . NBMPS bulges 2*NBMPS columns to the right. (INCOL 381* . may be less than KTOP and and NDCOL may be greater than 382* . KBOT indicating phantom columns from which to chase 383* . bulges before they are actually introduced or to which 384* . to chase bulges beyond column KBOT.) ==== 385* 386 DO 145 KRCOL = INCOL, MIN( INCOL+2*NBMPS-1, KBOT-2 ) 387* 388* ==== Bulges number MTOP to MBOT are active double implicit 389* . shift bulges. There may or may not also be small 390* . 2-by-2 bulge, if there is room. The inactive bulges 391* . (if any) must wait until the active bulges have moved 392* . down the diagonal to make room. The phantom matrix 393* . paradigm described above helps keep track. ==== 394* 395 MTOP = MAX( 1, ( KTOP-KRCOL ) / 2+1 ) 396 MBOT = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 2 ) 397 M22 = MBOT + 1 398 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+2*( M22-1 ) ).EQ. 399 $ ( KBOT-2 ) 400* 401* ==== Generate reflections to chase the chain right 402* . one column. (The minimum value of K is KTOP-1.) ==== 403* 404 IF ( BMP22 ) THEN 405* 406* ==== Special case: 2-by-2 reflection at bottom treated 407* . separately ==== 408* 409 K = KRCOL + 2*( M22-1 ) 410 IF( K.EQ.KTOP-1 ) THEN 411 CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ), 412 $ S( 2*M22 ), V( 1, M22 ) ) 413 BETA = V( 1, M22 ) 414 CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 415 ELSE 416 BETA = H( K+1, K ) 417 V( 2, M22 ) = H( K+2, K ) 418 CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 419 H( K+1, K ) = BETA 420 H( K+2, K ) = ZERO 421 END IF 422 423* 424* ==== Perform update from right within 425* . computational window. ==== 426* 427 DO 30 J = JTOP, MIN( KBOT, K+3 ) 428 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* 429 $ H( J, K+2 ) ) 430 H( J, K+1 ) = H( J, K+1 ) - REFSUM 431 H( J, K+2 ) = H( J, K+2 ) - 432 $ REFSUM*DCONJG( V( 2, M22 ) ) 433 30 CONTINUE 434* 435* ==== Perform update from left within 436* . computational window. ==== 437* 438 IF( ACCUM ) THEN 439 JBOT = MIN( NDCOL, KBOT ) 440 ELSE IF( WANTT ) THEN 441 JBOT = N 442 ELSE 443 JBOT = KBOT 444 END IF 445 DO 40 J = K+1, JBOT 446 REFSUM = DCONJG( V( 1, M22 ) )* 447 $ ( H( K+1, J )+DCONJG( V( 2, M22 ) )* 448 $ H( K+2, J ) ) 449 H( K+1, J ) = H( K+1, J ) - REFSUM 450 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) 451 40 CONTINUE 452* 453* ==== The following convergence test requires that 454* . the tradition small-compared-to-nearby-diagonals 455* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 456* . criteria both be satisfied. The latter improves 457* . accuracy in some examples. Falling back on an 458* . alternate convergence criterion when TST1 or TST2 459* . is zero (as done here) is traditional but probably 460* . unnecessary. ==== 461* 462 IF( K.GE.KTOP ) THEN 463 IF( H( K+1, K ).NE.ZERO ) THEN 464 TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) ) 465 IF( TST1.EQ.RZERO ) THEN 466 IF( K.GE.KTOP+1 ) 467 $ TST1 = TST1 + CABS1( H( K, K-1 ) ) 468 IF( K.GE.KTOP+2 ) 469 $ TST1 = TST1 + CABS1( H( K, K-2 ) ) 470 IF( K.GE.KTOP+3 ) 471 $ TST1 = TST1 + CABS1( H( K, K-3 ) ) 472 IF( K.LE.KBOT-2 ) 473 $ TST1 = TST1 + CABS1( H( K+2, K+1 ) ) 474 IF( K.LE.KBOT-3 ) 475 $ TST1 = TST1 + CABS1( H( K+3, K+1 ) ) 476 IF( K.LE.KBOT-4 ) 477 $ TST1 = TST1 + CABS1( H( K+4, K+1 ) ) 478 END IF 479 IF( CABS1( H( K+1, K ) ) 480 $ .LE.MAX( SMLNUM, ULP*TST1 ) ) THEN 481 H12 = MAX( CABS1( H( K+1, K ) ), 482 $ CABS1( H( K, K+1 ) ) ) 483 H21 = MIN( CABS1( H( K+1, K ) ), 484 $ CABS1( H( K, K+1 ) ) ) 485 H11 = MAX( CABS1( H( K+1, K+1 ) ), 486 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 487 H22 = MIN( CABS1( H( K+1, K+1 ) ), 488 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 489 SCL = H11 + H12 490 TST2 = H22*( H11 / SCL ) 491* 492 IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE. 493 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO 494 END IF 495 END IF 496 END IF 497* 498* ==== Accumulate orthogonal transformations. ==== 499* 500 IF( ACCUM ) THEN 501 KMS = K - INCOL 502 DO 50 J = MAX( 1, KTOP-INCOL ), KDU 503 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ 504 $ V( 2, M22 )*U( J, KMS+2 ) ) 505 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 506 U( J, KMS+2 ) = U( J, KMS+2 ) - 507 $ REFSUM*DCONJG( V( 2, M22 ) ) 508 50 CONTINUE 509 ELSE IF( WANTZ ) THEN 510 DO 60 J = ILOZ, IHIZ 511 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* 512 $ Z( J, K+2 ) ) 513 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 514 Z( J, K+2 ) = Z( J, K+2 ) - 515 $ REFSUM*DCONJG( V( 2, M22 ) ) 516 60 CONTINUE 517 END IF 518 END IF 519* 520* ==== Normal case: Chain of 3-by-3 reflections ==== 521* 522 DO 80 M = MBOT, MTOP, -1 523 K = KRCOL + 2*( M-1 ) 524 IF( K.EQ.KTOP-1 ) THEN 525 CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ), 526 $ S( 2*M ), V( 1, M ) ) 527 ALPHA = V( 1, M ) 528 CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) 529 ELSE 530* 531* ==== Perform delayed transformation of row below 532* . Mth bulge. Exploit fact that first two elements 533* . of row are actually zero. ==== 534* 535 REFSUM = V( 1, M )*V( 3, M )*H( K+3, K+2 ) 536 H( K+3, K ) = -REFSUM 537 H( K+3, K+1 ) = -REFSUM*DCONJG( V( 2, M ) ) 538 H( K+3, K+2 ) = H( K+3, K+2 ) - 539 $ REFSUM*DCONJG( V( 3, M ) ) 540* 541* ==== Calculate reflection to move 542* . Mth bulge one step. ==== 543* 544 BETA = H( K+1, K ) 545 V( 2, M ) = H( K+2, K ) 546 V( 3, M ) = H( K+3, K ) 547 CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) 548* 549* ==== A Bulge may collapse because of vigilant 550* . deflation or destructive underflow. In the 551* . underflow case, try the two-small-subdiagonals 552* . trick to try to reinflate the bulge. ==== 553* 554 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. 555 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN 556* 557* ==== Typical case: not collapsed (yet). ==== 558* 559 H( K+1, K ) = BETA 560 H( K+2, K ) = ZERO 561 H( K+3, K ) = ZERO 562 ELSE 563* 564* ==== Atypical case: collapsed. Attempt to 565* . reintroduce ignoring H(K+1,K) and H(K+2,K). 566* . If the fill resulting from the new 567* . reflector is too large, then abandon it. 568* . Otherwise, use the new one. ==== 569* 570 CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ), 571 $ S( 2*M ), VT ) 572 ALPHA = VT( 1 ) 573 CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) 574 REFSUM = DCONJG( VT( 1 ) )* 575 $ ( H( K+1, K )+DCONJG( VT( 2 ) )* 576 $ H( K+2, K ) ) 577* 578 IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ 579 $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* 580 $ ( CABS1( H( K, K ) )+CABS1( H( K+1, 581 $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN 582* 583* ==== Starting a new bulge here would 584* . create non-negligible fill. Use 585* . the old one with trepidation. ==== 586* 587 H( K+1, K ) = BETA 588 H( K+2, K ) = ZERO 589 H( K+3, K ) = ZERO 590 ELSE 591* 592* ==== Starting a new bulge here would 593* . create only negligible fill. 594* . Replace the old reflector with 595* . the new one. ==== 596* 597 H( K+1, K ) = H( K+1, K ) - REFSUM 598 H( K+2, K ) = ZERO 599 H( K+3, K ) = ZERO 600 V( 1, M ) = VT( 1 ) 601 V( 2, M ) = VT( 2 ) 602 V( 3, M ) = VT( 3 ) 603 END IF 604 END IF 605 END IF 606* 607* ==== Apply reflection from the right and 608* . the first column of update from the left. 609* . These updates are required for the vigilant 610* . deflation check. We still delay most of the 611* . updates from the left for efficiency. ==== 612* 613 DO 70 J = JTOP, MIN( KBOT, K+3 ) 614 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* 615 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) 616 H( J, K+1 ) = H( J, K+1 ) - REFSUM 617 H( J, K+2 ) = H( J, K+2 ) - 618 $ REFSUM*DCONJG( V( 2, M ) ) 619 H( J, K+3 ) = H( J, K+3 ) - 620 $ REFSUM*DCONJG( V( 3, M ) ) 621 70 CONTINUE 622* 623* ==== Perform update from left for subsequent 624* . column. ==== 625* 626 REFSUM = DCONJG( V( 1, M ) )*( H( K+1, K+1 ) 627 $ +DCONJG( V( 2, M ) )*H( K+2, K+1 ) 628 $ +DCONJG( V( 3, M ) )*H( K+3, K+1 ) ) 629 H( K+1, K+1 ) = H( K+1, K+1 ) - REFSUM 630 H( K+2, K+1 ) = H( K+2, K+1 ) - REFSUM*V( 2, M ) 631 H( K+3, K+1 ) = H( K+3, K+1 ) - REFSUM*V( 3, M ) 632* 633* ==== The following convergence test requires that 634* . the tradition small-compared-to-nearby-diagonals 635* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 636* . criteria both be satisfied. The latter improves 637* . accuracy in some examples. Falling back on an 638* . alternate convergence criterion when TST1 or TST2 639* . is zero (as done here) is traditional but probably 640* . unnecessary. ==== 641* 642 IF( K.LT.KTOP) 643 $ CYCLE 644 IF( H( K+1, K ).NE.ZERO ) THEN 645 TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) ) 646 IF( TST1.EQ.RZERO ) THEN 647 IF( K.GE.KTOP+1 ) 648 $ TST1 = TST1 + CABS1( H( K, K-1 ) ) 649 IF( K.GE.KTOP+2 ) 650 $ TST1 = TST1 + CABS1( H( K, K-2 ) ) 651 IF( K.GE.KTOP+3 ) 652 $ TST1 = TST1 + CABS1( H( K, K-3 ) ) 653 IF( K.LE.KBOT-2 ) 654 $ TST1 = TST1 + CABS1( H( K+2, K+1 ) ) 655 IF( K.LE.KBOT-3 ) 656 $ TST1 = TST1 + CABS1( H( K+3, K+1 ) ) 657 IF( K.LE.KBOT-4 ) 658 $ TST1 = TST1 + CABS1( H( K+4, K+1 ) ) 659 END IF 660 IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 661 $ THEN 662 H12 = MAX( CABS1( H( K+1, K ) ), 663 $ CABS1( H( K, K+1 ) ) ) 664 H21 = MIN( CABS1( H( K+1, K ) ), 665 $ CABS1( H( K, K+1 ) ) ) 666 H11 = MAX( CABS1( H( K+1, K+1 ) ), 667 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 668 H22 = MIN( CABS1( H( K+1, K+1 ) ), 669 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 670 SCL = H11 + H12 671 TST2 = H22*( H11 / SCL ) 672* 673 IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE. 674 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO 675 END IF 676 END IF 677 80 CONTINUE 678* 679* ==== Multiply H by reflections from the left ==== 680* 681 IF( ACCUM ) THEN 682 JBOT = MIN( NDCOL, KBOT ) 683 ELSE IF( WANTT ) THEN 684 JBOT = N 685 ELSE 686 JBOT = KBOT 687 END IF 688* 689 DO 100 M = MBOT, MTOP, -1 690 K = KRCOL + 2*( M-1 ) 691 DO 90 J = MAX( KTOP, KRCOL + 2*M ), JBOT 692 REFSUM = DCONJG( V( 1, M ) )* 693 $ ( H( K+1, J )+DCONJG( V( 2, M ) )* 694 $ H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) ) 695 H( K+1, J ) = H( K+1, J ) - REFSUM 696 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) 697 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) 698 90 CONTINUE 699 100 CONTINUE 700* 701* ==== Accumulate orthogonal transformations. ==== 702* 703 IF( ACCUM ) THEN 704* 705* ==== Accumulate U. (If needed, update Z later 706* . with an efficient matrix-matrix 707* . multiply.) ==== 708* 709 DO 120 M = MBOT, MTOP, -1 710 K = KRCOL + 2*( M-1 ) 711 KMS = K - INCOL 712 I2 = MAX( 1, KTOP-INCOL ) 713 I2 = MAX( I2, KMS-(KRCOL-INCOL)+1 ) 714 I4 = MIN( KDU, KRCOL + 2*( MBOT-1 ) - INCOL + 5 ) 715 DO 110 J = I2, I4 716 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* 717 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) 718 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 719 U( J, KMS+2 ) = U( J, KMS+2 ) - 720 $ REFSUM*DCONJG( V( 2, M ) ) 721 U( J, KMS+3 ) = U( J, KMS+3 ) - 722 $ REFSUM*DCONJG( V( 3, M ) ) 723 110 CONTINUE 724 120 CONTINUE 725 ELSE IF( WANTZ ) THEN 726* 727* ==== U is not accumulated, so update Z 728* . now by multiplying by reflections 729* . from the right. ==== 730* 731 DO 140 M = MBOT, MTOP, -1 732 K = KRCOL + 2*( M-1 ) 733 DO 130 J = ILOZ, IHIZ 734 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* 735 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) 736 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 737 Z( J, K+2 ) = Z( J, K+2 ) - 738 $ REFSUM*DCONJG( V( 2, M ) ) 739 Z( J, K+3 ) = Z( J, K+3 ) - 740 $ REFSUM*DCONJG( V( 3, M ) ) 741 130 CONTINUE 742 140 CONTINUE 743 END IF 744* 745* ==== End of near-the-diagonal bulge chase. ==== 746* 747 145 CONTINUE 748* 749* ==== Use U (if accumulated) to update far-from-diagonal 750* . entries in H. If required, use U to update Z as 751* . well. ==== 752* 753 IF( ACCUM ) THEN 754 IF( WANTT ) THEN 755 JTOP = 1 756 JBOT = N 757 ELSE 758 JTOP = KTOP 759 JBOT = KBOT 760 END IF 761 K1 = MAX( 1, KTOP-INCOL ) 762 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 763* 764* ==== Horizontal Multiply ==== 765* 766 DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 767 JLEN = MIN( NH, JBOT-JCOL+1 ) 768 CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), 769 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, 770 $ LDWH ) 771 CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH, 772 $ H( INCOL+K1, JCOL ), LDH ) 773 150 CONTINUE 774* 775* ==== Vertical multiply ==== 776* 777 DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV 778 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) 779 CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE, 780 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), 781 $ LDU, ZERO, WV, LDWV ) 782 CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV, 783 $ H( JROW, INCOL+K1 ), LDH ) 784 160 CONTINUE 785* 786* ==== Z multiply (also vertical) ==== 787* 788 IF( WANTZ ) THEN 789 DO 170 JROW = ILOZ, IHIZ, NV 790 JLEN = MIN( NV, IHIZ-JROW+1 ) 791 CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE, 792 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), 793 $ LDU, ZERO, WV, LDWV ) 794 CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV, 795 $ Z( JROW, INCOL+K1 ), LDZ ) 796 170 CONTINUE 797 END IF 798 END IF 799 180 CONTINUE 800* 801* ==== End of ZLAQR5 ==== 802* 803 END 804