1*> \brief \b CLAQR5 performs a single small-bulge multi-shift QR sweep. 2* 3* =========== DOCUMENTATION =========== 4* 5* Online html documentation available at 6* http://www.netlib.org/lapack/explore-html/ 7* 8*> \htmlonly 9*> Download CLAQR5 + dependencies 10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/claqr5.f"> 11*> [TGZ]</a> 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/claqr5.f"> 13*> [ZIP]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/claqr5.f"> 15*> [TXT]</a> 16*> \endhtmlonly 17* 18* Definition: 19* =========== 20* 21* SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, 22* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, 23* WV, LDWV, NH, WH, LDWH ) 24* 25* .. Scalar Arguments .. 26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 28* LOGICAL WANTT, WANTZ 29* .. 30* .. Array Arguments .. 31* COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), 32* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) 33* .. 34* 35* 36*> \par Purpose: 37* ============= 38*> 39*> \verbatim 40*> 41*> CLAQR5 called by CLAQR0 performs a 42*> single small-bulge multi-shift QR sweep. 43*> \endverbatim 44* 45* Arguments: 46* ========== 47* 48*> \param[in] WANTT 49*> \verbatim 50*> WANTT is logical scalar 51*> WANTT = .true. if the triangular Schur factor 52*> is being computed. WANTT is set to .false. otherwise. 53*> \endverbatim 54*> 55*> \param[in] WANTZ 56*> \verbatim 57*> WANTZ is logical scalar 58*> WANTZ = .true. if the unitary Schur factor is being 59*> computed. WANTZ is set to .false. otherwise. 60*> \endverbatim 61*> 62*> \param[in] KACC22 63*> \verbatim 64*> KACC22 is integer with value 0, 1, or 2. 65*> Specifies the computation mode of far-from-diagonal 66*> orthogonal updates. 67*> = 0: CLAQR5 does not accumulate reflections and does not 68*> use matrix-matrix multiply to update far-from-diagonal 69*> matrix entries. 70*> = 1: CLAQR5 accumulates reflections and uses matrix-matrix 71*> multiply to update the far-from-diagonal matrix entries. 72*> = 2: CLAQR5 accumulates reflections, uses matrix-matrix 73*> multiply to update the far-from-diagonal matrix entries, 74*> and takes advantage of 2-by-2 block structure during 75*> matrix multiplies. 76*> \endverbatim 77*> 78*> \param[in] N 79*> \verbatim 80*> N is integer scalar 81*> N is the order of the Hessenberg matrix H upon which this 82*> subroutine operates. 83*> \endverbatim 84*> 85*> \param[in] KTOP 86*> \verbatim 87*> KTOP is integer scalar 88*> \endverbatim 89*> 90*> \param[in] KBOT 91*> \verbatim 92*> KBOT is integer scalar 93*> These are the first and last rows and columns of an 94*> isolated diagonal block upon which the QR sweep is to be 95*> applied. It is assumed without a check that 96*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 97*> and 98*> either KBOT = N or H(KBOT+1,KBOT) = 0. 99*> \endverbatim 100*> 101*> \param[in] NSHFTS 102*> \verbatim 103*> NSHFTS is integer scalar 104*> NSHFTS gives the number of simultaneous shifts. NSHFTS 105*> must be positive and even. 106*> \endverbatim 107*> 108*> \param[in,out] S 109*> \verbatim 110*> S is COMPLEX array of size (NSHFTS) 111*> S contains the shifts of origin that define the multi- 112*> shift QR sweep. On output S may be reordered. 113*> \endverbatim 114*> 115*> \param[in,out] H 116*> \verbatim 117*> H is COMPLEX array of size (LDH,N) 118*> On input H contains a Hessenberg matrix. On output a 119*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied 120*> to the isolated diagonal block in rows and columns KTOP 121*> through KBOT. 122*> \endverbatim 123*> 124*> \param[in] LDH 125*> \verbatim 126*> LDH is integer scalar 127*> LDH is the leading dimension of H just as declared in the 128*> calling procedure. LDH.GE.MAX(1,N). 129*> \endverbatim 130*> 131*> \param[in] ILOZ 132*> \verbatim 133*> ILOZ is INTEGER 134*> \endverbatim 135*> 136*> \param[in] IHIZ 137*> \verbatim 138*> IHIZ is INTEGER 139*> Specify the rows of Z to which transformations must be 140*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N 141*> \endverbatim 142*> 143*> \param[in,out] Z 144*> \verbatim 145*> Z is COMPLEX array of size (LDZ,IHI) 146*> If WANTZ = .TRUE., then the QR Sweep unitary 147*> similarity transformation is accumulated into 148*> Z(ILOZ:IHIZ,ILO:IHI) from the right. 149*> If WANTZ = .FALSE., then Z is unreferenced. 150*> \endverbatim 151*> 152*> \param[in] LDZ 153*> \verbatim 154*> LDZ is integer scalar 155*> LDA is the leading dimension of Z just as declared in 156*> the calling procedure. LDZ.GE.N. 157*> \endverbatim 158*> 159*> \param[out] V 160*> \verbatim 161*> V is COMPLEX array of size (LDV,NSHFTS/2) 162*> \endverbatim 163*> 164*> \param[in] LDV 165*> \verbatim 166*> LDV is integer scalar 167*> LDV is the leading dimension of V as declared in the 168*> calling procedure. LDV.GE.3. 169*> \endverbatim 170*> 171*> \param[out] U 172*> \verbatim 173*> U is COMPLEX array of size 174*> (LDU,3*NSHFTS-3) 175*> \endverbatim 176*> 177*> \param[in] LDU 178*> \verbatim 179*> LDU is integer scalar 180*> LDU is the leading dimension of U just as declared in the 181*> in the calling subroutine. LDU.GE.3*NSHFTS-3. 182*> \endverbatim 183*> 184*> \param[in] NH 185*> \verbatim 186*> NH is integer scalar 187*> NH is the number of columns in array WH available for 188*> workspace. NH.GE.1. 189*> \endverbatim 190*> 191*> \param[out] WH 192*> \verbatim 193*> WH is COMPLEX array of size (LDWH,NH) 194*> \endverbatim 195*> 196*> \param[in] LDWH 197*> \verbatim 198*> LDWH is integer scalar 199*> Leading dimension of WH just as declared in the 200*> calling procedure. LDWH.GE.3*NSHFTS-3. 201*> \endverbatim 202*> 203*> \param[in] NV 204*> \verbatim 205*> NV is integer scalar 206*> NV is the number of rows in WV agailable for workspace. 207*> NV.GE.1. 208*> \endverbatim 209*> 210*> \param[out] WV 211*> \verbatim 212*> WV is COMPLEX array of size 213*> (LDWV,3*NSHFTS-3) 214*> \endverbatim 215*> 216*> \param[in] LDWV 217*> \verbatim 218*> LDWV is integer scalar 219*> LDWV is the leading dimension of WV as declared in the 220*> in the calling subroutine. LDWV.GE.NV. 221*> \endverbatim 222* 223* Authors: 224* ======== 225* 226*> \author Univ. of Tennessee 227*> \author Univ. of California Berkeley 228*> \author Univ. of Colorado Denver 229*> \author NAG Ltd. 230* 231*> \date September 2012 232* 233*> \ingroup complexOTHERauxiliary 234* 235*> \par Contributors: 236* ================== 237*> 238*> Karen Braman and Ralph Byers, Department of Mathematics, 239*> University of Kansas, USA 240* 241*> \par References: 242* ================ 243*> 244*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR 245*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 246*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages 247*> 929--947, 2002. 248*> 249* ===================================================================== 250 SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, 251 $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, 252 $ WV, LDWV, NH, WH, LDWH ) 253* 254* -- LAPACK auxiliary routine (version 3.4.2) -- 255* -- LAPACK is a software package provided by Univ. of Tennessee, -- 256* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 257* September 2012 258* 259* .. Scalar Arguments .. 260 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, 261 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV 262 LOGICAL WANTT, WANTZ 263* .. 264* .. Array Arguments .. 265 COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), 266 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) 267* .. 268* 269* ================================================================ 270* .. Parameters .. 271 COMPLEX ZERO, ONE 272 PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), 273 $ ONE = ( 1.0e0, 0.0e0 ) ) 274 REAL RZERO, RONE 275 PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) 276* .. 277* .. Local Scalars .. 278 COMPLEX ALPHA, BETA, CDUM, REFSUM 279 REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, 280 $ SMLNUM, TST1, TST2, ULP 281 INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, 282 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, 283 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, 284 $ NS, NU 285 LOGICAL ACCUM, BLK22, BMP22 286* .. 287* .. External Functions .. 288 REAL SLAMCH 289 EXTERNAL SLAMCH 290* .. 291* .. Intrinsic Functions .. 292* 293 INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL 294* .. 295* .. Local Arrays .. 296 COMPLEX VT( 3 ) 297* .. 298* .. External Subroutines .. 299 EXTERNAL CGEMM, CLACPY, CLAQR1, CLARFG, CLASET, CTRMM, 300 $ SLABAD 301* .. 302* .. Statement Functions .. 303 REAL CABS1 304* .. 305* .. Statement Function definitions .. 306 CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) 307* .. 308* .. Executable Statements .. 309* 310* ==== If there are no shifts, then there is nothing to do. ==== 311* 312 IF( NSHFTS.LT.2 ) 313 $ RETURN 314* 315* ==== If the active block is empty or 1-by-1, then there 316* . is nothing to do. ==== 317* 318 IF( KTOP.GE.KBOT ) 319 $ RETURN 320* 321* ==== NSHFTS is supposed to be even, but if it is odd, 322* . then simply reduce it by one. ==== 323* 324 NS = NSHFTS - MOD( NSHFTS, 2 ) 325* 326* ==== Machine constants for deflation ==== 327* 328 SAFMIN = SLAMCH( 'SAFE MINIMUM' ) 329 SAFMAX = RONE / SAFMIN 330 CALL SLABAD( SAFMIN, SAFMAX ) 331 ULP = SLAMCH( 'PRECISION' ) 332 SMLNUM = SAFMIN*( REAL( N ) / ULP ) 333* 334* ==== Use accumulated reflections to update far-from-diagonal 335* . entries ? ==== 336* 337 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) 338* 339* ==== If so, exploit the 2-by-2 block structure? ==== 340* 341 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) 342* 343* ==== clear trash ==== 344* 345 IF( KTOP+2.LE.KBOT ) 346 $ H( KTOP+2, KTOP ) = ZERO 347* 348* ==== NBMPS = number of 2-shift bulges in the chain ==== 349* 350 NBMPS = NS / 2 351* 352* ==== KDU = width of slab ==== 353* 354 KDU = 6*NBMPS - 3 355* 356* ==== Create and chase chains of NBMPS bulges ==== 357* 358 DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 359 NDCOL = INCOL + KDU 360 IF( ACCUM ) 361 $ CALL CLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) 362* 363* ==== Near-the-diagonal bulge chase. The following loop 364* . performs the near-the-diagonal part of a small bulge 365* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal 366* . chunk extends from column INCOL to column NDCOL 367* . (including both column INCOL and column NDCOL). The 368* . following loop chases a 3*NBMPS column long chain of 369* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL 370* . may be less than KTOP and and NDCOL may be greater than 371* . KBOT indicating phantom columns from which to chase 372* . bulges before they are actually introduced or to which 373* . to chase bulges beyond column KBOT.) ==== 374* 375 DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) 376* 377* ==== Bulges number MTOP to MBOT are active double implicit 378* . shift bulges. There may or may not also be small 379* . 2-by-2 bulge, if there is room. The inactive bulges 380* . (if any) must wait until the active bulges have moved 381* . down the diagonal to make room. The phantom matrix 382* . paradigm described above helps keep track. ==== 383* 384 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) 385 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) 386 M22 = MBOT + 1 387 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. 388 $ ( KBOT-2 ) 389* 390* ==== Generate reflections to chase the chain right 391* . one column. (The minimum value of K is KTOP-1.) ==== 392* 393 DO 10 M = MTOP, MBOT 394 K = KRCOL + 3*( M-1 ) 395 IF( K.EQ.KTOP-1 ) THEN 396 CALL CLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ), 397 $ S( 2*M ), V( 1, M ) ) 398 ALPHA = V( 1, M ) 399 CALL CLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) 400 ELSE 401 BETA = H( K+1, K ) 402 V( 2, M ) = H( K+2, K ) 403 V( 3, M ) = H( K+3, K ) 404 CALL CLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) 405* 406* ==== A Bulge may collapse because of vigilant 407* . deflation or destructive underflow. In the 408* . underflow case, try the two-small-subdiagonals 409* . trick to try to reinflate the bulge. ==== 410* 411 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. 412 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN 413* 414* ==== Typical case: not collapsed (yet). ==== 415* 416 H( K+1, K ) = BETA 417 H( K+2, K ) = ZERO 418 H( K+3, K ) = ZERO 419 ELSE 420* 421* ==== Atypical case: collapsed. Attempt to 422* . reintroduce ignoring H(K+1,K) and H(K+2,K). 423* . If the fill resulting from the new 424* . reflector is too large, then abandon it. 425* . Otherwise, use the new one. ==== 426* 427 CALL CLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ), 428 $ S( 2*M ), VT ) 429 ALPHA = VT( 1 ) 430 CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) 431 REFSUM = CONJG( VT( 1 ) )* 432 $ ( H( K+1, K )+CONJG( VT( 2 ) )* 433 $ H( K+2, K ) ) 434* 435 IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ 436 $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* 437 $ ( CABS1( H( K, K ) )+CABS1( H( K+1, 438 $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN 439* 440* ==== Starting a new bulge here would 441* . create non-negligible fill. Use 442* . the old one with trepidation. ==== 443* 444 H( K+1, K ) = BETA 445 H( K+2, K ) = ZERO 446 H( K+3, K ) = ZERO 447 ELSE 448* 449* ==== Stating a new bulge here would 450* . create only negligible fill. 451* . Replace the old reflector with 452* . the new one. ==== 453* 454 H( K+1, K ) = H( K+1, K ) - REFSUM 455 H( K+2, K ) = ZERO 456 H( K+3, K ) = ZERO 457 V( 1, M ) = VT( 1 ) 458 V( 2, M ) = VT( 2 ) 459 V( 3, M ) = VT( 3 ) 460 END IF 461 END IF 462 END IF 463 10 CONTINUE 464* 465* ==== Generate a 2-by-2 reflection, if needed. ==== 466* 467 K = KRCOL + 3*( M22-1 ) 468 IF( BMP22 ) THEN 469 IF( K.EQ.KTOP-1 ) THEN 470 CALL CLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ), 471 $ S( 2*M22 ), V( 1, M22 ) ) 472 BETA = V( 1, M22 ) 473 CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 474 ELSE 475 BETA = H( K+1, K ) 476 V( 2, M22 ) = H( K+2, K ) 477 CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) 478 H( K+1, K ) = BETA 479 H( K+2, K ) = ZERO 480 END IF 481 END IF 482* 483* ==== Multiply H by reflections from the left ==== 484* 485 IF( ACCUM ) THEN 486 JBOT = MIN( NDCOL, KBOT ) 487 ELSE IF( WANTT ) THEN 488 JBOT = N 489 ELSE 490 JBOT = KBOT 491 END IF 492 DO 30 J = MAX( KTOP, KRCOL ), JBOT 493 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) 494 DO 20 M = MTOP, MEND 495 K = KRCOL + 3*( M-1 ) 496 REFSUM = CONJG( V( 1, M ) )* 497 $ ( H( K+1, J )+CONJG( V( 2, M ) )*H( K+2, J )+ 498 $ CONJG( V( 3, M ) )*H( K+3, J ) ) 499 H( K+1, J ) = H( K+1, J ) - REFSUM 500 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) 501 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) 502 20 CONTINUE 503 30 CONTINUE 504 IF( BMP22 ) THEN 505 K = KRCOL + 3*( M22-1 ) 506 DO 40 J = MAX( K+1, KTOP ), JBOT 507 REFSUM = CONJG( V( 1, M22 ) )* 508 $ ( H( K+1, J )+CONJG( V( 2, M22 ) )* 509 $ H( K+2, J ) ) 510 H( K+1, J ) = H( K+1, J ) - REFSUM 511 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) 512 40 CONTINUE 513 END IF 514* 515* ==== Multiply H by reflections from the right. 516* . Delay filling in the last row until the 517* . vigilant deflation check is complete. ==== 518* 519 IF( ACCUM ) THEN 520 JTOP = MAX( KTOP, INCOL ) 521 ELSE IF( WANTT ) THEN 522 JTOP = 1 523 ELSE 524 JTOP = KTOP 525 END IF 526 DO 80 M = MTOP, MBOT 527 IF( V( 1, M ).NE.ZERO ) THEN 528 K = KRCOL + 3*( M-1 ) 529 DO 50 J = JTOP, MIN( KBOT, K+3 ) 530 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* 531 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) 532 H( J, K+1 ) = H( J, K+1 ) - REFSUM 533 H( J, K+2 ) = H( J, K+2 ) - 534 $ REFSUM*CONJG( V( 2, M ) ) 535 H( J, K+3 ) = H( J, K+3 ) - 536 $ REFSUM*CONJG( V( 3, M ) ) 537 50 CONTINUE 538* 539 IF( ACCUM ) THEN 540* 541* ==== Accumulate U. (If necessary, update Z later 542* . with with an efficient matrix-matrix 543* . multiply.) ==== 544* 545 KMS = K - INCOL 546 DO 60 J = MAX( 1, KTOP-INCOL ), KDU 547 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* 548 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) 549 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 550 U( J, KMS+2 ) = U( J, KMS+2 ) - 551 $ REFSUM*CONJG( V( 2, M ) ) 552 U( J, KMS+3 ) = U( J, KMS+3 ) - 553 $ REFSUM*CONJG( V( 3, M ) ) 554 60 CONTINUE 555 ELSE IF( WANTZ ) THEN 556* 557* ==== U is not accumulated, so update Z 558* . now by multiplying by reflections 559* . from the right. ==== 560* 561 DO 70 J = ILOZ, IHIZ 562 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* 563 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) 564 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 565 Z( J, K+2 ) = Z( J, K+2 ) - 566 $ REFSUM*CONJG( V( 2, M ) ) 567 Z( J, K+3 ) = Z( J, K+3 ) - 568 $ REFSUM*CONJG( V( 3, M ) ) 569 70 CONTINUE 570 END IF 571 END IF 572 80 CONTINUE 573* 574* ==== Special case: 2-by-2 reflection (if needed) ==== 575* 576 K = KRCOL + 3*( M22-1 ) 577 IF( BMP22 ) THEN 578 IF ( V( 1, M22 ).NE.ZERO ) THEN 579 DO 90 J = JTOP, MIN( KBOT, K+3 ) 580 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* 581 $ H( J, K+2 ) ) 582 H( J, K+1 ) = H( J, K+1 ) - REFSUM 583 H( J, K+2 ) = H( J, K+2 ) - 584 $ REFSUM*CONJG( V( 2, M22 ) ) 585 90 CONTINUE 586* 587 IF( ACCUM ) THEN 588 KMS = K - INCOL 589 DO 100 J = MAX( 1, KTOP-INCOL ), KDU 590 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ 591 $ V( 2, M22 )*U( J, KMS+2 ) ) 592 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM 593 U( J, KMS+2 ) = U( J, KMS+2 ) - 594 $ REFSUM*CONJG( V( 2, M22 ) ) 595 100 CONTINUE 596 ELSE IF( WANTZ ) THEN 597 DO 110 J = ILOZ, IHIZ 598 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* 599 $ Z( J, K+2 ) ) 600 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM 601 Z( J, K+2 ) = Z( J, K+2 ) - 602 $ REFSUM*CONJG( V( 2, M22 ) ) 603 110 CONTINUE 604 END IF 605 END IF 606 END IF 607* 608* ==== Vigilant deflation check ==== 609* 610 MSTART = MTOP 611 IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) 612 $ MSTART = MSTART + 1 613 MEND = MBOT 614 IF( BMP22 ) 615 $ MEND = MEND + 1 616 IF( KRCOL.EQ.KBOT-2 ) 617 $ MEND = MEND + 1 618 DO 120 M = MSTART, MEND 619 K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) 620* 621* ==== The following convergence test requires that 622* . the tradition small-compared-to-nearby-diagonals 623* . criterion and the Ahues & Tisseur (LAWN 122, 1997) 624* . criteria both be satisfied. The latter improves 625* . accuracy in some examples. Falling back on an 626* . alternate convergence criterion when TST1 or TST2 627* . is zero (as done here) is traditional but probably 628* . unnecessary. ==== 629* 630 IF( H( K+1, K ).NE.ZERO ) THEN 631 TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) ) 632 IF( TST1.EQ.RZERO ) THEN 633 IF( K.GE.KTOP+1 ) 634 $ TST1 = TST1 + CABS1( H( K, K-1 ) ) 635 IF( K.GE.KTOP+2 ) 636 $ TST1 = TST1 + CABS1( H( K, K-2 ) ) 637 IF( K.GE.KTOP+3 ) 638 $ TST1 = TST1 + CABS1( H( K, K-3 ) ) 639 IF( K.LE.KBOT-2 ) 640 $ TST1 = TST1 + CABS1( H( K+2, K+1 ) ) 641 IF( K.LE.KBOT-3 ) 642 $ TST1 = TST1 + CABS1( H( K+3, K+1 ) ) 643 IF( K.LE.KBOT-4 ) 644 $ TST1 = TST1 + CABS1( H( K+4, K+1 ) ) 645 END IF 646 IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) 647 $ THEN 648 H12 = MAX( CABS1( H( K+1, K ) ), 649 $ CABS1( H( K, K+1 ) ) ) 650 H21 = MIN( CABS1( H( K+1, K ) ), 651 $ CABS1( H( K, K+1 ) ) ) 652 H11 = MAX( CABS1( H( K+1, K+1 ) ), 653 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 654 H22 = MIN( CABS1( H( K+1, K+1 ) ), 655 $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) 656 SCL = H11 + H12 657 TST2 = H22*( H11 / SCL ) 658* 659 IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE. 660 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO 661 END IF 662 END IF 663 120 CONTINUE 664* 665* ==== Fill in the last row of each bulge. ==== 666* 667 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) 668 DO 130 M = MTOP, MEND 669 K = KRCOL + 3*( M-1 ) 670 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) 671 H( K+4, K+1 ) = -REFSUM 672 H( K+4, K+2 ) = -REFSUM*CONJG( V( 2, M ) ) 673 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*CONJG( V( 3, M ) ) 674 130 CONTINUE 675* 676* ==== End of near-the-diagonal bulge chase. ==== 677* 678 140 CONTINUE 679* 680* ==== Use U (if accumulated) to update far-from-diagonal 681* . entries in H. If required, use U to update Z as 682* . well. ==== 683* 684 IF( ACCUM ) THEN 685 IF( WANTT ) THEN 686 JTOP = 1 687 JBOT = N 688 ELSE 689 JTOP = KTOP 690 JBOT = KBOT 691 END IF 692 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. 693 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN 694* 695* ==== Updates not exploiting the 2-by-2 block 696* . structure of U. K1 and NU keep track of 697* . the location and size of U in the special 698* . cases of introducing bulges and chasing 699* . bulges off the bottom. In these special 700* . cases and in case the number of shifts 701* . is NS = 2, there is no 2-by-2 block 702* . structure to exploit. ==== 703* 704 K1 = MAX( 1, KTOP-INCOL ) 705 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 706* 707* ==== Horizontal Multiply ==== 708* 709 DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 710 JLEN = MIN( NH, JBOT-JCOL+1 ) 711 CALL CGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), 712 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, 713 $ LDWH ) 714 CALL CLACPY( 'ALL', NU, JLEN, WH, LDWH, 715 $ H( INCOL+K1, JCOL ), LDH ) 716 150 CONTINUE 717* 718* ==== Vertical multiply ==== 719* 720 DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV 721 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) 722 CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, 723 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), 724 $ LDU, ZERO, WV, LDWV ) 725 CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, 726 $ H( JROW, INCOL+K1 ), LDH ) 727 160 CONTINUE 728* 729* ==== Z multiply (also vertical) ==== 730* 731 IF( WANTZ ) THEN 732 DO 170 JROW = ILOZ, IHIZ, NV 733 JLEN = MIN( NV, IHIZ-JROW+1 ) 734 CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, 735 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), 736 $ LDU, ZERO, WV, LDWV ) 737 CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, 738 $ Z( JROW, INCOL+K1 ), LDZ ) 739 170 CONTINUE 740 END IF 741 ELSE 742* 743* ==== Updates exploiting U's 2-by-2 block structure. 744* . (I2, I4, J2, J4 are the last rows and columns 745* . of the blocks.) ==== 746* 747 I2 = ( KDU+1 ) / 2 748 I4 = KDU 749 J2 = I4 - I2 750 J4 = KDU 751* 752* ==== KZS and KNZ deal with the band of zeros 753* . along the diagonal of one of the triangular 754* . blocks. ==== 755* 756 KZS = ( J4-J2 ) - ( NS+1 ) 757 KNZ = NS + 1 758* 759* ==== Horizontal multiply ==== 760* 761 DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH 762 JLEN = MIN( NH, JBOT-JCOL+1 ) 763* 764* ==== Copy bottom of H to top+KZS of scratch ==== 765* (The first KZS rows get multiplied by zero.) ==== 766* 767 CALL CLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), 768 $ LDH, WH( KZS+1, 1 ), LDWH ) 769* 770* ==== Multiply by U21**H ==== 771* 772 CALL CLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) 773 CALL CTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, 774 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), 775 $ LDWH ) 776* 777* ==== Multiply top of H by U11**H ==== 778* 779 CALL CGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, 780 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) 781* 782* ==== Copy top of H to bottom of WH ==== 783* 784 CALL CLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, 785 $ WH( I2+1, 1 ), LDWH ) 786* 787* ==== Multiply by U21**H ==== 788* 789 CALL CTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, 790 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) 791* 792* ==== Multiply by U22 ==== 793* 794 CALL CGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, 795 $ U( J2+1, I2+1 ), LDU, 796 $ H( INCOL+1+J2, JCOL ), LDH, ONE, 797 $ WH( I2+1, 1 ), LDWH ) 798* 799* ==== Copy it back ==== 800* 801 CALL CLACPY( 'ALL', KDU, JLEN, WH, LDWH, 802 $ H( INCOL+1, JCOL ), LDH ) 803 180 CONTINUE 804* 805* ==== Vertical multiply ==== 806* 807 DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV 808 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) 809* 810* ==== Copy right of H to scratch (the first KZS 811* . columns get multiplied by zero) ==== 812* 813 CALL CLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), 814 $ LDH, WV( 1, 1+KZS ), LDWV ) 815* 816* ==== Multiply by U21 ==== 817* 818 CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) 819 CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 820 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 821 $ LDWV ) 822* 823* ==== Multiply by U11 ==== 824* 825 CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, 826 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, 827 $ LDWV ) 828* 829* ==== Copy left of H to right of scratch ==== 830* 831 CALL CLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, 832 $ WV( 1, 1+I2 ), LDWV ) 833* 834* ==== Multiply by U21 ==== 835* 836 CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 837 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) 838* 839* ==== Multiply by U22 ==== 840* 841 CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 842 $ H( JROW, INCOL+1+J2 ), LDH, 843 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), 844 $ LDWV ) 845* 846* ==== Copy it back ==== 847* 848 CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, 849 $ H( JROW, INCOL+1 ), LDH ) 850 190 CONTINUE 851* 852* ==== Multiply Z (also vertical) ==== 853* 854 IF( WANTZ ) THEN 855 DO 200 JROW = ILOZ, IHIZ, NV 856 JLEN = MIN( NV, IHIZ-JROW+1 ) 857* 858* ==== Copy right of Z to left of scratch (first 859* . KZS columns get multiplied by zero) ==== 860* 861 CALL CLACPY( 'ALL', JLEN, KNZ, 862 $ Z( JROW, INCOL+1+J2 ), LDZ, 863 $ WV( 1, 1+KZS ), LDWV ) 864* 865* ==== Multiply by U12 ==== 866* 867 CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, 868 $ LDWV ) 869 CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, 870 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), 871 $ LDWV ) 872* 873* ==== Multiply by U11 ==== 874* 875 CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, 876 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, 877 $ WV, LDWV ) 878* 879* ==== Copy left of Z to right of scratch ==== 880* 881 CALL CLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), 882 $ LDZ, WV( 1, 1+I2 ), LDWV ) 883* 884* ==== Multiply by U21 ==== 885* 886 CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, 887 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), 888 $ LDWV ) 889* 890* ==== Multiply by U22 ==== 891* 892 CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, 893 $ Z( JROW, INCOL+1+J2 ), LDZ, 894 $ U( J2+1, I2+1 ), LDU, ONE, 895 $ WV( 1, 1+I2 ), LDWV ) 896* 897* ==== Copy the result back to Z ==== 898* 899 CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, 900 $ Z( JROW, INCOL+1 ), LDZ ) 901 200 CONTINUE 902 END IF 903 END IF 904 END IF 905 210 CONTINUE 906* 907* ==== End of CLAQR5 ==== 908* 909 END 910