1c\BeginDoc 2c 3c\Name: cnapps 4c 5c\Description: 6c Given the Arnoldi factorization 7c 8c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T, 9c 10c apply NP implicit shifts resulting in 11c 12c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q 13c 14c where Q is an orthogonal matrix which is the product of rotations 15c and reflections resulting from the NP bulge change sweeps. 16c The updated Arnoldi factorization becomes: 17c 18c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T. 19c 20c\Usage: 21c call cnapps 22c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, 23c WORKL, WORKD ) 24c 25c\Arguments 26c N Integer. (INPUT) 27c Problem size, i.e. size of matrix A. 28c 29c KEV Integer. (INPUT/OUTPUT) 30c KEV+NP is the size of the input matrix H. 31c KEV is the size of the updated matrix HNEW. 32c 33c NP Integer. (INPUT) 34c Number of implicit shifts to be applied. 35c 36c SHIFT Complex array of length NP. (INPUT) 37c The shifts to be applied. 38c 39c V Complex N by (KEV+NP) array. (INPUT/OUTPUT) 40c On INPUT, V contains the current KEV+NP Arnoldi vectors. 41c On OUTPUT, V contains the updated KEV Arnoldi vectors 42c in the first KEV columns of V. 43c 44c LDV Integer. (INPUT) 45c Leading dimension of V exactly as declared in the calling 46c program. 47c 48c H Complex (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT) 49c On INPUT, H contains the current KEV+NP by KEV+NP upper 50c Hessenberg matrix of the Arnoldi factorization. 51c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg 52c matrix in the KEV leading submatrix. 53c 54c LDH Integer. (INPUT) 55c Leading dimension of H exactly as declared in the calling 56c program. 57c 58c RESID Complex array of length N. (INPUT/OUTPUT) 59c On INPUT, RESID contains the the residual vector r_{k+p}. 60c On OUTPUT, RESID is the update residual vector rnew_{k} 61c in the first KEV locations. 62c 63c Q Complex KEV+NP by KEV+NP work array. (WORKSPACE) 64c Work array used to accumulate the rotations and reflections 65c during the bulge chase sweep. 66c 67c LDQ Integer. (INPUT) 68c Leading dimension of Q exactly as declared in the calling 69c program. 70c 71c WORKL Complex work array of length (KEV+NP). (WORKSPACE) 72c Private (replicated) array on each PE or array allocated on 73c the front end. 74c 75c WORKD Complex work array of length 2*N. (WORKSPACE) 76c Distributed array used in the application of the accumulated 77c orthogonal matrix Q. 78c 79c\EndDoc 80c 81c----------------------------------------------------------------------- 82c 83c\BeginLib 84c 85c\Local variables: 86c xxxxxx Complex 87c 88c\References: 89c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in 90c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992), 91c pp 357-385. 92c 93c\Routines called: 94c ivout ARPACK utility routine that prints integers. 95c second ARPACK utility routine for timing. 96c cmout ARPACK utility routine that prints matrices 97c cvout ARPACK utility routine that prints vectors. 98c clacpy LAPACK matrix copy routine. 99c clanhs LAPACK routine that computes various norms of a matrix. 100c clartg LAPACK Givens rotation construction routine. 101c claset LAPACK matrix initialization routine. 102c slabad LAPACK routine for defining the underflow and overflow 103c limits. 104c slamch LAPACK routine that determines machine constants. 105c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully. 106c cgemv Level 2 BLAS routine for matrix vector multiplication. 107c caxpy Level 1 BLAS that computes a vector triad. 108c ccopy Level 1 BLAS that copies one vector to another. 109c cscal Level 1 BLAS that scales a vector. 110c 111c\Author 112c Danny Sorensen Phuong Vu 113c Richard Lehoucq CRPC / Rice University 114c Dept. of Computational & Houston, Texas 115c Applied Mathematics 116c Rice University 117c Houston, Texas 118c 119c\SCCS Information: @(#) 120c FILE: napps.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2 121c 122c\Remarks 123c 1. In this version, each shift is applied to all the sublocks of 124c the Hessenberg matrix H and not just to the submatrix that it 125c comes from. Deflation as in LAPACK routine clahqr (QR algorithm 126c for upper Hessenberg matrices ) is used. 127c Upon output, the subdiagonals of H are enforced to be non-negative 128c real numbers. 129c 130c\EndLib 131c 132c----------------------------------------------------------------------- 133c 134 subroutine cnapps 135 & ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, 136 & workl, workd ) 137c 138c %----------------------------------------------------% 139c | Include files for debugging and timing information | 140c %----------------------------------------------------% 141c 142 include 'debug.h' 143 include 'stat.h' 144c 145c %------------------% 146c | Scalar Arguments | 147c %------------------% 148c 149 integer kev, ldh, ldq, ldv, n, np 150c 151c %-----------------% 152c | Array Arguments | 153c %-----------------% 154c 155 Complex 156 & h(ldh,kev+np), resid(n), shift(np), 157 & v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np) 158c 159c %------------% 160c | Parameters | 161c %------------% 162c 163 Complex 164 & one, zero 165 Real 166 & rzero 167 parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0), 168 & rzero = 0.0E+0) 169c 170c %------------------------% 171c | Local Scalars & Arrays | 172c %------------------------% 173c 174 integer i, iend, istart, j, jj, kplusp, msglvl 175 logical first 176 Complex 177 & cdum, f, g, h11, h21, r, s, sigma, t 178 Real 179 & c, ovfl, smlnum, ulp, unfl, tst1 180 save first, ovfl, smlnum, ulp, unfl 181c 182c %----------------------% 183c | External Subroutines | 184c %----------------------% 185c 186 external caxpy, ccopy, cgemv, cscal, clacpy, clartg, 187 & cvout, claset, slabad, cmout, second, ivout 188c 189c %--------------------% 190c | External Functions | 191c %--------------------% 192c 193 Real 194 & clanhs, slamch, slapy2 195 external clanhs, slamch, slapy2 196c 197c %----------------------% 198c | Intrinsics Functions | 199c %----------------------% 200c 201 intrinsic abs, aimag, conjg, cmplx, max, min, real 202c 203c %---------------------% 204c | Statement Functions | 205c %---------------------% 206c 207 Real 208 & cabs1 209 cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) ) 210c 211c %----------------% 212c | Data statments | 213c %----------------% 214c 215 data first / .true. / 216c 217c %-----------------------% 218c | Executable Statements | 219c %-----------------------% 220c 221 if (first) then 222c 223c %-----------------------------------------------% 224c | Set machine-dependent constants for the | 225c | stopping criterion. If norm(H) <= sqrt(OVFL), | 226c | overflow should not occur. | 227c | REFERENCE: LAPACK subroutine clahqr | 228c %-----------------------------------------------% 229c 230 unfl = slamch( 'safe minimum' ) 231 ovfl = real(one / unfl) 232 call slabad( unfl, ovfl ) 233 ulp = slamch( 'precision' ) 234 smlnum = unfl*( n / ulp ) 235 first = .false. 236 end if 237c 238c %-------------------------------% 239c | Initialize timing statistics | 240c | & message level for debugging | 241c %-------------------------------% 242c 243 call second (t0) 244 msglvl = mcapps 245c 246 kplusp = kev + np 247c 248c %--------------------------------------------% 249c | Initialize Q to the identity to accumulate | 250c | the rotations and reflections | 251c %--------------------------------------------% 252c 253 call claset ('All', kplusp, kplusp, zero, one, q, ldq) 254c 255c %----------------------------------------------% 256c | Quick return if there are no shifts to apply | 257c %----------------------------------------------% 258c 259 if (np .eq. 0) go to 9000 260c 261c %----------------------------------------------% 262c | Chase the bulge with the application of each | 263c | implicit shift. Each shift is applied to the | 264c | whole matrix including each block. | 265c %----------------------------------------------% 266c 267 do 110 jj = 1, np 268 sigma = shift(jj) 269c 270 if (msglvl .gt. 2 ) then 271 call ivout (logfil, 1, jj, ndigit, 272 & '_napps: shift number.') 273 call cvout (logfil, 1, sigma, ndigit, 274 & '_napps: Value of the shift ') 275 end if 276c 277 istart = 1 278 20 continue 279c 280 do 30 i = istart, kplusp-1 281c 282c %----------------------------------------% 283c | Check for splitting and deflation. Use | 284c | a standard test as in the QR algorithm | 285c | REFERENCE: LAPACK subroutine clahqr | 286c %----------------------------------------% 287c 288 tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) ) 289 if( tst1.eq.rzero ) 290 & tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl ) 291 if ( abs(real(h(i+1,i))) 292 & .le. max(ulp*tst1, smlnum) ) then 293 if (msglvl .gt. 0) then 294 call ivout (logfil, 1, i, ndigit, 295 & '_napps: matrix splitting at row/column no.') 296 call ivout (logfil, 1, jj, ndigit, 297 & '_napps: matrix splitting with shift number.') 298 call cvout (logfil, 1, h(i+1,i), ndigit, 299 & '_napps: off diagonal element.') 300 end if 301 iend = i 302 h(i+1,i) = zero 303 go to 40 304 end if 305 30 continue 306 iend = kplusp 307 40 continue 308c 309 if (msglvl .gt. 2) then 310 call ivout (logfil, 1, istart, ndigit, 311 & '_napps: Start of current block ') 312 call ivout (logfil, 1, iend, ndigit, 313 & '_napps: End of current block ') 314 end if 315c 316c %------------------------------------------------% 317c | No reason to apply a shift to block of order 1 | 318c | or if the current block starts after the point | 319c | of compression since we'll discard this stuff | 320c %------------------------------------------------% 321c 322 if ( istart .eq. iend .or. istart .gt. kev) go to 100 323c 324 h11 = h(istart,istart) 325 h21 = h(istart+1,istart) 326 f = h11 - sigma 327 g = h21 328c 329 do 80 i = istart, iend-1 330c 331c %------------------------------------------------------% 332c | Construct the plane rotation G to zero out the bulge | 333c %------------------------------------------------------% 334c 335 call clartg (f, g, c, s, r) 336 if (i .gt. istart) then 337 h(i,i-1) = r 338 h(i+1,i-1) = zero 339 end if 340c 341c %---------------------------------------------% 342c | Apply rotation to the left of H; H <- G'*H | 343c %---------------------------------------------% 344c 345 do 50 j = i, kplusp 346 t = c*h(i,j) + s*h(i+1,j) 347 h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j) 348 h(i,j) = t 349 50 continue 350c 351c %---------------------------------------------% 352c | Apply rotation to the right of H; H <- H*G | 353c %---------------------------------------------% 354c 355 do 60 j = 1, min(i+2,iend) 356 t = c*h(j,i) + conjg(s)*h(j,i+1) 357 h(j,i+1) = -s*h(j,i) + c*h(j,i+1) 358 h(j,i) = t 359 60 continue 360c 361c %-----------------------------------------------------% 362c | Accumulate the rotation in the matrix Q; Q <- Q*G' | 363c %-----------------------------------------------------% 364c 365 do 70 j = 1, min(j+jj, kplusp) 366 t = c*q(j,i) + conjg(s)*q(j,i+1) 367 q(j,i+1) = - s*q(j,i) + c*q(j,i+1) 368 q(j,i) = t 369 70 continue 370c 371c %---------------------------% 372c | Prepare for next rotation | 373c %---------------------------% 374c 375 if (i .lt. iend-1) then 376 f = h(i+1,i) 377 g = h(i+2,i) 378 end if 379 80 continue 380c 381c %-------------------------------% 382c | Finished applying the shift. | 383c %-------------------------------% 384c 385 100 continue 386c 387c %---------------------------------------------------------% 388c | Apply the same shift to the next block if there is any. | 389c %---------------------------------------------------------% 390c 391 istart = iend + 1 392 if (iend .lt. kplusp) go to 20 393c 394c %---------------------------------------------% 395c | Loop back to the top to get the next shift. | 396c %---------------------------------------------% 397c 398 110 continue 399c 400c %---------------------------------------------------% 401c | Perform a similarity transformation that makes | 402c | sure that the compressed H will have non-negative | 403c | real subdiagonal elements. | 404c %---------------------------------------------------% 405c 406 do 120 j=1,kev 407 if ( real( h(j+1,j) ) .lt. rzero .or. 408 & aimag( h(j+1,j) ) .ne. rzero ) then 409 t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j))) 410 call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh ) 411 call cscal( min(j+2, kplusp), t, h(1,j+1), 1 ) 412 call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 ) 413 h(j+1,j) = cmplx( real( h(j+1,j) ), rzero ) 414 end if 415 120 continue 416c 417 do 130 i = 1, kev 418c 419c %--------------------------------------------% 420c | Final check for splitting and deflation. | 421c | Use a standard test as in the QR algorithm | 422c | REFERENCE: LAPACK subroutine clahqr. | 423c | Note: Since the subdiagonals of the | 424c | compressed H are nonnegative real numbers, | 425c | we take advantage of this. | 426c %--------------------------------------------% 427c 428 tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) ) 429 if( tst1 .eq. rzero ) 430 & tst1 = clanhs( '1', kev, h, ldh, workl ) 431 if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) ) 432 & h(i+1,i) = zero 433 130 continue 434c 435c %-------------------------------------------------% 436c | Compute the (kev+1)-st column of (V*Q) and | 437c | temporarily store the result in WORKD(N+1:2*N). | 438c | This is needed in the residual update since we | 439c | cannot GUARANTEE that the corresponding entry | 440c | of H would be zero as in exact arithmetic. | 441c %-------------------------------------------------% 442c 443 if ( real( h(kev+1,kev) ) .gt. rzero ) 444 & call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero, 445 & workd(n+1), 1) 446c 447c %----------------------------------------------------------% 448c | Compute column 1 to kev of (V*Q) in backward order | 449c | taking advantage of the upper Hessenberg structure of Q. | 450c %----------------------------------------------------------% 451c 452 do 140 i = 1, kev 453 call cgemv ('N', n, kplusp-i+1, one, v, ldv, 454 & q(1,kev-i+1), 1, zero, workd, 1) 455 call ccopy (n, workd, 1, v(1,kplusp-i+1), 1) 456 140 continue 457c 458c %-------------------------------------------------% 459c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). | 460c %-------------------------------------------------% 461c 462 call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv) 463c 464c %--------------------------------------------------------------% 465c | Copy the (kev+1)-st column of (V*Q) in the appropriate place | 466c %--------------------------------------------------------------% 467c 468 if ( real( h(kev+1,kev) ) .gt. rzero ) 469 & call ccopy (n, workd(n+1), 1, v(1,kev+1), 1) 470c 471c %-------------------------------------% 472c | Update the residual vector: | 473c | r <- sigmak*r + betak*v(:,kev+1) | 474c | where | 475c | sigmak = (e_{kev+p}'*Q)*e_{kev} | 476c | betak = e_{kev+1}'*H*e_{kev} | 477c %-------------------------------------% 478c 479 call cscal (n, q(kplusp,kev), resid, 1) 480 if ( real( h(kev+1,kev) ) .gt. rzero ) 481 & call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1) 482c 483 if (msglvl .gt. 1) then 484 call cvout (logfil, 1, q(kplusp,kev), ndigit, 485 & '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}') 486 call cvout (logfil, 1, h(kev+1,kev), ndigit, 487 & '_napps: betak = e_{kev+1}^T*H*e_{kev}') 488 call ivout (logfil, 1, kev, ndigit, 489 & '_napps: Order of the final Hessenberg matrix ') 490 if (msglvl .gt. 2) then 491 call cmout (logfil, kev, kev, h, ldh, ndigit, 492 & '_napps: updated Hessenberg matrix H for next iteration') 493 end if 494c 495 end if 496c 497 9000 continue 498 call second (t1) 499 tcapps = tcapps + (t1 - t0) 500c 501 return 502c 503c %---------------% 504c | End of cnapps | 505c %---------------% 506c 507 end 508