1c\BeginDoc
2c
3c\Name: cnapps
4c
5c\Description:
6c  Given the Arnoldi factorization
7c
8c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
9c
10c  apply NP implicit shifts resulting in
11c
12c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
13c
14c  where Q is an orthogonal matrix which is the product of rotations
15c  and reflections resulting from the NP bulge change sweeps.
16c  The updated Arnoldi factorization becomes:
17c
18c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
19c
20c\Usage:
21c  call cnapps
22c     ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ,
23c       WORKL, WORKD )
24c
25c\Arguments
26c  N       Integer.  (INPUT)
27c          Problem size, i.e. size of matrix A.
28c
29c  KEV     Integer.  (INPUT/OUTPUT)
30c          KEV+NP is the size of the input matrix H.
31c          KEV is the size of the updated matrix HNEW.
32c
33c  NP      Integer.  (INPUT)
34c          Number of implicit shifts to be applied.
35c
36c  SHIFT   Complex array of length NP.  (INPUT)
37c          The shifts to be applied.
38c
39c  V       Complex N by (KEV+NP) array.  (INPUT/OUTPUT)
40c          On INPUT, V contains the current KEV+NP Arnoldi vectors.
41c          On OUTPUT, V contains the updated KEV Arnoldi vectors
42c          in the first KEV columns of V.
43c
44c  LDV     Integer.  (INPUT)
45c          Leading dimension of V exactly as declared in the calling
46c          program.
47c
48c  H       Complex (KEV+NP) by (KEV+NP) array.  (INPUT/OUTPUT)
49c          On INPUT, H contains the current KEV+NP by KEV+NP upper
50c          Hessenberg matrix of the Arnoldi factorization.
51c          On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
52c          matrix in the KEV leading submatrix.
53c
54c  LDH     Integer.  (INPUT)
55c          Leading dimension of H exactly as declared in the calling
56c          program.
57c
58c  RESID   Complex array of length N.  (INPUT/OUTPUT)
59c          On INPUT, RESID contains the the residual vector r_{k+p}.
60c          On OUTPUT, RESID is the update residual vector rnew_{k}
61c          in the first KEV locations.
62c
63c  Q       Complex KEV+NP by KEV+NP work array.  (WORKSPACE)
64c          Work array used to accumulate the rotations and reflections
65c          during the bulge chase sweep.
66c
67c  LDQ     Integer.  (INPUT)
68c          Leading dimension of Q exactly as declared in the calling
69c          program.
70c
71c  WORKL   Complex work array of length (KEV+NP).  (WORKSPACE)
72c          Private (replicated) array on each PE or array allocated on
73c          the front end.
74c
75c  WORKD   Complex work array of length 2*N.  (WORKSPACE)
76c          Distributed array used in the application of the accumulated
77c          orthogonal matrix Q.
78c
79c\EndDoc
80c
81c-----------------------------------------------------------------------
82c
83c\BeginLib
84c
85c\Local variables:
86c     xxxxxx  Complex
87c
88c\References:
89c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
90c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
91c     pp 357-385.
92c
93c\Routines called:
94c     ivout   ARPACK utility routine that prints integers.
95c     second  ARPACK utility routine for timing.
96c     cmout   ARPACK utility routine that prints matrices
97c     cvout   ARPACK utility routine that prints vectors.
98c     clacpy  LAPACK matrix copy routine.
99c     clanhs  LAPACK routine that computes various norms of a matrix.
100c     clartg  LAPACK Givens rotation construction routine.
101c     claset  LAPACK matrix initialization routine.
102c     slabad  LAPACK routine for defining the underflow and overflow
103c             limits.
104c     slamch  LAPACK routine that determines machine constants.
105c     slapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
106c     cgemv   Level 2 BLAS routine for matrix vector multiplication.
107c     caxpy   Level 1 BLAS that computes a vector triad.
108c     ccopy   Level 1 BLAS that copies one vector to another.
109c     cscal   Level 1 BLAS that scales a vector.
110c
111c\Author
112c     Danny Sorensen               Phuong Vu
113c     Richard Lehoucq              CRPC / Rice University
114c     Dept. of Computational &     Houston, Texas
115c     Applied Mathematics
116c     Rice University
117c     Houston, Texas
118c
119c\SCCS Information: @(#)
120c FILE: napps.F   SID: 2.2   DATE OF SID: 4/20/96   RELEASE: 2
121c
122c\Remarks
123c  1. In this version, each shift is applied to all the sublocks of
124c     the Hessenberg matrix H and not just to the submatrix that it
125c     comes from. Deflation as in LAPACK routine clahqr (QR algorithm
126c     for upper Hessenberg matrices ) is used.
127c     Upon output, the subdiagonals of H are enforced to be non-negative
128c     real numbers.
129c
130c\EndLib
131c
132c-----------------------------------------------------------------------
133c
134      subroutine cnapps
135     &   ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq,
136     &     workl, workd )
137c
138c     %----------------------------------------------------%
139c     | Include files for debugging and timing information |
140c     %----------------------------------------------------%
141c
142      include   'debug.h'
143      include   'stat.h'
144c
145c     %------------------%
146c     | Scalar Arguments |
147c     %------------------%
148c
149      integer    kev, ldh, ldq, ldv, n, np
150c
151c     %-----------------%
152c     | Array Arguments |
153c     %-----------------%
154c
155      Complex
156     &           h(ldh,kev+np), resid(n), shift(np),
157     &           v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
158c
159c     %------------%
160c     | Parameters |
161c     %------------%
162c
163      Complex
164     &           one, zero
165      Real
166     &           rzero
167      parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
168     &           rzero = 0.0E+0)
169c
170c     %------------------------%
171c     | Local Scalars & Arrays |
172c     %------------------------%
173c
174      integer    i, iend, istart, j, jj, kplusp, msglvl
175      logical    first
176      Complex
177     &           cdum, f, g, h11, h21, r, s, sigma, t
178      Real
179     &           c,  ovfl, smlnum, ulp, unfl, tst1
180      save       first, ovfl, smlnum, ulp, unfl
181c
182c     %----------------------%
183c     | External Subroutines |
184c     %----------------------%
185c
186      external   caxpy, ccopy, cgemv, cscal, clacpy, clartg,
187     &           cvout, claset, slabad, cmout, second, ivout
188c
189c     %--------------------%
190c     | External Functions |
191c     %--------------------%
192c
193      Real
194     &           clanhs, slamch, slapy2
195      external   clanhs, slamch, slapy2
196c
197c     %----------------------%
198c     | Intrinsics Functions |
199c     %----------------------%
200c
201      intrinsic  abs, aimag, conjg, cmplx, max, min, real
202c
203c     %---------------------%
204c     | Statement Functions |
205c     %---------------------%
206c
207      Real
208     &           cabs1
209      cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
210c
211c     %----------------%
212c     | Data statments |
213c     %----------------%
214c
215      data       first / .true. /
216c
217c     %-----------------------%
218c     | Executable Statements |
219c     %-----------------------%
220c
221      if (first) then
222c
223c        %-----------------------------------------------%
224c        | Set machine-dependent constants for the       |
225c        | stopping criterion. If norm(H) <= sqrt(OVFL), |
226c        | overflow should not occur.                    |
227c        | REFERENCE: LAPACK subroutine clahqr           |
228c        %-----------------------------------------------%
229c
230         unfl = slamch( 'safe minimum' )
231         ovfl = real(one / unfl)
232         call slabad( unfl, ovfl )
233         ulp = slamch( 'precision' )
234         smlnum = unfl*( n / ulp )
235         first = .false.
236      end if
237c
238c     %-------------------------------%
239c     | Initialize timing statistics  |
240c     | & message level for debugging |
241c     %-------------------------------%
242c
243      call second (t0)
244      msglvl = mcapps
245c
246      kplusp = kev + np
247c
248c     %--------------------------------------------%
249c     | Initialize Q to the identity to accumulate |
250c     | the rotations and reflections              |
251c     %--------------------------------------------%
252c
253      call claset ('All', kplusp, kplusp, zero, one, q, ldq)
254c
255c     %----------------------------------------------%
256c     | Quick return if there are no shifts to apply |
257c     %----------------------------------------------%
258c
259      if (np .eq. 0) go to 9000
260c
261c     %----------------------------------------------%
262c     | Chase the bulge with the application of each |
263c     | implicit shift. Each shift is applied to the |
264c     | whole matrix including each block.           |
265c     %----------------------------------------------%
266c
267      do 110 jj = 1, np
268         sigma = shift(jj)
269c
270         if (msglvl .gt. 2 ) then
271            call ivout (logfil, 1, jj, ndigit,
272     &               '_napps: shift number.')
273            call cvout (logfil, 1, sigma, ndigit,
274     &               '_napps: Value of the shift ')
275         end if
276c
277         istart = 1
278   20    continue
279c
280         do 30 i = istart, kplusp-1
281c
282c           %----------------------------------------%
283c           | Check for splitting and deflation. Use |
284c           | a standard test as in the QR algorithm |
285c           | REFERENCE: LAPACK subroutine clahqr    |
286c           %----------------------------------------%
287c
288            tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
289            if( tst1.eq.rzero )
290     &         tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl )
291            if ( abs(real(h(i+1,i)))
292     &           .le. max(ulp*tst1, smlnum) )  then
293               if (msglvl .gt. 0) then
294                  call ivout (logfil, 1, i, ndigit,
295     &                 '_napps: matrix splitting at row/column no.')
296                  call ivout (logfil, 1, jj, ndigit,
297     &                 '_napps: matrix splitting with shift number.')
298                  call cvout (logfil, 1, h(i+1,i), ndigit,
299     &                 '_napps: off diagonal element.')
300               end if
301               iend = i
302               h(i+1,i) = zero
303               go to 40
304            end if
305   30    continue
306         iend = kplusp
307   40    continue
308c
309         if (msglvl .gt. 2) then
310             call ivout (logfil, 1, istart, ndigit,
311     &                   '_napps: Start of current block ')
312             call ivout (logfil, 1, iend, ndigit,
313     &                   '_napps: End of current block ')
314         end if
315c
316c        %------------------------------------------------%
317c        | No reason to apply a shift to block of order 1 |
318c        | or if the current block starts after the point |
319c        | of compression since we'll discard this stuff  |
320c        %------------------------------------------------%
321c
322         if ( istart .eq. iend .or. istart .gt. kev) go to 100
323c
324         h11 = h(istart,istart)
325         h21 = h(istart+1,istart)
326         f = h11 - sigma
327         g = h21
328c
329         do 80 i = istart, iend-1
330c
331c           %------------------------------------------------------%
332c           | Construct the plane rotation G to zero out the bulge |
333c           %------------------------------------------------------%
334c
335            call clartg (f, g, c, s, r)
336            if (i .gt. istart) then
337               h(i,i-1) = r
338               h(i+1,i-1) = zero
339            end if
340c
341c           %---------------------------------------------%
342c           | Apply rotation to the left of H;  H <- G'*H |
343c           %---------------------------------------------%
344c
345            do 50 j = i, kplusp
346               t        =  c*h(i,j) + s*h(i+1,j)
347               h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j)
348               h(i,j)   = t
349   50       continue
350c
351c           %---------------------------------------------%
352c           | Apply rotation to the right of H;  H <- H*G |
353c           %---------------------------------------------%
354c
355            do 60 j = 1, min(i+2,iend)
356               t        =  c*h(j,i) + conjg(s)*h(j,i+1)
357               h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
358               h(j,i)   = t
359   60       continue
360c
361c           %-----------------------------------------------------%
362c           | Accumulate the rotation in the matrix Q;  Q <- Q*G' |
363c           %-----------------------------------------------------%
364c
365            do 70 j = 1, min(j+jj, kplusp)
366               t        =   c*q(j,i) + conjg(s)*q(j,i+1)
367               q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
368               q(j,i)   = t
369   70       continue
370c
371c           %---------------------------%
372c           | Prepare for next rotation |
373c           %---------------------------%
374c
375            if (i .lt. iend-1) then
376               f = h(i+1,i)
377               g = h(i+2,i)
378            end if
379   80    continue
380c
381c        %-------------------------------%
382c        | Finished applying the shift.  |
383c        %-------------------------------%
384c
385  100    continue
386c
387c        %---------------------------------------------------------%
388c        | Apply the same shift to the next block if there is any. |
389c        %---------------------------------------------------------%
390c
391         istart = iend + 1
392         if (iend .lt. kplusp) go to 20
393c
394c        %---------------------------------------------%
395c        | Loop back to the top to get the next shift. |
396c        %---------------------------------------------%
397c
398  110 continue
399c
400c     %---------------------------------------------------%
401c     | Perform a similarity transformation that makes    |
402c     | sure that the compressed H will have non-negative |
403c     | real subdiagonal elements.                        |
404c     %---------------------------------------------------%
405c
406      do 120 j=1,kev
407         if ( real( h(j+1,j) ) .lt. rzero .or.
408     &        aimag( h(j+1,j) ) .ne. rzero ) then
409            t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j)))
410            call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh )
411            call cscal( min(j+2, kplusp), t, h(1,j+1), 1 )
412            call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 )
413            h(j+1,j) = cmplx( real( h(j+1,j) ), rzero )
414         end if
415  120 continue
416c
417      do 130 i = 1, kev
418c
419c        %--------------------------------------------%
420c        | Final check for splitting and deflation.   |
421c        | Use a standard test as in the QR algorithm |
422c        | REFERENCE: LAPACK subroutine clahqr.       |
423c        | Note: Since the subdiagonals of the        |
424c        | compressed H are nonnegative real numbers, |
425c        | we take advantage of this.                 |
426c        %--------------------------------------------%
427c
428         tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
429         if( tst1 .eq. rzero )
430     &       tst1 = clanhs( '1', kev, h, ldh, workl )
431         if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) )
432     &       h(i+1,i) = zero
433 130  continue
434c
435c     %-------------------------------------------------%
436c     | Compute the (kev+1)-st column of (V*Q) and      |
437c     | temporarily store the result in WORKD(N+1:2*N). |
438c     | This is needed in the residual update since we  |
439c     | cannot GUARANTEE that the corresponding entry   |
440c     | of H would be zero as in exact arithmetic.      |
441c     %-------------------------------------------------%
442c
443      if ( real( h(kev+1,kev) ) .gt. rzero )
444     &   call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
445     &                workd(n+1), 1)
446c
447c     %----------------------------------------------------------%
448c     | Compute column 1 to kev of (V*Q) in backward order       |
449c     | taking advantage of the upper Hessenberg structure of Q. |
450c     %----------------------------------------------------------%
451c
452      do 140 i = 1, kev
453         call cgemv ('N', n, kplusp-i+1, one, v, ldv,
454     &               q(1,kev-i+1), 1, zero, workd, 1)
455         call ccopy (n, workd, 1, v(1,kplusp-i+1), 1)
456  140 continue
457c
458c     %-------------------------------------------------%
459c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
460c     %-------------------------------------------------%
461c
462      call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
463c
464c     %--------------------------------------------------------------%
465c     | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
466c     %--------------------------------------------------------------%
467c
468      if ( real( h(kev+1,kev) ) .gt. rzero )
469     &   call ccopy (n, workd(n+1), 1, v(1,kev+1), 1)
470c
471c     %-------------------------------------%
472c     | Update the residual vector:         |
473c     |    r <- sigmak*r + betak*v(:,kev+1) |
474c     | where                               |
475c     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  |
476c     |    betak = e_{kev+1}'*H*e_{kev}     |
477c     %-------------------------------------%
478c
479      call cscal (n, q(kplusp,kev), resid, 1)
480      if ( real( h(kev+1,kev) ) .gt. rzero )
481     &   call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
482c
483      if (msglvl .gt. 1) then
484         call cvout (logfil, 1, q(kplusp,kev), ndigit,
485     &        '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
486         call cvout (logfil, 1, h(kev+1,kev), ndigit,
487     &        '_napps: betak = e_{kev+1}^T*H*e_{kev}')
488         call ivout (logfil, 1, kev, ndigit,
489     &               '_napps: Order of the final Hessenberg matrix ')
490         if (msglvl .gt. 2) then
491            call cmout (logfil, kev, kev, h, ldh, ndigit,
492     &      '_napps: updated Hessenberg matrix H for next iteration')
493         end if
494c
495      end if
496c
497 9000 continue
498      call second (t1)
499      tcapps = tcapps + (t1 - t0)
500c
501      return
502c
503c     %---------------%
504c     | End of cnapps |
505c     %---------------%
506c
507      end
508