1c-----------------------------------------------------------------------
2c\BeginDoc
3c
4c\Name: dnapps
5c
6c\Description:
7c  Given the Arnoldi factorization
8c
9c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
10c
11c  apply NP implicit shifts resulting in
12c
13c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
14c
15c  where Q is an orthogonal matrix which is the product of rotations
16c  and reflections resulting from the NP bulge chage sweeps.
17c  The updated Arnoldi factorization becomes:
18c
19c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
20c
21c\Usage:
22c  call dnapps
23c     ( N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ,
24c       WORKL, WORKD )
25c
26c\Arguments
27c  N       Integer.  (INPUT)
28c          Problem size, i.e. size of matrix A.
29c
30c  KEV     Integer.  (INPUT/OUTPUT)
31c          KEV+NP is the size of the input matrix H.
32c          KEV is the size of the updated matrix HNEW.  KEV is only
33c          updated on output when fewer than NP shifts are applied in
34c          order to keep the conjugate pair together.
35c
36c  NP      Integer.  (INPUT)
37c          Number of implicit shifts to be applied.
38c
39c  SHIFTR, Double precision array of length NP.  (INPUT)
40c  SHIFTI  Real and imaginary part of the shifts to be applied.
41c          Upon, entry to dnapps, the shifts must be sorted so that the
42c          conjugate pairs are in consecutive locations.
43c
44c  V       Double precision N by (KEV+NP) array.  (INPUT/OUTPUT)
45c          On INPUT, V contains the current KEV+NP Arnoldi vectors.
46c          On OUTPUT, V contains the updated KEV Arnoldi vectors
47c          in the first KEV columns of V.
48c
49c  LDV     Integer.  (INPUT)
50c          Leading dimension of V exactly as declared in the calling
51c          program.
52c
53c  H       Double precision (KEV+NP) by (KEV+NP) array.  (INPUT/OUTPUT)
54c          On INPUT, H contains the current KEV+NP by KEV+NP upper
55c          Hessenber matrix of the Arnoldi factorization.
56c          On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
57c          matrix in the KEV leading submatrix.
58c
59c  LDH     Integer.  (INPUT)
60c          Leading dimension of H exactly as declared in the calling
61c          program.
62c
63c  RESID   Double precision array of length N.  (INPUT/OUTPUT)
64c          On INPUT, RESID contains the the residual vector r_{k+p}.
65c          On OUTPUT, RESID is the update residual vector rnew_{k}
66c          in the first KEV locations.
67c
68c  Q       Double precision KEV+NP by KEV+NP work array.  (WORKSPACE)
69c          Work array used to accumulate the rotations and reflections
70c          during the bulge chase sweep.
71c
72c  LDQ     Integer.  (INPUT)
73c          Leading dimension of Q exactly as declared in the calling
74c          program.
75c
76c  WORKL   Double precision work array of length (KEV+NP).  (WORKSPACE)
77c          Private (replicated) array on each PE or array allocated on
78c          the front end.
79c
80c  WORKD   Double precision work array of length 2*N.  (WORKSPACE)
81c          Distributed array used in the application of the accumulated
82c          orthogonal matrix Q.
83c
84c\EndDoc
85c
86c-----------------------------------------------------------------------
87c
88c\BeginLib
89c
90c\Local variables:
91c     xxxxxx  real
92c
93c\References:
94c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
95c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
96c     pp 357-385.
97c
98c\Routines called:
99c     ivout   ARPACK utility routine that prints integers.
100c     arscnd  ARPACK utility routine for timing.
101c     dmout   ARPACK utility routine that prints matrices.
102c     dvout   ARPACK utility routine that prints vectors.
103c     dlabad  LAPACK routine that computes machine constants.
104c     dlacpy  LAPACK matrix copy routine.
105c     dlamch  LAPACK routine that determines machine constants.
106c     dlanhs  LAPACK routine that computes various norms of a matrix.
107c     dlapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
108c     dlarf   LAPACK routine that applies Householder reflection to
109c             a matrix.
110c     dlarfg  LAPACK Householder reflection construction routine.
111c     dlartg  LAPACK Givens rotation construction routine.
112c     dlaset  LAPACK matrix initialization routine.
113c     dgemv   Level 2 BLAS routine for matrix vector multiplication.
114c     daxpy   Level 1 BLAS that computes a vector triad.
115c     dcopy   Level 1 BLAS that copies one vector to another .
116c     dscal   Level 1 BLAS that scales a vector.
117c
118c\Author
119c     Danny Sorensen               Phuong Vu
120c     Richard Lehoucq              CRPC / Rice University
121c     Dept. of Computational &     Houston, Texas
122c     Applied Mathematics
123c     Rice University
124c     Houston, Texas
125c
126c\Revision history:
127c     xx/xx/92: Version ' 2.4'
128c
129c\SCCS Information: @(#)
130c FILE: napps.F   SID: 2.4   DATE OF SID: 3/28/97   RELEASE: 2
131c
132c\Remarks
133c  1. In this version, each shift is applied to all the sublocks of
134c     the Hessenberg matrix H and not just to the submatrix that it
135c     comes from. Deflation as in LAPACK routine dlahqr (QR algorithm
136c     for upper Hessenberg matrices ) is used.
137c     The subdiagonals of H are enforced to be non-negative.
138c
139c\EndLib
140c
141c-----------------------------------------------------------------------
142c
143      subroutine dnapps
144     &   ( n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid, q, ldq,
145     &     workl, workd )
146c
147c     %----------------------------------------------------%
148c     | Include files for debugging and timing information |
149c     %----------------------------------------------------%
150c
151      include   'debug.h'
152      include   'stat.h'
153c
154c     %------------------%
155c     | Scalar Arguments |
156c     %------------------%
157c
158      integer    kev, ldh, ldq, ldv, n, np
159c
160c     %-----------------%
161c     | Array Arguments |
162c     %-----------------%
163c
164      Double precision
165     &           h(ldh,kev+np), resid(n), shifti(np), shiftr(np),
166     &           v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
167c
168c     %------------%
169c     | Parameters |
170c     %------------%
171c
172      Double precision
173     &           one, zero
174      parameter (one = 1.0D+0, zero = 0.0D+0)
175c
176c     %------------------------%
177c     | Local Scalars & Arrays |
178c     %------------------------%
179c
180      integer    i, iend, ir, istart, j, jj, kplusp, msglvl, nr
181      logical    cconj, first
182      Double precision
183     &           c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai,
184     &           sigmar, smlnum, ulp, unfl, u(3), t, tau, tst1
185      save       first, ovfl, smlnum, ulp, unfl
186c
187c     %----------------------%
188c     | External Subroutines |
189c     %----------------------%
190c
191      external   daxpy, dcopy, dscal, dlacpy, dlarfg, dlarf,
192     &           dlaset, dlabad, arscnd, dlartg
193c
194c     %--------------------%
195c     | External Functions |
196c     %--------------------%
197c
198      Double precision
199     &           dlamch, dlanhs, dlapy2
200      external   dlamch, dlanhs, dlapy2
201c
202c     %----------------------%
203c     | Intrinsics Functions |
204c     %----------------------%
205c
206      intrinsic  abs, max, min
207c
208c     %----------------%
209c     | Data statements |
210c     %----------------%
211c
212      data       first / .true. /
213c
214c     %-----------------------%
215c     | Executable Statements |
216c     %-----------------------%
217c
218      if (first) then
219c
220c        %-----------------------------------------------%
221c        | Set machine-dependent constants for the       |
222c        | stopping criterion. If norm(H) <= sqrt(OVFL), |
223c        | overflow should not occur.                    |
224c        | REFERENCE: LAPACK subroutine dlahqr           |
225c        %-----------------------------------------------%
226c
227         unfl = dlamch( 'safe minimum' )
228         ovfl = one / unfl
229         call dlabad( unfl, ovfl )
230         ulp = dlamch( 'precision' )
231         smlnum = unfl*( n / ulp )
232         first = .false.
233      end if
234c
235c     %-------------------------------%
236c     | Initialize timing statistics  |
237c     | & message level for debugging |
238c     %-------------------------------%
239c
240      call arscnd (t0)
241      msglvl = mnapps
242      kplusp = kev + np
243c
244c     %--------------------------------------------%
245c     | Initialize Q to the identity to accumulate |
246c     | the rotations and reflections              |
247c     %--------------------------------------------%
248c
249      call dlaset ('All', kplusp, kplusp, zero, one, q, ldq)
250c
251c     %----------------------------------------------%
252c     | Quick return if there are no shifts to apply |
253c     %----------------------------------------------%
254c
255      if (np .eq. 0) go to 9000
256c
257c     %----------------------------------------------%
258c     | Chase the bulge with the application of each |
259c     | implicit shift. Each shift is applied to the |
260c     | whole matrix including each block.           |
261c     %----------------------------------------------%
262c
263      cconj = .false.
264      do 110 jj = 1, np
265         sigmar = shiftr(jj)
266         sigmai = shifti(jj)
267c
268         if (msglvl .gt. 2 ) then
269            call ivout (logfil, 1, [jj], ndigit,
270     &               '_napps: shift number.')
271            call dvout (logfil, 1, [sigmar], ndigit,
272     &               '_napps: The real part of the shift ')
273            call dvout (logfil, 1, [sigmai], ndigit,
274     &               '_napps: The imaginary part of the shift ')
275         end if
276c
277c        %-------------------------------------------------%
278c        | The following set of conditionals is necessary  |
279c        | in order that complex conjugate pairs of shifts |
280c        | are applied together or not at all.             |
281c        %-------------------------------------------------%
282c
283         if ( cconj ) then
284c
285c           %-----------------------------------------%
286c           | cconj = .true. means the previous shift |
287c           | had non-zero imaginary part.            |
288c           %-----------------------------------------%
289c
290            cconj = .false.
291            go to 110
292         else if ( jj .lt. np .and. abs( sigmai ) .gt. zero ) then
293c
294c           %------------------------------------%
295c           | Start of a complex conjugate pair. |
296c           %------------------------------------%
297c
298            cconj = .true.
299         else if ( jj .eq. np .and. abs( sigmai ) .gt. zero ) then
300c
301c           %----------------------------------------------%
302c           | The last shift has a nonzero imaginary part. |
303c           | Don't apply it; thus the order of the        |
304c           | compressed H is order KEV+1 since only np-1  |
305c           | were applied.                                |
306c           %----------------------------------------------%
307c
308            kev = kev + 1
309            go to 110
310         end if
311         istart = 1
312   20    continue
313c
314c        %--------------------------------------------------%
315c        | if sigmai = 0 then                               |
316c        |    Apply the jj-th shift ...                     |
317c        | else                                             |
318c        |    Apply the jj-th and (jj+1)-th together ...    |
319c        |    (Note that jj < np at this point in the code) |
320c        | end                                              |
321c        | to the current block of H. The next do loop      |
322c        | determines the current block ;                   |
323c        %--------------------------------------------------%
324c
325         do 30 i = istart, kplusp-1
326c
327c           %----------------------------------------%
328c           | Check for splitting and deflation. Use |
329c           | a standard test as in the QR algorithm |
330c           | REFERENCE: LAPACK subroutine dlahqr    |
331c           %----------------------------------------%
332c
333            tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
334            if( tst1.eq.zero )
335     &         tst1 = dlanhs( '1', kplusp-jj+1, h, ldh, workl )
336            if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) then
337               if (msglvl .gt. 0) then
338                  call ivout (logfil, 1, [i], ndigit,
339     &                 '_napps: matrix splitting at row/column no.')
340                  call ivout (logfil, 1, [jj], ndigit,
341     &                 '_napps: matrix splitting with shift number.')
342                  call dvout (logfil, 1, h(i+1,i), ndigit,
343     &                 '_napps: off diagonal element.')
344               end if
345               iend = i
346               h(i+1,i) = zero
347               go to 40
348            end if
349   30    continue
350         iend = kplusp
351   40    continue
352c
353         if (msglvl .gt. 2) then
354             call ivout (logfil, 1, [istart], ndigit,
355     &                   '_napps: Start of current block ')
356             call ivout (logfil, 1, [iend], ndigit,
357     &                   '_napps: End of current block ')
358         end if
359c
360c        %------------------------------------------------%
361c        | No reason to apply a shift to block of order 1 |
362c        %------------------------------------------------%
363c
364         if ( istart .eq. iend ) go to 100
365c
366c        %------------------------------------------------------%
367c        | If istart + 1 = iend then no reason to apply a       |
368c        | complex conjugate pair of shifts on a 2 by 2 matrix. |
369c        %------------------------------------------------------%
370c
371         if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero )
372     &      go to 100
373c
374         h11 = h(istart,istart)
375         h21 = h(istart+1,istart)
376         if ( abs( sigmai ) .le. zero ) then
377c
378c           %---------------------------------------------%
379c           | Real-valued shift ==> apply single shift QR |
380c           %---------------------------------------------%
381c
382            f = h11 - sigmar
383            g = h21
384c
385            do 80 i = istart, iend-1
386c
387c              %-----------------------------------------------------%
388c              | Construct the plane rotation G to zero out the bulge |
389c              %-----------------------------------------------------%
390c
391               call dlartg (f, g, c, s, r)
392               if (i .gt. istart) then
393c
394c                 %-------------------------------------------%
395c                 | The following ensures that h(1:iend-1,1), |
396c                 | the first iend-2 off diagonal of elements |
397c                 | H, remain non negative.                   |
398c                 %-------------------------------------------%
399c
400                  if (r .lt. zero) then
401                     r = -r
402                     c = -c
403                     s = -s
404                  end if
405                  h(i,i-1) = r
406                  h(i+1,i-1) = zero
407               end if
408c
409c              %---------------------------------------------%
410c              | Apply rotation to the left of H;  H <- G'*H |
411c              %---------------------------------------------%
412c
413               do 50 j = i, kplusp
414                  t        =  c*h(i,j) + s*h(i+1,j)
415                  h(i+1,j) = -s*h(i,j) + c*h(i+1,j)
416                  h(i,j)   = t
417   50          continue
418c
419c              %---------------------------------------------%
420c              | Apply rotation to the right of H;  H <- H*G |
421c              %---------------------------------------------%
422c
423               do 60 j = 1, min(i+2,iend)
424                  t        =  c*h(j,i) + s*h(j,i+1)
425                  h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
426                  h(j,i)   = t
427   60          continue
428c
429c              %----------------------------------------------------%
430c              | Accumulate the rotation in the matrix Q;  Q <- Q*G |
431c              %----------------------------------------------------%
432c
433               do 70 j = 1, min( i+jj, kplusp )
434                  t        =   c*q(j,i) + s*q(j,i+1)
435                  q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
436                  q(j,i)   = t
437   70          continue
438c
439c              %---------------------------%
440c              | Prepare for next rotation |
441c              %---------------------------%
442c
443               if (i .lt. iend-1) then
444                  f = h(i+1,i)
445                  g = h(i+2,i)
446               end if
447   80       continue
448c
449c           %-----------------------------------%
450c           | Finished applying the real shift. |
451c           %-----------------------------------%
452c
453         else
454c
455c           %----------------------------------------------------%
456c           | Complex conjugate shifts ==> apply double shift QR |
457c           %----------------------------------------------------%
458c
459            h12 = h(istart,istart+1)
460            h22 = h(istart+1,istart+1)
461            h32 = h(istart+2,istart+1)
462c
463c           %---------------------------------------------------------%
464c           | Compute 1st column of (H - shift*I)*(H - conj(shift)*I) |
465c           %---------------------------------------------------------%
466c
467            s    = 2.0*sigmar
468            t = dlapy2 ( sigmar, sigmai )
469            u(1) = ( h11 * (h11 - s) + t * t ) / h21 + h12
470            u(2) = h11 + h22 - s
471            u(3) = h32
472c
473            do 90 i = istart, iend-1
474c
475               nr = min ( 3, iend-i+1 )
476c
477c              %-----------------------------------------------------%
478c              | Construct Householder reflector G to zero out u(1). |
479c              | G is of the form I - tau*( 1 u )' * ( 1 u' ).       |
480c              %-----------------------------------------------------%
481c
482               call dlarfg ( nr, u(1), u(2), 1, tau )
483c
484               if (i .gt. istart) then
485                  h(i,i-1)   = u(1)
486                  h(i+1,i-1) = zero
487                  if (i .lt. iend-1) h(i+2,i-1) = zero
488               end if
489               u(1) = one
490c
491c              %--------------------------------------%
492c              | Apply the reflector to the left of H |
493c              %--------------------------------------%
494c
495               call dlarf ('Left', nr, kplusp-i+1, u, 1, tau,
496     &                     h(i,i), ldh, workl)
497c
498c              %---------------------------------------%
499c              | Apply the reflector to the right of H |
500c              %---------------------------------------%
501c
502               ir = min ( i+3, iend )
503               call dlarf ('Right', ir, nr, u, 1, tau,
504     &                     h(1,i), ldh, workl)
505c
506c              %-----------------------------------------------------%
507c              | Accumulate the reflector in the matrix Q;  Q <- Q*G |
508c              %-----------------------------------------------------%
509c
510               call dlarf ('Right', kplusp, nr, u, 1, tau,
511     &                     q(1,i), ldq, workl)
512c
513c              %----------------------------%
514c              | Prepare for next reflector |
515c              %----------------------------%
516c
517               if (i .lt. iend-1) then
518                  u(1) = h(i+1,i)
519                  u(2) = h(i+2,i)
520                  if (i .lt. iend-2) u(3) = h(i+3,i)
521               end if
522c
523   90       continue
524c
525c           %--------------------------------------------%
526c           | Finished applying a complex pair of shifts |
527c           | to the current block                       |
528c           %--------------------------------------------%
529c
530         end if
531c
532  100    continue
533c
534c        %---------------------------------------------------------%
535c        | Apply the same shift to the next block if there is any. |
536c        %---------------------------------------------------------%
537c
538         istart = iend + 1
539         if (iend .lt. kplusp) go to 20
540c
541c        %---------------------------------------------%
542c        | Loop back to the top to get the next shift. |
543c        %---------------------------------------------%
544c
545  110 continue
546c
547c     %--------------------------------------------------%
548c     | Perform a similarity transformation that makes   |
549c     | sure that H will have non negative sub diagonals |
550c     %--------------------------------------------------%
551c
552      do 120 j=1,kev
553         if ( h(j+1,j) .lt. zero ) then
554              call dscal( kplusp-j+1, -one, h(j+1,j), ldh )
555              call dscal( min(j+2, kplusp), -one, h(1,j+1), 1 )
556              call dscal( min(j+np+1,kplusp), -one, q(1,j+1), 1 )
557         end if
558 120  continue
559c
560      do 130 i = 1, kev
561c
562c        %--------------------------------------------%
563c        | Final check for splitting and deflation.   |
564c        | Use a standard test as in the QR algorithm |
565c        | REFERENCE: LAPACK subroutine dlahqr        |
566c        %--------------------------------------------%
567c
568         tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
569         if( tst1.eq.zero )
570     &       tst1 = dlanhs( '1', kev, h, ldh, workl )
571         if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) )
572     &       h(i+1,i) = zero
573 130  continue
574c
575c     %-------------------------------------------------%
576c     | Compute the (kev+1)-st column of (V*Q) and      |
577c     | temporarily store the result in WORKD(N+1:2*N). |
578c     | This is needed in the residual update since we  |
579c     | cannot GUARANTEE that the corresponding entry   |
580c     | of H would be zero as in exact arithmetic.      |
581c     %-------------------------------------------------%
582c
583      if (h(kev+1,kev) .gt. zero)
584     &    call dgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
585     &                workd(n+1), 1)
586c
587c     %----------------------------------------------------------%
588c     | Compute column 1 to kev of (V*Q) in backward order       |
589c     | taking advantage of the upper Hessenberg structure of Q. |
590c     %----------------------------------------------------------%
591c
592      do 140 i = 1, kev
593         call dgemv ('N', n, kplusp-i+1, one, v, ldv,
594     &               q(1,kev-i+1), 1, zero, workd, 1)
595         call dcopy (n, workd, 1, v(1,kplusp-i+1), 1)
596  140 continue
597c
598c     %-------------------------------------------------%
599c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
600c     %-------------------------------------------------%
601c
602      do 150 i = 1, kev
603         call dcopy(n, v(1,kplusp-kev+i), 1, v(1,i), 1)
604  150 continue
605c
606c     %--------------------------------------------------------------%
607c     | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
608c     %--------------------------------------------------------------%
609c
610      if (h(kev+1,kev) .gt. zero)
611     &   call dcopy (n, workd(n+1), 1, v(1,kev+1), 1)
612c
613c     %-------------------------------------%
614c     | Update the residual vector:         |
615c     |    r <- sigmak*r + betak*v(:,kev+1) |
616c     | where                               |
617c     |    sigmak = (e_{kplusp}'*Q)*e_{kev} |
618c     |    betak = e_{kev+1}'*H*e_{kev}     |
619c     %-------------------------------------%
620c
621      call dscal (n, q(kplusp,kev), resid, 1)
622      if (h(kev+1,kev) .gt. zero)
623     &   call daxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
624c
625      if (msglvl .gt. 1) then
626         call dvout (logfil, 1, q(kplusp,kev), ndigit,
627     &        '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
628         call dvout (logfil, 1, h(kev+1,kev), ndigit,
629     &        '_napps: betak = e_{kev+1}^T*H*e_{kev}')
630         call ivout (logfil, 1, [kev], ndigit,
631     &               '_napps: Order of the final Hessenberg matrix ')
632         if (msglvl .gt. 2) then
633            call dmout (logfil, kev, kev, h, ldh, ndigit,
634     &      '_napps: updated Hessenberg matrix H for next iteration')
635         end if
636c
637      end if
638c
639 9000 continue
640      call arscnd (t1)
641      tnapps = tnapps + (t1 - t0)
642c
643      return
644c
645c     %---------------%
646c     | End of dnapps |
647c     %---------------%
648c
649      end
650