1c-----------------------------------------------------------------------
2c\BeginDoc
3c
4c\Name: ssapps
5c
6c\Description:
7c  Given the Arnoldi factorization
8c
9c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
10c
11c  apply NP shifts implicitly resulting in
12c
13c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
14c
15c  where Q is an orthogonal matrix of order KEV+NP. Q is the product of
16c  rotations resulting from the NP bulge chasing sweeps.  The updated Arnoldi
17c  factorization becomes:
18c
19c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
20c
21c\Usage:
22c  call ssapps
23c     ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, WORKD )
24c
25c\Arguments
26c  N       Integer.  (INPUT)
27c          Problem size, i.e. dimension of matrix A.
28c
29c  KEV     Integer.  (INPUT)
30c          INPUT: KEV+NP is the size of the input matrix H.
31c          OUTPUT: KEV is the size of the updated matrix HNEW.
32c
33c  NP      Integer.  (INPUT)
34c          Number of implicit shifts to be applied.
35c
36c  SHIFT   Real array of length NP.  (INPUT)
37c          The shifts to be applied.
38c
39c  V       Real N by (KEV+NP) array.  (INPUT/OUTPUT)
40c          INPUT: V contains the current KEV+NP Arnoldi vectors.
41c          OUTPUT: VNEW = V(1:n,1:KEV); the updated Arnoldi vectors
42c          are in the first KEV columns of V.
43c
44c  LDV     Integer.  (INPUT)
45c          Leading dimension of V exactly as declared in the calling
46c          program.
47c
48c  H       Real (KEV+NP) by 2 array.  (INPUT/OUTPUT)
49c          INPUT: H contains the symmetric tridiagonal matrix of the
50c          Arnoldi factorization with the subdiagonal in the 1st column
51c          starting at H(2,1) and the main diagonal in the 2nd column.
52c          OUTPUT: H contains the updated tridiagonal matrix in the
53c          KEV leading submatrix.
54c
55c  LDH     Integer.  (INPUT)
56c          Leading dimension of H exactly as declared in the calling
57c          program.
58c
59c  RESID   Real array of length (N).  (INPUT/OUTPUT)
60c          INPUT: RESID contains the the residual vector r_{k+p}.
61c          OUTPUT: RESID is the updated residual vector rnew_{k}.
62c
63c  Q       Real KEV+NP by KEV+NP work array.  (WORKSPACE)
64c          Work array used to accumulate the rotations during the bulge
65c          chase sweep.
66c
67c  LDQ     Integer.  (INPUT)
68c          Leading dimension of Q exactly as declared in the calling
69c          program.
70c
71c  WORKD   Real work array of length 2*N.  (WORKSPACE)
72c          Distributed array used in the application of the accumulated
73c          orthogonal matrix Q.
74c
75c\EndDoc
76c
77c-----------------------------------------------------------------------
78c
79c\BeginLib
80c
81c\Local variables:
82c     xxxxxx  real
83c
84c\References:
85c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
86c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
87c     pp 357-385.
88c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
89c     Restarted Arnoldi Iteration", Rice University Technical Report
90c     TR95-13, Department of Computational and Applied Mathematics.
91c
92c\Routines called:
93c     ivout   ARPACK utility routine that prints integers.
94c     arscnd  ARPACK utility routine for timing.
95c     svout   ARPACK utility routine that prints vectors.
96c     slamch  LAPACK routine that determines machine constants.
97c     slartg  LAPACK Givens rotation construction routine.
98c     slacpy  LAPACK matrix copy routine.
99c     slaset  LAPACK matrix initialization routine.
100c     sgemv   Level 2 BLAS routine for matrix vector multiplication.
101c     saxpy   Level 1 BLAS that computes a vector triad.
102c     scopy   Level 1 BLAS that copies one vector to another.
103c     sscal   Level 1 BLAS that scales a vector.
104c
105c\Author
106c     Danny Sorensen               Phuong Vu
107c     Richard Lehoucq              CRPC / Rice University
108c     Dept. of Computational &     Houston, Texas
109c     Applied Mathematics
110c     Rice University
111c     Houston, Texas
112c
113c\Revision history:
114c     12/16/93: Version ' 2.4'
115c
116c\SCCS Information: @(#)
117c FILE: sapps.F   SID: 2.6   DATE OF SID: 3/28/97   RELEASE: 2
118c
119c\Remarks
120c  1. In this version, each shift is applied to all the subblocks of
121c     the tridiagonal matrix H and not just to the submatrix that it
122c     comes from. This routine assumes that the subdiagonal elements
123c     of H that are stored in h(1:kev+np,1) are nonegative upon input
124c     and enforce this condition upon output. This version incorporates
125c     deflation. See code for documentation.
126c
127c\EndLib
128c
129c-----------------------------------------------------------------------
130c
131      subroutine ssapps
132     &   ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, workd )
133c
134c     %----------------------------------------------------%
135c     | Include files for debugging and timing information |
136c     %----------------------------------------------------%
137c
138      include   'debug.h'
139      include   'stat.h'
140c
141c     %------------------%
142c     | Scalar Arguments |
143c     %------------------%
144c
145      integer    kev, ldh, ldq, ldv, n, np
146c
147c     %-----------------%
148c     | Array Arguments |
149c     %-----------------%
150c
151      Real
152     &           h(ldh,2), q(ldq,kev+np), resid(n), shift(np),
153     &           v(ldv,kev+np), workd(2*n)
154c
155c     %------------%
156c     | Parameters |
157c     %------------%
158c
159      Real
160     &           one, zero
161      parameter (one = 1.0E+0, zero = 0.0E+0)
162c
163c     %---------------%
164c     | Local Scalars |
165c     %---------------%
166c
167      integer    i, iend, istart, itop, j, jj, kplusp, msglvl
168      logical    first
169      Real
170     &           a1, a2, a3, a4, big, c, epsmch, f, g, r, s
171      save       epsmch, first
172c
173c
174c     %----------------------%
175c     | External Subroutines |
176c     %----------------------%
177c
178      external   saxpy, scopy, sscal, slacpy, slartg, slaset, svout,
179     &           ivout, arscnd, sgemv
180c
181c     %--------------------%
182c     | External Functions |
183c     %--------------------%
184c
185      Real
186     &           slamch
187      external   slamch
188c
189c     %----------------------%
190c     | Intrinsics Functions |
191c     %----------------------%
192c
193      intrinsic  abs
194c
195c     %----------------%
196c     | Data statments |
197c     %----------------%
198c
199      data       first / .true. /
200c
201c     %-----------------------%
202c     | Executable Statements |
203c     %-----------------------%
204c
205      if (first) then
206         epsmch = slamch('Epsilon-Machine')
207         first = .false.
208      end if
209      itop = 1
210c
211c     %-------------------------------%
212c     | Initialize timing statistics  |
213c     | & message level for debugging |
214c     %-------------------------------%
215c
216      call arscnd (t0)
217      msglvl = msapps
218c
219      kplusp = kev + np
220c
221c     %----------------------------------------------%
222c     | Initialize Q to the identity matrix of order |
223c     | kplusp used to accumulate the rotations.     |
224c     %----------------------------------------------%
225c
226      call slaset ('All', kplusp, kplusp, zero, one, q, ldq)
227c
228c     %----------------------------------------------%
229c     | Quick return if there are no shifts to apply |
230c     %----------------------------------------------%
231c
232      if (np .eq. 0) go to 9000
233c
234c     %----------------------------------------------------------%
235c     | Apply the np shifts implicitly. Apply each shift to the  |
236c     | whole matrix and not just to the submatrix from which it |
237c     | comes.                                                   |
238c     %----------------------------------------------------------%
239c
240      do 90 jj = 1, np
241c
242         istart = itop
243c
244c        %----------------------------------------------------------%
245c        | Check for splitting and deflation. Currently we consider |
246c        | an off-diagonal element h(i+1,1) negligible if           |
247c        |         h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| )   |
248c        | for i=1:KEV+NP-1.                                        |
249c        | If above condition tests true then we set h(i+1,1) = 0.  |
250c        | Note that h(1:KEV+NP,1) are assumed to be non negative.  |
251c        %----------------------------------------------------------%
252c
253   20    continue
254c
255c        %------------------------------------------------%
256c        | The following loop exits early if we encounter |
257c        | a negligible off diagonal element.             |
258c        %------------------------------------------------%
259c
260         do 30 i = istart, kplusp-1
261            big   = abs(h(i,2)) + abs(h(i+1,2))
262            if (h(i+1,1) .le. epsmch*big) then
263               if (msglvl .gt. 0) then
264                  call ivout (logfil, 1, i, ndigit,
265     &                 '_sapps: deflation at row/column no.')
266                  call ivout (logfil, 1, jj, ndigit,
267     &                 '_sapps: occured before shift number.')
268                  call svout (logfil, 1, h(i+1,1), ndigit,
269     &                 '_sapps: the corresponding off diagonal element')
270               end if
271               h(i+1,1) = zero
272               iend = i
273               go to 40
274            end if
275   30    continue
276         iend = kplusp
277   40    continue
278c
279         if (istart .lt. iend) then
280c
281c           %--------------------------------------------------------%
282c           | Construct the plane rotation G'(istart,istart+1,theta) |
283c           | that attempts to drive h(istart+1,1) to zero.          |
284c           %--------------------------------------------------------%
285c
286             f = h(istart,2) - shift(jj)
287             g = h(istart+1,1)
288             call slartg (f, g, c, s, r)
289c
290c            %-------------------------------------------------------%
291c            | Apply rotation to the left and right of H;            |
292c            | H <- G' * H * G,  where G = G(istart,istart+1,theta). |
293c            | This will create a "bulge".                           |
294c            %-------------------------------------------------------%
295c
296             a1 = c*h(istart,2)   + s*h(istart+1,1)
297             a2 = c*h(istart+1,1) + s*h(istart+1,2)
298             a4 = c*h(istart+1,2) - s*h(istart+1,1)
299             a3 = c*h(istart+1,1) - s*h(istart,2)
300             h(istart,2)   = c*a1 + s*a2
301             h(istart+1,2) = c*a4 - s*a3
302             h(istart+1,1) = c*a3 + s*a4
303c
304c            %----------------------------------------------------%
305c            | Accumulate the rotation in the matrix Q;  Q <- Q*G |
306c            %----------------------------------------------------%
307c
308             do 60 j = 1, min(istart+jj,kplusp)
309                a1            =   c*q(j,istart) + s*q(j,istart+1)
310                q(j,istart+1) = - s*q(j,istart) + c*q(j,istart+1)
311                q(j,istart)   = a1
312   60        continue
313c
314c
315c            %----------------------------------------------%
316c            | The following loop chases the bulge created. |
317c            | Note that the previous rotation may also be  |
318c            | done within the following loop. But it is    |
319c            | kept separate to make the distinction among  |
320c            | the bulge chasing sweeps and the first plane |
321c            | rotation designed to drive h(istart+1,1) to  |
322c            | zero.                                        |
323c            %----------------------------------------------%
324c
325             do 70 i = istart+1, iend-1
326c
327c               %----------------------------------------------%
328c               | Construct the plane rotation G'(i,i+1,theta) |
329c               | that zeros the i-th bulge that was created   |
330c               | by G(i-1,i,theta). g represents the bulge.   |
331c               %----------------------------------------------%
332c
333                f = h(i,1)
334                g = s*h(i+1,1)
335c
336c               %----------------------------------%
337c               | Final update with G(i-1,i,theta) |
338c               %----------------------------------%
339c
340                h(i+1,1) = c*h(i+1,1)
341                call slartg (f, g, c, s, r)
342c
343c               %-------------------------------------------%
344c               | The following ensures that h(1:iend-1,1), |
345c               | the first iend-2 off diagonal of elements |
346c               | H, remain non negative.                   |
347c               %-------------------------------------------%
348c
349                if (r .lt. zero) then
350                   r = -r
351                   c = -c
352                   s = -s
353                end if
354c
355c               %--------------------------------------------%
356c               | Apply rotation to the left and right of H; |
357c               | H <- G * H * G',  where G = G(i,i+1,theta) |
358c               %--------------------------------------------%
359c
360                h(i,1) = r
361c
362                a1 = c*h(i,2)   + s*h(i+1,1)
363                a2 = c*h(i+1,1) + s*h(i+1,2)
364                a3 = c*h(i+1,1) - s*h(i,2)
365                a4 = c*h(i+1,2) - s*h(i+1,1)
366c
367                h(i,2)   = c*a1 + s*a2
368                h(i+1,2) = c*a4 - s*a3
369                h(i+1,1) = c*a3 + s*a4
370c
371c               %----------------------------------------------------%
372c               | Accumulate the rotation in the matrix Q;  Q <- Q*G |
373c               %----------------------------------------------------%
374c
375                do 50 j = 1, min( i+jj, kplusp )
376                   a1       =   c*q(j,i) + s*q(j,i+1)
377                   q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
378                   q(j,i)   = a1
379   50           continue
380c
381   70        continue
382c
383         end if
384c
385c        %--------------------------%
386c        | Update the block pointer |
387c        %--------------------------%
388c
389         istart = iend + 1
390c
391c        %------------------------------------------%
392c        | Make sure that h(iend,1) is non-negative |
393c        | If not then set h(iend,1) <-- -h(iend,1) |
394c        | and negate the last column of Q.         |
395c        | We have effectively carried out a        |
396c        | similarity on transformation H           |
397c        %------------------------------------------%
398c
399         if (h(iend,1) .lt. zero) then
400             h(iend,1) = -h(iend,1)
401             call sscal(kplusp, -one, q(1,iend), 1)
402         end if
403c
404c        %--------------------------------------------------------%
405c        | Apply the same shift to the next block if there is any |
406c        %--------------------------------------------------------%
407c
408         if (iend .lt. kplusp) go to 20
409c
410c        %-----------------------------------------------------%
411c        | Check if we can increase the the start of the block |
412c        %-----------------------------------------------------%
413c
414         do 80 i = itop, kplusp-1
415            if (h(i+1,1) .gt. zero) go to 90
416            itop  = itop + 1
417   80    continue
418c
419c        %-----------------------------------%
420c        | Finished applying the jj-th shift |
421c        %-----------------------------------%
422c
423   90 continue
424c
425c     %------------------------------------------%
426c     | All shifts have been applied. Check for  |
427c     | more possible deflation that might occur |
428c     | after the last shift is applied.         |
429c     %------------------------------------------%
430c
431      do 100 i = itop, kplusp-1
432         big   = abs(h(i,2)) + abs(h(i+1,2))
433         if (h(i+1,1) .le. epsmch*big) then
434            if (msglvl .gt. 0) then
435               call ivout (logfil, 1, i, ndigit,
436     &              '_sapps: deflation at row/column no.')
437               call svout (logfil, 1, h(i+1,1), ndigit,
438     &              '_sapps: the corresponding off diagonal element')
439            end if
440            h(i+1,1) = zero
441         end if
442 100  continue
443c
444c     %-------------------------------------------------%
445c     | Compute the (kev+1)-st column of (V*Q) and      |
446c     | temporarily store the result in WORKD(N+1:2*N). |
447c     | This is not necessary if h(kev+1,1) = 0.         |
448c     %-------------------------------------------------%
449c
450      if ( h(kev+1,1) .gt. zero )
451     &   call sgemv ('N', n, kplusp, one, v, ldv,
452     &                q(1,kev+1), 1, zero, workd(n+1), 1)
453c
454c     %-------------------------------------------------------%
455c     | Compute column 1 to kev of (V*Q) in backward order    |
456c     | taking advantage that Q is an upper triangular matrix |
457c     | with lower bandwidth np.                              |
458c     | Place results in v(:,kplusp-kev:kplusp) temporarily.  |
459c     %-------------------------------------------------------%
460c
461      do 130 i = 1, kev
462         call sgemv ('N', n, kplusp-i+1, one, v, ldv,
463     &               q(1,kev-i+1), 1, zero, workd, 1)
464         call scopy (n, workd, 1, v(1,kplusp-i+1), 1)
465  130 continue
466c
467c     %-------------------------------------------------%
468c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
469c     %-------------------------------------------------%
470c
471      call slacpy ('All', n, kev, v(1,np+1), ldv, v, ldv)
472c
473c     %--------------------------------------------%
474c     | Copy the (kev+1)-st column of (V*Q) in the |
475c     | appropriate place if h(kev+1,1) .ne. zero. |
476c     %--------------------------------------------%
477c
478      if ( h(kev+1,1) .gt. zero )
479     &     call scopy (n, workd(n+1), 1, v(1,kev+1), 1)
480c
481c     %-------------------------------------%
482c     | Update the residual vector:         |
483c     |    r <- sigmak*r + betak*v(:,kev+1) |
484c     | where                               |
485c     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  |
486c     |    betak = e_{kev+1}'*H*e_{kev}     |
487c     %-------------------------------------%
488c
489      call sscal (n, q(kplusp,kev), resid, 1)
490      if (h(kev+1,1) .gt. zero)
491     &   call saxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1)
492c
493      if (msglvl .gt. 1) then
494         call svout (logfil, 1, q(kplusp,kev), ndigit,
495     &      '_sapps: sigmak of the updated residual vector')
496         call svout (logfil, 1, h(kev+1,1), ndigit,
497     &      '_sapps: betak of the updated residual vector')
498         call svout (logfil, kev, h(1,2), ndigit,
499     &      '_sapps: updated main diagonal of H for next iteration')
500         if (kev .gt. 1) then
501         call svout (logfil, kev-1, h(2,1), ndigit,
502     &      '_sapps: updated sub diagonal of H for next iteration')
503         end if
504      end if
505c
506      call arscnd (t1)
507      tsapps = tsapps + (t1 - t0)
508c
509 9000 continue
510      return
511c
512c     %---------------%
513c     | End of ssapps |
514c     %---------------%
515c
516      end
517