1c-----------------------------------------------------------------------
2c\BeginDoc
3c
4c\Name: dsaitr
5c
6c\Description:
7c  Reverse communication interface for applying NP additional steps to
8c  a K step symmetric Arnoldi factorization.
9c
10c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
11c
12c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
13c
14c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
15c
16c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
17c
18c  where OP and B are as in dsaupd.  The B-norm of r_{k+p} is also
19c  computed and returned.
20c
21c\Usage:
22c  call dsaitr
23c     ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH,
24c       IPNTR, WORKD, INFO )
25c
26c\Arguments
27c  IDO     Integer.  (INPUT/OUTPUT)
28c          Reverse communication flag.
29c          -------------------------------------------------------------
30c          IDO =  0: first call to the reverse communication interface
31c          IDO = -1: compute  Y = OP * X  where
32c                    IPNTR(1) is the pointer into WORK for X,
33c                    IPNTR(2) is the pointer into WORK for Y.
34c                    This is for the restart phase to force the new
35c                    starting vector into the range of OP.
36c          IDO =  1: compute  Y = OP * X  where
37c                    IPNTR(1) is the pointer into WORK for X,
38c                    IPNTR(2) is the pointer into WORK for Y,
39c                    IPNTR(3) is the pointer into WORK for B * X.
40c          IDO =  2: compute  Y = B * X  where
41c                    IPNTR(1) is the pointer into WORK for X,
42c                    IPNTR(2) is the pointer into WORK for Y.
43c          IDO = 99: done
44c          -------------------------------------------------------------
45c          When the routine is used in the "shift-and-invert" mode, the
46c          vector B * Q is already available and does not need to be
47c          recomputed in forming OP * Q.
48c
49c  BMAT    Character*1.  (INPUT)
50c          BMAT specifies the type of matrix B that defines the
51c          semi-inner product for the operator OP.  See dsaupd.
52c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
53c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M*x
54c
55c  N       Integer.  (INPUT)
56c          Dimension of the eigenproblem.
57c
58c  K       Integer.  (INPUT)
59c          Current order of H and the number of columns of V.
60c
61c  NP      Integer.  (INPUT)
62c          Number of additional Arnoldi steps to take.
63c
64c  MODE    Integer.  (INPUT)
65c          Signifies which form for "OP". If MODE=2 then
66c          a reduction in the number of B matrix vector multiplies
67c          is possible since the B-norm of OP*x is equivalent to
68c          the inv(B)-norm of A*x.
69c
70c  RESID   Double precision array of length N.  (INPUT/OUTPUT)
71c          On INPUT:  RESID contains the residual vector r_{k}.
72c          On OUTPUT: RESID contains the residual vector r_{k+p}.
73c
74c  RNORM   Double precision scalar.  (INPUT/OUTPUT)
75c          On INPUT the B-norm of r_{k}.
76c          On OUTPUT the B-norm of the updated residual r_{k+p}.
77c
78c  V       Double precision N by K+NP array.  (INPUT/OUTPUT)
79c          On INPUT:  V contains the Arnoldi vectors in the first K
80c          columns.
81c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
82c          NP columns.  The first K columns are unchanged.
83c
84c  LDV     Integer.  (INPUT)
85c          Leading dimension of V exactly as declared in the calling
86c          program.
87c
88c  H       Double precision (K+NP) by 2 array.  (INPUT/OUTPUT)
89c          H is used to store the generated symmetric tridiagonal matrix
90c          with the subdiagonal in the first column starting at H(2,1)
91c          and the main diagonal in the second column.
92c
93c  LDH     Integer.  (INPUT)
94c          Leading dimension of H exactly as declared in the calling
95c          program.
96c
97c  IPNTR   Integer array of length 3.  (OUTPUT)
98c          Pointer to mark the starting locations in the WORK for
99c          vectors used by the Arnoldi iteration.
100c          -------------------------------------------------------------
101c          IPNTR(1): pointer to the current operand vector X.
102c          IPNTR(2): pointer to the current result vector Y.
103c          IPNTR(3): pointer to the vector B * X when used in the
104c                    shift-and-invert mode.  X is the current operand.
105c          -------------------------------------------------------------
106c
107c  WORKD   Double precision work array of length 3*N.  (REVERSE COMMUNICATION)
108c          Distributed array to be used in the basic Arnoldi iteration
109c          for reverse communication.  The calling program should not
110c          use WORKD as temporary workspace during the iteration !!!!!!
111c          On INPUT, WORKD(1:N) = B*RESID where RESID is associated
112c          with the K step Arnoldi factorization. Used to save some
113c          computation at the first step.
114c          On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated
115c          with the K+NP step Arnoldi factorization.
116c
117c  INFO    Integer.  (OUTPUT)
118c          = 0: Normal exit.
119c          > 0: Size of an invariant subspace of OP is found that is
120c               less than K + NP.
121c
122c\EndDoc
123c
124c-----------------------------------------------------------------------
125c
126c\BeginLib
127c
128c\Local variables:
129c     xxxxxx  real
130c
131c\Routines called:
132c     dgetv0  ARPACK routine to generate the initial vector.
133c     ivout   ARPACK utility routine that prints integers.
134c     dmout   ARPACK utility routine that prints matrices.
135c     dlamch  LAPACK routine that determines machine constants.
136c     dlascl  LAPACK routine for careful scaling of a matrix.
137c     dgemv   Level 2 BLAS routine for matrix vector multiplication.
138c     daxpy   Level 1 BLAS that computes a vector triad.
139c     dscal   Level 1 BLAS that scales a vector.
140c     dcopy   Level 1 BLAS that copies one vector to another .
141c     ddot    Level 1 BLAS that computes the scalar product of two vectors.
142c     dnrm2   Level 1 BLAS that computes the norm of a vector.
143c
144c\Author
145c     Danny Sorensen               Phuong Vu
146c     Richard Lehoucq              CRPC / Rice University
147c     Dept. of Computational &     Houston, Texas
148c     Applied Mathematics
149c     Rice University
150c     Houston, Texas
151c
152c\Revision history:
153c     xx/xx/93: Version ' 2.4'
154c
155c\SCCS Information: @(#)
156c FILE: saitr.F   SID: 2.6   DATE OF SID: 8/28/96   RELEASE: 2
157c
158c\Remarks
159c  The algorithm implemented is:
160c
161c  restart = .false.
162c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
163c  r_{k} contains the initial residual vector even for k = 0;
164c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
165c  computed by the calling program.
166c
167c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
168c  For  j = k+1, ..., k+np  Do
169c     1) if ( betaj < tol ) stop or restart depending on j.
170c        if ( restart ) generate a new starting vector.
171c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
172c        p_{j} = p_{j}/betaj
173c     3) r_{j} = OP*v_{j} where OP is defined as in dsaupd
174c        For shift-invert mode p_{j} = B*v_{j} is already available.
175c        wnorm = || OP*v_{j} ||
176c     4) Compute the j-th step residual vector.
177c        w_{j} =  V_{j}^T * B * OP * v_{j}
178c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
179c        alphaj <- j-th component of w_{j}
180c        rnorm = || r_{j} ||
181c        betaj+1 = rnorm
182c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
183c     5) Re-orthogonalization step:
184c        s = V_{j}'*B*r_{j}
185c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
186c        alphaj = alphaj + s_{j};
187c     6) Iterative refinement step:
188c        If (rnorm1 > 0.717*rnorm) then
189c           rnorm = rnorm1
190c           accept step and go back to 1)
191c        Else
192c           rnorm = rnorm1
193c           If this is the first time in step 6), go to 5)
194c           Else r_{j} lies in the span of V_{j} numerically.
195c              Set r_{j} = 0 and rnorm = 0; go to 1)
196c        EndIf
197c  End Do
198c
199c\EndLib
200c
201c-----------------------------------------------------------------------
202c
203      subroutine dsaitr
204     &   (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh,
205     &    ipntr, workd, info)
206c
207c     %----------------------------------------------------%
208c     | Include files for debugging and timing information |
209c     %----------------------------------------------------%
210c
211      include   'debug.h'
212      include   'stat.h'
213c
214c     %------------------%
215c     | Scalar Arguments |
216c     %------------------%
217c
218      character  bmat*1
219      integer    ido, info, k, ldh, ldv, n, mode, np
220      Double precision
221     &           rnorm
222c
223c     %-----------------%
224c     | Array Arguments |
225c     %-----------------%
226c
227      integer    ipntr(3)
228      Double precision
229     &           h(ldh,2), resid(n), v(ldv,k+np), workd(3*n)
230c
231c     %------------%
232c     | Parameters |
233c     %------------%
234c
235      Double precision
236     &           one, zero
237      parameter (one = 1.0D+0, zero = 0.0D+0)
238c
239c     %---------------%
240c     | Local Scalars |
241c     %---------------%
242c
243      logical    first, orth1, orth2, rstart, step3, step4
244      integer    i, ierr, ipj, irj, ivj, iter, itry, j, msglvl,
245     &           infol, jj
246      Double precision
247     &           rnorm1, wnorm, safmin, temp1
248      save       orth1, orth2, rstart, step3, step4,
249     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl,
250     &           rnorm1, safmin, wnorm
251c
252c     %-----------------------%
253c     | Local Array Arguments |
254c     %-----------------------%
255c
256      Double precision
257     &           xtemp(2)
258c
259c     %----------------------%
260c     | External Subroutines |
261c     %----------------------%
262c
263      external   daxpy, dcopy, dscal, dgemv, dgetv0, dmout,
264     &           dlascl, second
265c
266c     %--------------------%
267c     | External Functions |
268c     %--------------------%
269c
270      Double precision
271     &           ddot, dnrm2, dlamch
272      external   ddot, dnrm2, dlamch
273c
274c     %-----------------%
275c     | Data statements |
276c     %-----------------%
277c
278      data      first / .true. /
279c
280c     %-----------------------%
281c     | Executable Statements |
282c     %-----------------------%
283c
284      if (first) then
285         first = .false.
286c
287c        %--------------------------------%
288c        | safmin = safe minimum is such  |
289c        | that 1/sfmin does not overflow |
290c        %--------------------------------%
291c
292         safmin = dlamch('safmin')
293      end if
294c
295      if (ido .eq. 0) then
296c
297c        %-------------------------------%
298c        | Initialize timing statistics  |
299c        | & message level for debugging |
300c        %-------------------------------%
301c
302         call second (t0)
303         msglvl = msaitr
304c
305c        %------------------------------%
306c        | Initial call to this routine |
307c        %------------------------------%
308c
309         info   = 0
310         step3  = .false.
311         step4  = .false.
312         rstart = .false.
313         orth1  = .false.
314         orth2  = .false.
315c
316c        %--------------------------------%
317c        | Pointer to the current step of |
318c        | the factorization to build     |
319c        %--------------------------------%
320c
321         j      = k + 1
322c
323c        %------------------------------------------%
324c        | Pointers used for reverse communication  |
325c        | when using WORKD.                        |
326c        %------------------------------------------%
327c
328         ipj    = 1
329         irj    = ipj   + n
330         ivj    = irj   + n
331      end if
332c
333c     %-------------------------------------------------%
334c     | When in reverse communication mode one of:      |
335c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
336c     | will be .true.                                  |
337c     | STEP3: return from computing OP*v_{j}.          |
338c     | STEP4: return from computing B-norm of OP*v_{j} |
339c     | ORTH1: return from computing B-norm of r_{j+1}  |
340c     | ORTH2: return from computing B-norm of          |
341c     |        correction to the residual vector.       |
342c     | RSTART: return from OP computations needed by   |
343c     |         dgetv0.                                 |
344c     %-------------------------------------------------%
345c
346      if (step3)  go to 50
347      if (step4)  go to 60
348      if (orth1)  go to 70
349      if (orth2)  go to 90
350      if (rstart) go to 30
351c
352c     %------------------------------%
353c     | Else this is the first step. |
354c     %------------------------------%
355c
356c     %--------------------------------------------------------------%
357c     |                                                              |
358c     |        A R N O L D I     I T E R A T I O N     L O O P       |
359c     |                                                              |
360c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
361c     %--------------------------------------------------------------%
362c
363 1000 continue
364c
365c         if (msglvl .gt. 2) then
366c            call ivout (logfil, 1, j, ndigit,
367c     &                  '_saitr: generating Arnoldi vector no.')
368c            call dvout (logfil, 1, rnorm, ndigit,
369c     &                  '_saitr: B-norm of the current residual =')
370c         end if
371c
372c        %---------------------------------------------------------%
373c        | Check for exact zero. Equivalent to determining whether |
374c        | a j-step Arnoldi factorization is present.              |
375c        %---------------------------------------------------------%
376c
377         if (rnorm .gt. zero) go to 40
378c
379c           %---------------------------------------------------%
380c           | Invariant subspace found, generate a new starting |
381c           | vector which is orthogonal to the current Arnoldi |
382c           | basis and continue the iteration.                 |
383c           %---------------------------------------------------%
384c
385c            if (msglvl .gt. 0) then
386c               call ivout (logfil, 1, j, ndigit,
387c     &                     '_saitr: ****** restart at step ******')
388c            end if
389c
390c           %---------------------------------------------%
391c           | ITRY is the loop variable that controls the |
392c           | maximum amount of times that a restart is   |
393c           | attempted. NRSTRT is used by stat.h         |
394c           %---------------------------------------------%
395c
396            nrstrt = nrstrt + 1
397            itry   = 1
398   20       continue
399            rstart = .true.
400            ido    = 0
401   30       continue
402c
403c           %--------------------------------------%
404c           | If in reverse communication mode and |
405c           | RSTART = .true. flow returns here.   |
406c           %--------------------------------------%
407c
408            call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
409     &                   resid, rnorm, ipntr, workd, ierr)
410            if (ido .ne. 99) go to 9000
411            if (ierr .lt. 0) then
412               itry = itry + 1
413               if (itry .le. 3) go to 20
414c
415c              %------------------------------------------------%
416c              | Give up after several restart attempts.        |
417c              | Set INFO to the size of the invariant subspace |
418c              | which spans OP and exit.                       |
419c              %------------------------------------------------%
420c
421               info = j - 1
422               call second (t1)
423               tsaitr = tsaitr + (t1 - t0)
424               ido = 99
425               go to 9000
426            end if
427c
428   40    continue
429c
430c        %---------------------------------------------------------%
431c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
432c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
433c        | when reciprocating a small RNORM, test against lower    |
434c        | machine bound.                                          |
435c        %---------------------------------------------------------%
436c
437         call dcopy (n, resid, 1, v(1,j), 1)
438         if (rnorm .ge. safmin) then
439             temp1 = one / rnorm
440             call dscal (n, temp1, v(1,j), 1)
441             call dscal (n, temp1, workd(ipj), 1)
442         else
443c
444c            %-----------------------------------------%
445c            | To scale both v_{j} and p_{j} carefully |
446c            | use LAPACK routine SLASCL               |
447c            %-----------------------------------------%
448c
449             call dlascl ('General', i, i, rnorm, one, n, 1,
450     &                    v(1,j), n, infol)
451             call dlascl ('General', i, i, rnorm, one, n, 1,
452     &                    workd(ipj), n, infol)
453         end if
454c
455c        %------------------------------------------------------%
456c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
457c        | Note that this is not quite yet r_{j}. See STEP 4    |
458c        %------------------------------------------------------%
459c
460         step3 = .true.
461         nopx  = nopx + 1
462         call second (t2)
463         call dcopy (n, v(1,j), 1, workd(ivj), 1)
464         ipntr(1) = ivj
465         ipntr(2) = irj
466         ipntr(3) = ipj
467         ido = 1
468c
469c        %-----------------------------------%
470c        | Exit in order to compute OP*v_{j} |
471c        %-----------------------------------%
472c
473         go to 9000
474   50    continue
475c
476c        %-----------------------------------%
477c        | Back from reverse communication;  |
478c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}.   |
479c        %-----------------------------------%
480c
481         call second (t3)
482         tmvopx = tmvopx + (t3 - t2)
483c
484         step3 = .false.
485c
486c        %------------------------------------------%
487c        | Put another copy of OP*v_{j} into RESID. |
488c        %------------------------------------------%
489c
490         call dcopy (n, workd(irj), 1, resid, 1)
491c
492c        %-------------------------------------------%
493c        | STEP 4:  Finish extending the symmetric   |
494c        |          Arnoldi to length j. If MODE = 2 |
495c        |          then B*OP = B*inv(B)*A = A and   |
496c        |          we don't need to compute B*OP.   |
497c        | NOTE: If MODE = 2 WORKD(IVJ:IVJ+N-1) is   |
498c        | assumed to have A*v_{j}.                  |
499c        %-------------------------------------------%
500c
501         if (mode .eq. 2) go to 65
502         call second (t2)
503         if (bmat .eq. 'G') then
504            nbx = nbx + 1
505            step4 = .true.
506            ipntr(1) = irj
507            ipntr(2) = ipj
508            ido = 2
509c
510c           %-------------------------------------%
511c           | Exit in order to compute B*OP*v_{j} |
512c           %-------------------------------------%
513c
514            go to 9000
515         else if (bmat .eq. 'I') then
516              call dcopy(n, resid, 1 , workd(ipj), 1)
517         end if
518   60    continue
519c
520c        %-----------------------------------%
521c        | Back from reverse communication;  |
522c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. |
523c        %-----------------------------------%
524c
525         if (bmat .eq. 'G') then
526            call second (t3)
527            tmvbx = tmvbx + (t3 - t2)
528         end if
529c
530         step4 = .false.
531c
532c        %-------------------------------------%
533c        | The following is needed for STEP 5. |
534c        | Compute the B-norm of OP*v_{j}.     |
535c        %-------------------------------------%
536c
537   65    continue
538         if (mode .eq. 2) then
539c
540c           %----------------------------------%
541c           | Note that the B-norm of OP*v_{j} |
542c           | is the inv(B)-norm of A*v_{j}.   |
543c           %----------------------------------%
544c
545            wnorm = ddot (n, resid, 1, workd(ivj), 1)
546            wnorm = sqrt(abs(wnorm))
547         else if (bmat .eq. 'G') then
548            wnorm = ddot (n, resid, 1, workd(ipj), 1)
549            wnorm = sqrt(abs(wnorm))
550         else if (bmat .eq. 'I') then
551            wnorm = dnrm2(n, resid, 1)
552         end if
553c
554c        %-----------------------------------------%
555c        | Compute the j-th residual corresponding |
556c        | to the j step factorization.            |
557c        | Use Classical Gram Schmidt and compute: |
558c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
559c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
560c        %-----------------------------------------%
561c
562c
563c        %------------------------------------------%
564c        | Compute the j Fourier coefficients w_{j} |
565c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
566c        %------------------------------------------%
567c
568         if (mode .ne. 2 ) then
569            call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero,
570     &                  workd(irj), 1)
571         else if (mode .eq. 2) then
572            call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero,
573     &                  workd(irj), 1)
574         end if
575c
576c        %--------------------------------------%
577c        | Orthgonalize r_{j} against V_{j}.    |
578c        | RESID contains OP*v_{j}. See STEP 3. |
579c        %--------------------------------------%
580c
581         call dgemv('N', n, j, -one, v, ldv, workd(irj), 1, one,
582     &               resid, 1)
583c
584c        %--------------------------------------%
585c        | Extend H to have j rows and columns. |
586c        %--------------------------------------%
587c
588         h(j,2) = workd(irj + j - 1)
589         if (j .eq. 1  .or.  rstart) then
590            h(j,1) = zero
591         else
592            h(j,1) = rnorm
593         end if
594         call second (t4)
595c
596         orth1 = .true.
597         iter  = 0
598c
599         call second (t2)
600         if (bmat .eq. 'G') then
601            nbx = nbx + 1
602            call dcopy (n, resid, 1, workd(irj), 1)
603            ipntr(1) = irj
604            ipntr(2) = ipj
605            ido = 2
606c
607c           %----------------------------------%
608c           | Exit in order to compute B*r_{j} |
609c           %----------------------------------%
610c
611            go to 9000
612         else if (bmat .eq. 'I') then
613            call dcopy (n, resid, 1, workd(ipj), 1)
614         end if
615   70    continue
616c
617c        %---------------------------------------------------%
618c        | Back from reverse communication if ORTH1 = .true. |
619c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
620c        %---------------------------------------------------%
621c
622         if (bmat .eq. 'G') then
623            call second (t3)
624            tmvbx = tmvbx + (t3 - t2)
625         end if
626c
627         orth1 = .false.
628c
629c        %------------------------------%
630c        | Compute the B-norm of r_{j}. |
631c        %------------------------------%
632c
633         if (bmat .eq. 'G') then
634            rnorm = ddot (n, resid, 1, workd(ipj), 1)
635            rnorm = sqrt(abs(rnorm))
636         else if (bmat .eq. 'I') then
637            rnorm = dnrm2(n, resid, 1)
638         end if
639c
640c        %-----------------------------------------------------------%
641c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
642c        | Maximum NITER_ITREF tries.                                |
643c        |                                                           |
644c        |          s      = V_{j}^T * B * r_{j}                     |
645c        |          r_{j}  = r_{j} - V_{j}*s                         |
646c        |          alphaj = alphaj + s_{j}                          |
647c        |                                                           |
648c        | The stopping criteria used for iterative refinement is    |
649c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
650c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
651c        | Determine if we need to correct the residual. The goal is |
652c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
653c        %-----------------------------------------------------------%
654c
655         if (rnorm .gt. 0.717*wnorm) go to 100
656         nrorth = nrorth + 1
657c
658c        %---------------------------------------------------%
659c        | Enter the Iterative refinement phase. If further  |
660c        | refinement is necessary, loop back here. The loop |
661c        | variable is ITER. Perform a step of Classical     |
662c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
663c        %---------------------------------------------------%
664c
665   80    continue
666c
667         if (msglvl .gt. 2) then
668            xtemp(1) = wnorm
669            xtemp(2) = rnorm
670c            call dvout (logfil, 2, xtemp, ndigit,
671c     &           '_saitr: re-orthonalization ; wnorm and rnorm are')
672         end if
673c
674c        %----------------------------------------------------%
675c        | Compute V_{j}^T * B * r_{j}.                       |
676c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
677c        %----------------------------------------------------%
678c
679         call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
680     &               zero, workd(irj), 1)
681c
682c        %----------------------------------------------%
683c        | Compute the correction to the residual:      |
684c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1).  |
685c        | The correction to H is v(:,1:J)*H(1:J,1:J) + |
686c        | v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j, but only   |
687c        | H(j,j) is updated.                           |
688c        %----------------------------------------------%
689c
690         call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
691     &               one, resid, 1)
692c
693         if (j .eq. 1  .or.  rstart) h(j,1) = zero
694         h(j,2) = h(j,2) + workd(irj + j - 1)
695c
696         orth2 = .true.
697         call second (t2)
698         if (bmat .eq. 'G') then
699            nbx = nbx + 1
700            call dcopy (n, resid, 1, workd(irj), 1)
701            ipntr(1) = irj
702            ipntr(2) = ipj
703            ido = 2
704c
705c           %-----------------------------------%
706c           | Exit in order to compute B*r_{j}. |
707c           | r_{j} is the corrected residual.  |
708c           %-----------------------------------%
709c
710            go to 9000
711         else if (bmat .eq. 'I') then
712            call dcopy (n, resid, 1, workd(ipj), 1)
713         end if
714   90    continue
715c
716c        %---------------------------------------------------%
717c        | Back from reverse communication if ORTH2 = .true. |
718c        %---------------------------------------------------%
719c
720         if (bmat .eq. 'G') then
721            call second (t3)
722            tmvbx = tmvbx + (t3 - t2)
723         end if
724c
725c        %-----------------------------------------------------%
726c        | Compute the B-norm of the corrected residual r_{j}. |
727c        %-----------------------------------------------------%
728c
729         if (bmat .eq. 'G') then
730             rnorm1 = ddot (n, resid, 1, workd(ipj), 1)
731             rnorm1 = sqrt(abs(rnorm1))
732         else if (bmat .eq. 'I') then
733             rnorm1 = dnrm2(n, resid, 1)
734         end if
735c
736c         if (msglvl .gt. 0 .and. iter .gt. 0) then
737c            call ivout (logfil, 1, j, ndigit,
738c     &           '_saitr: Iterative refinement for Arnoldi residual')
739c            if (msglvl .gt. 2) then
740c                xtemp(1) = rnorm
741c                xtemp(2) = rnorm1
742c                call dvout (logfil, 2, xtemp, ndigit,
743c     &           '_saitr: iterative refinement ; rnorm and rnorm1 are')
744c            end if
745c         end if
746c
747c        %-----------------------------------------%
748c        | Determine if we need to perform another |
749c        | step of re-orthogonalization.           |
750c        %-----------------------------------------%
751c
752         if (rnorm1 .gt. 0.717*rnorm) then
753c
754c           %--------------------------------%
755c           | No need for further refinement |
756c           %--------------------------------%
757c
758            rnorm = rnorm1
759c
760         else
761c
762c           %-------------------------------------------%
763c           | Another step of iterative refinement step |
764c           | is required. NITREF is used by stat.h     |
765c           %-------------------------------------------%
766c
767            nitref = nitref + 1
768            rnorm  = rnorm1
769            iter   = iter + 1
770            if (iter .le. 1) go to 80
771c
772c           %-------------------------------------------------%
773c           | Otherwise RESID is numerically in the span of V |
774c           %-------------------------------------------------%
775c
776            do 95 jj = 1, n
777               resid(jj) = zero
778  95        continue
779            rnorm = zero
780         end if
781c
782c        %----------------------------------------------%
783c        | Branch here directly if iterative refinement |
784c        | wasn't necessary or after at most NITER_REF  |
785c        | steps of iterative refinement.               |
786c        %----------------------------------------------%
787c
788  100    continue
789c
790         rstart = .false.
791         orth2  = .false.
792c
793         call second (t5)
794         titref = titref + (t5 - t4)
795c
796c        %----------------------------------------------------------%
797c        | Make sure the last off-diagonal element is non negative  |
798c        | If not perform a similarity transformation on H(1:j,1:j) |
799c        | and scale v(:,j) by -1.                                  |
800c        %----------------------------------------------------------%
801c
802         if (h(j,1) .lt. zero) then
803            h(j,1) = -h(j,1)
804            if ( j .lt. k+np) then
805               call dscal(n, -one, v(1,j+1), 1)
806            else
807               call dscal(n, -one, resid, 1)
808            end if
809         end if
810c
811c        %------------------------------------%
812c        | STEP 6: Update  j = j+1;  Continue |
813c        %------------------------------------%
814c
815         j = j + 1
816         if (j .gt. k+np) then
817            call second (t1)
818            tsaitr = tsaitr + (t1 - t0)
819            ido = 99
820c
821c            if (msglvl .gt. 1) then
822c               call dvout (logfil, k+np, h(1,2), ndigit,
823c     &         '_saitr: main diagonal of matrix H of step K+NP.')
824c               if (k+np .gt. 1) then
825c               call dvout (logfil, k+np-1, h(2,1), ndigit,
826c     &         '_saitr: sub diagonal of matrix H of step K+NP.')
827c               end if
828c            end if
829c
830            go to 9000
831         end if
832c
833c        %--------------------------------------------------------%
834c        | Loop back to extend the factorization by another step. |
835c        %--------------------------------------------------------%
836c
837      go to 1000
838c
839c     %---------------------------------------------------------------%
840c     |                                                               |
841c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
842c     |                                                               |
843c     %---------------------------------------------------------------%
844c
845 9000 continue
846      return
847c
848c     %---------------%
849c     | End of dsaitr |
850c     %---------------%
851c
852      end
853