1!
2!    SLATEC: public domain
3!
4*DECK DGMRES
5      SUBROUTINE DGMRES (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
6     +   ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, SB, SX, RGWK, LRGW,
7     +   IGWK, LIGW, RWORK, IWORK)
8C***BEGIN PROLOGUE  DGMRES
9C***PURPOSE  Preconditioned GMRES iterative sparse Ax=b solver.
10C            This routine uses the generalized minimum residual
11C            (GMRES) method with preconditioning to solve
12C            non-symmetric linear systems of the form: Ax = b.
13C***LIBRARY   SLATEC (SLAP)
14C***CATEGORY  D2A4, D2B4
15C***TYPE      DOUBLE PRECISION (SGMRES-S, DGMRES-D)
16C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
17C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
18C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
19C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
20C           Seager, Mark K., (LLNL), seager@llnl.gov
21C             Lawrence Livermore National Laboratory
22C             PO Box 808, L-60
23C             Livermore, CA 94550 (510) 423-3141
24C***DESCRIPTION
25C
26C *Usage:
27C      INTEGER   N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
28C      INTEGER   ITER, IERR, IUNIT, LRGW, IGWK(LIGW), LIGW
29C      INTEGER   IWORK(USER DEFINED)
30C      DOUBLE PRECISION B(N), X(N), A(NELT), TOL, ERR, SB(N), SX(N)
31C      DOUBLE PRECISION RGWK(LRGW), RWORK(USER DEFINED)
32C      EXTERNAL  MATVEC, MSOLVE
33C
34C      CALL DGMRES(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
35C     $     ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, SB, SX,
36C     $     RGWK, LRGW, IGWK, LIGW, RWORK, IWORK)
37C
38C *Arguments:
39C N      :IN       Integer.
40C         Order of the Matrix.
41C B      :IN       Double Precision B(N).
42C         Right-hand side vector.
43C X      :INOUT    Double Precision X(N).
44C         On input X is your initial guess for the solution vector.
45C         On output X is the final approximate solution.
46C NELT   :IN       Integer.
47C         Number of Non-Zeros stored in A.
48C IA     :IN       Integer IA(NELT).
49C JA     :IN       Integer JA(NELT).
50C A      :IN       Double Precision A(NELT).
51C         These arrays contain the matrix data structure for A.
52C         It could take any form.  See "Description", below,
53C         for more details.
54C ISYM   :IN       Integer.
55C         Flag to indicate symmetric storage format.
56C         If ISYM=0, all non-zero entries of the matrix are stored.
57C         If ISYM=1, the matrix is symmetric, and only the upper
58C         or lower triangle of the matrix is stored.
59C MATVEC :EXT      External.
60C         Name of a routine which performs the matrix vector multiply
61C         Y = A*X given A and X.  The name of the MATVEC routine must
62C         be declared external in the calling program.  The calling
63C         sequence to MATVEC is:
64C             CALL MATVEC(N, X, Y, NELT, IA, JA, A, ISYM)
65C         where N is the number of unknowns, Y is the product A*X
66C         upon return, X is an input vector, and NELT is the number of
67C         non-zeros in the SLAP IA, JA, A storage for the matrix A.
68C         ISYM is a flag which, if non-zero, denotes that A is
69C         symmetric and only the lower or upper triangle is stored.
70C MSOLVE :EXT      External.
71C         Name of the routine which solves a linear system Mz = r for
72C         z given r with the preconditioning matrix M (M is supplied via
73C         RWORK and IWORK arrays.  The name of the MSOLVE routine must
74C         be declared external in the calling program.  The calling
75C         sequence to MSOLVE is:
76C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
77C         Where N is the number of unknowns, R is the right-hand side
78C         vector and Z is the solution upon return.  NELT, IA, JA, A and
79C         ISYM are defined as above.  RWORK is a double precision array
80C         that can be used to pass necessary preconditioning information
81C         and/or workspace to MSOLVE.  IWORK is an integer work array
82C         for the same purpose as RWORK.
83C ITOL   :IN       Integer.
84C         Flag to indicate the type of convergence criterion used.
85C         ITOL=0  Means the  iteration stops when the test described
86C                 below on  the  residual RL  is satisfied.  This is
87C                 the  "Natural Stopping Criteria" for this routine.
88C                 Other values  of   ITOL  cause  extra,   otherwise
89C                 unnecessary, computation per iteration and     are
90C                 therefore  much less  efficient.  See  ISDGMR (the
91C                 stop test routine) for more information.
92C         ITOL=1  Means   the  iteration stops   when the first test
93C                 described below on  the residual RL  is satisfied,
94C                 and there  is either right  or  no preconditioning
95C                 being used.
96C         ITOL=2  Implies     that   the  user    is   using    left
97C                 preconditioning, and the second stopping criterion
98C                 below is used.
99C         ITOL=3  Means the  iteration stops   when  the  third test
100C                 described below on Minv*Residual is satisfied, and
101C                 there is either left  or no  preconditioning being
102C                 used.
103C         ITOL=11 is    often  useful  for   checking  and comparing
104C                 different routines.  For this case, the  user must
105C                 supply  the  "exact" solution or  a  very accurate
106C                 approximation (one with  an  error much less  than
107C                 TOL) through a common block,
108C                     COMMON /DSLBLK/ SOLN( )
109C                 If ITOL=11, iteration stops when the 2-norm of the
110C                 difference between the iterative approximation and
111C                 the user-supplied solution  divided by the  2-norm
112C                 of the  user-supplied solution  is  less than TOL.
113C                 Note that this requires  the  user to  set up  the
114C                 "COMMON     /DSLBLK/ SOLN(LENGTH)"  in the calling
115C                 routine.  The routine with this declaration should
116C                 be loaded before the stop test so that the correct
117C                 length is used by  the loader.  This procedure  is
118C                 not standard Fortran and may not work correctly on
119C                 your   system (although  it  has  worked  on every
120C                 system the authors have tried).  If ITOL is not 11
121C                 then this common block is indeed standard Fortran.
122C TOL    :INOUT    Double Precision.
123C         Convergence criterion, as described below.  If TOL is set
124C         to zero on input, then a default value of 500*(the smallest
125C         positive magnitude, machine epsilon) is used.
126C ITMAX  :DUMMY    Integer.
127C         Maximum number of iterations in most SLAP routines.  In
128C         this routine this does not make sense.  The maximum number
129C         of iterations here is given by ITMAX = MAXL*(NRMAX+1).
130C         See IGWK for definitions of MAXL and NRMAX.
131C ITER   :OUT      Integer.
132C         Number of iterations required to reach convergence, or
133C         ITMAX if convergence criterion could not be achieved in
134C         ITMAX iterations.
135C ERR    :OUT      Double Precision.
136C         Error estimate of error in final approximate solution, as
137C         defined by ITOL.  Letting norm() denote the Euclidean
138C         norm, ERR is defined as follows..
139C
140C         If ITOL=0, then ERR = norm(SB*(B-A*X(L)))/norm(SB*B),
141C                               for right or no preconditioning, and
142C                         ERR = norm(SB*(M-inverse)*(B-A*X(L)))/
143C                                norm(SB*(M-inverse)*B),
144C                               for left preconditioning.
145C         If ITOL=1, then ERR = norm(SB*(B-A*X(L)))/norm(SB*B),
146C                               since right or no preconditioning
147C                               being used.
148C         If ITOL=2, then ERR = norm(SB*(M-inverse)*(B-A*X(L)))/
149C                                norm(SB*(M-inverse)*B),
150C                               since left preconditioning is being
151C                               used.
152C         If ITOL=3, then ERR =  Max  |(Minv*(B-A*X(L)))(i)/x(i)|
153C                               i=1,n
154C         If ITOL=11, then ERR = norm(SB*(X(L)-SOLN))/norm(SB*SOLN).
155C IERR   :OUT      Integer.
156C         Return error flag.
157C               IERR = 0 => All went well.
158C               IERR = 1 => Insufficient storage allocated for
159C                           RGWK or IGWK.
160C               IERR = 2 => Routine DGMRES failed to reduce the norm
161C                           of the current residual on its last call,
162C                           and so the iteration has stalled.  In
163C                           this case, X equals the last computed
164C                           approximation.  The user must either
165C                           increase MAXL, or choose a different
166C                           initial guess.
167C               IERR =-1 => Insufficient length for RGWK array.
168C                           IGWK(6) contains the required minimum
169C                           length of the RGWK array.
170C               IERR =-2 => Illegal value of ITOL, or ITOL and JPRE
171C                           values are inconsistent.
172C         For IERR <= 2, RGWK(1) = RHOL, which is the norm on the
173C         left-hand-side of the relevant stopping test defined
174C         below associated with the residual for the current
175C         approximation X(L).
176C IUNIT  :IN       Integer.
177C         Unit number on which to write the error at each iteration,
178C         if this is desired for monitoring convergence.  If unit
179C         number is 0, no writing will occur.
180C SB     :IN       Double Precision SB(N).
181C         Array of length N containing scale factors for the right
182C         hand side vector B.  If JSCAL.eq.0 (see below), SB need
183C         not be supplied.
184C SX     :IN       Double Precision SX(N).
185C         Array of length N containing scale factors for the solution
186C         vector X.  If JSCAL.eq.0 (see below), SX need not be
187C         supplied.  SB and SX can be the same array in the calling
188C         program if desired.
189C RGWK   :INOUT    Double Precision RGWK(LRGW).
190C         Double Precision array used for workspace by DGMRES.
191C         On return, RGWK(1) = RHOL.  See IERR for definition of RHOL.
192C LRGW   :IN       Integer.
193C         Length of the double precision workspace, RGWK.
194C         LRGW >= 1 + N*(MAXL+6) + MAXL*(MAXL+3).
195C         See below for definition of MAXL.
196C         For the default values, RGWK has size at least 131 + 16*N.
197C IGWK   :INOUT    Integer IGWK(LIGW).
198C         The following IGWK parameters should be set by the user
199C         before calling this routine.
200C         IGWK(1) = MAXL.  Maximum dimension of Krylov subspace in
201C            which X - X0 is to be found (where, X0 is the initial
202C            guess).  The default value of MAXL is 10.
203C         IGWK(2) = KMP.  Maximum number of previous Krylov basis
204C            vectors to which each new basis vector is made orthogonal.
205C            The default value of KMP is MAXL.
206C         IGWK(3) = JSCAL.  Flag indicating whether the scaling
207C            arrays SB and SX are to be used.
208C            JSCAL = 0 => SB and SX are not used and the algorithm
209C               will perform as if all SB(I) = 1 and SX(I) = 1.
210C            JSCAL = 1 =>  Only SX is used, and the algorithm
211C               performs as if all SB(I) = 1.
212C            JSCAL = 2 =>  Only SB is used, and the algorithm
213C               performs as if all SX(I) = 1.
214C            JSCAL = 3 =>  Both SB and SX are used.
215C         IGWK(4) = JPRE.  Flag indicating whether preconditioning
216C            is being used.
217C            JPRE = 0  =>  There is no preconditioning.
218C            JPRE > 0  =>  There is preconditioning on the right
219C               only, and the solver will call routine MSOLVE.
220C            JPRE < 0  =>  There is preconditioning on the left
221C               only, and the solver will call routine MSOLVE.
222C         IGWK(5) = NRMAX.  Maximum number of restarts of the
223C            Krylov iteration.  The default value of NRMAX = 10.
224C            if IWORK(5) = -1,  then no restarts are performed (in
225C            this case, NRMAX is set to zero internally).
226C         The following IWORK parameters are diagnostic information
227C         made available to the user after this routine completes.
228C         IGWK(6) = MLWK.  Required minimum length of RGWK array.
229C         IGWK(7) = NMS.  The total number of calls to MSOLVE.
230C LIGW   :IN       Integer.
231C         Length of the integer workspace, IGWK.  LIGW >= 20.
232C RWORK  :WORK     Double Precision RWORK(USER DEFINED).
233C         Double Precision array that can be used for workspace in
234C         MSOLVE.
235C IWORK  :WORK     Integer IWORK(USER DEFINED).
236C         Integer array that can be used for workspace in MSOLVE.
237C
238C *Description:
239C       DGMRES solves a linear system A*X = B rewritten in the form:
240C
241C        (SB*A*(M-inverse)*(SX-inverse))*(SX*M*X) = SB*B,
242C
243C       with right preconditioning, or
244C
245C        (SB*(M-inverse)*A*(SX-inverse))*(SX*X) = SB*(M-inverse)*B,
246C
247C       with left preconditioning, where A is an N-by-N double precision
248C       matrix, X and B are N-vectors,  SB and SX  are diagonal scaling
249C       matrices,   and M is  a preconditioning    matrix.   It uses
250C       preconditioned  Krylov   subpace  methods  based     on  the
251C       generalized minimum residual  method (GMRES).   This routine
252C       optionally performs  either  the  full     orthogonalization
253C       version of the  GMRES  algorithm or an incomplete variant of
254C       it.  Both versions use restarting of the linear iteration by
255C       default, although the user can disable this feature.
256C
257C       The GMRES  algorithm generates a sequence  of approximations
258C       X(L) to the  true solution of the above  linear system.  The
259C       convergence criteria for stopping the  iteration is based on
260C       the size  of the  scaled norm of  the residual  R(L)  =  B -
261C       A*X(L).  The actual stopping test is either:
262C
263C               norm(SB*(B-A*X(L))) .le. TOL*norm(SB*B),
264C
265C       for right preconditioning, or
266C
267C               norm(SB*(M-inverse)*(B-A*X(L))) .le.
268C                       TOL*norm(SB*(M-inverse)*B),
269C
270C       for left preconditioning, where norm() denotes the Euclidean
271C       norm, and TOL is  a positive scalar less  than one  input by
272C       the user.  If TOL equals zero  when DGMRES is called, then a
273C       default  value  of 500*(the   smallest  positive  magnitude,
274C       machine epsilon) is used.  If the  scaling arrays SB  and SX
275C       are used, then  ideally they  should be chosen  so  that the
276C       vectors SX*X(or SX*M*X) and  SB*B have all their  components
277C       approximately equal  to  one in  magnitude.  If one wants to
278C       use the same scaling in X  and B, then  SB and SX can be the
279C       same array in the calling program.
280C
281C       The following is a list of the other routines and their
282C       functions used by DGMRES:
283C       DPIGMR  Contains the main iteration loop for GMRES.
284C       DORTH   Orthogonalizes a new vector against older basis vectors.
285C       DHEQR   Computes a QR decomposition of a Hessenberg matrix.
286C       DHELS   Solves a Hessenberg least-squares system, using QR
287C               factors.
288C       DRLCAL  Computes the scaled residual RL.
289C       DXLCAL  Computes the solution XL.
290C       ISDGMR  User-replaceable stopping routine.
291C
292C       This routine does  not care  what matrix data   structure is
293C       used for  A and M.  It simply   calls  the MATVEC and MSOLVE
294C       routines, with  the arguments as  described above.  The user
295C       could write any type of structure and the appropriate MATVEC
296C       and MSOLVE routines.  It is assumed  that A is stored in the
297C       IA, JA, A  arrays in some fashion and  that M (or INV(M)) is
298C       stored  in  IWORK  and  RWORK   in  some fashion.   The SLAP
299C       routines DSDCG and DSICCG are examples of this procedure.
300C
301C       Two  examples  of  matrix  data structures  are the: 1) SLAP
302C       Triad  format and 2) SLAP Column format.
303C
304C       =================== S L A P Triad format ===================
305C       This routine requires that the  matrix A be   stored in  the
306C       SLAP  Triad format.  In  this format only the non-zeros  are
307C       stored.  They may appear in  *ANY* order.  The user supplies
308C       three arrays of  length NELT, where  NELT is  the number  of
309C       non-zeros in the matrix: (IA(NELT), JA(NELT), A(NELT)).  For
310C       each non-zero the user puts the row and column index of that
311C       matrix element  in the IA and  JA arrays.  The  value of the
312C       non-zero   matrix  element is  placed  in  the corresponding
313C       location of the A array.   This is  an  extremely  easy data
314C       structure to generate.  On  the  other hand it   is  not too
315C       efficient on vector computers for  the iterative solution of
316C       linear systems.  Hence,   SLAP changes   this  input    data
317C       structure to the SLAP Column format  for  the iteration (but
318C       does not change it back).
319C
320C       Here is an example of the  SLAP Triad   storage format for a
321C       5x5 Matrix.  Recall that the entries may appear in any order.
322C
323C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
324C                              1  2  3  4  5  6  7  8  9 10 11
325C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
326C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
327C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
328C       | 0  0  0 44  0|
329C       |51  0 53  0 55|
330C
331C       =================== S L A P Column format ==================
332C
333C       This routine  requires that  the matrix A  be stored in  the
334C       SLAP Column format.  In this format the non-zeros are stored
335C       counting down columns (except for  the diagonal entry, which
336C       must appear first in each  "column")  and are stored  in the
337C       double precision array A.   In other words,  for each column
338C       in the matrix put the diagonal entry in  A.  Then put in the
339C       other non-zero  elements going down  the column (except  the
340C       diagonal) in order.   The  IA array holds the  row index for
341C       each non-zero.  The JA array holds the offsets  into the IA,
342C       A arrays  for  the  beginning  of each   column.   That  is,
343C       IA(JA(ICOL)),  A(JA(ICOL)) points   to the beginning  of the
344C       ICOL-th   column    in    IA and   A.      IA(JA(ICOL+1)-1),
345C       A(JA(ICOL+1)-1) points to  the  end of the   ICOL-th column.
346C       Note that we always have  JA(N+1) = NELT+1,  where N is  the
347C       number of columns in  the matrix and NELT  is the number  of
348C       non-zeros in the matrix.
349C
350C       Here is an example of the  SLAP Column  storage format for a
351C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
352C       column):
353C
354C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
355C                              1  2  3    4  5    6  7    8    9 10 11
356C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
357C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
358C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
359C       | 0  0  0 44  0|
360C       |51  0 53  0 55|
361C
362C *Cautions:
363C     This routine will attempt to write to the Fortran logical output
364C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
365C     this logical unit is attached to a file or terminal before calling
366C     this routine with a non-zero value for IUNIT.  This routine does
367C     not check for the validity of a non-zero IUNIT unit number.
368C
369C***REFERENCES  1. Peter N. Brown and A. C. Hindmarsh, Reduced Storage
370C                  Matrix Methods in Stiff ODE Systems, Lawrence Liver-
371C                  more National Laboratory Report UCRL-95088, Rev. 1,
372C                  Livermore, California, June 1987.
373C               2. Mark K. Seager, A SLAP for the Masses, in
374C                  G. F. Carey, Ed., Parallel Supercomputing: Methods,
375C                  Algorithms and Applications, Wiley, 1989, pp.135-155.
376C***ROUTINES CALLED  D1MACH, DCOPY, DNRM2, DPIGMR
377C***REVISION HISTORY  (YYMMDD)
378C   890404  DATE WRITTEN
379C   890404  Previous REVISION DATE
380C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
381C   890922  Numerous changes to prologue to make closer to SLATEC
382C           standard.  (FNF)
383C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
384C   891004  Added new reference.
385C   910411  Prologue converted to Version 4.0 format.  (BAB)
386C   910506  Corrected errors in C***ROUTINES CALLED list.  (FNF)
387C   920407  COMMON BLOCK renamed DSLBLK.  (WRB)
388C   920511  Added complete declaration section.  (WRB)
389C   920929  Corrected format of references.  (FNF)
390C   921019  Changed 500.0 to 500 to reduce SP/DP differences.  (FNF)
391C   921026  Added check for valid value of ITOL.  (FNF)
392C***END PROLOGUE  DGMRES
393C         The following is for optimized compilation on LLNL/LTSS Crays.
394CLLL. OPTIMIZE
395C     .. Scalar Arguments ..
396      DOUBLE PRECISION ERR, TOL
397      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, LIGW, LRGW, N, NELT
398C     .. Array Arguments ..
399      DOUBLE PRECISION A(NELT), B(N), RGWK(LRGW), RWORK(*), SB(N),
400     +                 SX(N), X(N)
401      INTEGER IA(NELT), IGWK(LIGW), IWORK(*), JA(NELT)
402C     .. Subroutine Arguments ..
403      EXTERNAL MATVEC, MSOLVE
404C     .. Local Scalars ..
405      DOUBLE PRECISION BNRM, RHOL, SUM
406      INTEGER I, IFLAG, JPRE, JSCAL, KMP, LDL, LGMR, LHES, LQ, LR, LV,
407     +        LW, LXL, LZ, LZM1, MAXL, MAXLP1, NMS, NMSL, NRMAX, NRSTS
408C     .. External Functions ..
409      DOUBLE PRECISION D1MACH, DNRM2
410      EXTERNAL D1MACH, DNRM2
411C     .. External Subroutines ..
412      EXTERNAL DCOPY, DPIGMR
413C     .. Intrinsic Functions ..
414      INTRINSIC SQRT
415C***FIRST EXECUTABLE STATEMENT  DGMRES
416      IERR = 0
417C   ------------------------------------------------------------------
418C         Load method parameters with user values or defaults.
419C   ------------------------------------------------------------------
420      MAXL = IGWK(1)
421      IF (MAXL .EQ. 0) MAXL = 10
422      IF (MAXL .GT. N) MAXL = N
423      KMP = IGWK(2)
424      IF (KMP .EQ. 0) KMP = MAXL
425      IF (KMP .GT. MAXL) KMP = MAXL
426      JSCAL = IGWK(3)
427      JPRE = IGWK(4)
428C         Check for valid value of ITOL.
429      IF( (ITOL.LT.0) .OR. ((ITOL.GT.3).AND.(ITOL.NE.11)) ) GOTO 650
430C         Check for consistent values of ITOL and JPRE.
431      IF( ITOL.EQ.1 .AND. JPRE.LT.0 ) GOTO 650
432      IF( ITOL.EQ.2 .AND. JPRE.GE.0 ) GOTO 650
433      NRMAX = IGWK(5)
434      IF( NRMAX.EQ.0 ) NRMAX = 10
435C         If NRMAX .eq. -1, then set NRMAX = 0 to turn off restarting.
436      IF( NRMAX.EQ.-1 ) NRMAX = 0
437C         If input value of TOL is zero, set it to its default value.
438      IF( TOL.EQ.0.0D0 ) TOL = 500*D1MACH(3)
439C
440C         Initialize counters.
441      ITER = 0
442      NMS = 0
443      NRSTS = 0
444C   ------------------------------------------------------------------
445C         Form work array segment pointers.
446C   ------------------------------------------------------------------
447      MAXLP1 = MAXL + 1
448      LV = 1
449      LR = LV + N*MAXLP1
450      LHES = LR + N + 1
451      LQ = LHES + MAXL*MAXLP1
452      LDL = LQ + 2*MAXL
453      LW = LDL + N
454      LXL = LW + N
455      LZ = LXL + N
456C
457C         Load IGWK(6) with required minimum length of the RGWK array.
458      IGWK(6) = LZ + N - 1
459      IF( LZ+N-1.GT.LRGW ) GOTO 640
460C   ------------------------------------------------------------------
461C         Calculate scaled-preconditioned norm of RHS vector b.
462C   ------------------------------------------------------------------
463      IF (JPRE .LT. 0) THEN
464         CALL MSOLVE(N, B, RGWK(LR), NELT, IA, JA, A, ISYM,
465     $        RWORK, IWORK)
466         NMS = NMS + 1
467      ELSE
468         CALL DCOPY(N, B, 1, RGWK(LR), 1)
469      ENDIF
470      IF( JSCAL.EQ.2 .OR. JSCAL.EQ.3 ) THEN
471         SUM = 0
472         DO 10 I = 1,N
473            SUM = SUM + (RGWK(LR-1+I)*SB(I))**2
474 10      CONTINUE
475         BNRM = SQRT(SUM)
476      ELSE
477         BNRM = DNRM2(N,RGWK(LR),1)
478      ENDIF
479C   ------------------------------------------------------------------
480C         Calculate initial residual.
481C   ------------------------------------------------------------------
482      CALL MATVEC(N, X, RGWK(LR), NELT, IA, JA, A, ISYM)
483      DO 50 I = 1,N
484         RGWK(LR-1+I) = B(I) - RGWK(LR-1+I)
485 50   CONTINUE
486C   ------------------------------------------------------------------
487C         If performing restarting, then load the residual into the
488C         correct location in the RGWK array.
489C   ------------------------------------------------------------------
490 100  CONTINUE
491      IF( NRSTS.GT.NRMAX ) GOTO 610
492      IF( NRSTS.GT.0 ) THEN
493C         Copy the current residual to a different location in the RGWK
494C         array.
495         CALL DCOPY(N, RGWK(LDL), 1, RGWK(LR), 1)
496      ENDIF
497C   ------------------------------------------------------------------
498C         Use the DPIGMR algorithm to solve the linear system A*Z = R.
499C   ------------------------------------------------------------------
500      CALL DPIGMR(N, RGWK(LR), SB, SX, JSCAL, MAXL, MAXLP1, KMP,
501     $       NRSTS, JPRE, MATVEC, MSOLVE, NMSL, RGWK(LZ), RGWK(LV),
502     $       RGWK(LHES), RGWK(LQ), LGMR, RWORK, IWORK, RGWK(LW),
503     $       RGWK(LDL), RHOL, NRMAX, B, BNRM, X, RGWK(LXL), ITOL,
504     $       TOL, NELT, IA, JA, A, ISYM, IUNIT, IFLAG, ERR)
505      ITER = ITER + LGMR
506      NMS = NMS + NMSL
507C
508C         Increment X by the current approximate solution Z of A*Z = R.
509C
510      LZM1 = LZ - 1
511      DO 110 I = 1,N
512         X(I) = X(I) + RGWK(LZM1+I)
513 110  CONTINUE
514      IF( IFLAG.EQ.0 ) GOTO 600
515      IF( IFLAG.EQ.1 ) THEN
516         NRSTS = NRSTS + 1
517         GOTO 100
518      ENDIF
519      IF( IFLAG.EQ.2 ) GOTO 620
520C   ------------------------------------------------------------------
521C         All returns are made through this section.
522C   ------------------------------------------------------------------
523C         The iteration has converged.
524C
525 600  CONTINUE
526      IGWK(7) = NMS
527      RGWK(1) = RHOL
528      IERR = 0
529      RETURN
530C
531C         Max number((NRMAX+1)*MAXL) of linear iterations performed.
532 610  CONTINUE
533      IGWK(7) = NMS
534      RGWK(1) = RHOL
535      IERR = 1
536      RETURN
537C
538C         GMRES failed to reduce last residual in MAXL iterations.
539C         The iteration has stalled.
540 620  CONTINUE
541      IGWK(7) = NMS
542      RGWK(1) = RHOL
543      IERR = 2
544      RETURN
545C         Error return.  Insufficient length for RGWK array.
546 640  CONTINUE
547      ERR = TOL
548      IERR = -1
549      RETURN
550C         Error return.  Inconsistent ITOL and JPRE values.
551 650  CONTINUE
552      ERR = TOL
553      IERR = -2
554      RETURN
555C------------- LAST LINE OF DGMRES FOLLOWS ----------------------------
556      END
557*DECK DNRM2
558      DOUBLE PRECISION FUNCTION DNRM2 (N, DX, INCX)
559C***BEGIN PROLOGUE  DNRM2
560C***PURPOSE  Compute the Euclidean length (L2 norm) of a vector.
561C***LIBRARY   SLATEC (BLAS)
562C***CATEGORY  D1A3B
563C***TYPE      DOUBLE PRECISION (SNRM2-S, DNRM2-D, SCNRM2-C)
564C***KEYWORDS  BLAS, EUCLIDEAN LENGTH, EUCLIDEAN NORM, L2,
565C             LINEAR ALGEBRA, UNITARY, VECTOR
566C***AUTHOR  Lawson, C. L., (JPL)
567C           Hanson, R. J., (SNLA)
568C           Kincaid, D. R., (U. of Texas)
569C           Krogh, F. T., (JPL)
570C***DESCRIPTION
571C
572C                B L A S  Subprogram
573C    Description of parameters
574C
575C     --Input--
576C        N  number of elements in input vector(s)
577C       DX  double precision vector with N elements
578C     INCX  storage spacing between elements of DX
579C
580C     --Output--
581C    DNRM2  double precision result (zero if N .LE. 0)
582C
583C     Euclidean norm of the N-vector stored in DX with storage
584C     increment INCX.
585C     If N .LE. 0, return with result = 0.
586C     If N .GE. 1, then INCX must be .GE. 1
587C
588C     Four phase method using two built-in constants that are
589C     hopefully applicable to all machines.
590C         CUTLO = maximum of  SQRT(U/EPS)  over all known machines.
591C         CUTHI = minimum of  SQRT(V)      over all known machines.
592C     where
593C         EPS = smallest no. such that EPS + 1. .GT. 1.
594C         U   = smallest positive no.   (underflow limit)
595C         V   = largest  no.            (overflow  limit)
596C
597C     Brief outline of algorithm.
598C
599C     Phase 1 scans zero components.
600C     move to phase 2 when a component is nonzero and .LE. CUTLO
601C     move to phase 3 when a component is .GT. CUTLO
602C     move to phase 4 when a component is .GE. CUTHI/M
603C     where M = N for X() real and M = 2*N for complex.
604C
605C     Values for CUTLO and CUTHI.
606C     From the environmental parameters listed in the IMSL converter
607C     document the limiting values are as follows:
608C     CUTLO, S.P.   U/EPS = 2**(-102) for  Honeywell.  Close seconds are
609C                   Univac and DEC at 2**(-103)
610C                   Thus CUTLO = 2**(-51) = 4.44089E-16
611C     CUTHI, S.P.   V = 2**127 for Univac, Honeywell, and DEC.
612C                   Thus CUTHI = 2**(63.5) = 1.30438E19
613C     CUTLO, D.P.   U/EPS = 2**(-67) for Honeywell and DEC.
614C                   Thus CUTLO = 2**(-33.5) = 8.23181D-11
615C     CUTHI, D.P.   same as S.P.  CUTHI = 1.30438D19
616C     DATA CUTLO, CUTHI /8.232D-11,  1.304D19/
617C     DATA CUTLO, CUTHI /4.441E-16,  1.304E19/
618C
619C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
620C                 Krogh, Basic linear algebra subprograms for Fortran
621C                 usage, Algorithm No. 539, Transactions on Mathematical
622C                 Software 5, 3 (September 1979), pp. 308-323.
623C***ROUTINES CALLED  (NONE)
624C***REVISION HISTORY  (YYMMDD)
625C   791001  DATE WRITTEN
626C   890531  Changed all specific intrinsics to generic.  (WRB)
627C   890831  Modified array declarations.  (WRB)
628C   890831  REVISION DATE from Version 3.2
629C   891214  Prologue converted to Version 4.0 format.  (BAB)
630C   920501  Reformatted the REFERENCES section.  (WRB)
631C***END PROLOGUE  DNRM2
632      INTEGER NEXT
633      DOUBLE PRECISION DX(*), CUTLO, CUTHI, HITEST, SUM, XMAX, ZERO,
634     +                 ONE
635      SAVE CUTLO, CUTHI, ZERO, ONE
636      DATA ZERO, ONE /0.0D0, 1.0D0/
637C
638      DATA CUTLO, CUTHI /8.232D-11,  1.304D19/
639C***FIRST EXECUTABLE STATEMENT  DNRM2
640      IF (N .GT. 0) GO TO 10
641         DNRM2  = ZERO
642         GO TO 300
643C
644   10 ASSIGN 30 TO NEXT
645      SUM = ZERO
646      NN = N * INCX
647C
648C                                                 BEGIN MAIN LOOP
649C
650      I = 1
651   20    GO TO NEXT,(30, 50, 70, 110)
652   30 IF (ABS(DX(I)) .GT. CUTLO) GO TO 85
653      ASSIGN 50 TO NEXT
654      XMAX = ZERO
655C
656C                        PHASE 1.  SUM IS ZERO
657C
658   50 IF (DX(I) .EQ. ZERO) GO TO 200
659      IF (ABS(DX(I)) .GT. CUTLO) GO TO 85
660C
661C                                PREPARE FOR PHASE 2.
662C
663      ASSIGN 70 TO NEXT
664      GO TO 105
665C
666C                                PREPARE FOR PHASE 4.
667C
668  100 I = J
669      ASSIGN 110 TO NEXT
670      SUM = (SUM / DX(I)) / DX(I)
671  105 XMAX = ABS(DX(I))
672      GO TO 115
673C
674C                   PHASE 2.  SUM IS SMALL.
675C                             SCALE TO AVOID DESTRUCTIVE UNDERFLOW.
676C
677   70 IF (ABS(DX(I)) .GT. CUTLO) GO TO 75
678C
679C                     COMMON CODE FOR PHASES 2 AND 4.
680C                     IN PHASE 4 SUM IS LARGE.  SCALE TO AVOID OVERFLOW.
681C
682  110 IF (ABS(DX(I)) .LE. XMAX) GO TO 115
683         SUM = ONE + SUM * (XMAX / DX(I))**2
684         XMAX = ABS(DX(I))
685         GO TO 200
686C
687  115 SUM = SUM + (DX(I)/XMAX)**2
688      GO TO 200
689C
690C                  PREPARE FOR PHASE 3.
691C
692   75 SUM = (SUM * XMAX) * XMAX
693C
694C     FOR REAL OR D.P. SET HITEST = CUTHI/N
695C     FOR COMPLEX      SET HITEST = CUTHI/(2*N)
696C
697   85 HITEST = CUTHI / N
698C
699C                   PHASE 3.  SUM IS MID-RANGE.  NO SCALING.
700C
701      DO 95 J = I,NN,INCX
702      IF (ABS(DX(J)) .GE. HITEST) GO TO 100
703   95    SUM = SUM + DX(J)**2
704      DNRM2 = SQRT(SUM)
705      GO TO 300
706C
707  200 CONTINUE
708      I = I + INCX
709      IF (I .LE. NN) GO TO 20
710C
711C              END OF MAIN LOOP.
712C
713C              COMPUTE SQUARE ROOT AND ADJUST FOR SCALING.
714C
715      DNRM2 = XMAX * SQRT(SUM)
716  300 CONTINUE
717      RETURN
718      END
719*DECK DPIGMR
720      SUBROUTINE DPIGMR (N, R0, SR, SZ, JSCAL, MAXL, MAXLP1, KMP, NRSTS,
721     +   JPRE, MATVEC, MSOLVE, NMSL, Z, V, HES, Q, LGMR, RPAR, IPAR, WK,
722     +   DL, RHOL, NRMAX, B, BNRM, X, XL, ITOL, TOL, NELT, IA, JA, A,
723     +   ISYM, IUNIT, IFLAG, ERR)
724C***BEGIN PROLOGUE  DPIGMR
725C***SUBSIDIARY
726C***PURPOSE  Internal routine for DGMRES.
727C***LIBRARY   SLATEC (SLAP)
728C***CATEGORY  D2A4, D2B4
729C***TYPE      DOUBLE PRECISION (SPIGMR-S, DPIGMR-D)
730C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
731C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
732C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
733C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
734C           Seager, Mark K., (LLNL), seager@llnl.gov
735C             Lawrence Livermore National Laboratory
736C             PO Box 808, L-60
737C             Livermore, CA 94550 (510) 423-3141
738C***DESCRIPTION
739C         This routine solves the linear system A * Z = R0 using a
740C         scaled preconditioned version of the generalized minimum
741C         residual method.  An initial guess of Z = 0 is assumed.
742C
743C *Usage:
744C      INTEGER N, JSCAL, MAXL, MAXLP1, KMP, NRSTS, JPRE, NMSL, LGMR
745C      INTEGER IPAR(USER DEFINED), NRMAX, ITOL, NELT, IA(NELT), JA(NELT)
746C      INTEGER ISYM, IUNIT, IFLAG
747C      DOUBLE PRECISION R0(N), SR(N), SZ(N), Z(N), V(N,MAXLP1),
748C     $                 HES(MAXLP1,MAXL), Q(2*MAXL), RPAR(USER DEFINED),
749C     $                 WK(N), DL(N), RHOL, B(N), BNRM, X(N), XL(N),
750C     $                 TOL, A(NELT), ERR
751C      EXTERNAL MATVEC, MSOLVE
752C
753C      CALL DPIGMR(N, R0, SR, SZ, JSCAL, MAXL, MAXLP1, KMP,
754C     $     NRSTS, JPRE, MATVEC, MSOLVE, NMSL, Z, V, HES, Q, LGMR,
755C     $     RPAR, IPAR, WK, DL, RHOL, NRMAX, B, BNRM, X, XL,
756C     $     ITOL, TOL, NELT, IA, JA, A, ISYM, IUNIT, IFLAG, ERR)
757C
758C *Arguments:
759C N      :IN       Integer
760C         The order of the matrix A, and the lengths
761C         of the vectors SR, SZ, R0 and Z.
762C R0     :IN       Double Precision R0(N)
763C         R0 = the right hand side of the system A*Z = R0.
764C         R0 is also used as workspace when computing
765C         the final approximation.
766C         (R0 is the same as V(*,MAXL+1) in the call to DPIGMR.)
767C SR     :IN       Double Precision SR(N)
768C         SR is a vector of length N containing the non-zero
769C         elements of the diagonal scaling matrix for R0.
770C SZ     :IN       Double Precision SZ(N)
771C         SZ is a vector of length N containing the non-zero
772C         elements of the diagonal scaling matrix for Z.
773C JSCAL  :IN       Integer
774C         A flag indicating whether arrays SR and SZ are used.
775C         JSCAL=0 means SR and SZ are not used and the
776C                 algorithm will perform as if all
777C                 SR(i) = 1 and SZ(i) = 1.
778C         JSCAL=1 means only SZ is used, and the algorithm
779C                 performs as if all SR(i) = 1.
780C         JSCAL=2 means only SR is used, and the algorithm
781C                 performs as if all SZ(i) = 1.
782C         JSCAL=3 means both SR and SZ are used.
783C MAXL   :IN       Integer
784C         The maximum allowable order of the matrix H.
785C MAXLP1 :IN       Integer
786C         MAXPL1 = MAXL + 1, used for dynamic dimensioning of HES.
787C KMP    :IN       Integer
788C         The number of previous vectors the new vector VNEW
789C         must be made orthogonal to.  (KMP .le. MAXL)
790C NRSTS  :IN       Integer
791C         Counter for the number of restarts on the current
792C         call to DGMRES.  If NRSTS .gt. 0, then the residual
793C         R0 is already scaled, and so scaling of it is
794C         not necessary.
795C JPRE   :IN       Integer
796C         Preconditioner type flag.
797C MATVEC :EXT      External.
798C         Name of a routine which performs the matrix vector multiply
799C         Y = A*X given A and X.  The name of the MATVEC routine must
800C         be declared external in the calling program.  The calling
801C         sequence to MATVEC is:
802C             CALL MATVEC(N, X, Y, NELT, IA, JA, A, ISYM)
803C         where N is the number of unknowns, Y is the product A*X
804C         upon return, X is an input vector, and NELT is the number of
805C         non-zeros in the SLAP IA, JA, A storage for the matrix A.
806C         ISYM is a flag which, if non-zero, denotes that A is
807C         symmetric and only the lower or upper triangle is stored.
808C MSOLVE :EXT      External.
809C         Name of the routine which solves a linear system Mz = r for
810C         z given r with the preconditioning matrix M (M is supplied via
811C         RPAR and IPAR arrays.  The name of the MSOLVE routine must
812C         be declared external in the calling program.  The calling
813C         sequence to MSOLVE is:
814C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RPAR, IPAR)
815C         Where N is the number of unknowns, R is the right-hand side
816C         vector and Z is the solution upon return.  NELT, IA, JA, A and
817C         ISYM are defined as below.  RPAR is a double precision array
818C         that can be used to pass necessary preconditioning information
819C         and/or workspace to MSOLVE.  IPAR is an integer work array
820C         for the same purpose as RPAR.
821C NMSL   :OUT      Integer
822C         The number of calls to MSOLVE.
823C Z      :OUT      Double Precision Z(N)
824C         The final computed approximation to the solution
825C         of the system A*Z = R0.
826C V      :OUT      Double Precision V(N,MAXLP1)
827C         The N by (LGMR+1) array containing the LGMR
828C         orthogonal vectors V(*,1) to V(*,LGMR).
829C HES    :OUT      Double Precision HES(MAXLP1,MAXL)
830C         The upper triangular factor of the QR decomposition
831C         of the (LGMR+1) by LGMR upper Hessenberg matrix whose
832C         entries are the scaled inner-products of A*V(*,I)
833C         and V(*,K).
834C Q      :OUT      Double Precision Q(2*MAXL)
835C         A double precision array of length 2*MAXL containing the
836C         components of the Givens rotations used in the QR
837C         decomposition of HES.  It is loaded in DHEQR and used in
838C         DHELS.
839C LGMR   :OUT      Integer
840C         The number of iterations performed and
841C         the current order of the upper Hessenberg
842C         matrix HES.
843C RPAR   :IN       Double Precision RPAR(USER DEFINED)
844C         Double Precision workspace passed directly to the MSOLVE
845C         routine.
846C IPAR   :IN       Integer IPAR(USER DEFINED)
847C         Integer workspace passed directly to the MSOLVE routine.
848C WK     :IN       Double Precision WK(N)
849C         A double precision work array of length N used by routines
850C         MATVEC and MSOLVE.
851C DL     :INOUT    Double Precision DL(N)
852C         On input, a double precision work array of length N used for
853C         calculation of the residual norm RHO when the method is
854C         incomplete (KMP.lt.MAXL), and/or when using restarting.
855C         On output, the scaled residual vector RL.  It is only loaded
856C         when performing restarts of the Krylov iteration.
857C RHOL   :OUT      Double Precision
858C         A double precision scalar containing the norm of the final
859C         residual.
860C NRMAX  :IN       Integer
861C         The maximum number of restarts of the Krylov iteration.
862C         NRMAX .gt. 0 means restarting is active, while
863C         NRMAX = 0 means restarting is not being used.
864C B      :IN       Double Precision B(N)
865C         The right hand side of the linear system A*X = b.
866C BNRM   :IN       Double Precision
867C         The scaled norm of b.
868C X      :IN       Double Precision X(N)
869C         The current approximate solution as of the last
870C         restart.
871C XL     :IN       Double Precision XL(N)
872C         An array of length N used to hold the approximate
873C         solution X(L) when ITOL=11.
874C ITOL   :IN       Integer
875C         A flag to indicate the type of convergence criterion
876C         used.  See the driver for its description.
877C TOL    :IN       Double Precision
878C         The tolerance on residuals R0-A*Z in scaled norm.
879C NELT   :IN       Integer
880C         The length of arrays IA, JA and A.
881C IA     :IN       Integer IA(NELT)
882C         An integer array of length NELT containing matrix data.
883C         It is passed directly to the MATVEC and MSOLVE routines.
884C JA     :IN       Integer JA(NELT)
885C         An integer array of length NELT containing matrix data.
886C         It is passed directly to the MATVEC and MSOLVE routines.
887C A      :IN       Double Precision A(NELT)
888C         A double precision array of length NELT containing matrix
889C         data. It is passed directly to the MATVEC and MSOLVE routines.
890C ISYM   :IN       Integer
891C         A flag to indicate symmetric matrix storage.
892C         If ISYM=0, all non-zero entries of the matrix are
893C         stored.  If ISYM=1, the matrix is symmetric and
894C         only the upper or lower triangular part is stored.
895C IUNIT  :IN       Integer
896C         The i/o unit number for writing intermediate residual
897C         norm values.
898C IFLAG  :OUT      Integer
899C         An integer error flag..
900C         0 means convergence in LGMR iterations, LGMR.le.MAXL.
901C         1 means the convergence test did not pass in MAXL
902C           iterations, but the residual norm is .lt. norm(R0),
903C           and so Z is computed.
904C         2 means the convergence test did not pass in MAXL
905C           iterations, residual .ge. norm(R0), and Z = 0.
906C ERR    :OUT      Double Precision.
907C         Error estimate of error in final approximate solution, as
908C         defined by ITOL.
909C
910C *Cautions:
911C     This routine will attempt to write to the Fortran logical output
912C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
913C     this logical unit is attached to a file or terminal before calling
914C     this routine with a non-zero value for IUNIT.  This routine does
915C     not check for the validity of a non-zero IUNIT unit number.
916C
917C***SEE ALSO  DGMRES
918C***ROUTINES CALLED  DAXPY, DCOPY, DHELS, DHEQR, DNRM2, DORTH, DRLCAL,
919C                    DSCAL, ISDGMR
920C***REVISION HISTORY  (YYMMDD)
921C   890404  DATE WRITTEN
922C   890404  Previous REVISION DATE
923C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
924C   890922  Numerous changes to prologue to make closer to SLATEC
925C           standard.  (FNF)
926C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
927C   910411  Prologue converted to Version 4.0 format.  (BAB)
928C   910502  Removed MATVEC and MSOLVE from ROUTINES CALLED list.  (FNF)
929C   910506  Made subsidiary to DGMRES.  (FNF)
930C   920511  Added complete declaration section.  (WRB)
931C***END PROLOGUE  DPIGMR
932C         The following is for optimized compilation on LLNL/LTSS Crays.
933CLLL. OPTIMIZE
934C     .. Scalar Arguments ..
935      DOUBLE PRECISION BNRM, ERR, RHOL, TOL
936      INTEGER IFLAG, ISYM, ITOL, IUNIT, JPRE, JSCAL, KMP, LGMR, MAXL,
937     +        MAXLP1, N, NELT, NMSL, NRMAX, NRSTS
938C     .. Array Arguments ..
939      DOUBLE PRECISION A(NELT), B(*), DL(*), HES(MAXLP1,*), Q(*), R0(*),
940     +                 RPAR(*), SR(*), SZ(*), V(N,*), WK(*), X(*),
941     +                 XL(*), Z(*)
942      INTEGER IA(NELT), IPAR(*), JA(NELT)
943C     .. Subroutine Arguments ..
944      EXTERNAL MATVEC, MSOLVE
945C     .. Local Scalars ..
946      DOUBLE PRECISION C, DLNRM, PROD, R0NRM, RHO, S, SNORMW, TEM
947      INTEGER I, I2, INFO, IP1, ITER, ITMAX, J, K, LL, LLP1
948C     .. External Functions ..
949      DOUBLE PRECISION DNRM2
950      INTEGER ISDGMR
951      EXTERNAL DNRM2, ISDGMR
952C     .. External Subroutines ..
953      EXTERNAL DAXPY, DCOPY, DHELS, DHEQR, DORTH, DRLCAL, DSCAL
954C     .. Intrinsic Functions ..
955      INTRINSIC ABS
956C***FIRST EXECUTABLE STATEMENT  DPIGMR
957C
958C         Zero out the Z array.
959C
960      DO 5 I = 1,N
961         Z(I) = 0
962 5    CONTINUE
963C
964      IFLAG = 0
965      LGMR = 0
966      NMSL = 0
967C         Load ITMAX, the maximum number of iterations.
968      ITMAX =(NRMAX+1)*MAXL
969C   -------------------------------------------------------------------
970C         The initial residual is the vector R0.
971C         Apply left precon. if JPRE < 0 and this is not a restart.
972C         Apply scaling to R0 if JSCAL = 2 or 3.
973C   -------------------------------------------------------------------
974      IF ((JPRE .LT. 0) .AND.(NRSTS .EQ. 0)) THEN
975         CALL DCOPY(N, R0, 1, WK, 1)
976         CALL MSOLVE(N, WK, R0, NELT, IA, JA, A, ISYM, RPAR, IPAR)
977         NMSL = NMSL + 1
978      ENDIF
979      IF (((JSCAL.EQ.2) .OR.(JSCAL.EQ.3)) .AND.(NRSTS.EQ.0)) THEN
980         DO 10 I = 1,N
981            V(I,1) = R0(I)*SR(I)
982 10      CONTINUE
983      ELSE
984         DO 20 I = 1,N
985            V(I,1) = R0(I)
986 20      CONTINUE
987      ENDIF
988      R0NRM = DNRM2(N, V, 1)
989      ITER = NRSTS*MAXL
990C
991C         Call stopping routine ISDGMR.
992C
993      IF (ISDGMR(N, B, X, XL, NELT, IA, JA, A, ISYM, MSOLVE,
994     $    NMSL, ITOL, TOL, ITMAX, ITER, ERR, IUNIT, V(1,1), Z, WK,
995     $    RPAR, IPAR, R0NRM, BNRM, SR, SZ, JSCAL,
996     $    KMP, LGMR, MAXL, MAXLP1, V, Q, SNORMW, PROD, R0NRM,
997     $    HES, JPRE) .NE. 0) RETURN
998      TEM = 1.0D0/R0NRM
999      CALL DSCAL(N, TEM, V(1,1), 1)
1000C
1001C         Zero out the HES array.
1002C
1003      DO 50 J = 1,MAXL
1004         DO 40 I = 1,MAXLP1
1005            HES(I,J) = 0
1006 40      CONTINUE
1007 50   CONTINUE
1008C   -------------------------------------------------------------------
1009C         Main loop to compute the vectors V(*,2) to V(*,MAXL).
1010C         The running product PROD is needed for the convergence test.
1011C   -------------------------------------------------------------------
1012      PROD = 1
1013      DO 90 LL = 1,MAXL
1014         LGMR = LL
1015C   -------------------------------------------------------------------
1016C        Unscale  the  current V(LL)  and store  in WK.  Call routine
1017C        MSOLVE    to   compute(M-inverse)*WK,   where    M   is  the
1018C        preconditioner matrix.  Save the answer in Z.   Call routine
1019C        MATVEC to compute  VNEW  = A*Z,  where  A is  the the system
1020C        matrix.  save the answer in  V(LL+1).  Scale V(LL+1).   Call
1021C        routine DORTH  to  orthogonalize the    new vector VNEW   =
1022C        V(*,LL+1).  Call routine DHEQR to update the factors of HES.
1023C   -------------------------------------------------------------------
1024        IF ((JSCAL .EQ. 1) .OR.(JSCAL .EQ. 3)) THEN
1025           DO 60 I = 1,N
1026              WK(I) = V(I,LL)/SZ(I)
1027 60        CONTINUE
1028        ELSE
1029           CALL DCOPY(N, V(1,LL), 1, WK, 1)
1030        ENDIF
1031        IF (JPRE .GT. 0) THEN
1032           CALL MSOLVE(N, WK, Z, NELT, IA, JA, A, ISYM, RPAR, IPAR)
1033           NMSL = NMSL + 1
1034           CALL MATVEC(N, Z, V(1,LL+1), NELT, IA, JA, A, ISYM)
1035        ELSE
1036           CALL MATVEC(N, WK, V(1,LL+1), NELT, IA, JA, A, ISYM)
1037        ENDIF
1038        IF (JPRE .LT. 0) THEN
1039           CALL DCOPY(N, V(1,LL+1), 1, WK, 1)
1040           CALL MSOLVE(N,WK,V(1,LL+1),NELT,IA,JA,A,ISYM,RPAR,IPAR)
1041           NMSL = NMSL + 1
1042        ENDIF
1043        IF ((JSCAL .EQ. 2) .OR.(JSCAL .EQ. 3)) THEN
1044           DO 65 I = 1,N
1045              V(I,LL+1) = V(I,LL+1)*SR(I)
1046 65        CONTINUE
1047        ENDIF
1048        CALL DORTH(V(1,LL+1), V, HES, N, LL, MAXLP1, KMP, SNORMW)
1049        HES(LL+1,LL) = SNORMW
1050        CALL DHEQR(HES, MAXLP1, LL, Q, INFO, LL)
1051        IF (INFO .EQ. LL) GO TO 120
1052C   -------------------------------------------------------------------
1053C         Update RHO, the estimate of the norm of the residual R0-A*ZL.
1054C         If KMP <  MAXL, then the vectors V(*,1),...,V(*,LL+1) are not
1055C         necessarily orthogonal for LL > KMP.  The vector DL must then
1056C         be computed, and its norm used in the calculation of RHO.
1057C   -------------------------------------------------------------------
1058        PROD = PROD*Q(2*LL)
1059        RHO = ABS(PROD*R0NRM)
1060        IF ((LL.GT.KMP) .AND.(KMP.LT.MAXL)) THEN
1061           IF (LL .EQ. KMP+1) THEN
1062              CALL DCOPY(N, V(1,1), 1, DL, 1)
1063              DO 75 I = 1,KMP
1064                 IP1 = I + 1
1065                 I2 = I*2
1066                 S = Q(I2)
1067                 C = Q(I2-1)
1068                 DO 70 K = 1,N
1069                    DL(K) = S*DL(K) + C*V(K,IP1)
1070 70              CONTINUE
1071 75           CONTINUE
1072           ENDIF
1073           S = Q(2*LL)
1074           C = Q(2*LL-1)/SNORMW
1075           LLP1 = LL + 1
1076           DO 80 K = 1,N
1077              DL(K) = S*DL(K) + C*V(K,LLP1)
1078 80        CONTINUE
1079           DLNRM = DNRM2(N, DL, 1)
1080           RHO = RHO*DLNRM
1081        ENDIF
1082        RHOL = RHO
1083C   -------------------------------------------------------------------
1084C         Test for convergence.  If passed, compute approximation ZL.
1085C         If failed and LL < MAXL, then continue iterating.
1086C   -------------------------------------------------------------------
1087        ITER = NRSTS*MAXL + LGMR
1088        IF (ISDGMR(N, B, X, XL, NELT, IA, JA, A, ISYM, MSOLVE,
1089     $      NMSL, ITOL, TOL, ITMAX, ITER, ERR, IUNIT, DL, Z, WK,
1090     $      RPAR, IPAR, RHOL, BNRM, SR, SZ, JSCAL,
1091     $      KMP, LGMR, MAXL, MAXLP1, V, Q, SNORMW, PROD, R0NRM,
1092     $      HES, JPRE) .NE. 0) GO TO 200
1093        IF (LL .EQ. MAXL) GO TO 100
1094C   -------------------------------------------------------------------
1095C         Rescale so that the norm of V(1,LL+1) is one.
1096C   -------------------------------------------------------------------
1097        TEM = 1.0D0/SNORMW
1098        CALL DSCAL(N, TEM, V(1,LL+1), 1)
1099 90   CONTINUE
1100 100  CONTINUE
1101      IF (RHO .LT. R0NRM) GO TO 150
1102 120  CONTINUE
1103      IFLAG = 2
1104C
1105C         Load approximate solution with zero.
1106C
1107      DO 130 I = 1,N
1108         Z(I) = 0
1109 130  CONTINUE
1110      RETURN
1111 150  IFLAG = 1
1112C
1113C         Tolerance not met, but residual norm reduced.
1114C
1115      IF (NRMAX .GT. 0) THEN
1116C
1117C        If performing restarting (NRMAX > 0)  calculate the residual
1118C        vector RL and  store it in the DL  array.  If the incomplete
1119C        version is being used (KMP < MAXL) then DL has  already been
1120C        calculated up to a scaling factor.   Use DRLCAL to calculate
1121C        the scaled residual vector.
1122C
1123         CALL DRLCAL(N, KMP, MAXL, MAXL, V, Q, DL, SNORMW, PROD,
1124     $        R0NRM)
1125      ENDIF
1126C   -------------------------------------------------------------------
1127C         Compute the approximation ZL to the solution.  Since the
1128C         vector Z was used as workspace, and the initial guess
1129C         of the linear iteration is zero, Z must be reset to zero.
1130C   -------------------------------------------------------------------
1131 200  CONTINUE
1132      LL = LGMR
1133      LLP1 = LL + 1
1134      DO 210 K = 1,LLP1
1135         R0(K) = 0
1136 210  CONTINUE
1137      R0(1) = R0NRM
1138      CALL DHELS(HES, MAXLP1, LL, Q, R0)
1139      DO 220 K = 1,N
1140         Z(K) = 0
1141 220  CONTINUE
1142      DO 230 I = 1,LL
1143         CALL DAXPY(N, R0(I), V(1,I), 1, Z, 1)
1144 230  CONTINUE
1145      IF ((JSCAL .EQ. 1) .OR.(JSCAL .EQ. 3)) THEN
1146         DO 240 I = 1,N
1147            Z(I) = Z(I)/SZ(I)
1148 240     CONTINUE
1149      ENDIF
1150      IF (JPRE .GT. 0) THEN
1151         CALL DCOPY(N, Z, 1, WK, 1)
1152         CALL MSOLVE(N, WK, Z, NELT, IA, JA, A, ISYM, RPAR, IPAR)
1153         NMSL = NMSL + 1
1154      ENDIF
1155      RETURN
1156C------------- LAST LINE OF DPIGMR FOLLOWS ----------------------------
1157      END
1158*DECK DRLCAL
1159      SUBROUTINE DRLCAL (N, KMP, LL, MAXL, V, Q, RL, SNORMW, PROD,
1160     +   R0NRM)
1161C***BEGIN PROLOGUE  DRLCAL
1162C***SUBSIDIARY
1163C***PURPOSE  Internal routine for DGMRES.
1164C***LIBRARY   SLATEC (SLAP)
1165C***CATEGORY  D2A4, D2B4
1166C***TYPE      DOUBLE PRECISION (SRLCAL-S, DRLCAL-D)
1167C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
1168C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
1169C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
1170C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
1171C           Seager, Mark K., (LLNL), seager@llnl.gov
1172C             Lawrence Livermore National Laboratory
1173C             PO Box 808, L-60
1174C             Livermore, CA 94550 (510) 423-3141
1175C***DESCRIPTION
1176C         This routine calculates the scaled residual RL from the
1177C         V(I)'s.
1178C *Usage:
1179C      INTEGER N, KMP, LL, MAXL
1180C      DOUBLE PRECISION V(N,LL), Q(2*MAXL), RL(N), SNORMW, PROD, R0NORM
1181C
1182C      CALL DRLCAL(N, KMP, LL, MAXL, V, Q, RL, SNORMW, PROD, R0NRM)
1183C
1184C *Arguments:
1185C N      :IN       Integer
1186C         The order of the matrix A, and the lengths
1187C         of the vectors SR, SZ, R0 and Z.
1188C KMP    :IN       Integer
1189C         The number of previous V vectors the new vector VNEW
1190C         must be made orthogonal to. (KMP .le. MAXL)
1191C LL     :IN       Integer
1192C         The current dimension of the Krylov subspace.
1193C MAXL   :IN       Integer
1194C         The maximum dimension of the Krylov subspace.
1195C V      :IN       Double Precision V(N,LL)
1196C         The N x LL array containing the orthogonal vectors
1197C         V(*,1) to V(*,LL).
1198C Q      :IN       Double Precision Q(2*MAXL)
1199C         A double precision array of length 2*MAXL containing the
1200C         components of the Givens rotations used in the QR
1201C         decomposition of HES.  It is loaded in DHEQR and used in
1202C         DHELS.
1203C RL     :OUT      Double Precision RL(N)
1204C         The residual vector RL.  This is either SB*(B-A*XL) if
1205C         not preconditioning or preconditioning on the right,
1206C         or SB*(M-inverse)*(B-A*XL) if preconditioning on the
1207C         left.
1208C SNORMW :IN       Double Precision
1209C         Scale factor.
1210C PROD   :IN       Double Precision
1211C         The product s1*s2*...*sl = the product of the sines of the
1212C         Givens rotations used in the QR factorization of
1213C         the Hessenberg matrix HES.
1214C R0NRM  :IN       Double Precision
1215C         The scaled norm of initial residual R0.
1216C
1217C***SEE ALSO  DGMRES
1218C***ROUTINES CALLED  DCOPY, DSCAL
1219C***REVISION HISTORY  (YYMMDD)
1220C   890404  DATE WRITTEN
1221C   890404  Previous REVISION DATE
1222C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
1223C   890922  Numerous changes to prologue to make closer to SLATEC
1224C           standard.  (FNF)
1225C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
1226C   910411  Prologue converted to Version 4.0 format.  (BAB)
1227C   910506  Made subsidiary to DGMRES.  (FNF)
1228C   920511  Added complete declaration section.  (WRB)
1229C***END PROLOGUE  DRLCAL
1230C         The following is for optimized compilation on LLNL/LTSS Crays.
1231CLLL. OPTIMIZE
1232C     .. Scalar Arguments ..
1233      DOUBLE PRECISION PROD, R0NRM, SNORMW
1234      INTEGER KMP, LL, MAXL, N
1235C     .. Array Arguments ..
1236      DOUBLE PRECISION Q(*), RL(N), V(N,*)
1237C     .. Local Scalars ..
1238      DOUBLE PRECISION C, S, TEM
1239      INTEGER I, I2, IP1, K, LLM1, LLP1
1240C     .. External Subroutines ..
1241      EXTERNAL DCOPY, DSCAL
1242C***FIRST EXECUTABLE STATEMENT  DRLCAL
1243      IF (KMP .EQ. MAXL) THEN
1244C
1245C         calculate RL.  Start by copying V(*,1) into RL.
1246C
1247         CALL DCOPY(N, V(1,1), 1, RL, 1)
1248         LLM1 = LL - 1
1249         DO 20 I = 1,LLM1
1250            IP1 = I + 1
1251            I2 = I*2
1252            S = Q(I2)
1253            C = Q(I2-1)
1254            DO 10 K = 1,N
1255               RL(K) = S*RL(K) + C*V(K,IP1)
1256 10         CONTINUE
1257 20      CONTINUE
1258         S = Q(2*LL)
1259         C = Q(2*LL-1)/SNORMW
1260         LLP1 = LL + 1
1261         DO 30 K = 1,N
1262            RL(K) = S*RL(K) + C*V(K,LLP1)
1263 30      CONTINUE
1264      ENDIF
1265C
1266C         When KMP < MAXL, RL vector already partially calculated.
1267C         Scale RL by R0NRM*PROD to obtain the residual RL.
1268C
1269      TEM = R0NRM*PROD
1270      CALL DSCAL(N, TEM, RL, 1)
1271      RETURN
1272C------------- LAST LINE OF DRLCAL FOLLOWS ----------------------------
1273      END
1274*DECK DAXPY
1275      SUBROUTINE DAXPY (N, DA, DX, INCX, DY, INCY)
1276      use omp_lib
1277C***BEGIN PROLOGUE  DAXPY
1278C***PURPOSE  Compute a constant times a vector plus a vector.
1279C***LIBRARY   SLATEC (BLAS)
1280C***CATEGORY  D1A7
1281C***TYPE      DOUBLE PRECISION (SAXPY-S, DAXPY-D, CAXPY-C)
1282C***KEYWORDS  BLAS, LINEAR ALGEBRA, TRIAD, VECTOR
1283C***AUTHOR  Lawson, C. L., (JPL)
1284C           Hanson, R. J., (SNLA)
1285C           Kincaid, D. R., (U. of Texas)
1286C           Krogh, F. T., (JPL)
1287C***DESCRIPTION
1288C
1289C                B L A S  Subprogram
1290C    Description of Parameters
1291C
1292C     --Input--
1293C        N  number of elements in input vector(s)
1294C       DA  double precision scalar multiplier
1295C       DX  double precision vector with N elements
1296C     INCX  storage spacing between elements of DX
1297C       DY  double precision vector with N elements
1298C     INCY  storage spacing between elements of DY
1299C
1300C     --Output--
1301C       DY  double precision result (unchanged if N .LE. 0)
1302C
1303C     Overwrite double precision DY with double precision DA*DX + DY.
1304C     For I = 0 to N-1, replace  DY(LY+I*INCY) with DA*DX(LX+I*INCX) +
1305C       DY(LY+I*INCY),
1306C     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
1307C     defined in a similar way using INCY.
1308C
1309C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
1310C                 Krogh, Basic linear algebra subprograms for Fortran
1311C                 usage, Algorithm No. 539, Transactions on Mathematical
1312C                 Software 5, 3 (September 1979), pp. 308-323.
1313C***ROUTINES CALLED  (NONE)
1314C***REVISION HISTORY  (YYMMDD)
1315C   791001  DATE WRITTEN
1316C   890831  Modified array declarations.  (WRB)
1317C   890831  REVISION DATE from Version 3.2
1318C   891214  Prologue converted to Version 4.0 format.  (BAB)
1319C   920310  Corrected definition of LX in DESCRIPTION.  (WRB)
1320C   920501  Reformatted the REFERENCES section.  (WRB)
1321C***END PROLOGUE  DAXPY
1322      DOUBLE PRECISION DX(*), DY(*), DA
1323C***FIRST EXECUTABLE STATEMENT  DAXPY
1324      IF (N.LE.0 .OR. DA.EQ.0.0D0) RETURN
1325      IF (INCX .EQ. INCY) IF (INCX-1) 5,20,60
1326C
1327C     Code for unequal or nonpositive increments.
1328C
1329    5 IX = 1
1330      IY = 1
1331      IF (INCX .LT. 0) IX = (-N+1)*INCX + 1
1332      IF (INCY .LT. 0) IY = (-N+1)*INCY + 1
1333      DO 10 I = 1,N
1334        DY(IY) = DY(IY) + DA*DX(IX)
1335        IX = IX + INCX
1336        IY = IY + INCY
1337   10 CONTINUE
1338      RETURN
1339C
1340C     Code for both increments equal to 1.
1341C
1342C     Clean-up loop so remaining vector length is a multiple of 4.
1343C
1344   20 M = MOD(N,4)
1345      IF (M .EQ. 0) GO TO 40
1346      DO 30 I = 1,M
1347        DY(I) = DY(I) + DA*DX(I)
1348   30 CONTINUE
1349      IF (N .LT. 4) RETURN
1350   40 MP1 = M + 1
1351      DO 50 I = MP1,N,4
1352        DY(I) = DY(I) + DA*DX(I)
1353        DY(I+1) = DY(I+1) + DA*DX(I+1)
1354        DY(I+2) = DY(I+2) + DA*DX(I+2)
1355        DY(I+3) = DY(I+3) + DA*DX(I+3)
1356   50 CONTINUE
1357      RETURN
1358C
1359C     Code for equal, positive, non-unit increments.
1360C
1361   60 NS = N*INCX
1362      DO 70 I = 1,NS,INCX
1363        DY(I) = DA*DX(I) + DY(I)
1364   70 CONTINUE
1365      RETURN
1366      END
1367*DECK DCOPY
1368      SUBROUTINE DCOPY (N, DX, INCX, DY, INCY)
1369      use omp_lib
1370C***BEGIN PROLOGUE  DCOPY
1371C***PURPOSE  Copy a vector.
1372C***LIBRARY   SLATEC (BLAS)
1373C***CATEGORY  D1A5
1374C***TYPE      DOUBLE PRECISION (SCOPY-S, DCOPY-D, CCOPY-C, ICOPY-I)
1375C***KEYWORDS  BLAS, COPY, LINEAR ALGEBRA, VECTOR
1376C***AUTHOR  Lawson, C. L., (JPL)
1377C           Hanson, R. J., (SNLA)
1378C           Kincaid, D. R., (U. of Texas)
1379C           Krogh, F. T., (JPL)
1380C***DESCRIPTION
1381C
1382C                B L A S  Subprogram
1383C    Description of Parameters
1384C
1385C     --Input--
1386C        N  number of elements in input vector(s)
1387C       DX  double precision vector with N elements
1388C     INCX  storage spacing between elements of DX
1389C       DY  double precision vector with N elements
1390C     INCY  storage spacing between elements of DY
1391C
1392C     --Output--
1393C       DY  copy of vector DX (unchanged if N .LE. 0)
1394C
1395C     Copy double precision DX to double precision DY.
1396C     For I = 0 to N-1, copy DX(LX+I*INCX) to DY(LY+I*INCY),
1397C     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
1398C     defined in a similar way using INCY.
1399C
1400C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
1401C                 Krogh, Basic linear algebra subprograms for Fortran
1402C                 usage, Algorithm No. 539, Transactions on Mathematical
1403C                 Software 5, 3 (September 1979), pp. 308-323.
1404C***ROUTINES CALLED  (NONE)
1405C***REVISION HISTORY  (YYMMDD)
1406C   791001  DATE WRITTEN
1407C   890831  Modified array declarations.  (WRB)
1408C   890831  REVISION DATE from Version 3.2
1409C   891214  Prologue converted to Version 4.0 format.  (BAB)
1410C   920310  Corrected definition of LX in DESCRIPTION.  (WRB)
1411C   920501  Reformatted the REFERENCES section.  (WRB)
1412C***END PROLOGUE  DCOPY
1413      DOUBLE PRECISION DX(*), DY(*)
1414C***FIRST EXECUTABLE STATEMENT  DCOPY
1415      IF (N .LE. 0) RETURN
1416      IF (INCX .EQ. INCY) IF (INCX-1) 5,20,60
1417C
1418C     Code for unequal or nonpositive increments.
1419C
1420    5 IX = 1
1421      IY = 1
1422      IF (INCX .LT. 0) IX = (-N+1)*INCX + 1
1423      IF (INCY .LT. 0) IY = (-N+1)*INCY + 1
1424      DO 10 I = 1,N
1425        DY(IY) = DX(IX)
1426        IX = IX + INCX
1427        IY = IY + INCY
1428   10 CONTINUE
1429      RETURN
1430C
1431C     Code for both increments equal to 1.
1432C
1433C     Clean-up loop so remaining vector length is a multiple of 7.
1434C
1435   20 M = MOD(N,7)
1436      IF (M .EQ. 0) GO TO 40
1437      DO 30 I = 1,M
1438        DY(I) = DX(I)
1439   30 CONTINUE
1440      IF (N .LT. 7) RETURN
1441   40 MP1 = M + 1
1442      DO 50 I = MP1,N,7
1443        DY(I) = DX(I)
1444        DY(I+1) = DX(I+1)
1445        DY(I+2) = DX(I+2)
1446        DY(I+3) = DX(I+3)
1447        DY(I+4) = DX(I+4)
1448        DY(I+5) = DX(I+5)
1449        DY(I+6) = DX(I+6)
1450   50 CONTINUE
1451      RETURN
1452C
1453C     Code for equal, positive, non-unit increments.
1454C
1455   60 NS = N*INCX
1456      DO 70 I = 1,NS,INCX
1457        DY(I) = DX(I)
1458   70 CONTINUE
1459      RETURN
1460      END
1461*DECK DHELS
1462      SUBROUTINE DHELS (A, LDA, N, Q, B)
1463C***BEGIN PROLOGUE  DHELS
1464C***SUBSIDIARY
1465C***PURPOSE  Internal routine for DGMRES.
1466C***LIBRARY   SLATEC (SLAP)
1467C***CATEGORY  D2A4, D2B4
1468C***TYPE      DOUBLE PRECISION (SHELS-S, DHELS-D)
1469C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
1470C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
1471C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
1472C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
1473C           Seager, Mark K., (LLNL), seager@llnl.gov
1474C             Lawrence Livermore National Laboratory
1475C             PO Box 808, L-60
1476C             Livermore, CA 94550 (510) 423-3141
1477C***DESCRIPTION
1478C        This routine is extracted from the LINPACK routine SGESL with
1479C        changes due to the fact that A is an upper Hessenberg matrix.
1480C
1481C        DHELS solves the least squares problem:
1482C
1483C                   MIN(B-A*X,B-A*X)
1484C
1485C        using the factors computed by DHEQR.
1486C
1487C *Usage:
1488C      INTEGER LDA, N
1489C      DOUBLE PRECISION A(LDA,N), Q(2*N), B(N+1)
1490C
1491C      CALL DHELS(A, LDA, N, Q, B)
1492C
1493C *Arguments:
1494C A       :IN       Double Precision A(LDA,N)
1495C          The output from DHEQR which contains the upper
1496C          triangular factor R in the QR decomposition of A.
1497C LDA     :IN       Integer
1498C          The leading dimension of the array A.
1499C N       :IN       Integer
1500C          A is originally an (N+1) by N matrix.
1501C Q       :IN       Double Precision Q(2*N)
1502C          The coefficients of the N Givens rotations
1503C          used in the QR factorization of A.
1504C B       :INOUT    Double Precision B(N+1)
1505C          On input, B is the right hand side vector.
1506C          On output, B is the solution vector X.
1507C
1508C***SEE ALSO  DGMRES
1509C***ROUTINES CALLED  DAXPY
1510C***REVISION HISTORY  (YYMMDD)
1511C   890404  DATE WRITTEN
1512C   890404  Previous REVISION DATE
1513C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
1514C   890922  Numerous changes to prologue to make closer to SLATEC
1515C           standard.  (FNF)
1516C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
1517C   910411  Prologue converted to Version 4.0 format.  (BAB)
1518C   910502  Added C***FIRST EXECUTABLE STATEMENT line.  (FNF)
1519C   910506  Made subsidiary to DGMRES.  (FNF)
1520C   920511  Added complete declaration section.  (WRB)
1521C***END PROLOGUE  DHELS
1522C         The following is for optimized compilation on LLNL/LTSS Crays.
1523CLLL. OPTIMIZE
1524C     .. Scalar Arguments ..
1525      INTEGER LDA, N
1526C     .. Array Arguments ..
1527      DOUBLE PRECISION A(LDA,*), B(*), Q(*)
1528C     .. Local Scalars ..
1529      DOUBLE PRECISION C, S, T, T1, T2
1530      INTEGER IQ, K, KB, KP1
1531C     .. External Subroutines ..
1532      EXTERNAL DAXPY
1533C***FIRST EXECUTABLE STATEMENT  DHELS
1534C
1535C         Minimize(B-A*X,B-A*X).  First form Q*B.
1536C
1537      DO 20 K = 1, N
1538         KP1 = K + 1
1539         IQ = 2*(K-1) + 1
1540         C = Q(IQ)
1541         S = Q(IQ+1)
1542         T1 = B(K)
1543         T2 = B(KP1)
1544         B(K) = C*T1 - S*T2
1545         B(KP1) = S*T1 + C*T2
1546 20   CONTINUE
1547C
1548C         Now solve  R*X = Q*B.
1549C
1550      DO 40 KB = 1, N
1551         K = N + 1 - KB
1552         B(K) = B(K)/A(K,K)
1553         T = -B(K)
1554         CALL DAXPY(K-1, T, A(1,K), 1, B(1), 1)
1555 40   CONTINUE
1556      RETURN
1557C------------- LAST LINE OF DHELS FOLLOWS ----------------------------
1558      END
1559*DECK DHEQR
1560      SUBROUTINE DHEQR (A, LDA, N, Q, INFO, IJOB)
1561C***BEGIN PROLOGUE  DHEQR
1562C***SUBSIDIARY
1563C***PURPOSE  Internal routine for DGMRES.
1564C***LIBRARY   SLATEC (SLAP)
1565C***CATEGORY  D2A4, D2B4
1566C***TYPE      DOUBLE PRECISION (SHEQR-S, DHEQR-D)
1567C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
1568C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
1569C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
1570C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
1571C           Seager, Mark K., (LLNL), seager@llnl.gov
1572C             Lawrence Livermore National Laboratory
1573C             PO Box 808, L-60
1574C             Livermore, CA 94550 (510) 423-3141
1575C***DESCRIPTION
1576C        This   routine  performs  a QR   decomposition  of an  upper
1577C        Hessenberg matrix A using Givens  rotations.  There  are two
1578C        options  available: 1)  Performing  a fresh decomposition 2)
1579C        updating the QR factors by adding a row and  a column to the
1580C        matrix A.
1581C
1582C *Usage:
1583C      INTEGER LDA, N, INFO, IJOB
1584C      DOUBLE PRECISION A(LDA,N), Q(2*N)
1585C
1586C      CALL DHEQR(A, LDA, N, Q, INFO, IJOB)
1587C
1588C *Arguments:
1589C A      :INOUT    Double Precision A(LDA,N)
1590C         On input, the matrix to be decomposed.
1591C         On output, the upper triangular matrix R.
1592C         The factorization can be written Q*A = R, where
1593C         Q is a product of Givens rotations and R is upper
1594C         triangular.
1595C LDA    :IN       Integer
1596C         The leading dimension of the array A.
1597C N      :IN       Integer
1598C         A is an (N+1) by N Hessenberg matrix.
1599C Q      :OUT      Double Precision Q(2*N)
1600C         The factors c and s of each Givens rotation used
1601C         in decomposing A.
1602C INFO   :OUT      Integer
1603C         = 0  normal value.
1604C         = K  if  A(K,K) .eq. 0.0 .  This is not an error
1605C           condition for this subroutine, but it does
1606C           indicate that DHELS will divide by zero
1607C           if called.
1608C IJOB   :IN       Integer
1609C         = 1     means that a fresh decomposition of the
1610C                 matrix A is desired.
1611C         .ge. 2  means that the current decomposition of A
1612C                 will be updated by the addition of a row
1613C                 and a column.
1614C
1615C***SEE ALSO  DGMRES
1616C***ROUTINES CALLED  (NONE)
1617C***REVISION HISTORY  (YYMMDD)
1618C   890404  DATE WRITTEN
1619C   890404  Previous REVISION DATE
1620C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
1621C   890922  Numerous changes to prologue to make closer to SLATEC
1622C           standard.  (FNF)
1623C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
1624C   910411  Prologue converted to Version 4.0 format.  (BAB)
1625C   910506  Made subsidiary to DGMRES.  (FNF)
1626C   920511  Added complete declaration section.  (WRB)
1627C***END PROLOGUE  DHEQR
1628C         The following is for optimized compilation on LLNL/LTSS Crays.
1629CLLL. OPTIMIZE
1630C     .. Scalar Arguments ..
1631      INTEGER IJOB, INFO, LDA, N
1632C     .. Array Arguments ..
1633      DOUBLE PRECISION A(LDA,*), Q(*)
1634C     .. Local Scalars ..
1635      DOUBLE PRECISION C, S, T, T1, T2
1636      INTEGER I, IQ, J, K, KM1, KP1, NM1
1637C     .. Intrinsic Functions ..
1638      INTRINSIC ABS, SQRT
1639C***FIRST EXECUTABLE STATEMENT  DHEQR
1640      IF (IJOB .GT. 1) GO TO 70
1641C   -------------------------------------------------------------------
1642C         A new factorization is desired.
1643C   -------------------------------------------------------------------
1644C         QR decomposition without pivoting.
1645C
1646      INFO = 0
1647      DO 60 K = 1, N
1648         KM1 = K - 1
1649         KP1 = K + 1
1650C
1651C           Compute K-th column of R.
1652C           First, multiply the K-th column of A by the previous
1653C           K-1 Givens rotations.
1654C
1655         IF (KM1 .LT. 1) GO TO 20
1656         DO 10 J = 1, KM1
1657            I = 2*(J-1) + 1
1658            T1 = A(J,K)
1659            T2 = A(J+1,K)
1660            C = Q(I)
1661            S = Q(I+1)
1662            A(J,K) = C*T1 - S*T2
1663            A(J+1,K) = S*T1 + C*T2
1664 10      CONTINUE
1665C
1666C         Compute Givens components C and S.
1667C
1668 20      CONTINUE
1669         IQ = 2*KM1 + 1
1670         T1 = A(K,K)
1671         T2 = A(KP1,K)
1672         IF( T2.EQ.0.0D0 ) THEN
1673            C = 1
1674            S = 0
1675         ELSEIF( ABS(T2).GE.ABS(T1) ) THEN
1676            T = T1/T2
1677            S = -1.0D0/SQRT(1.0D0+T*T)
1678            C = -S*T
1679         ELSE
1680            T = T2/T1
1681            C = 1.0D0/SQRT(1.0D0+T*T)
1682            S = -C*T
1683         ENDIF
1684         Q(IQ) = C
1685         Q(IQ+1) = S
1686         A(K,K) = C*T1 - S*T2
1687         IF( A(K,K).EQ.0.0D0 ) INFO = K
1688 60   CONTINUE
1689      RETURN
1690C   -------------------------------------------------------------------
1691C         The old factorization of a will be updated.  A row and a
1692C         column has been added to the matrix A.  N by N-1 is now
1693C         the old size of the matrix.
1694C   -------------------------------------------------------------------
1695 70   CONTINUE
1696      NM1 = N - 1
1697C   -------------------------------------------------------------------
1698C         Multiply the new column by the N previous Givens rotations.
1699C   -------------------------------------------------------------------
1700      DO 100 K = 1,NM1
1701         I = 2*(K-1) + 1
1702         T1 = A(K,N)
1703         T2 = A(K+1,N)
1704         C = Q(I)
1705         S = Q(I+1)
1706         A(K,N) = C*T1 - S*T2
1707         A(K+1,N) = S*T1 + C*T2
1708 100  CONTINUE
1709C   -------------------------------------------------------------------
1710C         Complete update of decomposition by forming last Givens
1711C         rotation, and multiplying it times the column
1712C         vector(A(N,N),A(NP1,N)).
1713C   -------------------------------------------------------------------
1714      INFO = 0
1715      T1 = A(N,N)
1716      T2 = A(N+1,N)
1717      IF ( T2.EQ.0.0D0 ) THEN
1718         C = 1
1719         S = 0
1720      ELSEIF( ABS(T2).GE.ABS(T1) ) THEN
1721         T = T1/T2
1722         S = -1.0D0/SQRT(1.0D0+T*T)
1723         C = -S*T
1724      ELSE
1725         T = T2/T1
1726         C = 1.0D0/SQRT(1.0D0+T*T)
1727         S = -C*T
1728      ENDIF
1729      IQ = 2*N - 1
1730      Q(IQ) = C
1731      Q(IQ+1) = S
1732      A(N,N) = C*T1 - S*T2
1733      IF (A(N,N) .EQ. 0.0D0) INFO = N
1734      RETURN
1735C------------- LAST LINE OF DHEQR FOLLOWS ----------------------------
1736      END
1737*DECK ISDGMR
1738      INTEGER FUNCTION ISDGMR (N, B, X, XL, NELT, IA, JA, A, ISYM,
1739     +   MSOLVE, NMSL, ITOL, TOL, ITMAX, ITER, ERR, IUNIT, R, Z, DZ,
1740     +   RWORK, IWORK, RNRM, BNRM, SB, SX, JSCAL, KMP, LGMR, MAXL,
1741     +   MAXLP1, V, Q, SNORMW, PROD, R0NRM, HES, JPRE)
1742C***BEGIN PROLOGUE  ISDGMR
1743C***SUBSIDIARY
1744C***PURPOSE  Generalized Minimum Residual Stop Test.
1745C            This routine calculates the stop test for the Generalized
1746C            Minimum RESidual (GMRES) iteration scheme.  It returns a
1747C            non-zero if the error estimate (the type of which is
1748C            determined by ITOL) is less than the user specified
1749C            tolerance TOL.
1750C***LIBRARY   SLATEC (SLAP)
1751C***CATEGORY  D2A4, D2B4
1752C***TYPE      DOUBLE PRECISION (ISSGMR-S, ISDGMR-D)
1753C***KEYWORDS  GMRES, LINEAR SYSTEM, SLAP, SPARSE, STOP TEST
1754C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
1755C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
1756C           Seager, Mark K., (LLNL), seager@llnl.gov
1757C             Lawrence Livermore National Laboratory
1758C             PO Box 808, L-60
1759C             Livermore, CA 94550 (510) 423-3141
1760C***DESCRIPTION
1761C
1762C *Usage:
1763C      INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, NMSL, ITOL
1764C      INTEGER ITMAX, ITER, IUNIT, IWORK(USER DEFINED), JSCAL
1765C      INTEGER KMP, LGMR, MAXL, MAXLP1, JPRE
1766C      DOUBLE PRECISION B(N), X(N), XL(MAXL), A(NELT), TOL, ERR,
1767C     $                 R(N), Z(N), DZ(N), RWORK(USER DEFINED),
1768C     $                 RNRM, BNRM, SB(N), SX(N), V(N,MAXLP1),
1769C     $                 Q(2*MAXL), SNORMW, PROD, R0NRM,
1770C     $                 HES(MAXLP1,MAXL)
1771C      EXTERNAL MSOLVE
1772C
1773C      IF (ISDGMR(N, B, X, XL, NELT, IA, JA, A, ISYM, MSOLVE,
1774C     $     NMSL, ITOL, TOL, ITMAX, ITER, ERR, IUNIT, R, Z, DZ,
1775C     $     RWORK, IWORK, RNRM, BNRM, SB, SX, JSCAL,
1776C     $     KMP, LGMR, MAXL, MAXLP1, V, Q, SNORMW, PROD, R0NRM,
1777C     $     HES, JPRE) .NE. 0) THEN ITERATION DONE
1778C
1779C *Arguments:
1780C N      :IN       Integer.
1781C         Order of the Matrix.
1782C B      :IN       Double Precision B(N).
1783C         Right-hand-side vector.
1784C X      :IN       Double Precision X(N).
1785C         Approximate solution vector as of the last restart.
1786C XL     :OUT      Double Precision XL(N)
1787C         An array of length N used to hold the approximate
1788C         solution as of the current iteration.  Only computed by
1789C         this routine when ITOL=11.
1790C NELT   :IN       Integer.
1791C         Number of Non-Zeros stored in A.
1792C IA     :IN       Integer IA(NELT).
1793C JA     :IN       Integer JA(NELT).
1794C A      :IN       Double Precision A(NELT).
1795C         These arrays contain the matrix data structure for A.
1796C         It could take any form.  See "Description", in the DGMRES,
1797C         DSLUGM and DSDGMR routines for more details.
1798C ISYM   :IN       Integer.
1799C         Flag to indicate symmetric storage format.
1800C         If ISYM=0, all non-zero entries of the matrix are stored.
1801C         If ISYM=1, the matrix is symmetric, and only the upper
1802C         or lower triangle of the matrix is stored.
1803C MSOLVE :EXT      External.
1804C         Name of a routine which solves a linear system Mz = r for  z
1805C         given r with the preconditioning matrix M (M is supplied via
1806C         RWORK and IWORK arrays.  The name of the MSOLVE routine must
1807C         be declared external in the calling program.  The calling
1808C         sequence to MSOLVE is:
1809C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
1810C         Where N is the number of unknowns, R is the right-hand side
1811C         vector and Z is the solution upon return.  NELT, IA, JA, A and
1812C         ISYM are defined as above.  RWORK is a double precision array
1813C         that can be used to pass necessary preconditioning information
1814C         and/or workspace to MSOLVE.  IWORK is an integer work array
1815C         for the same purpose as RWORK.
1816C NMSL   :INOUT    Integer.
1817C         A counter for the number of calls to MSOLVE.
1818C ITOL   :IN       Integer.
1819C         Flag to indicate the type of convergence criterion used.
1820C         ITOL=0  Means the  iteration stops when the test described
1821C                 below on  the  residual RL  is satisfied.  This is
1822C                 the  "Natural Stopping Criteria" for this routine.
1823C                 Other values  of   ITOL  cause  extra,   otherwise
1824C                 unnecessary, computation per iteration and     are
1825C                 therefore much less efficient.
1826C         ITOL=1  Means   the  iteration stops   when the first test
1827C                 described below on  the residual RL  is satisfied,
1828C                 and there  is either right  or  no preconditioning
1829C                 being used.
1830C         ITOL=2  Implies     that   the  user    is   using    left
1831C                 preconditioning, and the second stopping criterion
1832C                 below is used.
1833C         ITOL=3  Means the  iteration stops   when  the  third test
1834C                 described below on Minv*Residual is satisfied, and
1835C                 there is either left  or no  preconditioning begin
1836C                 used.
1837C         ITOL=11 is    often  useful  for   checking  and comparing
1838C                 different routines.  For this case, the  user must
1839C                 supply  the  "exact" solution or  a  very accurate
1840C                 approximation (one with  an  error much less  than
1841C                 TOL) through a common block,
1842C                     COMMON /DSLBLK/ SOLN( )
1843C                 If ITOL=11, iteration stops when the 2-norm of the
1844C                 difference between the iterative approximation and
1845C                 the user-supplied solution  divided by the  2-norm
1846C                 of the  user-supplied solution  is  less than TOL.
1847C                 Note that this requires  the  user to  set up  the
1848C                 "COMMON     /DSLBLK/ SOLN(LENGTH)"  in the calling
1849C                 routine.  The routine with this declaration should
1850C                 be loaded before the stop test so that the correct
1851C                 length is used by  the loader.  This procedure  is
1852C                 not standard Fortran and may not work correctly on
1853C                 your   system (although  it  has  worked  on every
1854C                 system the authors have tried).  If ITOL is not 11
1855C                 then this common block is indeed standard Fortran.
1856C TOL    :IN       Double Precision.
1857C         Convergence criterion, as described above.
1858C ITMAX  :IN       Integer.
1859C         Maximum number of iterations.
1860C ITER   :IN       Integer.
1861C         The iteration for which to check for convergence.
1862C ERR    :OUT      Double Precision.
1863C         Error estimate of error in final approximate solution, as
1864C         defined by ITOL.  Letting norm() denote the Euclidean
1865C         norm, ERR is defined as follows..
1866C
1867C         If ITOL=0, then ERR = norm(SB*(B-A*X(L)))/norm(SB*B),
1868C                               for right or no preconditioning, and
1869C                         ERR = norm(SB*(M-inverse)*(B-A*X(L)))/
1870C                                norm(SB*(M-inverse)*B),
1871C                               for left preconditioning.
1872C         If ITOL=1, then ERR = norm(SB*(B-A*X(L)))/norm(SB*B),
1873C                               since right or no preconditioning
1874C                               being used.
1875C         If ITOL=2, then ERR = norm(SB*(M-inverse)*(B-A*X(L)))/
1876C                                norm(SB*(M-inverse)*B),
1877C                               since left preconditioning is being
1878C                               used.
1879C         If ITOL=3, then ERR =  Max  |(Minv*(B-A*X(L)))(i)/x(i)|
1880C                               i=1,n
1881C         If ITOL=11, then ERR = norm(SB*(X(L)-SOLN))/norm(SB*SOLN).
1882C IUNIT  :IN       Integer.
1883C         Unit number on which to write the error at each iteration,
1884C         if this is desired for monitoring convergence.  If unit
1885C         number is 0, no writing will occur.
1886C R      :INOUT    Double Precision R(N).
1887C         Work array used in calling routine.  It contains
1888C         information necessary to compute the residual RL = B-A*XL.
1889C Z      :WORK     Double Precision Z(N).
1890C         Workspace used to hold the pseudo-residual M z = r.
1891C DZ     :WORK     Double Precision DZ(N).
1892C         Workspace used to hold temporary vector(s).
1893C RWORK  :WORK     Double Precision RWORK(USER DEFINED).
1894C         Double Precision array that can be used by MSOLVE.
1895C IWORK  :WORK     Integer IWORK(USER DEFINED).
1896C         Integer array that can be used by MSOLVE.
1897C RNRM   :IN       Double Precision.
1898C         Norm of the current residual.  Type of norm depends on ITOL.
1899C BNRM   :IN       Double Precision.
1900C         Norm of the right hand side.  Type of norm depends on ITOL.
1901C SB     :IN       Double Precision SB(N).
1902C         Scaling vector for B.
1903C SX     :IN       Double Precision SX(N).
1904C         Scaling vector for X.
1905C JSCAL  :IN       Integer.
1906C         Flag indicating if scaling arrays SB and SX are being
1907C         used in the calling routine DPIGMR.
1908C         JSCAL=0 means SB and SX are not used and the
1909C                 algorithm will perform as if all
1910C                 SB(i) = 1 and SX(i) = 1.
1911C         JSCAL=1 means only SX is used, and the algorithm
1912C                 performs as if all SB(i) = 1.
1913C         JSCAL=2 means only SB is used, and the algorithm
1914C                 performs as if all SX(i) = 1.
1915C         JSCAL=3 means both SB and SX are used.
1916C KMP    :IN       Integer
1917C         The number of previous vectors the new vector VNEW
1918C         must be made orthogonal to.  (KMP .le. MAXL)
1919C LGMR   :IN       Integer
1920C         The number of GMRES iterations performed on the current call
1921C         to DPIGMR (i.e., # iterations since the last restart) and
1922C         the current order of the upper Hessenberg
1923C         matrix HES.
1924C MAXL   :IN       Integer
1925C         The maximum allowable order of the matrix H.
1926C MAXLP1 :IN       Integer
1927C         MAXPL1 = MAXL + 1, used for dynamic dimensioning of HES.
1928C V      :IN       Double Precision V(N,MAXLP1)
1929C         The N by (LGMR+1) array containing the LGMR
1930C         orthogonal vectors V(*,1) to V(*,LGMR).
1931C Q      :IN       Double Precision Q(2*MAXL)
1932C         A double precision array of length 2*MAXL containing the
1933C         components of the Givens rotations used in the QR
1934C         decomposition of HES.
1935C SNORMW :IN       Double Precision
1936C         A scalar containing the scaled norm of VNEW before it
1937C         is renormalized in DPIGMR.
1938C PROD   :IN       Double Precision
1939C         The product s1*s2*...*sl = the product of the sines of the
1940C         Givens rotations used in the QR factorization of the
1941C         Hessenberg matrix HES.
1942C R0NRM  :IN       Double Precision
1943C         The scaled norm of initial residual R0.
1944C HES    :IN       Double Precision HES(MAXLP1,MAXL)
1945C         The upper triangular factor of the QR decomposition
1946C         of the (LGMR+1) by LGMR upper Hessenberg matrix whose
1947C         entries are the scaled inner-products of A*V(*,I)
1948C         and V(*,K).
1949C JPRE   :IN       Integer
1950C         Preconditioner type flag.
1951C         (See description of IGWK(4) in DGMRES.)
1952C
1953C *Description
1954C       When using the GMRES solver,  the preferred value  for ITOL
1955C       is 0.  This is due to the fact that when ITOL=0 the norm of
1956C       the residual required in the stopping test is  obtained for
1957C       free, since this value is already  calculated  in the GMRES
1958C       algorithm.   The  variable  RNRM contains the   appropriate
1959C       norm, which is equal to norm(SB*(RL - A*XL))  when right or
1960C       no   preconditioning is  being  performed,   and equal   to
1961C       norm(SB*Minv*(RL - A*XL))  when using left preconditioning.
1962C       Here, norm() is the Euclidean norm.  Nonzero values of ITOL
1963C       require  additional work  to  calculate the  actual  scaled
1964C       residual  or its scaled/preconditioned  form,  and/or   the
1965C       approximate solution XL.  Hence, these values of  ITOL will
1966C       not be as efficient as ITOL=0.
1967C
1968C *Cautions:
1969C     This routine will attempt to write to the Fortran logical output
1970C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
1971C     this logical unit is attached to a file or terminal before calling
1972C     this routine with a non-zero value for IUNIT.  This routine does
1973C     not check for the validity of a non-zero IUNIT unit number.
1974C
1975C     This routine does not verify that ITOL has a valid value.
1976C     The calling routine should make such a test before calling
1977C     ISDGMR, as is done in DGMRES.
1978C
1979C***SEE ALSO  DGMRES
1980C***ROUTINES CALLED  D1MACH, DCOPY, DNRM2, DRLCAL, DSCAL, DXLCAL
1981C***COMMON BLOCKS    DSLBLK
1982C***REVISION HISTORY  (YYMMDD)
1983C   890404  DATE WRITTEN
1984C   890404  Previous REVISION DATE
1985C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
1986C   890922  Numerous changes to prologue to make closer to SLATEC
1987C           standard.  (FNF)
1988C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
1989C   910411  Prologue converted to Version 4.0 format.  (BAB)
1990C   910502  Corrected conversion errors, etc.  (FNF)
1991C   910502  Removed MSOLVE from ROUTINES CALLED list.  (FNF)
1992C   910506  Made subsidiary to DGMRES.  (FNF)
1993C   920407  COMMON BLOCK renamed DSLBLK.  (WRB)
1994C   920511  Added complete declaration section.  (WRB)
1995C   921026  Corrected D to E in output format.  (FNF)
1996C   921113  Corrected C***CATEGORY line.  (FNF)
1997C***END PROLOGUE  ISDGMR
1998C     .. Scalar Arguments ..
1999      DOUBLE PRECISION BNRM, ERR, PROD, R0NRM, RNRM, SNORMW, TOL
2000      INTEGER ISYM, ITER, ITMAX, ITOL, IUNIT, JPRE, JSCAL, KMP, LGMR,
2001     +        MAXL, MAXLP1, N, NELT, NMSL
2002C     .. Array Arguments ..
2003      DOUBLE PRECISION A(*), B(*), DZ(*), HES(MAXLP1, MAXL), Q(*), R(*),
2004     +                 RWORK(*), SB(*), SX(*), V(N,*), X(*), XL(*), Z(*)
2005      INTEGER IA(*), IWORK(*), JA(*)
2006C     .. Subroutine Arguments ..
2007      EXTERNAL MSOLVE
2008C     .. Arrays in Common ..
2009      DOUBLE PRECISION SOLN(1)
2010C     .. Local Scalars ..
2011      DOUBLE PRECISION DXNRM, FUZZ, RAT, RATMAX, SOLNRM, TEM
2012      INTEGER I, IELMAX
2013C     .. External Functions ..
2014      DOUBLE PRECISION D1MACH, DNRM2
2015      EXTERNAL D1MACH, DNRM2
2016C     .. External Subroutines ..
2017      EXTERNAL DCOPY, DRLCAL, DSCAL, DXLCAL
2018C     .. Intrinsic Functions ..
2019      INTRINSIC ABS, MAX, SQRT
2020C     .. Common blocks ..
2021      COMMON /DSLBLK/ SOLN
2022C     .. Save statement ..
2023      SAVE SOLNRM
2024C***FIRST EXECUTABLE STATEMENT  ISDGMR
2025      ISDGMR = 0
2026      IF ( ITOL.EQ.0 ) THEN
2027C
2028C       Use input from DPIGMR to determine if stop conditions are met.
2029C
2030         ERR = RNRM/BNRM
2031      ENDIF
2032      IF ( (ITOL.GT.0) .AND. (ITOL.LE.3) ) THEN
2033C
2034C       Use DRLCAL to calculate the scaled residual vector.
2035C       Store answer in R.
2036C
2037         IF ( LGMR.NE.0 ) CALL DRLCAL(N, KMP, LGMR, MAXL, V, Q, R,
2038     $                                SNORMW, PROD, R0NRM)
2039         IF ( ITOL.LE.2 ) THEN
2040C         err = ||Residual||/||RightHandSide||(2-Norms).
2041            ERR = DNRM2(N, R, 1)/BNRM
2042C
2043C         Unscale R by R0NRM*PROD when KMP < MAXL.
2044C
2045            IF ( (KMP.LT.MAXL) .AND. (LGMR.NE.0) ) THEN
2046               TEM = 1.0D0/(R0NRM*PROD)
2047               CALL DSCAL(N, TEM, R, 1)
2048            ENDIF
2049         ELSEIF ( ITOL.EQ.3 ) THEN
2050C         err = Max |(Minv*Residual)(i)/x(i)|
2051C         When JPRE .lt. 0, R already contains Minv*Residual.
2052            IF ( JPRE.GT.0 ) THEN
2053               CALL MSOLVE(N, R, DZ, NELT, IA, JA, A, ISYM, RWORK,
2054     $              IWORK)
2055               NMSL = NMSL + 1
2056            ENDIF
2057C
2058C         Unscale R by R0NRM*PROD when KMP < MAXL.
2059C
2060            IF ( (KMP.LT.MAXL) .AND. (LGMR.NE.0) ) THEN
2061               TEM = 1.0D0/(R0NRM*PROD)
2062               CALL DSCAL(N, TEM, R, 1)
2063            ENDIF
2064C
2065            FUZZ = D1MACH(1)
2066            IELMAX = 1
2067            RATMAX = ABS(DZ(1))/MAX(ABS(X(1)),FUZZ)
2068            DO 25 I = 2, N
2069               RAT = ABS(DZ(I))/MAX(ABS(X(I)),FUZZ)
2070               IF( RAT.GT.RATMAX ) THEN
2071                  IELMAX = I
2072                  RATMAX = RAT
2073               ENDIF
2074 25         CONTINUE
2075            ERR = RATMAX
2076            IF( RATMAX.LE.TOL ) ISDGMR = 1
2077            IF( IUNIT.GT.0 ) WRITE(IUNIT,1020) ITER, IELMAX, RATMAX
2078            RETURN
2079         ENDIF
2080      ENDIF
2081      IF ( ITOL.EQ.11 ) THEN
2082C
2083C       Use DXLCAL to calculate the approximate solution XL.
2084C
2085         IF ( (LGMR.NE.0) .AND. (ITER.GT.0) ) THEN
2086            CALL DXLCAL(N, LGMR, X, XL, XL, HES, MAXLP1, Q, V, R0NRM,
2087     $           DZ, SX, JSCAL, JPRE, MSOLVE, NMSL, RWORK, IWORK,
2088     $           NELT, IA, JA, A, ISYM)
2089         ELSEIF ( ITER.EQ.0 ) THEN
2090C         Copy X to XL to check if initial guess is good enough.
2091            CALL DCOPY(N, X, 1, XL, 1)
2092         ELSE
2093C         Return since this is the first call to DPIGMR on a restart.
2094            RETURN
2095         ENDIF
2096C
2097         IF ((JSCAL .EQ. 0) .OR.(JSCAL .EQ. 2)) THEN
2098C         err = ||x-TrueSolution||/||TrueSolution||(2-Norms).
2099            IF ( ITER.EQ.0 ) SOLNRM = DNRM2(N, SOLN, 1)
2100            DO 30 I = 1, N
2101               DZ(I) = XL(I) - SOLN(I)
2102 30         CONTINUE
2103            ERR = DNRM2(N, DZ, 1)/SOLNRM
2104         ELSE
2105            IF (ITER .EQ. 0) THEN
2106               SOLNRM = 0
2107               DO 40 I = 1,N
2108                  SOLNRM = SOLNRM + (SX(I)*SOLN(I))**2
2109 40            CONTINUE
2110               SOLNRM = SQRT(SOLNRM)
2111            ENDIF
2112            DXNRM = 0
2113            DO 50 I = 1,N
2114               DXNRM = DXNRM + (SX(I)*(XL(I)-SOLN(I)))**2
2115 50         CONTINUE
2116            DXNRM = SQRT(DXNRM)
2117C         err = ||SX*(x-TrueSolution)||/||SX*TrueSolution|| (2-Norms).
2118            ERR = DXNRM/SOLNRM
2119         ENDIF
2120      ENDIF
2121C
2122      IF( IUNIT.NE.0 ) THEN
2123         IF( ITER.EQ.0 ) THEN
2124            WRITE(IUNIT,1000) N, ITOL, MAXL, KMP
2125         ENDIF
2126         WRITE(IUNIT,1010) ITER, RNRM/BNRM, ERR
2127      ENDIF
2128      IF ( ERR.LE.TOL ) ISDGMR = 1
2129C
2130      RETURN
2131 1000 FORMAT(' Generalized Minimum Residual(',I3,I3,') for ',
2132     $     'N, ITOL = ',I5, I5,
2133     $     /' ITER','   Natural Err Est','   Error Estimate')
2134 1010 FORMAT(1X,I4,1X,D16.7,1X,D16.7)
2135 1020 FORMAT(1X,' ITER = ',I5, ' IELMAX = ',I5,
2136     $     ' |R(IELMAX)/X(IELMAX)| = ',D12.5)
2137C------------- LAST LINE OF ISDGMR FOLLOWS ----------------------------
2138      END
2139*DECK DORTH
2140      SUBROUTINE DORTH (VNEW, V, HES, N, LL, LDHES, KMP, SNORMW)
2141C***BEGIN PROLOGUE  DORTH
2142C***SUBSIDIARY
2143C***PURPOSE  Internal routine for DGMRES.
2144C***LIBRARY   SLATEC (SLAP)
2145C***CATEGORY  D2A4, D2B4
2146C***TYPE      DOUBLE PRECISION (SORTH-S, DORTH-D)
2147C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
2148C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
2149C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
2150C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
2151C           Seager, Mark K., (LLNL), seager@llnl.gov
2152C             Lawrence Livermore National Laboratory
2153C             PO Box 808, L-60
2154C             Livermore, CA 94550 (510) 423-3141
2155C***DESCRIPTION
2156C        This routine  orthogonalizes  the  vector  VNEW  against the
2157C        previous KMP  vectors in the   V array.  It uses  a modified
2158C        Gram-Schmidt   orthogonalization procedure with  conditional
2159C        reorthogonalization.
2160C
2161C *Usage:
2162C      INTEGER N, LL, LDHES, KMP
2163C      DOUBLE PRECISION VNEW(N), V(N,LL), HES(LDHES,LL), SNORMW
2164C
2165C      CALL DORTH(VNEW, V, HES, N, LL, LDHES, KMP, SNORMW)
2166C
2167C *Arguments:
2168C VNEW   :INOUT    Double Precision VNEW(N)
2169C         On input, the vector of length N containing a scaled
2170C         product of the Jacobian and the vector V(*,LL).
2171C         On output, the new vector orthogonal to V(*,i0) to V(*,LL),
2172C         where i0 = max(1, LL-KMP+1).
2173C V      :IN       Double Precision V(N,LL)
2174C         The N x LL array containing the previous LL
2175C         orthogonal vectors V(*,1) to V(*,LL).
2176C HES    :INOUT    Double Precision HES(LDHES,LL)
2177C         On input, an LL x LL upper Hessenberg matrix containing,
2178C         in HES(I,K), K.lt.LL, the scaled inner products of
2179C         A*V(*,K) and V(*,i).
2180C         On return, column LL of HES is filled in with
2181C         the scaled inner products of A*V(*,LL) and V(*,i).
2182C N      :IN       Integer
2183C         The order of the matrix A, and the length of VNEW.
2184C LL     :IN       Integer
2185C         The current order of the matrix HES.
2186C LDHES  :IN       Integer
2187C         The leading dimension of the HES array.
2188C KMP    :IN       Integer
2189C         The number of previous vectors the new vector VNEW
2190C         must be made orthogonal to (KMP .le. MAXL).
2191C SNORMW :OUT      DOUBLE PRECISION
2192C         Scalar containing the l-2 norm of VNEW.
2193C
2194C***SEE ALSO  DGMRES
2195C***ROUTINES CALLED  DAXPY, DDOT, DNRM2
2196C***REVISION HISTORY  (YYMMDD)
2197C   890404  DATE WRITTEN
2198C   890404  Previous REVISION DATE
2199C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
2200C   890922  Numerous changes to prologue to make closer to SLATEC
2201C           standard.  (FNF)
2202C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
2203C   910411  Prologue converted to Version 4.0 format.  (BAB)
2204C   910506  Made subsidiary to DGMRES.  (FNF)
2205C   920511  Added complete declaration section.  (WRB)
2206C***END PROLOGUE  DORTH
2207C         The following is for optimized compilation on LLNL/LTSS Crays.
2208CLLL. OPTIMIZE
2209C     .. Scalar Arguments ..
2210      DOUBLE PRECISION SNORMW
2211      INTEGER KMP, LDHES, LL, N
2212C     .. Array Arguments ..
2213      DOUBLE PRECISION HES(LDHES,*), V(N,*), VNEW(*)
2214C     .. Local Scalars ..
2215      DOUBLE PRECISION ARG, SUMDSQ, TEM, VNRM
2216      INTEGER I, I0
2217C     .. External Functions ..
2218      DOUBLE PRECISION DDOT, DNRM2
2219      EXTERNAL DDOT, DNRM2
2220C     .. External Subroutines ..
2221      EXTERNAL DAXPY
2222C     .. Intrinsic Functions ..
2223      INTRINSIC MAX, SQRT
2224C***FIRST EXECUTABLE STATEMENT  DORTH
2225C
2226C         Get norm of unaltered VNEW for later use.
2227C
2228      VNRM = DNRM2(N, VNEW, 1)
2229C   -------------------------------------------------------------------
2230C         Perform the modified Gram-Schmidt procedure on VNEW =A*V(LL).
2231C         Scaled inner products give new column of HES.
2232C         Projections of earlier vectors are subtracted from VNEW.
2233C   -------------------------------------------------------------------
2234      I0 = MAX(1,LL-KMP+1)
2235      DO 10 I = I0,LL
2236         HES(I,LL) = DDOT(N, V(1,I), 1, VNEW, 1)
2237         TEM = -HES(I,LL)
2238         CALL DAXPY(N, TEM, V(1,I), 1, VNEW, 1)
2239 10   CONTINUE
2240C   -------------------------------------------------------------------
2241C         Compute SNORMW = norm of VNEW.  If VNEW is small compared
2242C         to its input value (in norm), then reorthogonalize VNEW to
2243C         V(*,1) through V(*,LL).  Correct if relative correction
2244C         exceeds 1000*(unit roundoff).  Finally, correct SNORMW using
2245C         the dot products involved.
2246C   -------------------------------------------------------------------
2247      SNORMW = DNRM2(N, VNEW, 1)
2248      IF (VNRM + 0.001D0*SNORMW .NE. VNRM) RETURN
2249      SUMDSQ = 0
2250      DO 30 I = I0,LL
2251         TEM = -DDOT(N, V(1,I), 1, VNEW, 1)
2252         IF (HES(I,LL) + 0.001D0*TEM .EQ. HES(I,LL)) GO TO 30
2253         HES(I,LL) = HES(I,LL) - TEM
2254         CALL DAXPY(N, TEM, V(1,I), 1, VNEW, 1)
2255         SUMDSQ = SUMDSQ + TEM**2
2256 30   CONTINUE
2257      IF (SUMDSQ .EQ. 0.0D0) RETURN
2258      ARG = MAX(0.0D0,SNORMW**2 - SUMDSQ)
2259      SNORMW = SQRT(ARG)
2260C
2261      RETURN
2262C------------- LAST LINE OF DORTH FOLLOWS ----------------------------
2263      END
2264*DECK DXLCAL
2265      SUBROUTINE DXLCAL (N, LGMR, X, XL, ZL, HES, MAXLP1, Q, V, R0NRM,
2266     +   WK, SZ, JSCAL, JPRE, MSOLVE, NMSL, RPAR, IPAR, NELT, IA, JA, A,
2267     +   ISYM)
2268C***BEGIN PROLOGUE  DXLCAL
2269C***SUBSIDIARY
2270C***PURPOSE  Internal routine for DGMRES.
2271C***LIBRARY   SLATEC (SLAP)
2272C***CATEGORY  D2A4, D2B4
2273C***TYPE      DOUBLE PRECISION (SXLCAL-S, DXLCAL-D)
2274C***KEYWORDS  GENERALIZED MINIMUM RESIDUAL, ITERATIVE PRECONDITION,
2275C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
2276C***AUTHOR  Brown, Peter, (LLNL), pnbrown@llnl.gov
2277C           Hindmarsh, Alan, (LLNL), alanh@llnl.gov
2278C           Seager, Mark K., (LLNL), seager@llnl.gov
2279C             Lawrence Livermore National Laboratory
2280C             PO Box 808, L-60
2281C             Livermore, CA 94550 (510) 423-3141
2282C***DESCRIPTION
2283C        This  routine computes the solution  XL,  the current DGMRES
2284C        iterate, given the  V(I)'s and  the  QR factorization of the
2285C        Hessenberg  matrix HES.   This routine  is  only called when
2286C        ITOL=11.
2287C
2288C *Usage:
2289C      INTEGER N, LGMR, MAXLP1, JSCAL, JPRE, NMSL, IPAR(USER DEFINED)
2290C      INTEGER NELT, IA(NELT), JA(NELT), ISYM
2291C      DOUBLE PRECISION X(N), XL(N), ZL(N), HES(MAXLP1,MAXL), Q(2*MAXL),
2292C     $                 V(N,MAXLP1), R0NRM, WK(N), SZ(N),
2293C     $                 RPAR(USER DEFINED), A(NELT)
2294C      EXTERNAL MSOLVE
2295C
2296C      CALL DXLCAL(N, LGMR, X, XL, ZL, HES, MAXLP1, Q, V, R0NRM,
2297C     $     WK, SZ, JSCAL, JPRE, MSOLVE, NMSL, RPAR, IPAR,
2298C     $     NELT, IA, JA, A, ISYM)
2299C
2300C *Arguments:
2301C N      :IN       Integer
2302C         The order of the matrix A, and the lengths
2303C         of the vectors SR, SZ, R0 and Z.
2304C LGMR   :IN       Integer
2305C         The number of iterations performed and
2306C         the current order of the upper Hessenberg
2307C         matrix HES.
2308C X      :IN       Double Precision X(N)
2309C         The current approximate solution as of the last restart.
2310C XL     :OUT      Double Precision XL(N)
2311C         An array of length N used to hold the approximate
2312C         solution X(L).
2313C         Warning: XL and ZL are the same array in the calling routine.
2314C ZL     :IN       Double Precision ZL(N)
2315C         An array of length N used to hold the approximate
2316C         solution Z(L).
2317C HES    :IN       Double Precision HES(MAXLP1,MAXL)
2318C         The upper triangular factor of the QR decomposition
2319C         of the (LGMR+1) by LGMR upper Hessenberg matrix whose
2320C         entries are the scaled inner-products of A*V(*,i) and V(*,k).
2321C MAXLP1 :IN       Integer
2322C         MAXLP1 = MAXL + 1, used for dynamic dimensioning of HES.
2323C         MAXL is the maximum allowable order of the matrix HES.
2324C Q      :IN       Double Precision Q(2*MAXL)
2325C         A double precision array of length 2*MAXL containing the
2326C         components of the Givens rotations used in the QR
2327C         decomposition of HES.  It is loaded in DHEQR.
2328C V      :IN       Double Precision V(N,MAXLP1)
2329C         The N by(LGMR+1) array containing the LGMR
2330C         orthogonal vectors V(*,1) to V(*,LGMR).
2331C R0NRM  :IN       Double Precision
2332C         The scaled norm of the initial residual for the
2333C         current call to DPIGMR.
2334C WK     :IN       Double Precision WK(N)
2335C         A double precision work array of length N.
2336C SZ     :IN       Double Precision SZ(N)
2337C         A vector of length N containing the non-zero
2338C         elements of the diagonal scaling matrix for Z.
2339C JSCAL  :IN       Integer
2340C         A flag indicating whether arrays SR and SZ are used.
2341C         JSCAL=0 means SR and SZ are not used and the
2342C                 algorithm will perform as if all
2343C                 SR(i) = 1 and SZ(i) = 1.
2344C         JSCAL=1 means only SZ is used, and the algorithm
2345C                 performs as if all SR(i) = 1.
2346C         JSCAL=2 means only SR is used, and the algorithm
2347C                 performs as if all SZ(i) = 1.
2348C         JSCAL=3 means both SR and SZ are used.
2349C JPRE   :IN       Integer
2350C         The preconditioner type flag.
2351C MSOLVE :EXT      External.
2352C         Name of the routine which solves a linear system Mz = r for
2353C         z given r with the preconditioning matrix M (M is supplied via
2354C         RPAR and IPAR arrays.  The name of the MSOLVE routine must
2355C         be declared external in the calling program.  The calling
2356C         sequence to MSOLVE is:
2357C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RPAR, IPAR)
2358C         Where N is the number of unknowns, R is the right-hand side
2359C         vector and Z is the solution upon return.  NELT, IA, JA, A and
2360C         ISYM are defined as below.  RPAR is a double precision array
2361C         that can be used to pass necessary preconditioning information
2362C         and/or workspace to MSOLVE.  IPAR is an integer work array
2363C         for the same purpose as RPAR.
2364C NMSL   :IN       Integer
2365C         The number of calls to MSOLVE.
2366C RPAR   :IN       Double Precision RPAR(USER DEFINED)
2367C         Double Precision workspace passed directly to the MSOLVE
2368C         routine.
2369C IPAR   :IN       Integer IPAR(USER DEFINED)
2370C         Integer workspace passed directly to the MSOLVE routine.
2371C NELT   :IN       Integer
2372C         The length of arrays IA, JA and A.
2373C IA     :IN       Integer IA(NELT)
2374C         An integer array of length NELT containing matrix data.
2375C         It is passed directly to the MATVEC and MSOLVE routines.
2376C JA     :IN       Integer JA(NELT)
2377C         An integer array of length NELT containing matrix data.
2378C         It is passed directly to the MATVEC and MSOLVE routines.
2379C A      :IN       Double Precision A(NELT)
2380C         A double precision array of length NELT containing matrix
2381C         data.
2382C         It is passed directly to the MATVEC and MSOLVE routines.
2383C ISYM   :IN       Integer
2384C         A flag to indicate symmetric matrix storage.
2385C         If ISYM=0, all non-zero entries of the matrix are
2386C         stored.  If ISYM=1, the matrix is symmetric and
2387C         only the upper or lower triangular part is stored.
2388C
2389C***SEE ALSO  DGMRES
2390C***ROUTINES CALLED  DAXPY, DCOPY, DHELS
2391C***REVISION HISTORY  (YYMMDD)
2392C   890404  DATE WRITTEN
2393C   890404  Previous REVISION DATE
2394C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
2395C   890922  Numerous changes to prologue to make closer to SLATEC
2396C           standard.  (FNF)
2397C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
2398C   910411  Prologue converted to Version 4.0 format.  (BAB)
2399C   910502  Removed MSOLVE from ROUTINES CALLED list.  (FNF)
2400C   910506  Made subsidiary to DGMRES.  (FNF)
2401C   920511  Added complete declaration section.  (WRB)
2402C***END PROLOGUE  DXLCAL
2403C         The following is for optimized compilation on LLNL/LTSS Crays.
2404CLLL. OPTIMIZE
2405C     .. Scalar Arguments ..
2406      DOUBLE PRECISION R0NRM
2407      INTEGER ISYM, JPRE, JSCAL, LGMR, MAXLP1, N, NELT, NMSL
2408C     .. Array Arguments ..
2409      DOUBLE PRECISION A(NELT), HES(MAXLP1,*), Q(*), RPAR(*), SZ(*),
2410     +                 V(N,*), WK(N), X(N), XL(N), ZL(N)
2411      INTEGER IA(NELT), IPAR(*), JA(NELT)
2412C     .. Subroutine Arguments ..
2413      EXTERNAL MSOLVE
2414C     .. Local Scalars ..
2415      INTEGER I, K, LL, LLP1
2416C     .. External Subroutines ..
2417      EXTERNAL DAXPY, DCOPY, DHELS
2418C***FIRST EXECUTABLE STATEMENT  DXLCAL
2419      LL = LGMR
2420      LLP1 = LL + 1
2421      DO 10 K = 1,LLP1
2422         WK(K) = 0
2423 10   CONTINUE
2424      WK(1) = R0NRM
2425      CALL DHELS(HES, MAXLP1, LL, Q, WK)
2426      DO 20 K = 1,N
2427         ZL(K) = 0
2428 20   CONTINUE
2429      DO 30 I = 1,LL
2430         CALL DAXPY(N, WK(I), V(1,I), 1, ZL, 1)
2431 30   CONTINUE
2432      IF ((JSCAL .EQ. 1) .OR.(JSCAL .EQ. 3)) THEN
2433         DO 40 K = 1,N
2434            ZL(K) = ZL(K)/SZ(K)
2435 40      CONTINUE
2436      ENDIF
2437      IF (JPRE .GT. 0) THEN
2438         CALL DCOPY(N, ZL, 1, WK, 1)
2439         CALL MSOLVE(N, WK, ZL, NELT, IA, JA, A, ISYM, RPAR, IPAR)
2440         NMSL = NMSL + 1
2441      ENDIF
2442C         calculate XL from X and ZL.
2443      DO 50 K = 1,N
2444         XL(K) = X(K) + ZL(K)
2445 50   CONTINUE
2446      RETURN
2447C------------- LAST LINE OF DXLCAL FOLLOWS ----------------------------
2448      END
2449*DECK DDOT
2450      DOUBLE PRECISION FUNCTION DDOT (N, DX, INCX, DY, INCY)
2451      use omp_lib
2452C***BEGIN PROLOGUE  DDOT
2453C***PURPOSE  Compute the inner product of two vectors.
2454C***LIBRARY   SLATEC (BLAS)
2455C***CATEGORY  D1A4
2456C***TYPE      DOUBLE PRECISION (SDOT-S, DDOT-D, CDOTU-C)
2457C***KEYWORDS  BLAS, INNER PRODUCT, LINEAR ALGEBRA, VECTOR
2458C***AUTHOR  Lawson, C. L., (JPL)
2459C           Hanson, R. J., (SNLA)
2460C           Kincaid, D. R., (U. of Texas)
2461C           Krogh, F. T., (JPL)
2462C***DESCRIPTION
2463C
2464C                B L A S  Subprogram
2465C    Description of Parameters
2466C
2467C     --Input--
2468C        N  number of elements in input vector(s)
2469C       DX  double precision vector with N elements
2470C     INCX  storage spacing between elements of DX
2471C       DY  double precision vector with N elements
2472C     INCY  storage spacing between elements of DY
2473C
2474C     --Output--
2475C     DDOT  double precision dot product (zero if N .LE. 0)
2476C
2477C     Returns the dot product of double precision DX and DY.
2478C     DDOT = sum for I = 0 to N-1 of  DX(LX+I*INCX) * DY(LY+I*INCY),
2479C     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
2480C     defined in a similar way using INCY.
2481C
2482C***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
2483C                 Krogh, Basic linear algebra subprograms for Fortran
2484C                 usage, Algorithm No. 539, Transactions on Mathematical
2485C                 Software 5, 3 (September 1979), pp. 308-323.
2486C***ROUTINES CALLED  (NONE)
2487C***REVISION HISTORY  (YYMMDD)
2488C   791001  DATE WRITTEN
2489C   890831  Modified array declarations.  (WRB)
2490C   890831  REVISION DATE from Version 3.2
2491C   891214  Prologue converted to Version 4.0 format.  (BAB)
2492C   920310  Corrected definition of LX in DESCRIPTION.  (WRB)
2493C   920501  Reformatted the REFERENCES section.  (WRB)
2494C***END PROLOGUE  DDOT
2495      DOUBLE PRECISION DX(*), DY(*)
2496C***FIRST EXECUTABLE STATEMENT  DDOT
2497      DDOT = 0.0D0
2498      IF (N .LE. 0) RETURN
2499      IF (INCX .EQ. INCY) IF (INCX-1) 5,20,60
2500C
2501C     Code for unequal or nonpositive increments.
2502C
2503    5 IX = 1
2504      IY = 1
2505      IF (INCX .LT. 0) IX = (-N+1)*INCX + 1
2506      IF (INCY .LT. 0) IY = (-N+1)*INCY + 1
2507      DO 10 I = 1,N
2508        DDOT = DDOT + DX(IX)*DY(IY)
2509        IX = IX + INCX
2510        IY = IY + INCY
2511   10 CONTINUE
2512      RETURN
2513C
2514C     Code for both increments equal to 1.
2515C
2516C     Clean-up loop so remaining vector length is a multiple of 5.
2517C
2518   20 M = MOD(N,5)
2519      IF (M .EQ. 0) GO TO 40
2520      DO 30 I = 1,M
2521         DDOT = DDOT + DX(I)*DY(I)
2522   30 CONTINUE
2523      IF (N .LT. 5) RETURN
2524   40 MP1 = M + 1
2525      DO 50 I = MP1,N,5
2526      DDOT = DDOT + DX(I)*DY(I) + DX(I+1)*DY(I+1) + DX(I+2)*DY(I+2) +
2527     1              DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
2528   50 CONTINUE
2529      RETURN
2530C
2531C     Code for equal, positive, non-unit increments.
2532C
2533   60 NS = N*INCX
2534      DO 70 I = 1,NS,INCX
2535        DDOT = DDOT + DX(I)*DY(I)
2536   70 CONTINUE
2537      RETURN
2538      END
2539