1*DECK SIR
2      SUBROUTINE SIR (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
3     +   ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, DZ, RWORK,
4     +   IWORK)
5C***BEGIN PROLOGUE  SIR
6C***PURPOSE  Preconditioned Iterative Refinement Sparse Ax = b Solver.
7C            Routine to solve a general linear system  Ax = b  using
8C            iterative refinement with a matrix splitting.
9C***LIBRARY   SLATEC (SLAP)
10C***CATEGORY  D2A4, D2B4
11C***TYPE      SINGLE PRECISION (SIR-S, DIR-D)
12C***KEYWORDS  ITERATIVE PRECONDITION, LINEAR SYSTEM, SLAP, SPARSE
13C***AUTHOR  Greenbaum, Anne, (Courant Institute)
14C           Seager, Mark K., (LLNL)
15C             Lawrence Livermore National Laboratory
16C             PO BOX 808, L-60
17C             Livermore, CA 94550 (510) 423-3141
18C             seager@llnl.gov
19C***DESCRIPTION
20C
21C *Usage:
22C     INTEGER  N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
23C     INTEGER  ITER, IERR, IUNIT, IWORK(USER DEFINED)
24C     REAL     B(N), X(N), A(NELT), TOL, ERR, R(N), Z(N), DZ(N),
25C     REAL     RWORK(USER DEFINED)
26C     EXTERNAL MATVEC, MSOLVE
27C
28C     CALL SIR(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE, ITOL,
29C    $     TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, DZ, RWORK, IWORK)
30C
31C *Arguments:
32C N      :IN       Integer.
33C         Order of the Matrix.
34C B      :IN       Real B(N).
35C         Right-hand side vector.
36C X      :INOUT    Real X(N).
37C         On input X is your initial guess for solution vector.
38C         On output X is the final approximate solution.
39C NELT   :IN       Integer.
40C         Number of Non-Zeros stored in A.
41C IA     :IN       Integer IA(NELT).
42C JA     :IN       Integer JA(NELT).
43C A      :IN       Real A(NELT).
44C         These arrays contain the matrix data structure for A.
45C         It could take any form.  See "Description", below,
46C         for more details.
47C ISYM   :IN       Integer.
48C         Flag to indicate symmetric storage format.
49C         If ISYM=0, all non-zero entries of the matrix are stored.
50C         If ISYM=1, the matrix is symmetric, and only the upper
51C         or lower triangle of the matrix is stored.
52C MATVEC :EXT      External.
53C         Name of a routine which performs the matrix vector multiply
54C         Y = A*X given A and X.  The name of the MATVEC routine must
55C         be declared external in the calling program.  The calling
56C         sequence to MATVEC is:
57C             CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
58C         Where N is the number of unknowns, Y is the product A*X
59C         upon return, X is an input vector, NELT is the number of
60C         non-zeros in the SLAP IA, JA, A storage for the matrix A.
61C         ISYM is a flag which, if non-zero, denotes that A is
62C         symmetric and only the lower or upper triangle is stored.
63C MSOLVE :EXT      External.
64C         Name of a routine which solves a linear system MZ = R for
65C         Z given R with the preconditioning matrix M (M is supplied via
66C         RWORK and IWORK arrays).  The name of the MSOLVE routine must
67C         be declared external in the calling program.  The calling
68C         sequence to MSOLVE is:
69C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
70C         Where N is the number of unknowns, R is the right-hand side
71C         vector and Z is the solution upon return.  NELT, IA, JA, A and
72C         ISYM are defined as above.  RWORK is a real array that can
73C         be used to pass necessary preconditioning information and/or
74C         workspace to MSOLVE.  IWORK is an integer work array for
75C         the same purpose as RWORK.
76C ITOL   :IN       Integer.
77C         Flag to indicate type of convergence criterion.
78C         If ITOL=1, iteration stops when the 2-norm of the residual
79C         divided by the 2-norm of the right-hand side is less than TOL.
80C         If ITOL=2, iteration stops when the 2-norm of M-inv times the
81C         residual divided by the 2-norm of M-inv times the right hand
82C         side is less than TOL, where M-inv is the inverse of the
83C         diagonal of A.
84C         ITOL=11 is often useful for checking and comparing different
85C         routines.  For this case, the user must supply the "exact"
86C         solution or a very accurate approximation (one with an error
87C         much less than TOL) through a common block,
88C             COMMON /SSLBLK/ SOLN( )
89C         If ITOL=11, iteration stops when the 2-norm of the difference
90C         between the iterative approximation and the user-supplied
91C         solution divided by the 2-norm of the user-supplied solution
92C         is less than TOL.  Note that this requires the user to set up
93C         the "COMMON /SSLBLK/ SOLN(LENGTH)" in the calling routine.
94C         The routine with this declaration should be loaded before the
95C         stop test so that the correct length is used by the loader.
96C         This procedure is not standard Fortran and may not work
97C         correctly on your system (although it has worked on every
98C         system the authors have tried).  If ITOL is not 11 then this
99C         common block is indeed standard Fortran.
100C TOL    :INOUT    Real.
101C         Convergence criterion, as described above.  (Reset if IERR=4.)
102C ITMAX  :IN       Integer.
103C         Maximum number of iterations.
104C ITER   :OUT      Integer.
105C         Number of iterations required to reach convergence, or
106C         ITMAX+1 if convergence criterion could not be achieved in
107C         ITMAX iterations.
108C ERR    :OUT      Real.
109C         Error estimate of error in final approximate solution, as
110C         defined by ITOL.
111C IERR   :OUT      Integer.
112C         Return error flag.
113C           IERR = 0 => All went well.
114C           IERR = 1 => Insufficient space allocated for WORK or IWORK.
115C           IERR = 2 => Method failed to converge in ITMAX steps.
116C           IERR = 3 => Error in user input.
117C                       Check input values of N, ITOL.
118C           IERR = 4 => User error tolerance set too tight.
119C                       Reset to 500*R1MACH(3).  Iteration proceeded.
120C           IERR = 5 => Preconditioning matrix, M, is not positive
121C                       definite.  (r,z) < 0.
122C           IERR = 6 => Matrix A is not positive definite.  (p,Ap) < 0.
123C IUNIT  :IN       Integer.
124C         Unit number on which to write the error at each iteration,
125C         if this is desired for monitoring convergence.  If unit
126C         number is 0, no writing will occur.
127C R      :WORK     Real R(N).
128C Z      :WORK     Real Z(N).
129C DZ     :WORK     Real DZ(N).
130C         Real arrays used for workspace.
131C RWORK  :WORK     Real RWORK(USER DEFINED).
132C         Real array that can be used by  MSOLVE.
133C IWORK  :WORK     Integer IWORK(USER DEFINED).
134C         Integer array that can be used by  MSOLVE.
135C
136C *Description:
137C       The basic algorithm for iterative refinement (also known as
138C       iterative improvement) is:
139C
140C            n+1    n    -1       n
141C           X    = X  + M  (B - AX  ).
142C
143C           -1   -1
144C       If M =  A then this  is the  standard  iterative  refinement
145C       algorithm and the "subtraction" in the  residual calculation
146C       should be done in double precision (which it is  not in this
147C       routine).
148C       If M = DIAG(A), the diagonal of A, then iterative refinement
149C       is  known  as  Jacobi's  method.   The  SLAP  routine  SSJAC
150C       implements this iterative strategy.
151C       If M = L, the lower triangle of A, then iterative refinement
152C       is known as Gauss-Seidel.   The SLAP routine SSGS implements
153C       this iterative strategy.
154C
155C       This routine does  not care  what matrix data   structure is
156C       used for  A and M.  It simply   calls  the MATVEC and MSOLVE
157C       routines, with  the arguments as  described above.  The user
158C       could write any type of structure and the appropriate MATVEC
159C       and MSOLVE routines.  It is assumed  that A is stored in the
160C       IA, JA, A  arrays in some fashion and  that M (or INV(M)) is
161C       stored  in  IWORK  and  RWORK)  in  some fashion.   The SLAP
162C       routines SSJAC and SSGS are examples of this procedure.
163C
164C       Two  examples  of  matrix  data structures  are the: 1) SLAP
165C       Triad  format and 2) SLAP Column format.
166C
167C       =================== S L A P Triad format ===================
168C
169C       In  this   format only the  non-zeros are  stored.  They may
170C       appear  in *ANY* order.   The user  supplies three arrays of
171C       length NELT, where  NELT  is the number  of non-zeros in the
172C       matrix:  (IA(NELT), JA(NELT),  A(NELT)).  For each  non-zero
173C       the  user puts   the row  and  column index   of that matrix
174C       element in the IA and JA arrays.  The  value of the non-zero
175C       matrix  element is  placed in  the corresponding location of
176C       the A  array.  This is  an extremely easy data  structure to
177C       generate.  On  the other hand it  is  not too  efficient  on
178C       vector  computers   for the  iterative  solution  of  linear
179C       systems.  Hence, SLAP  changes this input  data structure to
180C       the SLAP   Column  format for the  iteration (but   does not
181C       change it back).
182C
183C       Here is an example of the  SLAP Triad   storage format for a
184C       5x5 Matrix.  Recall that the entries may appear in any order.
185C
186C
187C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
188C                              1  2  3  4  5  6  7  8  9 10 11
189C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
190C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
191C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
192C       | 0  0  0 44  0|
193C       |51  0 53  0 55|
194C
195C       =================== S L A P Column format ==================
196C
197C       In  this format   the non-zeros are    stored counting  down
198C       columns (except  for the diagonal  entry, which must  appear
199C       first in each "column") and are  stored in the real array A.
200C       In other words,  for  each column    in the matrix   put the
201C       diagonal  entry  in A.   Then   put  in the  other  non-zero
202C       elements going   down the  column (except  the  diagonal) in
203C       order.  The IA array holds the row index  for each non-zero.
204C       The JA array holds the offsets into the IA, A arrays for the
205C       beginning   of   each  column.      That is,   IA(JA(ICOL)),
206C       A(JA(ICOL)) points to the beginning of the ICOL-th column in
207C       IA and  A.  IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)  points to the
208C       end of the ICOL-th column.  Note that we always have JA(N+1)
209C       = NELT+1, where N is the number of columns in the matrix and
210C       NELT is the number of non-zeros in the matrix.
211C
212C       Here is an example of the  SLAP Column  storage format for a
213C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
214C       column):
215C
216C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
217C                              1  2  3    4  5    6  7    8    9 10 11
218C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
219C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
220C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
221C       | 0  0  0 44  0|
222C       |51  0 53  0 55|
223C
224C *Examples:
225C       See the SLAP routines SSJAC, SSGS
226C
227C *Cautions:
228C     This routine will attempt to write to the Fortran logical output
229C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
230C     this logical unit is attached to a file or terminal before calling
231C     this routine with a non-zero value for IUNIT.  This routine does
232C     not check for the validity of a non-zero IUNIT unit number.
233C
234C***SEE ALSO  SSJAC, SSGS
235C***REFERENCES  1. Gene Golub and Charles Van Loan, Matrix Computations,
236C                  Johns Hopkins University Press, Baltimore, Maryland,
237C                  1983.
238C               2. Mark K. Seager, A SLAP for the Masses, in
239C                  G. F. Carey, Ed., Parallel Supercomputing: Methods,
240C                  Algorithms and Applications, Wiley, 1989, pp.135-155.
241C***ROUTINES CALLED  ISSIR, R1MACH
242C***REVISION HISTORY  (YYMMDD)
243C   871119  DATE WRITTEN
244C   881213  Previous REVISION DATE
245C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
246C   890921  Removed TeX from comments.  (FNF)
247C   890922  Numerous changes to prologue to make closer to SLATEC
248C           standard.  (FNF)
249C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
250C   891004  Added new reference.
251C   910411  Prologue converted to Version 4.0 format.  (BAB)
252C   910502  Removed MATVEC and MSOLVE from ROUTINES CALLED list.  (FNF)
253C   920407  COMMON BLOCK renamed SSLBLK.  (WRB)
254C   920511  Added complete declaration section.  (WRB)
255C   920929  Corrected format of references.  (FNF)
256C   921019  Changed 500.0 to 500 to reduce SP/DP differences.  (FNF)
257C***END PROLOGUE  SIR
258C     .. Scalar Arguments ..
259      REAL ERR, TOL
260      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
261C     .. Array Arguments ..
262      REAL A(NELT), B(N), DZ(N), R(N), RWORK(*), X(N), Z(N)
263      INTEGER IA(NELT), IWORK(*), JA(NELT)
264C     .. Subroutine Arguments ..
265      EXTERNAL MATVEC, MSOLVE
266C     .. Local Scalars ..
267      REAL BNRM, SOLNRM, TOLMIN
268      INTEGER I, K
269C     .. External Functions ..
270      REAL R1MACH
271      INTEGER ISSIR
272      EXTERNAL R1MACH, ISSIR
273C***FIRST EXECUTABLE STATEMENT  SIR
274C
275C         Check some of the input data.
276C
277      ITER = 0
278      IERR = 0
279      IF( N.LT.1 ) THEN
280         IERR = 3
281         RETURN
282      ENDIF
283      TOLMIN = 500*R1MACH(3)
284      IF( TOL.LT.TOLMIN ) THEN
285         TOL = TOLMIN
286         IERR = 4
287      ENDIF
288C
289C         Calculate initial residual and pseudo-residual, and check
290C         stopping criterion.
291      CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
292      DO 10 I = 1, N
293         R(I) = B(I) - R(I)
294 10   CONTINUE
295      CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
296C
297      IF( ISSIR(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
298     $     ITMAX, ITER, ERR, IERR, IUNIT, R, Z, DZ, RWORK,
299     $     IWORK, BNRM, SOLNRM) .NE. 0 ) GO TO 200
300      IF( IERR.NE.0 ) RETURN
301C
302C         ***** iteration loop *****
303C
304      DO 100 K=1,ITMAX
305         ITER = K
306C
307C         Calculate new iterate x, new residual r, and new
308C         pseudo-residual z.
309         DO 20 I = 1, N
310            X(I) = X(I) + Z(I)
311 20      CONTINUE
312         CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
313         DO 30 I = 1, N
314            R(I) = B(I) - R(I)
315 30      CONTINUE
316         CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
317C
318C         check stopping criterion.
319         IF( ISSIR(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
320     $        ITMAX, ITER, ERR, IERR, IUNIT, R, Z, DZ, RWORK,
321     $        IWORK, BNRM, SOLNRM) .NE. 0 ) GO TO 200
322C
323 100  CONTINUE
324C
325C         *****   end of loop  *****
326C         Stopping criterion not satisfied.
327      ITER = ITMAX + 1
328      IERR = 2
329C
330 200  RETURN
331C------------- LAST LINE OF SIR FOLLOWS -------------------------------
332      END
333