1*DECK SCGS
2      SUBROUTINE SCGS (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
3     +   ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q, U, V1,
4     +   V2, RWORK, IWORK)
5C***BEGIN PROLOGUE  SCGS
6C***PURPOSE  Preconditioned BiConjugate Gradient Squared Ax=b Solver.
7C            Routine to solve a Non-Symmetric linear system  Ax = b
8C            using the Preconditioned BiConjugate Gradient Squared
9C            method.
10C***LIBRARY   SLATEC (SLAP)
11C***CATEGORY  D2A4, D2B4
12C***TYPE      SINGLE PRECISION (SCGS-S, DCGS-D)
13C***KEYWORDS  BICONJUGATE GRADIENT, ITERATIVE PRECONDITION,
14C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
15C***AUTHOR  Greenbaum, Anne, (Courant Institute)
16C           Seager, Mark K., (LLNL)
17C             Lawrence Livermore National Laboratory
18C             PO BOX 808, L-60
19C             Livermore, CA 94550 (510) 423-3141
20C             seager@llnl.gov
21C***DESCRIPTION
22C
23C *Usage:
24C      INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
25C      INTEGER ITER, IERR, IUNIT, IWORK(USER DEFINED)
26C      REAL    B(N), X(N), A(NELT), TOL, ERR, R(N), R0(N), P(N)
27C      REAL    Q(N), U(N), V1(N), V2(N), RWORK(USER DEFINED)
28C      EXTERNAL MATVEC, MSOLVE
29C
30C      CALL SCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC,
31C     $     MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT,
32C     $     R, R0, P, Q, U, V1, V2, RWORK, IWORK)
33C
34C *Arguments:
35C N      :IN       Integer
36C         Order of the Matrix.
37C B      :IN       Real B(N).
38C         Right-hand side vector.
39C X      :INOUT    Real X(N).
40C         On input X is your initial guess for solution vector.
41C         On output X is the final approximate solution.
42C NELT   :IN       Integer.
43C         Number of Non-Zeros stored in A.
44C IA     :IN       Integer IA(NELT).
45C JA     :IN       Integer JA(NELT).
46C A      :IN       Real A(NELT).
47C         These arrays contain the matrix data structure for A.
48C         It could take any form.  See "Description", below,
49C         for more details.
50C ISYM   :IN       Integer.
51C         Flag to indicate symmetric storage format.
52C         If ISYM=0, all non-zero entries of the matrix are stored.
53C         If ISYM=1, the matrix is symmetric, and only the upper
54C         or lower triangle of the matrix is stored.
55C MATVEC :EXT      External.
56C         Name of a routine which  performs the matrix vector multiply
57C         operation  Y = A*X  given A and X.  The  name of  the MATVEC
58C         routine must  be declared external  in the  calling program.
59C         The calling sequence of MATVEC is:
60C             CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
61C         Where N is the number of unknowns, Y is the product A*X upon
62C         return,  X is an input  vector.  NELT, IA,  JA,  A and  ISYM
63C         define the SLAP matrix data structure: see Description,below.
64C MSOLVE :EXT      External.
65C         Name of a routine which solves a linear system MZ = R  for Z
66C         given R with the preconditioning matrix M (M is supplied via
67C         RWORK  and IWORK arrays).   The name  of  the MSOLVE routine
68C         must be declared  external  in the  calling   program.   The
69C         calling sequence of MSOLVE is:
70C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
71C         Where N is the number of unknowns, R is  the right-hand side
72C         vector, and Z is the solution upon return.  NELT,  IA, JA, A
73C         and  ISYM define the SLAP  matrix  data structure: see
74C         Description, below.  RWORK is a  real array that can be used
75C         to  pass   necessary  preconditioning     information and/or
76C         workspace to MSOLVE.  IWORK is an integer work array for the
77C         same purpose as RWORK.
78C ITOL   :IN       Integer.
79C         Flag to indicate type of convergence criterion.
80C         If ITOL=1, iteration stops when the 2-norm of the residual
81C         divided by the 2-norm of the right-hand side is less than TOL.
82C         This routine must calculate the residual from R = A*X - B.
83C         This is unnatural and hence expensive for this type of iter-
84C         ative method.  ITOL=2 is *STRONGLY* recommended.
85C         If ITOL=2, iteration stops when the 2-norm of M-inv times the
86C         residual divided by the 2-norm of M-inv times the right hand
87C         side is less than TOL, where M-inv time a vector is the pre-
88C         conditioning step.  This is the *NATURAL* stopping for this
89C         iterative method and is *STRONGLY* recommended.
90C         ITOL=11 is often useful for checking and comparing different
91C         routines.  For this case, the user must supply the "exact"
92C         solution or a very accurate approximation (one with an error
93C         much less than TOL) through a common block,
94C             COMMON /SSLBLK/ SOLN( )
95C         If ITOL=11, iteration stops when the 2-norm of the difference
96C         between the iterative approximation and the user-supplied
97C         solution divided by the 2-norm of the user-supplied solution
98C         is less than TOL.
99C TOL    :INOUT    Real.
100C         Convergence criterion, as described above.  (Reset if IERR=4.)
101C ITMAX  :IN       Integer.
102C         Maximum number of iterations.
103C ITER   :OUT      Integer.
104C         Number of iterations required to reach convergence, or
105C         ITMAX+1 if convergence criterion could not be achieved in
106C         ITMAX iterations.
107C ERR    :OUT      Real.
108C         Error estimate of error in final approximate solution, as
109C         defined by ITOL.
110C IERR   :OUT      Integer.
111C         Return error flag.
112C           IERR = 0 => All went well.
113C           IERR = 1 => Insufficient space allocated for WORK or IWORK.
114C           IERR = 2 => Method failed to converge in ITMAX steps.
115C           IERR = 3 => Error in user input.
116C                       Check input values of N, ITOL.
117C           IERR = 4 => User error tolerance set too tight.
118C                       Reset to 500*R1MACH(3).  Iteration proceeded.
119C           IERR = 5 => Breakdown of the method detected.
120C                       (r0,r) approximately 0.
121C           IERR = 6 => Stagnation of the method detected.
122C                       (r0,v) approximately 0.
123C IUNIT  :IN       Integer.
124C         Unit number on which to write the error at each iteration,
125C         if this is desired for monitoring convergence.  If unit
126C         number is 0, no writing will occur.
127C R      :WORK     Real R(N).
128C R0     :WORK     Real R0(N).
129C P      :WORK     Real P(N).
130C Q      :WORK     Real Q(N).
131C U      :WORK     Real U(N).
132C V1     :WORK     Real V1(N).
133C V2     :WORK     Real V2(N).
134C         Real arrays used for workspace.
135C RWORK  :WORK     Real RWORK(USER DEFINED).
136C         Real array that can be used for workspace in MSOLVE.
137C IWORK  :WORK     Integer IWORK(USER DEFINED).
138C         Integer array that can be used for workspace in MSOLVE.
139C
140C *Description
141C       This routine does  not care  what matrix data   structure is
142C       used for  A and M.  It simply   calls  the MATVEC and MSOLVE
143C       routines, with  the arguments as  described above.  The user
144C       could write any type of structure and the appropriate MATVEC
145C       and MSOLVE routines.  It is assumed  that A is stored in the
146C       IA, JA, A  arrays in some fashion and  that M (or INV(M)) is
147C       stored  in  IWORK  and  RWORK   in  some fashion.   The SLAP
148C       routines SSDBCG and SSLUCS are examples of this procedure.
149C
150C       Two  examples  of  matrix  data structures  are the: 1) SLAP
151C       Triad  format and 2) SLAP Column format.
152C
153C       =================== S L A P Triad format ===================
154C
155C       In  this   format only the  non-zeros are  stored.  They may
156C       appear  in *ANY* order.   The user  supplies three arrays of
157C       length NELT, where  NELT  is the number  of non-zeros in the
158C       matrix:  (IA(NELT), JA(NELT),  A(NELT)).  For each  non-zero
159C       the  user puts   the row  and  column index   of that matrix
160C       element in the IA and JA arrays.  The  value of the non-zero
161C       matrix  element is  placed in  the corresponding location of
162C       the A  array.  This is  an extremely easy data  structure to
163C       generate.  On  the other hand it  is  not too  efficient  on
164C       vector  computers   for the  iterative  solution  of  linear
165C       systems.  Hence, SLAP  changes this input  data structure to
166C       the SLAP   Column  format for the  iteration (but   does not
167C       change it back).
168C
169C       Here is an example of the  SLAP Triad   storage format for a
170C       5x5 Matrix.  Recall that the entries may appear in any order.
171C
172C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
173C                              1  2  3  4  5  6  7  8  9 10 11
174C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
175C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
176C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
177C       | 0  0  0 44  0|
178C       |51  0 53  0 55|
179C
180C       =================== S L A P Column format ==================
181C
182C       In  this format   the non-zeros are    stored counting  down
183C       columns (except  for the diagonal  entry, which must  appear
184C       first in each "column") and are  stored in the real array A.
185C       In other words,  for  each column    in the matrix   put the
186C       diagonal  entry  in A.   Then   put  in the  other  non-zero
187C       elements going   down the  column (except  the  diagonal) in
188C       order.  The IA array holds the row index  for each non-zero.
189C       The JA array holds the offsets into the IA, A arrays for the
190C       beginning   of   each  column.      That is,   IA(JA(ICOL)),
191C       A(JA(ICOL)) points to the beginning of the ICOL-th column in
192C       IA and  A.  IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)  points to the
193C       end of the ICOL-th column.  Note that we always have JA(N+1)
194C       = NELT+1, where N is the number of columns in the matrix and
195C       NELT is the number of non-zeros in the matrix.
196C
197C       Here is an example of the  SLAP Column  storage format for a
198C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
199C       column):
200C
201C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
202C                              1  2  3    4  5    6  7    8    9 10 11
203C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
204C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
205C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
206C       | 0  0  0 44  0|
207C       |51  0 53  0 55|
208C
209C *Cautions:
210C     This routine will attempt to write to the Fortran logical output
211C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
212C     this logical unit is attached to a file or terminal before calling
213C     this routine with a non-zero value for IUNIT.  This routine does
214C     not check for the validity of a non-zero IUNIT unit number.
215C
216C***SEE ALSO  SSDCGS, SSLUCS
217C***REFERENCES  1. P. Sonneveld, CGS, a fast Lanczos-type solver
218C                  for nonsymmetric linear systems, Delft University
219C                  of Technology Report 84-16, Department of Mathe-
220C                  matics and Informatics, Delft, The Netherlands.
221C               2. E. F. Kaasschieter, The solution of non-symmetric
222C                  linear systems by biconjugate gradients or conjugate
223C                  gradients squared,  Delft University of Technology
224C                  Report 86-21, Department of Mathematics and Informa-
225C                  tics, Delft, The Netherlands.
226C               3. Mark K. Seager, A SLAP for the Masses, in
227C                  G. F. Carey, Ed., Parallel Supercomputing: Methods,
228C                  Algorithms and Applications, Wiley, 1989, pp.135-155.
229C***ROUTINES CALLED  ISSCGS, R1MACH, SAXPY, SDOT
230C***REVISION HISTORY  (YYMMDD)
231C   871119  DATE WRITTEN
232C   881213  Previous REVISION DATE
233C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
234C   890921  Removed TeX from comments.  (FNF)
235C   890922  Numerous changes to prologue to make closer to SLATEC
236C           standard.  (FNF)
237C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
238C   891004  Added new reference.
239C   910411  Prologue converted to Version 4.0 format.  (BAB)
240C   910502  Removed MATVEC and MSOLVE from ROUTINES CALLED list.  (FNF)
241C   920407  COMMON BLOCK renamed SSLBLK.  (WRB)
242C   920511  Added complete declaration section.  (WRB)
243C   920929  Corrected format of references.  (FNF)
244C   921019  Changed 500.0 to 500 to reduce SP/DP differences.  (FNF)
245C   921113  Corrected C***CATEGORY line.  (FNF)
246C***END PROLOGUE  SCGS
247C     .. Scalar Arguments ..
248      REAL ERR, TOL
249      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
250C     .. Array Arguments ..
251      REAL A(NELT), B(N), P(N), Q(N), R(N), R0(N), RWORK(*), U(N),
252     +     V1(N), V2(N), X(N)
253      INTEGER IA(NELT), IWORK(*), JA(NELT)
254C     .. Subroutine Arguments ..
255      EXTERNAL MATVEC, MSOLVE
256C     .. Local Scalars ..
257      REAL AK, AKM, BK, BNRM, FUZZ, RHON, RHONM1, SIGMA, SOLNRM, TOLMIN
258      INTEGER I, K
259C     .. External Functions ..
260      REAL R1MACH, SDOT
261      INTEGER ISSCGS
262      EXTERNAL R1MACH, SDOT, ISSCGS
263C     .. External Subroutines ..
264      EXTERNAL SAXPY
265C     .. Intrinsic Functions ..
266      INTRINSIC ABS
267C***FIRST EXECUTABLE STATEMENT  SCGS
268C
269C         Check some of the input data.
270C
271      ITER = 0
272      IERR = 0
273      IF( N.LT.1 ) THEN
274         IERR = 3
275         RETURN
276      ENDIF
277      TOLMIN = 500*R1MACH(3)
278      IF( TOL.LT.TOLMIN ) THEN
279         TOL = TOLMIN
280         IERR = 4
281      ENDIF
282C
283C         Calculate initial residual and pseudo-residual, and check
284C         stopping criterion.
285      CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
286      DO 10 I = 1, N
287         V1(I)  = R(I) - B(I)
288 10   CONTINUE
289      CALL MSOLVE(N, V1, R, NELT, IA, JA, A, ISYM, RWORK, IWORK)
290C
291      IF( ISSCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
292     $     ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q,
293     $     U, V1, V2, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
294     $     GO TO 200
295      IF( IERR.NE.0 ) RETURN
296C
297C         Set initial values.
298C
299      FUZZ = R1MACH(3)**2
300      DO 20 I = 1, N
301         R0(I) = R(I)
302 20   CONTINUE
303      RHONM1 = 1
304C
305C         ***** ITERATION LOOP *****
306C
307      DO 100 K=1,ITMAX
308         ITER = K
309C
310C         Calculate coefficient BK and direction vectors U, V and P.
311         RHON = SDOT(N, R0, 1, R, 1)
312         IF( ABS(RHONM1).LT.FUZZ ) GOTO 998
313         BK = RHON/RHONM1
314         IF( ITER.EQ.1 ) THEN
315            DO 30 I = 1, N
316               U(I) = R(I)
317               P(I) = R(I)
318 30         CONTINUE
319         ELSE
320            DO 40 I = 1, N
321               U(I) = R(I) + BK*Q(I)
322               V1(I) = Q(I) + BK*P(I)
323 40         CONTINUE
324            DO 50 I = 1, N
325               P(I) = U(I) + BK*V1(I)
326 50         CONTINUE
327         ENDIF
328C
329C         Calculate coefficient AK, new iterate X, Q
330         CALL MATVEC(N, P, V2, NELT, IA, JA, A, ISYM)
331         CALL MSOLVE(N, V2, V1, NELT, IA, JA, A, ISYM, RWORK, IWORK)
332         SIGMA = SDOT(N, R0, 1, V1, 1)
333         IF( ABS(SIGMA).LT.FUZZ ) GOTO 999
334         AK = RHON/SIGMA
335         AKM = -AK
336         DO 60 I = 1, N
337            Q(I) = U(I) + AKM*V1(I)
338 60      CONTINUE
339         DO 70 I = 1, N
340            V1(I) = U(I) + Q(I)
341 70      CONTINUE
342C         X = X - ak*V1.
343         CALL SAXPY( N, AKM, V1, 1, X, 1 )
344C                     -1
345C         R = R - ak*M  *A*V1
346         CALL MATVEC(N, V1, V2, NELT, IA, JA, A, ISYM)
347         CALL MSOLVE(N, V2, V1, NELT, IA, JA, A, ISYM, RWORK, IWORK)
348         CALL SAXPY( N, AKM, V1, 1, R, 1 )
349C
350C         check stopping criterion.
351         IF( ISSCGS(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MSOLVE,
352     $        ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, R0, P, Q,
353     $        U, V1, V2, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
354     $        GO TO 200
355C
356C         Update RHO.
357         RHONM1 = RHON
358 100  CONTINUE
359C
360C         *****   end of loop  *****
361C         Stopping criterion not satisfied.
362      ITER = ITMAX + 1
363      IERR = 2
364 200  RETURN
365C
366C         Breakdown of method detected.
367 998  IERR = 5
368      RETURN
369C
370C         Stagnation of method detected.
371 999  IERR = 6
372      RETURN
373C------------- LAST LINE OF SCGS FOLLOWS ----------------------------
374      END
375