1*DECK SCGN
2      SUBROUTINE SCGN (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
3     +   MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, ATP,
4     +   ATZ, DZ, ATDZ, RWORK, IWORK)
5C***BEGIN PROLOGUE  SCGN
6C***PURPOSE  Preconditioned CG Sparse Ax=b Solver for Normal Equations.
7C            Routine to solve a general linear system  Ax = b  using the
8C            Preconditioned Conjugate Gradient method applied to the
9C            normal equations  AA'y = b, x=A'y.
10C***LIBRARY   SLATEC (SLAP)
11C***CATEGORY  D2A4, D2B4
12C***TYPE      SINGLE PRECISION (SCGN-S, DCGN-D)
13C***KEYWORDS  ITERATIVE PRECONDITION, NON-SYMMETRIC LINEAR SYSTEM SOLVE,
14C             NORMAL EQUATIONS., SLAP, SPARSE
15C***AUTHOR  Greenbaum, Anne, (Courant Institute)
16C           Seager, Mark K., (LLNL)
17C             Lawrence Livermore National Laboratory
18C             PO BOX 808, L-60
19C             Livermore, CA 94550 (510) 423-3141
20C             seager@llnl.gov
21C***DESCRIPTION
22C
23C *Usage:
24C     INTEGER  N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
25C     INTEGER  ITER, IERR, IUNIT, IWORK(USER DEFINED)
26C     REAL     B(N), X(N), A(NELT), TOL, ERR, R(N), Z(N)
27C     REAL     P(N), ATP(N), ATZ(N), DZ(N), ATDZ(N)
28C     REAL     RWORK(USER DEFINED)
29C     EXTERNAL MATVEC, MTTVEC, MSOLVE
30C
31C     CALL SCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
32C    $     MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R,
33C    $     Z, P, ATP, ATZ, DZ, ATDZ, RWORK, IWORK)
34C
35C *Arguments:
36C N      :IN       Integer
37C         Order of the Matrix.
38C B      :IN       Real B(N).
39C         Right-hand side vector.
40C X      :INOUT    Real X(N).
41C         On input X is your initial guess for solution vector.
42C         On output X is the final approximate solution.
43C NELT   :IN       Integer.
44C         Number of Non-Zeros stored in A.
45C IA     :IN       Integer IA(NELT).
46C JA     :IN       Integer JA(NELT).
47C A      :IN       Real A(NELT).
48C         These arrays contain the matrix data structure for A.
49C         It could take any form.  See "Description", below,
50C         for more details.
51C ISYM   :IN       Integer.
52C         Flag to indicate symmetric storage format.
53C         If ISYM=0, all non-zero entries of the matrix are stored.
54C         If ISYM=1, the matrix is symmetric, and only the upper
55C         or lower triangle of the matrix is stored.
56C MATVEC :EXT      External.
57C         Name of a routine which performs the matrix vector multiply
58C         y = A*X given A and X.  The name of the MATVEC routine must
59C         be declared external in the calling program.  The calling
60C         sequence to MATVEC is:
61C             CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
62C         Where N is the number of unknowns, Y is the product A*X
63C         upon return X is an input vector, NELT is the number of
64C         non-zeros in the SLAP-Column IA, JA, A storage for the matrix
65C         A.  ISYM is a flag which, if non-zero, denotes that A is
66C         symmetric and only the lower or upper triangle is stored.
67C MTTVEC :EXT      External.
68C         Name of a routine which performs the matrix transpose vector
69C         multiply y = A'*X given A and X (where ' denotes transpose).
70C         The name of the MTTVEC routine must be declared external in
71C         the calling program.  The calling sequence to MTTVEC is the
72C         same as that for MATVEC, viz.:
73C             CALL MTTVEC( N, X, Y, NELT, IA, JA, A, ISYM )
74C         Where N is the number of unknowns, Y is the product A'*X
75C         upon return X is an input vector, NELT is the number of
76C         non-zeros in the SLAP-Column IA, JA, A storage for the matrix
77C         A.  ISYM is a flag which, if non-zero, denotes that A is
78C         symmetric and only the lower or upper triangle is stored.
79C MSOLVE :EXT      External.
80C         Name of a routine which solves a linear system MZ = R for
81C         Z given R with the preconditioning matrix M (M is supplied via
82C         RWORK and IWORK arrays).  The name of the MSOLVE routine must
83C         be declared external in the calling program.  The calling
84C         sequence to MSOLVE is:
85C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
86C         Where N is the number of unknowns, R is the right-hand side
87C         vector and Z is the solution upon return.  NELT, IA, JA, A and
88C         ISYM are defined as above.  RWORK is a real array that can
89C         be used to pass necessary preconditioning information and/or
90C         workspace to MSOLVE.  IWORK is an integer work array for
91C         the same purpose as RWORK.
92C ITOL   :IN       Integer.
93C         Flag to indicate type of convergence criterion.
94C         If ITOL=1, iteration stops when the 2-norm of the residual
95C         divided by the 2-norm of the right-hand side is less than TOL.
96C         If ITOL=2, iteration stops when the 2-norm of M-inv times the
97C         residual divided by the 2-norm of M-inv times the right hand
98C         side is less than TOL, where M-inv is the inverse of the
99C         diagonal of A.
100C         ITOL=11 is often useful for checking and comparing different
101C         routines.  For this case, the user must supply the "exact"
102C         solution or a very accurate approximation (one with an error
103C         much less than TOL) through a common block,
104C             COMMON /SSLBLK/ SOLN( )
105C         If ITOL=11, iteration stops when the 2-norm of the difference
106C         between the iterative approximation and the user-supplied
107C         solution divided by the 2-norm of the user-supplied solution
108C         is less than TOL.  Note that this requires the user to set up
109C         the "COMMON /SSLBLK/ SOLN(LENGTH)" in the calling routine.
110C         The routine with this declaration should be loaded before the
111C         stop test so that the correct length is used by the loader.
112C         This procedure is not standard Fortran and may not work
113C         correctly on your system (although it has worked on every
114C         system the authors have tried).  If ITOL is not 11 then this
115C         common block is indeed standard Fortran.
116C TOL    :INOUT    Real.
117C         Convergence criterion, as described above.  (Reset if IERR=4.)
118C ITMAX  :IN       Integer.
119C         Maximum number of iterations.
120C ITER   :OUT      Integer.
121C         Number of iterations required to reach convergence, or
122C         ITMAX+1 if convergence criterion could not be achieved in
123C         ITMAX iterations.
124C ERR    :OUT      Real.
125C         Error estimate of error in final approximate solution, as
126C         defined by ITOL.
127C IERR   :OUT      Integer.
128C         Return error flag.
129C           IERR = 0 => All went well.
130C           IERR = 1 => Insufficient space allocated for WORK or IWORK.
131C           IERR = 2 => Method failed to converge in ITMAX steps.
132C           IERR = 3 => Error in user input.
133C                       Check input values of N, ITOL.
134C           IERR = 4 => User error tolerance set too tight.
135C                       Reset to 500*R1MACH(3).  Iteration proceeded.
136C           IERR = 5 => Preconditioning matrix, M, is not positive
137C                       definite.  (r,z) < 0.
138C           IERR = 6 => Matrix A is not positive definite.  (p,Ap) < 0.
139C IUNIT  :IN       Integer.
140C         Unit number on which to write the error at each iteration,
141C         if this is desired for monitoring convergence.  If unit
142C         number is 0, no writing will occur.
143C R      :WORK     Real R(N).
144C Z      :WORK     Real Z(N).
145C P      :WORK     Real P(N).
146C ATP    :WORK     Real ATP(N).
147C ATZ    :WORK     Real ATZ(N).
148C DZ     :WORK     Real DZ(N).
149C ATDZ   :WORK     Real ATDZ(N).
150C         Real arrays used for workspace.
151C RWORK  :WORK     Real RWORK(USER DEFINED).
152C         Real array that can be used by  MSOLVE.
153C IWORK  :WORK     Integer IWORK(USER DEFINED).
154C         Integer array that can be used by  MSOLVE.
155C
156C *Description:
157C       This  routine applies the  preconditioned conjugate gradient
158C       (PCG) method to a non-symmetric system of equations Ax=b. To
159C       do this the normal equations are solved:
160C               AA' y  = b, where  x  = A'y.
161C       In PCG method the iteration count is determined by condition
162C                               -1
163C       number of the  matrix (M  A).   In the  situation where  the
164C       normal equations are  used  to solve a  non-symmetric system
165C       the condition number depends on  AA' and should therefore be
166C       much worse than that of A.  This is the conventional wisdom.
167C       When one has a good preconditioner for AA' this may not hold.
168C       The latter is the situation when SCGN should be tried.
169C
170C       If one is trying to solve  a symmetric system, SCG should be
171C       used instead.
172C
173C       This routine does  not care  what matrix data   structure is
174C       used for A and M.  It simply calls MATVEC, MTTVEC and MSOLVE
175C       routines, with arguments as described above.  The user could
176C       write any type of structure, and  appropriate MATVEC, MTTVEC
177C       and MSOLVE routines.  It is assumed  that A is stored in the
178C       IA, JA, A  arrays in some fashion and  that M (or INV(M)) is
179C       stored  in  IWORK  and  RWORK)  in  some fashion.   The SLAP
180C       routines SSDCGN and SSLUCN are examples of this procedure.
181C
182C       Two  examples  of  matrix  data structures  are the: 1) SLAP
183C       Triad  format and 2) SLAP Column format.
184C
185C       =================== S L A P Triad format ===================
186C
187C       In  this   format only the  non-zeros are  stored.  They may
188C       appear  in *ANY* order.   The user  supplies three arrays of
189C       length NELT, where  NELT  is the number  of non-zeros in the
190C       matrix:  (IA(NELT), JA(NELT),  A(NELT)).  For each  non-zero
191C       the  user puts   the row  and  column index   of that matrix
192C       element in the IA and JA arrays.  The  value of the non-zero
193C       matrix  element is  placed in  the corresponding location of
194C       the A  array.  This is  an extremely easy data  structure to
195C       generate.  On  the other hand it  is  not too  efficient  on
196C       vector  computers   for the  iterative  solution  of  linear
197C       systems.  Hence, SLAP  changes this input  data structure to
198C       the SLAP   Column  format for the  iteration (but   does not
199C       change it back).
200C
201C       Here is an example of the  SLAP Triad   storage format for a
202C       5x5 Matrix.  Recall that the entries may appear in any order.
203C
204C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
205C                              1  2  3  4  5  6  7  8  9 10 11
206C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
207C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
208C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
209C       | 0  0  0 44  0|
210C       |51  0 53  0 55|
211C
212C       =================== S L A P Column format ==================
213C
214C       In  this format   the non-zeros are    stored counting  down
215C       columns (except  for the diagonal  entry, which must  appear
216C       first in each "column") and are  stored in the real array A.
217C       In other words,  for  each column    in the matrix   put the
218C       diagonal  entry  in A.   Then   put  in the  other  non-zero
219C       elements going   down the  column (except  the  diagonal) in
220C       order.  The IA array holds the row index  for each non-zero.
221C       The JA array holds the offsets into the IA, A arrays for the
222C       beginning   of   each  column.      That is,   IA(JA(ICOL)),
223C       A(JA(ICOL)) points to the beginning of the ICOL-th column in
224C       IA and  A.  IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)  points to the
225C       end of the ICOL-th column.  Note that we always have JA(N+1)
226C       = NELT+1, where N is the number of columns in the matrix and
227C       NELT is the number of non-zeros in the matrix.
228C
229C       Here is an example of the  SLAP Column  storage format for a
230C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
231C       column):
232C
233C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
234C                              1  2  3    4  5    6  7    8    9 10 11
235C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
236C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
237C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
238C       | 0  0  0 44  0|
239C       |51  0 53  0 55|
240C
241C *Cautions:
242C     This routine will attempt to write to the Fortran logical output
243C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
244C     this logical unit is attached to a file or terminal before calling
245C     this routine with a non-zero value for IUNIT.  This routine does
246C     not check for the validity of a non-zero IUNIT unit number.
247C
248C***SEE ALSO  SSDCGN, SSLUCN, ISSCGN
249C***REFERENCES  1. Mark K. Seager, A SLAP for the Masses, in
250C                  G. F. Carey, Ed., Parallel Supercomputing: Methods,
251C                  Algorithms and Applications, Wiley, 1989, pp.135-155.
252C***ROUTINES CALLED  ISSCGN, R1MACH, SAXPY, SCOPY, SDOT
253C***REVISION HISTORY  (YYMMDD)
254C   871119  DATE WRITTEN
255C   881213  Previous REVISION DATE
256C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
257C   890921  Removed TeX from comments.  (FNF)
258C   890922  Numerous changes to prologue to make closer to SLATEC
259C           standard.  (FNF)
260C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
261C   891004  Added new reference.
262C   910411  Prologue converted to Version 4.0 format.  (BAB)
263C   910502  Removed MATVEC, MTTVEC and MSOLVE from ROUTINES CALLED
264C           list.  (FNF)
265C   920407  COMMON BLOCK renamed SSLBLK.  (WRB)
266C   920511  Added complete declaration section.  (WRB)
267C   920929  Corrected format of reference.  (FNF)
268C   921019  Changed 500.0 to 500 to reduce SP/DP differences.  (FNF)
269C   921113  Corrected C***CATEGORY line.  (FNF)
270C***END PROLOGUE  SCGN
271C     .. Scalar Arguments ..
272      REAL ERR, TOL
273      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
274C     .. Array Arguments ..
275      REAL A(N), ATDZ(N), ATP(N), ATZ(N), B(N), DZ(N), P(N), R(N),
276     +     RWORK(*), X(N), Z(N)
277      INTEGER IA(NELT), IWORK(*), JA(NELT)
278C     .. Subroutine Arguments ..
279      EXTERNAL MATVEC, MSOLVE, MTTVEC
280C     .. Local Scalars ..
281      REAL AK, AKDEN, BK, BKDEN, BKNUM, BNRM, SOLNRM, TOLMIN
282      INTEGER I, K
283C     .. External Functions ..
284      REAL R1MACH, SDOT
285      INTEGER ISSCGN
286      EXTERNAL R1MACH, SDOT, ISSCGN
287C     .. External Subroutines ..
288      EXTERNAL SAXPY, SCOPY
289C***FIRST EXECUTABLE STATEMENT  SCGN
290C
291C         Check user input.
292C
293      ITER = 0
294      IERR = 0
295      IF( N.LT.1 ) THEN
296         IERR = 3
297         RETURN
298      ENDIF
299      TOLMIN = 500*R1MACH(3)
300      IF( TOL.LT.TOLMIN ) THEN
301         TOL = TOLMIN
302         IERR = 4
303      ENDIF
304C         Calculate initial residual and pseudo-residual, and check
305C         stopping criterion.
306      CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
307      DO 10 I = 1, N
308         R(I) = B(I) - R(I)
309 10   CONTINUE
310      CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
311      CALL MTTVEC(N, Z, ATZ, NELT, IA, JA, A, ISYM)
312C
313      IF( ISSCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC, MSOLVE,
314     $     ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, ATP, ATZ,
315     $     DZ, ATDZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
316     $     GO TO 200
317      IF( IERR.NE.0 ) RETURN
318C
319C         ***** iteration loop *****
320C
321      DO 100 K=1,ITMAX
322         ITER = K
323C
324C         Calculate coefficient BK and direction vector P.
325         BKNUM = SDOT(N, Z, 1, R, 1)
326         IF( BKNUM.LE.0.0E0 ) THEN
327            IERR = 6
328            RETURN
329         ENDIF
330         IF(ITER .EQ. 1) THEN
331            CALL SCOPY(N, Z, 1, P, 1)
332         ELSE
333            BK = BKNUM/BKDEN
334            DO 20 I = 1, N
335               P(I) = Z(I) + BK*P(I)
336 20         CONTINUE
337         ENDIF
338         BKDEN = BKNUM
339C
340C         Calculate coefficient AK, new iterate X, new residual R,
341C         and new pseudo-residual ATZ.
342         IF(ITER .NE. 1) CALL SAXPY(N, BK, ATP, 1, ATZ, 1)
343         CALL SCOPY(N, ATZ, 1, ATP, 1)
344         AKDEN = SDOT(N, ATP, 1, ATP, 1)
345         IF( AKDEN.LE.0.0E0 ) THEN
346            IERR = 6
347            RETURN
348         ENDIF
349         AK = BKNUM/AKDEN
350         CALL SAXPY(N, AK, ATP, 1, X, 1)
351         CALL MATVEC(N, ATP, Z, NELT, IA, JA, A, ISYM)
352         CALL SAXPY(N, -AK, Z, 1, R, 1)
353         CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
354         CALL MTTVEC(N, Z, ATZ, NELT, IA, JA, A, ISYM)
355C
356C         check stopping criterion.
357         IF( ISSCGN(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
358     $        MSOLVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R,
359     $        Z, P, ATP, ATZ, DZ, ATDZ, RWORK, IWORK, AK, BK, BNRM,
360     $        SOLNRM) .NE. 0) GOTO 200
361C
362 100  CONTINUE
363C
364C         *****   end of loop  *****
365C
366C         stopping criterion not satisfied.
367      ITER = ITMAX + 1
368C
369 200  RETURN
370C------------- LAST LINE OF SCGN FOLLOWS ----------------------------
371      END
372