1*DECK DBCG
2      SUBROUTINE DBCG (N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
3     +   MSOLVE, MTSOLV, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT, R, Z,
4     +   P, RR, ZZ, PP, DZ, RWORK, IWORK)
5C***BEGIN PROLOGUE  DBCG
6C***PURPOSE  Preconditioned BiConjugate Gradient Sparse Ax = b Solver.
7C            Routine to solve a Non-Symmetric linear system  Ax = b
8C            using the Preconditioned BiConjugate Gradient method.
9C***LIBRARY   SLATEC (SLAP)
10C***CATEGORY  D2A4, D2B4
11C***TYPE      DOUBLE PRECISION (SBCG-S, DBCG-D)
12C***KEYWORDS  BICONJUGATE GRADIENT, ITERATIVE PRECONDITION,
13C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
14C***AUTHOR  Greenbaum, Anne, (Courant Institute)
15C           Seager, Mark K., (LLNL)
16C             Lawrence Livermore National Laboratory
17C             PO BOX 808, L-60
18C             Livermore, CA 94550 (510) 423-3141
19C             seager@llnl.gov
20C***DESCRIPTION
21C
22C *Usage:
23C      INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, ITOL, ITMAX
24C      INTEGER ITER, IERR, IUNIT, IWORK(USER DEFINED)
25C      DOUBLE PRECISION B(N), X(N), A(NELT), TOL, ERR, R(N), Z(N), P(N)
26C      DOUBLE PRECISION RR(N), ZZ(N), PP(N), DZ(N)
27C      DOUBLE PRECISION RWORK(USER DEFINED)
28C      EXTERNAL MATVEC, MTTVEC, MSOLVE, MTSOLV
29C
30C      CALL DBCG(N, B, X, NELT, IA, JA, A, ISYM, MATVEC, MTTVEC,
31C     $     MSOLVE, MTSOLV, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT,
32C     $     R, Z, P, RR, ZZ, PP, DZ, RWORK, IWORK)
33C
34C *Arguments:
35C N      :IN       Integer
36C         Order of the Matrix.
37C B      :IN       Double Precision B(N).
38C         Right-hand side vector.
39C X      :INOUT    Double Precision X(N).
40C         On input X is your initial guess for solution vector.
41C         On output X is the final approximate solution.
42C NELT   :IN       Integer.
43C         Number of Non-Zeros stored in A.
44C IA     :IN       Integer IA(NELT).
45C JA     :IN       Integer JA(NELT).
46C A      :IN       Double Precision A(NELT).
47C         These arrays contain the matrix data structure for A.
48C         It could take any form.  See "Description", below, for more
49C         details.
50C ISYM   :IN       Integer.
51C         Flag to indicate symmetric storage format.
52C         If ISYM=0, all non-zero entries of the matrix are stored.
53C         If ISYM=1, the matrix is symmetric, and only the upper
54C         or lower triangle of the matrix is stored.
55C MATVEC :EXT      External.
56C         Name of a routine which  performs the matrix vector multiply
57C         operation  Y = A*X  given A and X.  The  name of  the MATVEC
58C         routine must  be declared external  in the  calling program.
59C         The calling sequence of MATVEC is:
60C             CALL MATVEC( N, X, Y, NELT, IA, JA, A, ISYM )
61C         Where N is the number of unknowns, Y is the product A*X upon
62C         return,  X is an input  vector.  NELT, IA,  JA,  A and  ISYM
63C         define the SLAP matrix data structure: see Description,below.
64C MTTVEC :EXT      External.
65C         Name of a routine which performs the matrix transpose vector
66C         multiply y = A'*X given A and X (where ' denotes transpose).
67C         The name of the MTTVEC routine must be declared external  in
68C         the calling program.  The calling sequence to MTTVEC is  the
69C         same as that for MTTVEC, viz.:
70C             CALL MTTVEC( N, X, Y, NELT, IA, JA, A, ISYM )
71C         Where N  is the number  of unknowns, Y is the   product A'*X
72C         upon return, X is an input vector.  NELT, IA, JA, A and ISYM
73C         define the SLAP matrix data structure: see Description,below.
74C MSOLVE :EXT      External.
75C         Name of a routine which solves a linear system MZ = R  for Z
76C         given R with the preconditioning matrix M (M is supplied via
77C         RWORK  and IWORK arrays).   The name  of  the MSOLVE routine
78C         must be declared  external  in the  calling   program.   The
79C         calling sequence of MSOLVE is:
80C             CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
81C         Where N is the number of unknowns, R is  the right-hand side
82C         vector, and Z is the solution upon return.  NELT,  IA, JA, A
83C         and  ISYM define the SLAP  matrix  data structure: see
84C         Description, below.  RWORK is a  double precision array that
85C         can be used to pass necessary preconditioning information and/
86C         or workspace to MSOLVE.  IWORK is an integer work array for
87C         the same purpose as RWORK.
88C MTSOLV :EXT      External.
89C         Name of a routine which solves a linear system M'ZZ = RR for
90C         ZZ given RR with the preconditioning matrix M (M is supplied
91C         via RWORK and IWORK arrays).  The name of the MTSOLV routine
92C         must be declared external in the calling program.  The call-
93C         ing sequence to MTSOLV is:
94C            CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
95C         Where N is the number of unknowns, RR is the right-hand side
96C         vector, and ZZ is the solution upon return.  NELT, IA, JA, A
97C         and  ISYM define the SLAP  matrix  data structure: see
98C         Description, below.  RWORK is a  double precision array that
99C         can be used to pass necessary preconditioning information and/
100C         or workspace to MTSOLV.  IWORK is an integer work array for
101C         the same purpose as RWORK.
102C ITOL   :IN       Integer.
103C         Flag to indicate type of convergence criterion.
104C         If ITOL=1, iteration stops when the 2-norm of the residual
105C         divided by the 2-norm of the right-hand side is less than TOL.
106C         If ITOL=2, iteration stops when the 2-norm of M-inv times the
107C         residual divided by the 2-norm of M-inv times the right hand
108C         side is less than TOL, where M-inv is the inverse of the
109C         diagonal of A.
110C         ITOL=11 is often useful for checking and comparing different
111C         routines.  For this case, the user must supply the "exact"
112C         solution or a very accurate approximation (one with an error
113C         much less than TOL) through a common block,
114C             COMMON /DSLBLK/ SOLN( )
115C         If ITOL=11, iteration stops when the 2-norm of the difference
116C         between the iterative approximation and the user-supplied
117C         solution divided by the 2-norm of the user-supplied solution
118C         is less than TOL.  Note that this requires the user to set up
119C         the "COMMON /DSLBLK/ SOLN(LENGTH)" in the calling routine.
120C         The routine with this declaration should be loaded before the
121C         stop test so that the correct length is used by the loader.
122C         This procedure is not standard Fortran and may not work
123C         correctly on your system (although it has worked on every
124C         system the authors have tried).  If ITOL is not 11 then this
125C         common block is indeed standard Fortran.
126C TOL    :INOUT    Double Precision.
127C         Convergence criterion, as described above.  (Reset if IERR=4.)
128C ITMAX  :IN       Integer.
129C         Maximum number of iterations.
130C ITER   :OUT      Integer.
131C         Number of iterations required to reach convergence, or
132C         ITMAX+1 if convergence criterion could not be achieved in
133C         ITMAX iterations.
134C ERR    :OUT      Double Precision.
135C         Error estimate of error in final approximate solution, as
136C         defined by ITOL.
137C IERR   :OUT      Integer.
138C         Return error flag.
139C           IERR = 0 => All went well.
140C           IERR = 1 => Insufficient space allocated for WORK or IWORK.
141C           IERR = 2 => Method failed to converge in ITMAX steps.
142C           IERR = 3 => Error in user input.
143C                       Check input values of N, ITOL.
144C           IERR = 4 => User error tolerance set too tight.
145C                       Reset to 500*D1MACH(3).  Iteration proceeded.
146C           IERR = 5 => Preconditioning matrix, M, is not positive
147C                       definite.  (r,z) < 0.
148C           IERR = 6 => Matrix A is not positive definite.  (p,Ap) < 0.
149C IUNIT  :IN       Integer.
150C         Unit number on which to write the error at each iteration,
151C         if this is desired for monitoring convergence.  If unit
152C         number is 0, no writing will occur.
153C R      :WORK     Double Precision R(N).
154C Z      :WORK     Double Precision Z(N).
155C P      :WORK     Double Precision P(N).
156C RR     :WORK     Double Precision RR(N).
157C ZZ     :WORK     Double Precision ZZ(N).
158C PP     :WORK     Double Precision PP(N).
159C DZ     :WORK     Double Precision DZ(N).
160C         Double Precision arrays used for workspace.
161C RWORK  :WORK     Double Precision RWORK(USER DEFINED).
162C         Double Precision array that can be used for workspace in
163C         MSOLVE and MTSOLV.
164C IWORK  :WORK     Integer IWORK(USER DEFINED).
165C         Integer array that can be used for workspace in MSOLVE
166C         and MTSOLV.
167C
168C *Description
169C      This routine does not care what matrix data structure is used
170C       for A and M.  It simply calls MATVEC, MTTVEC, MSOLVE, MTSOLV
171C       routines, with arguments as above.  The user could write any
172C       type of structure, and  appropriate  MATVEC, MSOLVE, MTTVEC,
173C       and MTSOLV routines.  It  is assumed that A is stored in the
174C       IA, JA, A  arrays in some fashion and  that M (or INV(M)) is
175C       stored  in  IWORK  and  RWORK   in  some fashion.   The SLAP
176C       routines DSDBCG and DSLUBC are examples of this procedure.
177C
178C       Two  examples  of  matrix  data structures  are the: 1) SLAP
179C       Triad  format and 2) SLAP Column format.
180C
181C       =================== S L A P Triad format ===================
182C       In  this   format only the  non-zeros are  stored.  They may
183C       appear  in *ANY* order.   The user  supplies three arrays of
184C       length NELT, where  NELT  is the number  of non-zeros in the
185C       matrix:  (IA(NELT), JA(NELT),  A(NELT)).  For each  non-zero
186C       the  user puts   the row  and  column index   of that matrix
187C       element in the IA and JA arrays.  The  value of the non-zero
188C       matrix  element is  placed in  the corresponding location of
189C       the A  array.  This is  an extremely easy data  structure to
190C       generate.  On  the other hand it  is  not too  efficient  on
191C       vector  computers   for the  iterative  solution  of  linear
192C       systems.  Hence, SLAP  changes this input  data structure to
193C       the SLAP   Column  format for the  iteration (but   does not
194C       change it back).
195C
196C       Here is an example of the  SLAP Triad   storage format for a
197C       5x5 Matrix.  Recall that the entries may appear in any order.
198C
199C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
200C                              1  2  3  4  5  6  7  8  9 10 11
201C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
202C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
203C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
204C       | 0  0  0 44  0|
205C       |51  0 53  0 55|
206C
207C       =================== S L A P Column format ==================
208C
209C       In  this format   the non-zeros are    stored counting  down
210C       columns (except  for the diagonal  entry, which must  appear
211C       first  in each "column") and are  stored in the  double pre-
212C       cision array  A. In  other  words,  for each  column  in the
213C       matrix  first put  the diagonal entry in A.  Then put in the
214C       other non-zero  elements going  down the column  (except the
215C       diagonal)  in order.  The IA array  holds the  row index for
216C       each non-zero.  The JA array  holds the offsets into the IA,
217C       A  arrays  for  the  beginning  of  each  column.  That  is,
218C       IA(JA(ICOL)),A(JA(ICOL)) are the first elements of the ICOL-
219C       th column in IA and A, and IA(JA(ICOL+1)-1), A(JA(ICOL+1)-1)
220C       are  the last elements of the ICOL-th column.   Note that we
221C       always have JA(N+1)=NELT+1, where N is the number of columns
222C       in the matrix  and NELT  is the number  of non-zeros  in the
223C       matrix.
224C
225C       Here is an example of the  SLAP Column  storage format for a
226C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
227C       column):
228C
229C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
230C                              1  2  3    4  5    6  7    8    9 10 11
231C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
232C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
233C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
234C       | 0  0  0 44  0|
235C       |51  0 53  0 55|
236C
237C *Cautions:
238C     This routine will attempt to write to the Fortran logical output
239C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
240C     this logical unit is attached to a file or terminal before calling
241C     this routine with a non-zero value for IUNIT.  This routine does
242C     not check for the validity of a non-zero IUNIT unit number.
243C
244C***SEE ALSO  DSDBCG, DSLUBC
245C***REFERENCES  1. Mark K. Seager, A SLAP for the Masses, in
246C                  G. F. Carey, Ed., Parallel Supercomputing: Methods,
247C                  Algorithms and Applications, Wiley, 1989, pp.135-155.
248C***ROUTINES CALLED  D1MACH, DAXPY, DCOPY, DDOT, ISDBCG
249C***REVISION HISTORY  (YYMMDD)
250C   890404  DATE WRITTEN
251C   890404  Previous REVISION DATE
252C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
253C   890921  Removed TeX from comments.  (FNF)
254C   890922  Numerous changes to prologue to make closer to SLATEC
255C           standard.  (FNF)
256C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
257C   891004  Added new reference.
258C   910411  Prologue converted to Version 4.0 format.  (BAB)
259C   910502  Removed MATVEC, MTTVEC, MSOLVE, MTSOLV from ROUTINES
260C           CALLED list.  (FNF)
261C   920407  COMMON BLOCK renamed DSLBLK.  (WRB)
262C   920511  Added complete declaration section.  (WRB)
263C   920929  Corrected format of reference.  (FNF)
264C   921019  Changed 500.0 to 500 to reduce SP/DP differences.  (FNF)
265C   921113  Corrected C***CATEGORY line.  (FNF)
266C***END PROLOGUE  DBCG
267C     .. Scalar Arguments ..
268      DOUBLE PRECISION ERR, TOL
269      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, N, NELT
270C     .. Array Arguments ..
271      DOUBLE PRECISION A(NELT), B(N), DZ(N), P(N), PP(N), R(N), RR(N),
272     +                 RWORK(*), X(N), Z(N), ZZ(N)
273      INTEGER IA(NELT), IWORK(*), JA(NELT)
274C     .. Subroutine Arguments ..
275      EXTERNAL MATVEC, MSOLVE, MTSOLV, MTTVEC
276C     .. Local Scalars ..
277      DOUBLE PRECISION AK, AKDEN, BK, BKDEN, BKNUM, BNRM, FUZZ, SOLNRM,
278     +                 TOLMIN
279      INTEGER I, K
280C     .. External Functions ..
281      DOUBLE PRECISION D1MACH, DDOT
282      INTEGER ISDBCG
283      EXTERNAL D1MACH, DDOT, ISDBCG
284C     .. External Subroutines ..
285      EXTERNAL DAXPY, DCOPY
286C     .. Intrinsic Functions ..
287      INTRINSIC ABS
288C***FIRST EXECUTABLE STATEMENT  DBCG
289C
290C         Check some of the input data.
291C
292      ITER = 0
293      IERR = 0
294      IF( N.LT.1 ) THEN
295         IERR = 3
296         RETURN
297      ENDIF
298      FUZZ = D1MACH(3)
299      TOLMIN = 500*FUZZ
300      FUZZ = FUZZ*FUZZ
301      IF( TOL.LT.TOLMIN ) THEN
302         TOL = TOLMIN
303         IERR = 4
304      ENDIF
305C
306C         Calculate initial residual and pseudo-residual, and check
307C         stopping criterion.
308      CALL MATVEC(N, X, R, NELT, IA, JA, A, ISYM)
309      DO 10 I = 1, N
310         R(I)  = B(I) - R(I)
311         RR(I) = R(I)
312 10   CONTINUE
313      CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
314      CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
315C
316      IF( ISDBCG(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
317     $     ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, RR, ZZ, PP,
318     $     DZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
319     $     GO TO 200
320      IF( IERR.NE.0 ) RETURN
321C
322C         ***** iteration loop *****
323C
324      DO 100 K=1,ITMAX
325         ITER = K
326C
327C         Calculate coefficient BK and direction vectors P and PP.
328         BKNUM = DDOT(N, Z, 1, RR, 1)
329         IF( ABS(BKNUM).LE.FUZZ ) THEN
330            IERR = 6
331            RETURN
332         ENDIF
333         IF(ITER .EQ. 1) THEN
334            CALL DCOPY(N, Z, 1, P, 1)
335            CALL DCOPY(N, ZZ, 1, PP, 1)
336         ELSE
337            BK = BKNUM/BKDEN
338            DO 20 I = 1, N
339               P(I) = Z(I) + BK*P(I)
340               PP(I) = ZZ(I) + BK*PP(I)
341 20         CONTINUE
342         ENDIF
343         BKDEN = BKNUM
344C
345C         Calculate coefficient AK, new iterate X, new residuals R and
346C         RR, and new pseudo-residuals Z and ZZ.
347         CALL MATVEC(N, P, Z, NELT, IA, JA, A, ISYM)
348         AKDEN = DDOT(N, PP, 1, Z, 1)
349         AK = BKNUM/AKDEN
350         IF( ABS(AKDEN).LE.FUZZ ) THEN
351            IERR = 6
352            RETURN
353         ENDIF
354         CALL DAXPY(N, AK, P, 1, X, 1)
355         CALL DAXPY(N, -AK, Z, 1, R, 1)
356         CALL MTTVEC(N, PP, ZZ, NELT, IA, JA, A, ISYM)
357         CALL DAXPY(N, -AK, ZZ, 1, RR, 1)
358         CALL MSOLVE(N, R, Z, NELT, IA, JA, A, ISYM, RWORK, IWORK)
359         CALL MTSOLV(N, RR, ZZ, NELT, IA, JA, A, ISYM, RWORK, IWORK)
360C
361C         check stopping criterion.
362         IF( ISDBCG(N, B, X, NELT, IA, JA, A, ISYM, MSOLVE, ITOL, TOL,
363     $        ITMAX, ITER, ERR, IERR, IUNIT, R, Z, P, RR, ZZ,
364     $        PP, DZ, RWORK, IWORK, AK, BK, BNRM, SOLNRM) .NE. 0 )
365     $        GO TO 200
366C
367 100  CONTINUE
368C
369C         *****   end of loop  *****
370C
371C         stopping criterion not satisfied.
372      ITER = ITMAX + 1
373      IERR = 2
374C
375 200  RETURN
376C------------- LAST LINE OF DBCG FOLLOWS ----------------------------
377      END
378