1*DECK SSLUOM
2      SUBROUTINE SSLUOM (N, B, X, NELT, IA, JA, A, ISYM, NSAVE, ITOL,
3     +   TOL, ITMAX, ITER, ERR, IERR, IUNIT, RWORK, LENW, IWORK, LENIW)
4C***BEGIN PROLOGUE  SSLUOM
5C***PURPOSE  Incomplete LU Orthomin Sparse Iterative Ax=b Solver.
6C            Routine to solve a general linear system  Ax = b  using
7C            the Orthomin method with Incomplete LU decomposition.
8C***LIBRARY   SLATEC (SLAP)
9C***CATEGORY  D2A4, D2B4
10C***TYPE      SINGLE PRECISION (SSLUOM-S, DSLUOM-D)
11C***KEYWORDS  ITERATIVE INCOMPLETE LU PRECONDITION,
12C             NON-SYMMETRIC LINEAR SYSTEM, SLAP, SPARSE
13C***AUTHOR  Greenbaum, Anne, (Courant Institute)
14C           Seager, Mark K., (LLNL)
15C             Lawrence Livermore National Laboratory
16C             PO BOX 808, L-60
17C             Livermore, CA 94550 (510) 423-3141
18C             seager@llnl.gov
19C***DESCRIPTION
20C
21C *Usage:
22C     INTEGER N, NELT, IA(NELT), JA(NELT), ISYM, NSAVE, ITOL, ITMAX
23C     INTEGER ITER, IERR, IUNIT, LENW, IWORK(NL+NU+4*N+2), LENIW
24C     REAL B(N), X(N), A(NELT), TOL, ERR
25C     REAL RWORK(NL+NU+7*N+3*N*NSAVE+NSAVE)
26C
27C     CALL SSLUOM(N, B, X, NELT, IA, JA, A, ISYM, NSAVE, ITOL, TOL,
28C    $     ITMAX, ITER, ERR, IERR, IUNIT, RWORK, LENW, IWORK, LENIW )
29C
30C *Arguments:
31C N      :IN       Integer.
32C         Order of the matrix.
33C B      :IN       Real B(N).
34C         Right-hand side vector.
35C X      :INOUT    Real X(N).
36C         On input X is your initial guess for solution vector.
37C         On output X is the final approximate solution.
38C NELT   :IN       Integer.
39C         Number of Non-Zeros stored in A.
40C IA     :INOUT    Integer IA(NELT).
41C JA     :INOUT    Integer JA(NELT).
42C A      :INOUT    Real A(NELT).
43C         These arrays should hold the matrix A in either the SLAP
44C         Triad format or the SLAP Column format.  See "Description",
45C         below.  If the SLAP Triad format is chosen, it is changed
46C         internally to the SLAP Column format.
47C ISYM   :IN       Integer.
48C         Flag to indicate symmetric storage format.
49C         If ISYM=0, all non-zero entries of the matrix are stored.
50C         If ISYM=1, the matrix is symmetric, and only the upper
51C         or lower triangle of the matrix is stored.
52C NSAVE  :IN       Integer.
53C         Number of direction vectors to save and orthogonalize against.
54C ITOL   :IN       Integer.
55C         Flag to indicate type of convergence criterion.
56C         If ITOL=1, iteration stops when the 2-norm of the residual
57C         divided by the 2-norm of the right-hand side is less than TOL.
58C         If ITOL=2, iteration stops when the 2-norm of M-inv times the
59C         residual divided by the 2-norm of M-inv times the right hand
60C         side is less than TOL, where M-inv is the inverse of the
61C         diagonal of A.
62C         ITOL=11 is often useful for checking and comparing different
63C         routines.  For this case, the user must supply the "exact"
64C         solution or a very accurate approximation (one with an error
65C         much less than TOL) through a common block,
66C             COMMON /SSLBLK/ SOLN( )
67C         If ITOL=11, iteration stops when the 2-norm of the difference
68C         between the iterative approximation and the user-supplied
69C         solution divided by the 2-norm of the user-supplied solution
70C         is less than TOL.  Note that this requires the user to set up
71C         the "COMMON /SSLBLK/ SOLN(LENGTH)" in the calling routine.
72C         The routine with this declaration should be loaded before the
73C         stop test so that the correct length is used by the loader.
74C         This procedure is not standard Fortran and may not work
75C         correctly on your system (although it has worked on every
76C         system the authors have tried).  If ITOL is not 11 then this
77C         common block is indeed standard Fortran.
78C TOL    :INOUT    Real.
79C         Convergence criterion, as described above.  (Reset if IERR=4.)
80C ITMAX  :IN       Integer.
81C         Maximum number of iterations.
82C ITER   :OUT      Integer.
83C         Number of iterations required to reach convergence, or
84C         ITMAX+1 if convergence criterion could not be achieved in
85C         ITMAX iterations.
86C ERR    :OUT      Real.
87C         Error estimate of error in final approximate solution, as
88C         defined by ITOL.
89C IERR   :OUT      Integer.
90C         Return error flag.
91C           IERR = 0 => All went well.
92C           IERR = 1 => Insufficient space allocated for WORK or IWORK.
93C           IERR = 2 => Method failed to converge in ITMAX steps.
94C           IERR = 3 => Error in user input.
95C                       Check input values of N, ITOL.
96C           IERR = 4 => User error tolerance set too tight.
97C                       Reset to 500*R1MACH(3).  Iteration proceeded.
98C           IERR = 5 => Preconditioning matrix, M, is not positive
99C                       definite.  (r,z) < 0.
100C           IERR = 6 => Breakdown of the method detected.
101C                       (p,Ap) < epsilon**2.
102C           IERR = 7 => Incomplete factorization broke down and was
103C                       fudged.  Resulting preconditioning may be less
104C                       than the best.
105C IUNIT  :IN       Integer.
106C         Unit number on which to write the error at each iteration,
107C         if this is desired for monitoring convergence.  If unit
108C         number is 0, no writing will occur.
109C RWORK  :WORK     Real RWORK(LENW).
110C         Real array used for workspace.  NL is the number of non-
111C         zeros in the lower triangle of the matrix (including the
112C         diagonal).  NU is the number of non-zeros in the upper
113C         triangle of the matrix (including the diagonal).
114C LENW   :IN       Integer.
115C         Length of the real workspace, RWORK.
116C         LENW >= NL+NU+4*N+NSAVE*(3*N+1)
117C IWORK  :WORK     Integer IWORK(LENIW)
118C         Integer array used for workspace.  NL is the number of non-
119C         zeros in the lower triangle of the matrix (including the
120C         diagonal).  NU is the number of non-zeros in the upper
121C         triangle of the matrix (including the diagonal).
122C         Upon return the following locations of IWORK hold information
123C         which may be of use to the user:
124C         IWORK(9)  Amount of Integer workspace actually used.
125C         IWORK(10) Amount of Real workspace actually used.
126C LENIW  :IN       Integer.
127C         Length of the integer workspace, IWORK.
128C         LENIW >= NL+NU+4*N+12.
129C
130C *Description:
131C       This routine is  simply a driver  for  the SOMN routine.  It
132C       calls the SSILUS routine  to set  up the preconditioning and
133C       then  calls   SOMN  with the appropriate  MATVEC  and MSOLVE
134C       routines.
135C
136C       The Sparse Linear Algebra Package (SLAP) utilizes two matrix
137C       data structures: 1) the  SLAP Triad  format or  2)  the SLAP
138C       Column format.  The user can hand this routine either of the
139C       of these data structures and SLAP  will figure out  which on
140C       is being used and act accordingly.
141C
142C       =================== S L A P Triad format ===================
143C
144C       This routine requires that the  matrix A be   stored in  the
145C       SLAP  Triad format.  In  this format only the non-zeros  are
146C       stored.  They may appear in  *ANY* order.  The user supplies
147C       three arrays of  length NELT, where  NELT is  the number  of
148C       non-zeros in the matrix: (IA(NELT), JA(NELT), A(NELT)).  For
149C       each non-zero the user puts the row and column index of that
150C       matrix element  in the IA and  JA arrays.  The  value of the
151C       non-zero   matrix  element is  placed  in  the corresponding
152C       location of the A array.   This is  an  extremely  easy data
153C       structure to generate.  On  the  other hand it   is  not too
154C       efficient on vector computers for  the iterative solution of
155C       linear systems.  Hence,   SLAP changes   this  input    data
156C       structure to the SLAP Column format  for  the iteration (but
157C       does not change it back).
158C
159C       Here is an example of the  SLAP Triad   storage format for a
160C       5x5 Matrix.  Recall that the entries may appear in any order.
161C
162C           5x5 Matrix      SLAP Triad format for 5x5 matrix on left.
163C                              1  2  3  4  5  6  7  8  9 10 11
164C       |11 12  0  0 15|   A: 51 12 11 33 15 53 55 22 35 44 21
165C       |21 22  0  0  0|  IA:  5  1  1  3  1  5  5  2  3  4  2
166C       | 0  0 33  0 35|  JA:  1  2  1  3  5  3  5  2  5  4  1
167C       | 0  0  0 44  0|
168C       |51  0 53  0 55|
169C
170C       =================== S L A P Column format ==================
171C
172C       This routine  requires that  the matrix A  be stored in  the
173C       SLAP Column format.  In this format the non-zeros are stored
174C       counting down columns (except for  the diagonal entry, which
175C       must appear first in each  "column")  and are stored  in the
176C       real array A.  In other words, for each column in the matrix
177C       put the diagonal entry in A.  Then put in the other non-zero
178C       elements going down   the  column (except  the diagonal)  in
179C       order.  The IA array holds the row  index for each non-zero.
180C       The JA array holds the offsets into the IA, A arrays for the
181C       beginning of   each    column.    That  is,    IA(JA(ICOL)),
182C       A(JA(ICOL)) points to the beginning of the ICOL-th column in
183C       IA and  A.  IA(JA(ICOL+1)-1),  A(JA(ICOL+1)-1) points to the
184C       end  of   the ICOL-th  column.  Note   that  we  always have
185C       JA(N+1) = NELT+1, where  N  is the number of columns in  the
186C       matrix and  NELT   is the number of non-zeros in the matrix.
187C
188C       Here is an example of the  SLAP Column  storage format for a
189C       5x5 Matrix (in the A and IA arrays '|'  denotes the end of a
190C       column):
191C
192C           5x5 Matrix      SLAP Column format for 5x5 matrix on left.
193C                              1  2  3    4  5    6  7    8    9 10 11
194C       |11 12  0  0 15|   A: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35
195C       |21 22  0  0  0|  IA:  1  2  5 |  2  1 |  3  5 |  4 |  5  1  3
196C       | 0  0 33  0 35|  JA:  1  4  6    8  9   12
197C       | 0  0  0 44  0|
198C       |51  0 53  0 55|
199C
200C *Side Effects:
201C       The SLAP Triad format (IA, JA,  A) is modified internally to
202C       be the SLAP Column format.  See above.
203C
204C *Cautions:
205C     This routine will attempt to write to the Fortran logical output
206C     unit IUNIT, if IUNIT .ne. 0.  Thus, the user must make sure that
207C     this logical unit is attached to a file or terminal before calling
208C     this routine with a non-zero value for IUNIT.  This routine does
209C     not check for the validity of a non-zero IUNIT unit number.
210C
211C***SEE ALSO  SOMN, SSDOMN
212C***REFERENCES  (NONE)
213C***ROUTINES CALLED  SCHKW, SOMN, SS2Y, SSILUS, SSLUI, SSMV
214C***REVISION HISTORY  (YYMMDD)
215C   871119  DATE WRITTEN
216C   881213  Previous REVISION DATE
217C   890915  Made changes requested at July 1989 CML Meeting.  (MKS)
218C   890921  Removed TeX from comments.  (FNF)
219C   890922  Numerous changes to prologue to make closer to SLATEC
220C           standard.  (FNF)
221C   890929  Numerous changes to reduce SP/DP differences.  (FNF)
222C   910411  Prologue converted to Version 4.0 format.  (BAB)
223C   920407  COMMON BLOCK renamed SSLBLK.  (WRB)
224C   920511  Added complete declaration section.  (WRB)
225C   921019  Corrected NEL to NL.  (FNF)
226C   921113  Corrected C***CATEGORY line.  (FNF)
227C***END PROLOGUE  SSLUOM
228C     .. Parameters ..
229      INTEGER LOCRB, LOCIB
230      PARAMETER (LOCRB=1, LOCIB=11)
231C     .. Scalar Arguments ..
232      REAL ERR, TOL
233      INTEGER IERR, ISYM, ITER, ITMAX, ITOL, IUNIT, LENIW, LENW, N,
234     +        NELT, NSAVE
235C     .. Array Arguments ..
236      REAL A(N), B(N), RWORK(LENW), X(N)
237      INTEGER IA(NELT), IWORK(LENIW), JA(NELT)
238C     .. Local Scalars ..
239      INTEGER ICOL, J, JBGN, JEND, LOCAP, LOCCSA, LOCDIN, LOCDZ, LOCEMA,
240     +        LOCIL, LOCIU, LOCIW, LOCJL, LOCJU, LOCL, LOCNC, LOCNR,
241     +        LOCP, LOCR, LOCU, LOCW, LOCZ, NL, NU
242C     .. External Subroutines ..
243      EXTERNAL SCHKW, SOMN, SS2Y, SSILUS, SSLUI, SSMV
244C***FIRST EXECUTABLE STATEMENT  SSLUOM
245C
246      IERR = 0
247      IF( N.LT.1 .OR. NELT.LT.1 ) THEN
248         IERR = 3
249         RETURN
250      ENDIF
251C
252C         Change the SLAP input matrix IA, JA, A to SLAP-Column format.
253      CALL SS2Y( N, NELT, IA, JA, A, ISYM )
254C
255C         Count number of Non-Zero elements preconditioner ILU matrix.
256C         Then set up the work arrays.
257      NL = 0
258      NU = 0
259      DO 20 ICOL = 1, N
260C         Don't count diagonal.
261         JBGN = JA(ICOL)+1
262         JEND = JA(ICOL+1)-1
263         IF( JBGN.LE.JEND ) THEN
264CVD$ NOVECTOR
265            DO 10 J = JBGN, JEND
266               IF( IA(J).GT.ICOL ) THEN
267                  NL = NL + 1
268                  IF( ISYM.NE.0 ) NU = NU + 1
269               ELSE
270                  NU = NU + 1
271               ENDIF
272 10         CONTINUE
273         ENDIF
274 20   CONTINUE
275C
276      LOCIL = LOCIB
277      LOCJL = LOCIL + N+1
278      LOCIU = LOCJL + NL
279      LOCJU = LOCIU + NU
280      LOCNR = LOCJU + N+1
281      LOCNC = LOCNR + N
282      LOCIW = LOCNC + N
283C
284      LOCL   = LOCRB
285      LOCDIN = LOCL + NL
286      LOCU   = LOCDIN + N
287      LOCR   = LOCU + NU
288      LOCZ   = LOCR + N
289      LOCP   = LOCZ + N
290      LOCAP  = LOCP + N*(NSAVE+1)
291      LOCEMA = LOCAP + N*(NSAVE+1)
292      LOCDZ  = LOCEMA + N*(NSAVE+1)
293      LOCCSA = LOCDZ + N
294      LOCW   = LOCCSA + NSAVE
295C
296C         Check the workspace allocations.
297      CALL SCHKW( 'SSLUOM', LOCIW, LENIW, LOCW, LENW, IERR, ITER, ERR )
298      IF( IERR.NE.0 ) RETURN
299C
300      IWORK(1) = LOCIL
301      IWORK(2) = LOCJL
302      IWORK(3) = LOCIU
303      IWORK(4) = LOCJU
304      IWORK(5) = LOCL
305      IWORK(6) = LOCDIN
306      IWORK(7) = LOCU
307      IWORK(9) = LOCIW
308      IWORK(10) = LOCW
309C
310C         Compute the Incomplete LU decomposition.
311      CALL SSILUS( N, NELT, IA, JA, A, ISYM, NL, IWORK(LOCIL),
312     $     IWORK(LOCJL), RWORK(LOCL), RWORK(LOCDIN), NU, IWORK(LOCIU),
313     $     IWORK(LOCJU), RWORK(LOCU), IWORK(LOCNR), IWORK(LOCNC) )
314C
315C         Perform the incomplete LU preconditioned OrthoMin algorithm.
316      CALL SOMN(N, B, X, NELT, IA, JA, A, ISYM, SSMV,
317     $     SSLUI, NSAVE, ITOL, TOL, ITMAX, ITER, ERR, IERR, IUNIT,
318     $     RWORK(LOCR), RWORK(LOCZ), RWORK(LOCP), RWORK(LOCAP),
319     $     RWORK(LOCEMA), RWORK(LOCDZ), RWORK(LOCCSA),
320     $     RWORK, IWORK )
321      RETURN
322      END
323