1
2 /*! @file sfgmr.c
3 * \brief flexible GMRES from ITSOL developed by Yousef Saad.
4 */
5
6 /* ITSOL COPYRIGHT
7
8 Copyright (C) 2006, the University of Minnesota
9
10 ITSOL is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation [version 2 of the License, or any later version]
13 For details, see
14
15 http://www.gnu.org/licenses/gpl-2.0.txt
16
17 A copy of the GNU licencing agreement is attached to the ITSOL package
18 in the file GNU. For additional information contact the Free Software
19 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20
21 DISCLAIMER
22 ----------
23
24 This program is distributed in the hope that it will be useful, but
25 WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 General Public License for more details.
28
29 For information on ITSOL contact saad@cs.umn.edu
30 */
31
32 #include "slu_sdefs.h"
33
34 #define epsmac 1.0e-16
35
36 extern float sdot_(int *, float [], int *, float [], int *);
37 extern float snrm2_(int *, float [], int *);
38
39
sfgmr(int n,void (* smatvec)(float,float[],float,float[]),void (* spsolve)(int,float[],float[]),float * rhs,float * sol,double tol,int im,int * itmax,FILE * fits)40 int sfgmr(int n,
41 void (*smatvec) (float, float[], float, float[]),
42 void (*spsolve) (int, float[], float[]),
43 float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits)
44 {
45 /*----------------------------------------------------------------------
46 | *** Preconditioned FGMRES ***
47 +-----------------------------------------------------------------------
48 | This is a simple version of the ARMS preconditioned FGMRES algorithm.
49 +-----------------------------------------------------------------------
50 | Y. S. Dec. 2000. -- Apr. 2008
51 +-----------------------------------------------------------------------
52 | on entry:
53 |----------
54 |
55 | rhs = real vector of length n containing the right hand side.
56 | sol = real vector of length n containing an initial guess to the
57 | solution on input.
58 | tol = tolerance for stopping iteration
59 | im = Krylov subspace dimension
60 | (itmax) = max number of iterations allowed.
61 | fits = NULL: no output
62 | != NULL: file handle to output " resid vs time and its"
63 |
64 | on return:
65 |----------
66 | fgmr int = 0 --> successful return.
67 | int = 1 --> convergence not achieved in itmax iterations.
68 | sol = contains an approximate solution (upon successful return).
69 | itmax = has changed. It now contains the number of steps required
70 | to converge --
71 +-----------------------------------------------------------------------
72 | internal work arrays:
73 |----------
74 | vv = work array of length [im+1][n] (used to store the Arnoldi
75 | basis)
76 | hh = work array of length [im][im+1] (Householder matrix)
77 | z = work array of length [im][n] to store preconditioned vectors
78 +-----------------------------------------------------------------------
79 | subroutines called :
80 | matvec - matrix-vector multiplication operation
81 | psolve - (right) preconditionning operation
82 | psolve can be a NULL pointer (GMRES without preconditioner)
83 +---------------------------------------------------------------------*/
84
85 int maxits = *itmax;
86 int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
87 float beta, eps1 = 0.0, t, t0, gam;
88 float **hh, *c, *s, *rs;
89 float **vv, **z, tt;
90 float zero = 0.0;
91 float one = 1.0;
92
93 its = 0;
94 vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
95 for (i = 0; i <= im; i++) vv[i] = floatMalloc(n);
96 z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
97 hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
98 for (i = 0; i < im; i++)
99 {
100 hh[i] = floatMalloc(i + 2);
101 z[i] = floatMalloc(n);
102 }
103 c = floatMalloc(im);
104 s = floatMalloc(im);
105 rs = floatMalloc(im + 1);
106
107 /*---- outer loop starts here ----*/
108 do
109 {
110 /*---- compute initial residual vector ----*/
111 smatvec(one, sol, zero, vv[0]);
112 for (j = 0; j < n; j++)
113 vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */
114 beta = snrm2_(&n, vv[0], &i_1);
115
116 /*---- print info if fits != null ----*/
117 if (fits != NULL && its == 0)
118 fprintf(fits, "%8d %10.2e\n", its, beta);
119 /*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
120 if ( !(beta > tol * snrm2_(&n, rhs, &i_1)) )
121 break;
122 t = 1.0 / beta;
123
124 /*---- normalize: vv[0] = vv[0] / beta ----*/
125 for (j = 0; j < n; j++)
126 vv[0][j] = vv[0][j] * t;
127 if (its == 0)
128 eps1 = tol * beta;
129
130 /*---- initialize 1-st term of rhs of hessenberg system ----*/
131 rs[0] = beta;
132 for (i = 0; i < im; i++)
133 {
134 its++;
135 i1 = i + 1;
136
137 /*------------------------------------------------------------
138 | (Right) Preconditioning Operation z_{j} = M^{-1} v_{j}
139 +-----------------------------------------------------------*/
140 if (spsolve)
141 spsolve(n, z[i], vv[i]);
142 else
143 scopy_(&n, vv[i], &i_1, z[i], &i_1);
144
145 /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
146 smatvec(one, z[i], zero, vv[i1]);
147
148 /*------------------------------------------------------------
149 | modified gram - schmidt...
150 | h_{i,j} = (w,v_{i})
151 | w = w - h_{i,j} v_{i}
152 +------------------------------------------------------------*/
153 t0 = snrm2_(&n, vv[i1], &i_1);
154 for (j = 0; j <= i; j++)
155 {
156 float negt;
157 tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
158 hh[i][j] = tt;
159 negt = -tt;
160 saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
161 }
162
163 /*---- h_{j+1,j} = ||w||_{2} ----*/
164 t = snrm2_(&n, vv[i1], &i_1);
165 while (t < 0.5 * t0)
166 {
167 t0 = t;
168 for (j = 0; j <= i; j++)
169 {
170 float negt;
171 tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
172 hh[i][j] += tt;
173 negt = -tt;
174 saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
175 }
176 t = snrm2_(&n, vv[i1], &i_1);
177 }
178
179 hh[i][i1] = t;
180
181 if (t != 0.0)
182 {
183 /*---- v_{j+1} = w / h_{j+1,j} ----*/
184 t = 1.0 / t;
185 for (k = 0; k < n; k++)
186 vv[i1][k] = vv[i1][k] * t;
187 }
188 /*---------------------------------------------------
189 | done with modified gram schimdt and arnoldi step
190 | now update factorization of hh
191 +--------------------------------------------------*/
192
193 /*--------------------------------------------------------
194 | perform previous transformations on i-th column of h
195 +-------------------------------------------------------*/
196 for (k = 1; k <= i; k++)
197 {
198 k1 = k - 1;
199 tt = hh[i][k1];
200 hh[i][k1] = c[k1] * tt + s[k1] * hh[i][k];
201 hh[i][k] = -s[k1] * tt + c[k1] * hh[i][k];
202 }
203
204 gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));
205
206 /*---------------------------------------------------
207 | if gamma is zero then any small value will do
208 | affect only residual estimate
209 +--------------------------------------------------*/
210 /* if (gam == 0.0) gam = epsmac; */
211
212 /*---- get next plane rotation ---*/
213 if (gam == 0.0)
214 {
215 c[i] = one;
216 s[i] = zero;
217 }
218 else
219 {
220 c[i] = hh[i][i] / gam;
221 s[i] = hh[i][i1] / gam;
222 }
223
224 rs[i1] = -s[i] * rs[i];
225 rs[i] = c[i] * rs[i];
226
227 /*----------------------------------------------------
228 | determine residual norm and test for convergence
229 +---------------------------------------------------*/
230 hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
231 beta = fabs(rs[i1]);
232 if (fits != NULL)
233 fprintf(fits, "%8d %10.2e\n", its, beta);
234 if (beta <= eps1 || its >= maxits)
235 break;
236 }
237
238 if (i == im) i--;
239
240 /*---- now compute solution. 1st, solve upper triangular system ----*/
241 rs[i] = rs[i] / hh[i][i];
242
243 for (ii = 1; ii <= i; ii++)
244 {
245 k = i - ii;
246 k1 = k + 1;
247 tt = rs[k];
248 for (j = k1; j <= i; j++)
249 tt = tt - hh[j][k] * rs[j];
250 rs[k] = tt / hh[k][k];
251 }
252
253 /*---- linear combination of v[i]'s to get sol. ----*/
254 for (j = 0; j <= i; j++)
255 {
256 tt = rs[j];
257 for (k = 0; k < n; k++)
258 sol[k] += tt * z[j][k];
259 }
260
261 /* calculate the residual and output */
262 smatvec(one, sol, zero, vv[0]);
263 for (j = 0; j < n; j++)
264 vv[0][j] = rhs[j] - vv[0][j]; /* vv[0]= initial residual */
265
266 /*---- print info if fits != null ----*/
267 beta = snrm2_(&n, vv[0], &i_1);
268
269 /*---- restart outer loop if needed ----*/
270 /*if (beta >= eps1 / tol)*/
271 if ( !(beta < eps1 / tol) )
272 {
273 its = maxits + 10;
274 break;
275 }
276 if (beta <= eps1)
277 break;
278 } while(its < maxits);
279
280 retval = (its >= maxits);
281 for (i = 0; i <= im; i++)
282 SUPERLU_FREE(vv[i]);
283 SUPERLU_FREE(vv);
284 for (i = 0; i < im; i++)
285 {
286 SUPERLU_FREE(hh[i]);
287 SUPERLU_FREE(z[i]);
288 }
289 SUPERLU_FREE(hh);
290 SUPERLU_FREE(z);
291 SUPERLU_FREE(c);
292 SUPERLU_FREE(s);
293 SUPERLU_FREE(rs);
294
295 *itmax = its;
296
297 return retval;
298 } /*----end of fgmr ----*/
299