1 
2 /*! @file sfgmr.c
3  * \brief flexible GMRES from ITSOL developed by Yousef Saad.
4  */
5 
6 /* ITSOL COPYRIGHT
7 
8 Copyright (C) 2006, the University of Minnesota
9 
10 ITSOL is free software; you can redistribute it and/or modify it under
11 the terms of  the GNU General Public License as  published by the Free
12 Software Foundation [version 2 of the License, or any later version]
13 For details, see
14 
15 http://www.gnu.org/licenses/gpl-2.0.txt
16 
17 A copy of the GNU licencing agreement is attached to the ITSOL package
18 in the file GNU.  For additional information contact the Free Software
19 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 
21 DISCLAIMER
22 ----------
23 
24 This program  is distributed in the  hope that it will  be useful, but
25 WITHOUT   ANY  WARRANTY;   without  even   the  implied   warranty  of
26 MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.   See  the GNU
27 General Public License for more details.
28 
29 For information on ITSOL contact saad@cs.umn.edu
30 */
31 
32 #include "slu_sdefs.h"
33 
34 #define  epsmac  1.0e-16
35 
36 extern float sdot_(int *, float [], int *, float [], int *);
37 extern float snrm2_(int *, float [], int *);
38 
39 
sfgmr(int n,void (* smatvec)(float,float[],float,float[]),void (* spsolve)(int,float[],float[]),float * rhs,float * sol,double tol,int im,int * itmax,FILE * fits)40 int sfgmr(int n,
41      void (*smatvec) (float, float[], float, float[]),
42      void (*spsolve) (int, float[], float[]),
43      float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits)
44 {
45 /*----------------------------------------------------------------------
46 |                 *** Preconditioned FGMRES ***
47 +-----------------------------------------------------------------------
48 | This is a simple version of the ARMS preconditioned FGMRES algorithm.
49 +-----------------------------------------------------------------------
50 | Y. S. Dec. 2000. -- Apr. 2008
51 +-----------------------------------------------------------------------
52 | on entry:
53 |----------
54 |
55 | rhs     = real vector of length n containing the right hand side.
56 | sol     = real vector of length n containing an initial guess to the
57 |           solution on input.
58 | tol     = tolerance for stopping iteration
59 | im      = Krylov subspace dimension
60 | (itmax) = max number of iterations allowed.
61 | fits    = NULL: no output
62 |        != NULL: file handle to output " resid vs time and its"
63 |
64 | on return:
65 |----------
66 | fgmr      int =  0 --> successful return.
67 |           int =  1 --> convergence not achieved in itmax iterations.
68 | sol     = contains an approximate solution (upon successful return).
69 | itmax   = has changed. It now contains the number of steps required
70 |           to converge --
71 +-----------------------------------------------------------------------
72 | internal work arrays:
73 |----------
74 | vv      = work array of length [im+1][n] (used to store the Arnoldi
75 |           basis)
76 | hh      = work array of length [im][im+1] (Householder matrix)
77 | z       = work array of length [im][n] to store preconditioned vectors
78 +-----------------------------------------------------------------------
79 | subroutines called :
80 | matvec - matrix-vector multiplication operation
81 | psolve - (right) preconditionning operation
82 |	   psolve can be a NULL pointer (GMRES without preconditioner)
83 +---------------------------------------------------------------------*/
84 
85     int maxits = *itmax;
86     int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
87     float beta, eps1 = 0.0, t, t0, gam;
88     float **hh, *c, *s, *rs;
89     float **vv, **z, tt;
90     float zero = 0.0;
91     float one = 1.0;
92 
93     its = 0;
94     vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
95     for (i = 0; i <= im; i++) vv[i] = floatMalloc(n);
96     z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
97     hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
98     for (i = 0; i < im; i++)
99     {
100 	hh[i] = floatMalloc(i + 2);
101 	z[i] = floatMalloc(n);
102     }
103     c = floatMalloc(im);
104     s = floatMalloc(im);
105     rs = floatMalloc(im + 1);
106 
107     /*---- outer loop starts here ----*/
108     do
109     {
110 	/*---- compute initial residual vector ----*/
111 	smatvec(one, sol, zero, vv[0]);
112 	for (j = 0; j < n; j++)
113 	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
114 	beta = snrm2_(&n, vv[0], &i_1);
115 
116 	/*---- print info if fits != null ----*/
117 	if (fits != NULL && its == 0)
118 	    fprintf(fits, "%8d   %10.2e\n", its, beta);
119 	/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
120 	if ( !(beta > tol * snrm2_(&n, rhs, &i_1)) )
121 	    break;
122 	t = 1.0 / beta;
123 
124 	/*---- normalize: vv[0] = vv[0] / beta ----*/
125 	for (j = 0; j < n; j++)
126 	    vv[0][j] = vv[0][j] * t;
127 	if (its == 0)
128 	    eps1 = tol * beta;
129 
130 	/*---- initialize 1-st term of rhs of hessenberg system ----*/
131 	rs[0] = beta;
132 	for (i = 0; i < im; i++)
133 	{
134 	    its++;
135 	    i1 = i + 1;
136 
137 	    /*------------------------------------------------------------
138 	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
139 	    +-----------------------------------------------------------*/
140 	    if (spsolve)
141 		spsolve(n, z[i], vv[i]);
142 	    else
143 		scopy_(&n, vv[i], &i_1, z[i], &i_1);
144 
145 	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
146 	    smatvec(one, z[i], zero, vv[i1]);
147 
148 	    /*------------------------------------------------------------
149 	    |     modified gram - schmidt...
150 	    |     h_{i,j} = (w,v_{i})
151 	    |     w  = w - h_{i,j} v_{i}
152 	    +------------------------------------------------------------*/
153 	    t0 = snrm2_(&n, vv[i1], &i_1);
154 	    for (j = 0; j <= i; j++)
155 	    {
156 		float negt;
157 		tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
158 		hh[i][j] = tt;
159 		negt = -tt;
160 		saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
161 	    }
162 
163 	    /*---- h_{j+1,j} = ||w||_{2} ----*/
164 	    t = snrm2_(&n, vv[i1], &i_1);
165 	    while (t < 0.5 * t0)
166 	    {
167 		t0 = t;
168 		for (j = 0; j <= i; j++)
169 		{
170 		    float negt;
171 		    tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
172 		    hh[i][j] += tt;
173 		    negt = -tt;
174 		    saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
175 		}
176 		t = snrm2_(&n, vv[i1], &i_1);
177 	    }
178 
179 	    hh[i][i1] = t;
180 
181 	    if (t != 0.0)
182 	    {
183 		/*---- v_{j+1} = w / h_{j+1,j} ----*/
184 		t = 1.0 / t;
185 		for (k = 0; k < n; k++)
186 		    vv[i1][k] = vv[i1][k] * t;
187 	    }
188 	    /*---------------------------------------------------
189 	    |     done with modified gram schimdt and arnoldi step
190 	    |     now  update factorization of hh
191 	    +--------------------------------------------------*/
192 
193 	    /*--------------------------------------------------------
194 	    |   perform previous transformations  on i-th column of h
195 	    +-------------------------------------------------------*/
196 	    for (k = 1; k <= i; k++)
197 	    {
198 		k1 = k - 1;
199 		tt = hh[i][k1];
200 		hh[i][k1] = c[k1] * tt + s[k1] * hh[i][k];
201 		hh[i][k] = -s[k1] * tt + c[k1] * hh[i][k];
202 	    }
203 
204 	    gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));
205 
206 	    /*---------------------------------------------------
207 	    |     if gamma is zero then any small value will do
208 	    |     affect only residual estimate
209 	    +--------------------------------------------------*/
210 	    /* if (gam == 0.0) gam = epsmac; */
211 
212 	    /*---- get next plane rotation ---*/
213 	    if (gam == 0.0)
214 	    {
215 		c[i] = one;
216 		s[i] = zero;
217 	    }
218             else
219 	    {
220 		c[i] = hh[i][i] / gam;
221 		s[i] = hh[i][i1] / gam;
222 	    }
223 
224 	    rs[i1] = -s[i] * rs[i];
225 	    rs[i] = c[i] * rs[i];
226 
227 	    /*----------------------------------------------------
228 	    |   determine residual norm and test for convergence
229 	    +---------------------------------------------------*/
230 	    hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
231 	    beta = fabs(rs[i1]);
232 	    if (fits != NULL)
233 		fprintf(fits, "%8d   %10.2e\n", its, beta);
234 	    if (beta <= eps1 || its >= maxits)
235 		break;
236 	}
237 
238 	if (i == im) i--;
239 
240 	/*---- now compute solution. 1st, solve upper triangular system ----*/
241 	rs[i] = rs[i] / hh[i][i];
242 
243 	for (ii = 1; ii <= i; ii++)
244 	{
245 	    k = i - ii;
246 	    k1 = k + 1;
247 	    tt = rs[k];
248 	    for (j = k1; j <= i; j++)
249 		tt = tt - hh[j][k] * rs[j];
250 	    rs[k] = tt / hh[k][k];
251 	}
252 
253 	/*---- linear combination of v[i]'s to get sol. ----*/
254 	for (j = 0; j <= i; j++)
255 	{
256 	    tt = rs[j];
257 	    for (k = 0; k < n; k++)
258 		sol[k] += tt * z[j][k];
259 	}
260 
261 	/* calculate the residual and output */
262 	smatvec(one, sol, zero, vv[0]);
263 	for (j = 0; j < n; j++)
264 	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
265 
266 	/*---- print info if fits != null ----*/
267 	beta = snrm2_(&n, vv[0], &i_1);
268 
269 	/*---- restart outer loop if needed ----*/
270 	/*if (beta >= eps1 / tol)*/
271 	if ( !(beta < eps1 / tol) )
272 	{
273 	    its = maxits + 10;
274 	    break;
275 	}
276 	if (beta <= eps1)
277 	    break;
278     } while(its < maxits);
279 
280     retval = (its >= maxits);
281     for (i = 0; i <= im; i++)
282 	SUPERLU_FREE(vv[i]);
283     SUPERLU_FREE(vv);
284     for (i = 0; i < im; i++)
285     {
286 	SUPERLU_FREE(hh[i]);
287 	SUPERLU_FREE(z[i]);
288     }
289     SUPERLU_FREE(hh);
290     SUPERLU_FREE(z);
291     SUPERLU_FREE(c);
292     SUPERLU_FREE(s);
293     SUPERLU_FREE(rs);
294 
295     *itmax = its;
296 
297     return retval;
298 } /*----end of fgmr ----*/
299