1 /*
2 * This file is part of CLAPACK, the f2c'ed version of LAPACK
3 * hosted at netlib.org.
4 *
5 * For more information, see the LAPACK User's Guide:
6 *
7 @BOOK{laug,
8 AUTHOR = {Anderson, E. and Bai, Z. and Bischof, C. and
9 Blackford, S. and Demmel, J. and Dongarra, J. and
10 Du Croz, J. and Greenbaum, A. and Hammarling, S. and
11 McKenney, A. and Sorensen, D.},
12 TITLE = {{LAPACK} Users' Guide},
13 EDITION = {Third},
14 PUBLISHER = {Society for Industrial and Applied Mathematics},
15 YEAR = {1999},
16 ADDRESS = {Philadelphia, PA},
17 ISBN = {0-89871-447-8 (paperback)} }
18 *
19 */
20
21 /* GMRES.f -- translated by f2c (version of 20 August 1993 13:15:44).
22 You must link the resulting object file with the libraries:
23 -lf2c -lm (in that order)
24 */
25
26 #include "f2c.h"
27 #include <stdio.h>
28
29 /* Table of constant values */
30
31 static integer c__1 = 1;
32 static doublereal c_b7 = -1.;
33 static doublereal c_b8 = 1.;
34 static doublereal c_b20 = 0.;
35
36 /* -- Iterative template routine --
37 * Univ. of Tennessee and Oak Ridge National Laboratory
38 * October 1, 1993
39 * Details of this algorithm are described in "Templates for the
40 * Solution of Linear Systems: Building Blocks for Iterative
41 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
42 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
43 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
44 *
45 * Purpose
46 * =======
47 *
48 * GMRES solves the linear system Ax = b using the
49 * Generalized Minimal Residual iterative method with preconditioning.
50 *
51 * Convergence test: ( norm( b - A*x ) / norm( b ) ) < TOL.
52 * For other measures, see the above reference.
53 *
54 * Arguments
55 * =========
56 *
57 * N (input) INTEGER.
58 * On entry, the dimension of the matrix.
59 * Unchanged on exit.
60 *
61 * B (input) DOUBLE PRECISION array, dimension N.
62 * On entry, right hand side vector B.
63 * Unchanged on exit.
64 *
65 * X (input/output) DOUBLE PRECISION array, dimension N.
66 * On input, the initial guess; on exit, the iterated solution.
67 *
68 * RESTRT (input) INTEGER
69 * Restart parameter, <= N. This parameter controls the amount
70 * of memory required for matrix H (see WORK and H).
71 *
72 * WORK (workspace) DOUBLE PRECISION array, dimension (LDW,RESTRT+4).
73 *
74 * LDW (input) INTEGER
75 * The leading dimension of the array WORK. LDW >= max(1,N).
76 *
77 * H (workspace) DOUBLE PRECISION array, dimension (LDH,RESTRT+2).
78 * This workspace is used for constructing and storing the
79 * upper Hessenberg matrix. The two extra columns are used to
80 * store the Givens rotation matrices.
81 *
82 * LDH (input) INTEGER
83 * The leading dimension of the array H. LDH >= max(1,RESTRT+1).
84 *
85 * ITER (input/output) INTEGER
86 * On input, the maximum iterations to be performed.
87 * On output, actual number of iterations performed.
88 *
89 * RESID (input/output) DOUBLE PRECISION
90 * On input, the allowable convergence measure for
91 * norm( b - A*x ) / norm( b ).
92 * On output, the final value of this measure.
93 *
94 * MATVEC (external subroutine)
95 * The user must provide a subroutine to perform the
96 * matrix-vector product
97 *
98 * y := alpha*A*x + beta*y,
99 *
100 * where alpha and beta are scalars, x and y are vectors,
101 * and A is a matrix. Vector x must remain unchanged.
102 * The solution is over-written on vector y.
103 *
104 * The call is:
105 *
106 * CALL MATVEC( ALPHA, X, BETA, Y )
107 *
108 * The matrix is passed into the routine in a common block.
109 *
110 * PSOLVE (external subroutine)
111 * The user must provide a subroutine to perform the
112 * preconditioner solve routine for the linear system
113 *
114 * M*x = b,
115 *
116 * where x and b are vectors, and M a matrix. Vector b must
117 * remain unchanged.
118 * The solution is over-written on vector x.
119 *
120 * The call is:
121 *
122 * CALL PSOLVE( X, B )
123 *
124 * The preconditioner is passed into the routine in a common
125 * block.
126 *
127 * INFO (output) INTEGER
128 *
129 * = 0: Successful exit. Iterated approximate solution returned.
130 *
131 * > 0: Convergence to tolerance not achieved. This will be
132 * set to the number of iterations performed.
133 *
134 * < 0: Illegal input parameter.
135 *
136 * -1: matrix dimension N < 0
137 * -2: LDW < N
138 * -3: Maximum number of iterations ITER <= 0.
139 * -4: LDH < RESTRT
140 *
141 * BLAS CALLS: DAXPY, DCOPY, DDOT, DNRM2, DROT, DROTG, DSCAL
142 * ============================================================
143 */
144
145
146 //*****************************************************************
gmres_(n,b,x,restrt,work,ldw,h,ldh,iter,resid,matvec,psolve,info)147 int gmres_(n, b, x, restrt, work, ldw, h, ldh, iter, resid, matvec, psolve,
148 info)
149 integer *n, *restrt, *ldw, *ldh, *iter, *info;
150 doublereal *b, *x, *work, *h, *resid;
151 int (*matvec) (), (*psolve) ();
152 {
153 /* System generated locals */
154 integer work_dim1, work_offset, h_dim1, h_offset, i__1;
155 doublereal d__1;
156
157 /* Local variables */
158 // extern /* Subroutine */ int drot_();
159 static doublereal bnrm2;
160 extern doublereal dnrm2_();
161 static integer i, k, r, s, v, w, y;
162 extern /* Subroutine */ int dscal_(), basis_(), dcopy_(), drot_(), drotg_();
163 static integer maxit;
164 static doublereal rnorm, aa, bb;
165 static integer cs, av, sn;
166 extern /* Subroutine */ int update_();
167 extern /* Subroutine */ int basis_();
168 static doublereal tol;
169
170 /* Parameter adjustments */
171
172 //printf("%d\t%f\t%f\t%d\t%d\t%d\t%d\t%f\n",*n,b[0],x[0],*restrt,*ldw,*ldh,*iter,*resid);
173
174 h_dim1 = *ldh;
175 h_offset = h_dim1 + 1;
176 h -= h_offset;
177 work_dim1 = *ldw;
178 work_offset = work_dim1 + 1;
179 work -= work_offset;
180 --x;
181 --b;
182
183 /* Executable Statements */
184 *info = 0;
185
186 /* Test the input parameters. */
187
188 if (*n < 0) {
189 *info = -1;
190 } else if (*ldw < max(1,*n)) {
191 *info = -2;
192 } else if (*iter <= 0) {
193 *info = -3;
194 } else if (*ldh < *restrt + 1) {
195 *info = -4;
196 }
197 if (*info != 0) {
198 return 0;
199 }
200
201 maxit = *iter;
202 tol = *resid;
203
204 /* Alias workspace columns. */
205
206 r = 1;
207 s = r + 1;
208 w = s + 1;
209 y = w;
210 av = y;
211 v = av + 1;
212
213 /* Store the Givens parameters in matrix H. */
214
215 cs = *restrt + 1;
216 sn = cs + 1;
217
218 /* Set initial residual (AV is temporary workspace here). */
219
220 //printf("llf%d\t%f\n",*n,b[1]);
221 //getchar();
222 dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
223 //for (i=0;i<10;i++) printf("lsf%e\t%e\n",work[av * work_dim1 + 1+i],work[av * work_dim1 + 1+i+work_dim1/2] );
224 //getchar();
225
226 if (dnrm2_(n, &x[1], &c__1) != 0.) {
227 printf("call matvec at line 207\n");
228 /* AV is temporary workspace here. */
229
230 dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
231 (*matvec)(&c_b7, &x[1], &c_b8, &work[av * work_dim1 + 1]);
232 // (*matvec)(&x[1],&work[av * work_dim1 + 1]);
233
234 }
235
236 (*psolve)(&work[r * work_dim1 + 1], &work[av * work_dim1 + 1]);
237
238 bnrm2 = dnrm2_(n, &b[1], &c__1);
239 if (bnrm2 == 0.) {
240 bnrm2 = 1.;
241 }
242 if (dnrm2_(n, &work[r * work_dim1 + 1], &c__1) / bnrm2 < tol) {
243 goto L70;
244 }
245
246 *iter = 0;
247
248 L10:
249
250 i = 0;
251
252 /* Construct the first column of V. */
253
254 dcopy_(n, &work[r * work_dim1 + 1], &c__1, &work[v * work_dim1 + 1], &
255 c__1);
256 rnorm = dnrm2_(n, &work[v * work_dim1 + 1], &c__1);
257 d__1 = 1. / rnorm;
258 dscal_(n, &d__1, &work[v * work_dim1 + 1], &c__1);
259
260 /* Initialize S to the elementary vector E1 scaled by RNORM. */
261
262 work[s * work_dim1 + 1] = rnorm;
263 i__1 = *n;
264 for (k = 2; k <= i__1; ++k) {
265 work[k + s * work_dim1] = 0.;
266 /* L20: */
267 }
268
269 L30:
270
271 ++i;
272 ++(*iter);
273 //int ii;
274 // for (ii=39000;ii<39010;ii++) printf("ggss %e\t%e\n",work[(v + i - 1) * work_dim1 + 1+ii],work[av * work_dim1 + 1+ii] );
275
276 //for(ii=0;ii<10;ii++) printf("work=%f, for i=%d\n",
277 // work[(v + i - 1) * work_dim1+1+ii],(v + i - 1) * work_dim1+1+ii);
278
279
280 //printf("call iteration at 253, index start at %d, last of work is %f\n",
281 // (v + i - 1) * work_dim1 + 1, work[(v + i - 1) * work_dim1 + 2*30000]);
282 //work[(v + i - 1) * work_dim1 + 1]=10;
283 (*matvec)(&c_b8, &work[(v + i - 1) * work_dim1 + 1], &c_b20, &work[av *
284 work_dim1 + 1]);
285
286 //(*matvec)(&work[(v + i - 1) * work_dim1 + 1], &work[av *work_dim1 + 1]);
287 //int ii;
288 // for (ii=39000;ii<39010;ii++) printf("llff %e\t%e\n",work[(v + i - 1) * work_dim1 + 1+ii],work[av * work_dim1 + 1+ii] );
289 //getchar();
290
291
292 (*psolve)(&work[w * work_dim1 + 1], &work[av * work_dim1 + 1]);
293
294 /* Construct I-th column of H orthnormal to the previous */
295 /* I-1 columns. */
296
297 basis_(&i, n, &h[i * h_dim1 + 1], &work[v * work_dim1 + 1], ldw, &work[w *
298 work_dim1 + 1]);
299
300 /* Apply Givens rotations to the I-th column of H. This */
301 /* "updating" of the QR factorization effectively reduces */
302 /* the Hessenberg matrix to upper triangular form during */
303 /* the RESTRT iterations. */
304
305 i__1 = i - 1;
306 for (k = 1; k <= i__1; ++k) {
307 drot_(&c__1, &h[k + i * h_dim1], ldh, &h[k + 1 + i * h_dim1], ldh, &h[
308 k + cs * h_dim1], &h[k + sn * h_dim1]);
309 /* L40: */
310 }
311
312 /* Construct the I-th rotation matrix, and apply it to H so that
313 */
314 /* H(I+1,I) = 0. */
315
316 aa = h[i + i * h_dim1];
317 bb = h[i + 1 + i * h_dim1];
318 drotg_(&aa, &bb, &h[i + cs * h_dim1], &h[i + sn * h_dim1]);
319 drot_(&c__1, &h[i + i * h_dim1], ldh, &h[i + 1 + i * h_dim1], ldh, &h[i +
320 cs * h_dim1], &h[i + sn * h_dim1]);
321
322 /* Apply the I-th rotation matrix to [ S(I), S(I+1) ]'. This */
323 /* gives an approximation of the residual norm. If less than */
324 /* tolerance, update the approximation vector X and quit. */
325
326 drot_(&c__1, &work[i + s * work_dim1], ldw, &work[i + 1 + s * work_dim1],
327 ldw, &h[i + cs * h_dim1], &h[i + sn * h_dim1]);
328 *resid = (d__1 = work[i + 1 + s * work_dim1], abs(d__1)) / bnrm2;
329
330 printf("iteration no.=%ld, error=%e\n", *iter, *resid);
331
332 if (*resid <= tol) {
333 update_(&i, n, &x[1], &h[h_offset], ldh, &work[y * work_dim1 + 1], &
334 work[s * work_dim1 + 1], &work[v * work_dim1 + 1], ldw);
335 goto L70;
336 }
337 if (*iter == maxit) {
338 goto L50;
339 }
340 if (i < *restrt) {
341 goto L30;
342 }
343
344 L50:
345
346 /* Compute current solution vector X. */
347
348 update_(restrt, n, &x[1], &h[h_offset], ldh, &work[y * work_dim1 + 1], &
349 work[s * work_dim1 + 1], &work[v * work_dim1 + 1], ldw);
350
351 /* Compute residual vector R, find norm, then check for tolerance.
352 */
353 /* (AV is temporary workspace here.) */
354
355 dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
356 //printf("call matvec at line 327\n");
357 (*matvec)(&c_b7, &x[1], &c_b8, &work[av * work_dim1 + 1]);
358 // (*matvec)(&x[1], &work[av * work_dim1 + 1]);
359 (*psolve)(&work[r * work_dim1 + 1], &work[av * work_dim1 + 1]);
360 work[i + 1 + s * work_dim1] = dnrm2_(n, &work[r * work_dim1 + 1], &c__1);
361 *resid = work[i + 1 + s * work_dim1] / bnrm2;
362 if (*resid <= tol) {
363 goto L70;
364 }
365 if (*iter == maxit) {
366 goto L60;
367 }
368
369 /* Restart. */
370
371 goto L10;
372
373 L60:
374
375 /* Iteration fails. */
376
377 *info = 1;
378 return 0;
379
380 L70:
381
382 /* Iteration successful; return. */
383
384 return 0;
385
386 /* End of GMRES */
387
388 } /* gmres_ */
389
390
391 /* =============================================================== */
update_(i,n,x,h,ldh,y,s,v,ldv)392 /* Subroutine */ int update_(i, n, x, h, ldh, y, s, v, ldv)
393 integer *i, *n;
394 doublereal *x, *h;
395 integer *ldh;
396 doublereal *y, *s, *v;
397 integer *ldv;
398 {
399 /* System generated locals */
400 integer h_dim1, h_offset, v_dim1, v_offset;
401
402 /* Local variables */
403 extern /* Subroutine */ int dgemv_(), dcopy_(), dtrsv_();
404
405 /* This routine updates the GMRES iterated solution approximation. */
406
407
408 /* .. Executable Statements .. */
409
410 /* Solve H*Y = S for upper triangualar H. */
411
412 /* Parameter adjustments */
413 v_dim1 = *ldv;
414 v_offset = v_dim1 + 1;
415 v -= v_offset;
416 --s;
417 --y;
418 h_dim1 = *ldh;
419 h_offset = h_dim1 + 1;
420 h -= h_offset;
421 --x;
422
423 /* Function Body */
424 dcopy_(i, &s[1], &c__1, &y[1], &c__1);
425 dtrsv_("UPPER", "NOTRANS", "NONUNIT", i, &h[h_offset], ldh, &y[1], &c__1,
426 5L, 7L, 7L);
427
428 /* Compute current solution vector X = X + V*Y. */
429
430 dgemv_("NOTRANS", n, i, &c_b8, &v[v_offset], ldv, &y[1], &c__1, &c_b8, &x[
431 1], &c__1, 7L);
432
433 return 0;
434
435 } /* update_ */
436
437
438 /* ========================================================= */
basis_(i,n,h,v,ldv,w)439 /* Subroutine */ int basis_(i, n, h, v, ldv, w)
440 integer *i, *n;
441 doublereal *h, *v;
442 integer *ldv;
443 doublereal *w;
444 {
445 /* System generated locals */
446 integer v_dim1, v_offset, i__1;
447 doublereal d__1;
448
449 /* Local variables */
450 extern doublereal ddot_(), dnrm2_();
451 static integer k;
452 extern /* Subroutine */ int dscal_(), dcopy_(), daxpy_();
453
454
455
456 /* Construct the I-th column of the upper Hessenberg matrix H */
457 /* using the Gram-Schmidt process on V and W. */
458
459
460 /* Parameter adjustments */
461 --w;
462 v_dim1 = *ldv;
463 v_offset = v_dim1 + 1;
464 v -= v_offset;
465 --h;
466
467 /* Function Body */
468 i__1 = *i;
469 for (k = 1; k <= i__1; ++k) {
470 h[k] = ddot_(n, &w[1], &c__1, &v[k * v_dim1 + 1], &c__1);
471 d__1 = -h[k];
472 daxpy_(n, &d__1, &v[k * v_dim1 + 1], &c__1, &w[1], &c__1);
473 /* L10: */
474 }
475 h[*i + 1] = dnrm2_(n, &w[1], &c__1);
476 dcopy_(n, &w[1], &c__1, &v[(*i + 1) * v_dim1 + 1], &c__1);
477 d__1 = 1. / h[*i + 1];
478 dscal_(n, &d__1, &v[(*i + 1) * v_dim1 + 1], &c__1);
479
480 return 0;
481
482 }
483
484
485