1 /*
2  * This file is part of CLAPACK, the f2c'ed version of LAPACK
3  * hosted at netlib.org.
4  *
5  * For more information, see the LAPACK User's Guide:
6  *
7   @BOOK{laug,
8         AUTHOR = {Anderson, E. and Bai, Z. and Bischof, C. and
9                   Blackford, S. and Demmel, J. and Dongarra, J. and
10                   Du Croz, J. and Greenbaum, A. and Hammarling, S. and
11                   McKenney, A. and Sorensen, D.},
12         TITLE = {{LAPACK} Users' Guide},
13         EDITION = {Third},
14         PUBLISHER = {Society for Industrial and Applied Mathematics},
15         YEAR = {1999},
16         ADDRESS = {Philadelphia, PA},
17         ISBN = {0-89871-447-8 (paperback)}  }
18  *
19  */
20 
21 /* GMRES.f -- translated by f2c (version of 20 August 1993  13:15:44).
22    You must link the resulting object file with the libraries:
23 	-lf2c -lm   (in that order)
24  */
25 
26 #include "f2c.h"
27 #include <stdio.h>
28 
29 /* Table of constant values */
30 
31 static integer c__1 = 1;
32 static doublereal c_b7 = -1.;
33 static doublereal c_b8 = 1.;
34 static doublereal c_b20 = 0.;
35 
36 /*  -- Iterative template routine --
37 *     Univ. of Tennessee and Oak Ridge National Laboratory
38 *     October 1, 1993
39 *     Details of this algorithm are described in "Templates for the
40 *     Solution of Linear Systems: Building Blocks for Iterative
41 *     Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra,
42 *     Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications,
43 *     1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps).
44 *
45 *  Purpose
46 *  =======
47 *
48 *  GMRES solves the linear system Ax = b using the
49 *  Generalized Minimal Residual iterative method with preconditioning.
50 *
51 *  Convergence test: ( norm( b - A*x ) / norm( b ) ) < TOL.
52 *  For other measures, see the above reference.
53 *
54 *  Arguments
55 *  =========
56 *
57 *  N       (input) INTEGER.
58 *          On entry, the dimension of the matrix.
59 *          Unchanged on exit.
60 *
61 *  B       (input) DOUBLE PRECISION array, dimension N.
62 *          On entry, right hand side vector B.
63 *          Unchanged on exit.
64 *
65 *  X       (input/output) DOUBLE PRECISION array, dimension N.
66 *          On input, the initial guess; on exit, the iterated solution.
67 *
68 *  RESTRT  (input) INTEGER
69 *          Restart parameter, <= N. This parameter controls the amount
70 *          of memory required for matrix H (see WORK and H).
71 *
72 *  WORK    (workspace) DOUBLE PRECISION array, dimension (LDW,RESTRT+4).
73 *
74 *  LDW     (input) INTEGER
75 *          The leading dimension of the array WORK. LDW >= max(1,N).
76 *
77 *  H       (workspace) DOUBLE PRECISION array, dimension (LDH,RESTRT+2).
78 *          This workspace is used for constructing and storing the
79 *          upper Hessenberg matrix. The two extra columns are used to
80 *          store the Givens rotation matrices.
81 *
82 *  LDH    (input) INTEGER
83 *          The leading dimension of the array H. LDH >= max(1,RESTRT+1).
84 *
85 *  ITER    (input/output) INTEGER
86 *          On input, the maximum iterations to be performed.
87 *          On output, actual number of iterations performed.
88 *
89 *  RESID   (input/output) DOUBLE PRECISION
90 *          On input, the allowable convergence measure for
91 *          norm( b - A*x ) / norm( b ).
92 *          On output, the final value of this measure.
93 *
94 *  MATVEC  (external subroutine)
95 *          The user must provide a subroutine to perform the
96 *          matrix-vector product
97 *
98 *               y := alpha*A*x + beta*y,
99 *
100 *          where alpha and beta are scalars, x and y are vectors,
101 *          and A is a matrix. Vector x must remain unchanged.
102 *          The solution is over-written on vector y.
103 *
104 *          The call is:
105 *
106 *             CALL MATVEC( ALPHA, X, BETA, Y )
107 *
108 *          The matrix is passed into the routine in a common block.
109 *
110 *  PSOLVE  (external subroutine)
111 *          The user must provide a subroutine to perform the
112 *          preconditioner solve routine for the linear system
113 *
114 *               M*x = b,
115 *
116 *          where x and b are vectors, and M a matrix. Vector b must
117 *          remain unchanged.
118 *          The solution is over-written on vector x.
119 *
120 *          The call is:
121 *
122 *             CALL PSOLVE( X, B )
123 *
124 *          The preconditioner is passed into the routine in a common
125 *          block.
126 *
127 *  INFO    (output) INTEGER
128 *
129 *          =  0: Successful exit. Iterated approximate solution returned.
130 *
131 *          >  0: Convergence to tolerance not achieved. This will be
132 *                set to the number of iterations performed.
133 *
134 *          <  0: Illegal input parameter.
135 *
136 *                   -1: matrix dimension N < 0
137 *                   -2: LDW < N
138 *                   -3: Maximum number of iterations ITER <= 0.
139 *                   -4: LDH < RESTRT
140 *
141 *  BLAS CALLS:   DAXPY, DCOPY, DDOT, DNRM2, DROT, DROTG, DSCAL
142 *  ============================================================
143 */
144 
145 
146 //*****************************************************************
gmres_(n,b,x,restrt,work,ldw,h,ldh,iter,resid,matvec,psolve,info)147 int gmres_(n, b, x, restrt, work, ldw, h, ldh, iter, resid, matvec, psolve,
148            info)
149    integer *n, *restrt, *ldw, *ldh, *iter, *info;
150    doublereal *b, *x, *work, *h, *resid;
151    int (*matvec) (), (*psolve) ();
152 {
153     /* System generated locals */
154     integer work_dim1, work_offset, h_dim1, h_offset, i__1;
155     doublereal d__1;
156 
157     /* Local variables */
158 //    extern /* Subroutine */ int drot_();
159     static doublereal bnrm2;
160     extern doublereal dnrm2_();
161     static integer i, k, r, s, v, w, y;
162     extern /* Subroutine */ int dscal_(), basis_(), dcopy_(), drot_(), drotg_();
163     static integer maxit;
164     static doublereal rnorm, aa, bb;
165     static integer cs, av, sn;
166     extern /* Subroutine */ int update_();
167     extern /* Subroutine */ int basis_();
168     static doublereal tol;
169 
170     /* Parameter adjustments */
171 
172     //printf("%d\t%f\t%f\t%d\t%d\t%d\t%d\t%f\n",*n,b[0],x[0],*restrt,*ldw,*ldh,*iter,*resid);
173 
174     h_dim1 = *ldh;
175     h_offset = h_dim1 + 1;
176     h -= h_offset;
177     work_dim1 = *ldw;
178     work_offset = work_dim1 + 1;
179     work -= work_offset;
180     --x;
181     --b;
182 
183     /* Executable Statements */
184     *info = 0;
185 
186 /*     Test the input parameters. */
187 
188     if (*n < 0) {
189 	*info = -1;
190     } else if (*ldw < max(1,*n)) {
191 	*info = -2;
192     } else if (*iter <= 0) {
193 	*info = -3;
194     } else if (*ldh < *restrt + 1) {
195 	*info = -4;
196     }
197     if (*info != 0) {
198 	return 0;
199     }
200 
201     maxit = *iter;
202     tol = *resid;
203 
204 /*     Alias workspace columns. */
205 
206     r = 1;
207     s = r + 1;
208     w = s + 1;
209     y = w;
210     av = y;
211     v = av + 1;
212 
213 /*     Store the Givens parameters in matrix H. */
214 
215     cs = *restrt + 1;
216     sn = cs + 1;
217 
218 /*     Set initial residual (AV is temporary workspace here). */
219 
220 	//printf("llf%d\t%f\n",*n,b[1]);
221 	//getchar();
222     dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
223 	//for (i=0;i<10;i++) printf("lsf%e\t%e\n",work[av * work_dim1 + 1+i],work[av * work_dim1 + 1+i+work_dim1/2] );
224 	//getchar();
225 
226     if (dnrm2_(n, &x[1], &c__1) != 0.) {
227 printf("call matvec at line 207\n");
228 /*        AV is temporary workspace here. */
229 
230 		dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
231 		(*matvec)(&c_b7, &x[1], &c_b8, &work[av * work_dim1 + 1]);
232 //		(*matvec)(&x[1],&work[av * work_dim1 + 1]);
233 
234 	}
235 
236     (*psolve)(&work[r * work_dim1 + 1], &work[av * work_dim1 + 1]);
237 
238     bnrm2 = dnrm2_(n, &b[1], &c__1);
239     if (bnrm2 == 0.) {
240 	bnrm2 = 1.;
241     }
242     if (dnrm2_(n, &work[r * work_dim1 + 1], &c__1) / bnrm2 < tol) {
243 	goto L70;
244     }
245 
246     *iter = 0;
247 
248 L10:
249 
250     i = 0;
251 
252 /*        Construct the first column of V. */
253 
254     dcopy_(n, &work[r * work_dim1 + 1], &c__1, &work[v * work_dim1 + 1], &
255 	    c__1);
256     rnorm = dnrm2_(n, &work[v * work_dim1 + 1], &c__1);
257     d__1 = 1. / rnorm;
258     dscal_(n, &d__1, &work[v * work_dim1 + 1], &c__1);
259 
260 /*        Initialize S to the elementary vector E1 scaled by RNORM. */
261 
262     work[s * work_dim1 + 1] = rnorm;
263     i__1 = *n;
264     for (k = 2; k <= i__1; ++k) {
265 	work[k + s * work_dim1] = 0.;
266 /* L20: */
267     }
268 
269 L30:
270 
271     ++i;
272     ++(*iter);
273 //int ii;
274 //	for (ii=39000;ii<39010;ii++) printf("ggss %e\t%e\n",work[(v + i - 1) * work_dim1 + 1+ii],work[av * work_dim1 + 1+ii] );
275 
276 //for(ii=0;ii<10;ii++) printf("work=%f, for i=%d\n",
277 //    work[(v + i - 1) * work_dim1+1+ii],(v + i - 1) * work_dim1+1+ii);
278 
279 
280 //printf("call iteration at 253, index start at %d, last of work is %f\n",
281 //       (v + i - 1) * work_dim1 + 1, work[(v + i - 1) * work_dim1 + 2*30000]);
282 //work[(v + i - 1) * work_dim1 + 1]=10;
283        (*matvec)(&c_b8, &work[(v + i - 1) * work_dim1 + 1], &c_b20, &work[av *
284 	    work_dim1 + 1]);
285 
286 	//(*matvec)(&work[(v + i - 1) * work_dim1 + 1], &work[av *work_dim1 + 1]);
287 //int ii;
288 //	for (ii=39000;ii<39010;ii++) printf("llff %e\t%e\n",work[(v + i - 1) * work_dim1 + 1+ii],work[av * work_dim1 + 1+ii] );
289 	//getchar();
290 
291 
292 	(*psolve)(&work[w * work_dim1 + 1], &work[av * work_dim1 + 1]);
293 
294 /*           Construct I-th column of H orthnormal to the previous */
295 /*           I-1 columns. */
296 
297     basis_(&i, n, &h[i * h_dim1 + 1], &work[v * work_dim1 + 1], ldw, &work[w *
298 	     work_dim1 + 1]);
299 
300 /*           Apply Givens rotations to the I-th column of H. This */
301 /*           "updating" of the QR factorization effectively reduces */
302 /*           the Hessenberg matrix to upper triangular form during */
303 /*           the RESTRT iterations. */
304 
305     i__1 = i - 1;
306     for (k = 1; k <= i__1; ++k) {
307 	drot_(&c__1, &h[k + i * h_dim1], ldh, &h[k + 1 + i * h_dim1], ldh, &h[
308 		k + cs * h_dim1], &h[k + sn * h_dim1]);
309 /* L40: */
310     }
311 
312 /*           Construct the I-th rotation matrix, and apply it to H so that
313  */
314 /*           H(I+1,I) = 0. */
315 
316     aa = h[i + i * h_dim1];
317     bb = h[i + 1 + i * h_dim1];
318     drotg_(&aa, &bb, &h[i + cs * h_dim1], &h[i + sn * h_dim1]);
319     drot_(&c__1, &h[i + i * h_dim1], ldh, &h[i + 1 + i * h_dim1], ldh, &h[i +
320 	    cs * h_dim1], &h[i + sn * h_dim1]);
321 
322 /*           Apply the I-th rotation matrix to [ S(I), S(I+1) ]'. This */
323 /*           gives an approximation of the residual norm. If less than */
324 /*           tolerance, update the approximation vector X and quit. */
325 
326     drot_(&c__1, &work[i + s * work_dim1], ldw, &work[i + 1 + s * work_dim1],
327 	    ldw, &h[i + cs * h_dim1], &h[i + sn * h_dim1]);
328     *resid = (d__1 = work[i + 1 + s * work_dim1], abs(d__1)) / bnrm2;
329 
330 	printf("iteration no.=%ld, error=%e\n", *iter, *resid);
331 
332     if (*resid <= tol) {
333 	update_(&i, n, &x[1], &h[h_offset], ldh, &work[y * work_dim1 + 1], &
334 		work[s * work_dim1 + 1], &work[v * work_dim1 + 1], ldw);
335 	goto L70;
336     }
337     if (*iter == maxit) {
338 	goto L50;
339     }
340     if (i < *restrt) {
341 	goto L30;
342     }
343 
344 L50:
345 
346 /*        Compute current solution vector X. */
347 
348     update_(restrt, n, &x[1], &h[h_offset], ldh, &work[y * work_dim1 + 1], &
349 	    work[s * work_dim1 + 1], &work[v * work_dim1 + 1], ldw);
350 
351 /*        Compute residual vector R, find norm, then check for tolerance.
352 */
353 /*        (AV is temporary workspace here.) */
354 
355     dcopy_(n, &b[1], &c__1, &work[av * work_dim1 + 1], &c__1);
356 //printf("call matvec at line 327\n");
357     (*matvec)(&c_b7, &x[1], &c_b8, &work[av * work_dim1 + 1]);
358 //	(*matvec)(&x[1], &work[av * work_dim1 + 1]);
359     (*psolve)(&work[r * work_dim1 + 1], &work[av * work_dim1 + 1]);
360     work[i + 1 + s * work_dim1] = dnrm2_(n, &work[r * work_dim1 + 1], &c__1);
361     *resid = work[i + 1 + s * work_dim1] / bnrm2;
362     if (*resid <= tol) {
363 	goto L70;
364     }
365     if (*iter == maxit) {
366 	goto L60;
367     }
368 
369 /*        Restart. */
370 
371     goto L10;
372 
373 L60:
374 
375 /*     Iteration fails. */
376 
377     *info = 1;
378     return 0;
379 
380 L70:
381 
382 /*     Iteration successful; return. */
383 
384     return 0;
385 
386 /*     End of GMRES */
387 
388 } /* gmres_ */
389 
390 
391 /*     =============================================================== */
update_(i,n,x,h,ldh,y,s,v,ldv)392 /* Subroutine */ int update_(i, n, x, h, ldh, y, s, v, ldv)
393 integer *i, *n;
394 doublereal *x, *h;
395 integer *ldh;
396 doublereal *y, *s, *v;
397 integer *ldv;
398 {
399     /* System generated locals */
400     integer h_dim1, h_offset, v_dim1, v_offset;
401 
402     /* Local variables */
403     extern /* Subroutine */ int dgemv_(), dcopy_(), dtrsv_();
404 
405 /*     This routine updates the GMRES iterated solution approximation. */
406 
407 
408 /*     .. Executable Statements .. */
409 
410 /*     Solve H*Y = S for upper triangualar H. */
411 
412     /* Parameter adjustments */
413     v_dim1 = *ldv;
414     v_offset = v_dim1 + 1;
415     v -= v_offset;
416     --s;
417     --y;
418     h_dim1 = *ldh;
419     h_offset = h_dim1 + 1;
420     h -= h_offset;
421     --x;
422 
423     /* Function Body */
424     dcopy_(i, &s[1], &c__1, &y[1], &c__1);
425     dtrsv_("UPPER", "NOTRANS", "NONUNIT", i, &h[h_offset], ldh, &y[1], &c__1,
426 	    5L, 7L, 7L);
427 
428 /*     Compute current solution vector X = X + V*Y. */
429 
430     dgemv_("NOTRANS", n, i, &c_b8, &v[v_offset], ldv, &y[1], &c__1, &c_b8, &x[
431 	    1], &c__1, 7L);
432 
433 		return 0;
434 
435 } /* update_ */
436 
437 
438 /*     ========================================================= */
basis_(i,n,h,v,ldv,w)439 /* Subroutine */ int basis_(i, n, h, v, ldv, w)
440 integer *i, *n;
441 doublereal *h, *v;
442 integer *ldv;
443 doublereal *w;
444 {
445     /* System generated locals */
446     integer v_dim1, v_offset, i__1;
447     doublereal d__1;
448 
449     /* Local variables */
450     extern doublereal ddot_(), dnrm2_();
451     static integer k;
452     extern /* Subroutine */ int dscal_(), dcopy_(), daxpy_();
453 
454 
455 
456 /*     Construct the I-th column of the upper Hessenberg matrix H */
457 /*     using the Gram-Schmidt process on V and W. */
458 
459 
460     /* Parameter adjustments */
461     --w;
462     v_dim1 = *ldv;
463     v_offset = v_dim1 + 1;
464     v -= v_offset;
465     --h;
466 
467     /* Function Body */
468     i__1 = *i;
469     for (k = 1; k <= i__1; ++k) {
470 	h[k] = ddot_(n, &w[1], &c__1, &v[k * v_dim1 + 1], &c__1);
471 	d__1 = -h[k];
472 	daxpy_(n, &d__1, &v[k * v_dim1 + 1], &c__1, &w[1], &c__1);
473 /* L10: */
474     }
475     h[*i + 1] = dnrm2_(n, &w[1], &c__1);
476     dcopy_(n, &w[1], &c__1, &v[(*i + 1) * v_dim1 + 1], &c__1);
477     d__1 = 1. / h[*i + 1];
478     dscal_(n, &d__1, &v[(*i + 1) * v_dim1 + 1], &c__1);
479 
480     return 0;
481 
482 }
483 
484 
485