1 /* Copyright (C) 2005 The Scalable Software Infrastructure Project. All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions are met:
5 1. Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7 2. Redistributions in binary form must reproduce the above copyright
8 notice, this list of conditions and the following disclaimer in the
9 documentation and/or other materials provided with the distribution.
10 3. Neither the name of the project nor the names of its contributors
11 may be used to endorse or promote products derived from this software
12 without specific prior written permission.
13
14 THIS SOFTWARE IS PROVIDED BY THE SCALABLE SOFTWARE INFRASTRUCTURE PROJECT
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE SCALABLE SOFTWARE INFRASTRUCTURE
18 PROJECT BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
19 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include "lis_config.h"
29 #else
30 #ifdef HAVE_CONFIG_WIN_H
31 #include "lis_config_win.h"
32 #endif
33 #endif
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #ifdef HAVE_MALLOC_H
38 #include <malloc.h>
39 #endif
40 #include <string.h>
41 #include <stdarg.h>
42 #ifdef _OPENMP
43 #include <omp.h>
44 #endif
45 #ifdef USE_MPI
46 #include <mpi.h>
47 #endif
48 #include "lislib.h"
49
50 /********************************************
51 * Preconditioned BiConjugate Gradient Safe *
52 ********************************************
53 r(0) = b - Ax(0)
54 rtld(0) = conj(r(0)) or random
55 mr(0) = M^-1 * r(0)
56 amr(0) = A * mr(0)
57 p(0) = mr(0)
58 ap(0) = amr(0)
59 rho(0) = <rtld,r(0)>
60 ********************************************
61 for k=1,2,...
62 tmpdot0 = <ap(k-1),rtld(0)>
63 alpha = rho(k-1) / tmpdot0
64 tmpdot0 = <y(k-1),y(k-1)>
65 tmpdot1 = <amr(k-1),r(k-1)>
66 tmpdot2 = <y(k-1),r(k-1)>
67 tmpdot3 = <amr(k-1),y(k-1)>
68 tmpdot4 = <amr(k-1),amr(k-1)>
69 tmp = tmpdot4*tmpdot0-tmpdot3*tmpdot3
70 qsi = (tmpdot0*tmpdot1-tmpdot2*tmpdot3) / tmp
71 eta = (tmpdot4*tmpdot2-tmpdot3*tmpdot1) / tmp
72 t(k-1) = qsi*ap(k-1) + eta*y(k-1)
73 mt(k-1) = M^-1 * t(k-1)
74 u(k-1) = mt(k-1) + eta*beta*u(k-2)
75 au(k-1) = A * u(k-1)
76 z(k-1) = qsi*mr(k-1) + eta*z(k-2) - alpha*u(k-1)
77 y(k) = qsi*amr(k-1) + eta*y(k-1) - alpha*au(k-1)
78 x(k) = x(k-1) + alpha*p(k-1) + z(k-1)
79 r(k) = r(k-1) - alpha*ap(k-1) - y(k)
80 mr(k) = M^-1 * r(k)
81 amr(k) = A * mr(k)
82 rho(k) = <rtld,r(k)>
83 beta = (rho(k) / rho(k-1)) * (alpha / qsi)
84 p(k) = mr(k) + beta*(p(k-1) - u(k-1))
85 ap(k-1) = amr(k) + beta*(ap(k-1) - au(k-1))
86 ********************************************/
87
88 #define NWORK 12
89 #undef __FUNC__
90 #define __FUNC__ "lis_bicgsafe_check_params"
lis_bicgsafe_check_params(LIS_SOLVER solver)91 LIS_INT lis_bicgsafe_check_params(LIS_SOLVER solver)
92 {
93 LIS_DEBUG_FUNC_IN;
94 LIS_DEBUG_FUNC_OUT;
95 return LIS_SUCCESS;
96 }
97
98 #undef __FUNC__
99 #define __FUNC__ "lis_bicgsafe_malloc_work"
lis_bicgsafe_malloc_work(LIS_SOLVER solver)100 LIS_INT lis_bicgsafe_malloc_work(LIS_SOLVER solver)
101 {
102 LIS_VECTOR *work;
103 LIS_INT i,j,worklen,err;
104
105 LIS_DEBUG_FUNC_IN;
106
107 worklen = NWORK;
108 work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_bicgsafe_malloc_work::work" );
109 if( work==NULL )
110 {
111 LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
112 return LIS_ERR_OUT_OF_MEMORY;
113 }
114 if( solver->precision==LIS_PRECISION_DEFAULT )
115 {
116 for(i=0;i<worklen;i++)
117 {
118 err = lis_vector_duplicate(solver->A,&work[i]);
119 if( err ) break;
120 }
121 }
122 else
123 {
124 for(i=0;i<worklen;i++)
125 {
126 err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
127 if( err ) break;
128 }
129 }
130 if( i<worklen )
131 {
132 for(j=0;j<i;j++) lis_vector_destroy(work[j]);
133 lis_free(work);
134 return err;
135 }
136 solver->worklen = worklen;
137 solver->work = work;
138
139 LIS_DEBUG_FUNC_OUT;
140 return LIS_SUCCESS;
141 }
142
143 #undef __FUNC__
144 #define __FUNC__ "lis_bicgsafe"
lis_bicgsafe(LIS_SOLVER solver)145 LIS_INT lis_bicgsafe(LIS_SOLVER solver)
146 {
147 LIS_Comm comm;
148 LIS_MATRIX A;
149 LIS_VECTOR x;
150 LIS_VECTOR r, rtld, mr, amr, t, mt, p, ap;
151 LIS_VECTOR y, u, au, z;
152 LIS_SCALAR alpha, beta;
153 LIS_SCALAR rho, rho_old;
154 LIS_SCALAR qsi, eta;
155 LIS_SCALAR tmp, tmpdot[5];
156 LIS_REAL bnrm2, nrm2, tol;
157 LIS_INT iter,maxiter,output,conv;
158 double time,ptime;
159
160 LIS_DEBUG_FUNC_IN;
161
162 comm = LIS_COMM_WORLD;
163
164 A = solver->A;
165 x = solver->x;
166 maxiter = solver->options[LIS_OPTIONS_MAXITER];
167 output = solver->options[LIS_OPTIONS_OUTPUT];
168 conv = solver->options[LIS_OPTIONS_CONV_COND];
169 ptime = 0.0;
170
171 rtld = solver->work[0];
172 r = solver->work[1];
173 mr = solver->work[2];
174 amr = solver->work[3];
175 p = solver->work[4];
176 ap = solver->work[5];
177 t = solver->work[6];
178 mt = solver->work[7];
179 y = solver->work[8];
180 u = solver->work[9];
181 z = solver->work[10];
182 au = solver->work[11];
183
184
185 /* Initial Residual */
186 if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
187 {
188 LIS_DEBUG_FUNC_OUT;
189 return LIS_SUCCESS;
190 }
191 tol = solver->tol;
192
193 lis_solver_set_shadowresidual(solver,r,rtld);
194
195 time = lis_wtime();
196 lis_psolve(solver, r, mr);
197 ptime += lis_wtime()-time;
198 lis_matvec(A,mr,amr);
199 lis_vector_dot(rtld,r,&rho_old);
200 lis_vector_copy(amr,ap);
201 lis_vector_copy(mr,p);
202 beta = 0.0;
203
204
205 for( iter=1; iter<=maxiter; iter++ )
206 {
207 /* tmpdot[0] = <rtld,ap> */
208 /* alpha = rho_old / tmpdot[0] */
209 lis_vector_dot(rtld,ap,&tmpdot[0]);
210 alpha = rho_old / tmpdot[0];
211
212
213 /* tmpdot[0] = <y,y> */
214 /* tmpdot[1] = <amr,r> */
215 /* tmpdot[2] = <y,r> */
216 /* tmpdot[3] = <amr,y> */
217 /* tmpdot[4] = <amr,amr> */
218 lis_vector_dot(y,y,&tmpdot[0]);
219 lis_vector_dot(amr,r,&tmpdot[1]);
220 lis_vector_dot(y,r,&tmpdot[2]);
221 lis_vector_dot(amr,y,&tmpdot[3]);
222 lis_vector_dot(amr,amr,&tmpdot[4]);
223 if(iter==1)
224 {
225 qsi = tmpdot[1] / tmpdot[4];
226 eta = 0.0;
227 }
228 else
229 {
230 tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
231 qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
232 eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
233 }
234
235 /* t = qsi*ap + eta*y */
236 lis_vector_copy(y,t);
237 lis_vector_scale(eta,t);
238 lis_vector_axpy(qsi,ap,t);
239
240 /* mt = M^-1 * t */
241 time = lis_wtime();
242 lis_psolve(solver, t, mt);
243 ptime += lis_wtime()-time;
244
245 /* u = mt + eta*beta*u */
246 /* au = A * u */
247 lis_vector_xpay(mt,eta*beta,u);
248 lis_matvec(A,u,au);
249
250 /* z = qsi*mr + eta*z - alpha*u */
251 lis_vector_scale(eta,z);
252 lis_vector_axpy(qsi,mr,z);
253 lis_vector_axpy(-alpha,u,z);
254
255 /* y = qsi*amr + eta*y - alpha*au */
256 lis_vector_scale(eta,y);
257 lis_vector_axpy(qsi,amr,y);
258 lis_vector_axpy(-alpha,au,y);
259
260 /* x = x + alpha*p + z */
261 lis_vector_axpy(alpha,p,x);
262 lis_vector_axpy(1.0,z,x);
263
264 /* r = r - alpha*ap - y */
265 lis_vector_axpy(-alpha,ap,r);
266 lis_vector_axpy(-1.0,y,r);
267
268 /* convergence check */
269 lis_solver_get_residual[conv](r,solver,&nrm2);
270 if( output )
271 {
272 if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
273 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
274 }
275
276 if( tol >= nrm2 )
277 {
278 solver->retcode = LIS_SUCCESS;
279 solver->iter = iter;
280 solver->resid = nrm2;
281 solver->ptime = ptime;
282 LIS_DEBUG_FUNC_OUT;
283 return LIS_SUCCESS;
284 }
285
286 /* rho = <rtld,r> */
287 lis_vector_dot(rtld,r,&rho);
288 if( rho==0.0 )
289 {
290 solver->retcode = LIS_BREAKDOWN;
291 solver->iter = iter;
292 solver->resid = nrm2;
293 LIS_DEBUG_FUNC_OUT;
294 return LIS_BREAKDOWN;
295 }
296
297 /* beta = (rho / rho_old) * (alpha / qsi) */
298 beta = (rho / rho_old) * (alpha / qsi);
299
300 /* mr = M^-1 * r */
301 /* amr = A * mr */
302 time = lis_wtime();
303 lis_psolve(solver, r, mr);
304 ptime += lis_wtime()-time;
305 lis_matvec(A,mr,amr);
306
307 /* p = mr + beta*(p - u) */
308 /* ap = amr + beta*(ap - au) */
309 lis_vector_axpy(-1.0,u,p);
310 lis_vector_xpay(mr,beta,p);
311 lis_vector_axpy(-1.0,au,ap);
312 lis_vector_xpay(amr,beta,ap);
313
314 rho_old = rho;
315 }
316
317 solver->retcode = LIS_MAXITER;
318 solver->iter = iter;
319 solver->resid = nrm2;
320 LIS_DEBUG_FUNC_OUT;
321 return LIS_MAXITER;
322 }
323
324
325
326 #ifdef USE_QUAD_PRECISION
327 #undef __FUNC__
328 #define __FUNC__ "lis_bicgsafe_quad"
lis_bicgsafe_quad(LIS_SOLVER solver)329 LIS_INT lis_bicgsafe_quad(LIS_SOLVER solver)
330 {
331 LIS_Comm comm;
332 LIS_MATRIX A;
333 LIS_VECTOR x;
334 LIS_VECTOR r, rtld, rhat, p, ptld;
335 LIS_VECTOR t, ttld;
336 LIS_VECTOR y, v, u, utld, z;
337 LIS_QUAD_PTR alpha, beta, rho, rho_old;
338 LIS_QUAD_PTR qsi, eta;
339 LIS_QUAD_PTR tmp, tmpdot[5],one;
340 LIS_REAL bnrm2, nrm2, tol;
341 LIS_INT iter,maxiter,output,conv;
342 double time,ptime;
343
344 LIS_DEBUG_FUNC_IN;
345
346 comm = LIS_COMM_WORLD;
347
348 A = solver->A;
349 x = solver->x;
350 maxiter = solver->options[LIS_OPTIONS_MAXITER];
351 output = solver->options[LIS_OPTIONS_OUTPUT];
352 conv = solver->options[LIS_OPTIONS_CONV_COND];
353 ptime = 0.0;
354
355
356 rtld = solver->work[0];
357 r = solver->work[1];
358 rhat = solver->work[2];
359 p = solver->work[3];
360 ptld = solver->work[4];
361 t = solver->work[5];
362 ttld = solver->work[6];
363 y = solver->work[7];
364 v = solver->work[8];
365 u = solver->work[9];
366 z = solver->work[10];
367 utld = solver->work[11];
368
369 LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
370 LIS_QUAD_SCALAR_MALLOC(beta,1,1);
371 LIS_QUAD_SCALAR_MALLOC(rho,2,1);
372 LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
373 LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
374 LIS_QUAD_SCALAR_MALLOC(eta,5,1);
375 LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
376 LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
377 LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
378 LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
379 LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
380 LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
381 LIS_QUAD_SCALAR_MALLOC(one,13,1);
382
383 rho_old.hi[0] = 1.0;
384 rho_old.lo[0] = 0.0;
385 alpha.hi[0] = 1.0;
386 alpha.lo[0] = 0.0;
387 qsi.hi[0] = 1.0;
388 qsi.lo[0] = 0.0;
389 one.hi[0] = -1.0;
390 one.lo[0] = 0.0;
391
392
393 /* Initial Residual */
394 if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
395 {
396 LIS_DEBUG_FUNC_OUT;
397 return LIS_SUCCESS;
398 }
399 tol = solver->tol;
400
401 lis_solver_set_shadowresidual(solver,r,rtld);
402
403 lis_vector_set_allex_nm(0.0,p);
404 lis_vector_set_allex_nm(0.0,u);
405 lis_vector_set_allex_nm(0.0,ptld);
406 lis_vector_set_allex_nm(0.0,utld);
407
408 for( iter=1; iter<=maxiter; iter++ )
409 {
410 /* rho = <rtld,r> */
411 lis_vector_dotex_mmm(rtld,r,&rho);
412
413 /* test breakdown */
414 if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
415 {
416 solver->retcode = LIS_BREAKDOWN;
417 solver->iter = iter;
418 solver->resid = nrm2;
419 LIS_DEBUG_FUNC_OUT;
420 return LIS_BREAKDOWN;
421 }
422
423 /* beta = (rho / rho_old) * (alpha / qsi) */
424 lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
425 lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
426 lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);
427
428 /* rhat = M^-1 * r */
429 /* v = A * rhat */
430 time = lis_wtime();
431 lis_psolve(solver, r, rhat);
432 ptime += lis_wtime()-time;
433 lis_matvec(A,rhat,v);
434
435 /* p = rhat + beta*(p - u) */
436 lis_vector_axpyex_mmm(one,u,p);
437 lis_vector_xpayex_mmm(rhat,beta,p);
438
439 /* ptld = v + beta*(ptld - utld) */
440 lis_vector_axpyex_mmm(one,utld,ptld);
441 lis_vector_xpayex_mmm(v,beta,ptld);
442
443 /* tmpdot[0] = <rtld,ptld> */
444 lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
445 /* test breakdown */
446 /* */
447
448 /* alpha = rho / tmpdot[0] */
449 lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);
450
451
452 /* tmpdot[0] = <y,y> */
453 /* tmpdot[1] = <v,r> */
454 /* tmpdot[2] = <y,r> */
455 /* tmpdot[3] = <v,y> */
456 /* tmpdot[4] = <v,v> */
457 lis_vector_dotex_mmm(y,y,&tmpdot[0]);
458 lis_vector_dotex_mmm(v,r,&tmpdot[1]);
459 lis_vector_dotex_mmm(y,r,&tmpdot[2]);
460 lis_vector_dotex_mmm(v,y,&tmpdot[3]);
461 lis_vector_dotex_mmm(v,v,&tmpdot[4]);
462 if(iter==1)
463 {
464 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
465 eta.hi[0] = 0.0;
466 eta.lo[0] = 0.0;
467 }
468 else
469 {
470 lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
471 lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
472 lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);
473
474 lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
475 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
476 lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
477 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);
478
479 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
480 lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
481 lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
482 lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
483 }
484
485 /* t = qsi*ptld + eta*y */
486 lis_vector_copyex_mm(y,t);
487 lis_vector_scaleex_mm(eta,t);
488 lis_vector_axpyex_mmm(qsi,ptld,t);
489
490 /* ttld = M^-1 * t */
491 time = lis_wtime();
492 lis_psolve(solver, t, ttld);
493 ptime += lis_wtime()-time;
494
495 /* u = ttld + eta*beta*u */
496 /* utld = A * u */
497 lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
498 lis_vector_xpayex_mmm(ttld,tmp,u);
499 lis_matvec(A,u,utld);
500
501 /* z = qsi*rhat + eta*z - alpha*u */
502 lis_vector_scaleex_mm(eta,z);
503 lis_vector_axpyex_mmm(qsi,rhat,z);
504 lis_quad_minus((LIS_QUAD *)alpha.hi);
505 lis_vector_axpyex_mmm(alpha,u,z);
506
507 /* y = qsi*v + eta*y - alpha*utld */
508 lis_vector_scaleex_mm(eta,y);
509 lis_vector_axpyex_mmm(qsi,v,y);
510 lis_vector_axpyex_mmm(alpha,utld,y);
511 lis_quad_minus((LIS_QUAD *)alpha.hi);
512
513 /* x = x + alpha*p + z */
514 lis_vector_axpyex_mmm(alpha,p,x);
515 lis_quad_minus((LIS_QUAD *)one.hi);
516 lis_vector_axpyex_mmm(one,z,x);
517 lis_quad_minus((LIS_QUAD *)one.hi);
518
519 /* r = r - alpha*ptld - y */
520 lis_quad_minus((LIS_QUAD *)alpha.hi);
521 lis_vector_axpyex_mmm(alpha,ptld,r);
522 lis_quad_minus((LIS_QUAD *)alpha.hi);
523 lis_vector_axpyex_mmm(one,y,r);
524
525 /* convergence check */
526 lis_solver_get_residual[conv](r,solver,&nrm2);
527 if( output )
528 {
529 if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
530 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
531 }
532
533 if( tol > nrm2 )
534 {
535 solver->retcode = LIS_SUCCESS;
536 solver->iter = iter;
537 solver->resid = nrm2;
538 solver->ptime = ptime;
539 LIS_DEBUG_FUNC_OUT;
540 return LIS_SUCCESS;
541 }
542
543 rho_old.hi[0] = rho.hi[0];
544 rho_old.lo[0] = rho.lo[0];
545 }
546
547 solver->retcode = LIS_MAXITER;
548 solver->iter = iter;
549 solver->resid = nrm2;
550 LIS_DEBUG_FUNC_OUT;
551 return LIS_MAXITER;
552 }
553
554
555 #undef __FUNC__
556 #define __FUNC__ "lis_bicgsafe_switch"
lis_bicgsafe_switch(LIS_SOLVER solver)557 LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
558 {
559 LIS_Comm comm;
560 LIS_MATRIX A;
561 LIS_VECTOR x;
562 LIS_VECTOR r, rtld, rhat, p, ptld, phat;
563 LIS_VECTOR t, ttld, that, t0, t0hat;
564 LIS_VECTOR y, w, u, z;
565 LIS_QUAD_PTR alpha, beta, rho, rho_old;
566 LIS_QUAD_PTR qsi, eta, one;
567 LIS_QUAD_PTR tmp, tmpdot[5];
568 LIS_REAL bnrm2, nrm2, tol, tol2;
569 LIS_INT iter,maxiter,output,conv;
570 LIS_INT iter2,maxiter2;
571 double time,ptime;
572
573
574 LIS_DEBUG_FUNC_IN;
575
576 comm = LIS_COMM_WORLD;
577
578 A = solver->A;
579 x = solver->x;
580 maxiter = solver->options[LIS_OPTIONS_MAXITER];
581 maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
582 output = solver->options[LIS_OPTIONS_OUTPUT];
583 conv = solver->options[LIS_OPTIONS_CONV_COND];
584 tol = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
585 tol2 = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
586 ptime = 0.0;
587
588 rtld = solver->work[0];
589 r = solver->work[1];
590 rhat = solver->work[2];
591 p = solver->work[3];
592 ptld = solver->work[4];
593 phat = solver->work[5];
594 t = solver->work[6];
595 ttld = solver->work[7];
596 that = solver->work[8];
597 t0 = solver->work[9];
598 t0hat = solver->work[10];
599 y = solver->work[11];
600 w = solver->work[12];
601 u = solver->work[13];
602 z = solver->work[14];
603
604 LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
605 LIS_QUAD_SCALAR_MALLOC(beta,1,1);
606 LIS_QUAD_SCALAR_MALLOC(rho,2,1);
607 LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
608 LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
609 LIS_QUAD_SCALAR_MALLOC(eta,5,1);
610 LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
611 LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
612 LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
613 LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
614 LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
615 LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
616 LIS_QUAD_SCALAR_MALLOC(one,13,1);
617
618 rho_old.hi[0] = 1.0;
619 rho_old.lo[0] = 0.0;
620 alpha.hi[0] = 1.0;
621 alpha.lo[0] = 0.0;
622 qsi.hi[0] = 1.0;
623 qsi.lo[0] = 0.0;
624 one.hi[0] = -1.0;
625 one.lo[0] = 0.0;
626
627
628 /* Initial Residual */
629 if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
630 {
631 LIS_DEBUG_FUNC_OUT;
632 return LIS_SUCCESS;
633 }
634 tol2 = solver->tol_switch;
635
636 lis_solver_set_shadowresidual(solver,r,rtld);
637
638 lis_vector_set_allex_nm(0.0, ttld);
639 lis_vector_set_allex_nm(0.0, ptld);
640 lis_vector_set_allex_nm(0.0, p);
641 lis_vector_set_allex_nm(0.0, u);
642 lis_vector_set_allex_nm(0.0, t);
643 lis_vector_set_allex_nm(0.0, t0);
644
645 for( iter=1; iter<=maxiter2; iter++ )
646 {
647 /* rho = <rtld,r> */
648 lis_vector_dot(rtld,r,&rho.hi[0]);
649
650 /* test breakdown */
651 if( rho.hi[0]==0.0 )
652 {
653 solver->retcode = LIS_BREAKDOWN;
654 solver->iter = iter;
655 solver->iter2 = iter;
656 solver->resid = nrm2;
657 LIS_DEBUG_FUNC_OUT;
658 return LIS_BREAKDOWN;
659 }
660
661 /* beta = (rho / rho_old) * (alpha / qsi) */
662 beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);
663
664 /* w = ttld + beta*ptld */
665 lis_vector_axpyz(beta.hi[0],ptld,ttld,w);
666
667 /* rhat = M^-1 * r */
668 time = lis_wtime();
669 lis_psolve(solver, r, rhat);
670 ptime += lis_wtime()-time;
671
672 /* p = rhat + beta*(p - u) */
673 lis_vector_axpy(-1,u,p);
674 lis_vector_xpay(rhat,beta.hi[0],p);
675
676 /* ptld = A * p */
677 lis_matvec(A,p,ptld);
678
679 /* tmpdot[0] = <rtld,ptld> */
680 lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
681 /* test breakdown */
682 /* */
683
684 /* alpha = rho / tmpdot[0] */
685 alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];
686
687 /* y = t - r + alpha*(-w + ptld) */
688 lis_vector_axpyz(-1,w,ptld,y);
689 lis_vector_xpay(t,alpha.hi[0],y);
690 lis_vector_axpy(-1,r,y);
691
692 /* t = r - alpha*ptld */
693 lis_vector_axpyz(-alpha.hi[0],ptld,r,t);
694
695 /* that = M^-1 * t */
696 /* phat = M^-1 * ptld */
697 /* t0hat = M^-1 * t0 */
698 time = lis_wtime();
699 lis_psolve(solver, t, that);
700 lis_psolve(solver, ptld, phat);
701 lis_psolve(solver, t0, t0hat);
702 ptime += lis_wtime()-time;
703
704 /* ttld = A * that */
705 lis_matvec(A,that,ttld);
706
707 /* tmpdot[0] = <y,y> */
708 /* tmpdot[1] = <ttld,t> */
709 /* tmpdot[2] = <y,t> */
710 /* tmpdot[3] = <ttld,y> */
711 /* tmpdot[4] = <ttld,ttld> */
712 lis_vector_dot(y,y,&tmpdot[0].hi[0]);
713 lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
714 lis_vector_dot(y,t,&tmpdot[2].hi[0]);
715 lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
716 lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
717 if(iter==1)
718 {
719 qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
720 eta.hi[0] = 0.0;
721 }
722 else
723 {
724 tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0] - tmpdot[3].hi[0]*tmpdot[3].hi[0];
725 qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
726 eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
727 }
728
729 /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
730 lis_vector_xpay(t0hat,beta.hi[0],u);
731 lis_vector_axpy(-1,rhat,u);
732 lis_vector_scale(eta.hi[0],u);
733 lis_vector_axpy(qsi.hi[0],phat,u);
734
735 /* z = qsi*rhat + eta*z - alpha*u */
736 lis_vector_scale(eta.hi[0],z);
737 lis_vector_axpy(qsi.hi[0],rhat,z);
738 lis_vector_axpy(-alpha.hi[0],u,z);
739
740 /* x = x + alpha*p + z */
741 lis_vector_axpy(alpha.hi[0],p,x);
742 lis_vector_axpy(1,z,x);
743
744 /* r = t - eta*y - qsi*ttld */
745 lis_vector_axpyz(-eta.hi[0],y,t,r);
746 lis_vector_axpy(-qsi.hi[0],ttld,r);
747
748 /* convergence check */
749 lis_solver_get_residual[conv](r,solver,&nrm2);
750 if( output )
751 {
752 if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
753 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
754 }
755
756 if( tol2 >= nrm2 )
757 {
758 solver->iter = iter;
759 solver->iter2 = iter;
760 solver->ptime = ptime;
761 break;
762 }
763
764 lis_vector_copy(t,t0);
765 rho_old.hi[0] = rho.hi[0];
766 }
767
768 r->precision = LIS_PRECISION_QUAD;
769 p->precision = LIS_PRECISION_QUAD;
770 t->precision = LIS_PRECISION_QUAD;
771 t0->precision = LIS_PRECISION_QUAD;
772 ptld->precision = LIS_PRECISION_QUAD;
773 that->precision = LIS_PRECISION_QUAD;
774
775 solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
776 lis_vector_copyex_mn(x,solver->xx);
777
778 rho_old.hi[0] = 1.0;
779 alpha.hi[0] = 1.0;
780 qsi.hi[0] = 1.0;
781 one.hi[0] = -1.0;
782
783 /* Initial Residual */
784 lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
785 tol = solver->tol;
786
787 lis_solver_set_shadowresidual(solver,r,rtld);
788
789 lis_vector_set_allex_nm(0.0, ttld);
790 lis_vector_set_allex_nm(0.0, ptld);
791 lis_vector_set_allex_nm(0.0, p);
792 lis_vector_set_allex_nm(0.0, u);
793 lis_vector_set_allex_nm(0.0, t);
794 lis_vector_set_allex_nm(0.0, t0);
795
796 for( iter2=iter+1; iter2<=maxiter; iter2++ )
797 {
798 /* rho = <rtld,r> */
799 lis_vector_dotex_mmm(rtld,r,&rho);
800
801 /* test breakdown */
802 if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
803 {
804 solver->retcode = LIS_BREAKDOWN;
805 solver->iter = iter2;
806 solver->iter2 = iter;
807 solver->resid = nrm2;
808 LIS_DEBUG_FUNC_OUT;
809 return LIS_BREAKDOWN;
810 }
811
812 /* beta = (rho / rho_old) * (alpha / qsi) */
813 lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
814 lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
815 lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);
816
817 /* w = ttld + beta*ptld */
818 lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);
819
820 /* rhat = M^-1 * r */
821 time = lis_wtime();
822 lis_psolve(solver, r, rhat);
823 ptime += lis_wtime()-time;
824
825 /* p = rhat + beta*(p - u) */
826 lis_vector_axpyex_mmm(one,u,p);
827 lis_vector_xpayex_mmm(rhat,beta,p);
828
829 /* ptld = A * p */
830 lis_matvec(A,p,ptld);
831
832 /* tmpdot[0] = <rtld,ptld> */
833 lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
834 /* test breakdown */
835 /* */
836
837 /* alpha = rho / tmpdot[0] */
838 lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);
839
840 /* y = t - r + alpha*(-w + ptld) */
841 lis_vector_axpyzex_mmmm(one,w,ptld,y);
842 lis_vector_xpayex_mmm(t,alpha,y);
843 lis_vector_axpyex_mmm(one,r,y);
844
845 /* t = r - alpha*ptld */
846 lis_quad_minus((LIS_QUAD *)alpha.hi);
847 lis_vector_axpyzex_mmmm(alpha,ptld,r,t);
848
849 /* that = M^-1 * t */
850 /* phat = M^-1 * ptld */
851 /* t0hat = M^-1 * t0 */
852 time = lis_wtime();
853 lis_psolve(solver, t, that);
854 lis_psolve(solver, ptld, phat);
855 lis_psolve(solver, t0, t0hat);
856 ptime += lis_wtime()-time;
857
858 /* ttld = A * that */
859 lis_matvec(A,that,ttld);
860
861 /* tmpdot[0] = <y,y> */
862 /* tmpdot[1] = <ttld,t> */
863 /* tmpdot[2] = <y,t> */
864 /* tmpdot[3] = <ttld,y> */
865 /* tmpdot[4] = <ttld,ttld> */
866 lis_vector_dotex_mmm(y,y,&tmpdot[0]);
867 lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
868 lis_vector_dotex_mmm(y,t,&tmpdot[2]);
869 lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
870 lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);
871 if(iter==1)
872 {
873 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
874 eta.hi[0] = 0.0;
875 eta.lo[0] = 0.0;
876 }
877 else
878 {
879 lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
880 lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
881 lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);
882
883 lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
884 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
885 lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
886 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);
887
888 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
889 lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
890 lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
891 lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
892 }
893
894 /* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
895 lis_vector_xpayex_mmm(t0hat,beta,u);
896 lis_vector_axpyex_mmm(one,rhat,u);
897 lis_vector_scaleex_mm(eta,u);
898 lis_vector_axpyex_mmm(qsi,phat,u);
899
900 /* z = qsi*rhat + eta*z - alpha*u */
901 lis_vector_scaleex_mm(eta,z);
902 lis_vector_axpyex_mmm(qsi,rhat,z);
903 lis_vector_axpyex_mmm(alpha,u,z);
904
905 /* x = x + alpha*p + z */
906 lis_quad_minus((LIS_QUAD *)alpha.hi);
907 lis_quad_minus((LIS_QUAD *)one.hi);
908 lis_vector_axpyex_mmm(alpha,p,x);
909 lis_vector_axpyex_mmm(one,z,x);
910 lis_quad_minus((LIS_QUAD *)one.hi);
911
912 /* r = t - eta*y - qsi*ttld */
913 lis_quad_minus((LIS_QUAD *)eta.hi);
914 lis_quad_minus((LIS_QUAD *)qsi.hi);
915 lis_vector_axpyzex_mmmm(eta,y,t,r);
916 lis_vector_axpyex_mmm(qsi,ttld,r);
917 lis_quad_minus((LIS_QUAD *)eta.hi);
918 lis_quad_minus((LIS_QUAD *)qsi.hi);
919
920 /* convergence check */
921 lis_solver_get_residual[conv](r,solver,&nrm2);
922 if( output )
923 {
924 if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
925 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
926 }
927
928 if( tol > nrm2 )
929 {
930 solver->retcode = LIS_SUCCESS;
931 solver->iter = iter2;
932 solver->iter2 = iter;
933 solver->resid = nrm2;
934 solver->ptime = ptime;
935 LIS_DEBUG_FUNC_OUT;
936 return LIS_SUCCESS;
937 }
938
939 lis_vector_copyex_mm(t,t0);
940 rho_old.hi[0] = rho.hi[0];
941 rho_old.lo[0] = rho.lo[0];
942 }
943 solver->retcode = LIS_MAXITER;
944 solver->iter = iter;
945 solver->iter2 = iter2;
946 solver->resid = nrm2;
947 LIS_DEBUG_FUNC_OUT;
948 return LIS_MAXITER;
949 }
950 #endif
951
952 /********************************************
953 * Preconditioned BiConjugate Residual Safe *
954 ********************************************
955 r(0) = b - Ax(0)
956 rtld(0) = conj(r(0)) or random
957 artld(0)= A^H * rtld(0)
958 mr(0) = M^-1 * r(0)
959 amr(0) = A * mr(0)
960 p(0) = mr(0)
961 ap(0) = amr(0)
962 rho(0) = <rtld,amr(0)>
963 ********************************************
964 for k=1,2,...
965 map(k-1)= M^-1 * ap(k-1)
966 tmpdot0 = <map(k-1),artld(0)>
967 alpha = rho(k-1) / tmpdot0
968 tmpdot0 = <y(k-1),y(k-1)>
969 tmpdot1 = <amr(k-1),r(k-1)>
970 tmpdot2 = <y(k-1),r(k-1)>
971 tmpdot3 = <amr(k-1),y(k-1)>
972 tmpdot4 = <amr(k-1),amr(k-1)>
973 tmp = tmpdot4*tmpdot0-tmpdot3*tmpdot3
974 qsi = (tmpdot0*tmpdot1-tmpdot2*tmpdot3) / tmp
975 eta = (tmpdot4*tmpdot2-tmpdot3*tmpdot1) / tmp
976 u(k-1) = qsi*map(k-1) + eta*my(k) + eta*beta*u(k-2)
977 au(k-1) = A * u(k-1)
978 z(k-1) = qsi*mr(k-1) + eta*z(k-2) - alpha*u(k-1)
979 y(k) = qsi*amr(k-1) + eta*y(k-1) - alpha*au(k-1)
980 my(k) = M^-1 * y(k)
981 x(k) = x(k-1) + alpha*p(k-1) + z(k-1)
982 r(k) = r(k-1) - alpha*ap(k-1) - y(k)
983 mr(k) = mr(k-1) - alpha*map(k-1) - my(k)
984 amr(k) = A * mr(k)
985 rho(k) = <rtld,amr(k)>
986 beta = (rho(k) / rho(k-1)) * (alpha / qsi)
987 p(k) = mr(k) + beta*(p(k-1) - u(k-1))
988 ap(k-1) = amr(k) + beta*(ap(k-1) - au(k-1))
989 ********************************************/
990 #undef NWORK
991 #define NWORK 13
992 #undef __FUNC__
993 #define __FUNC__ "lis_bicrsafe_check_params"
lis_bicrsafe_check_params(LIS_SOLVER solver)994 LIS_INT lis_bicrsafe_check_params(LIS_SOLVER solver)
995 {
996 LIS_DEBUG_FUNC_IN;
997 LIS_DEBUG_FUNC_OUT;
998 return LIS_SUCCESS;
999 }
1000
1001 #undef __FUNC__
1002 #define __FUNC__ "lis_bicrsafe_malloc_work"
lis_bicrsafe_malloc_work(LIS_SOLVER solver)1003 LIS_INT lis_bicrsafe_malloc_work(LIS_SOLVER solver)
1004 {
1005 LIS_VECTOR *work;
1006 LIS_INT i,j,worklen,err;
1007
1008 LIS_DEBUG_FUNC_IN;
1009
1010 worklen = NWORK;
1011 work = (LIS_VECTOR *)lis_malloc( worklen*sizeof(LIS_VECTOR),"lis_bicgsafe_malloc_work::work" );
1012 if( work==NULL )
1013 {
1014 LIS_SETERR_MEM(worklen*sizeof(LIS_VECTOR));
1015 return LIS_ERR_OUT_OF_MEMORY;
1016 }
1017 if( solver->precision==LIS_PRECISION_DEFAULT )
1018 {
1019 for(i=0;i<worklen;i++)
1020 {
1021 err = lis_vector_duplicate(solver->A,&work[i]);
1022 if( err ) break;
1023 }
1024 }
1025 else
1026 {
1027 for(i=0;i<worklen;i++)
1028 {
1029 err = lis_vector_duplicateex(LIS_PRECISION_QUAD,solver->A,&work[i]);
1030 if( err ) break;
1031 }
1032 }
1033 if( i<worklen )
1034 {
1035 for(j=0;j<i;j++) lis_vector_destroy(work[j]);
1036 lis_free(work);
1037 return err;
1038 }
1039 solver->worklen = worklen;
1040 solver->work = work;
1041
1042 LIS_DEBUG_FUNC_OUT;
1043 return LIS_SUCCESS;
1044 }
1045
1046 #undef __FUNC__
1047 #define __FUNC__ "lis_bicrsafe"
lis_bicrsafe(LIS_SOLVER solver)1048 LIS_INT lis_bicrsafe(LIS_SOLVER solver)
1049 {
1050 LIS_Comm comm;
1051 LIS_MATRIX A;
1052 LIS_VECTOR x;
1053 LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
1054 LIS_VECTOR y, my, u, au, z;
1055 LIS_SCALAR alpha, beta;
1056 LIS_SCALAR rho, rho_old;
1057 LIS_SCALAR qsi, eta;
1058 LIS_SCALAR tmp, tmpdot[5];
1059 LIS_REAL bnrm2, nrm2, tol;
1060 LIS_INT iter,maxiter,output,conv;
1061 double time,ptime;
1062
1063 LIS_DEBUG_FUNC_IN;
1064
1065 comm = LIS_COMM_WORLD;
1066
1067 A = solver->A;
1068 x = solver->x;
1069 maxiter = solver->options[LIS_OPTIONS_MAXITER];
1070 output = solver->options[LIS_OPTIONS_OUTPUT];
1071 conv = solver->options[LIS_OPTIONS_CONV_COND];
1072 ptime = 0.0;
1073
1074 rtld = solver->work[0];
1075 r = solver->work[1];
1076 mr = solver->work[2];
1077 amr = solver->work[3];
1078 p = solver->work[4];
1079 ap = solver->work[5];
1080 map = solver->work[6];
1081 my = solver->work[7];
1082 y = solver->work[8];
1083 u = solver->work[9];
1084 z = solver->work[10];
1085 au = solver->work[11];
1086 artld = solver->work[12];
1087
1088
1089 /* Initial Residual */
1090 if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
1091 {
1092 LIS_DEBUG_FUNC_OUT;
1093 return LIS_SUCCESS;
1094 }
1095 tol = solver->tol;
1096
1097 lis_solver_set_shadowresidual(solver,r,rtld);
1098
1099 lis_matvech(A,rtld,artld);
1100 time = lis_wtime();
1101 lis_psolve(solver, r, mr);
1102 ptime += lis_wtime()-time;
1103 lis_matvec(A,mr,amr);
1104 lis_vector_dot(rtld,amr,&rho_old);
1105 lis_vector_copy(amr,ap);
1106 lis_vector_copy(mr,p);
1107 beta = 0.0;
1108
1109
1110 for( iter=1; iter<=maxiter; iter++ )
1111 {
1112 /* map = M^-1 * ap */
1113 time = lis_wtime();
1114 lis_psolve(solver, ap, map);
1115 ptime += lis_wtime()-time;
1116
1117 /* tmpdot[0] = <artld,map> */
1118 /* alpha = rho_old / tmpdot[0] */
1119 lis_vector_dot(artld,map,&tmpdot[0]);
1120 alpha = rho_old / tmpdot[0];
1121
1122
1123 /* tmpdot[0] = <y,y> */
1124 /* tmpdot[1] = <amr,r> */
1125 /* tmpdot[2] = <y,r> */
1126 /* tmpdot[3] = <amr,y> */
1127 /* tmpdot[4] = <amr,amr> */
1128 lis_vector_dot(y,y,&tmpdot[0]);
1129 lis_vector_dot(amr,r,&tmpdot[1]);
1130 lis_vector_dot(y,r,&tmpdot[2]);
1131 lis_vector_dot(amr,y,&tmpdot[3]);
1132 lis_vector_dot(amr,amr,&tmpdot[4]);
1133 if(iter==1)
1134 {
1135 qsi = tmpdot[1] / tmpdot[4];
1136 eta = 0.0;
1137 }
1138 else
1139 {
1140 tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
1141 qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
1142 eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
1143 }
1144
1145 /* u = qsi*map + eta*my + eta*beta*u */
1146 /* au = A * u */
1147 lis_vector_scale(eta*beta,u);
1148 lis_vector_axpy(qsi,map,u);
1149 lis_vector_axpy(eta,my,u);
1150 lis_matvec(A,u,au);
1151
1152 /* z = qsi*mr + eta*z - alpha*u */
1153 lis_vector_scale(eta,z);
1154 lis_vector_axpy(qsi,mr,z);
1155 lis_vector_axpy(-alpha,u,z);
1156
1157 /* y = qsi*amr + eta*y - alpha*au */
1158 /* my = M^-1 * y */
1159 lis_vector_scale(eta,y);
1160 lis_vector_axpy(qsi,amr,y);
1161 lis_vector_axpy(-alpha,au,y);
1162 time = lis_wtime();
1163 lis_psolve(solver, y, my);
1164 ptime += lis_wtime()-time;
1165
1166 /* x = x + alpha*p + z */
1167 lis_vector_axpy(alpha,p,x);
1168 lis_vector_axpy(1.0,z,x);
1169
1170 /* r = r - alpha*ap - y */
1171 lis_vector_axpy(-alpha,ap,r);
1172 lis_vector_axpy(-1.0,y,r);
1173
1174 /* convergence check */
1175 lis_solver_get_residual[conv](r,solver,&nrm2);
1176 if( output )
1177 {
1178 if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
1179 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
1180 }
1181
1182 if( tol >= nrm2 )
1183 {
1184 solver->retcode = LIS_SUCCESS;
1185 solver->iter = iter;
1186 solver->resid = nrm2;
1187 solver->ptime = ptime;
1188 LIS_DEBUG_FUNC_OUT;
1189 return LIS_SUCCESS;
1190 }
1191
1192 /* mr = mr - alpha*map - my */
1193 /* amr = A * mr */
1194 /* rho = <rtld,amr> */
1195 lis_vector_axpy(-alpha,map,mr);
1196 lis_vector_axpy(-1.0,my,mr);
1197 lis_matvec(A,mr,amr);
1198 lis_vector_dot(rtld,amr,&rho);
1199 if( rho==0.0 )
1200 {
1201 solver->retcode = LIS_BREAKDOWN;
1202 solver->iter = iter;
1203 solver->resid = nrm2;
1204 LIS_DEBUG_FUNC_OUT;
1205 return LIS_BREAKDOWN;
1206 }
1207
1208 /* beta = (rho / rho_old) * (alpha / qsi) */
1209 beta = (rho / rho_old) * (alpha / qsi);
1210
1211
1212 /* p = mr + beta*(p - u) */
1213 /* ap = amr + beta*(ap - au) */
1214 lis_vector_axpy(-1.0,u,p);
1215 lis_vector_xpay(mr,beta,p);
1216 lis_vector_axpy(-1.0,au,ap);
1217 lis_vector_xpay(amr,beta,ap);
1218
1219 rho_old = rho;
1220 }
1221
1222 solver->retcode = LIS_MAXITER;
1223 solver->iter = iter;
1224 solver->resid = nrm2;
1225 LIS_DEBUG_FUNC_OUT;
1226 return LIS_MAXITER;
1227 }
1228
1229 #ifdef USE_QUAD_PRECISION
1230 #undef __FUNC__
1231 #define __FUNC__ "lis_bicrsafe_quad"
lis_bicrsafe_quad(LIS_SOLVER solver)1232 LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
1233 {
1234 LIS_Comm comm;
1235 LIS_MATRIX A;
1236 LIS_VECTOR x;
1237 LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
1238 LIS_VECTOR y, my, u, au, z;
1239 LIS_QUAD_PTR alpha, beta, rho, rho_old;
1240 LIS_QUAD_PTR qsi, eta, one;
1241 LIS_QUAD_PTR tmp, tmpdot[5];
1242 LIS_REAL bnrm2, nrm2, tol;
1243 LIS_INT iter,maxiter,output,conv;
1244 double time,ptime;
1245
1246 LIS_DEBUG_FUNC_IN;
1247
1248 comm = LIS_COMM_WORLD;
1249
1250 A = solver->A;
1251 x = solver->x;
1252 maxiter = solver->options[LIS_OPTIONS_MAXITER];
1253 output = solver->options[LIS_OPTIONS_OUTPUT];
1254 conv = solver->options[LIS_OPTIONS_CONV_COND];
1255 ptime = 0.0;
1256
1257 rtld = solver->work[0];
1258 r = solver->work[1];
1259 mr = solver->work[2];
1260 amr = solver->work[3];
1261 p = solver->work[4];
1262 ap = solver->work[5];
1263 map = solver->work[6];
1264 my = solver->work[7];
1265 y = solver->work[8];
1266 u = solver->work[9];
1267 z = solver->work[10];
1268 au = solver->work[11];
1269 artld = solver->work[12];
1270
1271 LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
1272 LIS_QUAD_SCALAR_MALLOC(beta,1,1);
1273 LIS_QUAD_SCALAR_MALLOC(rho,2,1);
1274 LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
1275 LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
1276 LIS_QUAD_SCALAR_MALLOC(eta,5,1);
1277 LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
1278 LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
1279 LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
1280 LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
1281 LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
1282 LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
1283 LIS_QUAD_SCALAR_MALLOC(one,13,1);
1284
1285
1286 /* Initial Residual */
1287 if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
1288 {
1289 LIS_DEBUG_FUNC_OUT;
1290 return LIS_SUCCESS;
1291 }
1292 tol = solver->tol;
1293
1294 lis_solver_set_shadowresidual(solver,r,rtld);
1295
1296 lis_matvech(A,rtld,artld);
1297 time = lis_wtime();
1298 lis_psolve(solver, r, mr);
1299 ptime += lis_wtime()-time;
1300 lis_matvec(A,mr,amr);
1301 lis_vector_dotex_mmm(rtld,amr,&rho_old);
1302 lis_vector_copyex_mm(amr,ap);
1303 lis_vector_copyex_mm(mr,p);
1304 one.hi[0] = -1.0;
1305 one.lo[0] = 0.0;
1306
1307
1308 for( iter=1; iter<=maxiter; iter++ )
1309 {
1310 /* map = M^-1 * ap */
1311 time = lis_wtime();
1312 lis_psolve(solver, ap, map);
1313 ptime += lis_wtime()-time;
1314
1315 /* tmpdot[0] = <artld,map> */
1316 /* alpha = rho_old / tmpdot[0] */
1317 lis_vector_dotex_mmm(artld,map,&tmpdot[0]);
1318 lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);
1319
1320
1321 /* tmpdot[0] = <y,y> */
1322 /* tmpdot[1] = <amr,r> */
1323 /* tmpdot[2] = <y,r> */
1324 /* tmpdot[3] = <amr,y> */
1325 /* tmpdot[4] = <amr,amr> */
1326 lis_vector_dotex_mmm(y,y,&tmpdot[0]);
1327 lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
1328 lis_vector_dotex_mmm(y,r,&tmpdot[2]);
1329 lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
1330 lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);
1331 if(iter==1)
1332 {
1333 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
1334 eta.hi[0] = 0.0;
1335 eta.lo[0] = 0.0;
1336 }
1337 else
1338 {
1339 lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
1340 lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
1341 lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);
1342
1343 lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
1344 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
1345 lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
1346 lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);
1347
1348 lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
1349 lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
1350 lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
1351 lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
1352 }
1353
1354 /* u = qsi*map + eta*my + eta*beta*u */
1355 /* au = A * u */
1356 lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
1357 lis_vector_scaleex_mm(tmp,u);
1358 lis_vector_axpyex_mmm(qsi,map,u);
1359 lis_vector_axpyex_mmm(eta,my,u);
1360 lis_matvec(A,u,au);
1361
1362 /* z = qsi*mr + eta*z - alpha*u */
1363 lis_vector_scaleex_mm(eta,z);
1364 lis_vector_axpyex_mmm(qsi,mr,z);
1365 lis_quad_minus((LIS_QUAD *)alpha.hi);
1366 lis_vector_axpyex_mmm(alpha,u,z);
1367
1368 /* y = qsi*amr + eta*y - alpha*au */
1369 /* my = M^-1 * y */
1370 lis_vector_scaleex_mm(eta,y);
1371 lis_vector_axpyex_mmm(qsi,amr,y);
1372 lis_vector_axpyex_mmm(alpha,au,y);
1373 time = lis_wtime();
1374 lis_psolve(solver, y, my);
1375 ptime += lis_wtime()-time;
1376
1377 /* x = x + alpha*p + z */
1378 lis_quad_minus((LIS_QUAD *)alpha.hi);
1379 lis_vector_axpyex_mmm(alpha,p,x);
1380 lis_quad_minus((LIS_QUAD *)one.hi);
1381 lis_vector_axpyex_mmm(one,z,x);
1382
1383 /* r = r - alpha*ap - y */
1384 lis_quad_minus((LIS_QUAD *)alpha.hi);
1385 lis_quad_minus((LIS_QUAD *)one.hi);
1386 lis_vector_axpyex_mmm(alpha,ap,r);
1387 lis_vector_axpyex_mmm(one,y,r);
1388
1389 /* convergence check */
1390 lis_solver_get_residual[conv](r,solver,&nrm2);
1391 if( output )
1392 {
1393 if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
1394 if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
1395 }
1396
1397 if( tol >= nrm2 )
1398 {
1399 solver->retcode = LIS_SUCCESS;
1400 solver->iter = iter;
1401 solver->resid = nrm2;
1402 solver->ptime = ptime;
1403 LIS_DEBUG_FUNC_OUT;
1404 return LIS_SUCCESS;
1405 }
1406
1407 /* mr = mr - alpha*map - my */
1408 /* amr = A * mr */
1409 /* rho = <rtld,amr> */
1410 lis_vector_axpyex_mmm(alpha,map,mr);
1411 lis_vector_axpyex_mmm(one,my,mr);
1412 lis_matvec(A,mr,amr);
1413 lis_vector_dotex_mmm(rtld,amr,&rho);
1414 if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
1415 {
1416 solver->retcode = LIS_BREAKDOWN;
1417 solver->iter = iter;
1418 solver->resid = nrm2;
1419 LIS_DEBUG_FUNC_OUT;
1420 return LIS_BREAKDOWN;
1421 }
1422
1423 /* beta = (rho / rho_old) * (alpha / qsi) */
1424 lis_quad_minus((LIS_QUAD *)alpha.hi);
1425 lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
1426 lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
1427 lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);
1428
1429
1430 /* p = mr + beta*(p - u) */
1431 /* ap = amr + beta*(ap - au) */
1432 lis_vector_axpyex_mmm(one,u,p);
1433 lis_vector_xpayex_mmm(mr,beta,p);
1434 lis_vector_axpyex_mmm(one,au,ap);
1435 lis_vector_xpayex_mmm(amr,beta,ap);
1436
1437 rho_old.hi[0] = rho.hi[0];
1438 rho_old.lo[0] = rho.lo[0];
1439 }
1440
1441 solver->retcode = LIS_MAXITER;
1442 solver->iter = iter;
1443 solver->resid = nrm2;
1444 LIS_DEBUG_FUNC_OUT;
1445 return LIS_MAXITER;
1446 }
1447 #endif
1448