1 /*
2  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
3  * "Enhanced implementation of BiCGStab(L) for solving linear systems
4  * of equations". This uses tricky delayed updating ideas to prevent
5  * round-off buildup.
6  *
7  * This has not been completely cleaned up into PETSc style.
8  *
9  * All the BLAS and LAPACK calls below should be removed and replaced with
10  * loops and the macros for block solvers converted from LINPACK; there is no way
11  * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
12  */
13 #include <petsc/private/kspimpl.h>              /*I   "petscksp.h" I*/
14 #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
15 #include <petscblaslapack.h>
16 
17 
KSPSolve_BCGSL(KSP ksp)18 static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
19 {
20   KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
21   PetscScalar    alpha, beta, omega, sigma;
22   PetscScalar    rho0, rho1;
23   PetscReal      kappa0, kappaA, kappa1;
24   PetscReal      ghat;
25   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
26   PetscBool      bUpdateX;
27   PetscInt       maxit;
28   PetscInt       h, i, j, k, vi, ell;
29   PetscBLASInt   ldMZ,bierr;
30   PetscScalar    utb;
31   PetscReal      max_s, pinv_tol;
32   PetscErrorCode ierr;
33 
34   PetscFunctionBegin;
35   /* set up temporary vectors */
36   vi         = 0;
37   ell        = bcgsl->ell;
38   bcgsl->vB  = ksp->work[vi]; vi++;
39   bcgsl->vRt = ksp->work[vi]; vi++;
40   bcgsl->vTm = ksp->work[vi]; vi++;
41   bcgsl->vvR = ksp->work+vi; vi += ell+1;
42   bcgsl->vvU = ksp->work+vi; vi += ell+1;
43   bcgsl->vXr = ksp->work[vi]; vi++;
44   ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);
45 
46   /* Prime the iterative solver */
47   ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
48   ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
49   KSPCheckNorm(ksp,zeta0);
50   rnmax_computed = zeta0;
51   rnmax_true     = zeta0;
52 
53 
54   ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
55   ksp->its   = 0;
56   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta0;
57   else ksp->rnorm = 0.0;
58   ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
59   ierr = (*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
60   if (ksp->reason) PetscFunctionReturn(0);
61 
62   ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
63   alpha = 0.;
64   rho0  = omega = 1;
65 
66   if (bcgsl->delta>0.0) {
67     ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
68     ierr = VecSet(VX,0.0);CHKERRQ(ierr);
69     ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
70   } else {
71     ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
72   }
73 
74   /* Life goes on */
75   ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
76   zeta = zeta0;
77 
78   ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);
79 
80   for (k=0; k<maxit; k += bcgsl->ell) {
81     ksp->its = k;
82     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
83     else ksp->rnorm = 0.0;
84 
85     ierr = KSPLogResidualHistory(ksp, ksp->rnorm);CHKERRQ(ierr);
86     ierr = KSPMonitor(ksp, ksp->its, ksp->rnorm);CHKERRQ(ierr);
87 
88     ierr = (*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
89     if (ksp->reason < 0) PetscFunctionReturn(0);
90     if (ksp->reason) {
91       if (bcgsl->delta>0.0) {
92         ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
93       }
94       PetscFunctionReturn(0);
95     }
96 
97     /* BiCG part */
98     rho0 = -omega*rho0;
99     nrm0 = zeta;
100     for (j=0; j<bcgsl->ell; j++) {
101       /* rho1 <- r_j' * r_tilde */
102       ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
103       KSPCheckDot(ksp,rho1);
104       if (rho1 == 0.0) {
105         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
106         PetscFunctionReturn(0);
107       }
108       beta = alpha*(rho1/rho0);
109       rho0 = rho1;
110       for (i=0; i<=j; i++) {
111         /* u_i <- r_i - beta*u_i */
112         ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
113       }
114       /* u_{j+1} <- inv(K)*A*u_j */
115       ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);
116 
117       ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
118       KSPCheckDot(ksp,sigma);
119       if (sigma == 0.0) {
120         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
121         PetscFunctionReturn(0);
122       }
123       alpha = rho1/sigma;
124 
125       /* x <- x + alpha*u_0 */
126       ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);
127 
128       for (i=0; i<=j; i++) {
129         /* r_i <- r_i - alpha*u_{i+1} */
130         ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
131       }
132 
133       /* r_{j+1} <- inv(K)*A*r_j */
134       ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);
135 
136       ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
137       KSPCheckNorm(ksp,nrm0);
138       if (bcgsl->delta>0.0) {
139         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
140         if (rnmax_true<nrm0) rnmax_true = nrm0;
141       }
142 
143       /* NEW: check for early exit */
144       ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
145       if (ksp->reason) {
146         ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
147         ksp->its   = k+j;
148         ksp->rnorm = nrm0;
149 
150         ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
151         if (ksp->reason < 0) PetscFunctionReturn(0);
152       }
153     }
154 
155     /* Polynomial part */
156     for (i = 0; i <= bcgsl->ell; ++i) {
157       ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
158     }
159     /* Symmetrize MZa */
160     for (i = 0; i <= bcgsl->ell; ++i) {
161       for (j = i+1; j <= bcgsl->ell; ++j) {
162         MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
163       }
164     }
165     /* Copy MZa to MZb */
166     ierr = PetscArraycpy(MZb,MZa,ldMZ*ldMZ);CHKERRQ(ierr);
167 
168     if (!bcgsl->bConvex || bcgsl->ell==1) {
169       PetscBLASInt ione = 1,bell;
170       ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);
171 
172       AY0c[0] = -1;
173       if (bcgsl->pinv) {
174 #  if defined(PETSC_USE_COMPLEX)
175         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
176 #  else
177         PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
178 #  endif
179         if (bierr!=0) {
180           ksp->reason = KSP_DIVERGED_BREAKDOWN;
181           PetscFunctionReturn(0);
182         }
183         /* Apply pseudo-inverse */
184         max_s = bcgsl->s[0];
185         for (i=1; i<bell; i++) {
186           if (bcgsl->s[i] > max_s) {
187             max_s = bcgsl->s[i];
188           }
189         }
190         /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
191         pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
192         ierr = PetscArrayzero(&AY0c[1],bell);CHKERRQ(ierr);
193         for (i=0; i<bell; i++) {
194           if (bcgsl->s[i] >= pinv_tol) {
195             utb=0.;
196             for (j=0; j<bell; j++) {
197               utb += MZb[1+j]*bcgsl->u[i*bell+j];
198             }
199 
200             for (j=0; j<bell; j++) {
201               AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
202             }
203           }
204         }
205       } else {
206         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
207         if (bierr!=0) {
208           ksp->reason = KSP_DIVERGED_BREAKDOWN;
209           PetscFunctionReturn(0);
210         }
211         ierr = PetscArraycpy(&AY0c[1],&MZb[1],bcgsl->ell);CHKERRQ(ierr);
212         PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
213       }
214     } else {
215       PetscBLASInt ione = 1;
216       PetscScalar  aone = 1.0, azero = 0.0;
217       PetscBLASInt neqs;
218       ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);
219 
220       PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
221       if (bierr!=0) {
222         ksp->reason = KSP_DIVERGED_BREAKDOWN;
223         PetscFunctionReturn(0);
224       }
225       ierr = PetscArraycpy(&AY0c[1],&MZb[1],bcgsl->ell-1);CHKERRQ(ierr);
226       PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
227       AY0c[0]          = -1;
228       AY0c[bcgsl->ell] = 0.;
229 
230       ierr = PetscArraycpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],bcgsl->ell-1);CHKERRQ(ierr);
231       PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));
232 
233       AYlc[0]          = 0.;
234       AYlc[bcgsl->ell] = -1;
235 
236       PetscStackCallBLAS("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));
237 
238       kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));
239 
240       /* round-off can cause negative kappa's */
241       if (kappa0<0) kappa0 = -kappa0;
242       kappa0 = PetscSqrtReal(kappa0);
243 
244       kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
245 
246       PetscStackCallBLAS("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));
247 
248       kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
249 
250       if (kappa1<0) kappa1 = -kappa1;
251       kappa1 = PetscSqrtReal(kappa1);
252 
253       if (kappa0!=0.0 && kappa1!=0.0) {
254         if (kappaA<0.7*kappa0*kappa1) {
255           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
256         } else {
257           ghat = kappaA/(kappa1*kappa1);
258         }
259         for (i=0; i<=bcgsl->ell; i++) {
260           AY0c[i] = AY0c[i] - ghat* AYlc[i];
261         }
262       }
263     }
264 
265     omega = AY0c[bcgsl->ell];
266     for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
267     if (omega==0.0) {
268       ksp->reason = KSP_DIVERGED_BREAKDOWN;
269       PetscFunctionReturn(0);
270     }
271 
272 
273     ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
274     for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
275     ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
276     ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
277     for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
278     ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);
279     KSPCheckNorm(ksp,zeta);
280 
281     /* Accurate Update */
282     if (bcgsl->delta>0.0) {
283       if (rnmax_computed<zeta) rnmax_computed = zeta;
284       if (rnmax_true<zeta) rnmax_true = zeta;
285 
286       bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
287       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
288         /* r0 <- b-inv(K)*A*X */
289         ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
290         ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
291         rnmax_true = zeta;
292 
293         if (bUpdateX) {
294           ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
295           ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
296           ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
297           rnmax_computed = zeta;
298         }
299       }
300     }
301   }
302   if (bcgsl->delta>0.0) {
303     ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
304   }
305 
306   ksp->its = k;
307   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
308   else ksp->rnorm = 0.0;
309   ierr = KSPMonitor(ksp, ksp->its, ksp->rnorm);CHKERRQ(ierr);
310   ierr = KSPLogResidualHistory(ksp, ksp->rnorm);CHKERRQ(ierr);
311   ierr = (*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
312   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
313   PetscFunctionReturn(0);
314 }
315 
316 /*@
317    KSPBCGSLSetXRes - Sets the parameter governing when
318    exact residuals will be used instead of computed residuals.
319 
320    Logically Collective on ksp
321 
322    Input Parameters:
323 +  ksp - iterative context obtained from KSPCreate
324 -  delta - computed residuals are used alone when delta is not positive
325 
326    Options Database Keys:
327 
328 .  -ksp_bcgsl_xres delta
329 
330    Level: intermediate
331 
332 .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol(), KSP
333 @*/
KSPBCGSLSetXRes(KSP ksp,PetscReal delta)334 PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
335 {
336   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
337   PetscErrorCode ierr;
338 
339   PetscFunctionBegin;
340   PetscValidLogicalCollectiveReal(ksp,delta,2);
341   if (ksp->setupstage) {
342     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
343       ierr            = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
344       ierr            = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
345       ierr            = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
346       ksp->setupstage = KSP_SETUP_NEW;
347     }
348   }
349   bcgsl->delta = delta;
350   PetscFunctionReturn(0);
351 }
352 
353 /*@
354    KSPBCGSLSetUsePseudoinverse - Use pseudoinverse (via SVD) to solve polynomial part of update
355 
356    Logically Collective on ksp
357 
358    Input Parameters:
359 +  ksp - iterative context obtained from KSPCreate
360 -  use_pinv - set to PETSC_TRUE when using pseudoinverse
361 
362    Options Database Keys:
363 
364 .  -ksp_bcgsl_pinv - use pseudoinverse
365 
366    Level: intermediate
367 
368 .seealso: KSPBCGSLSetEll(), KSP
369 @*/
KSPBCGSLSetUsePseudoinverse(KSP ksp,PetscBool use_pinv)370 PetscErrorCode KSPBCGSLSetUsePseudoinverse(KSP ksp,PetscBool use_pinv)
371 {
372   KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
373 
374   PetscFunctionBegin;
375   bcgsl->pinv = use_pinv;
376   PetscFunctionReturn(0);
377 }
378 
379 /*@
380    KSPBCGSLSetPol - Sets the type of polynomial part will
381    be used in the BiCGSTab(L) solver.
382 
383    Logically Collective on ksp
384 
385    Input Parameters:
386 +  ksp - iterative context obtained from KSPCreate
387 -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
388 
389    Options Database Keys:
390 
391 +  -ksp_bcgsl_cxpoly - use enhanced polynomial
392 -  -ksp_bcgsl_mrpoly - use standard polynomial
393 
394    Level: intermediate
395 
396 .seealso: KSP, KSPBCGSL, KSPCreate(), KSPSetType()
397 @*/
KSPBCGSLSetPol(KSP ksp,PetscBool uMROR)398 PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
399 {
400   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
401   PetscErrorCode ierr;
402 
403   PetscFunctionBegin;
404   PetscValidLogicalCollectiveBool(ksp,uMROR,2);
405 
406   if (!ksp->setupstage) bcgsl->bConvex = uMROR;
407   else if (bcgsl->bConvex != uMROR) {
408     /* free the data structures,
409        then create them again
410      */
411     ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
412     ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
413     ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
414 
415     bcgsl->bConvex  = uMROR;
416     ksp->setupstage = KSP_SETUP_NEW;
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 /*@
422    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
423 
424    Logically Collective on ksp
425 
426    Input Parameters:
427 +  ksp - iterative context obtained from KSPCreate
428 -  ell - number of search directions
429 
430    Options Database Keys:
431 
432 .  -ksp_bcgsl_ell ell
433 
434    Level: intermediate
435 
436    Notes:
437    For large ell it is common for the polynomial update problem to become singular (due to happy breakdown for smallish
438    test problems, but also for larger problems). Consequently, by default, the system is solved by pseudoinverse, which
439    allows the iteration to complete successfully. See KSPBCGSLSetUsePseudoinverse() to switch to a conventional solve.
440 
441 .seealso: KSPBCGSLSetUsePseudoinverse(), KSP, KSPBCGSL
442 @*/
KSPBCGSLSetEll(KSP ksp,PetscInt ell)443 PetscErrorCode  KSPBCGSLSetEll(KSP ksp, PetscInt ell)
444 {
445   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
446   PetscErrorCode ierr;
447 
448   PetscFunctionBegin;
449   if (ell < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
450   PetscValidLogicalCollectiveInt(ksp,ell,2);
451 
452   if (!ksp->setupstage) bcgsl->ell = ell;
453   else if (bcgsl->ell != ell) {
454     /* free the data structures, then create them again */
455     ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
456     ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
457     ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
458 
459     bcgsl->ell      = ell;
460     ksp->setupstage = KSP_SETUP_NEW;
461   }
462   PetscFunctionReturn(0);
463 }
464 
KSPView_BCGSL(KSP ksp,PetscViewer viewer)465 PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
466 {
467   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
468   PetscErrorCode ierr;
469   PetscBool      isascii;
470 
471   PetscFunctionBegin;
472   ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
473 
474   if (isascii) {
475     ierr = PetscViewerASCIIPrintf(viewer, "  Ell = %D\n", bcgsl->ell);CHKERRQ(ierr);
476     ierr = PetscViewerASCIIPrintf(viewer, "  Delta = %lg\n", bcgsl->delta);CHKERRQ(ierr);
477   }
478   PetscFunctionReturn(0);
479 }
480 
KSPSetFromOptions_BCGSL(PetscOptionItems * PetscOptionsObject,KSP ksp)481 PetscErrorCode KSPSetFromOptions_BCGSL(PetscOptionItems *PetscOptionsObject,KSP ksp)
482 {
483   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
484   PetscErrorCode ierr;
485   PetscInt       this_ell;
486   PetscReal      delta;
487   PetscBool      flga = PETSC_FALSE, flg;
488 
489   PetscFunctionBegin;
490   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
491      don't need to be called here.
492   */
493   ierr = PetscOptionsHead(PetscOptionsObject,"KSP BiCGStab(L) Options");CHKERRQ(ierr);
494 
495   /* Set number of search directions */
496   ierr = PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);CHKERRQ(ierr);
497   if (flg) {
498     ierr = KSPBCGSLSetEll(ksp, this_ell);CHKERRQ(ierr);
499   }
500 
501   /* Set polynomial type */
502   ierr = PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,NULL);CHKERRQ(ierr);
503   if (flga) {
504     ierr = KSPBCGSLSetPol(ksp, PETSC_TRUE);CHKERRQ(ierr);
505   } else {
506     flg  = PETSC_FALSE;
507     ierr = PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,NULL);CHKERRQ(ierr);
508     ierr = KSPBCGSLSetPol(ksp, PETSC_FALSE);CHKERRQ(ierr);
509   }
510 
511   /* Will computed residual be refreshed? */
512   ierr = PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);CHKERRQ(ierr);
513   if (flg) {
514     ierr = KSPBCGSLSetXRes(ksp, delta);CHKERRQ(ierr);
515   }
516 
517   /* Use pseudoinverse? */
518   flg  = bcgsl->pinv;
519   ierr = PetscOptionsBool("-ksp_bcgsl_pinv", "Polynomial correction via pseudoinverse", "KSPBCGSLSetUsePseudoinverse",flg,&flg,NULL);CHKERRQ(ierr);
520   ierr = KSPBCGSLSetUsePseudoinverse(ksp,flg);CHKERRQ(ierr);
521   ierr = PetscOptionsTail();CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 
KSPSetUp_BCGSL(KSP ksp)525 PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
526 {
527   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
528   PetscInt       ell    = bcgsl->ell,ldMZ = ell+1;
529   PetscErrorCode ierr;
530 
531   PetscFunctionBegin;
532   ierr = KSPSetWorkVecs(ksp, 6+2*ell);CHKERRQ(ierr);
533   ierr = PetscMalloc5(ldMZ,&AY0c,ldMZ,&AYlc,ldMZ,&AYtc,ldMZ*ldMZ,&MZa,ldMZ*ldMZ,&MZb);CHKERRQ(ierr);
534   ierr = PetscBLASIntCast(5*ell,&bcgsl->lwork);CHKERRQ(ierr);
535   ierr = PetscMalloc5(bcgsl->lwork,&bcgsl->work,ell,&bcgsl->s,ell*ell,&bcgsl->u,ell*ell,&bcgsl->v,5*ell,&bcgsl->realwork);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
KSPReset_BCGSL(KSP ksp)539 PetscErrorCode KSPReset_BCGSL(KSP ksp)
540 {
541   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
546   ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
547   ierr = PetscFree5(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v,bcgsl->realwork);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
KSPDestroy_BCGSL(KSP ksp)551 PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
552 {
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = KSPReset_BCGSL(ksp);CHKERRQ(ierr);
557   ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 /*MC
562      KSPBCGSL - Implements a slight variant of the Enhanced
563                 BiCGStab(L) algorithm in (3) and (2).  The variation
564                 concerns cases when either kappa0**2 or kappa1**2 is
565                 negative due to round-off. Kappa0 has also been pulled
566                 out of the denominator in the formula for ghat.
567 
568     References:
569 +     1. - G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
570          approaches for the stable computation of hybrid BiCG
571          methods", Applied Numerical Mathematics: Transactions
572          f IMACS, 19(3), 1996.
573 .     2. -  G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
574          "BiCGStab(L) and other hybrid BiCG methods",
575           Numerical Algorithms, 7, 1994.
576 -     3. -  D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
577          for solving linear systems of equations", preprint
578          from www.citeseer.com.
579 
580    Contributed by: Joel M. Malard, email jm.malard@pnl.gov
581 
582    Options Database Keys:
583 +  -ksp_bcgsl_ell <ell> Number of Krylov search directions, defaults to 2 -- KSPBCGSLSetEll()
584 .  -ksp_bcgsl_cxpol - Use a convex function of the MinRes and OR polynomials after the BiCG step instead of default MinRes -- KSPBCGSLSetPol()
585 .  -ksp_bcgsl_mrpoly - Use the default MinRes polynomial after the BiCG step  -- KSPBCGSLSetPol()
586 .  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals -- KSPBCGSLSetXRes()
587 -  -ksp_bcgsl_pinv <true/false> - (de)activate use of pseudoinverse -- KSPBCGSLSetUsePseudoinverse()
588 
589    Level: beginner
590 
591 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS, KSPSetPCSide(), KSPBCGSLSetEll(), KSPBCGSLSetXRes()
592 
593 M*/
KSPCreate_BCGSL(KSP ksp)594 PETSC_EXTERN PetscErrorCode KSPCreate_BCGSL(KSP ksp)
595 {
596   PetscErrorCode ierr;
597   KSP_BCGSL      *bcgsl;
598 
599   PetscFunctionBegin;
600   /* allocate BiCGStab(L) context */
601   ierr      = PetscNewLog(ksp,&bcgsl);CHKERRQ(ierr);
602   ksp->data = (void*)bcgsl;
603 
604   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);CHKERRQ(ierr);
605   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);CHKERRQ(ierr);
606   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr);
607 
608   ksp->ops->setup          = KSPSetUp_BCGSL;
609   ksp->ops->solve          = KSPSolve_BCGSL;
610   ksp->ops->reset          = KSPReset_BCGSL;
611   ksp->ops->destroy        = KSPDestroy_BCGSL;
612   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
613   ksp->ops->buildresidual  = KSPBuildResidualDefault;
614   ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
615   ksp->ops->view           = KSPView_BCGSL;
616 
617   /* Let the user redefine the number of directions vectors */
618   bcgsl->ell = 2;
619 
620   /*Choose between a single MR step or an averaged MR/OR */
621   bcgsl->bConvex = PETSC_FALSE;
622 
623   bcgsl->pinv = PETSC_TRUE;     /* Use the reliable method by default */
624 
625   /* Set the threshold for when exact residuals will be used */
626   bcgsl->delta = 0.0;
627   PetscFunctionReturn(0);
628 }
629