1 /*
2 * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
3 * "Enhanced implementation of BiCGStab(L) for solving linear systems
4 * of equations". This uses tricky delayed updating ideas to prevent
5 * round-off buildup.
6 *
7 * This has not been completely cleaned up into PETSc style.
8 *
9 * All the BLAS and LAPACK calls below should be removed and replaced with
10 * loops and the macros for block solvers converted from LINPACK; there is no way
11 * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
12 */
13 #include <petsc/private/kspimpl.h> /*I "petscksp.h" I*/
14 #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
15 #include <petscblaslapack.h>
16
17
KSPSolve_BCGSL(KSP ksp)18 static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
19 {
20 KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data;
21 PetscScalar alpha, beta, omega, sigma;
22 PetscScalar rho0, rho1;
23 PetscReal kappa0, kappaA, kappa1;
24 PetscReal ghat;
25 PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
26 PetscBool bUpdateX;
27 PetscInt maxit;
28 PetscInt h, i, j, k, vi, ell;
29 PetscBLASInt ldMZ,bierr;
30 PetscScalar utb;
31 PetscReal max_s, pinv_tol;
32 PetscErrorCode ierr;
33
34 PetscFunctionBegin;
35 /* set up temporary vectors */
36 vi = 0;
37 ell = bcgsl->ell;
38 bcgsl->vB = ksp->work[vi]; vi++;
39 bcgsl->vRt = ksp->work[vi]; vi++;
40 bcgsl->vTm = ksp->work[vi]; vi++;
41 bcgsl->vvR = ksp->work+vi; vi += ell+1;
42 bcgsl->vvU = ksp->work+vi; vi += ell+1;
43 bcgsl->vXr = ksp->work[vi]; vi++;
44 ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);
45
46 /* Prime the iterative solver */
47 ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
48 ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
49 KSPCheckNorm(ksp,zeta0);
50 rnmax_computed = zeta0;
51 rnmax_true = zeta0;
52
53
54 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
55 ksp->its = 0;
56 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta0;
57 else ksp->rnorm = 0.0;
58 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
59 ierr = (*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
60 if (ksp->reason) PetscFunctionReturn(0);
61
62 ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr);
63 alpha = 0.;
64 rho0 = omega = 1;
65
66 if (bcgsl->delta>0.0) {
67 ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
68 ierr = VecSet(VX,0.0);CHKERRQ(ierr);
69 ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
70 } else {
71 ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
72 }
73
74 /* Life goes on */
75 ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
76 zeta = zeta0;
77
78 ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);
79
80 for (k=0; k<maxit; k += bcgsl->ell) {
81 ksp->its = k;
82 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
83 else ksp->rnorm = 0.0;
84
85 ierr = KSPLogResidualHistory(ksp, ksp->rnorm);CHKERRQ(ierr);
86 ierr = KSPMonitor(ksp, ksp->its, ksp->rnorm);CHKERRQ(ierr);
87
88 ierr = (*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
89 if (ksp->reason < 0) PetscFunctionReturn(0);
90 if (ksp->reason) {
91 if (bcgsl->delta>0.0) {
92 ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
93 }
94 PetscFunctionReturn(0);
95 }
96
97 /* BiCG part */
98 rho0 = -omega*rho0;
99 nrm0 = zeta;
100 for (j=0; j<bcgsl->ell; j++) {
101 /* rho1 <- r_j' * r_tilde */
102 ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
103 KSPCheckDot(ksp,rho1);
104 if (rho1 == 0.0) {
105 ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
106 PetscFunctionReturn(0);
107 }
108 beta = alpha*(rho1/rho0);
109 rho0 = rho1;
110 for (i=0; i<=j; i++) {
111 /* u_i <- r_i - beta*u_i */
112 ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
113 }
114 /* u_{j+1} <- inv(K)*A*u_j */
115 ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);
116
117 ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
118 KSPCheckDot(ksp,sigma);
119 if (sigma == 0.0) {
120 ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
121 PetscFunctionReturn(0);
122 }
123 alpha = rho1/sigma;
124
125 /* x <- x + alpha*u_0 */
126 ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);
127
128 for (i=0; i<=j; i++) {
129 /* r_i <- r_i - alpha*u_{i+1} */
130 ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
131 }
132
133 /* r_{j+1} <- inv(K)*A*r_j */
134 ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);
135
136 ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
137 KSPCheckNorm(ksp,nrm0);
138 if (bcgsl->delta>0.0) {
139 if (rnmax_computed<nrm0) rnmax_computed = nrm0;
140 if (rnmax_true<nrm0) rnmax_true = nrm0;
141 }
142
143 /* NEW: check for early exit */
144 ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
145 if (ksp->reason) {
146 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
147 ksp->its = k+j;
148 ksp->rnorm = nrm0;
149
150 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
151 if (ksp->reason < 0) PetscFunctionReturn(0);
152 }
153 }
154
155 /* Polynomial part */
156 for (i = 0; i <= bcgsl->ell; ++i) {
157 ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
158 }
159 /* Symmetrize MZa */
160 for (i = 0; i <= bcgsl->ell; ++i) {
161 for (j = i+1; j <= bcgsl->ell; ++j) {
162 MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
163 }
164 }
165 /* Copy MZa to MZb */
166 ierr = PetscArraycpy(MZb,MZa,ldMZ*ldMZ);CHKERRQ(ierr);
167
168 if (!bcgsl->bConvex || bcgsl->ell==1) {
169 PetscBLASInt ione = 1,bell;
170 ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);
171
172 AY0c[0] = -1;
173 if (bcgsl->pinv) {
174 # if defined(PETSC_USE_COMPLEX)
175 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
176 # else
177 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
178 # endif
179 if (bierr!=0) {
180 ksp->reason = KSP_DIVERGED_BREAKDOWN;
181 PetscFunctionReturn(0);
182 }
183 /* Apply pseudo-inverse */
184 max_s = bcgsl->s[0];
185 for (i=1; i<bell; i++) {
186 if (bcgsl->s[i] > max_s) {
187 max_s = bcgsl->s[i];
188 }
189 }
190 /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
191 pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
192 ierr = PetscArrayzero(&AY0c[1],bell);CHKERRQ(ierr);
193 for (i=0; i<bell; i++) {
194 if (bcgsl->s[i] >= pinv_tol) {
195 utb=0.;
196 for (j=0; j<bell; j++) {
197 utb += MZb[1+j]*bcgsl->u[i*bell+j];
198 }
199
200 for (j=0; j<bell; j++) {
201 AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
202 }
203 }
204 }
205 } else {
206 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
207 if (bierr!=0) {
208 ksp->reason = KSP_DIVERGED_BREAKDOWN;
209 PetscFunctionReturn(0);
210 }
211 ierr = PetscArraycpy(&AY0c[1],&MZb[1],bcgsl->ell);CHKERRQ(ierr);
212 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
213 }
214 } else {
215 PetscBLASInt ione = 1;
216 PetscScalar aone = 1.0, azero = 0.0;
217 PetscBLASInt neqs;
218 ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);
219
220 PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
221 if (bierr!=0) {
222 ksp->reason = KSP_DIVERGED_BREAKDOWN;
223 PetscFunctionReturn(0);
224 }
225 ierr = PetscArraycpy(&AY0c[1],&MZb[1],bcgsl->ell-1);CHKERRQ(ierr);
226 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
227 AY0c[0] = -1;
228 AY0c[bcgsl->ell] = 0.;
229
230 ierr = PetscArraycpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],bcgsl->ell-1);CHKERRQ(ierr);
231 PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));
232
233 AYlc[0] = 0.;
234 AYlc[bcgsl->ell] = -1;
235
236 PetscStackCallBLAS("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));
237
238 kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));
239
240 /* round-off can cause negative kappa's */
241 if (kappa0<0) kappa0 = -kappa0;
242 kappa0 = PetscSqrtReal(kappa0);
243
244 kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
245
246 PetscStackCallBLAS("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));
247
248 kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));
249
250 if (kappa1<0) kappa1 = -kappa1;
251 kappa1 = PetscSqrtReal(kappa1);
252
253 if (kappa0!=0.0 && kappa1!=0.0) {
254 if (kappaA<0.7*kappa0*kappa1) {
255 ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
256 } else {
257 ghat = kappaA/(kappa1*kappa1);
258 }
259 for (i=0; i<=bcgsl->ell; i++) {
260 AY0c[i] = AY0c[i] - ghat* AYlc[i];
261 }
262 }
263 }
264
265 omega = AY0c[bcgsl->ell];
266 for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
267 if (omega==0.0) {
268 ksp->reason = KSP_DIVERGED_BREAKDOWN;
269 PetscFunctionReturn(0);
270 }
271
272
273 ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
274 for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
275 ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
276 ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
277 for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
278 ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);
279 KSPCheckNorm(ksp,zeta);
280
281 /* Accurate Update */
282 if (bcgsl->delta>0.0) {
283 if (rnmax_computed<zeta) rnmax_computed = zeta;
284 if (rnmax_true<zeta) rnmax_true = zeta;
285
286 bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
287 if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
288 /* r0 <- b-inv(K)*A*X */
289 ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
290 ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
291 rnmax_true = zeta;
292
293 if (bUpdateX) {
294 ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
295 ierr = VecSet(VX,0.0);CHKERRQ(ierr);
296 ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
297 rnmax_computed = zeta;
298 }
299 }
300 }
301 }
302 if (bcgsl->delta>0.0) {
303 ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
304 }
305
306 ksp->its = k;
307 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
308 else ksp->rnorm = 0.0;
309 ierr = KSPMonitor(ksp, ksp->its, ksp->rnorm);CHKERRQ(ierr);
310 ierr = KSPLogResidualHistory(ksp, ksp->rnorm);CHKERRQ(ierr);
311 ierr = (*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
312 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
313 PetscFunctionReturn(0);
314 }
315
316 /*@
317 KSPBCGSLSetXRes - Sets the parameter governing when
318 exact residuals will be used instead of computed residuals.
319
320 Logically Collective on ksp
321
322 Input Parameters:
323 + ksp - iterative context obtained from KSPCreate
324 - delta - computed residuals are used alone when delta is not positive
325
326 Options Database Keys:
327
328 . -ksp_bcgsl_xres delta
329
330 Level: intermediate
331
332 .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol(), KSP
333 @*/
KSPBCGSLSetXRes(KSP ksp,PetscReal delta)334 PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
335 {
336 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
337 PetscErrorCode ierr;
338
339 PetscFunctionBegin;
340 PetscValidLogicalCollectiveReal(ksp,delta,2);
341 if (ksp->setupstage) {
342 if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
343 ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
344 ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
345 ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
346 ksp->setupstage = KSP_SETUP_NEW;
347 }
348 }
349 bcgsl->delta = delta;
350 PetscFunctionReturn(0);
351 }
352
353 /*@
354 KSPBCGSLSetUsePseudoinverse - Use pseudoinverse (via SVD) to solve polynomial part of update
355
356 Logically Collective on ksp
357
358 Input Parameters:
359 + ksp - iterative context obtained from KSPCreate
360 - use_pinv - set to PETSC_TRUE when using pseudoinverse
361
362 Options Database Keys:
363
364 . -ksp_bcgsl_pinv - use pseudoinverse
365
366 Level: intermediate
367
368 .seealso: KSPBCGSLSetEll(), KSP
369 @*/
KSPBCGSLSetUsePseudoinverse(KSP ksp,PetscBool use_pinv)370 PetscErrorCode KSPBCGSLSetUsePseudoinverse(KSP ksp,PetscBool use_pinv)
371 {
372 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
373
374 PetscFunctionBegin;
375 bcgsl->pinv = use_pinv;
376 PetscFunctionReturn(0);
377 }
378
379 /*@
380 KSPBCGSLSetPol - Sets the type of polynomial part will
381 be used in the BiCGSTab(L) solver.
382
383 Logically Collective on ksp
384
385 Input Parameters:
386 + ksp - iterative context obtained from KSPCreate
387 - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
388
389 Options Database Keys:
390
391 + -ksp_bcgsl_cxpoly - use enhanced polynomial
392 - -ksp_bcgsl_mrpoly - use standard polynomial
393
394 Level: intermediate
395
396 .seealso: KSP, KSPBCGSL, KSPCreate(), KSPSetType()
397 @*/
KSPBCGSLSetPol(KSP ksp,PetscBool uMROR)398 PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
399 {
400 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
401 PetscErrorCode ierr;
402
403 PetscFunctionBegin;
404 PetscValidLogicalCollectiveBool(ksp,uMROR,2);
405
406 if (!ksp->setupstage) bcgsl->bConvex = uMROR;
407 else if (bcgsl->bConvex != uMROR) {
408 /* free the data structures,
409 then create them again
410 */
411 ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
412 ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
413 ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
414
415 bcgsl->bConvex = uMROR;
416 ksp->setupstage = KSP_SETUP_NEW;
417 }
418 PetscFunctionReturn(0);
419 }
420
421 /*@
422 KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
423
424 Logically Collective on ksp
425
426 Input Parameters:
427 + ksp - iterative context obtained from KSPCreate
428 - ell - number of search directions
429
430 Options Database Keys:
431
432 . -ksp_bcgsl_ell ell
433
434 Level: intermediate
435
436 Notes:
437 For large ell it is common for the polynomial update problem to become singular (due to happy breakdown for smallish
438 test problems, but also for larger problems). Consequently, by default, the system is solved by pseudoinverse, which
439 allows the iteration to complete successfully. See KSPBCGSLSetUsePseudoinverse() to switch to a conventional solve.
440
441 .seealso: KSPBCGSLSetUsePseudoinverse(), KSP, KSPBCGSL
442 @*/
KSPBCGSLSetEll(KSP ksp,PetscInt ell)443 PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell)
444 {
445 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
446 PetscErrorCode ierr;
447
448 PetscFunctionBegin;
449 if (ell < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
450 PetscValidLogicalCollectiveInt(ksp,ell,2);
451
452 if (!ksp->setupstage) bcgsl->ell = ell;
453 else if (bcgsl->ell != ell) {
454 /* free the data structures, then create them again */
455 ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
456 ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
457 ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
458
459 bcgsl->ell = ell;
460 ksp->setupstage = KSP_SETUP_NEW;
461 }
462 PetscFunctionReturn(0);
463 }
464
KSPView_BCGSL(KSP ksp,PetscViewer viewer)465 PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
466 {
467 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
468 PetscErrorCode ierr;
469 PetscBool isascii;
470
471 PetscFunctionBegin;
472 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
473
474 if (isascii) {
475 ierr = PetscViewerASCIIPrintf(viewer, " Ell = %D\n", bcgsl->ell);CHKERRQ(ierr);
476 ierr = PetscViewerASCIIPrintf(viewer, " Delta = %lg\n", bcgsl->delta);CHKERRQ(ierr);
477 }
478 PetscFunctionReturn(0);
479 }
480
KSPSetFromOptions_BCGSL(PetscOptionItems * PetscOptionsObject,KSP ksp)481 PetscErrorCode KSPSetFromOptions_BCGSL(PetscOptionItems *PetscOptionsObject,KSP ksp)
482 {
483 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
484 PetscErrorCode ierr;
485 PetscInt this_ell;
486 PetscReal delta;
487 PetscBool flga = PETSC_FALSE, flg;
488
489 PetscFunctionBegin;
490 /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
491 don't need to be called here.
492 */
493 ierr = PetscOptionsHead(PetscOptionsObject,"KSP BiCGStab(L) Options");CHKERRQ(ierr);
494
495 /* Set number of search directions */
496 ierr = PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);CHKERRQ(ierr);
497 if (flg) {
498 ierr = KSPBCGSLSetEll(ksp, this_ell);CHKERRQ(ierr);
499 }
500
501 /* Set polynomial type */
502 ierr = PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,NULL);CHKERRQ(ierr);
503 if (flga) {
504 ierr = KSPBCGSLSetPol(ksp, PETSC_TRUE);CHKERRQ(ierr);
505 } else {
506 flg = PETSC_FALSE;
507 ierr = PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,NULL);CHKERRQ(ierr);
508 ierr = KSPBCGSLSetPol(ksp, PETSC_FALSE);CHKERRQ(ierr);
509 }
510
511 /* Will computed residual be refreshed? */
512 ierr = PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);CHKERRQ(ierr);
513 if (flg) {
514 ierr = KSPBCGSLSetXRes(ksp, delta);CHKERRQ(ierr);
515 }
516
517 /* Use pseudoinverse? */
518 flg = bcgsl->pinv;
519 ierr = PetscOptionsBool("-ksp_bcgsl_pinv", "Polynomial correction via pseudoinverse", "KSPBCGSLSetUsePseudoinverse",flg,&flg,NULL);CHKERRQ(ierr);
520 ierr = KSPBCGSLSetUsePseudoinverse(ksp,flg);CHKERRQ(ierr);
521 ierr = PetscOptionsTail();CHKERRQ(ierr);
522 PetscFunctionReturn(0);
523 }
524
KSPSetUp_BCGSL(KSP ksp)525 PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
526 {
527 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
528 PetscInt ell = bcgsl->ell,ldMZ = ell+1;
529 PetscErrorCode ierr;
530
531 PetscFunctionBegin;
532 ierr = KSPSetWorkVecs(ksp, 6+2*ell);CHKERRQ(ierr);
533 ierr = PetscMalloc5(ldMZ,&AY0c,ldMZ,&AYlc,ldMZ,&AYtc,ldMZ*ldMZ,&MZa,ldMZ*ldMZ,&MZb);CHKERRQ(ierr);
534 ierr = PetscBLASIntCast(5*ell,&bcgsl->lwork);CHKERRQ(ierr);
535 ierr = PetscMalloc5(bcgsl->lwork,&bcgsl->work,ell,&bcgsl->s,ell*ell,&bcgsl->u,ell*ell,&bcgsl->v,5*ell,&bcgsl->realwork);CHKERRQ(ierr);
536 PetscFunctionReturn(0);
537 }
538
KSPReset_BCGSL(KSP ksp)539 PetscErrorCode KSPReset_BCGSL(KSP ksp)
540 {
541 KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
542 PetscErrorCode ierr;
543
544 PetscFunctionBegin;
545 ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
546 ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
547 ierr = PetscFree5(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v,bcgsl->realwork);CHKERRQ(ierr);
548 PetscFunctionReturn(0);
549 }
550
KSPDestroy_BCGSL(KSP ksp)551 PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
552 {
553 PetscErrorCode ierr;
554
555 PetscFunctionBegin;
556 ierr = KSPReset_BCGSL(ksp);CHKERRQ(ierr);
557 ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
558 PetscFunctionReturn(0);
559 }
560
561 /*MC
562 KSPBCGSL - Implements a slight variant of the Enhanced
563 BiCGStab(L) algorithm in (3) and (2). The variation
564 concerns cases when either kappa0**2 or kappa1**2 is
565 negative due to round-off. Kappa0 has also been pulled
566 out of the denominator in the formula for ghat.
567
568 References:
569 + 1. - G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
570 approaches for the stable computation of hybrid BiCG
571 methods", Applied Numerical Mathematics: Transactions
572 f IMACS, 19(3), 1996.
573 . 2. - G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
574 "BiCGStab(L) and other hybrid BiCG methods",
575 Numerical Algorithms, 7, 1994.
576 - 3. - D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
577 for solving linear systems of equations", preprint
578 from www.citeseer.com.
579
580 Contributed by: Joel M. Malard, email jm.malard@pnl.gov
581
582 Options Database Keys:
583 + -ksp_bcgsl_ell <ell> Number of Krylov search directions, defaults to 2 -- KSPBCGSLSetEll()
584 . -ksp_bcgsl_cxpol - Use a convex function of the MinRes and OR polynomials after the BiCG step instead of default MinRes -- KSPBCGSLSetPol()
585 . -ksp_bcgsl_mrpoly - Use the default MinRes polynomial after the BiCG step -- KSPBCGSLSetPol()
586 . -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals -- KSPBCGSLSetXRes()
587 - -ksp_bcgsl_pinv <true/false> - (de)activate use of pseudoinverse -- KSPBCGSLSetUsePseudoinverse()
588
589 Level: beginner
590
591 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS, KSPSetPCSide(), KSPBCGSLSetEll(), KSPBCGSLSetXRes()
592
593 M*/
KSPCreate_BCGSL(KSP ksp)594 PETSC_EXTERN PetscErrorCode KSPCreate_BCGSL(KSP ksp)
595 {
596 PetscErrorCode ierr;
597 KSP_BCGSL *bcgsl;
598
599 PetscFunctionBegin;
600 /* allocate BiCGStab(L) context */
601 ierr = PetscNewLog(ksp,&bcgsl);CHKERRQ(ierr);
602 ksp->data = (void*)bcgsl;
603
604 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);CHKERRQ(ierr);
605 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);CHKERRQ(ierr);
606 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr);
607
608 ksp->ops->setup = KSPSetUp_BCGSL;
609 ksp->ops->solve = KSPSolve_BCGSL;
610 ksp->ops->reset = KSPReset_BCGSL;
611 ksp->ops->destroy = KSPDestroy_BCGSL;
612 ksp->ops->buildsolution = KSPBuildSolutionDefault;
613 ksp->ops->buildresidual = KSPBuildResidualDefault;
614 ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
615 ksp->ops->view = KSPView_BCGSL;
616
617 /* Let the user redefine the number of directions vectors */
618 bcgsl->ell = 2;
619
620 /*Choose between a single MR step or an averaged MR/OR */
621 bcgsl->bConvex = PETSC_FALSE;
622
623 bcgsl->pinv = PETSC_TRUE; /* Use the reliable method by default */
624
625 /* Set the threshold for when exact residuals will be used */
626 bcgsl->delta = 0.0;
627 PetscFunctionReturn(0);
628 }
629