1 /*
2 Contributed by Sascha M. Schnepp and Patrick Sanan
3 */
4
5 #include "petscsys.h"
6 #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h> /*I "petscksp.h" I*/
7
8 static PetscBool cited = PETSC_FALSE;
9 static const char citation[] =
10 "@article{SSM2016,\n"
11 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
12 " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
13 " journal = {SIAM Journal on Scientific Computing},\n"
14 " volume = {38},\n"
15 " number = {5},\n"
16 " pages = {C441-C470},\n"
17 " year = {2016},\n"
18 " doi = {10.1137/15M1049130},\n"
19 " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
20 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
21 "}\n";
22
23 #define KSPPIPEGCR_DEFAULT_MMAX 15
24 #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
25 #define KSPPIPEGCR_DEFAULT_VECB 5
26 #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
27 #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
28
29 #include <petscksp.h>
30
KSPAllocateVectors_PIPEGCR(KSP ksp,PetscInt nvecsneeded,PetscInt chunksize)31 static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
32 {
33 PetscErrorCode ierr;
34 PetscInt i;
35 KSP_PIPEGCR *pipegcr;
36 PetscInt nnewvecs, nvecsprev;
37
38 PetscFunctionBegin;
39 pipegcr = (KSP_PIPEGCR*)ksp->data;
40
41 /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
42 if (pipegcr->nvecs < PetscMin(pipegcr->mmax+1,nvecsneeded)){
43 nvecsprev = pipegcr->nvecs;
44 nnewvecs = PetscMin(PetscMax(nvecsneeded-pipegcr->nvecs,chunksize),pipegcr->mmax+1-pipegcr->nvecs);
45 ierr = KSPCreateVecs(ksp,nnewvecs,&pipegcr->ppvecs[pipegcr->nchunks],0,NULL);CHKERRQ(ierr);
46 ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->ppvecs[pipegcr->nchunks]);CHKERRQ(ierr);
47 ierr = KSPCreateVecs(ksp,nnewvecs,&pipegcr->psvecs[pipegcr->nchunks],0,NULL);CHKERRQ(ierr);
48 ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->psvecs[pipegcr->nchunks]);CHKERRQ(ierr);
49 ierr = KSPCreateVecs(ksp,nnewvecs,&pipegcr->pqvecs[pipegcr->nchunks],0,NULL);CHKERRQ(ierr);
50 ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->pqvecs[pipegcr->nchunks]);CHKERRQ(ierr);
51 if (pipegcr->unroll_w) {
52 ierr = KSPCreateVecs(ksp,nnewvecs,&pipegcr->ptvecs[pipegcr->nchunks],0,NULL);CHKERRQ(ierr);
53 ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipegcr->ptvecs[pipegcr->nchunks]);CHKERRQ(ierr);
54 }
55 pipegcr->nvecs += nnewvecs;
56 for (i=0;i<nnewvecs;i++){
57 pipegcr->qvecs[nvecsprev+i] = pipegcr->pqvecs[pipegcr->nchunks][i];
58 pipegcr->pvecs[nvecsprev+i] = pipegcr->ppvecs[pipegcr->nchunks][i];
59 pipegcr->svecs[nvecsprev+i] = pipegcr->psvecs[pipegcr->nchunks][i];
60 if (pipegcr->unroll_w) {
61 pipegcr->tvecs[nvecsprev+i] = pipegcr->ptvecs[pipegcr->nchunks][i];
62 }
63 }
64 pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
65 pipegcr->nchunks++;
66 }
67 PetscFunctionReturn(0);
68 }
69
KSPSolve_PIPEGCR_cycle(KSP ksp)70 static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
71 {
72 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
73 PetscErrorCode ierr;
74 Mat A, B;
75 Vec x,r,b,z,w,m,n,p,s,q,t,*redux;
76 PetscInt i,j,k,idx,kdx,mi;
77 PetscScalar alpha=0.0,gamma,*betas,*dots;
78 PetscReal rnorm=0.0, delta,*eta,*etas;
79
80 PetscFunctionBegin;
81
82 /* !!PS We have not checked these routines for use with complex numbers. The inner products
83 are likely not defined correctly for that case */
84 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
85 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEGCR has not been implemented for use with complex scalars");
86 #endif
87
88 ierr = KSPGetOperators(ksp, &A, &B);CHKERRQ(ierr);
89 x = ksp->vec_sol;
90 b = ksp->vec_rhs;
91 r = ksp->work[0];
92 z = ksp->work[1];
93 w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
94 m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
95 n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
96 p = pipegcr->pvecs[0];
97 s = pipegcr->svecs[0];
98 q = pipegcr->qvecs[0];
99 t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
100
101 redux = pipegcr->redux;
102 dots = pipegcr->dots;
103 etas = pipegcr->etas;
104 betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
105
106 /* cycle initial residual */
107 ierr = KSP_MatMult(ksp,A,x,r);CHKERRQ(ierr);
108 ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); /* r <- b - Ax */
109 ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- B(r) */
110 ierr = KSP_MatMult(ksp,A,z,w);CHKERRQ(ierr); /* w <- Az */
111
112 /* initialization of other variables and pipelining intermediates */
113 ierr = VecCopy(z,p);CHKERRQ(ierr);
114 ierr = KSP_MatMult(ksp,A,p,s);CHKERRQ(ierr);
115
116 /* overlap initial computation of delta, gamma */
117 redux[0] = w;
118 redux[1] = r;
119 ierr = VecMDotBegin(w,2,redux,dots);CHKERRQ(ierr); /* Start split reductions for gamma = (w,r), delta = (w,w) */
120 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s));CHKERRQ(ierr); /* perform asynchronous reduction */
121 ierr = KSP_PCApply(ksp,s,q);CHKERRQ(ierr); /* q = B(s) */
122 if (pipegcr->unroll_w) {
123 ierr = KSP_MatMult(ksp,A,q,t);CHKERRQ(ierr); /* t = Aq */
124 }
125 ierr = VecMDotEnd(w,2,redux,dots);CHKERRQ(ierr); /* Finish split reduction */
126 delta = PetscRealPart(dots[0]);
127 etas[0] = delta;
128 gamma = dots[1];
129 alpha = gamma/delta;
130
131 i = 0;
132 do {
133 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
134 ksp->its++;
135 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
136
137 /* update solution, residuals, .. */
138 ierr = VecAXPY(x,+alpha,p);CHKERRQ(ierr);
139 ierr = VecAXPY(r,-alpha,s);CHKERRQ(ierr);
140 ierr = VecAXPY(z,-alpha,q);CHKERRQ(ierr);
141 if (pipegcr->unroll_w){
142 ierr = VecAXPY(w,-alpha,t);CHKERRQ(ierr);
143 } else {
144 ierr = KSP_MatMult(ksp,A,z,w);CHKERRQ(ierr);
145 }
146
147 /* Computations of current iteration done */
148 i++;
149
150 if (pipegcr->modifypc) {
151 ierr = (*pipegcr->modifypc)(ksp,ksp->its,ksp->rnorm,pipegcr->modifypc_ctx);CHKERRQ(ierr);
152 }
153
154 /* If needbe, allocate a new chunk of vectors */
155 ierr = KSPAllocateVectors_PIPEGCR(ksp,i+1,pipegcr->vecb);CHKERRQ(ierr);
156
157 /* Note that we wrap around and start clobbering old vectors */
158 idx = i % (pipegcr->mmax+1);
159 p = pipegcr->pvecs[idx];
160 s = pipegcr->svecs[idx];
161 q = pipegcr->qvecs[idx];
162 if (pipegcr->unroll_w) {
163 t = pipegcr->tvecs[idx];
164 }
165 eta = pipegcr->etas+idx;
166
167 /* number of old directions to orthogonalize against */
168 switch(pipegcr->truncstrat){
169 case KSP_FCD_TRUNC_TYPE_STANDARD:
170 mi = pipegcr->mmax;
171 break;
172 case KSP_FCD_TRUNC_TYPE_NOTAY:
173 mi = ((i-1) % pipegcr->mmax)+1;
174 break;
175 default:
176 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized Truncation Strategy");
177 }
178
179 /* Pick old p,s,q,zeta in a way suitable for VecMDot */
180 for (k=PetscMax(0,i-mi),j=0;k<i;j++,k++){
181 kdx = k % (pipegcr->mmax+1);
182 pipegcr->pold[j] = pipegcr->pvecs[kdx];
183 pipegcr->sold[j] = pipegcr->svecs[kdx];
184 pipegcr->qold[j] = pipegcr->qvecs[kdx];
185 if (pipegcr->unroll_w) {
186 pipegcr->told[j] = pipegcr->tvecs[kdx];
187 }
188 redux[j] = pipegcr->svecs[kdx];
189 }
190 /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
191 redux[j] = r;
192 redux[j+1] = w;
193
194 /* Dot products */
195 /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
196 ierr = VecMDotBegin(w,j+2,redux,dots);CHKERRQ(ierr);
197 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w));CHKERRQ(ierr);
198
199 /* B(w-r) + u stabilization */
200 ierr = VecWAXPY(n,-1.0,r,w);CHKERRQ(ierr); /* m = u + B(w-r): (a) ntmp = w-r */
201 ierr = KSP_PCApply(ksp,n,m);CHKERRQ(ierr); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
202 ierr = VecAXPY(m,1.0,z);CHKERRQ(ierr); /* m = u + B(w-r): (c) m = z + mtmp */
203 if (pipegcr->unroll_w){
204 ierr = KSP_MatMult(ksp,A,m,n);CHKERRQ(ierr); /* n = Am */
205 }
206
207 /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
208 ierr = VecMDotEnd(w,j+2,redux,dots);CHKERRQ(ierr);
209 gamma = dots[j];
210 delta = PetscRealPart(dots[j+1]);
211
212 /* compute new residual norm.
213 this cannot be done before this point so that the natural norm
214 is available for free and the communication involved is overlapped */
215 switch (ksp->normtype) {
216 case KSP_NORM_PRECONDITIONED:
217 ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /* ||r|| <- sqrt(z'*z) */
218 break;
219 case KSP_NORM_UNPRECONDITIONED:
220 ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /* ||r|| <- sqrt(r'*r) */
221 break;
222 case KSP_NORM_NATURAL:
223 rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
224 break;
225 case KSP_NORM_NONE:
226 rnorm = 0.0;
227 break;
228 default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
229 }
230
231 /* Check for convergence */
232 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
233 ksp->rnorm = rnorm;
234 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
235 KSPLogResidualHistory(ksp,rnorm);CHKERRQ(ierr);
236 ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr);
237 ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
238 if (ksp->reason) PetscFunctionReturn(0);
239
240 /* compute new eta and scale beta */
241 *eta = 0.;
242 for (k=PetscMax(0,i-mi),j=0;k<i;j++,k++){
243 kdx = k % (pipegcr->mmax+1);
244 betas[j] /= -etas[kdx]; /* betak /= etak */
245 *eta -= ((PetscReal)(PetscAbsScalar(betas[j])*PetscAbsScalar(betas[j]))) * etas[kdx];
246 /* etaitmp = -betaik^2 * etak */
247 }
248 *eta += delta; /* etai = delta -betaik^2 * etak */
249
250 /* check breakdown of eta = (s,s) */
251 if (*eta < 0.) {
252 pipegcr->norm_breakdown = PETSC_TRUE;
253 ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr);
254 break;
255 } else {
256 alpha= gamma/(*eta); /* alpha = gamma/etai */
257 }
258
259 /* project out stored search directions using classical G-S */
260 ierr = VecCopy(z,p);CHKERRQ(ierr);
261 ierr = VecCopy(w,s);CHKERRQ(ierr);
262 ierr = VecCopy(m,q);CHKERRQ(ierr);
263 if (pipegcr->unroll_w) {
264 ierr = VecCopy(n,t);CHKERRQ(ierr);
265 ierr = VecMAXPY(t,j,betas,pipegcr->told);CHKERRQ(ierr); /* ti <- n - sum_k beta_k t_k */
266 }
267 ierr = VecMAXPY(p,j,betas,pipegcr->pold);CHKERRQ(ierr); /* pi <- ui - sum_k beta_k p_k */
268 ierr = VecMAXPY(s,j,betas,pipegcr->sold);CHKERRQ(ierr); /* si <- wi - sum_k beta_k s_k */
269 ierr = VecMAXPY(q,j,betas,pipegcr->qold);CHKERRQ(ierr); /* qi <- m - sum_k beta_k q_k */
270
271 } while (ksp->its < ksp->max_it);
272 if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
273 PetscFunctionReturn(0);
274 }
275
KSPSolve_PIPEGCR(KSP ksp)276 static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
277 {
278 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
279 PetscErrorCode ierr;
280 Mat A, B;
281 Vec x,b,r,z,w;
282 PetscScalar gamma;
283 PetscReal rnorm=0.0;
284 PetscBool issym;
285
286 PetscFunctionBegin;
287 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
288
289 ierr = KSPGetOperators(ksp, &A, &B);CHKERRQ(ierr);
290 x = ksp->vec_sol;
291 b = ksp->vec_rhs;
292 r = ksp->work[0];
293 z = ksp->work[1];
294 w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
295
296 /* compute initial residual */
297 if (!ksp->guess_zero) {
298 ierr = KSP_MatMult(ksp,A,x,r);CHKERRQ(ierr);
299 ierr = VecAYPX(r,-1.0,b);CHKERRQ(ierr); /* r <- b - Ax */
300 } else {
301 ierr = VecCopy(b,r);CHKERRQ(ierr); /* r <- b */
302 }
303
304 /* initial residual norm */
305 ierr = KSP_PCApply(ksp,r,z);CHKERRQ(ierr); /* z <- B(r) */
306 ierr = KSP_MatMult(ksp,A,z,w);CHKERRQ(ierr); /* w <- Az */
307 ierr = VecDot(r,w,&gamma);CHKERRQ(ierr); /* gamma = (r,w) */
308
309 switch (ksp->normtype) {
310 case KSP_NORM_PRECONDITIONED:
311 ierr = VecNorm(z,NORM_2,&rnorm);CHKERRQ(ierr); /* ||r|| <- sqrt(z'*z) */
312 break;
313 case KSP_NORM_UNPRECONDITIONED:
314 ierr = VecNorm(r,NORM_2,&rnorm);CHKERRQ(ierr); /* ||r|| <- sqrt(r'*r) */
315 break;
316 case KSP_NORM_NATURAL:
317 rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
318 break;
319 case KSP_NORM_NONE:
320 rnorm = 0.0;
321 break;
322 default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
323 }
324
325 /* Is A symmetric? */
326 ierr = PetscObjectTypeCompareAny((PetscObject)A,&issym,MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
327 if (!issym) {
328 ierr = PetscInfo(A,"Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?");CHKERRQ(ierr);
329 }
330
331 /* logging */
332 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
333 ksp->its = 0;
334 ksp->rnorm0 = rnorm;
335 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
336 ierr = KSPLogResidualHistory(ksp,ksp->rnorm0);CHKERRQ(ierr);
337 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm0);CHKERRQ(ierr);
338 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm0,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
339 if (ksp->reason) PetscFunctionReturn(0);
340
341 do {
342 ierr = KSPSolve_PIPEGCR_cycle(ksp);CHKERRQ(ierr);
343 if (ksp->reason) PetscFunctionReturn(0);
344 if (pipegcr->norm_breakdown) {
345 pipegcr->n_restarts++;
346 pipegcr->norm_breakdown = PETSC_FALSE;
347 }
348 } while (ksp->its < ksp->max_it);
349
350 if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
351 PetscFunctionReturn(0);
352 }
353
KSPView_PIPEGCR(KSP ksp,PetscViewer viewer)354 static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
355 {
356 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
357 PetscErrorCode ierr;
358 PetscBool isascii,isstring;
359 const char *truncstr;
360
361 PetscFunctionBegin;
362 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
363 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
364
365 if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD){
366 truncstr = "Using standard truncation strategy";
367 } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY){
368 truncstr = "Using Notay's truncation strategy";
369 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Undefined FCD truncation strategy");
370
371
372 if (isascii) {
373 ierr = PetscViewerASCIIPrintf(viewer," max previous directions = %D\n",pipegcr->mmax);CHKERRQ(ierr);
374 ierr = PetscViewerASCIIPrintf(viewer," preallocated %D directions\n",PetscMin(pipegcr->nprealloc,pipegcr->mmax+1));CHKERRQ(ierr);
375 ierr = PetscViewerASCIIPrintf(viewer," %s\n",truncstr);CHKERRQ(ierr);
376 ierr = PetscViewerASCIIPrintf(viewer," w unrolling = %D \n", pipegcr->unroll_w);CHKERRQ(ierr);
377 ierr = PetscViewerASCIIPrintf(viewer," restarts performed = %D \n", pipegcr->n_restarts);CHKERRQ(ierr);
378 } else if (isstring) {
379 ierr = PetscViewerStringSPrintf(viewer, "max previous directions = %D, preallocated %D directions, %s truncation strategy", pipegcr->mmax,pipegcr->nprealloc,truncstr);CHKERRQ(ierr);
380 }
381 PetscFunctionReturn(0);
382 }
383
384
KSPSetUp_PIPEGCR(KSP ksp)385 static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
386 {
387 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
388 PetscErrorCode ierr;
389 Mat A;
390 PetscBool diagonalscale;
391 const PetscInt nworkstd = 5;
392
393 PetscFunctionBegin;
394 ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
395 if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
396
397 ierr = KSPGetOperators(ksp, &A, NULL);CHKERRQ(ierr);
398
399 /* Allocate "standard" work vectors */
400 ierr = KSPSetWorkVecs(ksp,nworkstd);CHKERRQ(ierr);
401
402 /* Allocated space for pointers to additional work vectors
403 note that mmax is the number of previous directions, so we add 1 for the current direction */
404 ierr = PetscMalloc6(pipegcr->mmax+1,&(pipegcr->pvecs),pipegcr->mmax+1,&(pipegcr->ppvecs),pipegcr->mmax+1,&(pipegcr->svecs), pipegcr->mmax+1,&(pipegcr->psvecs),pipegcr->mmax+1,&(pipegcr->qvecs),pipegcr->mmax+1,&(pipegcr->pqvecs));CHKERRQ(ierr);
405 if (pipegcr->unroll_w) {
406 ierr = PetscMalloc3(pipegcr->mmax+1,&(pipegcr->tvecs),pipegcr->mmax+1,&(pipegcr->ptvecs),pipegcr->mmax+2,&(pipegcr->told));CHKERRQ(ierr);
407 }
408 ierr = PetscMalloc4(pipegcr->mmax+2,&(pipegcr->pold),pipegcr->mmax+2,&(pipegcr->sold),pipegcr->mmax+2,&(pipegcr->qold),pipegcr->mmax+2,&(pipegcr->chunksizes));CHKERRQ(ierr);
409 ierr = PetscMalloc3(pipegcr->mmax+2,&(pipegcr->dots),pipegcr->mmax+1,&(pipegcr->etas),pipegcr->mmax+2,&(pipegcr->redux));CHKERRQ(ierr);
410 /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
411 if (pipegcr->nprealloc > pipegcr->mmax+1){
412 ierr = PetscInfo2(NULL,"Requested nprealloc=%d is greater than m_max+1=%d. Resetting nprealloc = m_max+1.\n",pipegcr->nprealloc, pipegcr->mmax+1);CHKERRQ(ierr);
413 }
414
415 /* Preallocate additional work vectors */
416 ierr = KSPAllocateVectors_PIPEGCR(ksp,pipegcr->nprealloc,pipegcr->nprealloc);CHKERRQ(ierr);
417
418 ierr = PetscLogObjectMemory(
419 (PetscObject)ksp,
420 (pipegcr->mmax + 1) * 4 * sizeof(Vec*) + /* old dirs */
421 (pipegcr->mmax + 1) * 4 * sizeof(Vec**) + /* old pdirs */
422 (pipegcr->mmax + 1) * 4 * sizeof(Vec*) + /* p/s/qold/told */
423 (pipegcr->mmax + 1) * sizeof(PetscInt) + /* chunksizes */
424 (pipegcr->mmax + 2) * sizeof(Vec*) + /* redux */
425 (pipegcr->mmax + 2) * sizeof(PetscScalar) + /* dots */
426 (pipegcr->mmax + 1) * sizeof(PetscReal) /* etas */
427 );CHKERRQ(ierr);
428 PetscFunctionReturn(0);
429 }
430
KSPReset_PIPEGCR(KSP ksp)431 static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
432 {
433 PetscErrorCode ierr;
434 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
435
436 PetscFunctionBegin;
437 if (pipegcr->modifypc_destroy) {
438 ierr = (*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx);CHKERRQ(ierr);
439 }
440 PetscFunctionReturn(0);
441 }
442
KSPDestroy_PIPEGCR(KSP ksp)443 static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
444 {
445 PetscErrorCode ierr;
446 PetscInt i;
447 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
448
449 PetscFunctionBegin;
450 VecDestroyVecs(ksp->nwork,&ksp->work); /* Destroy "standard" work vecs */
451
452 /* Destroy vectors for old directions and the arrays that manage pointers to them */
453 if (pipegcr->nvecs){
454 for (i=0;i<pipegcr->nchunks;i++){
455 ierr = VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->ppvecs[i]);CHKERRQ(ierr);
456 ierr = VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->psvecs[i]);CHKERRQ(ierr);
457 ierr = VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->pqvecs[i]);CHKERRQ(ierr);
458 if (pipegcr->unroll_w) {
459 ierr = VecDestroyVecs(pipegcr->chunksizes[i],&pipegcr->ptvecs[i]);CHKERRQ(ierr);
460 }
461 }
462 }
463
464 ierr = PetscFree6(pipegcr->pvecs,pipegcr->ppvecs,pipegcr->svecs,pipegcr->psvecs,pipegcr->qvecs,pipegcr->pqvecs);CHKERRQ(ierr);
465 ierr = PetscFree4(pipegcr->pold,pipegcr->sold,pipegcr->qold,pipegcr->chunksizes);CHKERRQ(ierr);
466 ierr = PetscFree3(pipegcr->dots,pipegcr->etas,pipegcr->redux);CHKERRQ(ierr);
467 if (pipegcr->unroll_w) {
468 ierr = PetscFree3(pipegcr->tvecs,pipegcr->ptvecs,pipegcr->told);CHKERRQ(ierr);
469 }
470
471 ierr = KSPReset_PIPEGCR(ksp);CHKERRQ(ierr);
472 ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
473 PetscFunctionReturn(0);
474 }
475
476 /*@
477 KSPPIPEGCRSetUnrollW - Set to PETSC_TRUE to use PIPEGCR with unrolling of the w vector
478
479 Logically Collective on ksp
480
481 Input Parameters:
482 + ksp - the Krylov space context
483 - unroll_w - use unrolling
484
485 Level: intermediate
486
487 Options Database:
488 . -ksp_pipegcr_unroll_w
489
490 .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType(), KSPPIPEGCRSetNprealloc(),KSPPIPEGCRGetUnrollW()
491 @*/
KSPPIPEGCRSetUnrollW(KSP ksp,PetscBool unroll_w)492 PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp,PetscBool unroll_w)
493 {
494 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
495
496 PetscFunctionBegin;
497 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
498 PetscValidLogicalCollectiveBool(ksp,unroll_w,2);
499 pipegcr->unroll_w=unroll_w;
500 PetscFunctionReturn(0);
501 }
502
503 /*@
504 KSPPIPEGCRGetUnrollW - Get information on PIPEGCR unrolling the w vector
505
506 Logically Collective on ksp
507
508 Input Parameter:
509 . ksp - the Krylov space context
510
511 Output Parameter:
512 . unroll_w - PIPEGCR uses unrolling (bool)
513
514 Level: intermediate
515
516 Options Database:
517 . -ksp_pipegcr_unroll_w
518
519 .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc(),KSPPIPEGCRSetUnrollW()
520 @*/
KSPPIPEGCRGetUnrollW(KSP ksp,PetscBool * unroll_w)521 PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp,PetscBool *unroll_w)
522 {
523 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
524
525 PetscFunctionBegin;
526 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
527 *unroll_w=pipegcr->unroll_w;
528 PetscFunctionReturn(0);
529 }
530
531 /*@
532 KSPPIPEGCRSetMmax - set the maximum number of previous directions PIPEGCR will store for orthogonalization
533
534 Note: mmax + 1 directions are stored (mmax previous ones along with a current one)
535 and whether all are used in each iteration also depends on the truncation strategy
536 (see KSPPIPEGCRSetTruncationType)
537
538 Logically Collective on ksp
539
540 Input Parameters:
541 + ksp - the Krylov space context
542 - mmax - the maximum number of previous directions to orthogonalize againt
543
544 Level: intermediate
545
546 Options Database:
547 . -ksp_pipegcr_mmax <N>
548
549 .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType(), KSPPIPEGCRSetNprealloc()
550 @*/
KSPPIPEGCRSetMmax(KSP ksp,PetscInt mmax)551 PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp,PetscInt mmax)
552 {
553 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
554
555 PetscFunctionBegin;
556 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
557 PetscValidLogicalCollectiveEnum(ksp,mmax,2);
558 pipegcr->mmax=mmax;
559 PetscFunctionReturn(0);
560 }
561
562 /*@
563 KSPPIPEGCRGetMmax - get the maximum number of previous directions PIPEGCR will store
564
565 Note: PIPEGCR stores mmax+1 directions at most (mmax previous ones, and one current one)
566
567 Not Collective
568
569 Input Parameter:
570 . ksp - the Krylov space context
571
572 Output Parameter:
573 . mmax - the maximum number of previous directons allowed for orthogonalization
574
575 Options Database:
576 . -ksp_pipegcr_mmax <N>
577
578 Level: intermediate
579
580 .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc(), KSPPIPEGCRSetMmax()
581 @*/
582
KSPPIPEGCRGetMmax(KSP ksp,PetscInt * mmax)583 PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp,PetscInt *mmax)
584 {
585 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
586
587 PetscFunctionBegin;
588 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
589 *mmax=pipegcr->mmax;
590 PetscFunctionReturn(0);
591 }
592
593 /*@
594 KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with PIPEGCR
595
596 Logically Collective on ksp
597
598 Input Parameters:
599 + ksp - the Krylov space context
600 - nprealloc - the number of vectors to preallocate
601
602 Level: advanced
603
604 Options Database:
605 . -ksp_pipegcr_nprealloc <N>
606
607 .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRGetNprealloc()
608 @*/
KSPPIPEGCRSetNprealloc(KSP ksp,PetscInt nprealloc)609 PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp,PetscInt nprealloc)
610 {
611 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
612
613 PetscFunctionBegin;
614 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
615 PetscValidLogicalCollectiveEnum(ksp,nprealloc,2);
616 pipegcr->nprealloc = nprealloc;
617 PetscFunctionReturn(0);
618 }
619
620 /*@
621 KSPPIPEGCRGetNprealloc - get the number of directions preallocate by PIPEGCR
622
623 Not Collective
624
625 Input Parameter:
626 . ksp - the Krylov space context
627
628 Output Parameter:
629 . nprealloc - the number of directions preallocated
630
631 Options Database:
632 . -ksp_pipegcr_nprealloc <N>
633
634 Level: advanced
635
636 .seealso: KSPPIPEGCR, KSPPIPEGCRGetTruncationType(), KSPPIPEGCRSetNprealloc()
637 @*/
KSPPIPEGCRGetNprealloc(KSP ksp,PetscInt * nprealloc)638 PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp,PetscInt *nprealloc)
639 {
640 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
641
642 PetscFunctionBegin;
643 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
644 *nprealloc = pipegcr->nprealloc;
645 PetscFunctionReturn(0);
646 }
647
648 /*@
649 KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions PIPEGCR uses during orthoganalization
650
651 Logically Collective on ksp
652
653 KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
654 KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
655
656 Input Parameters:
657 + ksp - the Krylov space context
658 - truncstrat - the choice of strategy
659
660 Level: intermediate
661
662 Options Database:
663 . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
664
665 .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType, KSPPIPEGCRTruncationType, KSPFCDTruncationType
666 @*/
KSPPIPEGCRSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)667 PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)
668 {
669 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
670
671 PetscFunctionBegin;
672 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
673 PetscValidLogicalCollectiveEnum(ksp,truncstrat,2);
674 pipegcr->truncstrat=truncstrat;
675 PetscFunctionReturn(0);
676 }
677
678 /*@
679 KSPPIPEGCRGetTruncationType - get the truncation strategy employed by PIPEGCR
680
681 Not Collective
682
683 KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
684 KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
685
686 Input Parameter:
687 . ksp - the Krylov space context
688
689 Output Parameter:
690 . truncstrat - the strategy type
691
692 Options Database:
693 . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
694
695 Level: intermediate
696
697 .seealso: KSPPIPEGCR, KSPPIPEGCRSetTruncationType, KSPPIPEGCRTruncationType, KSPFCDTruncationType
698 @*/
KSPPIPEGCRGetTruncationType(KSP ksp,KSPFCDTruncationType * truncstrat)699 PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp,KSPFCDTruncationType *truncstrat)
700 {
701 KSP_PIPEGCR *pipegcr=(KSP_PIPEGCR*)ksp->data;
702
703 PetscFunctionBegin;
704 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
705 *truncstrat=pipegcr->truncstrat;
706 PetscFunctionReturn(0);
707 }
708
KSPSetFromOptions_PIPEGCR(PetscOptionItems * PetscOptionsObject,KSP ksp)709 static PetscErrorCode KSPSetFromOptions_PIPEGCR(PetscOptionItems *PetscOptionsObject,KSP ksp)
710 {
711 PetscErrorCode ierr;
712 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
713 PetscInt mmax,nprealloc;
714 PetscBool flg;
715
716 PetscFunctionBegin;
717 ierr = PetscOptionsHead(PetscOptionsObject,"KSP PIPEGCR options");CHKERRQ(ierr);
718 ierr = PetscOptionsInt("-ksp_pipegcr_mmax","Number of search directions to storue","KSPPIPEGCRSetMmax",pipegcr->mmax,&mmax,&flg);CHKERRQ(ierr);
719 if (flg) ierr = KSPPIPEGCRSetMmax(ksp,mmax);CHKERRQ(ierr);
720 ierr = PetscOptionsInt("-ksp_pipegcr_nprealloc","Number of directions to preallocate","KSPPIPEGCRSetNprealloc",pipegcr->nprealloc,&nprealloc,&flg);CHKERRQ(ierr);
721 if (flg) { ierr = KSPPIPEGCRSetNprealloc(ksp,nprealloc);CHKERRQ(ierr); }
722 ierr = PetscOptionsEnum("-ksp_pipegcr_truncation_type","Truncation approach for directions","KSPFCGSetTruncationType",KSPFCDTruncationTypes,(PetscEnum)pipegcr->truncstrat,(PetscEnum*)&pipegcr->truncstrat,NULL);CHKERRQ(ierr);
723 ierr = PetscOptionsBool("-ksp_pipegcr_unroll_w","Use unrolling of w","KSPPIPEGCRSetUnrollW",pipegcr->unroll_w,&pipegcr->unroll_w,NULL);CHKERRQ(ierr);
724 ierr = PetscOptionsTail();CHKERRQ(ierr);
725 PetscFunctionReturn(0);
726 }
727
728 /* Force these parameters to not be EXTERN_C */
729 typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP,PetscInt,PetscReal,void*);
730 typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void*);
731
KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp,KSPPIPEGCRModifyPCFunction function,void * data,KSPPIPEGCRDestroyFunction destroy)732 static PetscErrorCode KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp,KSPPIPEGCRModifyPCFunction function,void *data,KSPPIPEGCRDestroyFunction destroy)
733 {
734 KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR*)ksp->data;
735
736 PetscFunctionBegin;
737 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
738 pipegcr->modifypc = function;
739 pipegcr->modifypc_destroy = destroy;
740 pipegcr->modifypc_ctx = data;
741 PetscFunctionReturn(0);
742 }
743
744 /*@C
745 KSPPIPEGCRSetModifyPC - Sets the routine used by PIPEGCR to modify the preconditioner.
746
747 Logically Collective on ksp
748
749 Input Parameters:
750 + ksp - iterative context obtained from KSPCreate()
751 . function - user defined function to modify the preconditioner
752 . ctx - user provided contex for the modify preconditioner function
753 - destroy - the function to use to destroy the user provided application context.
754
755 Calling Sequence of function:
756 PetscErrorCode function (KSP ksp, PetscInt n, PetscReal rnorm, void *ctx)
757
758 ksp - iterative context
759 n - the total number of PIPEGCR iterations that have occurred
760 rnorm - 2-norm residual value
761 ctx - the user provided application context
762
763 Level: intermediate
764
765 Notes:
766 The default modifypc routine is KSPPIPEGCRModifyPCNoChange()
767
768 .seealso: KSPPIPEGCRModifyPCNoChange()
769
770 @*/
KSPPIPEGCRSetModifyPC(KSP ksp,PetscErrorCode (* function)(KSP,PetscInt,PetscReal,void *),void * data,PetscErrorCode (* destroy)(void *))771 PetscErrorCode KSPPIPEGCRSetModifyPC(KSP ksp,PetscErrorCode (*function)(KSP,PetscInt,PetscReal,void*),void *data,PetscErrorCode (*destroy)(void*))
772 {
773 PetscErrorCode ierr;
774
775 PetscFunctionBegin;
776 ierr = PetscUseMethod(ksp,"KSPPIPEGCRSetModifyPC_C",(KSP,PetscErrorCode (*)(KSP,PetscInt,PetscReal,void*),void *data,PetscErrorCode (*)(void*)),(ksp,function,data,destroy));CHKERRQ(ierr);
777 PetscFunctionReturn(0);
778 }
779
780 /*MC
781 KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method.
782
783 Options Database Keys:
784 + -ksp_pipegcr_mmax <N> - the max number of Krylov directions to orthogonalize against
785 . -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: PETSC_TRUE)
786 . -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
787 - -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
788
789
790 Notes:
791 The PIPEGCR Krylov method supports non-symmetric matrices and permits the use of a preconditioner
792 which may vary from one iteration to the next. Users can can define a method to vary the
793 preconditioner between iterates via KSPPIPEGCRSetModifyPC().
794 Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
795 solution is given by the current estimate for x which was obtained by the last restart
796 iterations of the PIPEGCR algorithm.
797 The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as GCR.
798
799 Only supports left preconditioning.
800
801 The natural "norm" for this method is (u,Au), where u is the preconditioned residual. This norm is available at no additional computational cost, as with standard CG. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
802
803 Reference:
804 P. Sanan, S.M. Schnepp, and D.A. May,
805 "Pipelined, Flexible Krylov Subspace Methods,"
806 SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
807 DOI: 10.1137/15M1049130
808
809 Level: intermediate
810
811 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP,
812 KSPPIPEFGMRES, KSPPIPECG, KSPPIPECR, KSPPIPEFCG,KSPPIPEGCRSetTruncationType(),KSPPIPEGCRSetNprealloc(),KSPPIPEGCRSetUnrollW(),KSPPIPEGCRSetMmax()
813
814
815 M*/
KSPCreate_PIPEGCR(KSP ksp)816 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
817 {
818 PetscErrorCode ierr;
819 KSP_PIPEGCR *pipegcr;
820
821 PetscFunctionBegin;
822 ierr = PetscNewLog(ksp,&pipegcr);CHKERRQ(ierr);
823 pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
824 pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
825 pipegcr->nvecs = 0;
826 pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
827 pipegcr->nchunks = 0;
828 pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
829 pipegcr->n_restarts = 0;
830 pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
831
832 ksp->data = (void*)pipegcr;
833
834 /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
835 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);CHKERRQ(ierr);
836 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,1);CHKERRQ(ierr);
837 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);CHKERRQ(ierr);
838 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);CHKERRQ(ierr);
839
840 ksp->ops->setup = KSPSetUp_PIPEGCR;
841 ksp->ops->solve = KSPSolve_PIPEGCR;
842 ksp->ops->reset = KSPReset_PIPEGCR;
843 ksp->ops->destroy = KSPDestroy_PIPEGCR;
844 ksp->ops->view = KSPView_PIPEGCR;
845 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
846 ksp->ops->buildsolution = KSPBuildSolutionDefault;
847 ksp->ops->buildresidual = KSPBuildResidualDefault;
848
849 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPPIPEGCRSetModifyPC_C",KSPPIPEGCRSetModifyPC_PIPEGCR);CHKERRQ(ierr);
850 PetscFunctionReturn(0);
851 }
852