1 /*
2     Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/fcg/pipefcg/pipefcgimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define KSPPIPEFCG_DEFAULT_MMAX 15
23 #define KSPPIPEFCG_DEFAULT_NPREALLOC 5
24 #define KSPPIPEFCG_DEFAULT_VECB 5
25 #define KSPPIPEFCG_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
26 
KSPAllocateVectors_PIPEFCG(KSP ksp,PetscInt nvecsneeded,PetscInt chunksize)27 static PetscErrorCode KSPAllocateVectors_PIPEFCG(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
28 {
29   PetscErrorCode  ierr;
30   PetscInt        i;
31   KSP_PIPEFCG     *pipefcg;
32   PetscInt        nnewvecs, nvecsprev;
33 
34   PetscFunctionBegin;
35   pipefcg = (KSP_PIPEFCG*)ksp->data;
36 
37   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
38   if (pipefcg->nvecs < PetscMin(pipefcg->mmax+1,nvecsneeded)){
39     nvecsprev = pipefcg->nvecs;
40     nnewvecs = PetscMin(PetscMax(nvecsneeded-pipefcg->nvecs,chunksize),pipefcg->mmax+1-pipefcg->nvecs);
41     ierr = KSPCreateVecs(ksp,nnewvecs,&pipefcg->pQvecs[pipefcg->nchunks],0,NULL);CHKERRQ(ierr);
42     ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipefcg->pQvecs[pipefcg->nchunks]);CHKERRQ(ierr);
43     ierr = KSPCreateVecs(ksp,nnewvecs,&pipefcg->pZETAvecs[pipefcg->nchunks],0,NULL);CHKERRQ(ierr);
44     ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipefcg->pZETAvecs[pipefcg->nchunks]);CHKERRQ(ierr);
45     ierr = KSPCreateVecs(ksp,nnewvecs,&pipefcg->pPvecs[pipefcg->nchunks],0,NULL);CHKERRQ(ierr);
46     ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipefcg->pPvecs[pipefcg->nchunks]);CHKERRQ(ierr);
47     ierr = KSPCreateVecs(ksp,nnewvecs,&pipefcg->pSvecs[pipefcg->nchunks],0,NULL);CHKERRQ(ierr);
48     ierr = PetscLogObjectParents((PetscObject)ksp,nnewvecs,pipefcg->pSvecs[pipefcg->nchunks]);CHKERRQ(ierr);
49     pipefcg->nvecs += nnewvecs;
50     for (i=0;i<nnewvecs;++i){
51       pipefcg->Qvecs[nvecsprev + i]    = pipefcg->pQvecs[pipefcg->nchunks][i];
52       pipefcg->ZETAvecs[nvecsprev + i] = pipefcg->pZETAvecs[pipefcg->nchunks][i];
53       pipefcg->Pvecs[nvecsprev + i]    = pipefcg->pPvecs[pipefcg->nchunks][i];
54       pipefcg->Svecs[nvecsprev + i]    = pipefcg->pSvecs[pipefcg->nchunks][i];
55     }
56     pipefcg->chunksizes[pipefcg->nchunks] = nnewvecs;
57     ++pipefcg->nchunks;
58   }
59   PetscFunctionReturn(0);
60 }
61 
KSPSetUp_PIPEFCG(KSP ksp)62 static PetscErrorCode KSPSetUp_PIPEFCG(KSP ksp)
63 {
64   PetscErrorCode ierr;
65   KSP_PIPEFCG    *pipefcg;
66   const PetscInt nworkstd = 5;
67 
68   PetscFunctionBegin;
69   pipefcg = (KSP_PIPEFCG*)ksp->data;
70 
71   /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
72   ierr = KSPSetWorkVecs(ksp,nworkstd);CHKERRQ(ierr);
73 
74   /* Allocated space for pointers to additional work vectors
75    note that mmax is the number of previous directions, so we add 1 for the current direction,
76    and an extra 1 for the prealloc (which might be empty) */
77   ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Pvecs),pipefcg->mmax+1,&(pipefcg->pPvecs),pipefcg->mmax+1,&(pipefcg->Svecs),pipefcg->mmax+1,&(pipefcg->pSvecs));CHKERRQ(ierr);
78   ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Qvecs),pipefcg->mmax+1,&(pipefcg->pQvecs),pipefcg->mmax+1,&(pipefcg->ZETAvecs),pipefcg->mmax+1,&(pipefcg->pZETAvecs));CHKERRQ(ierr);
79   ierr = PetscMalloc4(pipefcg->mmax+1,&(pipefcg->Pold),pipefcg->mmax+1,&(pipefcg->Sold),pipefcg->mmax+1,&(pipefcg->Qold),pipefcg->mmax+1,&(pipefcg->ZETAold));CHKERRQ(ierr);
80   ierr = PetscMalloc1(pipefcg->mmax+1,&(pipefcg->chunksizes));CHKERRQ(ierr);
81   ierr = PetscMalloc3(pipefcg->mmax+2,&(pipefcg->dots),pipefcg->mmax+1,&(pipefcg->etas),pipefcg->mmax+2,&(pipefcg->redux));CHKERRQ(ierr);
82 
83   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
84   if (pipefcg->nprealloc > pipefcg->mmax+1){
85     ierr = PetscInfo2(NULL,"Requested nprealloc=%d is greater than m_max+1=%d. Resetting nprealloc = m_max+1.\n",pipefcg->nprealloc, pipefcg->mmax+1);CHKERRQ(ierr);
86   }
87 
88   /* Preallocate additional work vectors */
89   ierr = KSPAllocateVectors_PIPEFCG(ksp,pipefcg->nprealloc,pipefcg->nprealloc);CHKERRQ(ierr);
90 
91   ierr = PetscLogObjectMemory((PetscObject)ksp,(pipefcg->mmax+1)*4*sizeof(Vec*)+(pipefcg->mmax+1)*4*sizeof(Vec**)+(pipefcg->mmax+1)*4*sizeof(Vec*)+
92     (pipefcg->mmax+1)*sizeof(PetscInt)+(pipefcg->mmax+2)*sizeof(Vec*)+(pipefcg->mmax+2)*sizeof(PetscScalar)+(pipefcg->mmax+1)*sizeof(PetscReal));CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
KSPSolve_PIPEFCG_cycle(KSP ksp)96 static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp)
97 {
98   PetscErrorCode ierr;
99   PetscInt       i,j,k,idx,kdx,mi;
100   KSP_PIPEFCG    *pipefcg;
101   PetscScalar    alpha=0.0,gamma,*betas,*dots;
102   PetscReal      dp=0.0, delta,*eta,*etas;
103   Vec            B,R,Z,X,Qcurr,W,ZETAcurr,M,N,Pcurr,Scurr,*redux;
104   Mat            Amat,Pmat;
105 
106   PetscFunctionBegin;
107 
108   /* We have not checked these routines for use with complex numbers. The inner products
109      are likely not defined correctly for that case */
110 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
111   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
112 #endif
113 
114 #define VecXDot(x,y,a)         (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot       (x,y,a)   : VecTDot       (x,y,a))
115 #define VecXDotBegin(x,y,a)    (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotBegin  (x,y,a)   : VecTDotBegin  (x,y,a))
116 #define VecXDotEnd(x,y,a)      (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotEnd    (x,y,a)   : VecTDotEnd    (x,y,a))
117 #define VecMXDot(x,n,y,a)      (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDot      (x,n,y,a) : VecMTDot      (x,n,y,a))
118 #define VecMXDotBegin(x,n,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotBegin (x,n,y,a) : VecMTDotBegin (x,n,y,a))
119 #define VecMXDotEnd(x,n,y,a)   (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotEnd   (x,n,y,a) : VecMTDotEnd   (x,n,y,a))
120 
121   pipefcg       = (KSP_PIPEFCG*)ksp->data;
122   X             = ksp->vec_sol;
123   B             = ksp->vec_rhs;
124   R             = ksp->work[0];
125   Z             = ksp->work[1];
126   W             = ksp->work[2];
127   M             = ksp->work[3];
128   N             = ksp->work[4];
129 
130   redux = pipefcg->redux;
131   dots  = pipefcg->dots;
132   etas  = pipefcg->etas;
133   betas = dots;        /* dots takes the result of all dot products of which the betas are a subset */
134 
135   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
136 
137   /* Compute cycle initial residual */
138   ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);
139   ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);                   /* r <- b - Ax */
140   ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                /* z <- Br     */
141 
142   Pcurr = pipefcg->Pvecs[0];
143   Scurr = pipefcg->Svecs[0];
144   Qcurr = pipefcg->Qvecs[0];
145   ZETAcurr = pipefcg->ZETAvecs[0];
146   ierr  = VecCopy(Z,Pcurr);CHKERRQ(ierr);
147   ierr  = KSP_MatMult(ksp,Amat,Pcurr,Scurr);CHKERRQ(ierr);  /* S = Ap     */
148   ierr  = VecCopy(Scurr,W);CHKERRQ(ierr);                   /* w = s = Az */
149 
150   /* Initial state of pipelining intermediates */
151   redux[0] = R;
152   redux[1] = W;
153   ierr     = VecMXDotBegin(Z,2,redux,dots);CHKERRQ(ierr);
154   ierr     = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */
155   ierr     = KSP_PCApply(ksp,W,M);CHKERRQ(ierr);            /* m = B(w) */
156   ierr     = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr);       /* n = Am   */
157   ierr     = VecCopy(M,Qcurr);CHKERRQ(ierr);                /* q = m    */
158   ierr     = VecCopy(N,ZETAcurr);CHKERRQ(ierr);             /* zeta = n */
159   ierr     = VecMXDotEnd(Z,2,redux,dots);CHKERRQ(ierr);
160   gamma    = dots[0];
161   delta    = PetscRealPart(dots[1]);
162   etas[0]  = delta;
163   alpha    = gamma/delta;
164 
165   i = 0;
166   do {
167     ksp->its++;
168 
169     /* Update X, R, Z, W */
170     ierr = VecAXPY(X,+alpha,Pcurr);CHKERRQ(ierr);           /* x <- x + alpha * pi    */
171     ierr = VecAXPY(R,-alpha,Scurr);CHKERRQ(ierr);           /* r <- r - alpha * si    */
172     ierr = VecAXPY(Z,-alpha,Qcurr);CHKERRQ(ierr);           /* z <- z - alpha * qi    */
173     ierr = VecAXPY(W,-alpha,ZETAcurr);CHKERRQ(ierr);        /* w <- w - alpha * zetai */
174 
175     /* Compute norm for convergence check */
176     switch (ksp->normtype) {
177       case KSP_NORM_PRECONDITIONED:
178         ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);         /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
179         break;
180       case KSP_NORM_UNPRECONDITIONED:
181         ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);         /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
182         break;
183       case KSP_NORM_NATURAL:
184         dp = PetscSqrtReal(PetscAbsScalar(gamma));          /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
185         break;
186       case KSP_NORM_NONE:
187         dp = 0.0;
188         break;
189       default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
190     }
191 
192     /* Check for convergence */
193     ksp->rnorm = dp;
194     KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
195     ierr = KSPMonitor(ksp,ksp->its,dp);CHKERRQ(ierr);
196     ierr = (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
197     if (ksp->reason) PetscFunctionReturn(0);
198 
199     /* Computations of current iteration done */
200     ++i;
201 
202     /* If needbe, allocate a new chunk of vectors in P and C */
203     ierr = KSPAllocateVectors_PIPEFCG(ksp,i+1,pipefcg->vecb);CHKERRQ(ierr);
204 
205     /* Note that we wrap around and start clobbering old vectors */
206     idx = i % (pipefcg->mmax+1);
207     Pcurr    = pipefcg->Pvecs[idx];
208     Scurr    = pipefcg->Svecs[idx];
209     Qcurr    = pipefcg->Qvecs[idx];
210     ZETAcurr = pipefcg->ZETAvecs[idx];
211     eta      = pipefcg->etas+idx;
212 
213     /* number of old directions to orthogonalize against */
214     switch(pipefcg->truncstrat){
215       case KSP_FCD_TRUNC_TYPE_STANDARD:
216         mi = pipefcg->mmax;
217         break;
218       case KSP_FCD_TRUNC_TYPE_NOTAY:
219         mi = ((i-1) % pipefcg->mmax)+1;
220         break;
221       default:
222         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized Truncation Strategy");
223     }
224 
225     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
226     ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr);
227     for (k=PetscMax(0,i-mi),j=0;k<i;++j,++k){
228       kdx = k % (pipefcg->mmax+1);
229       pipefcg->Pold[j]    = pipefcg->Pvecs[kdx];
230       pipefcg->Sold[j]    = pipefcg->Svecs[kdx];
231       pipefcg->Qold[j]    = pipefcg->Qvecs[kdx];
232       pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx];
233       redux[j]            = pipefcg->Svecs[kdx];
234     }
235     redux[j]   = R;   /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */
236     redux[j+1] = W;
237 
238     ierr = VecMXDotBegin(Z,j+2,redux,betas);CHKERRQ(ierr);  /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */
239     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */
240     ierr = VecWAXPY(N,-1.0,R,W);CHKERRQ(ierr);              /* m = u + B(w-r): (a) ntmp = w-r              */
241     ierr = KSP_PCApply(ksp,N,M);CHKERRQ(ierr);              /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
242     ierr = VecAXPY(M,1.0,Z);CHKERRQ(ierr);                  /* m = u + B(w-r): (c) m = z + mtmp            */
243     ierr = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr);         /* n = Am                                      */
244     ierr = VecMXDotEnd(Z,j+2,redux,betas);CHKERRQ(ierr);    /* Finish split reductions */
245     gamma = betas[j];
246     delta = PetscRealPart(betas[j+1]);
247 
248     *eta = 0.;
249     for (k=PetscMax(0,i-mi),j=0;k<i;++j,++k){
250       kdx = k % (pipefcg->mmax+1);
251       betas[j] /= -etas[kdx];                               /* betak  /= etak */
252       *eta -= ((PetscReal)(PetscAbsScalar(betas[j])*PetscAbsScalar(betas[j]))) * etas[kdx];
253                                                             /* etaitmp = -betaik^2 * etak */
254     }
255     *eta += delta;                                          /* etai    = delta -betaik^2 * etak */
256     if (*eta < 0.) {
257       pipefcg->norm_breakdown = PETSC_TRUE;
258       ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr);
259       break;
260     } else {
261       alpha= gamma/(*eta);                                  /* alpha = gamma/etai */
262     }
263 
264     /* project out stored search directions using classical G-S */
265     ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr);
266     ierr = VecCopy(W,Scurr);CHKERRQ(ierr);
267     ierr = VecCopy(M,Qcurr);CHKERRQ(ierr);
268     ierr = VecCopy(N,ZETAcurr);CHKERRQ(ierr);
269     ierr = VecMAXPY(Pcurr   ,j,betas,pipefcg->Pold);CHKERRQ(ierr);    /* pi    <- ui - sum_k beta_k p_k    */
270     ierr = VecMAXPY(Scurr   ,j,betas,pipefcg->Sold);CHKERRQ(ierr);    /* si    <- wi - sum_k beta_k s_k    */
271     ierr = VecMAXPY(Qcurr   ,j,betas,pipefcg->Qold);CHKERRQ(ierr);    /* qi    <- m  - sum_k beta_k q_k    */
272     ierr = VecMAXPY(ZETAcurr,j,betas,pipefcg->ZETAold);CHKERRQ(ierr); /* zetai <- n  - sum_k beta_k zeta_k */
273 
274   } while (ksp->its < ksp->max_it);
275   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
276   PetscFunctionReturn(0);
277 }
278 
KSPSolve_PIPEFCG(KSP ksp)279 static PetscErrorCode KSPSolve_PIPEFCG(KSP ksp)
280 {
281   PetscErrorCode ierr;
282   KSP_PIPEFCG    *pipefcg;
283   PetscScalar    gamma;
284   PetscReal      dp=0.0;
285   Vec            B,R,Z,X;
286   Mat            Amat,Pmat;
287 
288 #define VecXDot(x,y,a)         (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot       (x,y,a)   : VecTDot       (x,y,a))
289 
290   PetscFunctionBegin;
291   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
292 
293   pipefcg       = (KSP_PIPEFCG*)ksp->data;
294   X             = ksp->vec_sol;
295   B             = ksp->vec_rhs;
296   R             = ksp->work[0];
297   Z             = ksp->work[1];
298 
299   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
300 
301   /* Compute initial residual needed for convergence check*/
302   ksp->its = 0;
303   if (!ksp->guess_zero) {
304     ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);
305     ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);                 /* r <- b - Ax                             */
306   } else {
307     ierr = VecCopy(B,R);CHKERRQ(ierr);                      /* r <- b (x is 0)                         */
308   }
309   switch (ksp->normtype) {
310     case KSP_NORM_PRECONDITIONED:
311       ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);            /* z <- Br                                 */
312       ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);           /* dp <- dqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
313       break;
314     case KSP_NORM_UNPRECONDITIONED:
315       ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);           /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
316       break;
317     case KSP_NORM_NATURAL:
318       ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);            /* z <- Br                                 */
319       ierr = VecXDot(Z,R,&gamma);CHKERRQ(ierr);
320       dp = PetscSqrtReal(PetscAbsScalar(gamma));            /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
321       break;
322     case KSP_NORM_NONE:
323       dp = 0.0;
324       break;
325     default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
326   }
327 
328   /* Initial Convergence Check */
329   ierr       = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
330   ierr       = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
331   ksp->rnorm = dp;
332   ierr       = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
333   if (ksp->reason) PetscFunctionReturn(0);
334 
335   do {
336     /* A cycle is broken only if a norm breakdown occurs. If not the entire solve happens in a single cycle.
337        This is coded this way to allow both truncation and truncation-restart strategy
338        (see KSPFCDGetNumOldDirections()) */
339     ierr = KSPSolve_PIPEFCG_cycle(ksp);CHKERRQ(ierr);
340     if (ksp->reason) PetscFunctionReturn(0);
341     if (pipefcg->norm_breakdown) {
342       pipefcg->n_restarts++;
343       pipefcg->norm_breakdown = PETSC_FALSE;
344     }
345   } while (ksp->its < ksp->max_it);
346 
347   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
348   PetscFunctionReturn(0);
349 }
350 
KSPDestroy_PIPEFCG(KSP ksp)351 static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
352 {
353   PetscErrorCode ierr;
354   PetscInt       i;
355   KSP_PIPEFCG    *pipefcg;
356 
357   PetscFunctionBegin;
358   pipefcg = (KSP_PIPEFCG*)ksp->data;
359 
360   /* Destroy "standard" work vecs */
361   VecDestroyVecs(ksp->nwork,&ksp->work);
362 
363   /* Destroy vectors of old directions and the arrays that manage pointers to them */
364   if (pipefcg->nvecs){
365     for (i=0;i<pipefcg->nchunks;++i){
366       ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pPvecs[i]);CHKERRQ(ierr);
367       ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pSvecs[i]);CHKERRQ(ierr);
368       ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pQvecs[i]);CHKERRQ(ierr);
369       ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pZETAvecs[i]);CHKERRQ(ierr);
370     }
371   }
372   ierr = PetscFree4(pipefcg->Pvecs,pipefcg->Svecs,pipefcg->pPvecs,pipefcg->pSvecs);CHKERRQ(ierr);
373   ierr = PetscFree4(pipefcg->Qvecs,pipefcg->ZETAvecs,pipefcg->pQvecs,pipefcg->pZETAvecs);CHKERRQ(ierr);
374   ierr = PetscFree4(pipefcg->Pold,pipefcg->Sold,pipefcg->Qold,pipefcg->ZETAold);CHKERRQ(ierr);
375   ierr = PetscFree(pipefcg->chunksizes);CHKERRQ(ierr);
376   ierr = PetscFree3(pipefcg->dots,pipefcg->etas,pipefcg->redux);CHKERRQ(ierr);
377   ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
378   PetscFunctionReturn(0);
379 }
380 
KSPView_PIPEFCG(KSP ksp,PetscViewer viewer)381 static PetscErrorCode KSPView_PIPEFCG(KSP ksp,PetscViewer viewer)
382 {
383   KSP_PIPEFCG    *pipefcg = (KSP_PIPEFCG*)ksp->data;
384   PetscErrorCode ierr;
385   PetscBool      iascii,isstring;
386   const char     *truncstr;
387 
388   PetscFunctionBegin;
389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
391 
392   if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD){
393     truncstr = "Using standard truncation strategy";
394   } else if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY){
395     truncstr = "Using Notay's truncation strategy";
396   } else {
397     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Undefined FCD truncation strategy");
398   }
399 
400   if (iascii) {
401     ierr = PetscViewerASCIIPrintf(viewer,"  max previous directions = %D\n",pipefcg->mmax);CHKERRQ(ierr);
402     ierr = PetscViewerASCIIPrintf(viewer,"  preallocated %D directions\n",PetscMin(pipefcg->nprealloc,pipefcg->mmax+1));CHKERRQ(ierr);
403     ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",truncstr);CHKERRQ(ierr);
404     ierr = PetscViewerASCIIPrintf(viewer,"  restarts performed = %D \n", pipefcg->n_restarts);CHKERRQ(ierr);
405   } else if (isstring) {
406     ierr = PetscViewerStringSPrintf(viewer,
407       "max previous directions = %D, preallocated %D directions, %s truncation strategy",
408       pipefcg->mmax,pipefcg->nprealloc,truncstr);CHKERRQ(ierr);
409   }
410   PetscFunctionReturn(0);
411 }
412 
413 /*@
414   KSPPIPEFCGSetMmax - set the maximum number of previous directions PIPEFCG will store for orthogonalization
415 
416   Note: mmax + 1 directions are stored (mmax previous ones along with the current one)
417   and whether all are used in each iteration also depends on the truncation strategy
418   (see KSPPIPEFCGSetTruncationType)
419 
420   Logically Collective on ksp
421 
422   Input Parameters:
423 +  ksp - the Krylov space context
424 -  mmax - the maximum number of previous directions to orthogonalize against
425 
426   Level: intermediate
427 
428   Options Database:
429 . -ksp_pipefcg_mmax <N>
430 
431 .seealso: KSPPIPEFCG, KSPPIPEFCGSetTruncationType(), KSPPIPEFCGSetNprealloc()
432 @*/
KSPPIPEFCGSetMmax(KSP ksp,PetscInt mmax)433 PetscErrorCode KSPPIPEFCGSetMmax(KSP ksp,PetscInt mmax)
434 {
435   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
436 
437   PetscFunctionBegin;
438   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
439   PetscValidLogicalCollectiveEnum(ksp,mmax,2);
440   pipefcg->mmax=mmax;
441   PetscFunctionReturn(0);
442 }
443 
444 /*@
445   KSPPIPEFCGGetMmax - get the maximum number of previous directions PIPEFCG will store
446 
447   Note: PIPEFCG stores mmax+1 directions at most (mmax previous ones, and the current one)
448 
449    Not Collective
450 
451    Input Parameter:
452 .  ksp - the Krylov space context
453 
454    Output Parameter:
455 .  mmax - the maximum number of previous directons allowed for orthogonalization
456 
457   Options Database:
458 . -ksp_pipefcg_mmax <N>
459 
460    Level: intermediate
461 
462 .seealso: KSPPIPEFCG, KSPPIPEFCGGetTruncationType(), KSPPIPEFCGGetNprealloc(), KSPPIPEFCGSetMmax()
463 @*/
KSPPIPEFCGGetMmax(KSP ksp,PetscInt * mmax)464 PetscErrorCode KSPPIPEFCGGetMmax(KSP ksp,PetscInt *mmax)
465 {
466   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
467 
468   PetscFunctionBegin;
469   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
470   *mmax=pipefcg->mmax;
471   PetscFunctionReturn(0);
472 }
473 
474 /*@
475   KSPPIPEFCGSetNprealloc - set the number of directions to preallocate with PIPEFCG
476 
477   Logically Collective on ksp
478 
479   Input Parameters:
480 +  ksp - the Krylov space context
481 -  nprealloc - the number of vectors to preallocate
482 
483   Level: advanced
484 
485   Options Database:
486 . -ksp_pipefcg_nprealloc <N>
487 
488 .seealso: KSPPIPEFCG, KSPPIPEFCGSetTruncationType(), KSPPIPEFCGGetNprealloc()
489 @*/
KSPPIPEFCGSetNprealloc(KSP ksp,PetscInt nprealloc)490 PetscErrorCode KSPPIPEFCGSetNprealloc(KSP ksp,PetscInt nprealloc)
491 {
492   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
493 
494   PetscFunctionBegin;
495   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
496   PetscValidLogicalCollectiveEnum(ksp,nprealloc,2);
497   pipefcg->nprealloc = nprealloc;
498   PetscFunctionReturn(0);
499 }
500 
501 /*@
502   KSPPIPEFCGGetNprealloc - get the number of directions to preallocate by PIPEFCG
503 
504    Not Collective
505 
506    Input Parameter:
507 .  ksp - the Krylov space context
508 
509    Output Parameter:
510 .  nprealloc - the number of directions preallocated
511 
512   Options Database:
513 . -ksp_pipefcg_nprealloc <N>
514 
515    Level: advanced
516 
517 .seealso: KSPPIPEFCG, KSPPIPEFCGGetTruncationType(), KSPPIPEFCGSetNprealloc()
518 @*/
KSPPIPEFCGGetNprealloc(KSP ksp,PetscInt * nprealloc)519 PetscErrorCode KSPPIPEFCGGetNprealloc(KSP ksp,PetscInt *nprealloc)
520 {
521   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
522 
523   PetscFunctionBegin;
524   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
525   *nprealloc = pipefcg->nprealloc;
526   PetscFunctionReturn(0);
527 }
528 
529 /*@
530   KSPPIPEFCGSetTruncationType - specify how many of its stored previous directions PIPEFCG uses during orthoganalization
531 
532   Logically Collective on ksp
533 
534   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
535   KSP_FCD_TRUNC_TYPE_NOTAY uses max(1,mod(i,mmax)) stored directions at iteration i=0,1,..
536 
537   Input Parameters:
538 +  ksp - the Krylov space context
539 -  truncstrat - the choice of strategy
540 
541   Level: intermediate
542 
543   Options Database:
544 .  -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against
545 
546 .seealso: KSPPIPEFCG, KSPPIPEFCGGetTruncationType, KSPFCDTruncationType
547 @*/
KSPPIPEFCGSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)548 PetscErrorCode KSPPIPEFCGSetTruncationType(KSP ksp,KSPFCDTruncationType truncstrat)
549 {
550   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
551 
552   PetscFunctionBegin;
553   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
554   PetscValidLogicalCollectiveEnum(ksp,truncstrat,2);
555   pipefcg->truncstrat=truncstrat;
556   PetscFunctionReturn(0);
557 }
558 
559 /*@
560   KSPPIPEFCGGetTruncationType - get the truncation strategy employed by PIPEFCG
561 
562    Not Collective
563 
564    Input Parameter:
565 .  ksp - the Krylov space context
566 
567    Output Parameter:
568 .  truncstrat - the strategy type
569 
570   Options Database:
571 . -ksp_pipefcg_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
572 
573    Level: intermediate
574 
575 .seealso: KSPPIPEFCG, KSPPIPEFCGSetTruncationType, KSPFCDTruncationType
576 @*/
KSPPIPEFCGGetTruncationType(KSP ksp,KSPFCDTruncationType * truncstrat)577 PetscErrorCode KSPPIPEFCGGetTruncationType(KSP ksp,KSPFCDTruncationType *truncstrat)
578 {
579   KSP_PIPEFCG *pipefcg=(KSP_PIPEFCG*)ksp->data;
580 
581   PetscFunctionBegin;
582   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
583   *truncstrat=pipefcg->truncstrat;
584   PetscFunctionReturn(0);
585 }
586 
KSPSetFromOptions_PIPEFCG(PetscOptionItems * PetscOptionsObject,KSP ksp)587 static PetscErrorCode KSPSetFromOptions_PIPEFCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
588 {
589   PetscErrorCode ierr;
590   KSP_PIPEFCG    *pipefcg=(KSP_PIPEFCG*)ksp->data;
591   PetscInt       mmax,nprealloc;
592   PetscBool      flg;
593 
594   PetscFunctionBegin;
595   ierr = PetscOptionsHead(PetscOptionsObject,"KSP PIPEFCG options");CHKERRQ(ierr);
596   ierr = PetscOptionsInt("-ksp_pipefcg_mmax","Number of search directions to storue","KSPPIPEFCGSetMmax",pipefcg->mmax,&mmax,&flg);CHKERRQ(ierr);
597   if (flg) ierr = KSPPIPEFCGSetMmax(ksp,mmax);CHKERRQ(ierr);
598   ierr = PetscOptionsInt("-ksp_pipefcg_nprealloc","Number of directions to preallocate","KSPPIPEFCGSetNprealloc",pipefcg->nprealloc,&nprealloc,&flg);CHKERRQ(ierr);
599   if (flg) { ierr = KSPPIPEFCGSetNprealloc(ksp,nprealloc);CHKERRQ(ierr); }
600   ierr = PetscOptionsEnum("-ksp_pipefcg_truncation_type","Truncation approach for directions","KSPFCGSetTruncationType",KSPFCDTruncationTypes,(PetscEnum)pipefcg->truncstrat,(PetscEnum*)&pipefcg->truncstrat,NULL);CHKERRQ(ierr);
601   ierr = PetscOptionsTail();CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 /*MC
606 
607   KSPPIPEFCG - Implements a Pipelined, Flexible Conjugate Gradient method.
608 
609   Options Database Keys:
610 +   -ksp_pipefcg_mmax <N> - The number of previous search directions to store
611 .   -ksp_pipefcg_nprealloc <N> - The number of previous search directions to preallocate
612 -   -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against
613 
614   Notes:
615    Supports left preconditioning only.
616 
617    The natural "norm" for this method is (u,Au), where u is the preconditioned residual. As with standard CG, this norm is available at no additional computational cost. Choosing preconditioned or unpreconditioned norms involve an extra blocking global reduction, thus removing any benefit from pipelining.
618 
619    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
620    See the FAQ on the PETSc website for details.
621 
622   Reference:
623     P. Sanan, S.M. Schnepp, and D.A. May,
624     "Pipelined, Flexible Krylov Subspace Methods,"
625     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
626     DOI: 10.1137/15M1049130
627 
628   Level: intermediate
629 
630 .seealso: KSPFCG, KSPPIPECG, KSPPIPECR, KSPGCR, KSPPIPEGCR, KSPFGMRES, KSPCG, KSPPIPEFCGSetMmax(), KSPPIPEFCGGetMmax(), KSPPIPEFCGSetNprealloc(), KSPPIPEFCGGetNprealloc(), KSPPIPEFCGSetTruncationType(), KSPPIPEFCGGetTruncationType()
631 
632 M*/
KSPCreate_PIPEFCG(KSP ksp)633 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFCG(KSP ksp)
634 {
635   PetscErrorCode ierr;
636   KSP_PIPEFCG    *pipefcg;
637 
638   PetscFunctionBegin;
639   ierr = PetscNewLog(ksp,&pipefcg);CHKERRQ(ierr);
640 #if !defined(PETSC_USE_COMPLEX)
641   pipefcg->type       = KSP_CG_SYMMETRIC;
642 #else
643   pipefcg->type       = KSP_CG_HERMITIAN;
644 #endif
645   pipefcg->mmax       = KSPPIPEFCG_DEFAULT_MMAX;
646   pipefcg->nprealloc  = KSPPIPEFCG_DEFAULT_NPREALLOC;
647   pipefcg->nvecs      = 0;
648   pipefcg->vecb       = KSPPIPEFCG_DEFAULT_VECB;
649   pipefcg->nchunks    = 0;
650   pipefcg->truncstrat = KSPPIPEFCG_DEFAULT_TRUNCSTRAT;
651   pipefcg->n_restarts = 0;
652 
653   ksp->data = (void*)pipefcg;
654 
655   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);CHKERRQ(ierr);
656   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,1);CHKERRQ(ierr);
657   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,1);CHKERRQ(ierr);
658   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);CHKERRQ(ierr);
659 
660   ksp->ops->setup          = KSPSetUp_PIPEFCG;
661   ksp->ops->solve          = KSPSolve_PIPEFCG;
662   ksp->ops->destroy        = KSPDestroy_PIPEFCG;
663   ksp->ops->view           = KSPView_PIPEFCG;
664   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFCG;
665   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
666   ksp->ops->buildresidual  = KSPBuildResidualDefault;
667   PetscFunctionReturn(0);
668 }
669