/* Runtime help string printed by -help; fixed typo "Exaple" -> "Example" */
static char help[] =  "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
Spectral degree can be specified by passing values to -p option.\n\
Global problem either with dirichlet boundary conditions on one side or in the pure neumann case (depending on runtime parameters).\n\
Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
Example usage: \n\
1D: mpiexec -n 4 ex59 -nex 7\n\
2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
Subdomain decomposition can be specified with -np_ parameters.\n\
Dirichlet boundaries on one side by default:\n\
it does not iterate on dirichlet nodes by default: if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
Pure Neumann case can be requested by passing in -pureneumann.\n\
In the latter case, in order to avoid runtime errors during factorization, please specify also -coarse_redundant_pc_factor_zeropivot 0\n\n";
15 
16 #include <petscksp.h>
17 #include <petscpc.h>
18 #include <petscdm.h>
19 #include <petscdmda.h>
20 #include <petscblaslapack.h>
21 #define DEBUG 0
22 
/* structure holding domain data */
typedef struct {
  /* communicator shared by all subdomains */
  MPI_Comm gcomm;
  /* space dimension (1, 2 or 3) */
  PetscInt dim;
  /* spectral degree of the elements */
  PetscInt p;
  /* subdomains per dimension */
  PetscInt npx,npy,npz;
  /* subdomain index in cartesian dimensions */
  PetscInt ipx,ipy,ipz;
  /* elements per dimension (global) */
  PetscInt nex,ney,nez;
  /* local elements per dimension */
  PetscInt nex_l,ney_l,nez_l;
  /* global number of dofs per dimension */
  PetscInt xm,ym,zm;
  /* local number of dofs per dimension */
  PetscInt xm_l,ym_l,zm_l;
  /* starting global indexes for subdomain in lexicographic ordering */
  PetscInt startx,starty,startz;
  /* pure Neumann problem (no Dirichlet boundary at all) */
  PetscBool pure_neumann;
  /* Dirichlet BC imposed by zeroing rows/columns instead of removing dofs */
  PetscBool DBC_zerorows;
  /* Scaling factor for subdomain (used to test jumping coefficients) */
  PetscScalar scalingfactor;
  /* test the KSPFETIDP interface instead of PCBDDCCreateFETIDPOperators */
  PetscBool   testkspfetidp;
} DomainData;
53 
/* structure holding Gauss-Lobatto-Legendre (GLL) quadrature data */
typedef struct {
  /* GLL nodes on [-1,1] (p+1 entries) */
  PetscReal   *zGL;
  /* GLL quadrature weights (p+1 entries) */
  PetscScalar *rhoGL;
  /* auxiliary (p+1)x(p+1) 1D matrix used to build the laplacian */
  PetscScalar **A;
  /* Element matrix (1D/2D/3D laplacian on the reference element) */
  Mat elem_mat;
} GLLData;
65 
66 
BuildCSRGraph(DomainData dd,PetscInt ** xadj,PetscInt ** adjncy)67 static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
68 {
69   PetscErrorCode ierr;
70   PetscInt       *xadj_temp,*adjncy_temp;
71   PetscInt       i,j,k,ii,jj,kk,iindex,count_adj;
72   PetscInt       istart_csr,iend_csr,jstart_csr,jend_csr,kstart_csr,kend_csr;
73   PetscBool      internal_node;
74 
75   /* first count dimension of adjncy */
76   count_adj=0;
77   for (k=0; k<dd.zm_l; k++) {
78     internal_node = PETSC_TRUE;
79     kstart_csr    = k>0 ? k-1 : k;
80     kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
81     if (k == 0 || k == dd.zm_l-1) {
82       internal_node = PETSC_FALSE;
83       kstart_csr    = k;
84       kend_csr      = k+1;
85     }
86     for (j=0; j<dd.ym_l; j++) {
87       jstart_csr = j>0 ? j-1 : j;
88       jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
89       if (j == 0 || j == dd.ym_l-1) {
90         internal_node = PETSC_FALSE;
91         jstart_csr    = j;
92         jend_csr      = j+1;
93       }
94       for (i=0; i<dd.xm_l; i++) {
95         istart_csr = i>0 ? i-1 : i;
96         iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
97         if (i == 0 || i == dd.xm_l-1) {
98           internal_node = PETSC_FALSE;
99           istart_csr    = i;
100           iend_csr      = i+1;
101         }
102         if (internal_node) {
103           istart_csr = i;
104           iend_csr   = i+1;
105           jstart_csr = j;
106           jend_csr   = j+1;
107           kstart_csr = k;
108           kend_csr   = k+1;
109         }
110         for (kk=kstart_csr;kk<kend_csr;kk++) {
111           for (jj=jstart_csr;jj<jend_csr;jj++) {
112             for (ii=istart_csr;ii<iend_csr;ii++) {
113               count_adj=count_adj+1;
114             }
115           }
116         }
117       }
118     }
119   }
120   ierr = PetscMalloc1(dd.xm_l*dd.ym_l*dd.zm_l+1,&xadj_temp);CHKERRQ(ierr);
121   ierr = PetscMalloc1(count_adj,&adjncy_temp);CHKERRQ(ierr);
122 
123   /* now fill CSR data structure */
124   count_adj=0;
125   for (k=0; k<dd.zm_l; k++) {
126     internal_node = PETSC_TRUE;
127     kstart_csr    = k>0 ? k-1 : k;
128     kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
129     if (k == 0 || k == dd.zm_l-1) {
130       internal_node = PETSC_FALSE;
131       kstart_csr    = k;
132       kend_csr      = k+1;
133     }
134     for (j=0; j<dd.ym_l; j++) {
135       jstart_csr = j>0 ? j-1 : j;
136       jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
137       if (j == 0 || j == dd.ym_l-1) {
138         internal_node = PETSC_FALSE;
139         jstart_csr    = j;
140         jend_csr      = j+1;
141       }
142       for (i=0; i<dd.xm_l; i++) {
143         istart_csr = i>0 ? i-1 : i;
144         iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
145         if (i == 0 || i == dd.xm_l-1) {
146           internal_node = PETSC_FALSE;
147           istart_csr    = i;
148           iend_csr      = i+1;
149         }
150         iindex            = k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
151         xadj_temp[iindex] = count_adj;
152         if (internal_node) {
153           istart_csr = i;
154           iend_csr   = i+1;
155           jstart_csr = j;
156           jend_csr   = j+1;
157           kstart_csr = k;
158           kend_csr   = k+1;
159         }
160         for (kk=kstart_csr; kk<kend_csr; kk++) {
161           for (jj=jstart_csr; jj<jend_csr; jj++) {
162             for (ii=istart_csr; ii<iend_csr; ii++) {
163               iindex = kk*dd.xm_l*dd.ym_l+jj*dd.xm_l+ii;
164 
165               adjncy_temp[count_adj] = iindex;
166               count_adj              = count_adj+1;
167             }
168           }
169         }
170       }
171     }
172   }
173   xadj_temp[dd.xm_l*dd.ym_l*dd.zm_l] = count_adj;
174 
175   *xadj   = xadj_temp;
176   *adjncy = adjncy_temp;
177   PetscFunctionReturn(0);
178 }
179 
/* Compute index sets of local dofs lying on "special" boundaries of the
   subdomain: *dirichlet gets the dofs on the west face of the global domain
   (where essential BCs are imposed), *neumann gets the dofs on all external
   faces of the global domain.  Either output pointer may be NULL, in which
   case that IS is not computed.  Indices are local (subdomain lexicographic
   ordering); the touched[] flags ensure each dof is listed at most once,
   since faces share edges and corners. */
static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd,IS *dirichlet,IS *neumann)
{
  PetscErrorCode ierr;
  IS             temp_dirichlet=0,temp_neumann=0;
  PetscInt       localsize,i,j,k,*indices;
  PetscBool      *touched;

  PetscFunctionBeginUser;
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  /* indices[] is sized for the worst case (all local dofs) */
  ierr = PetscMalloc1(localsize,&indices);CHKERRQ(ierr);
  ierr = PetscMalloc1(localsize,&touched);CHKERRQ(ierr);
  for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;

  if (dirichlet) {
    i = 0;
    /* west boundary: only subdomains with ipx==0 touch the Dirichlet side */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          touched[indices[i]]=PETSC_TRUE;
          i++;
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);CHKERRQ(ierr);
  }
  if (neumann) {
    i = 0;
    /* west boundary (skipped per-dof if already in the Dirichlet set above) */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* east boundary */
    if (dd.ipx == dd.npx-1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* south boundary (only meaningful in 2D/3D) */
    if (dd.ipy == 0 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* north boundary (only meaningful in 2D/3D) */
    if (dd.ipy == dd.npy-1 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* bottom boundary (3D only) */
    if (dd.ipz == 0 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* top boundary (3D only) */
    if (dd.ipz == dd.npz-1 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);CHKERRQ(ierr);
  }
  if (dirichlet) *dirichlet = temp_dirichlet;
  if (neumann) *neumann = temp_neumann;
  ierr = PetscFree(indices);CHKERRQ(ierr);
  ierr = PetscFree(touched);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
290 
/* Compute the local-to-global mapping needed by the MATIS object.
   A simple lexicographic global numbering is built by hand, then permuted
   into PETSc (DMDA) ordering through the DMDA's application ordering (AO). */
static PetscErrorCode ComputeMapping(DomainData dd,ISLocalToGlobalMapping *isg2lmap)
{
  PetscErrorCode         ierr;
  DM                     da;
  AO                     ao;
  DMBoundaryType         bx = DM_BOUNDARY_NONE,by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
  DMDAStencilType        stype = DMDA_STENCIL_BOX;
  ISLocalToGlobalMapping temp_isg2lmap;
  PetscInt               i,j,k,ig,jg,kg,lindex,gindex,localsize;
  PetscInt               *global_indices;

  PetscFunctionBeginUser;
  /* Not an efficient mapping: this function computes a very simple lexicographic mapping
     just to illustrate the creation of a MATIS object */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr      = PetscMalloc1(localsize,&global_indices);CHKERRQ(ierr);
  /* global lexicographic index of each local dof */
  for (k=0; k<dd.zm_l; k++) {
    kg=dd.startz+k;
    for (j=0; j<dd.ym_l; j++) {
      jg=dd.starty+j;
      for (i=0; i<dd.xm_l; i++) {
        ig                    =dd.startx+i;
        lindex                =k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
        gindex                =kg*dd.xm*dd.ym+jg*dd.xm+ig;
        global_indices[lindex]=gindex;
      }
    }
  }
  /* a throwaway DMDA is created only to obtain its AO */
  if (dd.dim==3) {
    ierr = DMDACreate3d(dd.gcomm,bx,by,bz,stype,dd.xm,dd.ym,dd.zm,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&da);CHKERRQ(ierr);
  } else if (dd.dim==2) {
    ierr = DMDACreate2d(dd.gcomm,bx,by,stype,dd.xm,dd.ym,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);CHKERRQ(ierr);
  } else {
    ierr = DMDACreate1d(dd.gcomm,bx,dd.xm,1,1,NULL,&da);CHKERRQ(ierr);
  }
  ierr = DMSetFromOptions(da);CHKERRQ(ierr);
  ierr = DMSetUp(da);CHKERRQ(ierr);
  ierr = DMDASetAOType(da,AOMEMORYSCALABLE);CHKERRQ(ierr);
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  /* permute the hand-made numbering in place into PETSc ordering */
  ierr = AOApplicationToPetsc(ao,dd.xm_l*dd.ym_l*dd.zm_l,global_indices);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: global_indices is now owned by the mapping */
  ierr = ISLocalToGlobalMappingCreate(dd.gcomm,1,localsize,global_indices,PETSC_OWN_POINTER,&temp_isg2lmap);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);
  *isg2lmap = temp_isg2lmap;
  PetscFunctionReturn(0);
}
/* Assemble the local (subdomain) Neumann matrix by summing the element
   matrix over all local elements.  When Dirichlet dofs are eliminated from
   the system (not pure Neumann, not DBC_zerorows) the westmost subdomains
   use a submatrix of the element matrix with the Dirichlet nodes removed. */
static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
{
  PetscErrorCode ierr;
  PetscInt       localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
  PetscInt       ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
  PetscInt       *indexg,*cols,*colsg;
  PetscScalar    *vals;
  Mat            temp_local_mat,elem_mat_DBC=0,*usedmat;
  IS             submatIS;

  PetscFunctionBeginUser;
  ierr = MatGetSize(glldata.elem_mat,&i,&j);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&indexg);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&colsg);CHKERRQ(ierr);
  /* get submatrix of elem_mat without dirichlet nodes (i==0 plane removed) */
  if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
    xloc = dd.p+1;
    yloc = 1;
    zloc = 1;
    if (dd.dim>1) yloc = dd.p+1;
    if (dd.dim>2) zloc = dd.p+1;
    ii = 0;
    for (k=0;k<zloc;k++) {
      for (j=0;j<yloc;j++) {
        for (i=1;i<xloc;i++) {
          indexg[ii]=k*xloc*yloc+j*xloc+i;
          ii++;
        }
      }
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);CHKERRQ(ierr);
    ierr = ISDestroy(&submatIS);CHKERRQ(ierr);
  }

  /* Assemble subdomain matrix */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr      = MatCreate(PETSC_COMM_SELF,&temp_local_mat);CHKERRQ(ierr);
  ierr      = MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);CHKERRQ(ierr);
  ierr      = MatSetOptionsPrefix(temp_local_mat,"subdomain_");CHKERRQ(ierr);
  /* set local matrices type: here we use SEQSBAIJ primarily for testing purpose */
  /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
  if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
    ierr = MatSetType(temp_local_mat,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(temp_local_mat,MATSEQSBAIJ);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(temp_local_mat);CHKERRQ(ierr);

  /* nonzeros-per-row estimate; truncated to PetscInt */
  i = PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);

  /* preallocate for every type the matrix may end up as after options */
  ierr = MatSeqAIJSetPreallocation(temp_local_mat,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSeqBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);

  yloc = dd.p+1;
  zloc = dd.p+1;
  if (dd.dim < 3) zloc = 1;
  if (dd.dim < 2) yloc = 1;

  auxnez = dd.nez_l;
  auxney = dd.ney_l;
  auxnex = dd.nex_l;
  if (dd.dim < 3) auxnez = 1;
  if (dd.dim < 2) auxney = 1;

  /* loop over local elements */
  for (ke=0; ke<auxnez; ke++) {
    for (je=0; je<auxney; je++) {
      for (ie=0; ie<auxnex; ie++) {
        /* customize element accounting for BC */
        xloc    = dd.p+1;
        ming    = 0;     /* index shift compensating for the removed Dirichlet plane */
        usedmat = &glldata.elem_mat;
        if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
          if (ie == 0) {
            xloc    = dd.p;
            usedmat = &elem_mat_DBC;
          } else {
            ming    = -1;
            usedmat = &glldata.elem_mat;
          }
        }
        /* local to the element/global to the subdomain indexing */
        for (k=0; k<zloc; k++) {
          kg = ke*dd.p+k;
          for (j=0; j<yloc; j++) {
            jg = je*dd.p+j;
            for (i=0; i<xloc; i++) {
              ig         = ie*dd.p+i+ming;
              ii         = k*xloc*yloc+j*xloc+i;
              indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
            }
          }
        }
        /* Set values: add each element-matrix row into the subdomain matrix */
        for (i=0; i<xloc*yloc*zloc; i++) {
          ierr = MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
          for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
          ierr = MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);CHKERRQ(ierr);
          ierr = MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = PetscFree(indexg);CHKERRQ(ierr);
  ierr = PetscFree(colsg);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(temp_local_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(temp_local_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  *local_mat = temp_local_mat;
  ierr       = MatDestroy(&elem_mat_DBC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
461 
GLLStuffs(DomainData dd,GLLData * glldata)462 static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
463 {
464   PetscErrorCode ierr;
465   PetscReal      *M,si;
466   PetscScalar    x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
467   PetscBLASInt   pm1,lierr;
468   PetscInt       i,j,n,k,s,r,q,ii,jj,p=dd.p;
469   PetscInt       xloc,yloc,zloc,xyloc,xyzloc;
470 
471   PetscFunctionBeginUser;
472   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
473   ierr = PetscCalloc1(p+1,&glldata->zGL);CHKERRQ(ierr);
474 
475   glldata->zGL[0]=-1.0;
476   glldata->zGL[p]= 1.0;
477   if (p > 1) {
478     if (p == 2) glldata->zGL[1]=0.0;
479     else {
480       ierr = PetscMalloc1(p-1,&M);CHKERRQ(ierr);
481       for (i=0; i<p-1; i++) {
482         si  = (PetscReal)(i+1.0);
483         M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
484       }
485       pm1  = p-1;
486       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
487       PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
488       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STERF Lapack routine %d",(int)lierr);
489       ierr = PetscFPTrapPop();CHKERRQ(ierr);
490       ierr = PetscFree(M);CHKERRQ(ierr);
491     }
492   }
493 
494   /* Weights for 1D quadrature */
495   ierr = PetscMalloc1(p+1,&glldata->rhoGL);CHKERRQ(ierr);
496 
497   glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
498   glldata->rhoGL[p]=glldata->rhoGL[0];
499   z2 = -1;                      /* Dummy value to avoid -Wmaybe-initialized */
500   for (i=1; i<p; i++) {
501     x  = glldata->zGL[i];
502     z0 = 1.0;
503     z1 = x;
504     for (n=1; n<p; n++) {
505       z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
506       z0 = z1;
507       z1 = z2;
508     }
509     glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
510   }
511 
512   /* Auxiliary mat for laplacian */
513   ierr = PetscMalloc1(p+1,&glldata->A);CHKERRQ(ierr);
514   ierr = PetscMalloc1((p+1)*(p+1),&glldata->A[0]);CHKERRQ(ierr);
515   for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;
516 
517   for (j=1; j<p; j++) {
518     x =glldata->zGL[j];
519     z0=1.0;
520     z1=x;
521     for (n=1; n<p; n++) {
522       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
523       z0=z1;
524       z1=z2;
525     }
526     Lpj=z2;
527     for (r=1; r<p; r++) {
528       if (r == j) {
529         glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
530       } else {
531         x  = glldata->zGL[r];
532         z0 = 1.0;
533         z1 = x;
534         for (n=1; n<p; n++) {
535           z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
536           z0=z1;
537           z1=z2;
538         }
539         Lpr             = z2;
540         glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
541       }
542     }
543   }
544   for (j=1; j<p+1; j++) {
545     x  = glldata->zGL[j];
546     z0 = 1.0;
547     z1 = x;
548     for (n=1; n<p; n++) {
549       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
550       z0=z1;
551       z1=z2;
552     }
553     Lpj             = z2;
554     glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
555     glldata->A[0][j]=glldata->A[j][0];
556   }
557   for (j=0; j<p; j++) {
558     x  = glldata->zGL[j];
559     z0 = 1.0;
560     z1 = x;
561     for (n=1; n<p; n++) {
562       z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
563       z0=z1;
564       z1=z2;
565     }
566     Lpj=z2;
567 
568     glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
569     glldata->A[j][p]=glldata->A[p][j];
570   }
571   glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
572   glldata->A[p][p]=glldata->A[0][0];
573 
574   /* compute element matrix */
575   xloc = p+1;
576   yloc = p+1;
577   zloc = p+1;
578   if (dd.dim<2) yloc=1;
579   if (dd.dim<3) zloc=1;
580   xyloc  = xloc*yloc;
581   xyzloc = xloc*yloc*zloc;
582 
583   ierr = MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);CHKERRQ(ierr);
584   ierr = MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);CHKERRQ(ierr);
585   ierr = MatSetType(glldata->elem_mat,MATSEQAIJ);CHKERRQ(ierr);
586   ierr = MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL);CHKERRQ(ierr); /* overestimated */
587   ierr = MatZeroEntries(glldata->elem_mat);CHKERRQ(ierr);
588   ierr = MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
589 
590   for (k=0; k<zloc; k++) {
591     if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
592     else rhoGLk=1.0;
593 
594     for (j=0; j<yloc; j++) {
595       if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
596       else rhoGLj=1.0;
597 
598       for (i=0; i<xloc; i++) {
599         ii = k*xyloc+j*xloc+i;
600         s  = k;
601         r  = j;
602         for (q=0; q<xloc; q++) {
603           jj   = s*xyloc+r*xloc+q;
604           ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
605         }
606         if (dd.dim>1) {
607           s=k;
608           q=i;
609           for (r=0; r<yloc; r++) {
610             jj   = s*xyloc+r*xloc+q;
611             ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
612           }
613         }
614         if (dd.dim>2) {
615           r=j;
616           q=i;
617           for (s=0; s<zloc; s++) {
618             jj   = s*xyloc+r*xloc+q;
619             ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);CHKERRQ(ierr);
620           }
621         }
622       }
623     }
624   }
625   ierr = MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
626   ierr = MatAssemblyEnd  (glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
627 #if DEBUG
628   {
629     Vec       lvec,rvec;
630     PetscReal norm;
631     ierr = MatCreateVecs(glldata->elem_mat,&lvec,&rvec);CHKERRQ(ierr);
632     ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
633     ierr = MatMult(glldata->elem_mat,lvec,rvec);CHKERRQ(ierr);
634     ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
635     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
636     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
637   }
638 #endif
639   PetscFunctionReturn(0);
640 }
641 
/* Compute, from the MPI rank, the subdomain's cartesian indices (ipx,ipy,ipz),
   its local element counts, local/global dof counts, and the starting global
   indices of its dofs in the simple lexicographic ordering. */
static PetscErrorCode DomainDecomposition(DomainData *dd)
{
  PetscMPIInt rank;
  PetscInt    i,j,k;

  PetscFunctionBeginUser;
  /* Subdomain index in cartesian coordinates */
  MPI_Comm_rank(dd->gcomm,&rank);
  dd->ipx = rank%dd->npx;
  /* NOTE(review): for dim==2 this relies on npz==1 so that rank/(npx*npy)
     is 0 for all valid ranks -- confirm the setup enforces this */
  if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
  else dd->ipz = 0;

  dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
  /* number of local elements (remainder spread over the first subdomains) */
  dd->nex_l = dd->nex/dd->npx;
  if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
  if (dd->dim>1) {
    dd->ney_l = dd->ney/dd->npy;
    if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
  } else {
    dd->ney_l = 0;
  }
  if (dd->dim>2) {
    dd->nez_l = dd->nez/dd->npz;
    if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
  } else {
    dd->nez_l = 0;
  }
  /* local and global number of dofs (p dofs per element plus shared endpoint) */
  dd->xm_l = dd->nex_l*dd->p+1;
  dd->xm   = dd->nex*dd->p+1;
  dd->ym_l = dd->ney_l*dd->p+1;
  dd->ym   = dd->ney*dd->p+1;
  dd->zm_l = dd->nez_l*dd->p+1;
  dd->zm   = dd->nez*dd->p+1;
  /* when Dirichlet dofs are eliminated, the west plane is dropped */
  if (!dd->pure_neumann) {
    if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
    if (!dd->DBC_zerorows) dd->xm--;
  }

  /* starting global index for local dofs (simple lexicographic order) */
  dd->startx = 0;
  j          = dd->nex/dd->npx;
  for (i=0; i<dd->ipx; i++) {
    k = j;
    if (i<dd->nex%dd->npx) k++;
    dd->startx = dd->startx+k*dd->p;
  }
  /* shift by one to account for the eliminated Dirichlet plane */
  if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

  dd->starty = 0;
  if (dd->dim > 1) {
    j = dd->ney/dd->npy;
    for (i=0; i<dd->ipy; i++) {
      k = j;
      if (i<dd->ney%dd->npy) k++;
      dd->starty = dd->starty+k*dd->p;
    }
  }
  dd->startz = 0;
  if (dd->dim > 2) {
    j = dd->nez/dd->npz;
    for (i=0; i<dd->ipz; i++) {
      k = j;
      if (i<dd->nez%dd->npz) k++;
      dd->startz = dd->startz+k*dd->p;
    }
  }
  PetscFunctionReturn(0);
}
ComputeMatrix(DomainData dd,Mat * A)712 static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
713 {
714   PetscErrorCode         ierr;
715   GLLData                gll;
716   Mat                    local_mat  =0,temp_A=0;
717   ISLocalToGlobalMapping matis_map  =0;
718   IS                     dirichletIS=0;
719 
720   PetscFunctionBeginUser;
721   /* Compute some stuff of Gauss-Legendre-Lobatto quadrature rule */
722   ierr = GLLStuffs(dd,&gll);CHKERRQ(ierr);
723   /* Compute matrix of subdomain Neumann problem */
724   ierr = ComputeSubdomainMatrix(dd,gll,&local_mat);CHKERRQ(ierr);
725   /* Compute global mapping of local dofs */
726   ierr = ComputeMapping(dd,&matis_map);CHKERRQ(ierr);
727   /* Create MATIS object needed by BDDC */
728   ierr = MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);CHKERRQ(ierr);
729   /* Set local subdomain matrices into MATIS object */
730   ierr = MatScale(local_mat,dd.scalingfactor);CHKERRQ(ierr);
731   ierr = MatISSetLocalMat(temp_A,local_mat);CHKERRQ(ierr);
732   /* Call assembly functions */
733   ierr = MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
734   ierr = MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
735 
736   if (dd.DBC_zerorows) {
737     PetscInt dirsize;
738 
739     ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);CHKERRQ(ierr);
740     ierr = MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
741     ierr = MatZeroRowsColumnsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);CHKERRQ(ierr);
742     ierr = ISGetLocalSize(dirichletIS,&dirsize);CHKERRQ(ierr);
743     ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
744   }
745 
746   /* giving hints to local and global matrices could be useful for the BDDC */
747   ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
748   ierr = MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
749 #if DEBUG
750   {
751     Vec       lvec,rvec;
752     PetscReal norm;
753     ierr = MatCreateVecs(temp_A,&lvec,&rvec);CHKERRQ(ierr);
754     ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
755     ierr = MatMult(temp_A,lvec,rvec);CHKERRQ(ierr);
756     ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
757     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
758     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
759   }
760 #endif
761   /* free allocated workspace */
762   ierr = PetscFree(gll.zGL);CHKERRQ(ierr);
763   ierr = PetscFree(gll.rhoGL);CHKERRQ(ierr);
764   ierr = PetscFree(gll.A[0]);CHKERRQ(ierr);
765   ierr = PetscFree(gll.A);CHKERRQ(ierr);
766   ierr = MatDestroy(&gll.elem_mat);CHKERRQ(ierr);
767   ierr = MatDestroy(&local_mat);CHKERRQ(ierr);
768   ierr = ISLocalToGlobalMappingDestroy(&matis_map);CHKERRQ(ierr);
769   /* give back the pointer to te MATIS object */
770   *A = temp_A;
771   PetscFunctionReturn(0);
772 }
773 
/* Build a KSP solving the FETI-DP flux (Lagrange multiplier) problem from an
   already set up BDDC KSP.  Two paths: the explicit one creates the FETI-DP
   operators from the PCBDDC preconditioner; the other uses the KSPFETIDP
   interface directly (selected by dd.testkspfetidp).  The caller owns the
   returned KSP. */
static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
{
  PetscErrorCode ierr;
  KSP            temp_ksp;
  PC             pc;

  PetscFunctionBeginUser;
  ierr = KSPGetPC(ksp_bddc,&pc);CHKERRQ(ierr);
  if (!dd.testkspfetidp) {
    PC  D;  /* FETI-DP Dirichlet preconditioner */
    Mat F;  /* FETI-DP flux operator */

    ierr = PCBDDCCreateFETIDPOperators(pc,PETSC_TRUE,NULL,&F,&D);CHKERRQ(ierr);
    ierr = KSPCreate(PetscObjectComm((PetscObject)F),&temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(temp_ksp,F,F);CHKERRQ(ierr);
    ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr);
    ierr = KSPSetPC(temp_ksp,D);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(temp_ksp,"fluxes_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
    /* the KSP keeps its own references to F and D */
    ierr = MatDestroy(&F);CHKERRQ(ierr);
    ierr = PCDestroy(&D);CHKERRQ(ierr);
  } else {
    Mat A,Ap;

    ierr = KSPCreate(PetscObjectComm((PetscObject)ksp_bddc),&temp_ksp);CHKERRQ(ierr);
    ierr = KSPGetOperators(ksp_bddc,&A,&Ap);CHKERRQ(ierr);
    ierr = KSPSetOperators(temp_ksp,A,Ap);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(temp_ksp,"fluxes_");CHKERRQ(ierr);
    ierr = KSPSetType(temp_ksp,KSPFETIDP);CHKERRQ(ierr);
    /* reuse the BDDC preconditioner inside KSPFETIDP */
    ierr = KSPFETIDPSetInnerBDDC(temp_ksp,pc);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
  }
  *ksp_fetidp = temp_ksp;
  PetscFunctionReturn(0);
}
813 
814 
/* Create and customize a CG solver preconditioned with BDDC for the global MATIS operator A.
   Demonstrates most of the PCBDDC customization API: subdomain scaling, dofs splitting,
   near-null-space attachment, local adjacency graph, user-defined primal vertices and
   Neumann/Dirichlet boundary index sets.
   On output, *ksp is a fully set-up KSP owned by the caller (prefix "physical_"). */
static PetscErrorCode ComputeKSPBDDC(DomainData dd,Mat A,KSP *ksp)
{
  PetscErrorCode ierr;
  KSP            temp_ksp;
  PC             pc;
  IS             primals,dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
  PetscInt       vidx[8],localsize,*xadj=NULL,*adjncy=NULL;
  MatNullSpace   near_null_space;

  PetscFunctionBeginUser;
  ierr = KSPCreate(dd.gcomm,&temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(temp_ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr);
  ierr = KSPGetPC(temp_ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);

  /* Number of dofs owned by this subdomain (lexicographic local grid) */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  /* BDDC customization */

  /* jumping coefficients case */
  ierr = PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);CHKERRQ(ierr);

  /* Dofs splitting
     Simple stride-1 IS
     It is not needed since, by default, PCBDDC assumes a stride-1 split */
  ierr = PetscMalloc1(1,&bddc_dofs_splitting);CHKERRQ(ierr);
#if 1
  /* "Local" variant: indices are in the local (subdomain) numbering */
  ierr = ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
#else
  /* examples for global ordering */

  /* each process lists the nodes it owns */
  PetscInt sr,er;
  ierr = MatGetOwnershipRange(A,&sr,&er);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
  /* Split can be passed in a more general way since any process can list any node */
#endif
  /* PCBDDC keeps its own reference, so the IS and the array can be released here */
  ierr = ISDestroy(&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PetscFree(bddc_dofs_splitting);CHKERRQ(ierr);

  /* Primal constraints implemented by using a near null space attached to A -> now it passes in only the constants
    (which in practice is not needed since by default PCBDDC builds the primal space using constants for quadrature formulas */
#if 0
  /* Disabled example: attach two random vectors as (fake) near-null-space constraints */
  Vec vecs[2];
  PetscRandom rctx;
  ierr = MatCreateVecs(A,&vecs[0],&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomCreate(dd.gcomm,&rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[0],rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[1],rctx);CHKERRQ(ierr);
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[0]);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
#else
  /* has_cnst=PETSC_TRUE with no extra vectors: the near null space is just the constants */
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);CHKERRQ(ierr);
#endif
  ierr = MatSetNearNullSpace(A,near_null_space);CHKERRQ(ierr);
  ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr);

  /* CSR graph of subdomain dofs */
  ierr = BuildCSRGraph(dd,&xadj,&adjncy);CHKERRQ(ierr);
  /* PETSC_OWN_POINTER: PCBDDC takes ownership of xadj/adjncy and will free them */
  ierr = PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
  /* Local lexicographic indices: index = k*xm_l*ym_l + j*xm_l + i */
  vidx[0] = 0*dd.xm_l+0;
  vidx[1] = 0*dd.xm_l+dd.xm_l-1;
  vidx[2] = (dd.ym_l-1)*dd.xm_l+0;
  vidx[3] = (dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
  vidx[4] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+0;
  vidx[5] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+dd.xm_l-1;
  vidx[6] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+0;
  vidx[7] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
  ierr = ISCreateGeneral(dd.gcomm,8,vidx,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr);
  ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr);
  ierr = ISDestroy(&primals);CHKERRQ(ierr);

  /* Neumann/Dirichlet indices on the global boundary */
  if (dd.DBC_zerorows) {
    /* Only in case you eliminate some rows matrix with zerorows function, you need to set dirichlet indices into PCBDDC data */
    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);CHKERRQ(ierr);
  } else {
    if (dd.pure_neumann) {
      /* In such a case, all interface nodes lying on the global boundary are neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    } else {
      /* It is wrong setting dirichlet indices without having zeroed the corresponding rows in the global matrix */
      /* But we can still compute them since nodes near the dirichlet boundaries does not need to be defined as neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    }
  }

  /* Pass local null space information to local matrices (needed when using approximate local solvers) */
  /* NOTE(review): the condition presumably selects subdomains whose local Neumann problem is
     singular (floating subdomains / pure Neumann case) — confirm against DomainDecomposition */
  if (dd.ipx || dd.pure_neumann) {
    MatNullSpace nsp;
    Mat          local_mat;

    ierr = MatISGetLocalMat(A,&local_mat);CHKERRQ(ierr);
    ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);CHKERRQ(ierr);
    ierr = MatSetNullSpace(local_mat,nsp);CHKERRQ(ierr);
    ierr = MatNullSpaceDestroy(&nsp);CHKERRQ(ierr);
  }
  /* Needed by KSPComputeExtremeSingularValues in main() */
  ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
  ierr = KSPSetOptionsPrefix(temp_ksp,"physical_");CHKERRQ(ierr);
  ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
  *ksp = temp_ksp;
  ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  ierr = ISDestroy(&neumannIS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
932 
InitializeDomainData(DomainData * dd)933 static PetscErrorCode InitializeDomainData(DomainData *dd)
934 {
935   PetscErrorCode ierr;
936   PetscMPIInt    sizes,rank;
937   PetscInt       factor;
938 
939   PetscFunctionBeginUser;
940   dd->gcomm = PETSC_COMM_WORLD;
941   ierr      = MPI_Comm_size(dd->gcomm,&sizes);CHKERRQ(ierr);
942   ierr      = MPI_Comm_rank(dd->gcomm,&rank);CHKERRQ(ierr);
943   /* Get informations from command line */
944   /* Processors/subdomains per dimension */
945   /* Default is 1d problem */
946   dd->npx = sizes;
947   dd->npy = 0;
948   dd->npz = 0;
949   dd->dim = 1;
950   ierr    = PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);CHKERRQ(ierr);
951   ierr    = PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);CHKERRQ(ierr);
952   ierr    = PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);CHKERRQ(ierr);
953   if (dd->npy) dd->dim++;
954   if (dd->npz) dd->dim++;
955   /* Number of elements per dimension */
956   /* Default is one element per subdomain */
957   dd->nex = dd->npx;
958   dd->ney = dd->npy;
959   dd->nez = dd->npz;
960   ierr    = PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);CHKERRQ(ierr);
961   ierr    = PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);CHKERRQ(ierr);
962   ierr    = PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);CHKERRQ(ierr);
963   if (!dd->npy) {
964     dd->ney=0;
965     dd->nez=0;
966   }
967   if (!dd->npz) dd->nez=0;
968   /* Spectral degree */
969   dd->p = 3;
970   ierr  = PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);CHKERRQ(ierr);
971   /* pure neumann problem? */
972   dd->pure_neumann = PETSC_FALSE;
973   ierr             = PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);CHKERRQ(ierr);
974 
975   /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
976   dd->DBC_zerorows = PETSC_FALSE;
977 
978   ierr = PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);CHKERRQ(ierr);
979   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
980   dd->scalingfactor = 1.0;
981 
982   factor = 0.0;
983   ierr   = PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);CHKERRQ(ierr);
984   /* checkerboard pattern */
985   dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
986   /* test data passed in */
987   if (dd->dim==1) {
988     if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 1D must be equal to npx");
989     if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
990   } else if (dd->dim==2) {
991     if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 2D must be equal to npx*npy");
992     if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
993   } else {
994     if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of mpi procs in 3D must be equal to npx*npy*npz");
995     if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater/equal than number of procs per dim");
996   }
997   PetscFunctionReturn(0);
998 }
999 
main(int argc,char ** args)1000 int main(int argc,char **args)
1001 {
1002   PetscErrorCode     ierr;
1003   DomainData         dd;
1004   PetscReal          norm,maxeig,mineig;
1005   PetscScalar        scalar_value;
1006   PetscInt           ndofs,its;
1007   Mat                A = NULL,F = NULL;
1008   KSP                KSPwithBDDC = NULL,KSPwithFETIDP = NULL;
1009   KSPConvergedReason reason;
1010   Vec                exact_solution = NULL,bddc_solution = NULL,bddc_rhs = NULL;
1011   PetscBool          testfetidp = PETSC_TRUE;
1012 
1013   /* Init PETSc */
1014   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
1015   /* Initialize DomainData */
1016   ierr = InitializeDomainData(&dd);CHKERRQ(ierr);
1017   /* Decompose domain */
1018   ierr = DomainDecomposition(&dd);CHKERRQ(ierr);
1019 #if DEBUG
1020   printf("Subdomain data\n");
1021   printf("IPS   : %d %d %d\n",dd.ipx,dd.ipy,dd.ipz);
1022   printf("NEG   : %d %d %d\n",dd.nex,dd.ney,dd.nez);
1023   printf("NEL   : %d %d %d\n",dd.nex_l,dd.ney_l,dd.nez_l);
1024   printf("LDO   : %d %d %d\n",dd.xm_l,dd.ym_l,dd.zm_l);
1025   printf("SIZES : %d %d %d\n",dd.xm,dd.ym,dd.zm);
1026   printf("STARTS: %d %d %d\n",dd.startx,dd.starty,dd.startz);
1027 #endif
1028   dd.testkspfetidp = PETSC_TRUE;
1029   ierr = PetscOptionsGetBool(NULL,NULL,"-testfetidp",&testfetidp,NULL);CHKERRQ(ierr);
1030   ierr = PetscOptionsGetBool(NULL,NULL,"-testkspfetidp",&dd.testkspfetidp,NULL);CHKERRQ(ierr);
1031   /* assemble global matrix */
1032   ierr = ComputeMatrix(dd,&A);CHKERRQ(ierr);
1033   /* get work vectors */
1034   ierr = MatCreateVecs(A,&bddc_solution,NULL);CHKERRQ(ierr);
1035   ierr = VecDuplicate(bddc_solution,&bddc_rhs);CHKERRQ(ierr);
1036   ierr = VecDuplicate(bddc_solution,&exact_solution);CHKERRQ(ierr);
1037   /* create and customize KSP/PC for BDDC */
1038   ierr = ComputeKSPBDDC(dd,A,&KSPwithBDDC);CHKERRQ(ierr);
1039   /* create KSP/PC for FETIDP */
1040   if (testfetidp) {
1041     ierr = ComputeKSPFETIDP(dd,KSPwithBDDC,&KSPwithFETIDP);CHKERRQ(ierr);
1042   }
1043   /* create random exact solution */
1044 #if defined(PETSC_USE_COMPLEX)
1045   ierr = VecSet(exact_solution,1.0 + PETSC_i);CHKERRQ(ierr);
1046 #else
1047   ierr = VecSetRandom(exact_solution,NULL);CHKERRQ(ierr);
1048 #endif
1049   ierr = VecShift(exact_solution,-0.5);CHKERRQ(ierr);
1050   ierr = VecScale(exact_solution,100.0);CHKERRQ(ierr);
1051   ierr = VecGetSize(exact_solution,&ndofs);CHKERRQ(ierr);
1052   if (dd.pure_neumann) {
1053     ierr = VecSum(exact_solution,&scalar_value);CHKERRQ(ierr);
1054     scalar_value = -scalar_value/(PetscScalar)ndofs;
1055     ierr = VecShift(exact_solution,scalar_value);CHKERRQ(ierr);
1056   }
1057   /* assemble BDDC rhs */
1058   ierr = MatMult(A,exact_solution,bddc_rhs);CHKERRQ(ierr);
1059   /* test ksp with BDDC */
1060   ierr = KSPSolve(KSPwithBDDC,bddc_rhs,bddc_solution);CHKERRQ(ierr);
1061   ierr = KSPGetIterationNumber(KSPwithBDDC,&its);CHKERRQ(ierr);
1062   ierr = KSPGetConvergedReason(KSPwithBDDC,&reason);CHKERRQ(ierr);
1063   ierr = KSPComputeExtremeSingularValues(KSPwithBDDC,&maxeig,&mineig);CHKERRQ(ierr);
1064   if (dd.pure_neumann) {
1065     ierr = VecSum(bddc_solution,&scalar_value);CHKERRQ(ierr);
1066     scalar_value = -scalar_value/(PetscScalar)ndofs;
1067     ierr = VecShift(bddc_solution,scalar_value);CHKERRQ(ierr);
1068   }
1069   /* check exact_solution and BDDC solultion */
1070   ierr = VecAXPY(bddc_solution,-1.0,exact_solution);CHKERRQ(ierr);
1071   ierr = VecNorm(bddc_solution,NORM_INFINITY,&norm);CHKERRQ(ierr);
1072   ierr = PetscPrintf(dd.gcomm,"---------------------BDDC stats-------------------------------\n");CHKERRQ(ierr);
1073   ierr = PetscPrintf(dd.gcomm,"Number of degrees of freedom               : %8D\n",ndofs);CHKERRQ(ierr);
1074   if (reason < 0) {
1075     ierr = PetscPrintf(dd.gcomm,"Number of iterations                       : %8D\n",its);CHKERRQ(ierr);
1076     ierr = PetscPrintf(dd.gcomm,"Converged reason                           : %s\n",KSPConvergedReasons[reason]);CHKERRQ(ierr);
1077   }
1078   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1079   ierr = PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator        : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);CHKERRQ(ierr);
1080   if (norm > 1.e-1 || reason < 0) {
1081     ierr = PetscPrintf(dd.gcomm,"Error betweeen exact and computed solution : %1.2e\n",(double)norm);CHKERRQ(ierr);
1082   }
1083   ierr = PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1084   if (testfetidp) {
1085     Vec fetidp_solution_all = NULL,fetidp_solution = NULL,fetidp_rhs = NULL;
1086 
1087     ierr = VecDuplicate(bddc_solution,&fetidp_solution_all);CHKERRQ(ierr);
1088     if (!dd.testkspfetidp) {
1089       /* assemble fetidp rhs on the space of Lagrange multipliers */
1090       ierr = KSPGetOperators(KSPwithFETIDP,&F,NULL);CHKERRQ(ierr);
1091       ierr = MatCreateVecs(F,&fetidp_solution,&fetidp_rhs);CHKERRQ(ierr);
1092       ierr = PCBDDCMatFETIDPGetRHS(F,bddc_rhs,fetidp_rhs);CHKERRQ(ierr);
1093       ierr = VecSet(fetidp_solution,0.0);CHKERRQ(ierr);
1094       /* test ksp with FETIDP */
1095       ierr = KSPSolve(KSPwithFETIDP,fetidp_rhs,fetidp_solution);CHKERRQ(ierr);
1096       /* assemble fetidp solution on physical domain */
1097       ierr = PCBDDCMatFETIDPGetSolution(F,fetidp_solution,fetidp_solution_all);CHKERRQ(ierr);
1098     } else {
1099       KSP kspF;
1100       ierr = KSPSolve(KSPwithFETIDP,bddc_rhs,fetidp_solution_all);CHKERRQ(ierr);
1101       ierr = KSPFETIDPGetInnerKSP(KSPwithFETIDP,&kspF);CHKERRQ(ierr);
1102       ierr = KSPGetOperators(kspF,&F,NULL);CHKERRQ(ierr);
1103     }
1104     ierr = MatGetSize(F,&ndofs,NULL);CHKERRQ(ierr);
1105     ierr = KSPGetIterationNumber(KSPwithFETIDP,&its);CHKERRQ(ierr);
1106     ierr = KSPGetConvergedReason(KSPwithFETIDP,&reason);CHKERRQ(ierr);
1107     ierr = KSPComputeExtremeSingularValues(KSPwithFETIDP,&maxeig,&mineig);CHKERRQ(ierr);
1108     /* check FETIDP sol */
1109     if (dd.pure_neumann) {
1110       ierr = VecSum(fetidp_solution_all,&scalar_value);CHKERRQ(ierr);
1111       scalar_value = -scalar_value/(PetscScalar)ndofs;
1112       ierr = VecShift(fetidp_solution_all,scalar_value);CHKERRQ(ierr);
1113     }
1114     ierr = VecAXPY(fetidp_solution_all,-1.0,exact_solution);CHKERRQ(ierr);
1115     ierr = VecNorm(fetidp_solution_all,NORM_INFINITY,&norm);CHKERRQ(ierr);
1116     ierr = PetscPrintf(dd.gcomm,"------------------FETI-DP stats-------------------------------\n");CHKERRQ(ierr);
1117     ierr = PetscPrintf(dd.gcomm,"Number of degrees of freedom               : %8D\n",ndofs);CHKERRQ(ierr);
1118     if (reason < 0) {
1119       ierr = PetscPrintf(dd.gcomm,"Number of iterations                       : %8D\n",its);CHKERRQ(ierr);
1120       ierr = PetscPrintf(dd.gcomm,"Converged reason                           : %D\n",reason);CHKERRQ(ierr);
1121     }
1122     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1123     ierr = PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator        : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);CHKERRQ(ierr);
1124     if (norm > 1.e-1 || reason < 0) {
1125       ierr = PetscPrintf(dd.gcomm,"Error betweeen exact and computed solution : %1.2e\n",(double)norm);CHKERRQ(ierr);
1126     }
1127     ierr = PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1128     ierr = VecDestroy(&fetidp_solution);CHKERRQ(ierr);
1129     ierr = VecDestroy(&fetidp_solution_all);CHKERRQ(ierr);
1130     ierr = VecDestroy(&fetidp_rhs);CHKERRQ(ierr);
1131   }
1132   ierr = KSPDestroy(&KSPwithFETIDP);CHKERRQ(ierr);
1133   ierr = VecDestroy(&exact_solution);CHKERRQ(ierr);
1134   ierr = VecDestroy(&bddc_solution);CHKERRQ(ierr);
1135   ierr = VecDestroy(&bddc_rhs);CHKERRQ(ierr);
1136   ierr = MatDestroy(&A);CHKERRQ(ierr);
1137   ierr = KSPDestroy(&KSPwithBDDC);CHKERRQ(ierr);
1138   /* Quit PETSc */
1139   ierr = PetscFinalize();
1140   return ierr;
1141 }
1142 
1143 /*TEST
1144 
1145  testset:
1146    nsize: 4
1147    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1148    output_file: output/ex59_bddc_fetidp_1.out
1149    test:
1150      suffix: bddc_fetidp_1
1151    test:
1152      requires: viennacl
1153      suffix: bddc_fetidp_1_viennacl
1154      args: -subdomain_mat_type aijviennacl
1155    test:
1156      requires: cuda
1157      suffix: bddc_fetidp_1_cuda
1158      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1159 
1160  testset:
1161    nsize: 4
1162    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1163    requires: !single
1164    test:
1165      suffix: bddc_fetidp_2
1166    test:
1167      suffix: bddc_fetidp_3
1168      args: -npz 1 -nez 1
1169    test:
1170      suffix: bddc_fetidp_4
1171      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1172 
1173  testset:
1174    nsize: 8
1175    suffix: bddc_fetidp_approximate
1176    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1177 
1178  testset:
1179    nsize: 4
1180    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1181    filter: grep -v "variant HERMITIAN"
1182    requires: !single
1183    test:
1184      suffix: bddc_fetidp_ml_1
1185      args: -physical_pc_bddc_coarsening_ratio 1
1186    test:
1187      suffix: bddc_fetidp_ml_2
1188      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1189    test:
1190      suffix: bddc_fetidp_ml_3
1191      args: -physical_pc_bddc_coarsening_ratio 4
1192 
1193  testset:
1194    nsize: 9
1195    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1196    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1197    test:
1198      suffix: bddc_fetidp_ml_eqlimit_1
1199      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1200    test:
1201      suffix: bddc_fetidp_ml_eqlimit_2
1202      args: -physical_pc_bddc_coarse_eqs_limit 46
1203 
1204 
1205 TEST*/
1206