static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
Spectral degree can be specified by passing values to -p option.\n\
Global problem has either Dirichlet boundary conditions on one side or is a pure Neumann problem (depending on runtime parameters).\n\
Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
Example usage: \n\
1D: mpiexec -n 4 ex59 -nex 7\n\
2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
Subdomain decomposition can be specified with -np_ parameters.\n\
Dirichlet boundaries on one side by default:\n\
by default the solver does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
Pure Neumann case can be requested by passing in -pureneumann.\n\
In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";

#include <petscksp.h>
#include <petscpc.h>
#include <petscdm.h>
#include <petscdmda.h>
#include <petscblaslapack.h>
#define DEBUG 0

/* structure holding domain data */
typedef struct {
  /* communicator */
  MPI_Comm gcomm;
  /* space dimension */
  PetscInt dim;
  /* spectral degree */
  PetscInt p;
  /* subdomains per dimension */
  PetscInt npx,npy,npz;
  /* subdomain index in cartesian dimensions */
  PetscInt ipx,ipy,ipz;
  /* elements per dimension */
  PetscInt nex,ney,nez;
  /* local elements per dimension */
  PetscInt nex_l,ney_l,nez_l;
  /* global number of dofs per dimension */
  PetscInt xm,ym,zm;
  /* local number of dofs per dimension */
  PetscInt xm_l,ym_l,zm_l;
  /* starting global indexes for subdomain in lexicographic ordering */
  PetscInt startx,starty,startz;
  /* pure Neumann problem */
  PetscBool pure_neumann;
  /* Dirichlet BC implementation */
  PetscBool DBC_zerorows;
  /* scaling factor for the subdomain */
  PetscScalar scalingfactor;
  PetscBool testkspfetidp;
} DomainData;

/* structure holding GLL data */
typedef struct {
  /* GLL nodes */
  PetscReal *zGL;
  /* GLL weights */
  PetscScalar *rhoGL;
  /* auxiliary matrix for the 1D laplacian */
  PetscScalar **A;
  /* element matrix */
  Mat elem_mat;
} GLLData;
static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
{
  PetscErrorCode ierr;
  PetscInt       *xadj_temp,*adjncy_temp;
  PetscInt       i,j,k,ii,jj,kk,iindex,count_adj;
  PetscInt       istart_csr,iend_csr,jstart_csr,jend_csr,kstart_csr,kend_csr;
  PetscBool      internal_node;

  PetscFunctionBeginUser;
  /* first count the number of entries of adjncy */
  count_adj=0;
  for (k=0; k<dd.zm_l; k++) {
    internal_node = PETSC_TRUE;
    kstart_csr    = k>0 ? k-1 : k;
    kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
    if (k == 0 || k == dd.zm_l-1) {
      internal_node = PETSC_FALSE;
      kstart_csr    = k;
      kend_csr      = k+1;
    }
    for (j=0; j<dd.ym_l; j++) {
      jstart_csr = j>0 ? j-1 : j;
      jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
      if (j == 0 || j == dd.ym_l-1) {
        internal_node = PETSC_FALSE;
        jstart_csr    = j;
        jend_csr      = j+1;
      }
      for (i=0; i<dd.xm_l; i++) {
        istart_csr = i>0 ? i-1 : i;
        iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
        if (i == 0 || i == dd.xm_l-1) {
          internal_node = PETSC_FALSE;
          istart_csr    = i;
          iend_csr      = i+1;
        }
        if (internal_node) {
          istart_csr = i;
          iend_csr   = i+1;
          jstart_csr = j;
          jend_csr   = j+1;
          kstart_csr = k;
          kend_csr   = k+1;
        }
        for (kk=kstart_csr; kk<kend_csr; kk++) {
          for (jj=jstart_csr; jj<jend_csr; jj++) {
            for (ii=istart_csr; ii<iend_csr; ii++) {
              count_adj=count_adj+1;
            }
          }
        }
      }
    }
  }
  ierr = PetscMalloc1(dd.xm_l*dd.ym_l*dd.zm_l+1,&xadj_temp);CHKERRQ(ierr);
  ierr = PetscMalloc1(count_adj,&adjncy_temp);CHKERRQ(ierr);

  /* now fill the CSR data structure */
  count_adj=0;
  for (k=0; k<dd.zm_l; k++) {
    internal_node = PETSC_TRUE;
    kstart_csr    = k>0 ? k-1 : k;
    kend_csr      = k<dd.zm_l-1 ? k+2 : k+1;
    if (k == 0 || k == dd.zm_l-1) {
      internal_node = PETSC_FALSE;
      kstart_csr    = k;
      kend_csr      = k+1;
    }
    for (j=0; j<dd.ym_l; j++) {
      jstart_csr = j>0 ? j-1 : j;
      jend_csr   = j<dd.ym_l-1 ? j+2 : j+1;
      if (j == 0 || j == dd.ym_l-1) {
        internal_node = PETSC_FALSE;
        jstart_csr    = j;
        jend_csr      = j+1;
      }
      for (i=0; i<dd.xm_l; i++) {
        istart_csr = i>0 ? i-1 : i;
        iend_csr   = i<dd.xm_l-1 ? i+2 : i+1;
        if (i == 0 || i == dd.xm_l-1) {
          internal_node = PETSC_FALSE;
          istart_csr    = i;
          iend_csr      = i+1;
        }
        iindex            = k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
        xadj_temp[iindex] = count_adj;
        if (internal_node) {
          istart_csr = i;
          iend_csr   = i+1;
          jstart_csr = j;
          jend_csr   = j+1;
          kstart_csr = k;
          kend_csr   = k+1;
        }
        for (kk=kstart_csr; kk<kend_csr; kk++) {
          for (jj=jstart_csr; jj<jend_csr; jj++) {
            for (ii=istart_csr; ii<iend_csr; ii++) {
              iindex = kk*dd.xm_l*dd.ym_l+jj*dd.xm_l+ii;

              adjncy_temp[count_adj] = iindex;
              count_adj              = count_adj+1;
            }
          }
        }
      }
    }
  }
  xadj_temp[dd.xm_l*dd.ym_l*dd.zm_l] = count_adj;

  *xadj   = xadj_temp;
  *adjncy = adjncy_temp;
  PetscFunctionReturn(0);
}
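
/* Illustration (not executed): for a 1D subdomain with xm_l = 4, every node also sits on
   the y/z "boundary", so each node lists its full stencil:
     xadj   = { 0, 2, 5, 8, 10 }
     adjncy = { 0,1,  0,1,2,  1,2,3,  2,3 }
   In 2D/3D, nodes interior to the subdomain are listed with themselves only: only the
   connectivity among interface candidates matters to PCBDDC. */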

static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
{
  PetscErrorCode ierr;
  IS             temp_dirichlet=0,temp_neumann=0;
  PetscInt       localsize,i,j,k,*indices;
  PetscBool      *touched;

  PetscFunctionBeginUser;
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  ierr = PetscMalloc1(localsize,&indices);CHKERRQ(ierr);
  ierr = PetscMalloc1(localsize,&touched);CHKERRQ(ierr);
  for (i=0; i<localsize; i++) touched[i] = PETSC_FALSE;

  if (dirichlet) {
    i = 0;
    /* west boundary */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          touched[indices[i]]=PETSC_TRUE;
          i++;
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_dirichlet);CHKERRQ(ierr);
  }
  if (neumann) {
    i = 0;
    /* west boundary */
    if (dd.ipx == 0) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* east boundary */
    if (dd.ipx == dd.npx-1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.ym_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j*dd.xm_l+dd.xm_l-1;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* south boundary */
    if (dd.ipy == 0 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* north boundary */
    if (dd.ipy == dd.npy-1 && dd.dim > 1) {
      for (k=0;k<dd.zm_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.ym_l*dd.xm_l+(dd.ym_l-1)*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* bottom boundary */
    if (dd.ipz == 0 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    /* top boundary */
    if (dd.ipz == dd.npz-1 && dd.dim > 2) {
      for (k=0;k<dd.ym_l;k++) {
        for (j=0;j<dd.xm_l;j++) {
          indices[i]=(dd.zm_l-1)*dd.ym_l*dd.xm_l+k*dd.xm_l+j;
          if (!touched[indices[i]]) {
            touched[indices[i]]=PETSC_TRUE;
            i++;
          }
        }
      }
    }
    ierr = ISCreateGeneral(dd.gcomm,i,indices,PETSC_COPY_VALUES,&temp_neumann);CHKERRQ(ierr);
  }
  if (dirichlet) *dirichlet = temp_dirichlet;
  if (neumann) *neumann = temp_neumann;
  ierr = PetscFree(indices);CHKERRQ(ierr);
  ierr = PetscFree(touched);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
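
/* Illustration (not executed): on a 2D subdomain with xm_l = 3 and ym_l = 3 owning the
   west side of the domain (ipx == 0), the Dirichlet IS contains the local indices
   {0, 3, 6} (the first column of dofs); the Neumann IS collects the other sides that
   touch the global boundary, with the touched[] flags guaranteeing that each index is
   listed only once. */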

static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
{
  PetscErrorCode         ierr;
  DM                     da;
  AO                     ao;
  DMBoundaryType         bx = DM_BOUNDARY_NONE,by = DM_BOUNDARY_NONE,bz = DM_BOUNDARY_NONE;
  DMDAStencilType        stype = DMDA_STENCIL_BOX;
  ISLocalToGlobalMapping temp_isg2lmap;
  PetscInt               i,j,k,ig,jg,kg,lindex,gindex,localsize;
  PetscInt               *global_indices;

  PetscFunctionBeginUser;
  /* Not an efficient mapping: this function computes a very simple lexicographic mapping
     just to illustrate the creation of a MATIS object */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr      = PetscMalloc1(localsize,&global_indices);CHKERRQ(ierr);
  for (k=0; k<dd.zm_l; k++) {
    kg=dd.startz+k;
    for (j=0; j<dd.ym_l; j++) {
      jg=dd.starty+j;
      for (i=0; i<dd.xm_l; i++) {
        ig                    =dd.startx+i;
        lindex                =k*dd.xm_l*dd.ym_l+j*dd.xm_l+i;
        gindex                =kg*dd.xm*dd.ym+jg*dd.xm+ig;
        global_indices[lindex]=gindex;
      }
    }
  }
  if (dd.dim==3) {
    ierr = DMDACreate3d(dd.gcomm,bx,by,bz,stype,dd.xm,dd.ym,dd.zm,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,NULL,&da);CHKERRQ(ierr);
  } else if (dd.dim==2) {
    ierr = DMDACreate2d(dd.gcomm,bx,by,stype,dd.xm,dd.ym,PETSC_DECIDE,PETSC_DECIDE,1,1,NULL,NULL,&da);CHKERRQ(ierr);
  } else {
    ierr = DMDACreate1d(dd.gcomm,bx,dd.xm,1,1,NULL,&da);CHKERRQ(ierr);
  }
  ierr = DMSetFromOptions(da);CHKERRQ(ierr);
  ierr = DMSetUp(da);CHKERRQ(ierr);
  ierr = DMDASetAOType(da,AOMEMORYSCALABLE);CHKERRQ(ierr);
  ierr = DMDAGetAO(da,&ao);CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(ao,dd.xm_l*dd.ym_l*dd.zm_l,global_indices);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingCreate(dd.gcomm,1,localsize,global_indices,PETSC_OWN_POINTER,&temp_isg2lmap);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);
  *isg2lmap = temp_isg2lmap;
  PetscFunctionReturn(0);
}
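
/* Illustration (not executed): with dd.xm = 5, dd.startx = 2, dd.starty = 0 and
   dd.xm_l = dd.ym_l = 3, the local node (i,j) = (0,1) has lindex = 3 and, before the
   AO renumbering, gindex = jg*dd.xm+ig = 1*5+2 = 7; AOApplicationToPetsc then converts
   these lexicographic indices into the parallel ordering used by the DMDA. */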

static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
{
  PetscErrorCode ierr;
  PetscInt       localsize,zloc,yloc,xloc,auxnex,auxney,auxnez;
  PetscInt       ie,je,ke,i,j,k,ig,jg,kg,ii,ming;
  PetscInt       *indexg,*cols,*colsg;
  PetscScalar    *vals;
  Mat            temp_local_mat,elem_mat_DBC=0,*usedmat;
  IS             submatIS;

  PetscFunctionBeginUser;
  ierr = MatGetSize(glldata.elem_mat,&i,&j);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&indexg);CHKERRQ(ierr);
  ierr = PetscMalloc1(i,&colsg);CHKERRQ(ierr);
  /* get submatrix of elem_mat without Dirichlet nodes */
  if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
    xloc = dd.p+1;
    yloc = 1;
    zloc = 1;
    if (dd.dim>1) yloc = dd.p+1;
    if (dd.dim>2) zloc = dd.p+1;
    ii = 0;
    for (k=0;k<zloc;k++) {
      for (j=0;j<yloc;j++) {
        for (i=1;i<xloc;i++) {
          indexg[ii]=k*xloc*yloc+j*xloc+i;
          ii++;
        }
      }
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ii,indexg,PETSC_COPY_VALUES,&submatIS);CHKERRQ(ierr);
    ierr = MatCreateSubMatrix(glldata.elem_mat,submatIS,submatIS,MAT_INITIAL_MATRIX,&elem_mat_DBC);CHKERRQ(ierr);
    ierr = ISDestroy(&submatIS);CHKERRQ(ierr);
  }

  /* Assemble subdomain matrix */
  localsize = dd.xm_l*dd.ym_l*dd.zm_l;
  ierr = MatCreate(PETSC_COMM_SELF,&temp_local_mat);CHKERRQ(ierr);
  ierr = MatSetSizes(temp_local_mat,localsize,localsize,localsize,localsize);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(temp_local_mat,"subdomain_");CHKERRQ(ierr);
  /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
  /* in order to avoid conversions inside the BDDC code, use SEQAIJ if possible */
  if (dd.DBC_zerorows && !dd.ipx) { /* in this case we need to zero out some of the rows, so use SEQAIJ */
    ierr = MatSetType(temp_local_mat,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(temp_local_mat,MATSEQSBAIJ);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(temp_local_mat);CHKERRQ(ierr);

  i = (PetscInt)PetscPowRealInt(3.0*(dd.p+1.0),dd.dim);

  ierr = MatSeqAIJSetPreallocation(temp_local_mat,i,NULL);CHKERRQ(ierr);      /* very overestimated */
  ierr = MatSeqSBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr); /* very overestimated */
  ierr = MatSeqBAIJSetPreallocation(temp_local_mat,1,i,NULL);CHKERRQ(ierr);  /* very overestimated */
  ierr = MatSetOption(temp_local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);

  yloc = dd.p+1;
  zloc = dd.p+1;
  if (dd.dim < 3) zloc = 1;
  if (dd.dim < 2) yloc = 1;

  auxnez = dd.nez_l;
  auxney = dd.ney_l;
  auxnex = dd.nex_l;
  if (dd.dim < 3) auxnez = 1;
  if (dd.dim < 2) auxney = 1;

  for (ke=0; ke<auxnez; ke++) {
    for (je=0; je<auxney; je++) {
      for (ie=0; ie<auxnex; ie++) {
        /* customize element accounting for BC */
        xloc    = dd.p+1;
        ming    = 0;
        usedmat = &glldata.elem_mat;
        if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
          if (ie == 0) {
            xloc    = dd.p;
            usedmat = &elem_mat_DBC;
          } else {
            ming    = -1;
            usedmat = &glldata.elem_mat;
          }
        }
        /* local to the element/global to the subdomain indexing */
        for (k=0; k<zloc; k++) {
          kg = ke*dd.p+k;
          for (j=0; j<yloc; j++) {
            jg = je*dd.p+j;
            for (i=0; i<xloc; i++) {
              ig         = ie*dd.p+i+ming;
              ii         = k*xloc*yloc+j*xloc+i;
              indexg[ii] = kg*dd.xm_l*dd.ym_l+jg*dd.xm_l+ig;
            }
          }
        }
        /* Set values */
        for (i=0; i<xloc*yloc*zloc; i++) {
          ierr = MatGetRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
          for (k=0; k<j; k++) colsg[k] = indexg[cols[k]];
          ierr = MatSetValues(temp_local_mat,1,&indexg[i],j,colsg,vals,ADD_VALUES);CHKERRQ(ierr);
          ierr = MatRestoreRow(*usedmat,i,&j,(const PetscInt**)&cols,(const PetscScalar**)&vals);CHKERRQ(ierr);
        }
      }
    }
  }
  ierr = PetscFree(indexg);CHKERRQ(ierr);
  ierr = PetscFree(colsg);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (temp_local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(temp_local_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(temp_local_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  *local_mat = temp_local_mat;
  ierr = MatDestroy(&elem_mat_DBC);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
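
/* Illustration (not executed): neighbouring elements share the GLL nodes on their common
   face, so element ie contributes to subdomain dofs ie*p .. ie*p+p in each direction;
   with p = 2, element 0 touches dofs {0,1,2} and element 1 touches {2,3,4}, and the
   shared dof 2 accumulates entries from both element matrices through ADD_VALUES. */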

static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
{
  PetscErrorCode ierr;
  PetscReal      *M,si;
  PetscScalar    x,z0,z1,z2,Lpj,Lpr,rhoGLj,rhoGLk;
  PetscBLASInt   pm1,lierr;
  PetscInt       i,j,n,k,s,r,q,ii,jj,p=dd.p;
  PetscInt       xloc,yloc,zloc,xyloc,xyzloc;

  PetscFunctionBeginUser;
  /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
  ierr = PetscCalloc1(p+1,&glldata->zGL);CHKERRQ(ierr);

  glldata->zGL[0]=-1.0;
  glldata->zGL[p]= 1.0;
  if (p > 1) {
    if (p == 2) glldata->zGL[1]=0.0;
    else {
      ierr = PetscMalloc1(p-1,&M);CHKERRQ(ierr);
      for (i=0; i<p-1; i++) {
        si  = (PetscReal)(i+1.0);
        M[i]=0.5*PetscSqrtReal(si*(si+2.0)/((si+0.5)*(si+1.5)));
      }
      pm1  = p-1;
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("N",&pm1,&glldata->zGL[1],M,&x,&pm1,M,&lierr));
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in STEQR LAPACK routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
      ierr = PetscFree(M);CHKERRQ(ierr);
    }
  }

  /* Weights for 1D quadrature */
  ierr = PetscMalloc1(p+1,&glldata->rhoGL);CHKERRQ(ierr);

  glldata->rhoGL[0]=2.0/(PetscScalar)(p*(p+1.0));
  glldata->rhoGL[p]=glldata->rhoGL[0];
  z2 = -1; /* dummy value to avoid -Wmaybe-uninitialized */
  for (i=1; i<p; i++) {
    x  = glldata->zGL[i];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2 = x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0 = z1;
      z1 = z2;
    }
    glldata->rhoGL[i]=2.0/(p*(p+1.0)*z2*z2);
  }

  /* Auxiliary mat for laplacian */
  ierr = PetscMalloc1(p+1,&glldata->A);CHKERRQ(ierr);
  ierr = PetscMalloc1((p+1)*(p+1),&glldata->A[0]);CHKERRQ(ierr);
  for (i=1; i<p+1; i++) glldata->A[i]=glldata->A[i-1]+p+1;

  for (j=1; j<p; j++) {
    x =glldata->zGL[j];
    z0=1.0;
    z1=x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj=z2;
    for (r=1; r<p; r++) {
      if (r == j) {
        glldata->A[j][j]=2.0/(3.0*(1.0-glldata->zGL[j]*glldata->zGL[j])*Lpj*Lpj);
      } else {
        x  = glldata->zGL[r];
        z0 = 1.0;
        z1 = x;
        for (n=1; n<p; n++) {
          z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
          z0=z1;
          z1=z2;
        }
        Lpr             = z2;
        glldata->A[r][j]=4.0/(p*(p+1.0)*Lpj*Lpr*(glldata->zGL[j]-glldata->zGL[r])*(glldata->zGL[j]-glldata->zGL[r]));
      }
    }
  }
  for (j=1; j<p+1; j++) {
    x  = glldata->zGL[j];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj             = z2;
    glldata->A[j][0]=4.0*PetscPowRealInt(-1.0,p)/(p*(p+1.0)*Lpj*(1.0+glldata->zGL[j])*(1.0+glldata->zGL[j]));
    glldata->A[0][j]=glldata->A[j][0];
  }
  for (j=0; j<p; j++) {
    x  = glldata->zGL[j];
    z0 = 1.0;
    z1 = x;
    for (n=1; n<p; n++) {
      z2=x*z1*(2.0*n+1.0)/(n+1.0)-z0*(PetscScalar)(n/(n+1.0));
      z0=z1;
      z1=z2;
    }
    Lpj=z2;

    glldata->A[p][j]=4.0/(p*(p+1.0)*Lpj*(1.0-glldata->zGL[j])*(1.0-glldata->zGL[j]));
    glldata->A[j][p]=glldata->A[p][j];
  }
  glldata->A[0][0]=0.5+(p*(p+1.0)-2.0)/6.0;
  glldata->A[p][p]=glldata->A[0][0];

  /* compute element matrix */
  xloc = p+1;
  yloc = p+1;
  zloc = p+1;
  if (dd.dim<2) yloc=1;
  if (dd.dim<3) zloc=1;
  xyloc  = xloc*yloc;
  xyzloc = xloc*yloc*zloc;

  ierr = MatCreate(PETSC_COMM_SELF,&glldata->elem_mat);CHKERRQ(ierr);
  ierr = MatSetSizes(glldata->elem_mat,xyzloc,xyzloc,xyzloc,xyzloc);CHKERRQ(ierr);
  ierr = MatSetType(glldata->elem_mat,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(glldata->elem_mat,xyzloc,NULL);CHKERRQ(ierr); /* overestimated */
  ierr = MatZeroEntries(glldata->elem_mat);CHKERRQ(ierr);
  ierr = MatSetOption(glldata->elem_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

  for (k=0; k<zloc; k++) {
    if (dd.dim>2) rhoGLk=glldata->rhoGL[k];
    else rhoGLk=1.0;

    for (j=0; j<yloc; j++) {
      if (dd.dim>1) rhoGLj=glldata->rhoGL[j];
      else rhoGLj=1.0;

      for (i=0; i<xloc; i++) {
        ii = k*xyloc+j*xloc+i;
        s  = k;
        r  = j;
        for (q=0; q<xloc; q++) {
          jj   = s*xyloc+r*xloc+q;
          ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[i][q]*rhoGLj*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
        }
        if (dd.dim>1) {
          s=k;
          q=i;
          for (r=0; r<yloc; r++) {
            jj   = s*xyloc+r*xloc+q;
            ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[j][r]*glldata->rhoGL[i]*rhoGLk,ADD_VALUES);CHKERRQ(ierr);
          }
        }
        if (dd.dim>2) {
          r=j;
          q=i;
          for (s=0; s<zloc; s++) {
            jj   = s*xyloc+r*xloc+q;
            ierr = MatSetValue(glldata->elem_mat,jj,ii,glldata->A[k][s]*rhoGLj*glldata->rhoGL[i],ADD_VALUES);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  ierr = MatAssemblyBegin(glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd  (glldata->elem_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(glldata->elem_mat,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(glldata->elem_mat,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
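
/* Sanity check (not executed): for p = 2 this routine must reproduce the classical
   Gauss-Lobatto-Legendre rule zGL = {-1, 0, 1} with rhoGL = {1/3, 4/3, 1/3}; for p = 3,
   zGL = {-1, -1/sqrt(5), 1/sqrt(5), 1} with rhoGL = {1/6, 5/6, 5/6, 1/6}. */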

static PetscErrorCode DomainDecomposition(DomainData *dd)
{
  PetscMPIInt rank;
  PetscInt    i,j,k;

  PetscFunctionBeginUser;
  /* Subdomain index in cartesian coordinates */
  MPI_Comm_rank(dd->gcomm,&rank);
  dd->ipx = rank%dd->npx;
  if (dd->dim>1) dd->ipz = rank/(dd->npx*dd->npy);
  else dd->ipz = 0;

  dd->ipy = rank/dd->npx-dd->ipz*dd->npy;
  /* number of local elements */
  dd->nex_l = dd->nex/dd->npx;
  if (dd->ipx < dd->nex%dd->npx) dd->nex_l++;
  if (dd->dim>1) {
    dd->ney_l = dd->ney/dd->npy;
    if (dd->ipy < dd->ney%dd->npy) dd->ney_l++;
  } else {
    dd->ney_l = 0;
  }
  if (dd->dim>2) {
    dd->nez_l = dd->nez/dd->npz;
    if (dd->ipz < dd->nez%dd->npz) dd->nez_l++;
  } else {
    dd->nez_l = 0;
  }
  /* local and global number of dofs */
  dd->xm_l = dd->nex_l*dd->p+1;
  dd->xm   = dd->nex*dd->p+1;
  dd->ym_l = dd->ney_l*dd->p+1;
  dd->ym   = dd->ney*dd->p+1;
  dd->zm_l = dd->nez_l*dd->p+1;
  dd->zm   = dd->nez*dd->p+1;
  if (!dd->pure_neumann) {
    if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
    if (!dd->DBC_zerorows) dd->xm--;
  }

  /* starting global index for local dofs (simple lexicographic order) */
  dd->startx = 0;
  j          = dd->nex/dd->npx;
  for (i=0; i<dd->ipx; i++) {
    k = j;
    if (i<dd->nex%dd->npx) k++;
    dd->startx = dd->startx+k*dd->p;
  }
  if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;

  dd->starty = 0;
  if (dd->dim > 1) {
    j = dd->ney/dd->npy;
    for (i=0; i<dd->ipy; i++) {
      k = j;
      if (i<dd->ney%dd->npy) k++;
      dd->starty = dd->starty+k*dd->p;
    }
  }
  dd->startz = 0;
  if (dd->dim > 2) {
    j = dd->nez/dd->npz;
    for (i=0; i<dd->ipz; i++) {
      k = j;
      if (i<dd->nez%dd->npz) k++;
      dd->startz = dd->startz+k*dd->p;
    }
  }
  PetscFunctionReturn(0);
}
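
/* Illustration (not executed): with -nex 7 on 4 processes (npx = 4) and p = 3, the
   remainder 7 % 4 = 3 goes to the first ranks, so nex_l = {2,2,2,1} and xm_l = {7,7,7,4};
   globally xm = 7*3+1 = 22 and startx = {0,6,12,18}, so neighbouring subdomains share
   their interface dof, as the unassembled MATIS format requires. In the default
   Dirichlet case (without -usezerorows) the first column of dofs is then eliminated,
   shrinking xm, the xm_l of rank 0 and the startx of the remaining ranks. */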

static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
{
  PetscErrorCode         ierr;
  GLLData                gll;
  Mat                    local_mat  =0,temp_A=0;
  ISLocalToGlobalMapping matis_map  =0;
  IS                     dirichletIS=0;

  PetscFunctionBeginUser;
  /* Compute the Gauss-Lobatto-Legendre quadrature data (nodes, weights and element matrix) */
  ierr = GLLStuffs(dd,&gll);CHKERRQ(ierr);
  /* Compute matrix of subdomain Neumann problem */
  ierr = ComputeSubdomainMatrix(dd,gll,&local_mat);CHKERRQ(ierr);
  /* Compute global mapping of local dofs */
  ierr = ComputeMapping(dd,&matis_map);CHKERRQ(ierr);
  /* Create MATIS object needed by BDDC */
  ierr = MatCreateIS(dd.gcomm,1,PETSC_DECIDE,PETSC_DECIDE,dd.xm*dd.ym*dd.zm,dd.xm*dd.ym*dd.zm,matis_map,NULL,&temp_A);CHKERRQ(ierr);
  /* Set local subdomain matrices into MATIS object */
  ierr = MatScale(local_mat,dd.scalingfactor);CHKERRQ(ierr);
  ierr = MatISSetLocalMat(temp_A,local_mat);CHKERRQ(ierr);
  /* Call assembly functions */
  ierr = MatAssemblyBegin(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(temp_A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (dd.DBC_zerorows) {
    PetscInt dirsize;

    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatZeroRowsColumnsLocalIS(temp_A,dirichletIS,1.0,NULL,NULL);CHKERRQ(ierr);
    ierr = ISGetLocalSize(dirichletIS,&dirsize);CHKERRQ(ierr);
    ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  }

  /* symmetry hints on the local matrices can be exploited by the BDDC */
  ierr = MatSetOption(local_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption(local_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
#if DEBUG
  {
    Vec       lvec,rvec;
    PetscReal norm;
    ierr = MatCreateVecs(temp_A,&lvec,&rvec);CHKERRQ(ierr);
    ierr = VecSet(lvec,1.0);CHKERRQ(ierr);
    ierr = MatMult(temp_A,lvec,rvec);CHKERRQ(ierr);
    ierr = VecNorm(rvec,NORM_INFINITY,&norm);CHKERRQ(ierr);
    ierr = VecDestroy(&lvec);CHKERRQ(ierr);
    ierr = VecDestroy(&rvec);CHKERRQ(ierr);
  }
#endif
  /* free allocated workspace */
  ierr = PetscFree(gll.zGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.rhoGL);CHKERRQ(ierr);
  ierr = PetscFree(gll.A[0]);CHKERRQ(ierr);
  ierr = PetscFree(gll.A);CHKERRQ(ierr);
  ierr = MatDestroy(&gll.elem_mat);CHKERRQ(ierr);
  ierr = MatDestroy(&local_mat);CHKERRQ(ierr);
  ierr = ISLocalToGlobalMappingDestroy(&matis_map);CHKERRQ(ierr);
  /* give back the pointer to the MATIS object */
  *A = temp_A;
  PetscFunctionReturn(0);
}
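
/* Note: a MATIS matrix stores the unassembled subdomain matrices A_i together with the
   local-to-global mapping, i.e. it represents A = sum_i R_i^T A_i R_i without ever
   forming the global sparse matrix; this is exactly the format PCBDDC and the FETI-DP
   operators work with. */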

static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
{
  PetscErrorCode ierr;
  KSP            temp_ksp;
  PC             pc;

  PetscFunctionBeginUser;
  ierr = KSPGetPC(ksp_bddc,&pc);CHKERRQ(ierr);
  if (!dd.testkspfetidp) {
    PC  D;
    Mat F;

    ierr = PCBDDCCreateFETIDPOperators(pc,PETSC_TRUE,NULL,&F,&D);CHKERRQ(ierr);
    ierr = KSPCreate(PetscObjectComm((PetscObject)F),&temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(temp_ksp,F,F);CHKERRQ(ierr);
    ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr);
    ierr = KSPSetPC(temp_ksp,D);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(temp_ksp,"fluxes_");CHKERRQ(ierr);
    ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
    ierr = MatDestroy(&F);CHKERRQ(ierr);
    ierr = PCDestroy(&D);CHKERRQ(ierr);
  } else {
    Mat A,Ap;

    ierr = KSPCreate(PetscObjectComm((PetscObject)ksp_bddc),&temp_ksp);CHKERRQ(ierr);
    ierr = KSPGetOperators(ksp_bddc,&A,&Ap);CHKERRQ(ierr);
    ierr = KSPSetOperators(temp_ksp,A,Ap);CHKERRQ(ierr);
    ierr = KSPSetOptionsPrefix(temp_ksp,"fluxes_");CHKERRQ(ierr);
    ierr = KSPSetType(temp_ksp,KSPFETIDP);CHKERRQ(ierr);
    ierr = KSPFETIDPSetInnerBDDC(temp_ksp,pc);CHKERRQ(ierr);
    ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
    ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
    ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
  }
  *ksp_fetidp = temp_ksp;
  PetscFunctionReturn(0);
}

static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
{
  PetscErrorCode ierr;
  KSP            temp_ksp;
  PC             pc;
  IS             primals,dirichletIS=0,neumannIS=0,*bddc_dofs_splitting;
  PetscInt       vidx[8],localsize,*xadj=NULL,*adjncy=NULL;
  MatNullSpace   near_null_space;

  PetscFunctionBeginUser;
  ierr = KSPCreate(dd.gcomm,&temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(temp_ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetType(temp_ksp,KSPCG);CHKERRQ(ierr);
  ierr = KSPGetPC(temp_ksp,&pc);CHKERRQ(ierr);
  ierr = PCSetType(pc,PCBDDC);CHKERRQ(ierr);

  localsize = dd.xm_l*dd.ym_l*dd.zm_l;

  /* BDDC customization */

  /* jumping coefficients case */
  ierr = PCISSetSubdomainScalingFactor(pc,dd.scalingfactor);CHKERRQ(ierr);

  /* Dofs splitting
     Simple stride-1 IS
     It is not needed since, by default, PCBDDC assumes a stride-1 split */
  ierr = PetscMalloc1(1,&bddc_dofs_splitting);CHKERRQ(ierr);
#if 1
  ierr = ISCreateStride(PETSC_COMM_WORLD,localsize,0,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplittingLocal(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
#else
  /* examples for global ordering */

  /* each process lists the nodes it owns */
  PetscInt sr,er;
  ierr = MatGetOwnershipRange(A,&sr,&er);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_WORLD,er-sr,sr,1,&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PCBDDCSetDofsSplitting(pc,1,bddc_dofs_splitting);CHKERRQ(ierr);
  /* the split can be passed in a more general way, since any process can list any node */
#endif
  ierr = ISDestroy(&bddc_dofs_splitting[0]);CHKERRQ(ierr);
  ierr = PetscFree(bddc_dofs_splitting);CHKERRQ(ierr);

  /* Primal constraints implemented by using a near null space attached to A -> here we pass in only the constants
     (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
#if 0
  Vec         vecs[2];
  PetscRandom rctx;
  ierr = MatCreateVecs(A,&vecs[0],&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomCreate(dd.gcomm,&rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[0],rctx);CHKERRQ(ierr);
  ierr = VecSetRandom(vecs[1],rctx);CHKERRQ(ierr);
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[0]);CHKERRQ(ierr);
  ierr = VecDestroy(&vecs[1]);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
#else
  ierr = MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,0,NULL,&near_null_space);CHKERRQ(ierr);
#endif
  ierr = MatSetNearNullSpace(A,near_null_space);CHKERRQ(ierr);
  ierr = MatNullSpaceDestroy(&near_null_space);CHKERRQ(ierr);

  /* CSR graph of subdomain dofs */
  ierr = BuildCSRGraph(dd,&xadj,&adjncy);CHKERRQ(ierr);
  ierr = PCBDDCSetLocalAdjacencyGraph(pc,localsize,xadj,adjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
  vidx[0] = 0*dd.xm_l+0;
  vidx[1] = 0*dd.xm_l+dd.xm_l-1;
  vidx[2] = (dd.ym_l-1)*dd.xm_l+0;
  vidx[3] = (dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
  vidx[4] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+0;
  vidx[5] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+0*dd.xm_l+dd.xm_l-1;
  vidx[6] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+0;
  vidx[7] = (dd.zm_l-1)*dd.xm_l*dd.ym_l+(dd.ym_l-1)*dd.xm_l+dd.xm_l-1;
  ierr    = ISCreateGeneral(dd.gcomm,8,vidx,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr);
  ierr    = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr);
  ierr    = ISDestroy(&primals);CHKERRQ(ierr);

  /* Neumann/Dirichlet indices on the global boundary */
  if (dd.DBC_zerorows) {
    /* Dirichlet indices need to be passed to the PCBDDC data only when rows of the matrix have been eliminated with the zero-rows function */
    ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    ierr = PCBDDCSetDirichletBoundariesLocal(pc,dirichletIS);CHKERRQ(ierr);
  } else {
    if (dd.pure_neumann) {
      /* In this case, all interface nodes lying on the global boundary are Neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,NULL,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    } else {
      /* It would be wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
      /* We can still compute them, since nodes on the Dirichlet boundaries do not need to be listed as Neumann nodes */
      ierr = ComputeSpecialBoundaryIndices(dd,&dirichletIS,&neumannIS);CHKERRQ(ierr);
      ierr = PCBDDCSetNeumannBoundariesLocal(pc,neumannIS);CHKERRQ(ierr);
    }
  }

  /* Pass local null space information to local matrices (needed when using approximate local solvers) */
  if (dd.ipx || dd.pure_neumann) {
    MatNullSpace nsp;
    Mat          local_mat;

    ierr = MatISGetLocalMat(A,&local_mat);CHKERRQ(ierr);
    ierr = MatNullSpaceCreate(PETSC_COMM_SELF,PETSC_TRUE,0,NULL,&nsp);CHKERRQ(ierr);
    ierr = MatSetNullSpace(local_mat,nsp);CHKERRQ(ierr);
    ierr = MatNullSpaceDestroy(&nsp);CHKERRQ(ierr);
  }
  ierr = KSPSetComputeSingularValues(temp_ksp,PETSC_TRUE);CHKERRQ(ierr);
  ierr = KSPSetOptionsPrefix(temp_ksp,"physical_");CHKERRQ(ierr);
  ierr = KSPSetFromOptions(temp_ksp);CHKERRQ(ierr);
  ierr = KSPSetUp(temp_ksp);CHKERRQ(ierr);
  *ksp = temp_ksp;
  ierr = ISDestroy(&dirichletIS);CHKERRQ(ierr);
  ierr = ISDestroy(&neumannIS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
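
/* The BDDC solver built above can be customized further at runtime through the
   "physical_" options prefix set here; the TEST block at the end of this file
   exercises, e.g., -physical_pc_bddc_levels, -physical_pc_bddc_coarsening_ratio,
   -physical_pc_bddc_switch_static and -physical_pc_bddc_use_deluxe_scaling. */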

static PetscErrorCode InitializeDomainData(DomainData *dd)
{
  PetscErrorCode ierr;
  PetscMPIInt    sizes,rank;
  PetscInt       factor;

  PetscFunctionBeginUser;
  dd->gcomm = PETSC_COMM_WORLD;
  ierr      = MPI_Comm_size(dd->gcomm,&sizes);CHKERRQ(ierr);
  ierr      = MPI_Comm_rank(dd->gcomm,&rank);CHKERRQ(ierr);
  /* Get information from command line */
  /* Processors/subdomains per dimension */
  /* Default is a 1D problem */
  dd->npx = sizes;
  dd->npy = 0;
  dd->npz = 0;
  dd->dim = 1;
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npx",&dd->npx,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npy",&dd->npy,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-npz",&dd->npz,NULL);CHKERRQ(ierr);
  if (dd->npy) dd->dim++;
  if (dd->npz) dd->dim++;
  /* Number of elements per dimension */
  /* Default is one element per subdomain */
  dd->nex = dd->npx;
  dd->ney = dd->npy;
  dd->nez = dd->npz;
  ierr    = PetscOptionsGetInt(NULL,NULL,"-nex",&dd->nex,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-ney",&dd->ney,NULL);CHKERRQ(ierr);
  ierr    = PetscOptionsGetInt(NULL,NULL,"-nez",&dd->nez,NULL);CHKERRQ(ierr);
  if (!dd->npy) {
    dd->ney=0;
    dd->nez=0;
  }
  if (!dd->npz) dd->nez=0;
  /* Spectral degree */
  dd->p = 3;
  ierr  = PetscOptionsGetInt(NULL,NULL,"-p",&dd->p,NULL);CHKERRQ(ierr);
  /* pure Neumann problem? */
  dd->pure_neumann = PETSC_FALSE;
  ierr = PetscOptionsGetBool(NULL,NULL,"-pureneumann",&dd->pure_neumann,NULL);CHKERRQ(ierr);

  /* How to enforce Dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
  dd->DBC_zerorows = PETSC_FALSE;

  ierr = PetscOptionsGetBool(NULL,NULL,"-usezerorows",&dd->DBC_zerorows,NULL);CHKERRQ(ierr);
  if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
  dd->scalingfactor = 1.0;

  factor = 0;
  ierr   = PetscOptionsGetInt(NULL,NULL,"-jump",&factor,NULL);CHKERRQ(ierr);
  /* checkerboard pattern */
  dd->scalingfactor = PetscPowScalar(10.0,(PetscScalar)factor*PetscPowScalar(-1.0,(PetscScalar)rank));
  /* test data passed in */
  if (dd->dim==1) {
    if (sizes!=dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 1D must be equal to npx");
    if (dd->nex<dd->npx) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
  } else if (dd->dim==2) {
    if (sizes!=dd->npx*dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 2D must be equal to npx*npy");
    if (dd->nex<dd->npx || dd->ney<dd->npy) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
  } else {
    if (sizes!=dd->npx*dd->npy*dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of MPI procs in 3D must be equal to npx*npy*npz");
    if (dd->nex<dd->npx || dd->ney<dd->npy || dd->nez<dd->npz) SETERRQ(dd->gcomm,PETSC_ERR_USER,"Number of elements per dim must be greater than or equal to the number of procs per dim");
  }
  PetscFunctionReturn(0);
}

int main(int argc,char **args)
{
  PetscErrorCode     ierr;
  DomainData         dd;
  PetscReal          norm,maxeig,mineig;
  PetscScalar        scalar_value;
  PetscInt           ndofs,its;
  Mat                A = NULL,F = NULL;
  KSP                KSPwithBDDC = NULL,KSPwithFETIDP = NULL;
  KSPConvergedReason reason;
  Vec                exact_solution = NULL,bddc_solution = NULL,bddc_rhs = NULL;
  PetscBool          testfetidp = PETSC_TRUE;

  /* Init PETSc */
  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  /* Initialize DomainData */
  ierr = InitializeDomainData(&dd);CHKERRQ(ierr);
  /* Decompose domain */
  ierr = DomainDecomposition(&dd);CHKERRQ(ierr);
#if DEBUG
  printf("Subdomain data\n");
  printf("IPS   : %d %d %d\n",dd.ipx,dd.ipy,dd.ipz);
  printf("NEG   : %d %d %d\n",dd.nex,dd.ney,dd.nez);
  printf("NEL   : %d %d %d\n",dd.nex_l,dd.ney_l,dd.nez_l);
  printf("LDO   : %d %d %d\n",dd.xm_l,dd.ym_l,dd.zm_l);
  printf("SIZES : %d %d %d\n",dd.xm,dd.ym,dd.zm);
  printf("STARTS: %d %d %d\n",dd.startx,dd.starty,dd.startz);
#endif
  dd.testkspfetidp = PETSC_TRUE;
  ierr = PetscOptionsGetBool(NULL,NULL,"-testfetidp",&testfetidp,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-testkspfetidp",&dd.testkspfetidp,NULL);CHKERRQ(ierr);
  /* assemble global matrix */
  ierr = ComputeMatrix(dd,&A);CHKERRQ(ierr);
  /* get work vectors */
  ierr = MatCreateVecs(A,&bddc_solution,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(bddc_solution,&bddc_rhs);CHKERRQ(ierr);
  ierr = VecDuplicate(bddc_solution,&exact_solution);CHKERRQ(ierr);
  /* create and customize KSP/PC for BDDC */
  ierr = ComputeKSPBDDC(dd,A,&KSPwithBDDC);CHKERRQ(ierr);
  /* create KSP/PC for FETIDP */
  if (testfetidp) {
    ierr = ComputeKSPFETIDP(dd,KSPwithBDDC,&KSPwithFETIDP);CHKERRQ(ierr);
  }
  /* create random exact solution */
#if defined(PETSC_USE_COMPLEX)
  ierr = VecSet(exact_solution,1.0 + PETSC_i);CHKERRQ(ierr);
#else
  ierr = VecSetRandom(exact_solution,NULL);CHKERRQ(ierr);
#endif
  ierr = VecShift(exact_solution,-0.5);CHKERRQ(ierr);
  ierr = VecScale(exact_solution,100.0);CHKERRQ(ierr);
  ierr = VecGetSize(exact_solution,&ndofs);CHKERRQ(ierr);
  if (dd.pure_neumann) {
    ierr = VecSum(exact_solution,&scalar_value);CHKERRQ(ierr);
    scalar_value = -scalar_value/(PetscScalar)ndofs;
    ierr = VecShift(exact_solution,scalar_value);CHKERRQ(ierr);
  }
  /* assemble BDDC rhs */
  ierr = MatMult(A,exact_solution,bddc_rhs);CHKERRQ(ierr);
  /* test ksp with BDDC */
  ierr = KSPSolve(KSPwithBDDC,bddc_rhs,bddc_solution);CHKERRQ(ierr);
  ierr = KSPGetIterationNumber(KSPwithBDDC,&its);CHKERRQ(ierr);
  ierr = KSPGetConvergedReason(KSPwithBDDC,&reason);CHKERRQ(ierr);
  ierr = KSPComputeExtremeSingularValues(KSPwithBDDC,&maxeig,&mineig);CHKERRQ(ierr);
  if (dd.pure_neumann) {
    ierr = VecSum(bddc_solution,&scalar_value);CHKERRQ(ierr);
    scalar_value = -scalar_value/(PetscScalar)ndofs;
    ierr = VecShift(bddc_solution,scalar_value);CHKERRQ(ierr);
  }
  /* check exact solution against the BDDC solution */
  ierr = VecAXPY(bddc_solution,-1.0,exact_solution);CHKERRQ(ierr);
  ierr = VecNorm(bddc_solution,NORM_INFINITY,&norm);CHKERRQ(ierr);
  ierr = PetscPrintf(dd.gcomm,"---------------------BDDC stats-------------------------------\n");CHKERRQ(ierr);
  ierr = PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);CHKERRQ(ierr);
  if (reason < 0) {
    ierr = PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);CHKERRQ(ierr);
    ierr = PetscPrintf(dd.gcomm,"Converged reason : %s\n",KSPConvergedReasons[reason]);CHKERRQ(ierr);
  }
  if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
  ierr = PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);CHKERRQ(ierr);
  if (norm > 1.e-1 || reason < 0) {
    ierr = PetscPrintf(dd.gcomm,"Error between exact and computed solution : %1.2e\n",(double)norm);CHKERRQ(ierr);
  }
  ierr = PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");CHKERRQ(ierr);
  if (testfetidp) {
    Vec fetidp_solution_all = NULL,fetidp_solution = NULL,fetidp_rhs = NULL;

    ierr = VecDuplicate(bddc_solution,&fetidp_solution_all);CHKERRQ(ierr);
    if (!dd.testkspfetidp) {
      /* assemble fetidp rhs on the space of Lagrange multipliers */
      ierr = KSPGetOperators(KSPwithFETIDP,&F,NULL);CHKERRQ(ierr);
      ierr = MatCreateVecs(F,&fetidp_solution,&fetidp_rhs);CHKERRQ(ierr);
      ierr = PCBDDCMatFETIDPGetRHS(F,bddc_rhs,fetidp_rhs);CHKERRQ(ierr);
      ierr = VecSet(fetidp_solution,0.0);CHKERRQ(ierr);
      /* test ksp with FETIDP */
      ierr = KSPSolve(KSPwithFETIDP,fetidp_rhs,fetidp_solution);CHKERRQ(ierr);
      /* assemble fetidp solution on physical domain */
      ierr = PCBDDCMatFETIDPGetSolution(F,fetidp_solution,fetidp_solution_all);CHKERRQ(ierr);
    } else {
      KSP kspF;
      ierr = KSPSolve(KSPwithFETIDP,bddc_rhs,fetidp_solution_all);CHKERRQ(ierr);
      ierr = KSPFETIDPGetInnerKSP(KSPwithFETIDP,&kspF);CHKERRQ(ierr);
      ierr = KSPGetOperators(kspF,&F,NULL);CHKERRQ(ierr);
    }
    ierr = MatGetSize(F,&ndofs,NULL);CHKERRQ(ierr);
    ierr = KSPGetIterationNumber(KSPwithFETIDP,&its);CHKERRQ(ierr);
    ierr = KSPGetConvergedReason(KSPwithFETIDP,&reason);CHKERRQ(ierr);
    ierr = KSPComputeExtremeSingularValues(KSPwithFETIDP,&maxeig,&mineig);CHKERRQ(ierr);
    /* check FETIDP solution */
    if (dd.pure_neumann) {
      ierr = VecSum(fetidp_solution_all,&scalar_value);CHKERRQ(ierr);
      scalar_value = -scalar_value/(PetscScalar)ndofs;
      ierr = VecShift(fetidp_solution_all,scalar_value);CHKERRQ(ierr);
    }
    ierr = VecAXPY(fetidp_solution_all,-1.0,exact_solution);CHKERRQ(ierr);
    ierr = VecNorm(fetidp_solution_all,NORM_INFINITY,&norm);CHKERRQ(ierr);
    ierr = PetscPrintf(dd.gcomm,"------------------FETI-DP stats-------------------------------\n");CHKERRQ(ierr);
    ierr = PetscPrintf(dd.gcomm,"Number of degrees of freedom : %8D\n",ndofs);CHKERRQ(ierr);
    if (reason < 0) {
      ierr = PetscPrintf(dd.gcomm,"Number of iterations : %8D\n",its);CHKERRQ(ierr);
      ierr = PetscPrintf(dd.gcomm,"Converged reason : %s\n",KSPConvergedReasons[reason]);CHKERRQ(ierr);
    }
    if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
    ierr = PetscPrintf(dd.gcomm,"Eigenvalues preconditioned operator : %1.1e %1.1e\n",(double)PetscFloorReal(100.*mineig)/100.,(double)PetscCeilReal(100.*maxeig)/100.);CHKERRQ(ierr);
    if (norm > 1.e-1 || reason < 0) {
      ierr = PetscPrintf(dd.gcomm,"Error between exact and computed solution : %1.2e\n",(double)norm);CHKERRQ(ierr);
    }
    ierr = PetscPrintf(dd.gcomm,"--------------------------------------------------------------\n");CHKERRQ(ierr);
    ierr = VecDestroy(&fetidp_solution);CHKERRQ(ierr);
    ierr = VecDestroy(&fetidp_solution_all);CHKERRQ(ierr);
    ierr = VecDestroy(&fetidp_rhs);CHKERRQ(ierr);
  }
  ierr = KSPDestroy(&KSPwithFETIDP);CHKERRQ(ierr);
  ierr = VecDestroy(&exact_solution);CHKERRQ(ierr);
  ierr = VecDestroy(&bddc_solution);CHKERRQ(ierr);
  ierr = VecDestroy(&bddc_rhs);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = KSPDestroy(&KSPwithBDDC);CHKERRQ(ierr);
  /* Quit PETSc */
  ierr = PetscFinalize();
  return ierr;
}

/*TEST

   testset:
     nsize: 4
     args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
     output_file: output/ex59_bddc_fetidp_1.out
     test:
       suffix: bddc_fetidp_1
     test:
       requires: viennacl
       suffix: bddc_fetidp_1_viennacl
       args: -subdomain_mat_type aijviennacl
     test:
       requires: cuda
       suffix: bddc_fetidp_1_cuda
       args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse

   testset:
     nsize: 4
     args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
     requires: !single
     test:
       suffix: bddc_fetidp_2
     test:
       suffix: bddc_fetidp_3
       args: -npz 1 -nez 1
     test:
       suffix: bddc_fetidp_4
       args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg

   testset:
     nsize: 8
     suffix: bddc_fetidp_approximate
     args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0

   testset:
     nsize: 4
     args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
     filter: grep -v "variant HERMITIAN"
     requires: !single
     test:
       suffix: bddc_fetidp_ml_1
       args: -physical_pc_bddc_coarsening_ratio 1
     test:
       suffix: bddc_fetidp_ml_2
       args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
     test:
       suffix: bddc_fetidp_ml_3
       args: -physical_pc_bddc_coarsening_ratio 4

   testset:
     nsize: 9
     args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
     output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
     test:
       suffix: bddc_fetidp_ml_eqlimit_1
       args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
     test:
       suffix: bddc_fetidp_ml_eqlimit_2
       args: -physical_pc_bddc_coarse_eqs_limit 46

TEST*/