1 /*
2 This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
3 In this version each processor may intersect multiple subdomains and any subdomain may
4 intersect multiple processors. Intersections of subdomains with processors are called *local
5 subdomains*.
6
7 N - total number of distinct global subdomains (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8 n - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9 nmax - maximum number of local subdomains per processor (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
12 #include <petscdm.h>
13
14 typedef struct {
15 PetscInt N,n,nmax;
16 PetscInt overlap; /* overlap requested by user */
17 PCGASMType type; /* use reduced interpolation, restriction or both */
18 PetscBool type_set; /* if user set this value (so won't change it for symmetric problems) */
19 PetscBool same_subdomain_solvers; /* flag indicating whether all local solvers are same */
20 PetscBool sort_indices; /* flag to sort subdomain indices */
21 PetscBool user_subdomains; /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22 PetscBool dm_subdomains; /* whether DM is allowed to define subdomains */
23 PetscBool hierarchicalpartitioning;
24 IS *ois; /* index sets that define the outer (conceptually, overlapping) subdomains */
25 IS *iis; /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26 KSP *ksp; /* linear solvers for each subdomain */
27 Mat *pmat; /* subdomain block matrices */
28 Vec gx,gy; /* Merged work vectors */
29 Vec *x,*y; /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30 VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
31 VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
32 VecScatter pctoouter;
33 IS permutationIS;
34 Mat permutationP;
35 Mat pcmat;
36 Vec pcx,pcy;
37 } PC_GASM;
38
PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt ** numbering,PetscInt ** permutation)39 static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41 PC_GASM *osm = (PC_GASM*)pc->data;
42 PetscInt i;
43 PetscErrorCode ierr;
44
45 PetscFunctionBegin;
46 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47 ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49 for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50 ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51 PetscFunctionReturn(0);
52 }
53
PCGASMSubdomainView_Private(PC pc,PetscInt i,PetscViewer viewer)54 static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56 PC_GASM *osm = (PC_GASM*)pc->data;
57 PetscInt j,nidx;
58 const PetscInt *idx;
59 PetscViewer sviewer;
60 char *cidx;
61 PetscErrorCode ierr;
62
63 PetscFunctionBegin;
64 if (i < 0 || i > osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must nonnegative and less than %D", i, osm->n);
65 /* Inner subdomains. */
66 ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67 /*
68 No more than 15 characters per index plus a space.
69 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70 in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71 For nidx == 0, the whole string 16 '\0'.
72 */
73 #define len 16*(nidx+1)+1
74 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77 ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78 for (j = 0; j < nidx; ++j) {
79 ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80 }
81 ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83 ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91 ierr = PetscFree(cidx);CHKERRQ(ierr);
92 /* Outer subdomains. */
93 ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94 /*
95 No more than 15 characters per index plus a space.
96 PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97 in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98 For nidx == 0, the whole string 16 '\0'.
99 */
100 #define len 16*(nidx+1)+1
101 ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102 ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104 ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105 for (j = 0; j < nidx; ++j) {
106 ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107 }
108 ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109 ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110 ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113 ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116 ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118 ierr = PetscFree(cidx);CHKERRQ(ierr);
119 PetscFunctionReturn(0);
120 }
121
PCGASMPrintSubdomains(PC pc)122 static PetscErrorCode PCGASMPrintSubdomains(PC pc)
123 {
124 PC_GASM *osm = (PC_GASM*)pc->data;
125 const char *prefix;
126 char fname[PETSC_MAX_PATH_LEN+1];
127 PetscInt l, d, count;
128 PetscBool doprint,found;
129 PetscViewer viewer, sviewer = NULL;
130 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
131 PetscErrorCode ierr;
132
133 PetscFunctionBegin;
134 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
135 doprint = PETSC_FALSE;
136 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
137 if (!doprint) PetscFunctionReturn(0);
138 ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
139 if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
140 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
141 /*
142 Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
143 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
144 */
145 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
146 l = 0;
147 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
148 for (count = 0; count < osm->N; ++count) {
149 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
150 if (l<osm->n) {
151 d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
152 if (numbering[d] == count) {
153 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
154 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
155 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
156 ++l;
157 }
158 }
159 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
160 }
161 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
162 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
163 PetscFunctionReturn(0);
164 }
165
PCView_GASM(PC pc,PetscViewer viewer)166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
167 {
168 PC_GASM *osm = (PC_GASM*)pc->data;
169 const char *prefix;
170 PetscErrorCode ierr;
171 PetscMPIInt rank, size;
172 PetscInt bsz;
173 PetscBool iascii,view_subdomains=PETSC_FALSE;
174 PetscViewer sviewer;
175 PetscInt count, l;
176 char overlap[256] = "user-defined overlap";
177 char gsubdomains[256] = "unknown total number of subdomains";
178 char msubdomains[256] = "unknown max number of local subdomains";
179 PetscInt *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
180
181 PetscFunctionBegin;
182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);
184
185 if (osm->overlap >= 0) {
186 ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
187 }
188 if (osm->N != PETSC_DETERMINE) {
189 ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
190 }
191 if (osm->nmax != PETSC_DETERMINE) {
192 ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
193 }
194
195 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
196 ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);
197
198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
199 if (iascii) {
200 /*
201 Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
202 the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
203 collectively on the comm.
204 */
205 ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
206 ierr = PetscViewerASCIIPrintf(viewer," Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
207 ierr = PetscViewerASCIIPrintf(viewer," %s\n",overlap);CHKERRQ(ierr);
208 ierr = PetscViewerASCIIPrintf(viewer," %s\n",gsubdomains);CHKERRQ(ierr);
209 ierr = PetscViewerASCIIPrintf(viewer," %s\n",msubdomains);CHKERRQ(ierr);
210 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
213 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
214 /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
215 ierr = PetscViewerASCIIPrintf(viewer," Subdomain solver info is as follows:\n");CHKERRQ(ierr);
216 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
217 ierr = PetscViewerASCIIPrintf(viewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
218 /* Make sure that everybody waits for the banner to be printed. */
219 ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
220 /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
221 ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
222 l = 0;
223 for (count = 0; count < osm->N; ++count) {
224 PetscMPIInt srank, ssize;
225 if (l<osm->n) {
226 PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
227 if (numbering[d] == count) {
228 ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
229 ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
230 ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
231 ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
232 ierr = PetscViewerASCIISynchronizedPrintf(sviewer," [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
233 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
234 if (view_subdomains) {
235 ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
236 }
237 if (!pc->setupcalled) {
238 ierr = PetscViewerASCIIPrintf(sviewer, " Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
239 } else {
240 ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
241 }
242 ierr = PetscViewerASCIIPrintf(sviewer," - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
243 ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
244 ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
245 ++l;
246 }
247 }
248 ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
249 }
250 ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
251 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
252 }
253 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
254 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
255 PetscFunctionReturn(0);
256 }
257
258 PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
259
PCGASMSetHierarchicalPartitioning(PC pc)260 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
261 {
262 PC_GASM *osm = (PC_GASM*)pc->data;
263 MatPartitioning part;
264 MPI_Comm comm;
265 PetscMPIInt size;
266 PetscInt nlocalsubdomains,fromrows_localsize;
267 IS partitioning,fromrows,isn;
268 Vec outervec;
269 PetscErrorCode ierr;
270
271 PetscFunctionBegin;
272 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
273 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
274 /* we do not need a hierarchical partitioning when
275 * the total number of subdomains is consistent with
276 * the number of MPI tasks.
277 * For the following cases, we do not need to use HP
278 * */
279 if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
280 if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"have to specify the total number of subdomains %D to be a factor of the number of processors %d \n",osm->N,size);
281 nlocalsubdomains = size/osm->N;
282 osm->n = 1;
283 ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
284 ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
285 ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
286 ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
287 ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
288 ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
289 /* get new processor owner number of each vertex */
290 ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
291 ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
292 ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
293 ierr = ISDestroy(&isn);CHKERRQ(ierr);
294 ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
295 ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
296 ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
297 ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
298 ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
299 ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
300 ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
301 ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
302 osm->permutationIS = fromrows;
303 osm->pcmat = pc->pmat;
304 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
305 pc->pmat = osm->permutationP;
306 ierr = VecDestroy(&outervec);CHKERRQ(ierr);
307 ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
308 ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
309 osm->n = PETSC_DETERMINE;
310 PetscFunctionReturn(0);
311 }
312
PCSetUp_GASM(PC pc)313 static PetscErrorCode PCSetUp_GASM(PC pc)
314 {
315 PC_GASM *osm = (PC_GASM*)pc->data;
316 PetscErrorCode ierr;
317 PetscInt i,nInnerIndices,nTotalInnerIndices;
318 PetscMPIInt rank, size;
319 MatReuse scall = MAT_REUSE_MATRIX;
320 KSP ksp;
321 PC subpc;
322 const char *prefix,*pprefix;
323 Vec x,y;
324 PetscInt oni; /* Number of indices in the i-th local outer subdomain. */
325 const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain. */
326 PetscInt on; /* Number of indices in the disjoint union of local outer subdomains. */
327 PetscInt *oidx; /* Indices in the disjoint union of local outer subdomains. */
328 IS gois; /* Disjoint union the global indices of outer subdomains. */
329 IS goid; /* Identity IS of the size of the disjoint union of outer subdomains. */
330 PetscScalar *gxarray, *gyarray;
331 PetscInt gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
332 PetscInt num_subdomains = 0;
333 DM *subdomain_dm = NULL;
334 char **subdomain_names = NULL;
335 PetscInt *numbering;
336
337
338 PetscFunctionBegin;
339 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
340 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
341 if (!pc->setupcalled) {
342 /* use a hierarchical partitioning */
343 if (osm->hierarchicalpartitioning){
344 ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
345 }
346 if (osm->n == PETSC_DETERMINE) {
347 if (osm->N != PETSC_DETERMINE) {
348 /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
349 ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
350 } else if (osm->dm_subdomains && pc->dm) {
351 /* try pc->dm next, if allowed */
352 PetscInt d;
353 IS *inner_subdomain_is, *outer_subdomain_is;
354 ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
355 if (num_subdomains) {
356 ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
357 }
358 for (d = 0; d < num_subdomains; ++d) {
359 if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
360 if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
361 }
362 ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
363 ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
364 } else {
365 /* still no subdomains; use one per processor */
366 osm->nmax = osm->n = 1;
367 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
368 osm->N = size;
369 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
370 }
371 }
372 if (!osm->iis) {
373 /*
374 osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
375 We create the requisite number of local inner subdomains and then expand them into
376 out subdomains, if necessary.
377 */
378 ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
379 }
380 if (!osm->ois) {
381 /*
382 Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
383 has been requested, copy the inner subdomains over so they can be modified.
384 */
385 ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
386 for (i=0; i<osm->n; ++i) {
387 if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
388 ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
389 ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
390 } else {
391 ierr = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
392 osm->ois[i] = osm->iis[i];
393 }
394 }
395 if (osm->overlap>0 && osm->N>1) {
396 /* Extend the "overlapping" regions by a number of steps */
397 ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
398 }
399 }
400
401 /* Now the subdomains are defined. Determine their global and max local numbers, if necessary. */
402 if (osm->nmax == PETSC_DETERMINE) {
403 PetscMPIInt inwork,outwork;
404 /* determine global number of subdomains and the max number of local subdomains */
405 inwork = osm->n;
406 ierr = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
407 osm->nmax = outwork;
408 }
409 if (osm->N == PETSC_DETERMINE) {
410 /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
411 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
412 }
413
414 if (osm->sort_indices) {
415 for (i=0; i<osm->n; i++) {
416 ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
417 ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
418 }
419 }
420 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
421 ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
422
423 /*
424 Merge the ISs, create merged vectors and restrictions.
425 */
426 /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
427 on = 0;
428 for (i=0; i<osm->n; i++) {
429 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
430 on += oni;
431 }
432 ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
433 on = 0;
434 /* Merge local indices together */
435 for (i=0; i<osm->n; i++) {
436 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
437 ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
438 ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
439 ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
440 on += oni;
441 }
442 ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
443 nTotalInnerIndices = 0;
444 for (i=0; i<osm->n; i++){
445 ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
446 nTotalInnerIndices += nInnerIndices;
447 }
448 ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
449 ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
450
451 ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
452 ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
453 ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
454 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
455 /* gois might indices not on local */
456 ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
457 ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
458 ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
459 ierr = VecDestroy(&x);CHKERRQ(ierr);
460 ierr = ISDestroy(&gois);CHKERRQ(ierr);
461
462 /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
463 {
464 PetscInt ini; /* Number of indices the i-th a local inner subdomain. */
465 PetscInt in; /* Number of indices in the disjoint union of local inner subdomains. */
466 PetscInt *iidx; /* Global indices in the merged local inner subdomain. */
467 PetscInt *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
468 IS giis; /* IS for the disjoint union of inner subdomains. */
469 IS giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
470 PetscScalar *array;
471 const PetscInt *indices;
472 PetscInt k;
473 on = 0;
474 for (i=0; i<osm->n; i++) {
475 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
476 on += oni;
477 }
478 ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
479 ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
480 ierr = VecGetArray(y,&array);CHKERRQ(ierr);
481 /* set communicator id to determine where overlap is */
482 in = 0;
483 for (i=0; i<osm->n; i++) {
484 ierr = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
485 for (k = 0; k < ini; ++k){
486 array[in+k] = numbering[i];
487 }
488 in += ini;
489 }
490 ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
491 ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492 ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493 ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
494 ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
495 on = 0;
496 in = 0;
497 for (i=0; i<osm->n; i++) {
498 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
499 ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
500 for (k=0; k<oni; k++) {
501 /* skip overlapping indices to get inner domain */
502 if (PetscRealPart(array[on+k]) != numbering[i]) continue;
503 iidx[in] = indices[k];
504 ioidx[in++] = gostart+on+k;
505 }
506 ierr = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
507 on += oni;
508 }
509 ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
510 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
512 ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
513 ierr = VecDestroy(&y);CHKERRQ(ierr);
514 ierr = ISDestroy(&giis);CHKERRQ(ierr);
515 ierr = ISDestroy(&giois);CHKERRQ(ierr);
516 }
517 ierr = ISDestroy(&goid);CHKERRQ(ierr);
518 ierr = PetscFree(numbering);CHKERRQ(ierr);
519
520 /* Create the subdomain work vectors. */
521 ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
522 ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
523 ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
524 ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
525 for (i=0, on=0; i<osm->n; ++i, on += oni) {
526 PetscInt oNi;
527 ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
528 /* on a sub communicator */
529 ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
530 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
531 ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
532 }
533 ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
534 ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
535 /* Create the subdomain solvers */
536 ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
537 for (i=0; i<osm->n; i++) {
538 char subprefix[PETSC_MAX_PATH_LEN+1];
539 ierr = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
540 ierr = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
541 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
542 ierr = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
543 ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
544 ierr = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
545 if (subdomain_dm) {
546 ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
547 ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
548 }
549 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
550 ierr = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
551 if (subdomain_names && subdomain_names[i]) {
552 ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
553 ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
554 ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
555 }
556 ierr = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
557 osm->ksp[i] = ksp;
558 }
559 ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
560 ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
561 scall = MAT_INITIAL_MATRIX;
562 } else { /* if (pc->setupcalled) */
563 /*
564 Destroy the submatrices from the previous iteration
565 */
566 if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
567 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
568 scall = MAT_INITIAL_MATRIX;
569 }
570 if (osm->permutationIS){
571 ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
572 ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
573 osm->pcmat = pc->pmat;
574 pc->pmat = osm->permutationP;
575 }
576 }
577
578 /*
579 Extract the submatrices.
580 */
581 if (size > 1) {
582 ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
583 } else {
584 ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
585 }
586 if (scall == MAT_INITIAL_MATRIX) {
587 ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
588 for (i=0; i<osm->n; i++) {
589 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
590 ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
591 }
592 }
593
594 /* Return control to the user so that the submatrices can be modified (e.g., to apply
595 different boundary conditions for the submatrices than for the global problem) */
596 ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
597
598 /*
599 Loop over submatrices putting them into local ksps
600 */
601 for (i=0; i<osm->n; i++) {
602 ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
603 if (!pc->setupcalled) {
604 ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
605 }
606 }
607 if (osm->pcmat){
608 ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
609 pc->pmat = osm->pcmat;
610 osm->pcmat = NULL;
611 }
612 PetscFunctionReturn(0);
613 }
614
PCSetUpOnBlocks_GASM(PC pc)615 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
616 {
617 PC_GASM *osm = (PC_GASM*)pc->data;
618 PetscErrorCode ierr;
619 PetscInt i;
620
621 PetscFunctionBegin;
622 for (i=0; i<osm->n; i++) {
623 ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
624 }
625 PetscFunctionReturn(0);
626 }
627
PCApply_GASM(PC pc,Vec xin,Vec yout)628 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
629 {
630 PC_GASM *osm = (PC_GASM*)pc->data;
631 PetscErrorCode ierr;
632 PetscInt i;
633 Vec x,y;
634 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
635
636 PetscFunctionBegin;
637 if (osm->pctoouter) {
638 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
639 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
640 x = osm->pcx;
641 y = osm->pcy;
642 } else {
643 x = xin;
644 y = yout;
645 }
646 /*
647 support for limiting the restriction or interpolation only to the inner
648 subdomain values (leaving the other values 0).
649 */
650 if (!(osm->type & PC_GASM_RESTRICT)) {
651 /* have to zero the work RHS since scatter may leave some slots empty */
652 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
653 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
654 } else {
655 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
656 }
657 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
658 if (!(osm->type & PC_GASM_RESTRICT)) {
659 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
660 } else {
661 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
662 }
663 /* do the subdomain solves */
664 for (i=0; i<osm->n; ++i) {
665 ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
666 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
667 }
668 /* do we need to zero y? */
669 ierr = VecZeroEntries(y);CHKERRQ(ierr);
670 if (!(osm->type & PC_GASM_INTERPOLATE)) {
671 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
672 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
673 } else {
674 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
675 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
676 }
677 if (osm->pctoouter) {
678 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
679 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
680 }
681 PetscFunctionReturn(0);
682 }
683
PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)684 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
685 {
686 PC_GASM *osm = (PC_GASM*)pc->data;
687 Mat X,Y,O=NULL,Z,W;
688 Vec x,y;
689 PetscInt i,m,M,N;
690 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
691 PetscErrorCode ierr;
692
693 PetscFunctionBegin;
694 if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
695 ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
696 if (osm->pctoouter) {
697 ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
698 ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
699 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
700 for (i = 0; i < N; ++i) {
701 ierr = MatDenseGetColumnVecRead(Xin,i,&x);
702 ierr = MatDenseGetColumnVecWrite(O,i,&y);
703 ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
704 ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
705 ierr = MatDenseRestoreColumnVecWrite(O,i,&y);
706 ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);
707 }
708 X = Y = O;
709 } else {
710 X = Xin;
711 Y = Yout;
712 }
713 /*
714 support for limiting the restriction or interpolation only to the inner
715 subdomain values (leaving the other values 0).
716 */
717 ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
718 ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
719 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
720 for (i = 0; i < N; ++i) {
721 ierr = MatDenseGetColumnVecRead(X,i,&x);
722 ierr = MatDenseGetColumnVecWrite(Z,i,&y);
723 if (!(osm->type & PC_GASM_RESTRICT)) {
724 /* have to zero the work RHS since scatter may leave some slots empty */
725 ierr = VecZeroEntries(y);CHKERRQ(ierr);
726 ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
727 ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
728 } else {
729 ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
730 ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731 }
732 ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);
733 ierr = MatDenseRestoreColumnVecRead(X,i,&x);
734 }
735 ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
736 ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
737 ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
738 ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
739 /* do the subdomain solve */
740 ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
741 ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
742 ierr = MatDestroy(&Z);CHKERRQ(ierr);
743 /* do we need to zero y? */
744 ierr = MatZeroEntries(Y);CHKERRQ(ierr);
745 for (i = 0; i < N; ++i) {
746 ierr = MatDenseGetColumnVecWrite(Y,i,&y);
747 ierr = MatDenseGetColumnVecRead(W,i,&x);
748 if (!(osm->type & PC_GASM_INTERPOLATE)) {
749 ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
750 ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
751 } else {
752 ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
753 ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754 }
755 ierr = MatDenseRestoreColumnVecRead(W,i,&x);
756 if (osm->pctoouter) {
757 ierr = MatDenseGetColumnVecWrite(Yout,i,&x);
758 ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
759 ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760 ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);
761 }
762 ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);
763 }
764 ierr = MatDestroy(&W);CHKERRQ(ierr);
765 ierr = MatDestroy(&O);CHKERRQ(ierr);
766 PetscFunctionReturn(0);
767 }
768
PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)769 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
770 {
771 PC_GASM *osm = (PC_GASM*)pc->data;
772 PetscErrorCode ierr;
773 PetscInt i;
774 Vec x,y;
775 ScatterMode forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
776
777 PetscFunctionBegin;
778 if (osm->pctoouter){
779 ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780 ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
781 x = osm->pcx;
782 y = osm->pcy;
783 }else{
784 x = xin;
785 y = yout;
786 }
787 /*
788 Support for limiting the restriction or interpolation to only local
789 subdomain values (leaving the other values 0).
790
791 Note: these are reversed from the PCApply_GASM() because we are applying the
792 transpose of the three terms
793 */
794 if (!(osm->type & PC_GASM_INTERPOLATE)) {
795 /* have to zero the work RHS since scatter may leave some slots empty */
796 ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
797 ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
798 } else {
799 ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
800 }
801 ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
802 if (!(osm->type & PC_GASM_INTERPOLATE)) {
803 ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
804 } else {
805 ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
806 }
807 /* do the local solves */
808 for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
809 ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
810 ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
811 }
812 ierr = VecZeroEntries(y);CHKERRQ(ierr);
813 if (!(osm->type & PC_GASM_RESTRICT)) {
814 ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
815 ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
816 } else {
817 ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
818 ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
819 }
820 if (osm->pctoouter){
821 ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822 ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823 }
824 PetscFunctionReturn(0);
825 }
826
PCReset_GASM(PC pc)827 static PetscErrorCode PCReset_GASM(PC pc)
828 {
829 PC_GASM *osm = (PC_GASM*)pc->data;
830 PetscErrorCode ierr;
831 PetscInt i;
832
833 PetscFunctionBegin;
834 if (osm->ksp) {
835 for (i=0; i<osm->n; i++) {
836 ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
837 }
838 }
839 if (osm->pmat) {
840 if (osm->n > 0) {
841 PetscMPIInt size;
842 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
843 if (size > 1) {
844 /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
845 ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
846 } else {
847 ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
848 }
849 }
850 }
851 if (osm->x) {
852 for (i=0; i<osm->n; i++) {
853 ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
854 ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
855 }
856 }
857 ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
858 ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
859
860 ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
861 ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
862 if (!osm->user_subdomains) {
863 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
864 osm->N = PETSC_DETERMINE;
865 osm->nmax = PETSC_DETERMINE;
866 }
867 if (osm->pctoouter){
868 ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
869 }
870 if (osm->permutationIS){
871 ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
872 }
873 if (osm->pcx){
874 ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
875 }
876 if (osm->pcy){
877 ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
878 }
879 if (osm->permutationP){
880 ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
881 }
882 if (osm->pcmat){
883 ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
884 }
885 PetscFunctionReturn(0);
886 }
887
PCDestroy_GASM(PC pc)888 static PetscErrorCode PCDestroy_GASM(PC pc)
889 {
890 PC_GASM *osm = (PC_GASM*)pc->data;
891 PetscErrorCode ierr;
892 PetscInt i;
893
894 PetscFunctionBegin;
895 ierr = PCReset_GASM(pc);CHKERRQ(ierr);
896 /* PCReset will not destroy subdomains, if user_subdomains is true. */
897 ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
898 if (osm->ksp) {
899 for (i=0; i<osm->n; i++) {
900 ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
901 }
902 ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
903 }
904 ierr = PetscFree(osm->x);CHKERRQ(ierr);
905 ierr = PetscFree(osm->y);CHKERRQ(ierr);
906 ierr = PetscFree(pc->data);CHKERRQ(ierr);
907 PetscFunctionReturn(0);
908 }
909
PCSetFromOptions_GASM(PetscOptionItems * PetscOptionsObject,PC pc)910 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
911 {
912 PC_GASM *osm = (PC_GASM*)pc->data;
913 PetscErrorCode ierr;
914 PetscInt blocks,ovl;
915 PetscBool flg;
916 PCGASMType gasmtype;
917
918 PetscFunctionBegin;
919 ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
920 ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
921 ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
922 if (flg) {
923 ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
924 }
925 ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
926 if (flg) {
927 ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
928 osm->dm_subdomains = PETSC_FALSE;
929 }
930 flg = PETSC_FALSE;
931 ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
932 if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
933 ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
934 ierr = PetscOptionsTail();CHKERRQ(ierr);
935 PetscFunctionReturn(0);
936 }
937
938 /*------------------------------------------------------------------------------------*/
939
940 /*@
941 PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
942 communicator.
943 Logically collective on pc
944
945 Input Parameters:
946 + pc - the preconditioner
947 - N - total number of subdomains
948
949
950 Level: beginner
951
952 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
953 PCGASMCreateSubdomains2D()
954 @*/
PCGASMSetTotalSubdomains(PC pc,PetscInt N)955 PetscErrorCode PCGASMSetTotalSubdomains(PC pc,PetscInt N)
956 {
957 PC_GASM *osm = (PC_GASM*)pc->data;
958 PetscMPIInt size,rank;
959 PetscErrorCode ierr;
960
961 PetscFunctionBegin;
962 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
963 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
964
965 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
966 osm->ois = osm->iis = NULL;
967
968 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
969 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
970 osm->N = N;
971 osm->n = PETSC_DETERMINE;
972 osm->nmax = PETSC_DETERMINE;
973 osm->dm_subdomains = PETSC_FALSE;
974 PetscFunctionReturn(0);
975 }
976
PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])977 static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
978 {
979 PC_GASM *osm = (PC_GASM*)pc->data;
980 PetscErrorCode ierr;
981 PetscInt i;
982
983 PetscFunctionBegin;
984 if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
985 if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
986
987 ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
988 osm->iis = osm->ois = NULL;
989 osm->n = n;
990 osm->N = PETSC_DETERMINE;
991 osm->nmax = PETSC_DETERMINE;
992 if (ois) {
993 ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
994 for (i=0; i<n; i++) {
995 ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
996 osm->ois[i] = ois[i];
997 }
998 /*
999 Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1000 it will be ignored. To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1001 */
1002 osm->overlap = -1;
1003 /* inner subdomains must be provided */
1004 if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"inner indices have to be provided \n");
1005 }/* end if */
1006 if (iis) {
1007 ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1008 for (i=0; i<n; i++) {
1009 ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1010 osm->iis[i] = iis[i];
1011 }
1012 if (!ois) {
1013 osm->ois = NULL;
1014 /* if user does not provide outer indices, we will create the corresponding outer indices using osm->overlap =1 in PCSetUp_GASM */
1015 }
1016 }
1017 if (PetscDefined(USE_DEBUG)) {
1018 PetscInt j,rstart,rend,*covered,lsize;
1019 const PetscInt *indices;
1020 /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1021 ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1022 ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1023 /* check if the current processor owns indices from others */
1024 for (i=0; i<n; i++) {
1025 ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1026 ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1027 for (j=0; j<lsize; j++) {
1028 if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not own an index %d from other processors", indices[j]);
1029 else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"inner subdomains can not have an overlapping index %d ",indices[j]);
1030 else covered[indices[j]-rstart] = 1;
1031 }
1032 ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1033 }
1034 /* check if we miss any indices */
1035 for (i=rstart; i<rend; i++) {
1036 if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1037 }
1038 ierr = PetscFree(covered);CHKERRQ(ierr);
1039 }
1040 if (iis) osm->user_subdomains = PETSC_TRUE;
1041 PetscFunctionReturn(0);
1042 }
1043
PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)1044 static PetscErrorCode PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1045 {
1046 PC_GASM *osm = (PC_GASM*)pc->data;
1047
1048 PetscFunctionBegin;
1049 if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1050 if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1051 if (!pc->setupcalled) osm->overlap = ovl;
1052 PetscFunctionReturn(0);
1053 }
1054
PCGASMSetType_GASM(PC pc,PCGASMType type)1055 static PetscErrorCode PCGASMSetType_GASM(PC pc,PCGASMType type)
1056 {
1057 PC_GASM *osm = (PC_GASM*)pc->data;
1058
1059 PetscFunctionBegin;
1060 osm->type = type;
1061 osm->type_set = PETSC_TRUE;
1062 PetscFunctionReturn(0);
1063 }
1064
PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)1065 static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1066 {
1067 PC_GASM *osm = (PC_GASM*)pc->data;
1068
1069 PetscFunctionBegin;
1070 osm->sort_indices = doSort;
1071 PetscFunctionReturn(0);
1072 }
1073
1074 /*
1075 FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1076 In particular, it would upset the global subdomain number calculation.
1077 */
PCGASMGetSubKSP_GASM(PC pc,PetscInt * n,PetscInt * first,KSP ** ksp)1078 static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1079 {
1080 PC_GASM *osm = (PC_GASM*)pc->data;
1081 PetscErrorCode ierr;
1082
1083 PetscFunctionBegin;
1084 if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1085
1086 if (n) *n = osm->n;
1087 if (first) {
1088 ierr = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089 *first -= osm->n;
1090 }
1091 if (ksp) {
1092 /* Assume that local solves are now different; not necessarily
1093 true, though! This flag is used only for PCView_GASM() */
1094 *ksp = osm->ksp;
1095 osm->same_subdomain_solvers = PETSC_FALSE;
1096 }
1097 PetscFunctionReturn(0);
1098 } /* PCGASMGetSubKSP_GASM() */
1099
1100 /*@C
1101 PCGASMSetSubdomains - Sets the subdomains for this processor
1102 for the additive Schwarz preconditioner.
1103
1104 Collective on pc
1105
1106 Input Parameters:
1107 + pc - the preconditioner object
1108 . n - the number of subdomains for this processor
1109 . iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1110 - ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1111
1112 Notes:
1113 The IS indices use the parallel, global numbering of the vector entries.
1114 Inner subdomains are those where the correction is applied.
1115 Outer subdomains are those where the residual necessary to obtain the
1116 corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1117 Both inner and outer subdomains can extend over several processors.
1118 This processor's portion of a subdomain is known as a local subdomain.
1119
1120 Inner subdomains can not overlap with each other, do not have any entities from remote processors,
1121 and have to cover the entire local subdomain owned by the current processor. The index sets on each
1122 process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1123 on another MPI process.
1124
1125 By default the GASM preconditioner uses 1 (local) subdomain per processor.
1126
1127
1128 Level: advanced
1129
1130 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1131 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1132 @*/
PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])1133 PetscErrorCode PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1134 {
1135 PC_GASM *osm = (PC_GASM*)pc->data;
1136 PetscErrorCode ierr;
1137
1138 PetscFunctionBegin;
1139 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1140 ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1141 osm->dm_subdomains = PETSC_FALSE;
1142 PetscFunctionReturn(0);
1143 }
1144
1145 /*@
1146 PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1147 additive Schwarz preconditioner. Either all or no processors in the
1148 pc communicator must call this routine.
1149
1150 Logically Collective on pc
1151
1152 Input Parameters:
1153 + pc - the preconditioner context
1154 - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1155
1156 Options Database Key:
1157 . -pc_gasm_overlap <overlap> - Sets overlap
1158
1159 Notes:
1160 By default the GASM preconditioner uses 1 subdomain per processor. To use
1161 multiple subdomain per perocessor or "straddling" subdomains that intersect
1162 multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1163
1164 The overlap defaults to 0, so if one desires that no additional
1165 overlap be computed beyond what may have been set with a call to
1166 PCGASMSetSubdomains(), then ovl must be set to be 0. In particular, if one does
1167 not explicitly set the subdomains in application code, then all overlap would be computed
1168 internally by PETSc, and using an overlap of 0 would result in an GASM
1169 variant that is equivalent to the block Jacobi preconditioner.
1170
1171 Note that one can define initial index sets with any overlap via
1172 PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1173 PETSc to extend that overlap further, if desired.
1174
1175 Level: intermediate
1176
1177 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1178 PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1179 @*/
PCGASMSetOverlap(PC pc,PetscInt ovl)1180 PetscErrorCode PCGASMSetOverlap(PC pc,PetscInt ovl)
1181 {
1182 PetscErrorCode ierr;
1183 PC_GASM *osm = (PC_GASM*)pc->data;
1184
1185 PetscFunctionBegin;
1186 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1187 PetscValidLogicalCollectiveInt(pc,ovl,2);
1188 ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1189 osm->dm_subdomains = PETSC_FALSE;
1190 PetscFunctionReturn(0);
1191 }
1192
1193 /*@
1194 PCGASMSetType - Sets the type of restriction and interpolation used
1195 for local problems in the additive Schwarz method.
1196
1197 Logically Collective on PC
1198
1199 Input Parameters:
1200 + pc - the preconditioner context
1201 - type - variant of GASM, one of
1202 .vb
1203 PC_GASM_BASIC - full interpolation and restriction
1204 PC_GASM_RESTRICT - full restriction, local processor interpolation
1205 PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1206 PC_GASM_NONE - local processor restriction and interpolation
1207 .ve
1208
1209 Options Database Key:
1210 . -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1211
1212 Level: intermediate
1213
1214 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1215 PCGASMCreateSubdomains2D()
1216 @*/
PCGASMSetType(PC pc,PCGASMType type)1217 PetscErrorCode PCGASMSetType(PC pc,PCGASMType type)
1218 {
1219 PetscErrorCode ierr;
1220
1221 PetscFunctionBegin;
1222 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1223 PetscValidLogicalCollectiveEnum(pc,type,2);
1224 ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1225 PetscFunctionReturn(0);
1226 }
1227
1228 /*@
1229 PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1230
1231 Logically Collective on PC
1232
1233 Input Parameters:
1234 + pc - the preconditioner context
1235 - doSort - sort the subdomain indices
1236
1237 Level: intermediate
1238
1239 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1240 PCGASMCreateSubdomains2D()
1241 @*/
PCGASMSetSortIndices(PC pc,PetscBool doSort)1242 PetscErrorCode PCGASMSetSortIndices(PC pc,PetscBool doSort)
1243 {
1244 PetscErrorCode ierr;
1245
1246 PetscFunctionBegin;
1247 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1248 PetscValidLogicalCollectiveBool(pc,doSort,2);
1249 ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1250 PetscFunctionReturn(0);
1251 }
1252
1253 /*@C
1254 PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1255 this processor.
1256
1257 Collective on PC iff first_local is requested
1258
1259 Input Parameter:
1260 . pc - the preconditioner context
1261
1262 Output Parameters:
1263 + n_local - the number of blocks on this processor or NULL
1264 . first_local - the global number of the first block on this processor or NULL,
1265 all processors must request or all must pass NULL
1266 - ksp - the array of KSP contexts
1267
1268 Note:
1269 After PCGASMGetSubKSP() the array of KSPes is not to be freed
1270
1271 Currently for some matrix implementations only 1 block per processor
1272 is supported.
1273
1274 You must call KSPSetUp() before calling PCGASMGetSubKSP().
1275
1276 Level: advanced
1277
1278 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1279 PCGASMCreateSubdomains2D(),
1280 @*/
PCGASMGetSubKSP(PC pc,PetscInt * n_local,PetscInt * first_local,KSP * ksp[])1281 PetscErrorCode PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1282 {
1283 PetscErrorCode ierr;
1284
1285 PetscFunctionBegin;
1286 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1287 ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1288 PetscFunctionReturn(0);
1289 }
1290
1291 /* -------------------------------------------------------------------------------------*/
1292 /*MC
1293 PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1294 its own KSP object.
1295
1296 Options Database Keys:
1297 + -pc_gasm_total_subdomains <n> - Sets total number of local subdomains to be distributed among processors
1298 . -pc_gasm_view_subdomains - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1299 . -pc_gasm_print_subdomains - activates the printing of subdomain indices in PCSetUp()
1300 . -pc_gasm_overlap <ovl> - Sets overlap by which to (automatically) extend local subdomains
1301 - -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1302
1303 IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1304 will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1305 -pc_gasm_type basic to use the standard GASM.
1306
1307 Notes:
1308 Blocks can be shared by multiple processes.
1309
1310 To set options on the solvers for each block append -sub_ to all the KSP, and PC
1311 options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1312
1313 To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1314 and set the options directly on the resulting KSP object (you can access its PC
1315 with KSPGetPC())
1316
1317
1318 Level: beginner
1319
1320 References:
1321 + 1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1322 Courant Institute, New York University Technical report
1323 - 2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1324 Cambridge University Press.
1325
1326 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
1327 PCBJACOBI, PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1328 PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1329
1330 M*/
1331
PCCreate_GASM(PC pc)1332 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1333 {
1334 PetscErrorCode ierr;
1335 PC_GASM *osm;
1336
1337 PetscFunctionBegin;
1338 ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1339
1340 osm->N = PETSC_DETERMINE;
1341 osm->n = PETSC_DECIDE;
1342 osm->nmax = PETSC_DETERMINE;
1343 osm->overlap = 0;
1344 osm->ksp = NULL;
1345 osm->gorestriction = NULL;
1346 osm->girestriction = NULL;
1347 osm->pctoouter = NULL;
1348 osm->gx = NULL;
1349 osm->gy = NULL;
1350 osm->x = NULL;
1351 osm->y = NULL;
1352 osm->pcx = NULL;
1353 osm->pcy = NULL;
1354 osm->permutationIS = NULL;
1355 osm->permutationP = NULL;
1356 osm->pcmat = NULL;
1357 osm->ois = NULL;
1358 osm->iis = NULL;
1359 osm->pmat = NULL;
1360 osm->type = PC_GASM_RESTRICT;
1361 osm->same_subdomain_solvers = PETSC_TRUE;
1362 osm->sort_indices = PETSC_TRUE;
1363 osm->dm_subdomains = PETSC_FALSE;
1364 osm->hierarchicalpartitioning = PETSC_FALSE;
1365
1366 pc->data = (void*)osm;
1367 pc->ops->apply = PCApply_GASM;
1368 pc->ops->matapply = PCMatApply_GASM;
1369 pc->ops->applytranspose = PCApplyTranspose_GASM;
1370 pc->ops->setup = PCSetUp_GASM;
1371 pc->ops->reset = PCReset_GASM;
1372 pc->ops->destroy = PCDestroy_GASM;
1373 pc->ops->setfromoptions = PCSetFromOptions_GASM;
1374 pc->ops->setuponblocks = PCSetUpOnBlocks_GASM;
1375 pc->ops->view = PCView_GASM;
1376 pc->ops->applyrichardson = NULL;
1377
1378 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1379 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1380 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1381 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1382 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1383 PetscFunctionReturn(0);
1384 }
1385
PCGASMCreateLocalSubdomains(Mat A,PetscInt nloc,IS * iis[])1386 PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1387 {
1388 MatPartitioning mpart;
1389 const char *prefix;
1390 PetscInt i,j,rstart,rend,bs;
1391 PetscBool hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1392 Mat Ad = NULL, adj;
1393 IS ispart,isnumb,*is;
1394 PetscErrorCode ierr;
1395
1396 PetscFunctionBegin;
1397 if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"number of local subdomains must > 0, got nloc = %D",nloc);
1398
1399 /* Get prefix, row distribution, and block size */
1400 ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1401 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1402 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1403 if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1404
1405 /* Get diagonal block from matrix if possible */
1406 ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1407 if (hasop) {
1408 ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1409 }
1410 if (Ad) {
1411 ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1412 if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1413 }
1414 if (Ad && nloc > 1) {
1415 PetscBool match,done;
1416 /* Try to setup a good matrix partitioning if available */
1417 ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1418 ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1419 ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1420 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1421 if (!match) {
1422 ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1423 }
1424 if (!match) { /* assume a "good" partitioner is available */
1425 PetscInt na;
1426 const PetscInt *ia,*ja;
1427 ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1428 if (done) {
1429 /* Build adjacency matrix by hand. Unfortunately a call to
1430 MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1431 remove the block-aij structure and we cannot expect
1432 MatPartitioning to split vertices as we need */
1433 PetscInt i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1434 const PetscInt *row;
1435 nnz = 0;
1436 for (i=0; i<na; i++) { /* count number of nonzeros */
1437 len = ia[i+1] - ia[i];
1438 row = ja + ia[i];
1439 for (j=0; j<len; j++) {
1440 if (row[j] == i) { /* don't count diagonal */
1441 len--; break;
1442 }
1443 }
1444 nnz += len;
1445 }
1446 ierr = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1447 ierr = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1448 nnz = 0;
1449 iia[0] = 0;
1450 for (i=0; i<na; i++) { /* fill adjacency */
1451 cnt = 0;
1452 len = ia[i+1] - ia[i];
1453 row = ja + ia[i];
1454 for (j=0; j<len; j++) {
1455 if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1456 }
1457 nnz += cnt;
1458 iia[i+1] = nnz;
1459 }
1460 /* Partitioning of the adjacency matrix */
1461 ierr = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1462 ierr = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1463 ierr = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1464 ierr = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1465 ierr = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1466 ierr = MatDestroy(&adj);CHKERRQ(ierr);
1467 foundpart = PETSC_TRUE;
1468 }
1469 ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1470 }
1471 ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1472 }
1473 ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1474 if (!foundpart) {
1475
1476 /* Partitioning by contiguous chunks of rows */
1477
1478 PetscInt mbs = (rend-rstart)/bs;
1479 PetscInt start = rstart;
1480 for (i=0; i<nloc; i++) {
1481 PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1482 ierr = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1483 start += count;
1484 }
1485
1486 } else {
1487
1488 /* Partitioning by adjacency of diagonal block */
1489
1490 const PetscInt *numbering;
1491 PetscInt *count,nidx,*indices,*newidx,start=0;
1492 /* Get node count in each partition */
1493 ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1494 ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1495 if (isbaij && bs > 1) { /* adjust for the block-aij case */
1496 for (i=0; i<nloc; i++) count[i] *= bs;
1497 }
1498 /* Build indices from node numbering */
1499 ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1500 ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1501 for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1502 ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1503 ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1504 ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1505 if (isbaij && bs > 1) { /* adjust for the block-aij case */
1506 ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1507 for (i=0; i<nidx; i++) {
1508 for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1509 }
1510 ierr = PetscFree(indices);CHKERRQ(ierr);
1511 nidx *= bs;
1512 indices = newidx;
1513 }
1514 /* Shift to get global indices */
1515 for (i=0; i<nidx; i++) indices[i] += rstart;
1516
1517 /* Build the index sets for each block */
1518 for (i=0; i<nloc; i++) {
1519 ierr = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1520 ierr = ISSort(is[i]);CHKERRQ(ierr);
1521 start += count[i];
1522 }
1523
1524 ierr = PetscFree(count);CHKERRQ(ierr);
1525 ierr = PetscFree(indices);CHKERRQ(ierr);
1526 ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1527 ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1528 }
1529 *iis = is;
1530 PetscFunctionReturn(0);
1531 }
1532
PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt * n,IS * iis[])1533 PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1534 {
1535 PetscErrorCode ierr;
1536
1537 PetscFunctionBegin;
1538 ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1539 PetscFunctionReturn(0);
1540 }
1541
1542 /*@C
1543 PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1544 Schwarz preconditioner for a any problem based on its matrix.
1545
1546 Collective
1547
1548 Input Parameters:
1549 + A - The global matrix operator
1550 - N - the number of global subdomains requested
1551
1552 Output Parameters:
1553 + n - the number of subdomains created on this processor
1554 - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1555
1556 Level: advanced
1557
1558 Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1559 When N < size, the subdomains are 'straddling' (processor boundaries) and are no longer local.
1560 The resulting subdomains can be use in PCGASMSetSubdomains(pc,n,iss,NULL). The overlapping
1561 outer subdomains will be automatically generated from these according to the requested amount of
1562 overlap; this is currently supported only with local subdomains.
1563
1564
1565 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1566 @*/
PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt * n,IS * iis[])1567 PetscErrorCode PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1568 {
1569 PetscMPIInt size;
1570 PetscErrorCode ierr;
1571
1572 PetscFunctionBegin;
1573 PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1574 PetscValidPointer(iis,4);
1575
1576 if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1577 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1578 if (N >= size) {
1579 *n = N/size + (N%size);
1580 ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1581 } else {
1582 ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1583 }
1584 PetscFunctionReturn(0);
1585 }
1586
1587 /*@C
1588 PCGASMDestroySubdomains - Destroys the index sets created with
1589 PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D. Should be
1590 called after setting subdomains with PCGASMSetSubdomains().
1591
1592 Collective
1593
1594 Input Parameters:
1595 + n - the number of index sets
1596 . iis - the array of inner subdomains,
1597 - ois - the array of outer subdomains, can be NULL
1598
1599 Level: intermediate
1600
1601 Notes:
1602 this is merely a convenience subroutine that walks each list,
1603 destroys each IS on the list, and then frees the list. At the end the
1604 list pointers are set to NULL.
1605
1606 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1607 @*/
PCGASMDestroySubdomains(PetscInt n,IS ** iis,IS ** ois)1608 PetscErrorCode PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1609 {
1610 PetscInt i;
1611 PetscErrorCode ierr;
1612
1613 PetscFunctionBegin;
1614 if (n <= 0) PetscFunctionReturn(0);
1615 if (ois) {
1616 PetscValidPointer(ois,3);
1617 if (*ois) {
1618 PetscValidPointer(*ois,3);
1619 for (i=0; i<n; i++) {
1620 ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1621 }
1622 ierr = PetscFree((*ois));CHKERRQ(ierr);
1623 }
1624 }
1625 if (iis) {
1626 PetscValidPointer(iis,2);
1627 if (*iis) {
1628 PetscValidPointer(*iis,2);
1629 for (i=0; i<n; i++) {
1630 ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1631 }
1632 ierr = PetscFree((*iis));CHKERRQ(ierr);
1633 }
1634 }
1635 PetscFunctionReturn(0);
1636 }
1637
1638 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1639 { \
1640 PetscInt first_row = first/M, last_row = last/M+1; \
1641 /* \
1642 Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners \
1643 of the bounding box of the intersection of the subdomain with the local ownership range (local \
1644 subdomain). \
1645 Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows \
1646 of the intersection. \
1647 */ \
1648 /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \
1649 *ylow_loc = PetscMax(first_row,ylow); \
1650 /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1651 *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft; \
1652 /* yhigh_loc is the grid row above the last local subdomain element */ \
1653 *yhigh_loc = PetscMin(last_row,yhigh); \
1654 /* xright is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1655 *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright; \
1656 /* Now compute the size of the local subdomain n. */ \
1657 *n = 0; \
1658 if (*ylow_loc < *yhigh_loc) { \
1659 PetscInt width = xright-xleft; \
1660 *n += width*(*yhigh_loc-*ylow_loc-1); \
1661 *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1662 *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1663 } \
1664 }
1665
1666 /*@
1667 PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1668 preconditioner for a two-dimensional problem on a regular grid.
1669
1670 Collective
1671
1672 Input Parameters:
1673 + M, N - the global number of grid points in the x and y directions
1674 . Mdomains, Ndomains - the global number of subdomains in the x and y directions
1675 . dof - degrees of freedom per node
1676 - overlap - overlap in mesh lines
1677
1678 Output Parameters:
1679 + Nsub - the number of local subdomains created
1680 . iis - array of index sets defining inner (nonoverlapping) subdomains
1681 - ois - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1682
1683
1684 Level: advanced
1685
1686 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1687 @*/
PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt * nsub,IS ** iis,IS ** ois)1688 PetscErrorCode PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1689 {
1690 PetscErrorCode ierr;
1691 PetscMPIInt size, rank;
1692 PetscInt i, j;
1693 PetscInt maxheight, maxwidth;
1694 PetscInt xstart, xleft, xright, xleft_loc, xright_loc;
1695 PetscInt ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1696 PetscInt x[2][2], y[2][2], n[2];
1697 PetscInt first, last;
1698 PetscInt nidx, *idx;
1699 PetscInt ii,jj,s,q,d;
1700 PetscInt k,kk;
1701 PetscMPIInt color;
1702 MPI_Comm comm, subcomm;
1703 IS **xis = NULL, **is = ois, **is_local = iis;
1704
1705 PetscFunctionBegin;
1706 ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1707 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1708 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1709 ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1710 if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1711 "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1712
1713 /* Determine the number of domains with nonzero intersections with the local ownership range. */
1714 s = 0;
1715 ystart = 0;
1716 for (j=0; j<Ndomains; ++j) {
1717 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1718 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1719 /* Vertical domain limits with an overlap. */
1720 ylow = PetscMax(ystart - overlap,0);
1721 yhigh = PetscMin(ystart + maxheight + overlap,N);
1722 xstart = 0;
1723 for (i=0; i<Mdomains; ++i) {
1724 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1725 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1726 /* Horizontal domain limits with an overlap. */
1727 xleft = PetscMax(xstart - overlap,0);
1728 xright = PetscMin(xstart + maxwidth + overlap,M);
1729 /*
1730 Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1731 */
1732 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1733 if (nidx) ++s;
1734 xstart += maxwidth;
1735 } /* for (i = 0; i < Mdomains; ++i) */
1736 ystart += maxheight;
1737 } /* for (j = 0; j < Ndomains; ++j) */
1738
1739 /* Now we can allocate the necessary number of ISs. */
1740 *nsub = s;
1741 ierr = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1742 ierr = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1743 s = 0;
1744 ystart = 0;
1745 for (j=0; j<Ndomains; ++j) {
1746 maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1747 if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical directon for mesh height %D", Ndomains, N);
1748 /* Vertical domain limits with an overlap. */
1749 y[0][0] = PetscMax(ystart - overlap,0);
1750 y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1751 /* Vertical domain limits without an overlap. */
1752 y[1][0] = ystart;
1753 y[1][1] = PetscMin(ystart + maxheight,N);
1754 xstart = 0;
1755 for (i=0; i<Mdomains; ++i) {
1756 maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1757 if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1758 /* Horizontal domain limits with an overlap. */
1759 x[0][0] = PetscMax(xstart - overlap,0);
1760 x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1761 /* Horizontal domain limits without an overlap. */
1762 x[1][0] = xstart;
1763 x[1][1] = PetscMin(xstart+maxwidth,M);
1764 /*
1765 Determine whether this domain intersects this processor's ownership range of pc->pmat.
1766 Do this twice: first for the domains with overlaps, and once without.
1767 During the first pass create the subcommunicators, and use them on the second pass as well.
1768 */
1769 for (q = 0; q < 2; ++q) {
1770 PetscBool split = PETSC_FALSE;
1771 /*
1772 domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1773 according to whether the domain with an overlap or without is considered.
1774 */
1775 xleft = x[q][0]; xright = x[q][1];
1776 ylow = y[q][0]; yhigh = y[q][1];
1777 PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1778 nidx *= dof;
1779 n[q] = nidx;
1780 /*
1781 Based on the counted number of indices in the local domain *with an overlap*,
1782 construct a subcommunicator of all the processors supporting this domain.
1783 Observe that a domain with an overlap might have nontrivial local support,
1784 while the domain without an overlap might not. Hence, the decision to participate
1785 in the subcommunicator must be based on the domain with an overlap.
1786 */
1787 if (q == 0) {
1788 if (nidx) color = 1;
1789 else color = MPI_UNDEFINED;
1790 ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1791 split = PETSC_TRUE;
1792 }
1793 /*
1794 Proceed only if the number of local indices *with an overlap* is nonzero.
1795 */
1796 if (n[0]) {
1797 if (q == 0) xis = is;
1798 if (q == 1) {
1799 /*
1800 The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1801 Moreover, if the overlap is zero, the two ISs are identical.
1802 */
1803 if (overlap == 0) {
1804 (*is_local)[s] = (*is)[s];
1805 ierr = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1806 continue;
1807 } else {
1808 xis = is_local;
1809 subcomm = ((PetscObject)(*is)[s])->comm;
1810 }
1811 } /* if (q == 1) */
1812 idx = NULL;
1813 ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1814 if (nidx) {
1815 k = 0;
1816 for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1817 PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1818 PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1819 kk = dof*(M*jj + x0);
1820 for (ii=x0; ii<x1; ++ii) {
1821 for (d = 0; d < dof; ++d) {
1822 idx[k++] = kk++;
1823 }
1824 }
1825 }
1826 }
1827 ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1828 if (split) {
1829 ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
1830 }
1831 }/* if (n[0]) */
1832 }/* for (q = 0; q < 2; ++q) */
1833 if (n[0]) ++s;
1834 xstart += maxwidth;
1835 } /* for (i = 0; i < Mdomains; ++i) */
1836 ystart += maxheight;
1837 } /* for (j = 0; j < Ndomains; ++j) */
1838 PetscFunctionReturn(0);
1839 }
1840
1841 /*@C
1842 PCGASMGetSubdomains - Gets the subdomains supported on this processor
1843 for the additive Schwarz preconditioner.
1844
1845 Not Collective
1846
1847 Input Parameter:
1848 . pc - the preconditioner context
1849
1850 Output Parameters:
1851 + n - the number of subdomains for this processor (default value = 1)
1852 . iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1853 - ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1854
1855
1856 Notes:
1857 The user is responsible for destroying the ISs and freeing the returned arrays.
1858 The IS numbering is in the parallel, global numbering of the vector.
1859
1860 Level: advanced
1861
1862 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1863 PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1864 @*/
PCGASMGetSubdomains(PC pc,PetscInt * n,IS * iis[],IS * ois[])1865 PetscErrorCode PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1866 {
1867 PC_GASM *osm;
1868 PetscErrorCode ierr;
1869 PetscBool match;
1870 PetscInt i;
1871
1872 PetscFunctionBegin;
1873 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1874 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1875 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1876 osm = (PC_GASM*)pc->data;
1877 if (n) *n = osm->n;
1878 if (iis) {
1879 ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1880 }
1881 if (ois) {
1882 ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1883 }
1884 if (iis || ois) {
1885 for (i = 0; i < osm->n; ++i) {
1886 if (iis) (*iis)[i] = osm->iis[i];
1887 if (ois) (*ois)[i] = osm->ois[i];
1888 }
1889 }
1890 PetscFunctionReturn(0);
1891 }
1892
1893 /*@C
1894 PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1895 only) for the additive Schwarz preconditioner.
1896
1897 Not Collective
1898
1899 Input Parameter:
1900 . pc - the preconditioner context
1901
1902 Output Parameters:
1903 + n - the number of matrices for this processor (default value = 1)
1904 - mat - the matrices
1905
1906 Notes:
1907 matrices returned by this routine have the same communicators as the index sets (IS)
1908 used to define subdomains in PCGASMSetSubdomains()
1909 Level: advanced
1910
1911 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1912 PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1913 @*/
PCGASMGetSubmatrices(PC pc,PetscInt * n,Mat * mat[])1914 PetscErrorCode PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1915 {
1916 PC_GASM *osm;
1917 PetscErrorCode ierr;
1918 PetscBool match;
1919
1920 PetscFunctionBegin;
1921 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1922 PetscValidIntPointer(n,2);
1923 if (mat) PetscValidPointer(mat,3);
1924 if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1925 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1926 if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1927 osm = (PC_GASM*)pc->data;
1928 if (n) *n = osm->n;
1929 if (mat) *mat = osm->pmat;
1930 PetscFunctionReturn(0);
1931 }
1932
1933 /*@
1934 PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1935 Logically Collective
1936
1937 Input Parameter:
1938 + pc - the preconditioner
1939 - flg - boolean indicating whether to use subdomains defined by the DM
1940
1941 Options Database Key:
1942 . -pc_gasm_dm_subdomains -pc_gasm_overlap -pc_gasm_total_subdomains
1943
1944 Level: intermediate
1945
1946 Notes:
1947 PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1948 so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1949 automatically turns the latter off.
1950
1951 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1952 PCGASMCreateSubdomains2D()
1953 @*/
PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)1954 PetscErrorCode PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1955 {
1956 PC_GASM *osm = (PC_GASM*)pc->data;
1957 PetscErrorCode ierr;
1958 PetscBool match;
1959
1960 PetscFunctionBegin;
1961 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1962 PetscValidLogicalCollectiveBool(pc,flg,2);
1963 if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1964 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1965 if (match) {
1966 if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1967 osm->dm_subdomains = flg;
1968 }
1969 }
1970 PetscFunctionReturn(0);
1971 }
1972
1973 /*@
1974 PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1975 Not Collective
1976
1977 Input Parameter:
1978 . pc - the preconditioner
1979
1980 Output Parameter:
1981 . flg - boolean indicating whether to use subdomains defined by the DM
1982
1983 Level: intermediate
1984
1985 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1986 PCGASMCreateSubdomains2D()
1987 @*/
PCGASMGetUseDMSubdomains(PC pc,PetscBool * flg)1988 PetscErrorCode PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1989 {
1990 PC_GASM *osm = (PC_GASM*)pc->data;
1991 PetscErrorCode ierr;
1992 PetscBool match;
1993
1994 PetscFunctionBegin;
1995 PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1996 PetscValidBoolPointer(flg,2);
1997 ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1998 if (match) {
1999 if (flg) *flg = osm->dm_subdomains;
2000 }
2001 PetscFunctionReturn(0);
2002 }
2003