1 /*
2   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
3   In this version each processor may intersect multiple subdomains and any subdomain may
4   intersect multiple processors.  Intersections of subdomains with processors are called *local
5   subdomains*.
6 
7        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
8        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
9        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
10 */
11 #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
12 #include <petscdm.h>
13 
14 typedef struct {
15   PetscInt    N,n,nmax;
16   PetscInt    overlap;                  /* overlap requested by user */
17   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
18   PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
19   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
20   PetscBool   sort_indices;             /* flag to sort subdomain indices */
21   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
22   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
23   PetscBool   hierarchicalpartitioning;
24   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
25   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
26   KSP         *ksp;                     /* linear solvers for each subdomain */
27   Mat         *pmat;                    /* subdomain block matrices */
28   Vec         gx,gy;                    /* Merged work vectors */
29   Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
30   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
31   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
32   VecScatter  pctoouter;
33   IS          permutationIS;
34   Mat         permutationP;
35   Mat         pcmat;
36   Vec         pcx,pcy;
37 } PC_GASM;
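/*
   A minimal usage sketch (illustrative only, not part of this file's implementation): it assumes
   an already assembled matrix A and vectors b,x, and shows how the fields above are typically
   populated from application code -- N through PCGASMSetTotalSubdomains(), the overlap through
   PCGASMSetOverlap() -- while n and nmax are then derived in PCSetUp_GASM().

     KSP            ksp;
     PC             pc;
     PetscErrorCode ierr;

     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetTotalSubdomains(pc,8);CHKERRQ(ierr);  // N = 8 subdomains across the communicator
     ierr = PCGASMSetOverlap(pc,1);CHKERRQ(ierr);          // one extra layer of overlap
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/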
38 
39 static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
40 {
41   PC_GASM        *osm = (PC_GASM*)pc->data;
42   PetscInt       i;
43   PetscErrorCode ierr;
44 
45   PetscFunctionBegin;
46   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
47   ierr = PetscMalloc2(osm->n,numbering,osm->n,permutation);CHKERRQ(ierr);
48   ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);CHKERRQ(ierr);
49   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
50   ierr = PetscSortIntWithPermutation(osm->n,*numbering,*permutation);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
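/*
   Worked example (hypothetical values): if this rank supports three subdomains whose global
   numbers come back from PetscObjectsListGetGlobalNumbering() as numbering = {5,2,7}, then the
   identity permutation {0,1,2} is reordered by PetscSortIntWithPermutation() to {1,0,2}, so that
   numbering[permutation[l]] visits the local subdomains in increasing global order (2,5,7).
   This is the traversal order used by PCGASMPrintSubdomains() and PCView_GASM() below.
*/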
53 
54 static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
55 {
56   PC_GASM        *osm = (PC_GASM*)pc->data;
57   PetscInt       j,nidx;
58   const PetscInt *idx;
59   PetscViewer    sviewer;
60   char           *cidx;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
65   /* Inner subdomains. */
66   ierr = ISGetLocalSize(osm->iis[i], &nidx);CHKERRQ(ierr);
67   /*
68    No more than 15 characters per index plus a space.
69    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
70    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
71    For nidx == 0, the whole string consists of 16 '\0' characters.
72    */
73 #define len  16*(nidx+1)+1
74   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
75   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
76 #undef len
77   ierr = ISGetIndices(osm->iis[i], &idx);CHKERRQ(ierr);
78   for (j = 0; j < nidx; ++j) {
79     ierr = PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);CHKERRQ(ierr);
80   }
81   ierr = ISRestoreIndices(osm->iis[i],&idx);CHKERRQ(ierr);
82   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
83   ierr = PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");CHKERRQ(ierr);
84   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
86   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
87   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
88   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
89   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
90   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
91   ierr = PetscFree(cidx);CHKERRQ(ierr);
92   /* Outer subdomains. */
93   ierr = ISGetLocalSize(osm->ois[i], &nidx);CHKERRQ(ierr);
94   /*
95    No more than 15 characters per index plus a space.
96    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
97    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
98    For nidx == 0, the whole string consists of 16 '\0' characters.
99    */
100 #define len  16*(nidx+1)+1
101   ierr = PetscMalloc1(len, &cidx);CHKERRQ(ierr);
102   ierr = PetscViewerStringOpen(PETSC_COMM_SELF, cidx, len, &sviewer);CHKERRQ(ierr);
103 #undef len
104   ierr = ISGetIndices(osm->ois[i], &idx);CHKERRQ(ierr);
105   for (j = 0; j < nidx; ++j) {
106     ierr = PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);CHKERRQ(ierr);
107   }
108   ierr = PetscViewerDestroy(&sviewer);CHKERRQ(ierr);
109   ierr = ISRestoreIndices(osm->ois[i],&idx);CHKERRQ(ierr);
110   ierr = PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");CHKERRQ(ierr);
111   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
112   ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
113   ierr = PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);CHKERRQ(ierr);
114   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
115   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
116   ierr = PetscViewerASCIIPrintf(viewer, "\n");CHKERRQ(ierr);
117   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
118   ierr = PetscFree(cidx);CHKERRQ(ierr);
119   PetscFunctionReturn(0);
120 }
121 
122 static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
123 {
124   PC_GASM        *osm = (PC_GASM*)pc->data;
125   const char     *prefix;
126   char           fname[PETSC_MAX_PATH_LEN+1];
127   PetscInt       l, d, count;
128   PetscBool      doprint,found;
129   PetscViewer    viewer, sviewer = NULL;
130   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
135   doprint  = PETSC_FALSE;
136   ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);CHKERRQ(ierr);
137   if (!doprint) PetscFunctionReturn(0);
138   ierr = PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,sizeof(fname),&found);CHKERRQ(ierr);
139   if (!found) { ierr = PetscStrcpy(fname,"stdout");CHKERRQ(ierr); };
140   ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);CHKERRQ(ierr);
141   /*
142    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
143    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
144   */
145   ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
146   l    = 0;
147   ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
148   for (count = 0; count < osm->N; ++count) {
149     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
150     if (l<osm->n) {
151       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
152       if (numbering[d] == count) {
153         ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
154         ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
155         ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
156         ++l;
157       }
158     }
159     ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
160   }
161   ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
162   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
167 {
168   PC_GASM        *osm = (PC_GASM*)pc->data;
169   const char     *prefix;
170   PetscErrorCode ierr;
171   PetscMPIInt    rank, size;
172   PetscInt       bsz;
173   PetscBool      iascii,view_subdomains=PETSC_FALSE;
174   PetscViewer    sviewer;
175   PetscInt       count, l;
176   char           overlap[256]     = "user-defined overlap";
177   char           gsubdomains[256] = "unknown total number of subdomains";
178   char           msubdomains[256] = "unknown max number of local subdomains";
179   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */
180 
181   PetscFunctionBegin;
182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);CHKERRQ(ierr);
183   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);CHKERRQ(ierr);
184 
185   if (osm->overlap >= 0) {
186     ierr = PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);CHKERRQ(ierr);
187   }
188   if (osm->N != PETSC_DETERMINE) {
189     ierr = PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);CHKERRQ(ierr);
190   }
191   if (osm->nmax != PETSC_DETERMINE) {
192     ierr = PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);CHKERRQ(ierr);
193   }
194 
195   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
196   ierr = PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);CHKERRQ(ierr);
197 
198   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
199   if (iascii) {
200     /*
201      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
202      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
203      collectively on the comm.
204      */
205     ierr = PetscObjectName((PetscObject)viewer);CHKERRQ(ierr);
206     ierr = PetscViewerASCIIPrintf(viewer,"  Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);CHKERRQ(ierr);
207     ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",overlap);CHKERRQ(ierr);
208     ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",gsubdomains);CHKERRQ(ierr);
209     ierr = PetscViewerASCIIPrintf(viewer,"  %s\n",msubdomains);CHKERRQ(ierr);
210     ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
211     ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);CHKERRQ(ierr);
212     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
213     ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
214     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
215     ierr = PetscViewerASCIIPrintf(viewer,"  Subdomain solver info is as follows:\n");CHKERRQ(ierr);
216     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
217     ierr = PetscViewerASCIIPrintf(viewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
218     /* Make sure that everybody waits for the banner to be printed. */
219     ierr = MPI_Barrier(PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
220     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
221     ierr = PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);CHKERRQ(ierr);
222     l = 0;
223     for (count = 0; count < osm->N; ++count) {
224       PetscMPIInt srank, ssize;
225       if (l<osm->n) {
226         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
227         if (numbering[d] == count) {
228           ierr = MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);CHKERRQ(ierr);
229           ierr = MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);CHKERRQ(ierr);
230           ierr = PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
231           ierr = ISGetLocalSize(osm->ois[d],&bsz);CHKERRQ(ierr);
232           ierr = PetscViewerASCIISynchronizedPrintf(sviewer,"  [%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);CHKERRQ(ierr);
233           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
234           if (view_subdomains) {
235             ierr = PCGASMSubdomainView_Private(pc,d,sviewer);CHKERRQ(ierr);
236           }
237           if (!pc->setupcalled) {
238             ierr = PetscViewerASCIIPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n");CHKERRQ(ierr);
239           } else {
240             ierr = KSPView(osm->ksp[d],sviewer);CHKERRQ(ierr);
241           }
242           ierr = PetscViewerASCIIPrintf(sviewer,"  - - - - - - - - - - - - - - - - - -\n");CHKERRQ(ierr);
243           ierr = PetscViewerFlush(sviewer);CHKERRQ(ierr);
244           ierr = PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);CHKERRQ(ierr);
245           ++l;
246         }
247       }
248       ierr = MPI_Barrier(PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
249     }
250     ierr = PetscFree2(numbering,permutation);CHKERRQ(ierr);
251     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
252   }
253   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
254   ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);
259 
260 PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
261 {
262    PC_GASM              *osm = (PC_GASM*)pc->data;
263    MatPartitioning       part;
264    MPI_Comm              comm;
265    PetscMPIInt           size;
266    PetscInt              nlocalsubdomains,fromrows_localsize;
267    IS                    partitioning,fromrows,isn;
268    Vec                   outervec;
269    PetscErrorCode        ierr;
270 
271    PetscFunctionBegin;
272    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
273    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
274    /* Hierarchical partitioning (HP) is not needed when
275     * the total number of subdomains is consistent with
276     * the number of MPI tasks; HP is skipped in the
277     * cases checked below.
278     */
279    if (osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1) PetscFunctionReturn(0);
280    if (size%osm->N != 0) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must evenly divide the number of processors %d",osm->N,size);
281    nlocalsubdomains = size/osm->N;
282    osm->n           = 1;
283    ierr = MatPartitioningCreate(comm,&part);CHKERRQ(ierr);
284    ierr = MatPartitioningSetAdjacency(part,pc->pmat);CHKERRQ(ierr);
285    ierr = MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);CHKERRQ(ierr);
286    ierr = MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);CHKERRQ(ierr);
287    ierr = MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);CHKERRQ(ierr);
288    ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);
289    /* get new processor owner number of each vertex */
290    ierr = MatPartitioningApply(part,&partitioning);CHKERRQ(ierr);
291    ierr = ISBuildTwoSided(partitioning,NULL,&fromrows);CHKERRQ(ierr);
292    ierr = ISPartitioningToNumbering(partitioning,&isn);CHKERRQ(ierr);
293    ierr = ISDestroy(&isn);CHKERRQ(ierr);
294    ierr = ISGetLocalSize(fromrows,&fromrows_localsize);CHKERRQ(ierr);
295    ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
296    ierr = MatCreateVecs(pc->pmat,&outervec,NULL);CHKERRQ(ierr);
297    ierr = VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));CHKERRQ(ierr);
298    ierr = VecDuplicate(osm->pcx,&(osm->pcy));CHKERRQ(ierr);
299    ierr = VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));CHKERRQ(ierr);
300    ierr = MatCreateSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));CHKERRQ(ierr);
301    ierr = PetscObjectReference((PetscObject)fromrows);CHKERRQ(ierr);
302    osm->permutationIS = fromrows;
303    osm->pcmat =  pc->pmat;
304    ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
305    pc->pmat = osm->permutationP;
306    ierr = VecDestroy(&outervec);CHKERRQ(ierr);
307    ierr = ISDestroy(&fromrows);CHKERRQ(ierr);
308    ierr = ISDestroy(&partitioning);CHKERRQ(ierr);
309    osm->n           = PETSC_DETERMINE;
310    PetscFunctionReturn(0);
311 }
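/*
   For reference (hedged, based on the checks above): this code path is reached only when the user
   combines -pc_gasm_use_hierachical_partitioning with -pc_gasm_total_subdomains N, where N is
   smaller than the number of MPI processes and divides it evenly; pc->pmat is then temporarily
   replaced by the permuted matrix osm->permutationP until PCSetUp_GASM() restores osm->pcmat.
*/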
312 
313 static PetscErrorCode PCSetUp_GASM(PC pc)
314 {
315   PC_GASM        *osm = (PC_GASM*)pc->data;
316   PetscErrorCode ierr;
317   PetscInt       i,nInnerIndices,nTotalInnerIndices;
318   PetscMPIInt    rank, size;
319   MatReuse       scall = MAT_REUSE_MATRIX;
320   KSP            ksp;
321   PC             subpc;
322   const char     *prefix,*pprefix;
323   Vec            x,y;
324   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
325   const PetscInt *oidxi;    /* Indices of the i-th local outer subdomain.                          */
326   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
327   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
328   IS             gois;      /* Disjoint union of the global indices of outer subdomains.          */
329   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
330   PetscScalar    *gxarray, *gyarray;
331   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
332   PetscInt       num_subdomains    = 0;
333   DM             *subdomain_dm     = NULL;
334   char           **subdomain_names = NULL;
335   PetscInt       *numbering;
336 
337 
338   PetscFunctionBegin;
339   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
340   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
341   if (!pc->setupcalled) {
342         /* use a hierarchical partitioning */
343     if (osm->hierarchicalpartitioning){
344       ierr = PCGASMSetHierarchicalPartitioning(pc);CHKERRQ(ierr);
345     }
346     if (osm->n == PETSC_DETERMINE) {
347       if (osm->N != PETSC_DETERMINE) {
348            /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
349            ierr = PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);CHKERRQ(ierr);
350       } else if (osm->dm_subdomains && pc->dm) {
351         /* try pc->dm next, if allowed */
352         PetscInt  d;
353         IS       *inner_subdomain_is, *outer_subdomain_is;
354         ierr = DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);CHKERRQ(ierr);
355         if (num_subdomains) {
356           ierr = PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);CHKERRQ(ierr);
357         }
358         for (d = 0; d < num_subdomains; ++d) {
359           if (inner_subdomain_is) {ierr = ISDestroy(&inner_subdomain_is[d]);CHKERRQ(ierr);}
360           if (outer_subdomain_is) {ierr = ISDestroy(&outer_subdomain_is[d]);CHKERRQ(ierr);}
361         }
362         ierr = PetscFree(inner_subdomain_is);CHKERRQ(ierr);
363         ierr = PetscFree(outer_subdomain_is);CHKERRQ(ierr);
364       } else {
365         /* still no subdomains; use one per processor */
366         osm->nmax = osm->n = 1;
367         ierr      = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
368         osm->N    = size;
369         ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
370       }
371     }
372     if (!osm->iis) {
373       /*
374        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
375        We create the requisite number of local inner subdomains and then expand them into
376        outer subdomains, if necessary.
377        */
378       ierr = PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);CHKERRQ(ierr);
379     }
380     if (!osm->ois) {
381       /*
382             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
383             has been requested, copy the inner subdomains over so they can be modified.
384       */
385       ierr = PetscMalloc1(osm->n,&osm->ois);CHKERRQ(ierr);
386       for (i=0; i<osm->n; ++i) {
387         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->iis[i] will be modified */
388           ierr = ISDuplicate(osm->iis[i],(osm->ois)+i);CHKERRQ(ierr);
389           ierr = ISCopy(osm->iis[i],osm->ois[i]);CHKERRQ(ierr);
390         } else {
391           ierr      = PetscObjectReference((PetscObject)((osm->iis)[i]));CHKERRQ(ierr);
392           osm->ois[i] = osm->iis[i];
393         }
394       }
395       if (osm->overlap>0 && osm->N>1) {
396         /* Extend the "overlapping" regions by a number of steps */
397         ierr = MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);CHKERRQ(ierr);
398       }
399     }
400 
401     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
402     if (osm->nmax == PETSC_DETERMINE) {
403       PetscMPIInt inwork,outwork;
404       /* determine global number of subdomains and the max number of local subdomains */
405       inwork     = osm->n;
406       ierr       = MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
407       osm->nmax  = outwork;
408     }
409     if (osm->N == PETSC_DETERMINE) {
410       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
411       ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);CHKERRQ(ierr);
412     }
413 
414     if (osm->sort_indices) {
415       for (i=0; i<osm->n; i++) {
416         ierr = ISSort(osm->ois[i]);CHKERRQ(ierr);
417         ierr = ISSort(osm->iis[i]);CHKERRQ(ierr);
418       }
419     }
420     ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
421     ierr = PCGASMPrintSubdomains(pc);CHKERRQ(ierr);
422 
423     /*
424        Merge the ISs, create merged vectors and restrictions.
425      */
426     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
427     on = 0;
428     for (i=0; i<osm->n; i++) {
429       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
430       on  += oni;
431     }
432     ierr = PetscMalloc1(on, &oidx);CHKERRQ(ierr);
433     on   = 0;
434     /* Merge local indices together */
435     for (i=0; i<osm->n; i++) {
436       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
437       ierr = ISGetIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
438       ierr = PetscArraycpy(oidx+on,oidxi,oni);CHKERRQ(ierr);
439       ierr = ISRestoreIndices(osm->ois[i],&oidxi);CHKERRQ(ierr);
440       on  += oni;
441     }
442     ierr = ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);CHKERRQ(ierr);
443     nTotalInnerIndices = 0;
444     for (i=0; i<osm->n; i++){
445       ierr = ISGetLocalSize(osm->iis[i],&nInnerIndices);CHKERRQ(ierr);
446       nTotalInnerIndices += nInnerIndices;
447     }
448     ierr = VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);CHKERRQ(ierr);
449     ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
450 
451     ierr = VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);CHKERRQ(ierr);
452     ierr = VecDuplicate(osm->gx,&osm->gy);CHKERRQ(ierr);
453     ierr = VecGetOwnershipRange(osm->gx, &gostart, NULL);CHKERRQ(ierr);
454     ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);CHKERRQ(ierr);
455     /* gois may contain indices that are not locally owned */
456     ierr = VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));CHKERRQ(ierr);
457     ierr = PetscMalloc1(osm->n,&numbering);CHKERRQ(ierr);
458     ierr = PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);CHKERRQ(ierr);
459     ierr = VecDestroy(&x);CHKERRQ(ierr);
460     ierr = ISDestroy(&gois);CHKERRQ(ierr);
461 
462     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
463     {
464       PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
465       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
466       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
467       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
468       IS              giis;          /* IS for the disjoint union of inner subdomains. */
469       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
470       PetscScalar    *array;
471       const PetscInt *indices;
472       PetscInt        k;
473       on = 0;
474       for (i=0; i<osm->n; i++) {
475         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
476         on  += oni;
477       }
478       ierr = PetscMalloc1(on, &iidx);CHKERRQ(ierr);
479       ierr = PetscMalloc1(on, &ioidx);CHKERRQ(ierr);
480       ierr = VecGetArray(y,&array);CHKERRQ(ierr);
481       /* mark each inner-subdomain entry with its subdomain's global number so overlap can be detected after scattering */
482       in   = 0;
483       for (i=0; i<osm->n; i++) {
484         ierr   = ISGetLocalSize(osm->iis[i],&ini);CHKERRQ(ierr);
485         for (k = 0; k < ini; ++k){
486           array[in+k] = numbering[i];
487         }
488         in += ini;
489       }
490       ierr = VecRestoreArray(y,&array);CHKERRQ(ierr);
491       ierr = VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492       ierr = VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493       ierr = VecGetOwnershipRange(osm->gy,&gostart, NULL);CHKERRQ(ierr);
494       ierr = VecGetArray(osm->gy,&array);CHKERRQ(ierr);
495       on  = 0;
496       in  = 0;
497       for (i=0; i<osm->n; i++) {
498         ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
499         ierr = ISGetIndices(osm->ois[i],&indices);CHKERRQ(ierr);
500         for (k=0; k<oni; k++) {
501           /*  skip overlapping indices to get inner domain */
502           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
503           iidx[in]    = indices[k];
504           ioidx[in++] = gostart+on+k;
505         }
506         ierr   = ISRestoreIndices(osm->ois[i], &indices);CHKERRQ(ierr);
507         on += oni;
508       }
509       ierr = VecRestoreArray(osm->gy,&array);CHKERRQ(ierr);
510       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);CHKERRQ(ierr);
511       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);CHKERRQ(ierr);
512       ierr = VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);CHKERRQ(ierr);
513       ierr = VecDestroy(&y);CHKERRQ(ierr);
514       ierr = ISDestroy(&giis);CHKERRQ(ierr);
515       ierr = ISDestroy(&giois);CHKERRQ(ierr);
516     }
517     ierr = ISDestroy(&goid);CHKERRQ(ierr);
518     ierr = PetscFree(numbering);CHKERRQ(ierr);
519 
520     /* Create the subdomain work vectors. */
521     ierr = PetscMalloc1(osm->n,&osm->x);CHKERRQ(ierr);
522     ierr = PetscMalloc1(osm->n,&osm->y);CHKERRQ(ierr);
523     ierr = VecGetArray(osm->gx, &gxarray);CHKERRQ(ierr);
524     ierr = VecGetArray(osm->gy, &gyarray);CHKERRQ(ierr);
525     for (i=0, on=0; i<osm->n; ++i, on += oni) {
526       PetscInt oNi;
527       ierr = ISGetLocalSize(osm->ois[i],&oni);CHKERRQ(ierr);
528       /* on a sub communicator */
529       ierr = ISGetSize(osm->ois[i],&oNi);CHKERRQ(ierr);
530       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);CHKERRQ(ierr);
531       ierr = VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);CHKERRQ(ierr);
532     }
533     ierr = VecRestoreArray(osm->gx, &gxarray);CHKERRQ(ierr);
534     ierr = VecRestoreArray(osm->gy, &gyarray);CHKERRQ(ierr);
535     /* Create the subdomain solvers */
536     ierr = PetscMalloc1(osm->n,&osm->ksp);CHKERRQ(ierr);
537     for (i=0; i<osm->n; i++) {
538       char subprefix[PETSC_MAX_PATH_LEN+1];
539       ierr        = KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);CHKERRQ(ierr);
540       ierr        = KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);CHKERRQ(ierr);
541       ierr        = PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);CHKERRQ(ierr);
542       ierr        = PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);CHKERRQ(ierr);
543       ierr        = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
544       ierr        = KSPGetPC(ksp,&subpc);CHKERRQ(ierr); /* Why do we need this here? */
545       if (subdomain_dm) {
546         ierr = KSPSetDM(ksp,subdomain_dm[i]);CHKERRQ(ierr);
547         ierr = DMDestroy(subdomain_dm+i);CHKERRQ(ierr);
548       }
549       ierr        = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
550       ierr        = KSPSetOptionsPrefix(ksp,prefix);CHKERRQ(ierr);
551       if (subdomain_names && subdomain_names[i]) {
552         ierr = PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);CHKERRQ(ierr);
553         ierr = KSPAppendOptionsPrefix(ksp,subprefix);CHKERRQ(ierr);
554         ierr = PetscFree(subdomain_names[i]);CHKERRQ(ierr);
555       }
556       ierr        = KSPAppendOptionsPrefix(ksp,"sub_");CHKERRQ(ierr);
557       osm->ksp[i] = ksp;
558     }
559     ierr = PetscFree(subdomain_dm);CHKERRQ(ierr);
560     ierr = PetscFree(subdomain_names);CHKERRQ(ierr);
561     scall = MAT_INITIAL_MATRIX;
562   } else { /* if (pc->setupcalled) */
563     /*
564        Destroy the submatrices from the previous iteration
565     */
566     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
567       ierr  = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
568       scall = MAT_INITIAL_MATRIX;
569     }
570     if (osm->permutationIS){
571       ierr = MatCreateSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);CHKERRQ(ierr);
572       ierr = PetscObjectReference((PetscObject)osm->permutationP);CHKERRQ(ierr);
573       osm->pcmat = pc->pmat;
574       pc->pmat   = osm->permutationP;
575     }
576   }
577 
578   /*
579      Extract the submatrices.
580   */
581   if (size > 1) {
582     ierr = MatCreateSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
583   } else {
584     ierr = MatCreateSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);CHKERRQ(ierr);
585   }
586   if (scall == MAT_INITIAL_MATRIX) {
587     ierr = PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);CHKERRQ(ierr);
588     for (i=0; i<osm->n; i++) {
589       ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);CHKERRQ(ierr);
590       ierr = PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);CHKERRQ(ierr);
591     }
592   }
593 
594   /* Return control to the user so that the submatrices can be modified (e.g., to apply
595      different boundary conditions for the submatrices than for the global problem) */
596   ierr = PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);CHKERRQ(ierr);
597 
598   /*
599      Loop over submatrices putting them into local ksps
600   */
601   for (i=0; i<osm->n; i++) {
602     ierr = KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);CHKERRQ(ierr);
603     if (!pc->setupcalled) {
604       ierr = KSPSetFromOptions(osm->ksp[i]);CHKERRQ(ierr);
605     }
606   }
607   if (osm->pcmat){
608     ierr = MatDestroy(&pc->pmat);CHKERRQ(ierr);
609     pc->pmat   = osm->pcmat;
610     osm->pcmat = NULL;
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
616 {
617   PC_GASM        *osm = (PC_GASM*)pc->data;
618   PetscErrorCode ierr;
619   PetscInt       i;
620 
621   PetscFunctionBegin;
622   for (i=0; i<osm->n; i++) {
623     ierr = KSPSetUp(osm->ksp[i]);CHKERRQ(ierr);
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
629 {
630   PC_GASM        *osm = (PC_GASM*)pc->data;
631   PetscErrorCode ierr;
632   PetscInt       i;
633   Vec            x,y;
634   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
635 
636   PetscFunctionBegin;
637   if (osm->pctoouter) {
638     ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
639     ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
640     x = osm->pcx;
641     y = osm->pcy;
642   } else {
643     x = xin;
644     y = yout;
645   }
646   /*
647      support for limiting the restriction or interpolation only to the inner
648      subdomain values (leaving the other values 0).
649   */
650   if (!(osm->type & PC_GASM_RESTRICT)) {
651     /* have to zero the work RHS since scatter may leave some slots empty */
652     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
653     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
654   } else {
655     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
656   }
657   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
658   if (!(osm->type & PC_GASM_RESTRICT)) {
659     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
660   } else {
661     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
662   }
663   /* do the subdomain solves */
664   for (i=0; i<osm->n; ++i) {
665     ierr = KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
666     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
667   }
668   /* do we need to zero y? */
669   ierr = VecZeroEntries(y);CHKERRQ(ierr);
670   if (!(osm->type & PC_GASM_INTERPOLATE)) {
671     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
672     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
673   } else {
674     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
675     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
676   }
677   if (osm->pctoouter) {
678     ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
679     ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
680   }
681   PetscFunctionReturn(0);
682 }
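/*
   Informally (a descriptive sketch, not additional executable code), the apply above computes

      y = sum_i P_i A_i^{-1} R_i x,

   where A_i is the i-th (outer-)subdomain matrix extracted in PCSetUp_GASM(). R_i is the
   inner-subdomain gather (girestriction) when the PC_GASM_RESTRICT bit of osm->type is not set
   and the outer-subdomain gather (gorestriction) when it is; the PC_GASM_INTERPOLATE bit plays
   the same role for the prolongation P_i that adds the subdomain solutions back into y.
*/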
683 
684 static PetscErrorCode PCMatApply_GASM(PC pc,Mat Xin,Mat Yout)
685 {
686   PC_GASM        *osm = (PC_GASM*)pc->data;
687   Mat            X,Y,O=NULL,Z,W;
688   Vec            x,y;
689   PetscInt       i,m,M,N;
690   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
691   PetscErrorCode ierr;
692 
693   PetscFunctionBegin;
694   if (osm->n != 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
695   ierr = MatGetSize(Xin,NULL,&N);CHKERRQ(ierr);
696   if (osm->pctoouter) {
697     ierr = VecGetLocalSize(osm->pcx,&m);CHKERRQ(ierr);
698     ierr = VecGetSize(osm->pcx,&M);CHKERRQ(ierr);
699     ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&O);CHKERRQ(ierr);
700     for (i = 0; i < N; ++i) {
701       ierr = MatDenseGetColumnVecRead(Xin,i,&x);
702       ierr = MatDenseGetColumnVecWrite(O,i,&y);
703       ierr = VecScatterBegin(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
704       ierr = VecScatterEnd(osm->pctoouter,x,y,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
705       ierr = MatDenseRestoreColumnVecWrite(O,i,&y);
706       ierr = MatDenseRestoreColumnVecRead(Xin,i,&x);
707     }
708     X = Y = O;
709   } else {
710     X = Xin;
711     Y = Yout;
712   }
713   /*
714      support for limiting the restriction or interpolation only to the inner
715      subdomain values (leaving the other values 0).
716   */
717   ierr = VecGetLocalSize(osm->x[0],&m);CHKERRQ(ierr);
718   ierr = VecGetSize(osm->x[0],&M);CHKERRQ(ierr);
719   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&Z);CHKERRQ(ierr);
720   for (i = 0; i < N; ++i) {
721     ierr = MatDenseGetColumnVecRead(X,i,&x);
722     ierr = MatDenseGetColumnVecWrite(Z,i,&y);
723     if (!(osm->type & PC_GASM_RESTRICT)) {
724       /* have to zero the work RHS since scatter may leave some slots empty */
725       ierr = VecZeroEntries(y);CHKERRQ(ierr);
726       ierr = VecScatterBegin(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
727       ierr = VecScatterEnd(osm->girestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
728     } else {
729       ierr = VecScatterBegin(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
730       ierr = VecScatterEnd(osm->gorestriction,x,y,INSERT_VALUES,forward);CHKERRQ(ierr);
731     }
732     ierr = MatDenseRestoreColumnVecWrite(Z,i,&y);
733     ierr = MatDenseRestoreColumnVecRead(X,i,&x);
734   }
735   ierr = MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]),m,PETSC_DECIDE,M,N,NULL,&W);CHKERRQ(ierr);
736   ierr = MatSetOption(Z,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
737   ierr = MatAssemblyBegin(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
738   ierr = MatAssemblyEnd(Z,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
739   /* do the subdomain solve */
740   ierr = KSPMatSolve(osm->ksp[0],Z,W);CHKERRQ(ierr);
741   ierr = KSPCheckSolve(osm->ksp[0],pc,NULL);CHKERRQ(ierr);
742   ierr = MatDestroy(&Z);CHKERRQ(ierr);
743   /* do we need to zero y? */
744   ierr = MatZeroEntries(Y);CHKERRQ(ierr);
745   for (i = 0; i < N; ++i) {
746     ierr = MatDenseGetColumnVecWrite(Y,i,&y);
747     ierr = MatDenseGetColumnVecRead(W,i,&x);
748     if (!(osm->type & PC_GASM_INTERPOLATE)) {
749       ierr = VecScatterBegin(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
750       ierr = VecScatterEnd(osm->girestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
751     } else {
752       ierr = VecScatterBegin(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
753       ierr = VecScatterEnd(osm->gorestriction,x,y,ADD_VALUES,reverse);CHKERRQ(ierr);
754     }
755     ierr = MatDenseRestoreColumnVecRead(W,i,&x);
756     if (osm->pctoouter) {
757       ierr = MatDenseGetColumnVecWrite(Yout,i,&x);
758       ierr = VecScatterBegin(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
759       ierr = VecScatterEnd(osm->pctoouter,y,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760       ierr = MatDenseRestoreColumnVecRead(Yout,i,&x);
761     }
762     ierr = MatDenseRestoreColumnVecWrite(Y,i,&y);
763   }
764   ierr = MatDestroy(&W);CHKERRQ(ierr);
765   ierr = MatDestroy(&O);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768 
769 static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
770 {
771   PC_GASM        *osm = (PC_GASM*)pc->data;
772   PetscErrorCode ierr;
773   PetscInt       i;
774   Vec            x,y;
775   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;
776 
777   PetscFunctionBegin;
778   if (osm->pctoouter){
779    ierr = VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780    ierr = VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
781    x = osm->pcx;
782    y = osm->pcy;
783   }else{
784         x = xin;
785         y = yout;
786   }
787   /*
788      Support for limiting the restriction or interpolation to only local
789      subdomain values (leaving the other values 0).
790 
791      Note: these are reversed from the PCApply_GASM() because we are applying the
792      transpose of the three terms
793   */
794   if (!(osm->type & PC_GASM_INTERPOLATE)) {
795     /* have to zero the work RHS since scatter may leave some slots empty */
796     ierr = VecZeroEntries(osm->gx);CHKERRQ(ierr);
797     ierr = VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
798   } else {
799     ierr = VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
800   }
801   ierr = VecZeroEntries(osm->gy);CHKERRQ(ierr);
802   if (!(osm->type & PC_GASM_INTERPOLATE)) {
803     ierr = VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
804   } else {
805     ierr = VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);CHKERRQ(ierr);
806   }
807   /* do the local solves */
808   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
809     ierr = KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);CHKERRQ(ierr);
810     ierr = KSPCheckSolve(osm->ksp[i],pc,osm->y[i]);CHKERRQ(ierr);
811   }
812   ierr = VecZeroEntries(y);CHKERRQ(ierr);
813   if (!(osm->type & PC_GASM_RESTRICT)) {
814     ierr = VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
815     ierr = VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
816   } else {
817     ierr = VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
818     ierr = VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);CHKERRQ(ierr);
819   }
820   if (osm->pctoouter){
821    ierr = VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822    ierr = VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823   }
824   PetscFunctionReturn(0);
825 }
826 
827 static PetscErrorCode PCReset_GASM(PC pc)
828 {
829   PC_GASM        *osm = (PC_GASM*)pc->data;
830   PetscErrorCode ierr;
831   PetscInt       i;
832 
833   PetscFunctionBegin;
834   if (osm->ksp) {
835     for (i=0; i<osm->n; i++) {
836       ierr = KSPReset(osm->ksp[i]);CHKERRQ(ierr);
837     }
838   }
839   if (osm->pmat) {
840     if (osm->n > 0) {
841       PetscMPIInt size;
842       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
843       if (size > 1) {
844         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
845         ierr = MatDestroyMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
846       } else {
847         ierr = MatDestroySubMatrices(osm->n,&osm->pmat);CHKERRQ(ierr);
848       }
849     }
850   }
851   if (osm->x) {
852     for (i=0; i<osm->n; i++) {
853       ierr = VecDestroy(&osm->x[i]);CHKERRQ(ierr);
854       ierr = VecDestroy(&osm->y[i]);CHKERRQ(ierr);
855     }
856   }
857   ierr = VecDestroy(&osm->gx);CHKERRQ(ierr);
858   ierr = VecDestroy(&osm->gy);CHKERRQ(ierr);
859 
860   ierr = VecScatterDestroy(&osm->gorestriction);CHKERRQ(ierr);
861   ierr = VecScatterDestroy(&osm->girestriction);CHKERRQ(ierr);
862   if (!osm->user_subdomains) {
863     ierr      = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
864     osm->N    = PETSC_DETERMINE;
865     osm->nmax = PETSC_DETERMINE;
866   }
867   if (osm->pctoouter){
868         ierr = VecScatterDestroy(&(osm->pctoouter));CHKERRQ(ierr);
869   }
870   if (osm->permutationIS){
871         ierr = ISDestroy(&(osm->permutationIS));CHKERRQ(ierr);
872   }
873   if (osm->pcx){
874         ierr = VecDestroy(&(osm->pcx));CHKERRQ(ierr);
875   }
876   if (osm->pcy){
877         ierr = VecDestroy(&(osm->pcy));CHKERRQ(ierr);
878   }
879   if (osm->permutationP){
880     ierr = MatDestroy(&(osm->permutationP));CHKERRQ(ierr);
881   }
882   if (osm->pcmat){
883         ierr = MatDestroy(&osm->pcmat);CHKERRQ(ierr);
884   }
885   PetscFunctionReturn(0);
886 }
887 
888 static PetscErrorCode PCDestroy_GASM(PC pc)
889 {
890   PC_GASM        *osm = (PC_GASM*)pc->data;
891   PetscErrorCode ierr;
892   PetscInt       i;
893 
894   PetscFunctionBegin;
895   ierr = PCReset_GASM(pc);CHKERRQ(ierr);
896   /* PCReset will not destroy subdomains, if user_subdomains is true. */
897   ierr = PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);CHKERRQ(ierr);
898   if (osm->ksp) {
899     for (i=0; i<osm->n; i++) {
900       ierr = KSPDestroy(&osm->ksp[i]);CHKERRQ(ierr);
901     }
902     ierr = PetscFree(osm->ksp);CHKERRQ(ierr);
903   }
904   ierr = PetscFree(osm->x);CHKERRQ(ierr);
905   ierr = PetscFree(osm->y);CHKERRQ(ierr);
906   ierr = PetscFree(pc->data);CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
911 {
912   PC_GASM        *osm = (PC_GASM*)pc->data;
913   PetscErrorCode ierr;
914   PetscInt       blocks,ovl;
915   PetscBool      flg;
916   PCGASMType     gasmtype;
917 
918   PetscFunctionBegin;
919   ierr = PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");CHKERRQ(ierr);
920   ierr = PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);CHKERRQ(ierr);
921   ierr = PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);CHKERRQ(ierr);
922   if (flg) {
923     ierr = PCGASMSetTotalSubdomains(pc,blocks);CHKERRQ(ierr);
924   }
925   ierr = PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);CHKERRQ(ierr);
926   if (flg) {
927     ierr = PCGASMSetOverlap(pc,ovl);CHKERRQ(ierr);
928     osm->dm_subdomains = PETSC_FALSE;
929   }
930   flg  = PETSC_FALSE;
931   ierr = PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);CHKERRQ(ierr);
932   if (flg) {ierr = PCGASMSetType(pc,gasmtype);CHKERRQ(ierr);}
933   ierr = PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);CHKERRQ(ierr);
934   ierr = PetscOptionsTail();CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
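/*
   Hypothetical command line (for illustration; ./app stands for any code that calls
   KSPSetFromOptions()) combining the options registered above with the per-subdomain
   "sub_" prefix appended in PCSetUp_GASM():

     mpiexec -n 4 ./app -pc_type gasm -pc_gasm_total_subdomains 2 -pc_gasm_overlap 2 \
                        -pc_gasm_type restrict -sub_ksp_type preonly -sub_pc_type lu
*/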
937 
938 /*------------------------------------------------------------------------------------*/
939 
940 /*@
941     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
942                                communicator.
943     Logically collective on pc
944 
945     Input Parameters:
946 +   pc  - the preconditioner
947 -   N   - total number of subdomains
948 
949 
950     Level: beginner
951 
952 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
953           PCGASMCreateSubdomains2D()
954 @*/
955 PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
956 {
957   PC_GASM        *osm = (PC_GASM*)pc->data;
958   PetscMPIInt    size,rank;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
963   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");
964 
965   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
966   osm->ois = osm->iis = NULL;
967 
968   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr);
969   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
970   osm->N    = N;
971   osm->n    = PETSC_DETERMINE;
972   osm->nmax = PETSC_DETERMINE;
973   osm->dm_subdomains = PETSC_FALSE;
974   PetscFunctionReturn(0);
975 }
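/*
   A short, hedged example of the intended use: on, say, 8 MPI ranks

     ierr = PCSetType(pc,PCGASM);CHKERRQ(ierr);
     ierr = PCGASMSetTotalSubdomains(pc,2);CHKERRQ(ierr);  // each subdomain straddles 4 ranks

   requests N = 2 subdomains, each spanning several processes; osm->n and osm->nmax are then
   computed in PCSetUp_GASM().
*/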
976 
977 static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
978 {
979   PC_GASM         *osm = (PC_GASM*)pc->data;
980   PetscErrorCode  ierr;
981   PetscInt        i;
982 
983   PetscFunctionBegin;
984   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
985   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");
986 
987   ierr = PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);CHKERRQ(ierr);
988   osm->iis  = osm->ois = NULL;
989   osm->n    = n;
990   osm->N    = PETSC_DETERMINE;
991   osm->nmax = PETSC_DETERMINE;
992   if (ois) {
993     ierr = PetscMalloc1(n,&osm->ois);CHKERRQ(ierr);
994     for (i=0; i<n; i++) {
995       ierr = PetscObjectReference((PetscObject)ois[i]);CHKERRQ(ierr);
996       osm->ois[i] = ois[i];
997     }
998     /*
999        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
1000        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
1001     */
1002     osm->overlap = -1;
1003     /* inner subdomains must be provided  */
1004     if (!iis) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Inner subdomain index sets (iis) must be provided when outer subdomains (ois) are given");
1005   }/* end if */
1006   if (iis) {
1007     ierr = PetscMalloc1(n,&osm->iis);CHKERRQ(ierr);
1008     for (i=0; i<n; i++) {
1009       ierr = PetscObjectReference((PetscObject)iis[i]);CHKERRQ(ierr);
1010       osm->iis[i] = iis[i];
1011     }
1012     if (!ois) {
1013       osm->ois = NULL;
1014       /* if the user does not provide outer indices, the corresponding outer index sets are created from the inner ones using the requested overlap (osm->overlap) in PCSetUp_GASM() */
1015     }
1016   }
1017   if (PetscDefined(USE_DEBUG)) {
1018     PetscInt        j,rstart,rend,*covered,lsize;
1019     const PetscInt  *indices;
1020     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
1021     ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
1022     ierr = PetscCalloc1(rend-rstart,&covered);CHKERRQ(ierr);
1023     /* check if the current processor owns indices from others */
1024     for (i=0; i<n; i++) {
1025       ierr = ISGetIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1026       ierr = ISGetLocalSize(osm->iis[i],&lsize);CHKERRQ(ierr);
1027       for (j=0; j<lsize; j++) {
1028         if (indices[j]<rstart || indices[j]>=rend) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot contain index %d, which is owned by another processor", indices[j]);
1029         else if (covered[indices[j]-rstart]==1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inner subdomains cannot overlap: index %d appears more than once",indices[j]);
1030         else covered[indices[j]-rstart] = 1;
1031       }
1032     ierr = ISRestoreIndices(osm->iis[i],&indices);CHKERRQ(ierr);
1033     }
1034     /* check if we miss any indices */
1035     for (i=rstart; i<rend; i++) {
1036       if (!covered[i-rstart]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"local entity %d was not covered by inner subdomains",i);
1037     }
1038     ierr = PetscFree(covered);CHKERRQ(ierr);
1039   }
1040   if (iis)  osm->user_subdomains = PETSC_TRUE;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
1045 {
1046   PC_GASM *osm = (PC_GASM*)pc->data;
1047 
1048   PetscFunctionBegin;
1049   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
1050   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1051   if (!pc->setupcalled) osm->overlap = ovl;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1056 {
1057   PC_GASM *osm = (PC_GASM*)pc->data;
1058 
1059   PetscFunctionBegin;
1060   osm->type     = type;
1061   osm->type_set = PETSC_TRUE;
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1066 {
1067   PC_GASM *osm = (PC_GASM*)pc->data;
1068 
1069   PetscFunctionBegin;
1070   osm->sort_indices = doSort;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1076         In particular, it would upset the global subdomain number calculation.
1077 */
1078 static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1079 {
1080   PC_GASM        *osm = (PC_GASM*)pc->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");
1085 
1086   if (n) *n = osm->n;
1087   if (first) {
1088     ierr    = MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1089     *first -= osm->n;
1090   }
1091   if (ksp) {
1092     /* Assume that local solves are now different; not necessarily
1093        true, though!  This flag is used only for PCView_GASM() */
1094     *ksp                        = osm->ksp;
1095     osm->same_subdomain_solvers = PETSC_FALSE;
1096   }
1097   PetscFunctionReturn(0);
1098 } /* PCGASMGetSubKSP_GASM() */
1099 
1100 /*@C
1101     PCGASMSetSubdomains - Sets the subdomains for this processor
1102     for the additive Schwarz preconditioner.
1103 
1104     Collective on pc
1105 
1106     Input Parameters:
1107 +   pc  - the preconditioner object
1108 .   n   - the number of subdomains for this processor
1109 .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1110 -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)
1111 
1112     Notes:
1113     The IS indices use the parallel, global numbering of the vector entries.
1114     Inner subdomains are those where the correction is applied.
1115     Outer subdomains are those where the residual necessary to obtain the
1116     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1117     Both inner and outer subdomains can extend over several processors.
1118     This processor's portion of a subdomain is known as a local subdomain.
1119 
1120     Inner subdomains cannot overlap with each other, do not have any entities from remote processors,
1121     and have to cover the entire local subdomain owned by the current processor. The index sets on each
1122     process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1123     on another MPI process.
1124 
1125     By default the GASM preconditioner uses 1 (local) subdomain per processor.
1126 
1127 
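    Example:
    A minimal sketch (not taken from a PETSc example); it assumes a parallel Mat A and a KSP ksp with
    its operators already set, builds one inner subdomain per process from that process's ownership
    range, passes NULL for the outer subdomains so that PETSc expands the inner ones by the requested
    overlap, and omits error checking.
.vb
      PC       pc;
      IS       iis[1];
      PetscInt rstart,rend;

      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      MatGetOwnershipRange(A,&rstart,&rend);
      ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iis[0]);
      PCGASMSetSubdomains(pc,1,iis,NULL);
      ISDestroy(&iis[0]);
.ve
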
1128     Level: advanced
1129 
1130 .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1131           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1132 @*/
1133 PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1134 {
1135   PC_GASM *osm = (PC_GASM*)pc->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1140   ierr = PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));CHKERRQ(ierr);
1141   osm->dm_subdomains = PETSC_FALSE;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 /*@
1146     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1147     additive Schwarz preconditioner.  Either all or no processors in the
1148     pc communicator must call this routine.
1149 
1150     Logically Collective on pc
1151 
1152     Input Parameters:
1153 +   pc  - the preconditioner context
1154 -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)
1155 
1156     Options Database Key:
1157 .   -pc_gasm_overlap <overlap> - Sets overlap
1158 
1159     Notes:
1160     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1161     multiple subdomains per processor or "straddling" subdomains that intersect
1162     multiple processors use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).
1163 
1164     The overlap defaults to 0, so if one desires that no additional
1165     overlap be computed beyond what may have been set with a call to
1166     PCGASMSetSubdomains(), then ovl must be set to 0.  In particular, if one does
1167     not explicitly set the subdomains in application code, then all overlap would be computed
1168     internally by PETSc, and using an overlap of 0 would result in a GASM
1169     variant that is equivalent to the block Jacobi preconditioner.
1170 
1171     Note that one can define initial index sets with any overlap via
1172     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1173     PETSc to extend that overlap further, if desired.
1174 
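    Example:
    A minimal sketch (assuming a KSP ksp whose operators are already set; error checking omitted):
.vb
      PC pc;

      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetOverlap(pc,2);
      KSPSetUp(ksp);
.ve
    The same effect can be obtained at run time with -pc_type gasm -pc_gasm_overlap 2.
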
1175     Level: intermediate
1176 
1177 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1178           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1179 @*/
1180 PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1181 {
1182   PetscErrorCode ierr;
1183   PC_GASM *osm = (PC_GASM*)pc->data;
1184 
1185   PetscFunctionBegin;
1186   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1187   PetscValidLogicalCollectiveInt(pc,ovl,2);
1188   ierr = PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));CHKERRQ(ierr);
1189   osm->dm_subdomains = PETSC_FALSE;
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 /*@
1194     PCGASMSetType - Sets the type of restriction and interpolation used
1195     for local problems in the additive Schwarz method.
1196 
1197     Logically Collective on PC
1198 
1199     Input Parameters:
1200 +   pc  - the preconditioner context
1201 -   type - variant of GASM, one of
1202 .vb
1203       PC_GASM_BASIC       - full interpolation and restriction
1204       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1205       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1206       PC_GASM_NONE        - local processor restriction and interpolation
1207 .ve
1208 
1209     Options Database Key:
1210 .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1211 
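    Example:
    A minimal sketch (assuming a PC pc of type PCGASM already exists); this is equivalent to
    the option -pc_gasm_type basic:
.vb
      PCGASMSetType(pc,PC_GASM_BASIC);
.ve
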
1212     Level: intermediate
1213 
1214 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1215           PCGASMCreateSubdomains2D()
1216 @*/
1217 PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1218 {
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1223   PetscValidLogicalCollectiveEnum(pc,type,2);
1224   ierr = PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*@
1229     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.
1230 
1231     Logically Collective on PC
1232 
1233     Input Parameters:
1234 +   pc  - the preconditioner context
1235 -   doSort - sort the subdomain indices
1236 
1237     Level: intermediate
1238 
1239 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1240           PCGASMCreateSubdomains2D()
1241 @*/
1242 PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1243 {
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1248   PetscValidLogicalCollectiveBool(pc,doSort,2);
1249   ierr = PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 /*@C
1254    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1255    this processor.
1256 
1257    Collective on PC iff first_local is requested
1258 
1259    Input Parameter:
1260 .  pc - the preconditioner context
1261 
1262    Output Parameters:
1263 +  n_local - the number of blocks on this processor or NULL
1264 .  first_local - the global number of the first block on this processor or NULL,
1265                  all processors must request or all must pass NULL
1266 -  ksp - the array of KSP contexts
1267 
1268    Note:
1269    After PCGASMGetSubKSP() the array of KSP contexts is not to be freed by the user
1270 
1271    Currently for some matrix implementations only 1 block per processor
1272    is supported.
1273 
1274    You must call KSPSetUp() before calling PCGASMGetSubKSP().
1275 
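   Example:
   A minimal sketch (assuming a KSP ksp whose PC has been set to PCGASM; error checking omitted).
   It switches every subdomain solver on this process to ILU:
.vb
      PC       pc,subpc;
      KSP      *subksp;
      PetscInt i,nlocal,first;

      KSPGetPC(ksp,&pc);
      KSPSetUp(ksp);
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCILU);
      }
.ve
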
1276    Level: advanced
1277 
1278 .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1279           PCGASMCreateSubdomains2D(),
1280 @*/
1281 PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1282 {
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1287   ierr = PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 /* -------------------------------------------------------------------------------------*/
1292 /*MC
1293    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1294            its own KSP object.
1295 
1296    Options Database Keys:
1297 +  -pc_gasm_total_subdomains <n>  - Sets total number of local subdomains to be distributed among processors
1298 .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1299 .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1300 .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1301 -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
1302 
1303      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors you
1304       will get a different convergence rate due to the default option of -pc_gasm_type restrict. Use
1305       -pc_gasm_type basic to use the standard GASM.
1306 
1307    Notes:
1308     Blocks can be shared by multiple processes.
1309 
1310      To set options on the solvers for all blocks, append -sub_ to all the KSP and PC
1311         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
1312 
1313      To set options on the solvers separately for each block, call PCGASMGetSubKSP()
1314          and set the options directly on the resulting KSP object (you can access its PC
1315          with KSPGetPC())
1316 
1317 
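     Example:
     A sketch of typical command-line usage; the executable name ./app is a placeholder:
.vb
       mpiexec -n 4 ./app -ksp_type gmres -pc_type gasm -pc_gasm_overlap 1 -sub_ksp_type preonly -sub_pc_type ilu -ksp_view
.ve
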
1318    Level: beginner
1319 
1320     References:
1321 +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1322      Courant Institute, New York University Technical report
1323 -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1324     Cambridge University Press.
1325 
1326 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1327            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1328            PCSetModifySubMatrices(), PCGASMSetOverlap(), PCGASMSetType()
1329 
1330 M*/
1331 
1332 PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1333 {
1334   PetscErrorCode ierr;
1335   PC_GASM        *osm;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscNewLog(pc,&osm);CHKERRQ(ierr);
1339 
1340   osm->N                        = PETSC_DETERMINE;
1341   osm->n                        = PETSC_DECIDE;
1342   osm->nmax                     = PETSC_DETERMINE;
1343   osm->overlap                  = 0;
1344   osm->ksp                      = NULL;
1345   osm->gorestriction            = NULL;
1346   osm->girestriction            = NULL;
1347   osm->pctoouter                = NULL;
1348   osm->gx                       = NULL;
1349   osm->gy                       = NULL;
1350   osm->x                        = NULL;
1351   osm->y                        = NULL;
1352   osm->pcx                      = NULL;
1353   osm->pcy                      = NULL;
1354   osm->permutationIS            = NULL;
1355   osm->permutationP             = NULL;
1356   osm->pcmat                    = NULL;
1357   osm->ois                      = NULL;
1358   osm->iis                      = NULL;
1359   osm->pmat                     = NULL;
1360   osm->type                     = PC_GASM_RESTRICT;
1361   osm->same_subdomain_solvers   = PETSC_TRUE;
1362   osm->sort_indices             = PETSC_TRUE;
1363   osm->dm_subdomains            = PETSC_FALSE;
1364   osm->hierarchicalpartitioning = PETSC_FALSE;
1365 
1366   pc->data                 = (void*)osm;
1367   pc->ops->apply           = PCApply_GASM;
1368   pc->ops->matapply        = PCMatApply_GASM;
1369   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1370   pc->ops->setup           = PCSetUp_GASM;
1371   pc->ops->reset           = PCReset_GASM;
1372   pc->ops->destroy         = PCDestroy_GASM;
1373   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1374   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1375   pc->ops->view            = PCView_GASM;
1376   pc->ops->applyrichardson = NULL;
1377 
1378   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);CHKERRQ(ierr);
1379   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);CHKERRQ(ierr);
1380   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);CHKERRQ(ierr);
1381   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1387 {
1388   MatPartitioning mpart;
1389   const char      *prefix;
1390   PetscInt        i,j,rstart,rend,bs;
1391   PetscBool       hasop, isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1392   Mat             Ad     = NULL, adj;
1393   IS              ispart,isnumb,*is;
1394   PetscErrorCode  ierr;
1395 
1396   PetscFunctionBegin;
1397   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of local subdomains must be > 0, got nloc = %D",nloc);
1398 
1399   /* Get prefix, row distribution, and block size */
1400   ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr);
1401   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1402   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
1403   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);
1404 
1405   /* Get diagonal block from matrix if possible */
1406   ierr = MatHasOperation(A,MATOP_GET_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1407   if (hasop) {
1408     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
1409   }
1410   if (Ad) {
1411     ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
1412     if (!isbaij) {ierr = PetscObjectBaseTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);CHKERRQ(ierr);}
1413   }
1414   if (Ad && nloc > 1) {
1415     PetscBool  match,done;
1416     /* Try to setup a good matrix partitioning if available */
1417     ierr = MatPartitioningCreate(PETSC_COMM_SELF,&mpart);CHKERRQ(ierr);
1418     ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
1419     ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
1420     ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);CHKERRQ(ierr);
1421     if (!match) {
1422       ierr = PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);CHKERRQ(ierr);
1423     }
1424     if (!match) { /* assume a "good" partitioner is available */
1425       PetscInt       na;
1426       const PetscInt *ia,*ja;
1427       ierr = MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1428       if (done) {
1429         /* Build adjacency matrix by hand. Unfortunately a call to
1430            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1431            remove the block-aij structure and we cannot expect
1432            MatPartitioning to split vertices as we need */
1433         PetscInt       i,j,len,nnz,cnt,*iia=NULL,*jja=NULL;
1434         const PetscInt *row;
1435         nnz = 0;
1436         for (i=0; i<na; i++) { /* count number of nonzeros */
1437           len = ia[i+1] - ia[i];
1438           row = ja + ia[i];
1439           for (j=0; j<len; j++) {
1440             if (row[j] == i) { /* don't count diagonal */
1441               len--; break;
1442             }
1443           }
1444           nnz += len;
1445         }
1446         ierr   = PetscMalloc1(na+1,&iia);CHKERRQ(ierr);
1447         ierr   = PetscMalloc1(nnz,&jja);CHKERRQ(ierr);
1448         nnz    = 0;
1449         iia[0] = 0;
1450         for (i=0; i<na; i++) { /* fill adjacency */
1451           cnt = 0;
1452           len = ia[i+1] - ia[i];
1453           row = ja + ia[i];
1454           for (j=0; j<len; j++) {
1455             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1456           }
1457           nnz += cnt;
1458           iia[i+1] = nnz;
1459         }
1460         /* Partitioning of the adjacency matrix */
1461         ierr      = MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);CHKERRQ(ierr);
1462         ierr      = MatPartitioningSetAdjacency(mpart,adj);CHKERRQ(ierr);
1463         ierr      = MatPartitioningSetNParts(mpart,nloc);CHKERRQ(ierr);
1464         ierr      = MatPartitioningApply(mpart,&ispart);CHKERRQ(ierr);
1465         ierr      = ISPartitioningToNumbering(ispart,&isnumb);CHKERRQ(ierr);
1466         ierr      = MatDestroy(&adj);CHKERRQ(ierr);
1467         foundpart = PETSC_TRUE;
1468       }
1469       ierr = MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);CHKERRQ(ierr);
1470     }
1471     ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
1472   }
1473   ierr = PetscMalloc1(nloc,&is);CHKERRQ(ierr);
1474   if (!foundpart) {
1475 
1476     /* Partitioning by contiguous chunks of rows */
1477 
1478     PetscInt mbs   = (rend-rstart)/bs;
1479     PetscInt start = rstart;
1480     for (i=0; i<nloc; i++) {
1481       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1482       ierr   = ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);CHKERRQ(ierr);
1483       start += count;
1484     }
1485 
1486   } else {
1487 
1488     /* Partitioning by adjacency of diagonal block  */
1489 
1490     const PetscInt *numbering;
1491     PetscInt       *count,nidx,*indices,*newidx,start=0;
1492     /* Get node count in each partition */
1493     ierr = PetscMalloc1(nloc,&count);CHKERRQ(ierr);
1494     ierr = ISPartitioningCount(ispart,nloc,count);CHKERRQ(ierr);
1495     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1496       for (i=0; i<nloc; i++) count[i] *= bs;
1497     }
1498     /* Build indices from node numbering */
1499     ierr = ISGetLocalSize(isnumb,&nidx);CHKERRQ(ierr);
1500     ierr = PetscMalloc1(nidx,&indices);CHKERRQ(ierr);
1501     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1502     ierr = ISGetIndices(isnumb,&numbering);CHKERRQ(ierr);
1503     ierr = PetscSortIntWithPermutation(nidx,numbering,indices);CHKERRQ(ierr);
1504     ierr = ISRestoreIndices(isnumb,&numbering);CHKERRQ(ierr);
1505     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1506       ierr = PetscMalloc1(nidx*bs,&newidx);CHKERRQ(ierr);
1507       for (i=0; i<nidx; i++) {
1508         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1509       }
1510       ierr    = PetscFree(indices);CHKERRQ(ierr);
1511       nidx   *= bs;
1512       indices = newidx;
1513     }
1514     /* Shift to get global indices */
1515     for (i=0; i<nidx; i++) indices[i] += rstart;
1516 
1517     /* Build the index sets for each block */
1518     for (i=0; i<nloc; i++) {
1519       ierr   = ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);CHKERRQ(ierr);
1520       ierr   = ISSort(is[i]);CHKERRQ(ierr);
1521       start += count[i];
1522     }
1523 
1524     ierr = PetscFree(count);CHKERRQ(ierr);
1525     ierr = PetscFree(indices);CHKERRQ(ierr);
1526     ierr = ISDestroy(&isnumb);CHKERRQ(ierr);
1527     ierr = ISDestroy(&ispart);CHKERRQ(ierr);
1528   }
1529   *iis = is;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1534 {
1535   PetscErrorCode  ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatSubdomainsCreateCoalesce(A,N,n,iis);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 /*@C
1543    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1544    Schwarz preconditioner for any problem based on its matrix.
1545 
1546    Collective
1547 
1548    Input Parameters:
1549 +  A       - The global matrix operator
1550 -  N       - the number of global subdomains requested
1551 
1552    Output Parameters:
1553 +  n   - the number of subdomains created on this processor
1554 -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)
1555 
1556    Level: advanced
1557 
1558    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1559          When N < size, the subdomains are 'straddling' (i.e., extend across processor boundaries) and are no longer local.
1560          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1561          outer subdomains will be automatically generated from these according to the requested amount of
1562          overlap; this is currently supported only with local subdomains.
1563 
1564 
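   Example:
   A minimal sketch (assuming a parallel Mat A and a PC pc of type PCGASM; error checking omitted).
   It requests 4 subdomains across the whole communicator and lets PETSc generate the outer
   subdomains from the requested overlap:
.vb
      PetscInt n;
      IS       *iis;

      PCGASMCreateSubdomains(A,4,&n,&iis);
      PCGASMSetSubdomains(pc,n,iis,NULL);
      PCGASMDestroySubdomains(n,&iis,NULL);
.ve
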
1565 .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1566 @*/
1567 PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1568 {
1569   PetscMPIInt     size;
1570   PetscErrorCode  ierr;
1571 
1572   PetscFunctionBegin;
1573   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1574   PetscValidPointer(iis,4);
1575 
1576   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1577   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1578   if (N >= size) {
1579     *n = N/size + (N%size);
1580     ierr = PCGASMCreateLocalSubdomains(A,*n,iis);CHKERRQ(ierr);
1581   } else {
1582     ierr = PCGASMCreateStraddlingSubdomains(A,N,n,iis);CHKERRQ(ierr);
1583   }
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 /*@C
1588    PCGASMDestroySubdomains - Destroys the index sets created with
1589    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1590    called after setting subdomains with PCGASMSetSubdomains().
1591 
1592    Collective
1593 
1594    Input Parameters:
1595 +  n   - the number of index sets
1596 .  iis - the array of inner subdomains,
1597 -  ois - the array of outer subdomains, can be NULL
1598 
1599    Level: intermediate
1600 
1601    Notes:
1602    This is merely a convenience subroutine that walks each list,
1603    destroys each IS on the list, and then frees the list. At the end the
1604    list pointers are set to NULL.
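
   Example:
   A minimal sketch (assuming inner and outer subdomain arrays iis and ois of length n, e.g. as
   created by PCGASMCreateSubdomains2D() and already passed to PCGASMSetSubdomains()):
.vb
      PCGASMDestroySubdomains(n,&iis,&ois);
.ve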
1605 
1606 .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1607 @*/
1608 PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1609 {
1610   PetscInt       i;
1611   PetscErrorCode ierr;
1612 
1613   PetscFunctionBegin;
1614   if (n <= 0) PetscFunctionReturn(0);
1615   if (ois) {
1616     PetscValidPointer(ois,3);
1617     if (*ois) {
1618       PetscValidPointer(*ois,3);
1619       for (i=0; i<n; i++) {
1620         ierr = ISDestroy(&(*ois)[i]);CHKERRQ(ierr);
1621       }
1622       ierr = PetscFree((*ois));CHKERRQ(ierr);
1623     }
1624   }
1625   if (iis) {
1626     PetscValidPointer(iis,2);
1627     if (*iis) {
1628       PetscValidPointer(*iis,2);
1629       for (i=0; i<n; i++) {
1630         ierr = ISDestroy(&(*iis)[i]);CHKERRQ(ierr);
1631       }
1632       ierr = PetscFree((*iis));CHKERRQ(ierr);
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1639   {                                                                                                       \
1640     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1641     /*                                                                                                    \
1642      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1643      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1644      subdomain).                                                                                          \
1645      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1646      of the intersection.                                                                                 \
1647     */                                                                                                    \
1648     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1649     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1650     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1651     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1652     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1653     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1654     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */        \
1655     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1656     /* Now compute the size of the local subdomain n. */ \
1657     *n = 0;                                               \
1658     if (*ylow_loc < *yhigh_loc) {                           \
1659       PetscInt width = xright-xleft;                     \
1660       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1661       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1662       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1663     } \
1664   }
1665 
1666 /*@
1667    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1668    preconditioner for a two-dimensional problem on a regular grid.
1669 
1670    Collective
1671 
1672    Input Parameters:
1673 +  M, N               - the global number of grid points in the x and y directions
1674 .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1675 .  dof                - degrees of freedom per node
1676 -  overlap            - overlap in mesh lines
1677 
1678    Output Parameters:
1679 +  Nsub - the number of local subdomains created
1680 .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1681 -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains
1682 
1683 
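   Example:
   A minimal sketch (assuming a PC pc of type PCGASM whose operators come from a 128 x 128 grid
   with one degree of freedom per node; error checking omitted). It creates a 4 x 4 array of
   subdomains with an overlap of 2 mesh lines:
.vb
      PetscInt nsub;
      IS       *iis,*ois;

      PCGASMCreateSubdomains2D(pc,128,128,4,4,1,2,&nsub,&iis,&ois);
      PCGASMSetSubdomains(pc,nsub,iis,ois);
      PCGASMDestroySubdomains(nsub,&iis,&ois);
.ve
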
1684    Level: advanced
1685 
1686 .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1687 @*/
1688 PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1689 {
1690   PetscErrorCode ierr;
1691   PetscMPIInt    size, rank;
1692   PetscInt       i, j;
1693   PetscInt       maxheight, maxwidth;
1694   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1695   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1696   PetscInt       x[2][2], y[2][2], n[2];
1697   PetscInt       first, last;
1698   PetscInt       nidx, *idx;
1699   PetscInt       ii,jj,s,q,d;
1700   PetscInt       k,kk;
1701   PetscMPIInt    color;
1702   MPI_Comm       comm, subcomm;
1703   IS             **xis = NULL, **is = ois, **is_local = iis;
1704 
1705   PetscFunctionBegin;
1706   ierr = PetscObjectGetComm((PetscObject)pc, &comm);CHKERRQ(ierr);
1707   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1708   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1709   ierr = MatGetOwnershipRange(pc->pmat, &first, &last);CHKERRQ(ierr);
1710   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1711                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);
1712 
1713   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1714   s      = 0;
1715   ystart = 0;
1716   for (j=0; j<Ndomains; ++j) {
1717     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1718     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1719     /* Vertical domain limits with an overlap. */
1720     ylow   = PetscMax(ystart - overlap,0);
1721     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1722     xstart = 0;
1723     for (i=0; i<Mdomains; ++i) {
1724       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1725       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1726       /* Horizontal domain limits with an overlap. */
1727       xleft  = PetscMax(xstart - overlap,0);
1728       xright = PetscMin(xstart + maxwidth + overlap,M);
1729       /*
1730          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1731       */
1732       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1733       if (nidx) ++s;
1734       xstart += maxwidth;
1735     } /* for (i = 0; i < Mdomains; ++i) */
1736     ystart += maxheight;
1737   } /* for (j = 0; j < Ndomains; ++j) */
1738 
1739   /* Now we can allocate the necessary number of ISs. */
1740   *nsub  = s;
1741   ierr   = PetscMalloc1(*nsub,is);CHKERRQ(ierr);
1742   ierr   = PetscMalloc1(*nsub,is_local);CHKERRQ(ierr);
1743   s      = 0;
1744   ystart = 0;
1745   for (j=0; j<Ndomains; ++j) {
1746     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1747     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1748     /* Vertical domain limits with an overlap. */
1749     y[0][0] = PetscMax(ystart - overlap,0);
1750     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1751     /* Vertical domain limits without an overlap. */
1752     y[1][0] = ystart;
1753     y[1][1] = PetscMin(ystart + maxheight,N);
1754     xstart  = 0;
1755     for (i=0; i<Mdomains; ++i) {
1756       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1757       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1758       /* Horizontal domain limits with an overlap. */
1759       x[0][0] = PetscMax(xstart - overlap,0);
1760       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1761       /* Horizontal domain limits without an overlap. */
1762       x[1][0] = xstart;
1763       x[1][1] = PetscMin(xstart+maxwidth,M);
1764       /*
1765          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1766          Do this twice: first for the domains with overlaps, and once without.
1767          During the first pass create the subcommunicators, and use them on the second pass as well.
1768       */
1769       for (q = 0; q < 2; ++q) {
1770         PetscBool split = PETSC_FALSE;
1771         /*
1772           domain limits, (xleft, xright) and (ylow, yhigh), are adjusted
1773           according to whether the domain with an overlap or without is considered.
1774         */
1775         xleft = x[q][0]; xright = x[q][1];
1776         ylow  = y[q][0]; yhigh  = y[q][1];
1777         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1778         nidx *= dof;
1779         n[q]  = nidx;
1780         /*
1781          Based on the counted number of indices in the local domain *with an overlap*,
1782          construct a subcommunicator of all the processors supporting this domain.
1783          Observe that a domain with an overlap might have nontrivial local support,
1784          while the domain without an overlap might not.  Hence, the decision to participate
1785          in the subcommunicator must be based on the domain with an overlap.
1786          */
1787         if (q == 0) {
1788           if (nidx) color = 1;
1789           else color = MPI_UNDEFINED;
1790           ierr = MPI_Comm_split(comm, color, rank, &subcomm);CHKERRQ(ierr);
1791           split = PETSC_TRUE;
1792         }
1793         /*
1794          Proceed only if the number of local indices *with an overlap* is nonzero.
1795          */
1796         if (n[0]) {
1797           if (q == 0) xis = is;
1798           if (q == 1) {
1799             /*
1800              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1801              Moreover, if the overlap is zero, the two ISs are identical.
1802              */
1803             if (overlap == 0) {
1804               (*is_local)[s] = (*is)[s];
1805               ierr           = PetscObjectReference((PetscObject)(*is)[s]);CHKERRQ(ierr);
1806               continue;
1807             } else {
1808               xis     = is_local;
1809               subcomm = ((PetscObject)(*is)[s])->comm;
1810             }
1811           } /* if (q == 1) */
1812           idx  = NULL;
1813           ierr = PetscMalloc1(nidx,&idx);CHKERRQ(ierr);
1814           if (nidx) {
1815             k = 0;
1816             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1817               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1818               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1819               kk = dof*(M*jj + x0);
1820               for (ii=x0; ii<x1; ++ii) {
1821                 for (d = 0; d < dof; ++d) {
1822                   idx[k++] = kk++;
1823                 }
1824               }
1825             }
1826           }
1827           ierr = ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);CHKERRQ(ierr);
1828           if (split) {
1829             ierr = MPI_Comm_free(&subcomm);CHKERRQ(ierr);
1830           }
1831         }/* if (n[0]) */
1832       }/* for (q = 0; q < 2; ++q) */
1833       if (n[0]) ++s;
1834       xstart += maxwidth;
1835     } /* for (i = 0; i < Mdomains; ++i) */
1836     ystart += maxheight;
1837   } /* for (j = 0; j < Ndomains; ++j) */
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 /*@C
1842     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1843     for the additive Schwarz preconditioner.
1844 
1845     Not Collective
1846 
1847     Input Parameter:
1848 .   pc - the preconditioner context
1849 
1850     Output Parameters:
1851 +   n   - the number of subdomains for this processor (default value = 1)
1852 .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1853 -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)
1854 
1855 
1856     Notes:
1857     The user is responsible for destroying the ISs and freeing the returned arrays.
1858     The IS numbering is in the parallel, global numbering of the vector.
1859 
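    Example:
    A minimal sketch (assuming a PC pc of type PCGASM that has been set up; error checking omitted).
    Only the returned arrays are freed here; see the Notes above regarding the ISs:
.vb
      PetscInt n;
      IS       *iis,*ois;

      PCGASMGetSubdomains(pc,&n,&iis,&ois);
      PetscFree(iis);
      PetscFree(ois);
.ve
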
1860     Level: advanced
1861 
1862 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1863           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1864 @*/
1865 PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1866 {
1867   PC_GASM        *osm;
1868   PetscErrorCode ierr;
1869   PetscBool      match;
1870   PetscInt       i;
1871 
1872   PetscFunctionBegin;
1873   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1874   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1875   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1876   osm = (PC_GASM*)pc->data;
1877   if (n) *n = osm->n;
1878   if (iis) {
1879     ierr = PetscMalloc1(osm->n, iis);CHKERRQ(ierr);
1880   }
1881   if (ois) {
1882     ierr = PetscMalloc1(osm->n, ois);CHKERRQ(ierr);
1883   }
1884   if (iis || ois) {
1885     for (i = 0; i < osm->n; ++i) {
1886       if (iis) (*iis)[i] = osm->iis[i];
1887       if (ois) (*ois)[i] = osm->ois[i];
1888     }
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 /*@C
1894     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1895     only) for the additive Schwarz preconditioner.
1896 
1897     Not Collective
1898 
1899     Input Parameter:
1900 .   pc - the preconditioner context
1901 
1902     Output Parameters:
1903 +   n   - the number of matrices for this processor (default value = 1)
1904 -   mat - the matrices
1905 
1906     Notes:
1907     Matrices returned by this routine have the same communicators as the index sets (IS)
1908     used to define the subdomains in PCGASMSetSubdomains().
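
    Example:
    A minimal sketch (assuming a PC pc of type PCGASM after PCSetUp() or KSPSetUp(); error checking
    omitted). The submatrices belong to the PC and must not be destroyed:
.vb
      PetscInt n,rows,cols;
      Mat      *submat;

      PCGASMGetSubmatrices(pc,&n,&submat);
      if (n) MatGetSize(submat[0],&rows,&cols);
.ve
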
1909     Level: advanced
1910 
1911 .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1912           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1913 @*/
1914 PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1915 {
1916   PC_GASM        *osm;
1917   PetscErrorCode ierr;
1918   PetscBool      match;
1919 
1920   PetscFunctionBegin;
1921   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1922   PetscValidIntPointer(n,2);
1923   if (mat) PetscValidPointer(mat,3);
1924   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1925   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1926   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1927   osm = (PC_GASM*)pc->data;
1928   if (n) *n = osm->n;
1929   if (mat) *mat = osm->pmat;
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 /*@
1934     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1935     Logically Collective
1936 
1937     Input Parameters:
1938 +   pc  - the preconditioner
1939 -   flg - boolean indicating whether to use subdomains defined by the DM
1940 
1941     Options Database Key:
1942 .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM when possible
1943 
1944     Level: intermediate
1945 
1946     Notes:
1947     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() or PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1948     so setting PCGASMSetSubdomains() with nontrivial subdomain ISs or any of PCGASMSetTotalSubdomains() and PCGASMSetOverlap()
1949     automatically turns off the use of subdomains defined by the DM.
1950 
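    Example:
    A minimal sketch (assuming a KSP ksp attached, e.g. via KSPSetDM(), to a DM that supports
    DMCreateDomainDecomposition(); error checking omitted):
.vb
      PC pc;

      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);
      KSPSetUp(ksp);
.ve
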
1951 .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1952           PCGASMCreateSubdomains2D()
1953 @*/
1954 PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1955 {
1956   PC_GASM        *osm = (PC_GASM*)pc->data;
1957   PetscErrorCode ierr;
1958   PetscBool      match;
1959 
1960   PetscFunctionBegin;
1961   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1962   PetscValidLogicalCollectiveBool(pc,flg,2);
1963   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Not for a setup PC.");
1964   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1965   if (match) {
1966     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1967       osm->dm_subdomains = flg;
1968     }
1969   }
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 /*@
1974     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1975     Not Collective
1976 
1977     Input Parameter:
1978 .   pc  - the preconditioner
1979 
1980     Output Parameter:
1981 .   flg - boolean indicating whether to use subdomains defined by the DM
1982 
1983     Level: intermediate
1984 
1985 .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
1986           PCGASMCreateSubdomains2D()
1987 @*/
1988 PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
1989 {
1990   PC_GASM        *osm = (PC_GASM*)pc->data;
1991   PetscErrorCode ierr;
1992   PetscBool      match;
1993 
1994   PetscFunctionBegin;
1995   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1996   PetscValidBoolPointer(flg,2);
1997   ierr = PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);CHKERRQ(ierr);
1998   if (match) {
1999     if (flg) *flg = osm->dm_subdomains;
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003