1 #include <petsc/private/dmpleximpl.h>   /*I      "petscdmplex.h"   I*/
2 #include <petsc/private/partitionerimpl.h>
3 #include <petsc/private/hashseti.h>
4 
PETSC_STATIC_INLINE PetscInt DMPlex_GlobalID(PetscInt point) { return point >= 0 ? point : -(point+1); }
6 
static PetscErrorCode DMPlexCreatePartitionerGraph_Native(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
8 {
9   PetscInt       dim, depth, p, pStart, pEnd, a, adjSize, idx, size;
10   PetscInt      *adj = NULL, *vOffsets = NULL, *graph = NULL;
11   IS             cellNumbering;
12   const PetscInt *cellNum;
13   PetscBool      useCone, useClosure;
14   PetscSection   section;
15   PetscSegBuffer adjBuffer;
16   PetscSF        sfPoint;
17   PetscInt       *adjCells = NULL, *remoteCells = NULL;
18   const PetscInt *local;
19   PetscInt       nroots, nleaves, l;
20   PetscMPIInt    rank;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank);CHKERRQ(ierr);
25   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
26   ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
27   if (dim != depth) {
28     /* We do not handle the uninterpolated case here */
29     ierr = DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency);CHKERRQ(ierr);
30     /* DMPlexCreateNeighborCSR does not make a numbering */
31     if (globalNumbering) {ierr = DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering);CHKERRQ(ierr);}
32     /* Different behavior for empty graphs */
33     if (!*numVertices) {
34       ierr = PetscMalloc1(1, offsets);CHKERRQ(ierr);
35       (*offsets)[0] = 0;
36     }
37     /* Broken in parallel */
38     if (rank && *numVertices) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
39     PetscFunctionReturn(0);
40   }
41   ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr);
42   ierr = DMPlexGetHeightStratum(dm, height, &pStart, &pEnd);CHKERRQ(ierr);
43   /* Build adjacency graph via a section/segbuffer */
44   ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), &section);CHKERRQ(ierr);
45   ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr);
46   ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr);
47   /* Always use FVM adjacency to create partitioner graph */
48   ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr);
49   ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr);
50   ierr = DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, sfPoint, &cellNumbering);CHKERRQ(ierr);
51   if (globalNumbering) {
52     ierr = PetscObjectReference((PetscObject)cellNumbering);CHKERRQ(ierr);
53     *globalNumbering = cellNumbering;
54   }
55   ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr);
56   /* For all boundary faces (including faces adjacent to a ghost cell), record the local cell in adjCells
57      Broadcast adjCells to remoteCells (to get cells from roots) and Reduce adjCells to remoteCells (to get cells from leaves)
58    */
59   ierr = PetscSFGetGraph(sfPoint, &nroots, &nleaves, &local, NULL);CHKERRQ(ierr);
60   if (nroots >= 0) {
61     PetscInt fStart, fEnd, f;
62 
63     ierr = PetscCalloc2(nroots, &adjCells, nroots, &remoteCells);CHKERRQ(ierr);
64     ierr = DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd);CHKERRQ(ierr);
65     for (l = 0; l < nroots; ++l) adjCells[l] = -3;
66     for (f = fStart; f < fEnd; ++f) {
67       const PetscInt *support;
68       PetscInt        supportSize;
69 
70       ierr = DMPlexGetSupport(dm, f, &support);CHKERRQ(ierr);
71       ierr = DMPlexGetSupportSize(dm, f, &supportSize);CHKERRQ(ierr);
72       if (supportSize == 1) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]);
73       else if (supportSize == 2) {
74         ierr = PetscFindInt(support[0], nleaves, local, &p);CHKERRQ(ierr);
75         if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[1]]);
76         ierr = PetscFindInt(support[1], nleaves, local, &p);CHKERRQ(ierr);
77         if (p >= 0) adjCells[f] = DMPlex_GlobalID(cellNum[support[0]]);
78       }
79       /* Handle non-conforming meshes */
80       if (supportSize > 2) {
81         PetscInt        numChildren, i;
82         const PetscInt *children;
83 
84         ierr = DMPlexGetTreeChildren(dm, f, &numChildren, &children);CHKERRQ(ierr);
85         for (i = 0; i < numChildren; ++i) {
86           const PetscInt child = children[i];
87           if (fStart <= child && child < fEnd) {
88             ierr = DMPlexGetSupport(dm, child, &support);CHKERRQ(ierr);
89             ierr = DMPlexGetSupportSize(dm, child, &supportSize);CHKERRQ(ierr);
90             if (supportSize == 1) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]);
91             else if (supportSize == 2) {
92               ierr = PetscFindInt(support[0], nleaves, local, &p);CHKERRQ(ierr);
93               if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[1]]);
94               ierr = PetscFindInt(support[1], nleaves, local, &p);CHKERRQ(ierr);
95               if (p >= 0) adjCells[child] = DMPlex_GlobalID(cellNum[support[0]]);
96             }
97           }
98         }
99       }
100     }
101     for (l = 0; l < nroots; ++l) remoteCells[l] = -1;
102     ierr = PetscSFBcastBegin(dm->sf, MPIU_INT, adjCells, remoteCells);CHKERRQ(ierr);
103     ierr = PetscSFBcastEnd(dm->sf, MPIU_INT, adjCells, remoteCells);CHKERRQ(ierr);
104     ierr = PetscSFReduceBegin(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX);CHKERRQ(ierr);
105     ierr = PetscSFReduceEnd(dm->sf, MPIU_INT, adjCells, remoteCells, MPI_MAX);CHKERRQ(ierr);
106   }
107   /* Combine local and global adjacencies */
108   for (*numVertices = 0, p = pStart; p < pEnd; p++) {
109     /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
110     if (nroots > 0) {if (cellNum[p] < 0) continue;}
111     /* Add remote cells */
112     if (remoteCells) {
113       const PetscInt gp = DMPlex_GlobalID(cellNum[p]);
114       PetscInt       coneSize, numChildren, c, i;
115       const PetscInt *cone, *children;
116 
117       ierr = DMPlexGetCone(dm, p, &cone);CHKERRQ(ierr);
118       ierr = DMPlexGetConeSize(dm, p, &coneSize);CHKERRQ(ierr);
119       for (c = 0; c < coneSize; ++c) {
120         const PetscInt point = cone[c];
121         if (remoteCells[point] >= 0 && remoteCells[point] != gp) {
122           PetscInt *PETSC_RESTRICT pBuf;
123           ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr);
124           ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr);
125           *pBuf = remoteCells[point];
126         }
127         /* Handle non-conforming meshes */
128         ierr = DMPlexGetTreeChildren(dm, point, &numChildren, &children);CHKERRQ(ierr);
129         for (i = 0; i < numChildren; ++i) {
130           const PetscInt child = children[i];
131           if (remoteCells[child] >= 0 && remoteCells[child] != gp) {
132             PetscInt *PETSC_RESTRICT pBuf;
133             ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr);
134             ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr);
135             *pBuf = remoteCells[child];
136           }
137         }
138       }
139     }
140     /* Add local cells */
141     adjSize = PETSC_DETERMINE;
142     ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr);
143     for (a = 0; a < adjSize; ++a) {
144       const PetscInt point = adj[a];
145       if (point != p && pStart <= point && point < pEnd) {
146         PetscInt *PETSC_RESTRICT pBuf;
147         ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr);
148         ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr);
149         *pBuf = DMPlex_GlobalID(cellNum[point]);
150       }
151     }
152     (*numVertices)++;
153   }
154   ierr = PetscFree(adj);CHKERRQ(ierr);
155   ierr = PetscFree2(adjCells, remoteCells);CHKERRQ(ierr);
156   ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr);
157 
158   /* Derive CSR graph from section/segbuffer */
159   ierr = PetscSectionSetUp(section);CHKERRQ(ierr);
160   ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr);
161   ierr = PetscMalloc1(*numVertices+1, &vOffsets);CHKERRQ(ierr);
162   for (idx = 0, p = pStart; p < pEnd; p++) {
163     if (nroots > 0) {if (cellNum[p] < 0) continue;}
164     ierr = PetscSectionGetOffset(section, p, &(vOffsets[idx++]));CHKERRQ(ierr);
165   }
166   vOffsets[*numVertices] = size;
167   ierr = PetscSegBufferExtractAlloc(adjBuffer, &graph);CHKERRQ(ierr);
168 
169   if (nroots >= 0) {
170     /* Filter out duplicate edges using section/segbuffer */
171     ierr = PetscSectionReset(section);CHKERRQ(ierr);
172     ierr = PetscSectionSetChart(section, 0, *numVertices);CHKERRQ(ierr);
173     for (p = 0; p < *numVertices; p++) {
174       PetscInt start = vOffsets[p], end = vOffsets[p+1];
175       PetscInt numEdges = end-start, *PETSC_RESTRICT edges;
176       ierr = PetscSortRemoveDupsInt(&numEdges, &graph[start]);CHKERRQ(ierr);
177       ierr = PetscSectionSetDof(section, p, numEdges);CHKERRQ(ierr);
178       ierr = PetscSegBufferGetInts(adjBuffer, numEdges, &edges);CHKERRQ(ierr);
179       ierr = PetscArraycpy(edges, &graph[start], numEdges);CHKERRQ(ierr);
180     }
181     ierr = PetscFree(vOffsets);CHKERRQ(ierr);
182     ierr = PetscFree(graph);CHKERRQ(ierr);
183     /* Derive CSR graph from section/segbuffer */
184     ierr = PetscSectionSetUp(section);CHKERRQ(ierr);
185     ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr);
186     ierr = PetscMalloc1(*numVertices+1, &vOffsets);CHKERRQ(ierr);
187     for (idx = 0, p = 0; p < *numVertices; p++) {
188       ierr = PetscSectionGetOffset(section, p, &(vOffsets[idx++]));CHKERRQ(ierr);
189     }
190     vOffsets[*numVertices] = size;
191     ierr = PetscSegBufferExtractAlloc(adjBuffer, &graph);CHKERRQ(ierr);
192   } else {
193     /* Sort adjacencies (not strictly necessary) */
194     for (p = 0; p < *numVertices; p++) {
195       PetscInt start = vOffsets[p], end = vOffsets[p+1];
196       ierr = PetscSortInt(end-start, &graph[start]);CHKERRQ(ierr);
197     }
198   }
199 
200   if (offsets) *offsets = vOffsets;
201   if (adjacency) *adjacency = graph;
202 
203   /* Cleanup */
204   ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr);
205   ierr = PetscSectionDestroy(&section);CHKERRQ(ierr);
206   ierr = ISRestoreIndices(cellNumbering, &cellNum);CHKERRQ(ierr);
207   ierr = ISDestroy(&cellNumbering);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
static PetscErrorCode DMPlexCreatePartitionerGraph_ViaMat(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
212 {
213   Mat            conn, CSR;
214   IS             fis, cis, cis_own;
215   PetscSF        sfPoint;
216   const PetscInt *rows, *cols, *ii, *jj;
217   PetscInt       *idxs,*idxs2;
218   PetscInt       dim, depth, floc, cloc, i, M, N, c, lm, m, cStart, cEnd, fStart, fEnd;
219   PetscMPIInt    rank;
220   PetscBool      flg;
221   PetscErrorCode ierr;
222 
223   PetscFunctionBegin;
224   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank);CHKERRQ(ierr);
225   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
226   ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
227   if (dim != depth) {
228     /* We do not handle the uninterpolated case here */
229     ierr = DMPlexCreateNeighborCSR(dm, height, numVertices, offsets, adjacency);CHKERRQ(ierr);
230     /* DMPlexCreateNeighborCSR does not make a numbering */
231     if (globalNumbering) {ierr = DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, globalNumbering);CHKERRQ(ierr);}
232     /* Different behavior for empty graphs */
233     if (!*numVertices) {
234       ierr = PetscMalloc1(1, offsets);CHKERRQ(ierr);
235       (*offsets)[0] = 0;
236     }
237     /* Broken in parallel */
238     if (rank && *numVertices) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Parallel partitioning of uninterpolated meshes not supported");
239     PetscFunctionReturn(0);
240   }
241   /* Interpolated and parallel case */
242 
243   /* numbering */
244   ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr);
245   ierr = DMPlexGetHeightStratum(dm, height, &cStart, &cEnd);CHKERRQ(ierr);
246   ierr = DMPlexGetHeightStratum(dm, height+1, &fStart, &fEnd);CHKERRQ(ierr);
247   ierr = DMPlexCreateNumbering_Plex(dm, cStart, cEnd, 0, &N, sfPoint, &cis);CHKERRQ(ierr);
248   ierr = DMPlexCreateNumbering_Plex(dm, fStart, fEnd, 0, &M, sfPoint, &fis);CHKERRQ(ierr);
249   if (globalNumbering) {
250     ierr = ISDuplicate(cis, globalNumbering);CHKERRQ(ierr);
251   }
252 
253   /* get positive global ids and local sizes for facets and cells */
254   ierr = ISGetLocalSize(fis, &m);CHKERRQ(ierr);
255   ierr = ISGetIndices(fis, &rows);CHKERRQ(ierr);
256   ierr = PetscMalloc1(m, &idxs);CHKERRQ(ierr);
257   for (i = 0, floc = 0; i < m; i++) {
258     const PetscInt p = rows[i];
259 
260     if (p < 0) {
261       idxs[i] = -(p+1);
262     } else {
263       idxs[i] = p;
264       floc   += 1;
265     }
266   }
267   ierr = ISRestoreIndices(fis, &rows);CHKERRQ(ierr);
268   ierr = ISDestroy(&fis);CHKERRQ(ierr);
269   ierr = ISCreateGeneral(PETSC_COMM_SELF, m, idxs, PETSC_OWN_POINTER, &fis);CHKERRQ(ierr);
270 
271   ierr = ISGetLocalSize(cis, &m);CHKERRQ(ierr);
272   ierr = ISGetIndices(cis, &cols);CHKERRQ(ierr);
273   ierr = PetscMalloc1(m, &idxs);CHKERRQ(ierr);
274   ierr = PetscMalloc1(m, &idxs2);CHKERRQ(ierr);
275   for (i = 0, cloc = 0; i < m; i++) {
276     const PetscInt p = cols[i];
277 
278     if (p < 0) {
279       idxs[i] = -(p+1);
280     } else {
281       idxs[i]       = p;
282       idxs2[cloc++] = p;
283     }
284   }
285   ierr = ISRestoreIndices(cis, &cols);CHKERRQ(ierr);
286   ierr = ISDestroy(&cis);CHKERRQ(ierr);
287   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm), m, idxs, PETSC_OWN_POINTER, &cis);CHKERRQ(ierr);
288   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)dm), cloc, idxs2, PETSC_OWN_POINTER, &cis_own);CHKERRQ(ierr);
289 
  /* Create matrix to hold F-C connectivity (MatMatTransposeMult not supported for MPIAIJ) */
291   ierr = MatCreate(PetscObjectComm((PetscObject)dm), &conn);CHKERRQ(ierr);
292   ierr = MatSetSizes(conn, floc, cloc, M, N);CHKERRQ(ierr);
293   ierr = MatSetType(conn, MATMPIAIJ);CHKERRQ(ierr);
294   ierr = DMPlexGetMaxSizes(dm, NULL, &lm);CHKERRQ(ierr);
295   ierr = MPI_Allreduce(&lm, &m, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject) dm));CHKERRQ(ierr);
296   ierr = MatMPIAIJSetPreallocation(conn, m, NULL, m, NULL);CHKERRQ(ierr);
297 
298   /* Assemble matrix */
299   ierr = ISGetIndices(fis, &rows);CHKERRQ(ierr);
300   ierr = ISGetIndices(cis, &cols);CHKERRQ(ierr);
301   for (c = cStart; c < cEnd; c++) {
302     const PetscInt *cone;
303     PetscInt        coneSize, row, col, f;
304 
305     col  = cols[c-cStart];
306     ierr = DMPlexGetCone(dm, c, &cone);CHKERRQ(ierr);
307     ierr = DMPlexGetConeSize(dm, c, &coneSize);CHKERRQ(ierr);
308     for (f = 0; f < coneSize; f++) {
309       const PetscScalar v = 1.0;
310       const PetscInt *children;
311       PetscInt        numChildren, ch;
312 
313       row  = rows[cone[f]-fStart];
314       ierr = MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES);CHKERRQ(ierr);
315 
316       /* non-conforming meshes */
317       ierr = DMPlexGetTreeChildren(dm, cone[f], &numChildren, &children);CHKERRQ(ierr);
318       for (ch = 0; ch < numChildren; ch++) {
319         const PetscInt child = children[ch];
320 
321         if (child < fStart || child >= fEnd) continue;
322         row  = rows[child-fStart];
323         ierr = MatSetValues(conn, 1, &row, 1, &col, &v, INSERT_VALUES);CHKERRQ(ierr);
324       }
325     }
326   }
327   ierr = ISRestoreIndices(fis, &rows);CHKERRQ(ierr);
328   ierr = ISRestoreIndices(cis, &cols);CHKERRQ(ierr);
329   ierr = ISDestroy(&fis);CHKERRQ(ierr);
330   ierr = ISDestroy(&cis);CHKERRQ(ierr);
331   ierr = MatAssemblyBegin(conn, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
332   ierr = MatAssemblyEnd(conn, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
333 
334   /* Get parallel CSR by doing conn^T * conn */
335   ierr = MatTransposeMatMult(conn, conn, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CSR);CHKERRQ(ierr);
336   ierr = MatDestroy(&conn);CHKERRQ(ierr);
337 
338   /* extract local part of the CSR */
339   ierr = MatMPIAIJGetLocalMat(CSR, MAT_INITIAL_MATRIX, &conn);CHKERRQ(ierr);
340   ierr = MatDestroy(&CSR);CHKERRQ(ierr);
341   ierr = MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg);CHKERRQ(ierr);
342   if (!flg) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
343 
344   /* get back requested output */
345   if (numVertices) *numVertices = m;
346   if (offsets) {
347     ierr = PetscCalloc1(m+1, &idxs);CHKERRQ(ierr);
348     for (i = 1; i < m+1; i++) idxs[i] = ii[i] - i; /* ParMetis does not like self-connectivity */
349     *offsets = idxs;
350   }
351   if (adjacency) {
352     ierr = PetscMalloc1(ii[m] - m, &idxs);CHKERRQ(ierr);
353     ierr = ISGetIndices(cis_own, &rows);CHKERRQ(ierr);
354     for (i = 0, c = 0; i < m; i++) {
355       PetscInt j, g = rows[i];
356 
357       for (j = ii[i]; j < ii[i+1]; j++) {
358         if (jj[j] == g) continue; /* again, self-connectivity */
359         idxs[c++] = jj[j];
360       }
361     }
362     if (c != ii[m] - m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %D != %D",c,ii[m]-m);
363     ierr = ISRestoreIndices(cis_own, &rows);CHKERRQ(ierr);
364     *adjacency = idxs;
365   }
366 
367   /* cleanup */
368   ierr = ISDestroy(&cis_own);CHKERRQ(ierr);
369   ierr = MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &m, &ii, &jj, &flg);CHKERRQ(ierr);
370   if (!flg) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "No IJ format");
371   ierr = MatDestroy(&conn);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
/*@C
  DMPlexCreatePartitionerGraph - Create a CSR graph of point connections for the partitioner

  Input Parameters:
+ dm      - The mesh DM
- height  - Height of the strata from which to construct the graph

  Output Parameters:
+ numVertices     - Number of vertices in the graph
. offsets         - Point offsets in the graph
. adjacency       - Point connectivity in the graph
- globalNumbering - A map from the local cell numbering to the global numbering used in "adjacency". Negative indicates that the cell is a duplicate from another process.

  Notes:
  The user can control the definition of adjacency for the mesh using DMSetAdjacency(). They should choose the combination appropriate for the function
  representation on the mesh. If requested, globalNumbering needs to be destroyed by the caller; offsets and adjacency need to be freed with PetscFree().

  Level: developer

.seealso: PetscPartitionerGetType(), PetscPartitionerCreate(), DMSetAdjacency()
@*/
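/* Example usage (a minimal sketch; dm is assumed to be an interpolated DMPlex, and the
   usual ierr declaration and function scaffolding are assumed to be in place):

     PetscInt  nv, *offsets, *adjacency;
     IS        globalNumbering;

     ierr = DMPlexCreatePartitionerGraph(dm, 0, &nv, &offsets, &adjacency, &globalNumbering);CHKERRQ(ierr);
     ... hand the CSR data (nv, offsets, adjacency) to a partitioner ...
     ierr = PetscFree(offsets);CHKERRQ(ierr);
     ierr = PetscFree(adjacency);CHKERRQ(ierr);
     ierr = ISDestroy(&globalNumbering);CHKERRQ(ierr);
*/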
PetscErrorCode DMPlexCreatePartitionerGraph(DM dm, PetscInt height, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency, IS *globalNumbering)
396 {
397   PetscErrorCode ierr;
398   PetscBool      usemat = PETSC_FALSE;
399 
400   PetscFunctionBegin;
401   ierr = PetscOptionsGetBool(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-dm_plex_csr_via_mat", &usemat, NULL);CHKERRQ(ierr);
402   if (usemat) {
403     ierr = DMPlexCreatePartitionerGraph_ViaMat(dm, height, numVertices, offsets, adjacency, globalNumbering);CHKERRQ(ierr);
404   } else {
405     ierr = DMPlexCreatePartitionerGraph_Native(dm, height, numVertices, offsets, adjacency, globalNumbering);CHKERRQ(ierr);
406   }
407   PetscFunctionReturn(0);
408 }
409 
/*@C
  DMPlexCreateNeighborCSR - Create a mesh graph (cell-cell adjacency) in parallel CSR format.

  Collective on DM

  Input Parameters:
+ dm         - The DMPlex
- cellHeight - The height of mesh points to treat as cells (default should be 0)

  Output Parameters:
+ numVertices - The number of local vertices in the graph, i.e. the number of local cells in the mesh
. offsets     - The offset to the adjacency list for each cell
- adjacency   - The adjacency list for all cells

  Note: This is suitable for input to a mesh partitioner like ParMetis.

  Level: advanced

.seealso: DMPlexCreate()
@*/
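/* Example usage (a minimal sketch; the output arrays are owned by the caller and
   released with PetscFree()):

     PetscInt numVertices, *offsets = NULL, *adjacency = NULL;

     ierr = DMPlexCreateNeighborCSR(dm, 0, &numVertices, &offsets, &adjacency);CHKERRQ(ierr);
     ... feed the CSR arrays to a partitioning library ...
     ierr = PetscFree(offsets);CHKERRQ(ierr);
     ierr = PetscFree(adjacency);CHKERRQ(ierr);
*/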
PetscErrorCode DMPlexCreateNeighborCSR(DM dm, PetscInt cellHeight, PetscInt *numVertices, PetscInt **offsets, PetscInt **adjacency)
431 {
432   const PetscInt maxFaceCases = 30;
433   PetscInt       numFaceCases = 0;
  PetscInt       numFaceVertices[30]; /* = maxFaceCases; a fixed bound is used because C89 does not allow variable-length arrays here */
435   PetscInt      *off, *adj;
436   PetscInt      *neighborCells = NULL;
437   PetscInt       dim, cellDim, depth = 0, faceDepth, cStart, cEnd, c, numCells, cell;
438   PetscErrorCode ierr;
439 
440   PetscFunctionBegin;
441   /* For parallel partitioning, I think you have to communicate supports */
442   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
443   cellDim = dim - cellHeight;
444   ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
445   ierr = DMPlexGetHeightStratum(dm, cellHeight, &cStart, &cEnd);CHKERRQ(ierr);
446   if (cEnd - cStart == 0) {
447     if (numVertices) *numVertices = 0;
448     if (offsets)   *offsets   = NULL;
449     if (adjacency) *adjacency = NULL;
450     PetscFunctionReturn(0);
451   }
452   numCells  = cEnd - cStart;
453   faceDepth = depth - cellHeight;
454   if (dim == depth) {
455     PetscInt f, fStart, fEnd;
456 
457     ierr = PetscCalloc1(numCells+1, &off);CHKERRQ(ierr);
458     /* Count neighboring cells */
459     ierr = DMPlexGetHeightStratum(dm, cellHeight+1, &fStart, &fEnd);CHKERRQ(ierr);
460     for (f = fStart; f < fEnd; ++f) {
461       const PetscInt *support;
462       PetscInt        supportSize;
463       ierr = DMPlexGetSupportSize(dm, f, &supportSize);CHKERRQ(ierr);
464       ierr = DMPlexGetSupport(dm, f, &support);CHKERRQ(ierr);
465       if (supportSize == 2) {
466         PetscInt numChildren;
467 
468         ierr = DMPlexGetTreeChildren(dm,f,&numChildren,NULL);CHKERRQ(ierr);
469         if (!numChildren) {
470           ++off[support[0]-cStart+1];
471           ++off[support[1]-cStart+1];
472         }
473       }
474     }
475     /* Prefix sum */
476     for (c = 1; c <= numCells; ++c) off[c] += off[c-1];
477     if (adjacency) {
478       PetscInt *tmp;
479 
480       ierr = PetscMalloc1(off[numCells], &adj);CHKERRQ(ierr);
481       ierr = PetscMalloc1(numCells+1, &tmp);CHKERRQ(ierr);
482       ierr = PetscArraycpy(tmp, off, numCells+1);CHKERRQ(ierr);
483       /* Get neighboring cells */
484       for (f = fStart; f < fEnd; ++f) {
485         const PetscInt *support;
486         PetscInt        supportSize;
487         ierr = DMPlexGetSupportSize(dm, f, &supportSize);CHKERRQ(ierr);
488         ierr = DMPlexGetSupport(dm, f, &support);CHKERRQ(ierr);
489         if (supportSize == 2) {
490           PetscInt numChildren;
491 
492           ierr = DMPlexGetTreeChildren(dm,f,&numChildren,NULL);CHKERRQ(ierr);
493           if (!numChildren) {
494             adj[tmp[support[0]-cStart]++] = support[1];
495             adj[tmp[support[1]-cStart]++] = support[0];
496           }
497         }
498       }
499       if (PetscDefined(USE_DEBUG)) {
        for (c = 0; c < cEnd-cStart; ++c) if (tmp[c] != off[c+1]) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Offset %D != %D for cell %D", tmp[c], off[c+1], c+cStart);
501       }
502       ierr = PetscFree(tmp);CHKERRQ(ierr);
503     }
504     if (numVertices) *numVertices = numCells;
505     if (offsets)   *offsets   = off;
506     if (adjacency) *adjacency = adj;
507     PetscFunctionReturn(0);
508   }
509   /* Setup face recognition */
510   if (faceDepth == 1) {
511     PetscInt cornersSeen[30] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; /* Could use PetscBT */
512 
513     for (c = cStart; c < cEnd; ++c) {
514       PetscInt corners;
515 
516       ierr = DMPlexGetConeSize(dm, c, &corners);CHKERRQ(ierr);
517       if (!cornersSeen[corners]) {
518         PetscInt nFV;
519 
520         if (numFaceCases >= maxFaceCases) SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Exceeded maximum number of face recognition cases");
521         cornersSeen[corners] = 1;
522 
523         ierr = DMPlexGetNumFaceVertices(dm, cellDim, corners, &nFV);CHKERRQ(ierr);
524 
525         numFaceVertices[numFaceCases++] = nFV;
526       }
527     }
528   }
529   ierr = PetscCalloc1(numCells+1, &off);CHKERRQ(ierr);
530   /* Count neighboring cells */
531   for (cell = cStart; cell < cEnd; ++cell) {
532     PetscInt numNeighbors = PETSC_DETERMINE, n;
533 
534     ierr = DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells);CHKERRQ(ierr);
535     /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
536     for (n = 0; n < numNeighbors; ++n) {
537       PetscInt        cellPair[2];
538       PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
539       PetscInt        meetSize = 0;
540       const PetscInt *meet    = NULL;
541 
542       cellPair[0] = cell; cellPair[1] = neighborCells[n];
543       if (cellPair[0] == cellPair[1]) continue;
544       if (!found) {
545         ierr = DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet);CHKERRQ(ierr);
546         if (meetSize) {
547           PetscInt f;
548 
549           for (f = 0; f < numFaceCases; ++f) {
550             if (numFaceVertices[f] == meetSize) {
551               found = PETSC_TRUE;
552               break;
553             }
554           }
555         }
556         ierr = DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet);CHKERRQ(ierr);
557       }
558       if (found) ++off[cell-cStart+1];
559     }
560   }
561   /* Prefix sum */
562   for (cell = 1; cell <= numCells; ++cell) off[cell] += off[cell-1];
563 
564   if (adjacency) {
565     ierr = PetscMalloc1(off[numCells], &adj);CHKERRQ(ierr);
566     /* Get neighboring cells */
567     for (cell = cStart; cell < cEnd; ++cell) {
568       PetscInt numNeighbors = PETSC_DETERMINE, n;
569       PetscInt cellOffset   = 0;
570 
571       ierr = DMPlexGetAdjacency_Internal(dm, cell, PETSC_TRUE, PETSC_FALSE, PETSC_FALSE, &numNeighbors, &neighborCells);CHKERRQ(ierr);
572       /* Get meet with each cell, and check with recognizer (could optimize to check each pair only once) */
573       for (n = 0; n < numNeighbors; ++n) {
574         PetscInt        cellPair[2];
575         PetscBool       found    = faceDepth > 1 ? PETSC_TRUE : PETSC_FALSE;
576         PetscInt        meetSize = 0;
577         const PetscInt *meet    = NULL;
578 
579         cellPair[0] = cell; cellPair[1] = neighborCells[n];
580         if (cellPair[0] == cellPair[1]) continue;
581         if (!found) {
582           ierr = DMPlexGetMeet(dm, 2, cellPair, &meetSize, &meet);CHKERRQ(ierr);
583           if (meetSize) {
584             PetscInt f;
585 
586             for (f = 0; f < numFaceCases; ++f) {
587               if (numFaceVertices[f] == meetSize) {
588                 found = PETSC_TRUE;
589                 break;
590               }
591             }
592           }
593           ierr = DMPlexRestoreMeet(dm, 2, cellPair, &meetSize, &meet);CHKERRQ(ierr);
594         }
595         if (found) {
596           adj[off[cell-cStart]+cellOffset] = neighborCells[n];
597           ++cellOffset;
598         }
599       }
600     }
601   }
602   ierr = PetscFree(neighborCells);CHKERRQ(ierr);
603   if (numVertices) *numVertices = numCells;
604   if (offsets)   *offsets   = off;
605   if (adjacency) *adjacency = adj;
606   PetscFunctionReturn(0);
607 }
608 
/*@
  PetscPartitionerDMPlexPartition - Create a non-overlapping partition of the cells in the mesh

  Collective on PetscPartitioner

  Input Parameters:
+ part          - The PetscPartitioner
. dm            - The mesh DM
- targetSection - The PetscSection describing the absolute weight of each partition (can be NULL)

  Output Parameters:
+ partSection - The PetscSection giving the division of points by partition
- partition   - The list of points by partition

  Notes:
    If the DM has a local section associated, each point to be partitioned will be weighted by the total number of dofs identified
    by the section in the transitive closure of the point.

  Level: developer

.seealso: DMPlexDistribute(), PetscPartitionerCreate(), PetscSectionCreate(), PetscSectionSetChart(), PetscPartitionerPartition()
@*/
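/* Example usage (a sketch of driving the partitioner directly; in most applications
   DMPlexDistribute() performs these steps internally):

     PetscPartitioner part;
     PetscSection     partSection;
     IS               partition;

     ierr = DMPlexGetPartitioner(dm, &part);CHKERRQ(ierr);
     ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), &partSection);CHKERRQ(ierr);
     ierr = PetscPartitionerDMPlexPartition(part, dm, NULL, partSection, &partition);CHKERRQ(ierr);
     ... use partSection and partition to build a migration label ...
     ierr = ISDestroy(&partition);CHKERRQ(ierr);
     ierr = PetscSectionDestroy(&partSection);CHKERRQ(ierr);
*/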
PetscErrorCode PetscPartitionerDMPlexPartition(PetscPartitioner part, DM dm, PetscSection targetSection, PetscSection partSection, IS *partition)
632 {
633   PetscMPIInt    size;
634   PetscBool      isplex;
635   PetscErrorCode ierr;
636   PetscSection   vertSection = NULL;
637 
638   PetscFunctionBegin;
639   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 1);
640   PetscValidHeaderSpecific(dm, DM_CLASSID, 2);
641   if (targetSection) PetscValidHeaderSpecific(targetSection, PETSC_SECTION_CLASSID, 3);
642   PetscValidHeaderSpecific(partSection, PETSC_SECTION_CLASSID, 4);
643   PetscValidPointer(partition, 5);
644   ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr);
645   if (!isplex) SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Not for type %s",((PetscObject)dm)->type_name);
646   ierr = MPI_Comm_size(PetscObjectComm((PetscObject) part), &size);CHKERRQ(ierr);
647   if (size == 1) {
648     PetscInt *points;
649     PetscInt  cStart, cEnd, c;
650 
651     ierr = DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd);CHKERRQ(ierr);
652     ierr = PetscSectionReset(partSection);CHKERRQ(ierr);
653     ierr = PetscSectionSetChart(partSection, 0, size);CHKERRQ(ierr);
654     ierr = PetscSectionSetDof(partSection, 0, cEnd-cStart);CHKERRQ(ierr);
655     ierr = PetscSectionSetUp(partSection);CHKERRQ(ierr);
656     ierr = PetscMalloc1(cEnd-cStart, &points);CHKERRQ(ierr);
657     for (c = cStart; c < cEnd; ++c) points[c] = c;
658     ierr = ISCreateGeneral(PetscObjectComm((PetscObject) part), cEnd-cStart, points, PETSC_OWN_POINTER, partition);CHKERRQ(ierr);
659     PetscFunctionReturn(0);
660   }
661   if (part->height == 0) {
662     PetscInt numVertices = 0;
663     PetscInt *start     = NULL;
664     PetscInt *adjacency = NULL;
665     IS       globalNumbering;
666 
667     if (!part->noGraph || part->viewGraph) {
668       ierr = DMPlexCreatePartitionerGraph(dm, part->height, &numVertices, &start, &adjacency, &globalNumbering);CHKERRQ(ierr);
669     } else { /* only compute the number of owned local vertices */
670       const PetscInt *idxs;
671       PetscInt       p, pStart, pEnd;
672 
673       ierr = DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd);CHKERRQ(ierr);
674       ierr = DMPlexCreateNumbering_Plex(dm, pStart, pEnd, 0, NULL, dm->sf, &globalNumbering);CHKERRQ(ierr);
675       ierr = ISGetIndices(globalNumbering, &idxs);CHKERRQ(ierr);
676       for (p = 0; p < pEnd - pStart; p++) numVertices += idxs[p] < 0 ? 0 : 1;
677       ierr = ISRestoreIndices(globalNumbering, &idxs);CHKERRQ(ierr);
678     }
679     if (part->usevwgt) {
680       PetscSection   section = dm->localSection, clSection = NULL;
681       IS             clPoints = NULL;
682       const PetscInt *gid,*clIdx;
683       PetscInt       v, p, pStart, pEnd;
684 
685       /* dm->localSection encodes degrees of freedom per point, not per cell. We need to get the closure index to properly specify cell weights (aka dofs) */
686       /* We do this only if the local section has been set */
687       if (section) {
688         ierr = PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, NULL);CHKERRQ(ierr);
689         if (!clSection) {
690           ierr = DMPlexCreateClosureIndex(dm,NULL);CHKERRQ(ierr);
691         }
692         ierr = PetscSectionGetClosureIndex(section, (PetscObject)dm, &clSection, &clPoints);CHKERRQ(ierr);
693         ierr = ISGetIndices(clPoints,&clIdx);CHKERRQ(ierr);
694       }
695       ierr = DMPlexGetHeightStratum(dm, part->height, &pStart, &pEnd);CHKERRQ(ierr);
696       ierr = PetscSectionCreate(PETSC_COMM_SELF, &vertSection);CHKERRQ(ierr);
697       ierr = PetscSectionSetChart(vertSection, 0, numVertices);CHKERRQ(ierr);
698       if (globalNumbering) {
699         ierr = ISGetIndices(globalNumbering,&gid);CHKERRQ(ierr);
700       } else gid = NULL;
701       for (p = pStart, v = 0; p < pEnd; ++p) {
702         PetscInt dof = 1;
703 
704         /* skip cells in the overlap */
705         if (gid && gid[p-pStart] < 0) continue;
706 
707         if (section) {
708           PetscInt cl, clSize, clOff;
709 
710           dof  = 0;
711           ierr = PetscSectionGetDof(clSection, p, &clSize);CHKERRQ(ierr);
712           ierr = PetscSectionGetOffset(clSection, p, &clOff);CHKERRQ(ierr);
713           for (cl = 0; cl < clSize; cl+=2) {
714             PetscInt clDof, clPoint = clIdx[clOff + cl]; /* odd indices are reserved for orientations */
715 
716             ierr = PetscSectionGetDof(section, clPoint, &clDof);CHKERRQ(ierr);
717             dof += clDof;
718           }
719         }
720         if (!dof) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Number of dofs for point %D in the local section should be positive",p);
721         ierr = PetscSectionSetDof(vertSection, v, dof);CHKERRQ(ierr);
722         v++;
723       }
724       if (globalNumbering) {
725         ierr = ISRestoreIndices(globalNumbering,&gid);CHKERRQ(ierr);
726       }
727       if (clPoints) {
728         ierr = ISRestoreIndices(clPoints,&clIdx);CHKERRQ(ierr);
729       }
730       ierr = PetscSectionSetUp(vertSection);CHKERRQ(ierr);
731     }
732     ierr = PetscPartitionerPartition(part, size, numVertices, start, adjacency, vertSection, targetSection, partSection, partition);CHKERRQ(ierr);
733     ierr = PetscFree(start);CHKERRQ(ierr);
734     ierr = PetscFree(adjacency);CHKERRQ(ierr);
735     if (globalNumbering) { /* partition is wrt global unique numbering: change this to be wrt local numbering */
736       const PetscInt *globalNum;
737       const PetscInt *partIdx;
738       PetscInt       *map, cStart, cEnd;
739       PetscInt       *adjusted, i, localSize, offset;
740       IS             newPartition;
741 
742       ierr = ISGetLocalSize(*partition,&localSize);CHKERRQ(ierr);
743       ierr = PetscMalloc1(localSize,&adjusted);CHKERRQ(ierr);
744       ierr = ISGetIndices(globalNumbering,&globalNum);CHKERRQ(ierr);
745       ierr = ISGetIndices(*partition,&partIdx);CHKERRQ(ierr);
746       ierr = PetscMalloc1(localSize,&map);CHKERRQ(ierr);
747       ierr = DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd);CHKERRQ(ierr);
748       for (i = cStart, offset = 0; i < cEnd; i++) {
749         if (globalNum[i - cStart] >= 0) map[offset++] = i;
750       }
751       for (i = 0; i < localSize; i++) {
752         adjusted[i] = map[partIdx[i]];
753       }
754       ierr = PetscFree(map);CHKERRQ(ierr);
755       ierr = ISRestoreIndices(*partition,&partIdx);CHKERRQ(ierr);
756       ierr = ISRestoreIndices(globalNumbering,&globalNum);CHKERRQ(ierr);
757       ierr = ISCreateGeneral(PETSC_COMM_SELF,localSize,adjusted,PETSC_OWN_POINTER,&newPartition);CHKERRQ(ierr);
758       ierr = ISDestroy(&globalNumbering);CHKERRQ(ierr);
759       ierr = ISDestroy(partition);CHKERRQ(ierr);
760       *partition = newPartition;
761     }
762   } else SETERRQ1(PetscObjectComm((PetscObject) part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid height %D for points to partition", part->height);
763   ierr = PetscSectionDestroy(&vertSection);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
/*@
  DMPlexGetPartitioner - Get the mesh partitioner

  Not collective

  Input Parameter:
. dm - The DM

  Output Parameter:
. part - The PetscPartitioner

  Level: developer

  Note: This gets a borrowed reference, so the user should not destroy this PetscPartitioner.

.seealso: DMPlexDistribute(), DMPlexSetPartitioner(), PetscPartitionerDMPlexPartition(), PetscPartitionerCreate()
@*/
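/* Example usage (a sketch; the partitioner is owned by the DM, so it is not destroyed here.
   The choice of PETSCPARTITIONERSIMPLE is only illustrative):

     PetscPartitioner part;

     ierr = DMPlexGetPartitioner(dm, &part);CHKERRQ(ierr);
     ierr = PetscPartitionerSetType(part, PETSCPARTITIONERSIMPLE);CHKERRQ(ierr);
     ierr = PetscPartitionerSetFromOptions(part);CHKERRQ(ierr);
*/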
PetscErrorCode DMPlexGetPartitioner(DM dm, PetscPartitioner *part)
785 {
786   DM_Plex       *mesh = (DM_Plex *) dm->data;
787 
788   PetscFunctionBegin;
789   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
790   PetscValidPointer(part, 2);
791   *part = mesh->partitioner;
792   PetscFunctionReturn(0);
793 }
794 
/*@
  DMPlexSetPartitioner - Set the mesh partitioner

  Logically collective on DM

  Input Parameters:
+ dm   - The DM
- part - The partitioner

  Level: developer

  Note: Any existing PetscPartitioner will be destroyed.

.seealso: DMPlexDistribute(), DMPlexGetPartitioner(), PetscPartitionerCreate()
@*/
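/* Example usage (a sketch; the DM takes its own reference, so the local reference is
   released afterwards. PETSCPARTITIONERPARMETIS is only one possible type):

     PetscPartitioner part;

     ierr = PetscPartitionerCreate(PetscObjectComm((PetscObject) dm), &part);CHKERRQ(ierr);
     ierr = PetscPartitionerSetType(part, PETSCPARTITIONERPARMETIS);CHKERRQ(ierr);
     ierr = DMPlexSetPartitioner(dm, part);CHKERRQ(ierr);
     ierr = PetscPartitionerDestroy(&part);CHKERRQ(ierr);
*/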
PetscErrorCode DMPlexSetPartitioner(DM dm, PetscPartitioner part)
811 {
812   DM_Plex       *mesh = (DM_Plex *) dm->data;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
817   PetscValidHeaderSpecific(part, PETSCPARTITIONER_CLASSID, 2);
818   ierr = PetscObjectReference((PetscObject)part);CHKERRQ(ierr);
819   ierr = PetscPartitionerDestroy(&mesh->partitioner);CHKERRQ(ierr);
820   mesh->partitioner = part;
821   PetscFunctionReturn(0);
822 }
823 
static PetscErrorCode DMPlexAddClosure_Private(DM dm, PetscHSetI ht, PetscInt point)
825 {
826   const PetscInt *cone;
827   PetscInt       coneSize, c;
828   PetscBool      missing;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBeginHot;
832   ierr = PetscHSetIQueryAdd(ht, point, &missing);CHKERRQ(ierr);
833   if (missing) {
834     ierr = DMPlexGetCone(dm, point, &cone);CHKERRQ(ierr);
835     ierr = DMPlexGetConeSize(dm, point, &coneSize);CHKERRQ(ierr);
836     for (c = 0; c < coneSize; c++) {
837       ierr = DMPlexAddClosure_Private(dm, ht, cone[c]);CHKERRQ(ierr);
838     }
839   }
840   PetscFunctionReturn(0);
841 }
842 
PETSC_UNUSED static PetscErrorCode DMPlexAddClosure_Tree(DM dm, PetscHSetI ht, PetscInt point, PetscBool up, PetscBool down)
844 {
845   PetscErrorCode ierr;
846 
847   PetscFunctionBegin;
848   if (up) {
849     PetscInt parent;
850 
851     ierr = DMPlexGetTreeParent(dm,point,&parent,NULL);CHKERRQ(ierr);
852     if (parent != point) {
853       PetscInt closureSize, *closure = NULL, i;
854 
855       ierr = DMPlexGetTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure);CHKERRQ(ierr);
856       for (i = 0; i < closureSize; i++) {
857         PetscInt cpoint = closure[2*i];
858 
859         ierr = PetscHSetIAdd(ht, cpoint);CHKERRQ(ierr);
860         ierr = DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
861       }
862       ierr = DMPlexRestoreTransitiveClosure(dm,parent,PETSC_TRUE,&closureSize,&closure);CHKERRQ(ierr);
863     }
864   }
865   if (down) {
866     PetscInt numChildren;
867     const PetscInt *children;
868 
869     ierr = DMPlexGetTreeChildren(dm,point,&numChildren,&children);CHKERRQ(ierr);
870     if (numChildren) {
871       PetscInt i;
872 
873       for (i = 0; i < numChildren; i++) {
874         PetscInt cpoint = children[i];
875 
876         ierr = PetscHSetIAdd(ht, cpoint);CHKERRQ(ierr);
877         ierr = DMPlexAddClosure_Tree(dm,ht,cpoint,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
878       }
879     }
880   }
881   PetscFunctionReturn(0);
882 }
883 
static PetscErrorCode DMPlexAddClosureTree_Up_Private(DM dm, PetscHSetI ht, PetscInt point)
885 {
886   PetscInt       parent;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBeginHot;
890   ierr = DMPlexGetTreeParent(dm, point, &parent,NULL);CHKERRQ(ierr);
891   if (point != parent) {
892     const PetscInt *cone;
893     PetscInt       coneSize, c;
894 
895     ierr = DMPlexAddClosureTree_Up_Private(dm, ht, parent);CHKERRQ(ierr);
896     ierr = DMPlexAddClosure_Private(dm, ht, parent);CHKERRQ(ierr);
897     ierr = DMPlexGetCone(dm, parent, &cone);CHKERRQ(ierr);
898     ierr = DMPlexGetConeSize(dm, parent, &coneSize);CHKERRQ(ierr);
899     for (c = 0; c < coneSize; c++) {
900       const PetscInt cp = cone[c];
901 
902       ierr = DMPlexAddClosureTree_Up_Private(dm, ht, cp);CHKERRQ(ierr);
903     }
904   }
905   PetscFunctionReturn(0);
906 }
907 
static PetscErrorCode DMPlexAddClosureTree_Down_Private(DM dm, PetscHSetI ht, PetscInt point)
909 {
910   PetscInt       i, numChildren;
911   const PetscInt *children;
912   PetscErrorCode ierr;
913 
914   PetscFunctionBeginHot;
915   ierr = DMPlexGetTreeChildren(dm, point, &numChildren, &children);CHKERRQ(ierr);
916   for (i = 0; i < numChildren; i++) {
917     ierr = PetscHSetIAdd(ht, children[i]);CHKERRQ(ierr);
918   }
919   PetscFunctionReturn(0);
920 }
921 
static PetscErrorCode DMPlexAddClosureTree_Private(DM dm, PetscHSetI ht, PetscInt point)
923 {
924   const PetscInt *cone;
925   PetscInt       coneSize, c;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBeginHot;
929   ierr = PetscHSetIAdd(ht, point);CHKERRQ(ierr);
930   ierr = DMPlexAddClosureTree_Up_Private(dm, ht, point);CHKERRQ(ierr);
931   ierr = DMPlexAddClosureTree_Down_Private(dm, ht, point);CHKERRQ(ierr);
932   ierr = DMPlexGetCone(dm, point, &cone);CHKERRQ(ierr);
933   ierr = DMPlexGetConeSize(dm, point, &coneSize);CHKERRQ(ierr);
934   for (c = 0; c < coneSize; c++) {
935     ierr = DMPlexAddClosureTree_Private(dm, ht, cone[c]);CHKERRQ(ierr);
936   }
937   PetscFunctionReturn(0);
938 }
939 
PetscErrorCode DMPlexClosurePoints_Private(DM dm, PetscInt numPoints, const PetscInt points[], IS *closureIS)
941 {
942   DM_Plex         *mesh = (DM_Plex *)dm->data;
943   const PetscBool hasTree = (mesh->parentSection || mesh->childSection) ? PETSC_TRUE : PETSC_FALSE;
944   PetscInt        nelems, *elems, off = 0, p;
945   PetscHSetI      ht;
946   PetscErrorCode  ierr;
947 
948   PetscFunctionBegin;
949   ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
950   ierr = PetscHSetIResize(ht, numPoints*16);CHKERRQ(ierr);
951   if (!hasTree) {
952     for (p = 0; p < numPoints; ++p) {
953       ierr = DMPlexAddClosure_Private(dm, ht, points[p]);CHKERRQ(ierr);
954     }
955   } else {
956 #if 1
957     for (p = 0; p < numPoints; ++p) {
958       ierr = DMPlexAddClosureTree_Private(dm, ht, points[p]);CHKERRQ(ierr);
959     }
960 #else
961     PetscInt  *closure = NULL, closureSize, c;
962     for (p = 0; p < numPoints; ++p) {
963       ierr = DMPlexGetTransitiveClosure(dm, points[p], PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr);
964       for (c = 0; c < closureSize*2; c += 2) {
965         ierr = PetscHSetIAdd(ht, closure[c]);CHKERRQ(ierr);
966         if (hasTree) {ierr = DMPlexAddClosure_Tree(dm, ht, closure[c], PETSC_TRUE, PETSC_TRUE);CHKERRQ(ierr);}
967       }
968     }
969     if (closure) {ierr = DMPlexRestoreTransitiveClosure(dm, 0, PETSC_TRUE, NULL, &closure);CHKERRQ(ierr);}
970 #endif
971   }
972   ierr = PetscHSetIGetSize(ht, &nelems);CHKERRQ(ierr);
973   ierr = PetscMalloc1(nelems, &elems);CHKERRQ(ierr);
974   ierr = PetscHSetIGetElems(ht, &off, elems);CHKERRQ(ierr);
975   ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
976   ierr = PetscSortInt(nelems, elems);CHKERRQ(ierr);
977   ierr = ISCreateGeneral(PETSC_COMM_SELF, nelems, elems, PETSC_OWN_POINTER, closureIS);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
/*@
  DMPlexPartitionLabelClosure - Add the closure of all points to the partition label

  Input Parameters:
+ dm     - The DM
- label  - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
@*/
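/* Example usage (a sketch of completing a candidate migration label so each selected cell
   travels with its full closure; the label name and the marking loop are placeholders):

     DMLabel lblPartition;

     ierr = DMLabelCreate(PETSC_COMM_SELF, "Point Partition", &lblPartition);CHKERRQ(ierr);
     ... mark cells to be sent, e.g. DMLabelSetValue(lblPartition, cell, targetRank) ...
     ierr = DMPlexPartitionLabelClosure(dm, lblPartition);CHKERRQ(ierr);
*/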
PetscErrorCode DMPlexPartitionLabelClosure(DM dm, DMLabel label)
993 {
994   IS              rankIS,   pointIS, closureIS;
995   const PetscInt *ranks,   *points;
996   PetscInt        numRanks, numPoints, r;
997   PetscErrorCode  ierr;
998 
999   PetscFunctionBegin;
1000   ierr = DMLabelGetValueIS(label, &rankIS);CHKERRQ(ierr);
1001   ierr = ISGetLocalSize(rankIS, &numRanks);CHKERRQ(ierr);
1002   ierr = ISGetIndices(rankIS, &ranks);CHKERRQ(ierr);
1003   for (r = 0; r < numRanks; ++r) {
1004     const PetscInt rank = ranks[r];
1005     ierr = DMLabelGetStratumIS(label, rank, &pointIS);CHKERRQ(ierr);
1006     ierr = ISGetLocalSize(pointIS, &numPoints);CHKERRQ(ierr);
1007     ierr = ISGetIndices(pointIS, &points);CHKERRQ(ierr);
1008     ierr = DMPlexClosurePoints_Private(dm, numPoints, points, &closureIS);CHKERRQ(ierr);
1009     ierr = ISRestoreIndices(pointIS, &points);CHKERRQ(ierr);
1010     ierr = ISDestroy(&pointIS);CHKERRQ(ierr);
1011     ierr = DMLabelSetStratumIS(label, rank, closureIS);CHKERRQ(ierr);
1012     ierr = ISDestroy(&closureIS);CHKERRQ(ierr);
1013   }
1014   ierr = ISRestoreIndices(rankIS, &ranks);CHKERRQ(ierr);
1015   ierr = ISDestroy(&rankIS);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
/*@
  DMPlexPartitionLabelAdjacency - Add one level of adjacent points to the partition label

  Input Parameters:
+ dm     - The DM
- label  - DMLabel assigning ranks to remote roots

  Level: developer

.seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
@*/
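/* Example usage (a sketch; ovlLabel is a placeholder for an existing partition label of
   remote roots, grown here by one adjacency level and then closed):

     ierr = DMPlexPartitionLabelAdjacency(dm, ovlLabel);CHKERRQ(ierr);
     ierr = DMPlexPartitionLabelClosure(dm, ovlLabel);CHKERRQ(ierr);
*/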
PetscErrorCode DMPlexPartitionLabelAdjacency(DM dm, DMLabel label)
1031 {
1032   IS              rankIS,   pointIS;
1033   const PetscInt *ranks,   *points;
1034   PetscInt        numRanks, numPoints, r, p, a, adjSize;
1035   PetscInt       *adj = NULL;
1036   PetscErrorCode  ierr;
1037 
1038   PetscFunctionBegin;
1039   ierr = DMLabelGetValueIS(label, &rankIS);CHKERRQ(ierr);
1040   ierr = ISGetLocalSize(rankIS, &numRanks);CHKERRQ(ierr);
1041   ierr = ISGetIndices(rankIS, &ranks);CHKERRQ(ierr);
1042   for (r = 0; r < numRanks; ++r) {
1043     const PetscInt rank = ranks[r];
1044 
1045     ierr = DMLabelGetStratumIS(label, rank, &pointIS);CHKERRQ(ierr);
1046     ierr = ISGetLocalSize(pointIS, &numPoints);CHKERRQ(ierr);
1047     ierr = ISGetIndices(pointIS, &points);CHKERRQ(ierr);
1048     for (p = 0; p < numPoints; ++p) {
1049       adjSize = PETSC_DETERMINE;
1050       ierr = DMPlexGetAdjacency(dm, points[p], &adjSize, &adj);CHKERRQ(ierr);
1051       for (a = 0; a < adjSize; ++a) {ierr = DMLabelSetValue(label, adj[a], rank);CHKERRQ(ierr);}
1052     }
1053     ierr = ISRestoreIndices(pointIS, &points);CHKERRQ(ierr);
1054     ierr = ISDestroy(&pointIS);CHKERRQ(ierr);
1055   }
1056   ierr = ISRestoreIndices(rankIS, &ranks);CHKERRQ(ierr);
1057   ierr = ISDestroy(&rankIS);CHKERRQ(ierr);
1058   ierr = PetscFree(adj);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
/*@
  DMPlexPartitionLabelPropagate - Propagate points in a partition label over the point SF

  Input Parameters:
+ dm     - The DM
- label  - DMLabel assigning ranks to remote roots

  Level: developer

  Note: This is required when generating multi-level overlaps to capture
  overlap points from non-neighbouring partitions.

.seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
@*/
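/* Example usage (a sketch of a multi-level overlap loop; ovlLabel and nlevels are
   placeholders for the caller's overlap label and requested overlap depth):

     PetscInt l;

     for (l = 0; l < nlevels; ++l) {
       ierr = DMPlexPartitionLabelAdjacency(dm, ovlLabel);CHKERRQ(ierr);
       ierr = DMPlexPartitionLabelPropagate(dm, ovlLabel);CHKERRQ(ierr);
     }
*/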
PetscErrorCode DMPlexPartitionLabelPropagate(DM dm, DMLabel label)
1077 {
1078   MPI_Comm        comm;
1079   PetscMPIInt     rank;
1080   PetscSF         sfPoint;
1081   DMLabel         lblRoots, lblLeaves;
1082   IS              rankIS, pointIS;
1083   const PetscInt *ranks;
1084   PetscInt        numRanks, r;
1085   PetscErrorCode  ierr;
1086 
1087   PetscFunctionBegin;
1088   ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
1089   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1090   ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr);
1091   /* Pull point contributions from remote leaves into local roots */
1092   ierr = DMLabelGather(label, sfPoint, &lblLeaves);CHKERRQ(ierr);
1093   ierr = DMLabelGetValueIS(lblLeaves, &rankIS);CHKERRQ(ierr);
1094   ierr = ISGetLocalSize(rankIS, &numRanks);CHKERRQ(ierr);
1095   ierr = ISGetIndices(rankIS, &ranks);CHKERRQ(ierr);
1096   for (r = 0; r < numRanks; ++r) {
1097     const PetscInt remoteRank = ranks[r];
1098     if (remoteRank == rank) continue;
1099     ierr = DMLabelGetStratumIS(lblLeaves, remoteRank, &pointIS);CHKERRQ(ierr);
1100     ierr = DMLabelInsertIS(label, pointIS, remoteRank);CHKERRQ(ierr);
1101     ierr = ISDestroy(&pointIS);CHKERRQ(ierr);
1102   }
1103   ierr = ISRestoreIndices(rankIS, &ranks);CHKERRQ(ierr);
1104   ierr = ISDestroy(&rankIS);CHKERRQ(ierr);
1105   ierr = DMLabelDestroy(&lblLeaves);CHKERRQ(ierr);
1106   /* Push point contributions from roots into remote leaves */
1107   ierr = DMLabelDistribute(label, sfPoint, &lblRoots);CHKERRQ(ierr);
1108   ierr = DMLabelGetValueIS(lblRoots, &rankIS);CHKERRQ(ierr);
1109   ierr = ISGetLocalSize(rankIS, &numRanks);CHKERRQ(ierr);
1110   ierr = ISGetIndices(rankIS, &ranks);CHKERRQ(ierr);
1111   for (r = 0; r < numRanks; ++r) {
1112     const PetscInt remoteRank = ranks[r];
1113     if (remoteRank == rank) continue;
1114     ierr = DMLabelGetStratumIS(lblRoots, remoteRank, &pointIS);CHKERRQ(ierr);
1115     ierr = DMLabelInsertIS(label, pointIS, remoteRank);CHKERRQ(ierr);
1116     ierr = ISDestroy(&pointIS);CHKERRQ(ierr);
1117   }
1118   ierr = ISRestoreIndices(rankIS, &ranks);CHKERRQ(ierr);
1119   ierr = ISDestroy(&rankIS);CHKERRQ(ierr);
1120   ierr = DMLabelDestroy(&lblRoots);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
/*@
  DMPlexPartitionLabelInvert - Create a partition label of remote roots from a local root label

  Input Parameters:
+ dm        - The DM
. rootLabel - DMLabel assigning ranks to local roots
- processSF - A star forest mapping into the local index on each remote rank

  Output Parameter:
. leafLabel - DMLabel assigning ranks to remote roots

  Note: The rootLabel defines a send pattern by mapping local points to remote target ranks. The
  resulting leafLabel is a receiver mapping of remote roots to their parent rank.

  Level: developer

.seealso: DMPlexPartitionLabelCreateSF(), DMPlexDistribute(), DMPlexCreateOverlap()
@*/
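/* Example usage (a sketch; rootLabel is assumed to already mark local points with target
   ranks, and the label name is only illustrative):

     DMLabel leafLabel;
     PetscSF sfMigration;

     ierr = DMLabelCreate(PETSC_COMM_SELF, "Point migration", &leafLabel);CHKERRQ(ierr);
     ierr = DMPlexPartitionLabelInvert(dm, rootLabel, NULL, leafLabel);CHKERRQ(ierr);
     ierr = DMPlexPartitionLabelCreateSF(dm, leafLabel, &sfMigration);CHKERRQ(ierr);
*/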
PetscErrorCode DMPlexPartitionLabelInvert(DM dm, DMLabel rootLabel, PetscSF processSF, DMLabel leafLabel)
1143 {
1144   MPI_Comm           comm;
1145   PetscMPIInt        rank, size, r;
1146   PetscInt           p, n, numNeighbors, numPoints, dof, off, rootSize, l, nleaves, leafSize;
1147   PetscSF            sfPoint;
1148   PetscSection       rootSection;
1149   PetscSFNode       *rootPoints, *leafPoints;
1150   const PetscSFNode *remote;
1151   const PetscInt    *local, *neighbors;
1152   IS                 valueIS;
1153   PetscBool          mpiOverflow = PETSC_FALSE;
1154   PetscErrorCode     ierr;
1155 
1156   PetscFunctionBegin;
1157   ierr = PetscLogEventBegin(DMPLEX_PartLabelInvert,dm,0,0,0);CHKERRQ(ierr);
1158   ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
1159   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1160   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1161   ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr);
1162 
1163   /* Convert to (point, rank) and use actual owners */
1164   ierr = PetscSectionCreate(comm, &rootSection);CHKERRQ(ierr);
1165   ierr = PetscSectionSetChart(rootSection, 0, size);CHKERRQ(ierr);
1166   ierr = DMLabelGetValueIS(rootLabel, &valueIS);CHKERRQ(ierr);
1167   ierr = ISGetLocalSize(valueIS, &numNeighbors);CHKERRQ(ierr);
1168   ierr = ISGetIndices(valueIS, &neighbors);CHKERRQ(ierr);
1169   for (n = 0; n < numNeighbors; ++n) {
1170     ierr = DMLabelGetStratumSize(rootLabel, neighbors[n], &numPoints);CHKERRQ(ierr);
1171     ierr = PetscSectionAddDof(rootSection, neighbors[n], numPoints);CHKERRQ(ierr);
1172   }
1173   ierr = PetscSectionSetUp(rootSection);CHKERRQ(ierr);
1174   ierr = PetscSectionGetStorageSize(rootSection, &rootSize);CHKERRQ(ierr);
1175   ierr = PetscMalloc1(rootSize, &rootPoints);CHKERRQ(ierr);
1176   ierr = PetscSFGetGraph(sfPoint, NULL, &nleaves, &local, &remote);CHKERRQ(ierr);
1177   for (n = 0; n < numNeighbors; ++n) {
1178     IS              pointIS;
1179     const PetscInt *points;
1180 
1181     ierr = PetscSectionGetOffset(rootSection, neighbors[n], &off);CHKERRQ(ierr);
1182     ierr = DMLabelGetStratumIS(rootLabel, neighbors[n], &pointIS);CHKERRQ(ierr);
1183     ierr = ISGetLocalSize(pointIS, &numPoints);CHKERRQ(ierr);
1184     ierr = ISGetIndices(pointIS, &points);CHKERRQ(ierr);
1185     for (p = 0; p < numPoints; ++p) {
1186       if (local) {ierr = PetscFindInt(points[p], nleaves, local, &l);CHKERRQ(ierr);}
1187       else       {l = -1;}
1188       if (l >= 0) {rootPoints[off+p] = remote[l];}
1189       else        {rootPoints[off+p].index = points[p]; rootPoints[off+p].rank = rank;}
1190     }
1191     ierr = ISRestoreIndices(pointIS, &points);CHKERRQ(ierr);
1192     ierr = ISDestroy(&pointIS);CHKERRQ(ierr);
1193   }
1194 
1195   /* Try to communicate overlap using All-to-All */
1196   if (!processSF) {
1197     PetscInt64  counter = 0;
1198     PetscBool   locOverflow = PETSC_FALSE;
1199     PetscMPIInt *scounts, *sdispls, *rcounts, *rdispls;
1200 
1201     ierr = PetscCalloc4(size, &scounts, size, &sdispls, size, &rcounts, size, &rdispls);CHKERRQ(ierr);
1202     for (n = 0; n < numNeighbors; ++n) {
1203       ierr = PetscSectionGetDof(rootSection, neighbors[n], &dof);CHKERRQ(ierr);
1204       ierr = PetscSectionGetOffset(rootSection, neighbors[n], &off);CHKERRQ(ierr);
1205 #if defined(PETSC_USE_64BIT_INDICES)
1206       if (dof > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
1207       if (off > PETSC_MPI_INT_MAX) {locOverflow = PETSC_TRUE; break;}
1208 #endif
1209       scounts[neighbors[n]] = (PetscMPIInt) dof;
1210       sdispls[neighbors[n]] = (PetscMPIInt) off;
1211     }
1212     ierr = MPI_Alltoall(scounts, 1, MPI_INT, rcounts, 1, MPI_INT, comm);CHKERRQ(ierr);
1213     for (r = 0; r < size; ++r) { rdispls[r] = (int)counter; counter += rcounts[r]; }
1214     if (counter > PETSC_MPI_INT_MAX) locOverflow = PETSC_TRUE;
1215     ierr = MPI_Allreduce(&locOverflow, &mpiOverflow, 1, MPIU_BOOL, MPI_LOR, comm);CHKERRQ(ierr);
1216     if (!mpiOverflow) {
1217       ierr = PetscInfo(dm,"Using Alltoallv for mesh distribution\n");CHKERRQ(ierr);
1218       leafSize = (PetscInt) counter;
1219       ierr = PetscMalloc1(leafSize, &leafPoints);CHKERRQ(ierr);
1220       ierr = MPI_Alltoallv(rootPoints, scounts, sdispls, MPIU_2INT, leafPoints, rcounts, rdispls, MPIU_2INT, comm);CHKERRQ(ierr);
1221     }
1222     ierr = PetscFree4(scounts, sdispls, rcounts, rdispls);CHKERRQ(ierr);
1223   }
1224 
1225   /* Communicate overlap using process star forest */
1226   if (processSF || mpiOverflow) {
1227     PetscSF      procSF;
1228     PetscSection leafSection;
1229 
1230     if (processSF) {
1231       ierr = PetscInfo(dm,"Using processSF for mesh distribution\n");CHKERRQ(ierr);
1232       ierr = PetscObjectReference((PetscObject)processSF);CHKERRQ(ierr);
1233       procSF = processSF;
1234     } else {
1235       ierr = PetscInfo(dm,"Using processSF for mesh distribution (MPI overflow)\n");CHKERRQ(ierr);
1236       ierr = PetscSFCreate(comm,&procSF);CHKERRQ(ierr);
1237       ierr = PetscSFSetGraphWithPattern(procSF,NULL,PETSCSF_PATTERN_ALLTOALL);CHKERRQ(ierr);
1238     }
1239 
1240     ierr = PetscSectionCreate(PetscObjectComm((PetscObject)dm), &leafSection);CHKERRQ(ierr);
1241     ierr = DMPlexDistributeData(dm, procSF, rootSection, MPIU_2INT, rootPoints, leafSection, (void**) &leafPoints);CHKERRQ(ierr);
1242     ierr = PetscSectionGetStorageSize(leafSection, &leafSize);CHKERRQ(ierr);
1243     ierr = PetscSectionDestroy(&leafSection);CHKERRQ(ierr);
1244     ierr = PetscSFDestroy(&procSF);CHKERRQ(ierr);
1245   }
1246 
1247   for (p = 0; p < leafSize; p++) {
1248     ierr = DMLabelSetValue(leafLabel, leafPoints[p].index, leafPoints[p].rank);CHKERRQ(ierr);
1249   }
1250 
1251   ierr = ISRestoreIndices(valueIS, &neighbors);CHKERRQ(ierr);
1252   ierr = ISDestroy(&valueIS);CHKERRQ(ierr);
1253   ierr = PetscSectionDestroy(&rootSection);CHKERRQ(ierr);
1254   ierr = PetscFree(rootPoints);CHKERRQ(ierr);
1255   ierr = PetscFree(leafPoints);CHKERRQ(ierr);
1256   ierr = PetscLogEventEnd(DMPLEX_PartLabelInvert,dm,0,0,0);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 /*@
1261   DMPlexPartitionLabelCreateSF - Create a star forest from a label that assigns ranks to points
1262 
1263   Input Parameters:
1264 + dm    - The DM
1265 - label - DMLabel assigning ranks to remote roots
1266 
1267   Output Parameter:
1268 . sf    - The star forest communication context encapsulating the defined mapping
1269 
1270   Note: The incoming label is a receiver mapping of remote points to their parent rank.
1271 
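  Example usage (a sketch; assumes lblMigration labels, on each process, the points it will receive with the
  rank that currently owns them, e.g. as produced by DMPlexPartitionLabelInvert()):
.vb
  PetscSF sfMigration;

  ierr = DMPlexPartitionLabelCreateSF(dm, lblMigration, &sfMigration);CHKERRQ(ierr);
  /* ... use sfMigration, e.g. with DMPlexDistributeData(), to move data to the receiving ranks ... */
  ierr = PetscSFDestroy(&sfMigration);CHKERRQ(ierr);
.ve
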
1272   Level: developer
1273 
1274 .seealso: DMPlexDistribute(), DMPlexCreateOverlap()
1275 @*/
1276 PetscErrorCode DMPlexPartitionLabelCreateSF(DM dm, DMLabel label, PetscSF *sf)
1277 {
1278   PetscMPIInt     rank;
1279   PetscInt        n, numRemote, p, numPoints, pStart, pEnd, idx = 0, nNeighbors;
1280   PetscSFNode    *remotePoints;
1281   IS              remoteRootIS, neighborsIS;
1282   const PetscInt *remoteRoots, *neighbors;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscLogEventBegin(DMPLEX_PartLabelCreateSF,dm,0,0,0);CHKERRQ(ierr);
1287   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject) dm), &rank);CHKERRQ(ierr);
1288 
1289   ierr = DMLabelGetValueIS(label, &neighborsIS);CHKERRQ(ierr);
1290 #if 0
1291   {
1292     IS is;
1293     ierr = ISDuplicate(neighborsIS, &is);CHKERRQ(ierr);
1294     ierr = ISSort(is);CHKERRQ(ierr);
1295     ierr = ISDestroy(&neighborsIS);CHKERRQ(ierr);
1296     neighborsIS = is;
1297   }
1298 #endif
1299   ierr = ISGetLocalSize(neighborsIS, &nNeighbors);CHKERRQ(ierr);
1300   ierr = ISGetIndices(neighborsIS, &neighbors);CHKERRQ(ierr);
1301   for (numRemote = 0, n = 0; n < nNeighbors; ++n) {
1302     ierr = DMLabelGetStratumSize(label, neighbors[n], &numPoints);CHKERRQ(ierr);
1303     numRemote += numPoints;
1304   }
1305   ierr = PetscMalloc1(numRemote, &remotePoints);CHKERRQ(ierr);
1306   /* Put owned points first */
1307   ierr = DMLabelGetStratumSize(label, rank, &numPoints);CHKERRQ(ierr);
1308   if (numPoints > 0) {
1309     ierr = DMLabelGetStratumIS(label, rank, &remoteRootIS);CHKERRQ(ierr);
1310     ierr = ISGetIndices(remoteRootIS, &remoteRoots);CHKERRQ(ierr);
1311     for (p = 0; p < numPoints; p++) {
1312       remotePoints[idx].index = remoteRoots[p];
1313       remotePoints[idx].rank = rank;
1314       idx++;
1315     }
1316     ierr = ISRestoreIndices(remoteRootIS, &remoteRoots);CHKERRQ(ierr);
1317     ierr = ISDestroy(&remoteRootIS);CHKERRQ(ierr);
1318   }
1319   /* Now add remote points */
1320   for (n = 0; n < nNeighbors; ++n) {
1321     const PetscInt nn = neighbors[n];
1322 
1323     ierr = DMLabelGetStratumSize(label, nn, &numPoints);CHKERRQ(ierr);
1324     if (nn == rank || numPoints <= 0) continue;
1325     ierr = DMLabelGetStratumIS(label, nn, &remoteRootIS);CHKERRQ(ierr);
1326     ierr = ISGetIndices(remoteRootIS, &remoteRoots);CHKERRQ(ierr);
1327     for (p = 0; p < numPoints; p++) {
1328       remotePoints[idx].index = remoteRoots[p];
1329       remotePoints[idx].rank = nn;
1330       idx++;
1331     }
1332     ierr = ISRestoreIndices(remoteRootIS, &remoteRoots);CHKERRQ(ierr);
1333     ierr = ISDestroy(&remoteRootIS);CHKERRQ(ierr);
1334   }
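  /* Passing NULL for ilocal below means leaf i is local point i: the leaves are therefore numbered in the
     order assembled above, owned points first, followed by the points received from each neighbor */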
1335   ierr = PetscSFCreate(PetscObjectComm((PetscObject) dm), sf);CHKERRQ(ierr);
1336   ierr = DMPlexGetChart(dm, &pStart, &pEnd);CHKERRQ(ierr);
1337   ierr = PetscSFSetGraph(*sf, pEnd-pStart, numRemote, NULL, PETSC_OWN_POINTER, remotePoints, PETSC_OWN_POINTER);CHKERRQ(ierr);
1338   ierr = PetscSFSetUp(*sf);CHKERRQ(ierr);
1339   ierr = ISDestroy(&neighborsIS);CHKERRQ(ierr);
1340   ierr = PetscLogEventEnd(DMPLEX_PartLabelCreateSF,dm,0,0,0);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #if defined(PETSC_HAVE_PARMETIS)
1345 #include <parmetis.h>
1346 #endif
1347 
1348 /* The two functions below are used by DMPlexRebalanceSharedPoints which errors
1349  * when PETSc is built without ParMETIS. To avoid -Wunused-function, we take
1350  * them out in that case. */
1351 #if defined(PETSC_HAVE_PARMETIS)
1352 /*@C
1353 
1354   DMPlexRewriteSF - Rewrites the ownership of the SF of a DM (in place).
1355 
1356   Input Parameters:
1357 + dm                - The DMPlex object.
1358 . n                 - The number of points.
1359 . pointsToRewrite   - The points in the SF whose ownership will change.
1360 . targetOwners      - New owner for each element in pointsToRewrite.
1361 - degrees           - Degrees of the points in the SF as obtained by PetscSFComputeDegreeBegin/PetscSFComputeDegreeEnd.
1362 
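  Note:
  Each point in pointsToRewrite must currently be owned by this process (a root of the point SF), and its
  new owner must already see the point as a leaf; the new local number on the target rank is looked up from
  the leaf information gathered over the SF.
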
1363   Level: developer
1364 
1365 @*/
1366 static PetscErrorCode DMPlexRewriteSF(DM dm, PetscInt n, PetscInt *pointsToRewrite, PetscInt *targetOwners, const PetscInt *degrees)
1367 {
1368   PetscInt      ierr, pStart, pEnd, i, j, counter, leafCounter, sumDegrees, nroots, nleafs;
1369   PetscInt     *cumSumDegrees, *newOwners, *newNumbers, *rankOnLeafs, *locationsOfLeafs, *remoteLocalPointOfLeafs, *points, *leafsNew;
1370   PetscSFNode  *leafLocationsNew;
1371   const         PetscSFNode *iremote;
1372   const         PetscInt *ilocal;
1373   PetscBool    *isLeaf;
1374   PetscSF       sf;
1375   MPI_Comm      comm;
1376   PetscMPIInt   rank, size;
1377 
1378   PetscFunctionBegin;
1379   ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
1380   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1381   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1382   ierr = DMPlexGetChart(dm, &pStart, &pEnd);CHKERRQ(ierr);
1383 
1384   ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
1385   ierr = PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote);CHKERRQ(ierr);
1386   ierr = PetscMalloc1(pEnd-pStart, &isLeaf);CHKERRQ(ierr);
1387   for (i=0; i<pEnd-pStart; i++) {
1388     isLeaf[i] = PETSC_FALSE;
1389   }
1390   for (i=0; i<nleafs; i++) {
1391     isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
1392   }
1393 
1394   ierr = PetscMalloc1(pEnd-pStart+1, &cumSumDegrees);CHKERRQ(ierr);
1395   cumSumDegrees[0] = 0;
1396   for (i=1; i<=pEnd-pStart; i++) {
1397     cumSumDegrees[i] = cumSumDegrees[i-1] + degrees[i-1];
1398   }
1399   sumDegrees = cumSumDegrees[pEnd-pStart];
1400   /* get the owning ranks of the leaves attached to my roots (sumDegrees leaves in total point at our roots) */
1401 
1402   ierr = PetscMalloc1(sumDegrees, &locationsOfLeafs);CHKERRQ(ierr);
1403   ierr = PetscMalloc1(pEnd-pStart, &rankOnLeafs);CHKERRQ(ierr);
1404   for (i=0; i<pEnd-pStart; i++) {
1405     rankOnLeafs[i] = rank;
1406   }
1407   ierr = PetscSFGatherBegin(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs);CHKERRQ(ierr);
1408   ierr = PetscSFGatherEnd(sf, MPIU_INT, rankOnLeafs, locationsOfLeafs);CHKERRQ(ierr);
1409   ierr = PetscFree(rankOnLeafs);CHKERRQ(ierr);
1410 
1411   /* get the remote local points of my leaves */
1412   ierr = PetscMalloc1(sumDegrees, &remoteLocalPointOfLeafs);CHKERRQ(ierr);
1413   ierr = PetscMalloc1(pEnd-pStart, &points);CHKERRQ(ierr);
1414   for (i=0; i<pEnd-pStart; i++) {
1415     points[i] = pStart+i;
1416   }
1417   ierr = PetscSFGatherBegin(sf, MPIU_INT, points, remoteLocalPointOfLeafs);CHKERRQ(ierr);
1418   ierr = PetscSFGatherEnd(sf, MPIU_INT, points, remoteLocalPointOfLeafs);CHKERRQ(ierr);
1419   ierr = PetscFree(points);CHKERRQ(ierr);
1420   /* Figure out the new owners of the vertices that are up for grabs and their numbers on the new owners */
1421   ierr = PetscMalloc1(pEnd-pStart, &newOwners);CHKERRQ(ierr);
1422   ierr = PetscMalloc1(pEnd-pStart, &newNumbers);CHKERRQ(ierr);
1423   for (i=0; i<pEnd-pStart; i++) {
1424     newOwners[i] = -1;
1425     newNumbers[i] = -1;
1426   }
1427   {
1428     PetscInt oldNumber, newNumber, oldOwner, newOwner;
1429     for (i=0; i<n; i++) {
1430       oldNumber = pointsToRewrite[i];
1431       newNumber = -1;
1432       oldOwner = rank;
1433       newOwner = targetOwners[i];
1434       if (oldOwner == newOwner) {
1435         newNumber = oldNumber;
1436       } else {
1437         for (j=0; j<degrees[oldNumber]; j++) {
1438           if (locationsOfLeafs[cumSumDegrees[oldNumber]+j] == newOwner) {
1439             newNumber = remoteLocalPointOfLeafs[cumSumDegrees[oldNumber]+j];
1440             break;
1441           }
1442         }
1443       }
1444       if (newNumber == -1) SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Couldn't find the new owner of vertex.");
1445 
1446       newOwners[oldNumber] = newOwner;
1447       newNumbers[oldNumber] = newNumber;
1448     }
1449   }
1450   ierr = PetscFree(cumSumDegrees);CHKERRQ(ierr);
1451   ierr = PetscFree(locationsOfLeafs);CHKERRQ(ierr);
1452   ierr = PetscFree(remoteLocalPointOfLeafs);CHKERRQ(ierr);
1453 
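  /* Broadcast the new owner and new local number of every rewritten point from its current owner (the SF
     root) to every process that sees the point as a leaf */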
1454   ierr = PetscSFBcastBegin(sf, MPIU_INT, newOwners, newOwners);CHKERRQ(ierr);
1455   ierr = PetscSFBcastEnd(sf, MPIU_INT, newOwners, newOwners);CHKERRQ(ierr);
1456   ierr = PetscSFBcastBegin(sf, MPIU_INT, newNumbers, newNumbers);CHKERRQ(ierr);
1457   ierr = PetscSFBcastEnd(sf, MPIU_INT, newNumbers, newNumbers);CHKERRQ(ierr);
1458 
1459   /* Now count how many leaves we have on each process. */
1460   leafCounter=0;
1461   for (i=0; i<pEnd-pStart; i++) {
1462     if (newOwners[i] >= 0) {
1463       if (newOwners[i] != rank) {
1464         leafCounter++;
1465       }
1466     } else {
1467       if (isLeaf[i]) {
1468         leafCounter++;
1469       }
1470     }
1471   }
1472 
1473   /* Now set up the new sf by creating the leaf arrays */
1474   ierr = PetscMalloc1(leafCounter, &leafsNew);CHKERRQ(ierr);
1475   ierr = PetscMalloc1(leafCounter, &leafLocationsNew);CHKERRQ(ierr);
1476 
1477   leafCounter = 0;
1478   counter = 0;
1479   for (i=0; i<pEnd-pStart; i++) {
1480     if (newOwners[i] >= 0) {
1481       if (newOwners[i] != rank) {
1482         leafsNew[leafCounter] = i;
1483         leafLocationsNew[leafCounter].rank = newOwners[i];
1484         leafLocationsNew[leafCounter].index = newNumbers[i];
1485         leafCounter++;
1486       }
1487     } else {
1488       if (isLeaf[i]) {
1489         leafsNew[leafCounter] = i;
1490         leafLocationsNew[leafCounter].rank = iremote[counter].rank;
1491         leafLocationsNew[leafCounter].index = iremote[counter].index;
1492         leafCounter++;
1493       }
1494     }
1495     if (isLeaf[i]) {
1496       counter++;
1497     }
1498   }
1499 
1500   ierr = PetscSFSetGraph(sf, nroots, leafCounter, leafsNew, PETSC_OWN_POINTER, leafLocationsNew, PETSC_OWN_POINTER);CHKERRQ(ierr);
1501   ierr = PetscFree(newOwners);CHKERRQ(ierr);
1502   ierr = PetscFree(newNumbers);CHKERRQ(ierr);
1503   ierr = PetscFree(isLeaf);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 static PetscErrorCode DMPlexViewDistribution(MPI_Comm comm, PetscInt n, PetscInt skip, PetscInt *vtxwgt, PetscInt *part, PetscViewer viewer)
1508 {
1509   PetscInt *distribution, min, max, sum, i, ierr;
1510   PetscMPIInt rank, size;
1511   PetscFunctionBegin;
1512   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1513   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1514   ierr = PetscCalloc1(size, &distribution);CHKERRQ(ierr);
1515   for (i=0; i<n; i++) {
1516     if (part) distribution[part[i]] += vtxwgt[skip*i];
1517     else distribution[rank] += vtxwgt[skip*i];
1518   }
1519   ierr = MPI_Allreduce(MPI_IN_PLACE, distribution, size, MPIU_INT, MPI_SUM, comm);CHKERRQ(ierr);
1520   min = distribution[0];
1521   max = distribution[0];
1522   sum = distribution[0];
1523   for (i=1; i<size; i++) {
1524     if (distribution[i]<min) min=distribution[i];
1525     if (distribution[i]>max) max=distribution[i];
1526     sum += distribution[i];
1527   }
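  /* Balance is max/avg, i.e. 1.0 corresponds to a perfectly even distribution of the weights */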
1528   ierr = PetscViewerASCIIPrintf(viewer, "Min: %D, Avg: %D, Max: %D, Balance: %f\n", min, sum/size, max, (max*1.*size)/sum);CHKERRQ(ierr);
1529   ierr = PetscFree(distribution);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #endif
1534 
1535 /*@
1536   DMPlexRebalanceSharedPoints - Redistribute shared points in the plex in order to achieve better load balancing. This routine updates the point SF of the DM in place.
1537 
1538   Input Parameters:
1539 + dm               - The DMPlex object.
1540 . entityDepth      - depth of the entities to balance (e.g. 0 -> balance vertices).
1541 . useInitialGuess  - whether to use the current distribution as initial guess (only used by ParMETIS).
1542 - parallel         - whether to use ParMETIS and do the partition in parallel, or to gather the graph onto a single process and use METIS.
1543 
1544   Output Parameter:
1545 . success          - whether the graph partitioning was successful or not. If not, try useInitialGuess=PETSC_TRUE and parallel=PETSC_TRUE.
1546 
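  Options Database Key:
. -dm_rebalance_partition_view - View the load balance of the entities before and after rebalancing

  Example usage (a sketch; assumes dm is a distributed, interpolated DMPlex):
.vb
  PetscBool success;

  ierr = DMPlexRebalanceSharedPoints(dm, 0, PETSC_FALSE, PETSC_TRUE, &success);CHKERRQ(ierr);
  if (!success) { /* retry using the current distribution as the initial guess */
    ierr = DMPlexRebalanceSharedPoints(dm, 0, PETSC_TRUE, PETSC_TRUE, &success);CHKERRQ(ierr);
  }
.ve
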
1547   Level: intermediate
1548 
1549 @*/
1550 
1551 PetscErrorCode DMPlexRebalanceSharedPoints(DM dm, PetscInt entityDepth, PetscBool useInitialGuess, PetscBool parallel, PetscBool *success)
1552 {
1553 #if defined(PETSC_HAVE_PARMETIS)
1554   PetscSF     sf;
1555   PetscInt    ierr, i, j, idx, jdx;
1556   PetscInt    eBegin, eEnd, nroots, nleafs, pStart, pEnd;
1557   const       PetscInt *degrees, *ilocal;
1558   const       PetscSFNode *iremote;
1559   PetscBool   *toBalance, *isLeaf, *isExclusivelyOwned, *isNonExclusivelyOwned;
1560   PetscInt    numExclusivelyOwned, numNonExclusivelyOwned;
1561   PetscMPIInt rank, size;
1562   PetscInt    *globalNumbersOfLocalOwnedVertices, *leafGlobalNumbers;
1563   const       PetscInt *cumSumVertices;
1564   PetscInt    offset, counter;
1565   PetscInt    lenadjncy;
1566   PetscInt    *xadj, *adjncy, *vtxwgt;
1567   PetscInt    lenxadj;
1568   PetscInt    *adjwgt = NULL;
1569   PetscInt    *part, *options;
1570   PetscInt    nparts, wgtflag, numflag, ncon, edgecut;
1571   real_t      *ubvec;
1572   PetscInt    *firstVertices, *renumbering;
1573   PetscInt    failed, failedGlobal;
1574   MPI_Comm    comm;
1575   Mat         A, Apre;
1576   const char *prefix = NULL;
1577   PetscViewer       viewer;
1578   PetscViewerFormat format;
1579   PetscLayout layout;
1580 
1581   PetscFunctionBegin;
1582   if (success) *success = PETSC_FALSE;
1583   ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
1584   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1585   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1586   if (size==1) PetscFunctionReturn(0);
1587 
1588   ierr = PetscLogEventBegin(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0);CHKERRQ(ierr);
1589 
1590   ierr = PetscOptionsGetViewer(comm,((PetscObject)dm)->options, prefix,"-dm_rebalance_partition_view",&viewer,&format,NULL);CHKERRQ(ierr);
1591   if (viewer) {
1592     ierr = PetscViewerPushFormat(viewer,format);CHKERRQ(ierr);
1593   }
1594 
1595   /* Figure out all points in the plex that we are interested in balancing. */
1596   ierr = DMPlexGetDepthStratum(dm, entityDepth, &eBegin, &eEnd);CHKERRQ(ierr);
1597   ierr = DMPlexGetChart(dm, &pStart, &pEnd);CHKERRQ(ierr);
1598   ierr = PetscMalloc1(pEnd-pStart, &toBalance);CHKERRQ(ierr);
1599 
1600   for (i=0; i<pEnd-pStart; i++) {
1601     toBalance[i] = (PetscBool)(i+pStart>=eBegin && i+pStart<eEnd); /* point pStart+i lies in the requested stratum */
1602   }
1603 
1604   /* There are three types of points:
1605    * exclusivelyOwned: points that are owned by this process and only seen by this process
1606    * nonExclusivelyOwned: points that are owned by this process but seen by at least one other process
1607    * leaf: a point that is seen by this process but owned by a different process
1608    */
1609   ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
1610   ierr = PetscSFGetGraph(sf, &nroots, &nleafs, &ilocal, &iremote);CHKERRQ(ierr);
1611   ierr = PetscMalloc1(pEnd-pStart, &isLeaf);CHKERRQ(ierr);
1612   ierr = PetscMalloc1(pEnd-pStart, &isNonExclusivelyOwned);CHKERRQ(ierr);
1613   ierr = PetscMalloc1(pEnd-pStart, &isExclusivelyOwned);CHKERRQ(ierr);
1614   for (i=0; i<pEnd-pStart; i++) {
1615     isNonExclusivelyOwned[i] = PETSC_FALSE;
1616     isExclusivelyOwned[i] = PETSC_FALSE;
1617     isLeaf[i] = PETSC_FALSE;
1618   }
1619 
1620   /* start by marking all the leafs */
1621   for (i=0; i<nleafs; i++) {
1622     isLeaf[ilocal[i]-pStart] = PETSC_TRUE;
1623   }
1624 
1625   /* for an owned point, we can figure out whether another processor sees it or
1626    * not by calculating its degree */
1627   ierr = PetscSFComputeDegreeBegin(sf, &degrees);CHKERRQ(ierr);
1628   ierr = PetscSFComputeDegreeEnd(sf, &degrees);CHKERRQ(ierr);
1629 
1630   numExclusivelyOwned = 0;
1631   numNonExclusivelyOwned = 0;
1632   for (i=0; i<pEnd-pStart; i++) {
1633     if (toBalance[i]) {
1634       if (degrees[i] > 0) {
1635         isNonExclusivelyOwned[i] = PETSC_TRUE;
1636         numNonExclusivelyOwned += 1;
1637       } else {
1638         if (!isLeaf[i]) {
1639           isExclusivelyOwned[i] = PETSC_TRUE;
1640           numExclusivelyOwned += 1;
1641         }
1642       }
1643     }
1644   }
1645 
1646   /* We are going to build a graph with one vertex per core representing the
1647    * exclusively owned points and then one vertex per nonExclusively owned
1648    * point. */
1649 
1650   ierr = PetscLayoutCreate(comm, &layout);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetLocalSize(layout, 1 + numNonExclusivelyOwned);CHKERRQ(ierr);
1652   ierr = PetscLayoutSetUp(layout);CHKERRQ(ierr);
1653   ierr = PetscLayoutGetRanges(layout, &cumSumVertices);CHKERRQ(ierr);
1654 
1655   ierr = PetscMalloc1(pEnd-pStart, &globalNumbersOfLocalOwnedVertices);CHKERRQ(ierr);
1656   for (i=0; i<pEnd-pStart; i++) {globalNumbersOfLocalOwnedVertices[i] = pStart - 1;}
1657   offset = cumSumVertices[rank];
1658   counter = 0;
1659   for (i=0; i<pEnd-pStart; i++) {
1660     if (toBalance[i]) {
1661       if (degrees[i] > 0) {
1662         globalNumbersOfLocalOwnedVertices[i] = counter + 1 + offset;
1663         counter++;
1664       }
1665     }
1666   }
1667 
1668   /* send the global numbers of the vertices I own to the leaves so that they know which vertex to connect to */
1669   ierr = PetscMalloc1(pEnd-pStart, &leafGlobalNumbers);CHKERRQ(ierr);
1670   ierr = PetscSFBcastBegin(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers);CHKERRQ(ierr);
1671   ierr = PetscSFBcastEnd(sf, MPIU_INT, globalNumbersOfLocalOwnedVertices, leafGlobalNumbers);CHKERRQ(ierr);
1672 
1673   /* Now start building the data structure for ParMETIS */
1674 
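  /* Assemble the auxiliary graph as a parallel matrix: a first pass through a MATPREALLOCATOR records the
     nonzero pattern, then the same entries are inserted into the actual MATMPIAIJ. Each edge symmetrically
     couples this rank's per-process vertex with a shared (non-exclusively owned or leaf) point. */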
1675   ierr = MatCreate(comm, &Apre);CHKERRQ(ierr);
1676   ierr = MatSetType(Apre, MATPREALLOCATOR);CHKERRQ(ierr);
1677   ierr = MatSetSizes(Apre, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]);CHKERRQ(ierr);
1678   ierr = MatSetUp(Apre);CHKERRQ(ierr);
1679 
1680   for (i=0; i<pEnd-pStart; i++) {
1681     if (toBalance[i]) {
1682       idx = cumSumVertices[rank];
1683       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1684       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1685       else continue;
1686       ierr = MatSetValue(Apre, idx, jdx, 1, INSERT_VALUES);CHKERRQ(ierr);
1687       ierr = MatSetValue(Apre, jdx, idx, 1, INSERT_VALUES);CHKERRQ(ierr);
1688     }
1689   }
1690 
1691   ierr = MatAssemblyBegin(Apre, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd(Apre, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693 
1694   ierr = MatCreate(comm, &A);CHKERRQ(ierr);
1695   ierr = MatSetType(A, MATMPIAIJ);CHKERRQ(ierr);
1696   ierr = MatSetSizes(A, 1+numNonExclusivelyOwned, 1+numNonExclusivelyOwned, cumSumVertices[size], cumSumVertices[size]);CHKERRQ(ierr);
1697   ierr = MatPreallocatorPreallocate(Apre, PETSC_FALSE, A);CHKERRQ(ierr);
1698   ierr = MatDestroy(&Apre);CHKERRQ(ierr);
1699 
1700   for (i=0; i<pEnd-pStart; i++) {
1701     if (toBalance[i]) {
1702       idx = cumSumVertices[rank];
1703       if (isNonExclusivelyOwned[i]) jdx = globalNumbersOfLocalOwnedVertices[i];
1704       else if (isLeaf[i]) jdx = leafGlobalNumbers[i];
1705       else continue;
1706       ierr = MatSetValue(A, idx, jdx, 1, INSERT_VALUES);CHKERRQ(ierr);
1707       ierr = MatSetValue(A, jdx, idx, 1, INSERT_VALUES);CHKERRQ(ierr);
1708     }
1709   }
1710 
1711   ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1712   ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1713   ierr = PetscFree(leafGlobalNumbers);CHKERRQ(ierr);
1714   ierr = PetscFree(globalNumbersOfLocalOwnedVertices);CHKERRQ(ierr);
1715 
1716   nparts = size;
1717   wgtflag = 2;
1718   numflag = 0;
1719   ncon = 2;
1720   real_t *tpwgts;
1721   ierr = PetscMalloc1(ncon * nparts, &tpwgts);CHKERRQ(ierr);
1722   for (i=0; i<ncon*nparts; i++) {
1723     tpwgts[i] = 1./(nparts);
1724   }
1725 
1726   ierr = PetscMalloc1(ncon, &ubvec);CHKERRQ(ierr);
1727   ubvec[0] = 1.01;
1728   ubvec[1] = 1.01;
1729   lenadjncy = 0;
1730   for (i=0; i<1+numNonExclusivelyOwned; i++) {
1731     PetscInt temp=0;
1732     ierr = MatGetRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL);CHKERRQ(ierr);
1733     lenadjncy += temp;
1734     ierr = MatRestoreRow(A, cumSumVertices[rank] + i, &temp, NULL, NULL);CHKERRQ(ierr);
1735   }
1736   ierr = PetscMalloc1(lenadjncy, &adjncy);CHKERRQ(ierr);
1737   lenxadj = 2 + numNonExclusivelyOwned;
1738   ierr = PetscMalloc1(lenxadj, &xadj);CHKERRQ(ierr);
1739   xadj[0] = 0;
1740   counter = 0;
1741   for (i=0; i<1+numNonExclusivelyOwned; i++) {
1742     PetscInt        temp=0;
1743     const PetscInt *cols;
1744     ierr = MatGetRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL);CHKERRQ(ierr);
1745     ierr = PetscArraycpy(&adjncy[counter], cols, temp);CHKERRQ(ierr);
1746     counter += temp;
1747     xadj[i+1] = counter;
1748     ierr = MatRestoreRow(A, cumSumVertices[rank] + i, &temp, &cols, NULL);CHKERRQ(ierr);
1749   }
1750 
1751   ierr = PetscMalloc1(cumSumVertices[rank+1]-cumSumVertices[rank], &part);CHKERRQ(ierr);
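  /* Vertex weights for the ncon constraints: the per-process vertex carries the number of exclusively owned
     points plus a unit weight in the second constraint; each shared point carries a unit weight in the first
     constraint only. Balancing the second constraint encourages each part to keep exactly one per-process
     vertex, so the exclusively owned points stay together. */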
1752   ierr = PetscMalloc1(ncon*(1 + numNonExclusivelyOwned), &vtxwgt);CHKERRQ(ierr);
1753   vtxwgt[0] = numExclusivelyOwned;
1754   if (ncon>1) vtxwgt[1] = 1;
1755   for (i=0; i<numNonExclusivelyOwned; i++) {
1756     vtxwgt[ncon*(i+1)] = 1;
1757     if (ncon>1) vtxwgt[ncon*(i+1)+1] = 0;
1758   }
1759 
1760   if (viewer) {
1761     ierr = PetscViewerASCIIPrintf(viewer, "Attempt rebalancing of shared points of depth %D on interface of mesh distribution.\n", entityDepth);CHKERRQ(ierr);
1762     ierr = PetscViewerASCIIPrintf(viewer, "Size of generated auxiliary graph: %D\n", cumSumVertices[size]);CHKERRQ(ierr);
1763   }
1764   if (parallel) {
1765     ierr = PetscMalloc1(4, &options);CHKERRQ(ierr);
1766     options[0] = 1;
1767     options[1] = 0; /* Verbosity */
1768     options[2] = 0; /* Seed */
1769     options[3] = PARMETIS_PSR_COUPLED; /* Sub-domains and processes are coupled (initial partition matches the process distribution) */
1770     if (viewer) { ierr = PetscViewerASCIIPrintf(viewer, "Using ParMETIS to partition graph.\n");CHKERRQ(ierr); }
1771     if (useInitialGuess) {
1772       if (viewer) { ierr = PetscViewerASCIIPrintf(viewer, "Using current distribution of points as initial guess.\n");CHKERRQ(ierr); }
1773       PetscStackPush("ParMETIS_V3_RefineKway");
1774       ierr = ParMETIS_V3_RefineKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1775       if (ierr != METIS_OK) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_RefineKway()");
1776       PetscStackPop;
1777     } else {
1778       PetscStackPush("ParMETIS_V3_PartKway");
1779       ierr = ParMETIS_V3_PartKway((PetscInt*)cumSumVertices, xadj, adjncy, vtxwgt, adjwgt, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &comm);
1780       PetscStackPop;
1781       if (ierr != METIS_OK) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ParMETIS_V3_PartKway()");
1782     }
1783     ierr = PetscFree(options);CHKERRQ(ierr);
1784   } else {
1785     if (viewer) { ierr = PetscViewerASCIIPrintf(viewer, "Using METIS to partition graph.\n");CHKERRQ(ierr); }
1786     Mat As;
1787     PetscInt numRows;
1788     PetscInt *partGlobal;
1789     ierr = MatCreateRedundantMatrix(A, size, MPI_COMM_NULL, MAT_INITIAL_MATRIX, &As);CHKERRQ(ierr);
1790 
1791     PetscInt *numExclusivelyOwnedAll;
1792     ierr = PetscMalloc1(size, &numExclusivelyOwnedAll);CHKERRQ(ierr);
1793     numExclusivelyOwnedAll[rank] = numExclusivelyOwned;
1794     ierr = MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,numExclusivelyOwnedAll,1,MPIU_INT,comm);CHKERRQ(ierr);
1795 
1796     ierr = MatGetSize(As, &numRows, NULL);CHKERRQ(ierr);
1797     ierr = PetscMalloc1(numRows, &partGlobal);CHKERRQ(ierr);
1798     if (!rank) {
1799       PetscInt *adjncy_g, *xadj_g, *vtxwgt_g;
1800       lenadjncy = 0;
1801 
1802       for (i=0; i<numRows; i++) {
1803         PetscInt temp=0;
1804         ierr = MatGetRow(As, i, &temp, NULL, NULL);CHKERRQ(ierr);
1805         lenadjncy += temp;
1806         ierr = MatRestoreRow(As, i, &temp, NULL, NULL);CHKERRQ(ierr);
1807       }
1808       ierr = PetscMalloc1(lenadjncy, &adjncy_g);CHKERRQ(ierr);
1809       lenxadj = 1 + numRows;
1810       ierr = PetscMalloc1(lenxadj, &xadj_g);CHKERRQ(ierr);
1811       xadj_g[0] = 0;
1812       counter = 0;
1813       for (i=0; i<numRows; i++) {
1814         PetscInt        temp=0;
1815         const PetscInt *cols;
1816         ierr = MatGetRow(As, i, &temp, &cols, NULL);CHKERRQ(ierr);
1817         ierr = PetscArraycpy(&adjncy_g[counter], cols, temp);CHKERRQ(ierr);
1818         counter += temp;
1819         xadj_g[i+1] = counter;
1820         ierr = MatRestoreRow(As, i, &temp, &cols, NULL);CHKERRQ(ierr);
1821       }
1822       ierr = PetscMalloc1(2*numRows, &vtxwgt_g);CHKERRQ(ierr);
1823       for (i=0; i<size; i++){
1824         vtxwgt_g[ncon*cumSumVertices[i]] = numExclusivelyOwnedAll[i];
1825         if (ncon>1) vtxwgt_g[ncon*cumSumVertices[i]+1] = 1;
1826         for (j=cumSumVertices[i]+1; j<cumSumVertices[i+1]; j++) {
1827           vtxwgt_g[ncon*j] = 1;
1828           if (ncon>1) vtxwgt_g[ncon*j+1] = 0;
1829         }
1830       }
1831       ierr = PetscMalloc1(64, &options);CHKERRQ(ierr);
1832       ierr = METIS_SetDefaultOptions(options); /* initialize all defaults */
1833       if (ierr != METIS_OK) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_SetDefaultOptions()");
1834       options[METIS_OPTION_CONTIG] = 1;
1835       PetscStackPush("METIS_PartGraphKway");
1836       ierr = METIS_PartGraphKway(&numRows, &ncon, xadj_g, adjncy_g, vtxwgt_g, NULL, NULL, &nparts, tpwgts, ubvec, options, &edgecut, partGlobal);
1837       PetscStackPop;
1838       if (ierr != METIS_OK) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in METIS_PartGraphKway()");
1839       ierr = PetscFree(options);CHKERRQ(ierr);
1840       ierr = PetscFree(xadj_g);CHKERRQ(ierr);
1841       ierr = PetscFree(adjncy_g);CHKERRQ(ierr);
1842       ierr = PetscFree(vtxwgt_g);CHKERRQ(ierr);
1843     }
1844     ierr = PetscFree(numExclusivelyOwnedAll);CHKERRQ(ierr);
1845 
1846     /* Now scatter the parts array. */
1847     {
1848       PetscMPIInt *counts, *mpiCumSumVertices;
1849       ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
1850       ierr = PetscMalloc1(size+1, &mpiCumSumVertices);CHKERRQ(ierr);
1851       for (i=0; i<size; i++) {
1852         ierr = PetscMPIIntCast(cumSumVertices[i+1] - cumSumVertices[i], &(counts[i]));CHKERRQ(ierr);
1853       }
1854       for (i=0; i<=size; i++) {
1855         ierr = PetscMPIIntCast(cumSumVertices[i], &(mpiCumSumVertices[i]));CHKERRQ(ierr);
1856       }
1857       ierr = MPI_Scatterv(partGlobal, counts, mpiCumSumVertices, MPIU_INT, part, counts[rank], MPIU_INT, 0, comm);CHKERRQ(ierr);
1858       ierr = PetscFree(counts);CHKERRQ(ierr);
1859       ierr = PetscFree(mpiCumSumVertices);CHKERRQ(ierr);
1860     }
1861 
1862     ierr = PetscFree(partGlobal);CHKERRQ(ierr);
1863     ierr = MatDestroy(&As);CHKERRQ(ierr);
1864   }
1865 
1866   ierr = MatDestroy(&A);CHKERRQ(ierr);
1867   ierr = PetscFree(ubvec);CHKERRQ(ierr);
1868   ierr = PetscFree(tpwgts);CHKERRQ(ierr);
1869 
1870   /* Now rename the result so that the vertex resembling the exclusively owned points stays on the same rank */
1871 
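  /* part[0] on each rank is the partition assigned to that rank's per-process vertex; renumber the
     partitions so that this vertex, and hence the block of exclusively owned points it represents, stays on
     its original rank */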
1872   ierr = PetscMalloc1(size, &firstVertices);CHKERRQ(ierr);
1873   ierr = PetscMalloc1(size, &renumbering);CHKERRQ(ierr);
1874   firstVertices[rank] = part[0];
1875   ierr = MPI_Allgather(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,firstVertices,1,MPIU_INT,comm);CHKERRQ(ierr);
1876   for (i=0; i<size; i++) {
1877     renumbering[firstVertices[i]] = i;
1878   }
1879   for (i=0; i<cumSumVertices[rank+1]-cumSumVertices[rank]; i++) {
1880     part[i] = renumbering[part[i]];
1881   }
1882   /* Check if the renumbering worked (this can fail when ParMETIS gives fewer partitions than there are processes) */
1883   failed = (PetscInt)(part[0] != rank);
1884   ierr = MPI_Allreduce(&failed, &failedGlobal, 1, MPIU_INT, MPI_SUM, comm);CHKERRQ(ierr);
1885 
1886   ierr = PetscFree(firstVertices);CHKERRQ(ierr);
1887   ierr = PetscFree(renumbering);CHKERRQ(ierr);
1888 
1889   if (failedGlobal > 0) {
1890     ierr = PetscLayoutDestroy(&layout);CHKERRQ(ierr);
1891     ierr = PetscFree(xadj);CHKERRQ(ierr);
1892     ierr = PetscFree(adjncy);CHKERRQ(ierr);
1893     ierr = PetscFree(vtxwgt);CHKERRQ(ierr);
1894     ierr = PetscFree(toBalance);CHKERRQ(ierr);
1895     ierr = PetscFree(isLeaf);CHKERRQ(ierr);
1896     ierr = PetscFree(isNonExclusivelyOwned);CHKERRQ(ierr);
1897     ierr = PetscFree(isExclusivelyOwned);CHKERRQ(ierr);
1898     ierr = PetscFree(part);CHKERRQ(ierr);
1899     if (viewer) {
1900       ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
1901       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1902     }
1903     ierr = PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0);CHKERRQ(ierr);
1904     PetscFunctionReturn(0);
1905   }
1906 
1907   /* Let's check how well we did distributing points */
1908   if (viewer) {
1909     ierr = PetscViewerASCIIPrintf(viewer, "Comparing number of owned entities of depth %D on each process before rebalancing, after rebalancing, and after consistency checks.\n", entityDepth);CHKERRQ(ierr);
1910     ierr = PetscViewerASCIIPrintf(viewer, "Initial.     ");CHKERRQ(ierr);
1911     ierr = DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, NULL, viewer);CHKERRQ(ierr);
1912     ierr = PetscViewerASCIIPrintf(viewer, "Rebalanced.  ");CHKERRQ(ierr);
1913     ierr = DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer);CHKERRQ(ierr);
1914   }
1915 
1916   /* Now check that every vertex is owned by a process that it is actually connected to. */
1917   for (i=1; i<=numNonExclusivelyOwned; i++) {
1918     PetscInt loc = 0;
1919     ierr = PetscFindInt(cumSumVertices[part[i]], xadj[i+1]-xadj[i], &adjncy[xadj[i]], &loc);CHKERRQ(ierr);
1920     /* If not, then just set the owner to the original owner (hopefully a rare event, it means that a vertex has been isolated) */
1921     if (loc<0) {
1922       part[i] = rank;
1923     }
1924   }
1925 
1926   /* Let's see how significant the influence of the previous fixing-up step was. */
1927   if (viewer) {
1928     ierr = PetscViewerASCIIPrintf(viewer, "After.       ");CHKERRQ(ierr);
1929     ierr = DMPlexViewDistribution(comm, cumSumVertices[rank+1]-cumSumVertices[rank], ncon, vtxwgt, part, viewer);CHKERRQ(ierr);
1930   }
1931 
1932   ierr = PetscLayoutDestroy(&layout);CHKERRQ(ierr);
1933   ierr = PetscFree(xadj);CHKERRQ(ierr);
1934   ierr = PetscFree(adjncy);CHKERRQ(ierr);
1935   ierr = PetscFree(vtxwgt);CHKERRQ(ierr);
1936 
1937   /* Almost done, now rewrite the SF to reflect the new ownership. */
1938   {
1939     PetscInt *pointsToRewrite;
1940     ierr = PetscMalloc1(numNonExclusivelyOwned, &pointsToRewrite);CHKERRQ(ierr);
1941     counter = 0;
1942     for (i=0; i<pEnd-pStart; i++) {
1943       if (toBalance[i]) {
1944         if (isNonExclusivelyOwned[i]) {
1945           pointsToRewrite[counter] = i + pStart;
1946           counter++;
1947         }
1948       }
1949     }
1950     ierr = DMPlexRewriteSF(dm, numNonExclusivelyOwned, pointsToRewrite, part+1, degrees);CHKERRQ(ierr);
1951     ierr = PetscFree(pointsToRewrite);CHKERRQ(ierr);
1952   }
1953 
1954   ierr = PetscFree(toBalance);CHKERRQ(ierr);
1955   ierr = PetscFree(isLeaf);CHKERRQ(ierr);
1956   ierr = PetscFree(isNonExclusivelyOwned);CHKERRQ(ierr);
1957   ierr = PetscFree(isExclusivelyOwned);CHKERRQ(ierr);
1958   ierr = PetscFree(part);CHKERRQ(ierr);
1959   if (viewer) {
1960     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
1961     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1962   }
1963   if (success) *success = PETSC_TRUE;
1964   ierr = PetscLogEventEnd(DMPLEX_RebalanceSharedPoints, dm, 0, 0, 0);CHKERRQ(ierr);
1965 #else
1966   SETERRQ(PetscObjectComm((PetscObject) dm), PETSC_ERR_SUP, "Mesh partitioning needs external package support.\nPlease reconfigure with --download-parmetis.");
1967 #endif
1968   PetscFunctionReturn(0);
1969 }
1970