1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4
5 #if defined(PETSC_HAVE_CUDA)
6 #include <cuda_runtime.h>
7 #endif
8
9 #if defined(PETSC_HAVE_HIP)
10 #include <hip/hip_runtime.h>
11 #endif
12
13 #if defined(PETSC_USE_DEBUG)
14 # define PetscSFCheckGraphSet(sf,arg) do { \
15 if (PetscUnlikely(!(sf)->graphset)) \
16 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17 } while (0)
18 #else
19 # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20 #endif
21
22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
23
24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25 {
26 PetscFunctionBegin;
27 PetscValidPointer(type,2);
28 *type = PETSC_MEMTYPE_HOST;
29 #if defined(PETSC_HAVE_CUDA)
30 if (PetscCUDAInitialized && data) {
31 cudaError_t cerr;
32 struct cudaPointerAttributes attr;
33 enum cudaMemoryType mtype;
34 cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35 cudaGetLastError(); /* Reset the last error */
36 #if (CUDART_VERSION < 10000)
37 mtype = attr.memoryType;
38 #else
39 mtype = attr.type;
40 #endif
41 if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42 }
43 #endif
44
45 #if defined(PETSC_HAVE_HIP)
46 if (PetscHIPInitialized && data) {
47 hipError_t cerr;
48 struct hipPointerAttribute_t attr;
49 enum hipMemoryType mtype;
50   cerr = hipPointerGetAttributes(&attr,data); /* Do not check the error code since passing a plain host pointer returns hipErrorInvalidValue */
51 hipGetLastError(); /* Reset the last error */
52 mtype = attr.memoryType;
53 if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54 }
55 #endif
56 PetscFunctionReturn(0);
57 }
58
59 /*@
60 PetscSFCreate - create a star forest communication context
61
62 Collective
63
64 Input Arguments:
65 . comm - communicator on which the star forest will operate
66
67 Output Arguments:
68 . sf - new star forest context
69
70 Options Database Keys:
71 + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default)
72 . -sf_type window -Use MPI-3 one-sided window for communication
73 - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication
74
75 Level: intermediate
76
77 Notes:
78    When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv, or
79    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80    SFs are optimized and have better performance than general SFs.
81
82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83 @*/
84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85 {
86 PetscErrorCode ierr;
87 PetscSF b;
88
89 PetscFunctionBegin;
90 PetscValidPointer(sf,2);
91 ierr = PetscSFInitializePackage();CHKERRQ(ierr);
92
93 ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
94
95 b->nroots = -1;
96 b->nleaves = -1;
97 b->minleaf = PETSC_MAX_INT;
98 b->maxleaf = PETSC_MIN_INT;
99 b->nranks = -1;
100 b->rankorder = PETSC_TRUE;
101 b->ingroup = MPI_GROUP_NULL;
102 b->outgroup = MPI_GROUP_NULL;
103 b->graphset = PETSC_FALSE;
104 #if defined(PETSC_HAVE_DEVICE)
105 b->use_gpu_aware_mpi = use_gpu_aware_mpi;
106 b->use_stream_aware_mpi = PETSC_FALSE;
107 b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109 b->backend = PETSCSF_BACKEND_KOKKOS;
110 #elif defined(PETSC_HAVE_CUDA)
111 b->backend = PETSCSF_BACKEND_CUDA;
112 #endif
113 #endif
114 *sf = b;
115 PetscFunctionReturn(0);
116 }
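/* Usage sketch (illustrative only, not part of this file's code path): create an SF on
   PETSC_COMM_WORLD, configure it from the options database, and destroy it. The variable
   names below are hypothetical.

     PetscSF        sf;
     PetscErrorCode ierr;

     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);  // picks up -sf_type, -sf_backend, ...
     // ... set the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern(), then use it ...
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/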
117
118 /*@
119 PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
120
121 Collective
122
123 Input Arguments:
124 . sf - star forest
125
126 Level: advanced
127
128 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
129 @*/
130 PetscErrorCode PetscSFReset(PetscSF sf)
131 {
132 PetscErrorCode ierr;
133
134 PetscFunctionBegin;
135 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
136 if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
137 sf->nroots = -1;
138 sf->nleaves = -1;
139 sf->minleaf = PETSC_MAX_INT;
140 sf->maxleaf = PETSC_MIN_INT;
141 sf->mine = NULL;
142 sf->remote = NULL;
143 sf->graphset = PETSC_FALSE;
144 ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
145 ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
146 sf->nranks = -1;
147 ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
148 #if defined(PETSC_HAVE_DEVICE)
149 for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
150 #endif
151 sf->degreeknown = PETSC_FALSE;
152 ierr = PetscFree(sf->degree);CHKERRQ(ierr);
153 if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
154 if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
155 ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
156 ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
157 sf->setupcalled = PETSC_FALSE;
158 PetscFunctionReturn(0);
159 }
160
161 /*@C
162 PetscSFSetType - Set the PetscSF communication implementation
163
164 Collective on PetscSF
165
166 Input Parameters:
167 + sf - the PetscSF context
168 - type - a known method
169
170 Options Database Key:
171 . -sf_type <type> - Sets the method; use -help for a list
172 of available methods (for instance, window, basic, neighbor)
173
174 Notes:
175 See "include/petscsf.h" for available methods (for instance)
176 + PETSCSFWINDOW - MPI-2/3 one-sided
177 - PETSCSFBASIC - basic implementation using MPI-1 two-sided
178
179 Level: intermediate
180
181 .seealso: PetscSFType, PetscSFCreate()
182 @*/
183 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
184 {
185 PetscErrorCode ierr,(*r)(PetscSF);
186 PetscBool match;
187
188 PetscFunctionBegin;
189 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
190 PetscValidCharPointer(type,2);
191
192 ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
193 if (match) PetscFunctionReturn(0);
194
195 ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
196 if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
197 /* Destroy the previous PetscSF implementation context */
198 if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
199 ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
200 ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
201 ierr = (*r)(sf);CHKERRQ(ierr);
202 PetscFunctionReturn(0);
203 }
204
205 /*@C
206 PetscSFGetType - Get the PetscSF communication implementation
207
208 Not Collective
209
210 Input Parameter:
211 . sf - the PetscSF context
212
213 Output Parameter:
214 . type - the PetscSF type name
215
216 Level: intermediate
217
218 .seealso: PetscSFSetType(), PetscSFCreate()
219 @*/
220 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
221 {
222 PetscFunctionBegin;
223 PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
224 PetscValidPointer(type,2);
225 *type = ((PetscObject)sf)->type_name;
226 PetscFunctionReturn(0);
227 }
228
229 /*@
230 PetscSFDestroy - destroy star forest
231
232 Collective
233
234 Input Arguments:
235 . sf - address of star forest
236
237 Level: intermediate
238
239 .seealso: PetscSFCreate(), PetscSFReset()
240 @*/
241 PetscErrorCode PetscSFDestroy(PetscSF *sf)
242 {
243 PetscErrorCode ierr;
244
245 PetscFunctionBegin;
246 if (!*sf) PetscFunctionReturn(0);
247 PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
248 if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
249 ierr = PetscSFReset(*sf);CHKERRQ(ierr);
250 if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
251 ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
252 PetscFunctionReturn(0);
253 }
254
255 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256 {
257 PetscInt i, nleaves;
258 PetscMPIInt size;
259 const PetscInt *ilocal;
260 const PetscSFNode *iremote;
261 PetscErrorCode ierr;
262
263 PetscFunctionBegin;
264 if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
265 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
267 for (i = 0; i < nleaves; i++) {
268 const PetscInt rank = iremote[i].rank;
269 const PetscInt remote = iremote[i].index;
270 const PetscInt leaf = ilocal ? ilocal[i] : i;
271 if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272 if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273 if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274 }
275 PetscFunctionReturn(0);
276 }
277
278
279
280
281 /*@
282 PetscSFSetUp - set up communication structures
283
284 Collective
285
286 Input Arguments:
287 . sf - star forest communication object
288
289 Level: beginner
290
291 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
292 @*/
293 PetscErrorCode PetscSFSetUp(PetscSF sf)
294 {
295 PetscErrorCode ierr;
296
297 PetscFunctionBegin;
298 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
299 PetscSFCheckGraphSet(sf,1);
300 if (sf->setupcalled) PetscFunctionReturn(0);
301 ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
302 ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
303 if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
304 if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
305 #if defined(PETSC_HAVE_CUDA)
306 if (sf->backend == PETSCSF_BACKEND_CUDA) {
307 sf->ops->Malloc = PetscSFMalloc_Cuda;
308 sf->ops->Free = PetscSFFree_Cuda;
309 }
310 #endif
311
312 #if defined(PETSC_HAVE_KOKKOS)
313 if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
314 sf->ops->Malloc = PetscSFMalloc_Kokkos;
315 sf->ops->Free = PetscSFFree_Kokkos;
316 }
317 #endif
318 ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
319 sf->setupcalled = PETSC_TRUE;
320 PetscFunctionReturn(0);
321 }
322
323 /*@
324 PetscSFSetFromOptions - set PetscSF options using the options database
325
326 Logically Collective
327
328 Input Arguments:
329 . sf - star forest
330
331 Options Database Keys:
332 + -sf_type - implementation type, see PetscSFSetType()
333 . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334 . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
335 use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336 If true, this option only works with -use_gpu_aware_mpi 1.
337 . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
338 If true, this option only works with -use_gpu_aware_mpi 1.
339
340 - -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos.
341 On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
342                            the only available backend is kokkos.
343
344 Level: intermediate
345 @*/
346 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347 {
348 PetscSFType deft;
349 char type[256];
350 PetscErrorCode ierr;
351 PetscBool flg;
352
353 PetscFunctionBegin;
354 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
355 ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
356 deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
357 ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
358 ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
359 ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
360 #if defined(PETSC_HAVE_DEVICE)
361 {
362 char backendstr[32] = {0};
363 PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
364 /* Change the defaults set in PetscSFCreate() with command line options */
365 ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
366 ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
367 ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
368 ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
369 ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
370 #if defined(PETSC_HAVE_CUDA)
371 if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372 else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373 else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
374 #elif defined(PETSC_HAVE_KOKKOS)
375 if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376 #endif
377 }
378 #endif
379 if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
380 ierr = PetscOptionsEnd();CHKERRQ(ierr);
381 PetscFunctionReturn(0);
382 }
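/* For example (hypothetical executable name), a run that selects the basic implementation and the
   kokkos device backend might be launched as:

     mpiexec -n 4 ./app -sf_type basic -sf_backend kokkos
*/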
383
384 /*@
385 PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
386
387 Logically Collective
388
389 Input Arguments:
390 + sf - star forest
391 - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
392
393 Level: advanced
394
395 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
396 @*/
397 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
398 {
399 PetscFunctionBegin;
400 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
401 PetscValidLogicalCollectiveBool(sf,flg,2);
402 if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
403 sf->rankorder = flg;
404 PetscFunctionReturn(0);
405 }
406
407 /*@
408 PetscSFSetGraph - Set a parallel star forest
409
410 Collective
411
412 Input Arguments:
413 + sf - star forest
414 . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
415 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
416 . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417 during setup in debug mode)
418 . localmode - copy mode for ilocal
419 . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420 during setup in debug mode)
421 - remotemode - copy mode for iremote
422
423 Level: intermediate
424
425 Notes:
426 In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
427
428 Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
429 encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
430 needed
431
432 Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433 indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
434
435 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
436 @*/
437 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
438 {
439 PetscErrorCode ierr;
440
441 PetscFunctionBegin;
442 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
443 if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
444 if (nleaves > 0) PetscValidPointer(iremote,6);
445 if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
446 if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
447
448 if (sf->nroots >= 0) { /* Reset only if graph already set */
449 ierr = PetscSFReset(sf);CHKERRQ(ierr);
450 }
451
452 ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
453
454 sf->nroots = nroots;
455 sf->nleaves = nleaves;
456
457 if (nleaves && ilocal) {
458 PetscInt i;
459 PetscInt minleaf = PETSC_MAX_INT;
460 PetscInt maxleaf = PETSC_MIN_INT;
461 int contiguous = 1;
462 for (i=0; i<nleaves; i++) {
463 minleaf = PetscMin(minleaf,ilocal[i]);
464 maxleaf = PetscMax(maxleaf,ilocal[i]);
465 contiguous &= (ilocal[i] == i);
466 }
467 sf->minleaf = minleaf;
468 sf->maxleaf = maxleaf;
469 if (contiguous) {
470 if (localmode == PETSC_OWN_POINTER) {
471 ierr = PetscFree(ilocal);CHKERRQ(ierr);
472 }
473 ilocal = NULL;
474 }
475 } else {
476 sf->minleaf = 0;
477 sf->maxleaf = nleaves - 1;
478 }
479
480 if (ilocal) {
481 switch (localmode) {
482 case PETSC_COPY_VALUES:
483 ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
484 ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
485 sf->mine = sf->mine_alloc;
486 break;
487 case PETSC_OWN_POINTER:
488 sf->mine_alloc = (PetscInt*)ilocal;
489 sf->mine = sf->mine_alloc;
490 break;
491 case PETSC_USE_POINTER:
492 sf->mine_alloc = NULL;
493 sf->mine = (PetscInt*)ilocal;
494 break;
495 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
496 }
497 }
498
499 switch (remotemode) {
500 case PETSC_COPY_VALUES:
501 ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
502 ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
503 sf->remote = sf->remote_alloc;
504 break;
505 case PETSC_OWN_POINTER:
506 sf->remote_alloc = (PetscSFNode*)iremote;
507 sf->remote = sf->remote_alloc;
508 break;
509 case PETSC_USE_POINTER:
510 sf->remote_alloc = NULL;
511 sf->remote = (PetscSFNode*)iremote;
512 break;
513 default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
514 }
515
516 ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
517 sf->graphset = PETSC_TRUE;
518 PetscFunctionReturn(0);
519 }
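/* Usage sketch (illustrative): build a "ring" SF in which each rank owns one root and has one leaf
   referencing root 0 on the next rank. All names below are hypothetical.

     PetscMPIInt rank,size;
     PetscSFNode remote;
     PetscSF     sf;

     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     remote.rank  = (rank+1)%size;   // rank owning the referenced root
     remote.index = 0;               // root index on that rank
     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
*/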
520
521 /*@
522 PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
523
524 Collective
525
526 Input Parameters:
527 + sf - The PetscSF
528 . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
530
531 Notes:
532    It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map,
533    and let n and N be the local and global sizes of x, respectively.
534
535    With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
536    sequential vectors y on all ranks.
537
538    With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
539    sequential vector y on rank 0.
540
541    In the above cases, entries of x are roots and entries of y are leaves.
542
543    With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
544    creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
545    of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this
546    does not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
547    items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
548
549 In this case, roots and leaves are symmetric.
550
551 Level: intermediate
552 @*/
553 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
554 {
555 MPI_Comm comm;
556 PetscInt n,N,res[2];
557 PetscMPIInt rank,size;
558 PetscSFType type;
559 PetscErrorCode ierr;
560
561 PetscFunctionBegin;
562 ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
563 if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
564 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
565 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
566
567 if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568 type = PETSCSFALLTOALL;
569 ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
570 ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
571 ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
572 ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
573 } else {
574 ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
575 ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
576 res[0] = n;
577 res[1] = -n;
578 /* Check if n are same over all ranks so that we can optimize it */
579 ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
580 if (res[0] == -res[1]) { /* same n */
581 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
582 } else {
583 type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
584 }
585 ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
586 }
587 ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);
588
589 sf->pattern = pattern;
590 sf->mine = NULL; /* Contiguous */
591
592 /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
593 Also set other easy stuff.
594 */
595 if (pattern == PETSCSF_PATTERN_ALLGATHER) {
596 sf->nleaves = N;
597 sf->nroots = n;
598 sf->nranks = size;
599 sf->minleaf = 0;
600 sf->maxleaf = N - 1;
601 } else if (pattern == PETSCSF_PATTERN_GATHER) {
602 sf->nleaves = rank ? 0 : N;
603 sf->nroots = n;
604 sf->nranks = rank ? 0 : size;
605 sf->minleaf = 0;
606 sf->maxleaf = rank ? -1 : N - 1;
607 } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
608 sf->nleaves = size;
609 sf->nroots = size;
610 sf->nranks = size;
611 sf->minleaf = 0;
612 sf->maxleaf = size - 1;
613 }
614 sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
615 sf->graphset = PETSC_TRUE;
616 PetscFunctionReturn(0);
617 }
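/* Usage sketch (illustrative): an SF that, on Bcast, gathers n local root entries from every rank
   onto all ranks (the MPI_Allgatherv-like pattern). Names below are hypothetical; n is the local
   root count on this rank.

     PetscLayout map;
     PetscSF     sf;

     ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr);
     ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
     ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
     ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);  // the SF keeps its own reference to the layout
*/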
618
619 /*@
620 PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
621
622 Collective
623
624   Input Arguments:
625 . sf - star forest to invert
626
627   Output Arguments:
628 . isf - inverse of sf
629
630   Level: advanced
631
632 Notes:
633 All roots must have degree 1.
634
635 The local space may be a permutation, but cannot be sparse.
636
637 .seealso: PetscSFSetGraph()
638 @*/
639 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
640 {
641 PetscErrorCode ierr;
642 PetscMPIInt rank;
643 PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
644 const PetscInt *ilocal;
645 PetscSFNode *roots,*leaves;
646
647 PetscFunctionBegin;
648 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
649 PetscSFCheckGraphSet(sf,1);
650 PetscValidPointer(isf,2);
651
652 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
653 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
654
655 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
656 ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
657 for (i=0; i<maxlocal; i++) {
658 leaves[i].rank = rank;
659 leaves[i].index = i;
660 }
661 for (i=0; i <nroots; i++) {
662 roots[i].rank = -1;
663 roots[i].index = -1;
664 }
665 ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
666 ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
667
668 /* Check whether our leaves are sparse */
669 for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
670 if (count == nroots) newilocal = NULL;
671 else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
672 ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
673 for (i=0,count=0; i<nroots; i++) {
674 if (roots[i].rank >= 0) {
675 newilocal[count] = i;
676 roots[count].rank = roots[i].rank;
677 roots[count].index = roots[i].index;
678 count++;
679 }
680 }
681 }
682
683 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
684 ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
685 ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
686 PetscFunctionReturn(0);
687 }
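/* Usage sketch (illustrative): invert an SF whose roots all have degree 1, so a Bcast on the
   inverse moves data from the original leaf space to the original root space.

     PetscSF isf;

     ierr = PetscSFCreateInverseSF(sf,&isf);CHKERRQ(ierr);
     // ... use isf, e.g. with PetscSFBcastBegin()/PetscSFBcastEnd(), then ...
     ierr = PetscSFDestroy(&isf);CHKERRQ(ierr);
*/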
688
689 /*@
690 PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
691
692 Collective
693
694 Input Arguments:
695 + sf - communication object to duplicate
696 - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
697
698 Output Arguments:
699 . newsf - new communication object
700
701 Level: beginner
702
703 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
704 @*/
705 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
706 {
707 PetscSFType type;
708 PetscErrorCode ierr;
709
710 PetscFunctionBegin;
711 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
712 PetscValidLogicalCollectiveEnum(sf,opt,2);
713 PetscValidPointer(newsf,3);
714 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
715 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
716 if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
717 if (opt == PETSCSF_DUPLICATE_GRAPH) {
718 PetscSFCheckGraphSet(sf,1);
719 if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
720 PetscInt nroots,nleaves;
721 const PetscInt *ilocal;
722 const PetscSFNode *iremote;
723 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
724 ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
725 } else {
726 ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
727 }
728 }
729 #if defined(PETSC_HAVE_DEVICE)
730 (*newsf)->backend = sf->backend;
731 (*newsf)->use_default_stream = sf->use_default_stream;
732 (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
733 (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
734 #endif
735 if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
736 /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
737 PetscFunctionReturn(0);
738 }
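/* Usage sketch (illustrative): clone an SF together with its graph so the copy can be set up and
   used independently of the original.

     PetscSF newsf;

     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&newsf);CHKERRQ(ierr);
     ierr = PetscSFSetUp(newsf);CHKERRQ(ierr);
     // ... communicate on newsf ...
     ierr = PetscSFDestroy(&newsf);CHKERRQ(ierr);
*/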
739
740 /*@C
741 PetscSFGetGraph - Get the graph specifying a parallel star forest
742
743 Not Collective
744
745 Input Arguments:
746 . sf - star forest
747
748 Output Arguments:
749 + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
750 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
751 . ilocal - locations of leaves in leafdata buffers
752 - iremote - remote locations of root vertices for each leaf on the current process
753
754 Notes:
755 We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
756
757 When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758 want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
759
760 Level: intermediate
761
762 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763 @*/
764 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765 {
766 PetscErrorCode ierr;
767
768 PetscFunctionBegin;
769 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
770 if (sf->ops->GetGraph) {
771 ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
772 } else {
773 if (nroots) *nroots = sf->nroots;
774 if (nleaves) *nleaves = sf->nleaves;
775 if (ilocal) *ilocal = sf->mine;
776 if (iremote) *iremote = sf->remote;
777 }
778 PetscFunctionReturn(0);
779 }
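/* Usage sketch (illustrative): walk the local part of the graph. With ilocal == NULL the leaves
   are stored contiguously, so leaf i lives at offset i of the leaf buffers.

     PetscInt          nroots,nleaves,i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
     for (i=0; i<nleaves; i++) {
       PetscInt leaf = ilocal ? ilocal[i] : i;   // location of this leaf in the leafdata buffer
       // iremote[i].rank and iremote[i].index give the owning rank and root index of the leaf
     }
*/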
780
781 /*@
782 PetscSFGetLeafRange - Get the active leaf ranges
783
784 Not Collective
785
786 Input Arguments:
787 . sf - star forest
788
789 Output Arguments:
790 + minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
791 - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
792
793 Level: developer
794
795 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
796 @*/
797 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798 {
799 PetscFunctionBegin;
800 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
801 PetscSFCheckGraphSet(sf,1);
802 if (minleaf) *minleaf = sf->minleaf;
803 if (maxleaf) *maxleaf = sf->maxleaf;
804 PetscFunctionReturn(0);
805 }
806
807 /*@C
808 PetscSFViewFromOptions - View from Options
809
810 Collective on PetscSF
811
812 Input Parameters:
813 + A - the star forest
814 . obj - Optional object
815 - name - command line option
816
817 Level: intermediate
818 .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819 @*/
820 PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821 {
822 PetscErrorCode ierr;
823
824 PetscFunctionBegin;
825 PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
826 ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
827 PetscFunctionReturn(0);
828 }
829
830 /*@C
831 PetscSFView - view a star forest
832
833 Collective
834
835 Input Arguments:
836 + sf - star forest
837 - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
838
839 Level: beginner
840
841 .seealso: PetscSFCreate(), PetscSFSetGraph()
842 @*/
843 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
844 {
845 PetscErrorCode ierr;
846 PetscBool iascii;
847 PetscViewerFormat format;
848
849 PetscFunctionBegin;
850 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
851 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
852 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
853 PetscCheckSameComm(sf,1,viewer,2);
854 if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
855 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
856 if (iascii) {
857 PetscMPIInt rank;
858 PetscInt ii,i,j;
859
860 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
861 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
862 if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
863 if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
864 if (!sf->graphset) {
865 ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
866 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
867 PetscFunctionReturn(0);
868 }
869 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
870 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
871 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
872 for (i=0; i<sf->nleaves; i++) {
873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
874 }
875 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
876 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
877 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
878 PetscMPIInt *tmpranks,*perm;
879 ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
880 ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
881 for (i=0; i<sf->nranks; i++) perm[i] = i;
882 ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
883 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
884 for (ii=0; ii<sf->nranks; ii++) {
885 i = perm[ii];
886 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
887 for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
888 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
889 }
890 }
891 ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
892 }
893 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
894 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
895 }
896 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
897 }
898 PetscFunctionReturn(0);
899 }
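/* Usage sketch (illustrative): print the graph of a set-up SF to stdout on the SF's communicator.

     ierr = PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   Alternatively, if the application calls PetscSFViewFromOptions(sf,NULL,"-sf_view"), passing
   -sf_view on the command line triggers the same output. */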
900
901 /*@C
902 PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
903
904 Not Collective
905
906 Input Arguments:
907 . sf - star forest
908
909 Output Arguments:
910 + nranks - number of ranks referenced by local part
911 . ranks - array of ranks
912 . roffset - offset in rmine/rremote for each rank (length nranks+1)
913 . rmine - concatenated array holding local indices referencing each remote rank
914 - rremote - concatenated array holding remote indices referenced for each remote rank
915
916 Level: developer
917
918 .seealso: PetscSFGetLeafRanks()
919 @*/
920 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
921 {
922 PetscErrorCode ierr;
923
924 PetscFunctionBegin;
925 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
926 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927 if (sf->ops->GetRootRanks) {
928 ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
929 } else {
930 /* The generic implementation */
931 if (nranks) *nranks = sf->nranks;
932 if (ranks) *ranks = sf->ranks;
933 if (roffset) *roffset = sf->roffset;
934 if (rmine) *rmine = sf->rmine;
935 if (rremote) *rremote = sf->rremote;
936 }
937 PetscFunctionReturn(0);
938 }
939
940 /*@C
941 PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
942
943 Not Collective
944
945 Input Arguments:
946 . sf - star forest
947
948 Output Arguments:
949 + niranks - number of leaf ranks referencing roots on this process
950 . iranks - array of ranks
951 . ioffset - offset in irootloc for each rank (length niranks+1)
952 - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
953
954 Level: developer
955
956 .seealso: PetscSFGetRootRanks()
957 @*/
958 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
959 {
960 PetscErrorCode ierr;
961
962 PetscFunctionBegin;
963 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
964 if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
965 if (sf->ops->GetLeafRanks) {
966 ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
967 } else {
968 PetscSFType type;
969 ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
970 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
971 }
972 PetscFunctionReturn(0);
973 }
974
975 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976 PetscInt i;
977 for (i=0; i<n; i++) {
978 if (needle == list[i]) return PETSC_TRUE;
979 }
980 return PETSC_FALSE;
981 }
982
983 /*@C
984 PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
985
986 Collective
987
988 Input Arguments:
989 + sf - PetscSF to set up; PetscSFSetGraph() must have been called
990 - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
991
992 Level: developer
993
994 .seealso: PetscSFGetRootRanks()
995 @*/
996 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
997 {
998 PetscErrorCode ierr;
999 PetscTable table;
1000 PetscTablePosition pos;
1001 PetscMPIInt size,groupsize,*groupranks;
1002 PetscInt *rcount,*ranks;
1003 PetscInt i, irank = -1,orank = -1;
1004
1005 PetscFunctionBegin;
1006 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1007 PetscSFCheckGraphSet(sf,1);
1008 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
1009 ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
1010 for (i=0; i<sf->nleaves; i++) {
1011 /* Log 1-based rank */
1012 ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
1013 }
1014 ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
1015 ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
1016 ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
1017 ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
1018 for (i=0; i<sf->nranks; i++) {
1019 ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
1020 ranks[i]--; /* Convert back to 0-based */
1021 }
1022 ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
1023
1024 /* We expect that dgroup is reliably "small" while nranks could be large */
1025 {
1026 MPI_Group group = MPI_GROUP_NULL;
1027 PetscMPIInt *dgroupranks;
1028 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1029 ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
1030 ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
1031 ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
1032 for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1033 if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
1034 ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1035 ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
1036 }
1037
1038 /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039 for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1040 for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1041 if (InList(ranks[i],groupsize,groupranks)) break;
1042 }
1043 for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044 if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1045 }
1046 if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1047 PetscInt tmprank,tmpcount;
1048
1049 tmprank = ranks[i];
1050 tmpcount = rcount[i];
1051 ranks[i] = ranks[sf->ndranks];
1052 rcount[i] = rcount[sf->ndranks];
1053 ranks[sf->ndranks] = tmprank;
1054 rcount[sf->ndranks] = tmpcount;
1055 sf->ndranks++;
1056 }
1057 }
1058 ierr = PetscFree(groupranks);CHKERRQ(ierr);
1059 ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
1060 ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
1061 sf->roffset[0] = 0;
1062 for (i=0; i<sf->nranks; i++) {
1063 ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
1064 sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1065 rcount[i] = 0;
1066 }
1067 for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1068 /* short circuit */
1069 if (orank != sf->remote[i].rank) {
1070 /* Search for index of iremote[i].rank in sf->ranks */
1071 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
1072 if (irank < 0) {
1073 ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
1074 if (irank >= 0) irank += sf->ndranks;
1075 }
1076 orank = sf->remote[i].rank;
1077 }
1078 if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1079 sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1080 sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1081 rcount[irank]++;
1082 }
1083 ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
1084 PetscFunctionReturn(0);
1085 }
1086
1087 /*@C
1088 PetscSFGetGroups - gets incoming and outgoing process groups
1089
1090 Collective
1091
1092 Input Argument:
1093 . sf - star forest
1094
1095 Output Arguments:
1096 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1097 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1098
1099 Level: developer
1100
1101 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1102 @*/
1103 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1104 {
1105 PetscErrorCode ierr;
1106 MPI_Group group = MPI_GROUP_NULL;
1107
1108 PetscFunctionBegin;
1109 if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1110 if (sf->ingroup == MPI_GROUP_NULL) {
1111 PetscInt i;
1112 const PetscInt *indegree;
1113 PetscMPIInt rank,*outranks,*inranks;
1114 PetscSFNode *remote;
1115 PetscSF bgcount;
1116
1117 /* Compute the number of incoming ranks */
1118 ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1119 for (i=0; i<sf->nranks; i++) {
1120 remote[i].rank = sf->ranks[i];
1121 remote[i].index = 0;
1122 }
1123 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1124 ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1125 ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1126 ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1127 /* Enumerate the incoming ranks */
1128 ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1129 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1130 for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1131 ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1132 ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1133 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1134 ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
1135 ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1136 ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1137 ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1138 }
1139 *incoming = sf->ingroup;
1140
1141 if (sf->outgroup == MPI_GROUP_NULL) {
1142 ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1143 ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
1144 ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1145 }
1146 *outgoing = sf->outgroup;
1147 PetscFunctionReturn(0);
1148 }
1149
1150 /*@
1151    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1152
1153 Collective
1154
1155 Input Argument:
1156 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1157
1158 Output Arguments:
1159 . multi - star forest with split roots, such that each root has degree exactly 1
1160
1161 Level: developer
1162
1163 Notes:
1164
1165 In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1166 directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1167 edge, it is a candidate for future optimization that might involve its removal.
1168
1169 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1170 @*/
1171 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1172 {
1173 PetscErrorCode ierr;
1174
1175 PetscFunctionBegin;
1176 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1177 PetscValidPointer(multi,2);
1178 if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1179 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1180 *multi = sf->multi;
1181 PetscFunctionReturn(0);
1182 }
1183 if (!sf->multi) {
1184 const PetscInt *indegree;
1185 PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1186 PetscSFNode *remote;
1187 maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1188 ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1189 ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1190 ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1191 inoffset[0] = 0;
1192 for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1193 for (i=0; i<maxlocal; i++) outones[i] = 1;
1194 ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1195 ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1196 for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1197 if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1198 for (i=0; i<sf->nroots; i++) {
1199 if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1200 }
1201 }
1202 ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1203 for (i=0; i<sf->nleaves; i++) {
1204 remote[i].rank = sf->remote[i].rank;
1205 remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206 }
1207 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1208 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1209 if (sf->rankorder) { /* Sort the ranks */
1210 PetscMPIInt rank;
1211 PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1212 PetscSFNode *newremote;
1213 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1214 for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1215 ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1216 for (i=0; i<maxlocal; i++) outranks[i] = rank;
1217 ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1218 ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1219 /* Sort the incoming ranks at each vertex, build the inverse map */
1220 for (i=0; i<sf->nroots; i++) {
1221 PetscInt j;
1222 for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1223 ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1224 for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1225 }
1226 ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1227 ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1228 ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1229 for (i=0; i<sf->nleaves; i++) {
1230 newremote[i].rank = sf->remote[i].rank;
1231 newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1232 }
1233 ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1234 ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1235 }
1236 ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1237 }
1238 *multi = sf->multi;
1239 PetscFunctionReturn(0);
1240 }
1241
1242 /*@C
1243 PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1244
1245 Collective
1246
1247 Input Arguments:
1248 + sf - original star forest
1249 . nselected - number of selected roots on this process
1250 - selected - indices of the selected roots on this process
1251
1252 Output Arguments:
1253 . esf - new star forest
1254
1255 Level: advanced
1256
1257 Note:
1258 To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1259 be done by calling PetscSFGetGraph().
1260
1261 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1262 @*/
1263 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1264 {
1265 PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266 const PetscInt *ilocal;
1267 signed char *rootdata,*leafdata,*leafmem;
1268 const PetscSFNode *iremote;
1269 PetscSFNode *new_iremote;
1270 MPI_Comm comm;
1271 PetscErrorCode ierr;
1272
1273 PetscFunctionBegin;
1274 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1275 PetscSFCheckGraphSet(sf,1);
1276 if (nselected) PetscValidPointer(selected,3);
1277 PetscValidPointer(esf,4);
1278
1279 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1280 ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1281 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1282 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1283
1284 if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1285 PetscBool dups;
1286 ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1287 if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288 for (i=0; i<nselected; i++)
1289       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1290 }
1291
1292 if (sf->ops->CreateEmbeddedSF) {
1293 ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1294 } else {
1295 /* A generic version of creating embedded sf */
1296 ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1297 maxlocal = maxleaf - minleaf + 1;
1298 ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1299 leafdata = leafmem - minleaf;
1300 /* Tag selected roots and bcast to leaves */
1301 for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302 ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1303 ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1304
1305 /* Build esf with leaves that are still connected */
1306 esf_nleaves = 0;
1307 for (i=0; i<nleaves; i++) {
1308 j = ilocal ? ilocal[i] : i;
1309 /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1310 with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311 */
1312 esf_nleaves += (leafdata[j] ? 1 : 0);
1313 }
1314 ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1315 ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1316 for (i=n=0; i<nleaves; i++) {
1317 j = ilocal ? ilocal[i] : i;
1318 if (leafdata[j]) {
1319 new_ilocal[n] = j;
1320 new_iremote[n].rank = iremote[i].rank;
1321 new_iremote[n].index = iremote[i].index;
1322 ++n;
1323 }
1324 }
1325 ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1326 ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1327 ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1328 ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1329 }
1330 ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1331 PetscFunctionReturn(0);
1332 }
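/* Usage sketch (illustrative): keep only the edges whose roots are listed in selected[]; root and
   leaf numbering is unchanged, so the embedded SF works with the same sized buffers.

     const PetscInt selected[] = {0,2};   // hypothetical local root indices to keep
     PetscSF        esf;

     ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
     // ... communicate on esf ...
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
*/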
1333
1334 /*@C
1335 PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1336
1337 Collective
1338
1339 Input Arguments:
1340 + sf - original star forest
1341 . nselected - number of selected leaves on this process
1342 - selected - indices of the selected leaves on this process
1343
1344 Output Arguments:
1345 . newsf - new star forest
1346
1347 Level: advanced
1348
1349 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1350 @*/
1351 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1352 {
1353 const PetscSFNode *iremote;
1354 PetscSFNode *new_iremote;
1355 const PetscInt *ilocal;
1356 PetscInt i,nroots,*leaves,*new_ilocal;
1357 MPI_Comm comm;
1358 PetscErrorCode ierr;
1359
1360 PetscFunctionBegin;
1361 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1362 PetscSFCheckGraphSet(sf,1);
1363 if (nselected) PetscValidPointer(selected,3);
1364 PetscValidPointer(newsf,4);
1365
1366   /* Remove duplicates from selected[] and put the results in leaves[] */
1367 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1368 ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1369 ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1370   ierr = PetscSortRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); /* selected[] need not be sorted, so sort before removing duplicates and doing the range check below */
1371 if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1372
1373 /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1374 if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375 ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1376 } else {
1377 ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1378 ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1379 ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1380 for (i=0; i<nselected; ++i) {
1381 const PetscInt l = leaves[i];
1382 new_ilocal[i] = ilocal ? ilocal[l] : l;
1383 new_iremote[i].rank = iremote[l].rank;
1384 new_iremote[i].index = iremote[l].index;
1385 }
1386 ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1387 ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1388 }
1389 ierr = PetscFree(leaves);CHKERRQ(ierr);
1390 PetscFunctionReturn(0);
1391 }
1392
1393 /*@C
1394 PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1395
1396 Collective on PetscSF
1397
1398 Input Arguments:
1399 + sf - star forest on which to communicate
1400 . unit - data type associated with each node
1401 . rootdata - buffer to broadcast
1402 - op - operation to use for reduction
1403
1404 Output Arguments:
1405 . leafdata - buffer to be reduced with values from each leaf's respective root
1406
1407 Level: intermediate
1408
1409 Notes:
1410 When PETSc is configured with device support, it inspects the given data pointers to determine whether they are
1411 host pointers or device pointers, which may incur a noticeable cost. If the memory types are already known, use
1412 PetscSFBcastAndOpWithMemTypeBegin() instead.
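
   Example:
   A minimal usage sketch (not part of the library source), assuming sf is an already set-up PetscSF and rootdata and
   leafdata are host arrays of MPIU_INT sized for the root and leaf spaces; with MPI_SUM the broadcast root values are
   added into the existing leaf values, and independent local work may be overlapped between the Begin and End calls:
.vb
   ierr = PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
.ve
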
1413 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
1414 @*/
1415 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1416 {
1417 PetscErrorCode ierr;
1418 PetscMemType rootmtype,leafmtype;
1419
1420 PetscFunctionBegin;
1421 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1422 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1423 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1424 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1425 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1426 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1427 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1428 PetscFunctionReturn(0);
1429 }
1430
1431 /*@C
1432 PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()
1433
1434 Collective on PetscSF
1435
1436 Input Arguments:
1437 + sf - star forest on which to communicate
1438 . unit - data type associated with each node
1439 . rootmtype - memory type of rootdata
1440 . rootdata - buffer to broadcast
1441 . leafmtype - memory type of leafdata
1442 - op - operation to use for reduction
1443
1444 Output Arguments:
1445 . leafdata - buffer to be reduced with values from each leaf's respective root
1446
1447 Level: intermediate
1448
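   Example:
   A minimal usage sketch (not part of the library source), assuming rootdata and leafdata are known to be host arrays,
   so the pointer-type query done by PetscSFBcastAndOpBegin() is avoided:
.vb
   ierr = PetscSFBcastAndOpWithMemTypeBegin(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,PETSC_MEMTYPE_HOST,leafdata,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
.ve
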
1449 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
1450 @*/
1451 PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1452 {
1453 PetscErrorCode ierr;
1454
1455 PetscFunctionBegin;
1456 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1457 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1458 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1459 ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1460 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1461 PetscFunctionReturn(0);
1462 }
1463
1464 /*@C
1465 PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1466
1467 Collective
1468
1469 Input Arguments:
1470 + sf - star forest
1471 . unit - data type
1472 . rootdata - buffer to broadcast
1473 - op - operation to use for reduction
1474
1475 Output Arguments:
1476 . leafdata - buffer to be reduced with values from each leaf's respective root
1477
1478 Level: intermediate
1479
1480 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1481 @*/
1482 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1483 {
1484 PetscErrorCode ierr;
1485
1486 PetscFunctionBegin;
1487 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1488 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1489 ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1490 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1491 PetscFunctionReturn(0);
1492 }
1493
1494 /*@C
1495 PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1496
1497 Collective
1498
1499 Input Arguments:
1500 + sf - star forest
1501 . unit - data type
1502 . leafdata - values to reduce
1503 - op - reduction operation
1504
1505 Output Arguments:
1506 . rootdata - result of reduction of values from all leaves of each root
1507
1508 Level: intermediate
1509
1510 Notes:
1511 When PETSc is configured with device support, it inspects the given data pointers to determine whether they are
1512 host pointers or device pointers, which may incur a noticeable cost. If the memory types are already known, use
1513 PetscSFReduceWithMemTypeBegin() instead.
1514
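   Example:
   A minimal usage sketch (not part of the library source), assuming sf is a set-up PetscSF and leafdata/rootdata are
   host arrays of MPIU_INT; with MPI_SUM the leaf values are summed into the entries of rootdata:
.vb
   ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
.ve
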
1515 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1516 @*/
1517 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1518 {
1519 PetscErrorCode ierr;
1520 PetscMemType rootmtype,leafmtype;
1521
1522 PetscFunctionBegin;
1523 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1524 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1525 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1526 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1527 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1528 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1529 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1530 PetscFunctionReturn(0);
1531 }
1532
1533 /*@C
1534 PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1535
1536 Collective
1537
1538 Input Arguments:
1539 + sf - star forest
1540 . unit - data type
1541 . leafmtype - memory type of leafdata
1542 . leafdata - values to reduce
1543 . rootmtype - memory type of rootdata
1544 - op - reduction operation
1545
1546 Output Arguments:
1547 . rootdata - result of reduction of values from all leaves of each root
1548
1549 Level: intermediate
1550
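   Example:
   A minimal usage sketch (not part of the library source) for the case where both arrays are known to reside on the host:
.vb
   ierr = PetscSFReduceWithMemTypeBegin(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata,PETSC_MEMTYPE_HOST,rootdata,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
.ve
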
1551 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1552 @*/
1553 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1554 {
1555 PetscErrorCode ierr;
1556
1557 PetscFunctionBegin;
1558 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1559 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1560 ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1561 ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1562 ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1563 PetscFunctionReturn(0);
1564 }
1565
1566 /*@C
1567 PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1568
1569 Collective
1570
1571 Input Arguments:
1572 + sf - star forest
1573 . unit - data type
1574 . leafdata - values to reduce
1575 - op - reduction operation
1576
1577 Output Arguments:
1578 . rootdata - result of reduction of values from all leaves of each root
1579
1580 Level: intermediate
1581
1582 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1583 @*/
1584 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1585 {
1586 PetscErrorCode ierr;
1587
1588 PetscFunctionBegin;
1589 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1590 ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1591 ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1592 ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1593 PetscFunctionReturn(0);
1594 }
1595
1596 /*@C
1597 PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1598
1599 Collective
1600
1601 Input Arguments:
1602 + sf - star forest
1603 . unit - data type
1604 . leafdata - leaf values to use in reduction
1605 - op - operation to use for reduction
1606
1607 Output Arguments:
1608 + rootdata - root values to be updated, input state is seen by first process to perform an update
1609 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1610
1611 Level: advanced
1612
1613 Note:
1614 The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1615 might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1616 not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such
1617 as integers.
1618
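   Example:
   A minimal usage sketch (not part of the library source) that hands out consecutive slot numbers at each root:
   rootdata holds a per-root counter, leafdata is filled with ones, and after the End call leafupdate holds the counter
   value seen just before each leaf's own update:
.vb
   ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
   ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
.ve
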
1619 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1620 @*/
1621 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1622 {
1623 PetscErrorCode ierr;
1624 PetscMemType rootmtype,leafmtype,leafupdatemtype;
1625
1626 PetscFunctionBegin;
1627 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1628 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1629 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1630 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1631 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1632 ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1633 if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1634 ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1635 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1636 PetscFunctionReturn(0);
1637 }
1638
1639 /*@C
1640 PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1641
1642 Collective
1643
1644 Input Arguments:
1645 + sf - star forest
1646 . unit - data type
1647 . leafdata - leaf values to use in reduction
1648 - op - operation to use for reduction
1649
1650 Output Arguments:
1651 + rootdata - root values to be updated, input state is seen by first process to perform an update
1652 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1653
1654 Level: advanced
1655
1656 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1657 @*/
1658 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1659 {
1660 PetscErrorCode ierr;
1661
1662 PetscFunctionBegin;
1663 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1664 ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1665 ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1666 ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1667 PetscFunctionReturn(0);
1668 }
1669
1670 /*@C
1671 PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1672
1673 Collective
1674
1675 Input Arguments:
1676 . sf - star forest
1677
1678 Output Arguments:
1679 . degree - degree of each root vertex
1680
1681 Level: advanced
1682
1683 Notes:
1684 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.
1685
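   Example:
   A minimal usage sketch (not part of the library source); degree[] becomes valid only after the matching End call,
   after which degree[i] is the number of leaves attached to root i:
.vb
   const PetscInt *degree;
   ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
.ve
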
1686 .seealso: PetscSFGatherBegin()
1687 @*/
1688 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1689 {
1690 PetscErrorCode ierr;
1691
1692 PetscFunctionBegin;
1693 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1694 PetscSFCheckGraphSet(sf,1);
1695 PetscValidPointer(degree,2);
1696 if (!sf->degreeknown) {
1697 PetscInt i, nroots = sf->nroots, maxlocal;
1698 if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1699 maxlocal = sf->maxleaf-sf->minleaf+1;
1700 ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1701 ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1702 for (i=0; i<nroots; i++) sf->degree[i] = 0;
1703 for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1704 ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1705 }
1706 *degree = NULL;
1707 PetscFunctionReturn(0);
1708 }
1709
1710 /*@C
1711 PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1712
1713 Collective
1714
1715 Input Arguments:
1716 . sf - star forest
1717
1718 Output Arguments:
1719 . degree - degree of each root vertex
1720
1721 Level: developer
1722
1723 Notes:
1724 The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.
1725
1726 .seealso: PetscSFComputeDegreeBegin()
1727 @*/
1728 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1729 {
1730 PetscErrorCode ierr;
1731
1732 PetscFunctionBegin;
1733 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1734 PetscSFCheckGraphSet(sf,1);
1735 PetscValidPointer(degree,2);
1736 if (!sf->degreeknown) {
1737 if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1738 ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1739 ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
1740 sf->degreeknown = PETSC_TRUE;
1741 }
1742 *degree = sf->degree;
1743 PetscFunctionReturn(0);
1744 }
1745
1746
1747 /*@C
1748 PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1749 Each multi-root is assigned the index of the corresponding original root.
1750
1751 Collective
1752
1753 Input Arguments:
1754 + sf - star forest
1755 - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1756
1757 Output Arguments:
1758 + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1759 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1760
1761 Level: developer
1762
1763 Notes:
1764 The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1765
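   Example:
   A minimal usage sketch (not part of the library source) combining the degree computation with this routine;
   afterwards morig[k] is the original root index of multi-root k, for 0 <= k < nmr:
.vb
   const PetscInt *degree;
   PetscInt       nmr,*morig;
   ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
   ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&morig);CHKERRQ(ierr);
   ierr = PetscFree(morig);CHKERRQ(ierr);
.ve
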
1766 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1767 @*/
1768 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1769 {
1770 PetscSF msf;
1771 PetscInt i, j, k, nroots, nmroots;
1772 PetscErrorCode ierr;
1773
1774 PetscFunctionBegin;
1775 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1776 ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1777 if (nroots) PetscValidIntPointer(degree,2);
1778 if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1779 PetscValidPointer(multiRootsOrigNumbering,4);
1780 ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1781 ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1782 ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1783 for (i=0,j=0,k=0; i<nroots; i++) {
1784 if (!degree[i]) continue;
1785 for (j=0; j<degree[i]; j++,k++) {
1786 (*multiRootsOrigNumbering)[k] = i;
1787 }
1788 }
1789 if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sanity check failed: number of multi-roots does not match the sum of root degrees");
1790 if (nMultiRoots) *nMultiRoots = nmroots;
1791 PetscFunctionReturn(0);
1792 }
1793
1794 /*@C
1795 PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1796
1797 Collective
1798
1799 Input Arguments:
1800 + sf - star forest
1801 . unit - data type
1802 - leafdata - leaf data to gather to roots
1803
1804 Output Argument:
1805 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1806
1807 Level: intermediate
1808
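   Example:
   A minimal usage sketch (not part of the library source); multirootdata must provide one unit per incoming leaf, so
   its length is the sum of the root degrees obtained from PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd():
.vb
   ierr = PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
   ierr = PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);CHKERRQ(ierr);
.ve
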
1809 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1810 @*/
1811 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1812 {
1813 PetscErrorCode ierr;
1814 PetscSF multi = NULL;
1815
1816 PetscFunctionBegin;
1817 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1818 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1819 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1820 ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1821 PetscFunctionReturn(0);
1822 }
1823
1824 /*@C
1825 PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1826
1827 Collective
1828
1829 Input Arguments:
1830 + sf - star forest
1831 . unit - data type
1832 - leafdata - leaf data to gather to roots
1833
1834 Output Argument:
1835 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1836
1837 Level: intermediate
1838
1839 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1840 @*/
1841 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1842 {
1843 PetscErrorCode ierr;
1844 PetscSF multi = NULL;
1845
1846 PetscFunctionBegin;
1847 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1848 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1849 ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1850 PetscFunctionReturn(0);
1851 }
1852
1853 /*@C
1854 PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1855
1856 Collective
1857
1858 Input Arguments:
1859 + sf - star forest
1860 . unit - data type
1861 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1862
1863 Output Argument:
1864 . leafdata - leaf data to be updated with personal data from each respective root
1865
1866 Level: intermediate
1867
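   Example:
   A minimal usage sketch (not part of the library source); each leaf receives its personal unit from multirootdata,
   the inverse of the gather shown for PetscSFGatherBegin():
.vb
   ierr = PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
   ierr = PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
.ve
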
1868 .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1869 @*/
1870 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1871 {
1872 PetscErrorCode ierr;
1873 PetscSF multi = NULL;
1874
1875 PetscFunctionBegin;
1876 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1877 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1878 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1879 ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1880 PetscFunctionReturn(0);
1881 }
1882
1883 /*@C
1884 PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1885
1886 Collective
1887
1888 Input Arguments:
1889 + sf - star forest
1890 . unit - data type
1891 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1892
1893 Output Argument:
1894 . leafdata - leaf data to be updated with personal data from each respective root
1895
1896 Level: intermediate
1897
1898 .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1899 @*/
1900 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1901 {
1902 PetscErrorCode ierr;
1903 PetscSF multi = NULL;
1904
1905 PetscFunctionBegin;
1906 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1907 ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1908 ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1909 PetscFunctionReturn(0);
1910 }
1911
1912 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1913 {
1914 PetscInt i, n, nleaves;
1915 const PetscInt *ilocal = NULL;
1916 PetscHSetI seen;
1917 PetscErrorCode ierr;
1918
1919 PetscFunctionBegin;
1920 if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1921 ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1922 ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1923 for (i = 0; i < nleaves; i++) {
1924 const PetscInt leaf = ilocal ? ilocal[i] : i;
1925 ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1926 }
1927 ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1928 if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929 ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1930 PetscFunctionReturn(0);
1931 }
1932
1933 /*@
1934 PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1935
1936 Input Parameters:
1937 + sfA - The first PetscSF
1938 - sfB - The second PetscSF
1939
1940 Output Parameters:
1941 . sfBA - The composite SF
1942
1943 Level: developer
1944
1945 Notes:
1946 Currently, the two SFs must be defined on congruent communicators and they must be true star
1947 forests, i.e. the same leaf is not connected with different roots.
1948
1949 sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1950 a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1951 nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1952 Bcast on sfA, then a Bcast on sfB, on connected nodes.
1953
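   Example:
   A minimal usage sketch (not part of the library source), assuming sfA and sfB are existing SFs on congruent
   communicators with sfA's leaf space overlapping sfB's root space; a Bcast on sfBA then behaves like a Bcast on sfA
   followed by a Bcast on sfB:
.vb
   PetscSF sfBA;
   ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
.ve
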
1954 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1955 @*/
1956 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1957 {
1958 PetscErrorCode ierr;
1959 const PetscSFNode *remotePointsA,*remotePointsB;
1960 PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1961 const PetscInt *localPointsA,*localPointsB;
1962 PetscInt *localPointsBA;
1963 PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1964 PetscBool denseB;
1965
1966 PetscFunctionBegin;
1967 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1968 PetscSFCheckGraphSet(sfA,1);
1969 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1970 PetscSFCheckGraphSet(sfB,2);
1971 PetscCheckSameComm(sfA,1,sfB,2);
1972 PetscValidPointer(sfBA,3);
1973 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1974 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1975
1976 ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
1977 ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
1978 if (localPointsA) {
1979 ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
1980 for (i=0; i<numRootsB; i++) {
1981 reorderedRemotePointsA[i].rank = -1;
1982 reorderedRemotePointsA[i].index = -1;
1983 }
1984 for (i=0; i<numLeavesA; i++) {
1985 if (localPointsA[i] >= numRootsB) continue;
1986 reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1987 }
1988 remotePointsA = reorderedRemotePointsA;
1989 }
1990 ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
1991 ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
1992 ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1993 ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1994 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1995
1996 denseB = (PetscBool)!localPointsB;
1997 for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1998 if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1999 else numLeavesBA++;
2000 }
2001 if (denseB) {
2002 localPointsBA = NULL;
2003 remotePointsBA = leafdataB;
2004 } else {
2005 ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
2006 ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
2007 for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2008 const PetscInt l = localPointsB ? localPointsB[i] : i;
2009
2010 if (leafdataB[l-minleaf].rank == -1) continue;
2011 remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2012 localPointsBA[numLeavesBA] = l;
2013 numLeavesBA++;
2014 }
2015 ierr = PetscFree(leafdataB);CHKERRQ(ierr);
2016 }
2017 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2018 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
2019 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2020 PetscFunctionReturn(0);
2021 }
2022
2023 /*@
2024 PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2025
2026 Input Parameters:
2027 + sfA - The first PetscSF
2028 - sfB - The second PetscSF
2029
2030 Output Parameters:
2031 . sfBA - The composite SF.
2032
2033 Level: developer
2034
2035 Notes:
2036 Currently, the two SFs must be defined on congruent communicators and they must be true star
2037 forests, i.e. the same leaf is not connected with different roots. Moreover, every root of the
2038 second SF must have a degree of 1, i.e., no root has more than one leaf connected to it.
2039
2040 sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2041 a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2042 roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2043 a Reduce on sfB, on connected roots.
2044
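   Example:
   A minimal usage sketch (not part of the library source), assuming sfA and sfB are existing SFs on congruent
   communicators and every root of sfB has exactly one leaf; a Bcast on sfBA then behaves like a Bcast on sfA followed
   by a Reduce on sfB:
.vb
   PetscSF sfBA;
   ierr = PetscSFComposeInverse(sfA,sfB,&sfBA);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
.ve
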
2045 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2046 @*/
2047 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2048 {
2049 PetscErrorCode ierr;
2050 const PetscSFNode *remotePointsA,*remotePointsB;
2051 PetscSFNode *remotePointsBA;
2052 const PetscInt *localPointsA,*localPointsB;
2053 PetscSFNode *reorderedRemotePointsA = NULL;
2054 PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2055 MPI_Op op;
2056 #if defined(PETSC_USE_64BIT_INDICES)
2057 PetscBool iswin;
2058 #endif
2059
2060 PetscFunctionBegin;
2061 PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2062 PetscSFCheckGraphSet(sfA,1);
2063 PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2064 PetscSFCheckGraphSet(sfB,2);
2065 PetscCheckSameComm(sfA,1,sfB,2);
2066 PetscValidPointer(sfBA,3);
2067 ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
2068 ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
2069
2070 ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
2071 ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
2072
2073 /* TODO: Check roots of sfB have degree of 1 */
2074 /* Once that check is implemented, the MPI_MAXLOC below can be replaced
2075 with MPIU_REPLACE; when every root of sfB has degree one, the two have the same effect.
2076 We use MPI_MAXLOC only to obtain deterministic output from this routine if
2077 the root condition is not met.
2078 */
2079 op = MPI_MAXLOC;
2080 #if defined(PETSC_USE_64BIT_INDICES)
2081 /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2082 ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
2083 if (iswin) op = MPIU_REPLACE;
2084 #endif
2085
2086 ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
2087 ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
2088 for (i=0; i<maxleaf - minleaf + 1; i++) {
2089 reorderedRemotePointsA[i].rank = -1;
2090 reorderedRemotePointsA[i].index = -1;
2091 }
2092 if (localPointsA) {
2093 for (i=0; i<numLeavesA; i++) {
2094 if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2095 reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2096 }
2097 } else {
2098 for (i=0; i<numLeavesA; i++) {
2099 if (i > maxleaf || i < minleaf) continue;
2100 reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2101 }
2102 }
2103
2104 ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
2105 ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
2106 for (i=0; i<numRootsB; i++) {
2107 remotePointsBA[i].rank = -1;
2108 remotePointsBA[i].index = -1;
2109 }
2110
2111 ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2112 ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2113 ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
2114 for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2115 if (remotePointsBA[i].rank == -1) continue;
2116 remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2117 remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2118 localPointsBA[numLeavesBA] = i;
2119 numLeavesBA++;
2120 }
2121 ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2122 ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
2123 ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2124 PetscFunctionReturn(0);
2125 }
2126
2127 /*
2128 PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2129
2130 Input Parameters:
2131 . sf - The global PetscSF
2132
2133 Output Parameters:
2134 . out - The local PetscSF
2135 */
2136 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2137 {
2138 MPI_Comm comm;
2139 PetscMPIInt myrank;
2140 const PetscInt *ilocal;
2141 const PetscSFNode *iremote;
2142 PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2143 PetscSFNode *liremote;
2144 PetscSF lsf;
2145 PetscErrorCode ierr;
2146
2147 PetscFunctionBegin;
2148 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2149 if (sf->ops->CreateLocalSF) {
2150 ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
2151 } else {
2152 /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2153 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2154 ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2155
2156 /* Find out local edges and build a local SF */
2157 ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2158 for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2159 ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2160 ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2161
2162 for (i=j=0; i<nleaves; i++) {
2163 if (iremote[i].rank == (PetscInt)myrank) {
2164 lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2165 liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2166 liremote[j].index = iremote[i].index;
2167 j++;
2168 }
2169 }
2170 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2171 ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
2172 ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2173 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2174 *out = lsf;
2175 }
2176 PetscFunctionReturn(0);
2177 }
2178
2179 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2180 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2181 {
2182 PetscErrorCode ierr;
2183 PetscMemType rootmtype,leafmtype;
2184
2185 PetscFunctionBegin;
2186 PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2187 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2188 ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2189 ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2190 ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2191 if (sf->ops->BcastToZero) {
2192 ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2193 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2194 ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2195 PetscFunctionReturn(0);
2196 }
2197
2198