1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
5 #if defined(PETSC_HAVE_CUDA)
6   #include <cuda_runtime.h>
7 #endif
8 
9 #if defined(PETSC_HAVE_HIP)
10   #include <hip/hip_runtime.h>
11 #endif
12 
13 #if defined(PETSC_USE_DEBUG)
14 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
15     if (PetscUnlikely(!(sf)->graphset))                              \
16       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
17   } while (0)
18 #else
19 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
20 #endif
21 
22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
23 
24 PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
25 {
26   PetscFunctionBegin;
27   PetscValidPointer(type,2);
28   *type = PETSC_MEMTYPE_HOST;
29 #if defined(PETSC_HAVE_CUDA)
30   if (PetscCUDAInitialized && data) {
31     cudaError_t                  cerr;
32     struct cudaPointerAttributes attr;
33     enum cudaMemoryType          mtype;
34     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
35     cudaGetLastError(); /* Reset the last error */
36     #if (CUDART_VERSION < 10000)
37       mtype = attr.memoryType;
38     #else
39       mtype = attr.type;
40     #endif
41     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
42   }
43 #endif
44 
45 #if defined(PETSC_HAVE_HIP)
46   if (PetscHIPInitialized && data) {
47     hipError_t                   cerr;
48     struct hipPointerAttribute_t attr;
49     enum hipMemoryType           mtype;
50     cerr = hipPointerGetAttributes(&attr,data); /* Do not check the error code since passing a plain host pointer returns an invalid-value error, as with CUDA before 11.0 */
51     hipGetLastError(); /* Reset the last error */
52     mtype = attr.memoryType;
53     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
54   }
55 #endif
56   PetscFunctionReturn(0);
57 }
58 
59 /*@
60    PetscSFCreate - create a star forest communication context
61 
62    Collective
63 
64    Input Arguments:
65 .  comm - communicator on which the star forest will operate
66 
67    Output Arguments:
68 .  sf - new star forest context
69 
70    Options Database Keys:
71 +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
72 .  -sf_type window    - Use MPI-3 one-sided window for communication
73 -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication
74 
75    Level: intermediate
76 
77    Notes:
78    When one knows the communication graph matches one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv,
79    or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These pattern-specific
80    SFs are optimized and perform better than general SFs.
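
   Example Usage:
   A minimal sketch of the typical call sequence (error handling abbreviated; nroots, nleaves, and iremote are assumed
   to be provided by the caller):
.vb
   PetscSF sf;

   ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
   ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
.ve
   Communication routines such as PetscSFBcastBegin()/PetscSFBcastEnd() would normally be called between
   PetscSFSetUp() and PetscSFDestroy().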
81 
82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83 @*/
84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85 {
86   PetscErrorCode ierr;
87   PetscSF        b;
88 
89   PetscFunctionBegin;
90   PetscValidPointer(sf,2);
91   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
92 
93   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
94 
95   b->nroots    = -1;
96   b->nleaves   = -1;
97   b->minleaf   = PETSC_MAX_INT;
98   b->maxleaf   = PETSC_MIN_INT;
99   b->nranks    = -1;
100   b->rankorder = PETSC_TRUE;
101   b->ingroup   = MPI_GROUP_NULL;
102   b->outgroup  = MPI_GROUP_NULL;
103   b->graphset  = PETSC_FALSE;
104 #if defined(PETSC_HAVE_DEVICE)
105   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
106   b->use_stream_aware_mpi = PETSC_FALSE;
107   b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109     b->backend = PETSCSF_BACKEND_KOKKOS;
110   #elif defined(PETSC_HAVE_CUDA)
111     b->backend = PETSCSF_BACKEND_CUDA;
112   #endif
113 #endif
114   *sf = b;
115   PetscFunctionReturn(0);
116 }
117 
118 /*@
119    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
120 
121    Collective
122 
123    Input Arguments:
124 .  sf - star forest
125 
126    Level: advanced
127 
128 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
129 @*/
130 PetscErrorCode PetscSFReset(PetscSF sf)
131 {
132   PetscErrorCode ierr;
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
136   if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
137   sf->nroots   = -1;
138   sf->nleaves  = -1;
139   sf->minleaf  = PETSC_MAX_INT;
140   sf->maxleaf  = PETSC_MIN_INT;
141   sf->mine     = NULL;
142   sf->remote   = NULL;
143   sf->graphset = PETSC_FALSE;
144   ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
145   ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
146   sf->nranks = -1;
147   ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
148 #if defined(PETSC_HAVE_DEVICE)
149   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
150 #endif
151   sf->degreeknown = PETSC_FALSE;
152   ierr = PetscFree(sf->degree);CHKERRQ(ierr);
153   if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
154   if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
155   ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
156   ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
157   sf->setupcalled = PETSC_FALSE;
158   PetscFunctionReturn(0);
159 }
160 
161 /*@C
162    PetscSFSetType - Set the PetscSF communication implementation
163 
164    Collective on PetscSF
165 
166    Input Parameters:
167 +  sf - the PetscSF context
168 -  type - a known method
169 
170    Options Database Key:
171 .  -sf_type <type> - Sets the method; use -help for a list
172    of available methods (for instance, window, basic, neighbor)
173 
174    Notes:
175    See "include/petscsf.h" for available methods (for instance)
176 +    PETSCSFWINDOW - MPI-2/3 one-sided
177 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
178 
179   Level: intermediate
180 
181 .seealso: PetscSFType, PetscSFCreate()
182 @*/
183 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
184 {
185   PetscErrorCode ierr,(*r)(PetscSF);
186   PetscBool      match;
187 
188   PetscFunctionBegin;
189   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
190   PetscValidCharPointer(type,2);
191 
192   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
193   if (match) PetscFunctionReturn(0);
194 
195   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
196   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
197   /* Destroy the previous PetscSF implementation context */
198   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
199   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
200   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
201   ierr = (*r)(sf);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206   PetscSFGetType - Get the PetscSF communication implementation
207 
208   Not Collective
209 
210   Input Parameter:
211 . sf  - the PetscSF context
212 
213   Output Parameter:
214 . type - the PetscSF type name
215 
216   Level: intermediate
217 
218 .seealso: PetscSFSetType(), PetscSFCreate()
219 @*/
220 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
221 {
222   PetscFunctionBegin;
223   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
224   PetscValidPointer(type,2);
225   *type = ((PetscObject)sf)->type_name;
226   PetscFunctionReturn(0);
227 }
228 
229 /*@
230    PetscSFDestroy - destroy star forest
231 
232    Collective
233 
234    Input Arguments:
235 .  sf - address of star forest
236 
237    Level: intermediate
238 
239 .seealso: PetscSFCreate(), PetscSFReset()
240 @*/
241 PetscErrorCode PetscSFDestroy(PetscSF *sf)
242 {
243   PetscErrorCode ierr;
244 
245   PetscFunctionBegin;
246   if (!*sf) PetscFunctionReturn(0);
247   PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
248   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
249   ierr = PetscSFReset(*sf);CHKERRQ(ierr);
250   if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
251   ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256 {
257   PetscInt           i, nleaves;
258   PetscMPIInt        size;
259   const PetscInt    *ilocal;
260   const PetscSFNode *iremote;
261   PetscErrorCode     ierr;
262 
263   PetscFunctionBegin;
264   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
265   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
266   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
267   for (i = 0; i < nleaves; i++) {
268     const PetscInt rank = iremote[i].rank;
269     const PetscInt remote = iremote[i].index;
270     const PetscInt leaf = ilocal ? ilocal[i] : i;
271     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 
279 
280 
281 /*@
282    PetscSFSetUp - set up communication structures
283 
284    Collective
285 
286    Input Arguments:
287 .  sf - star forest communication object
288 
289    Level: beginner
290 
291 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
292 @*/
293 PetscErrorCode PetscSFSetUp(PetscSF sf)
294 {
295   PetscErrorCode ierr;
296 
297   PetscFunctionBegin;
298   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
299   PetscSFCheckGraphSet(sf,1);
300   if (sf->setupcalled) PetscFunctionReturn(0);
301   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
302   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
303   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
304   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
305 #if defined(PETSC_HAVE_CUDA)
306   if (sf->backend == PETSCSF_BACKEND_CUDA) {
307     sf->ops->Malloc = PetscSFMalloc_Cuda;
308     sf->ops->Free   = PetscSFFree_Cuda;
309   }
310 #endif
311 
312 #if defined(PETSC_HAVE_KOKKOS)
313   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
314     sf->ops->Malloc = PetscSFMalloc_Kokkos;
315     sf->ops->Free   = PetscSFFree_Kokkos;
316   }
317 #endif
318   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
319   sf->setupcalled = PETSC_TRUE;
320   PetscFunctionReturn(0);
321 }
322 
323 /*@
324    PetscSFSetFromOptions - set PetscSF options using the options database
325 
326    Logically Collective
327 
328    Input Arguments:
329 .  sf - star forest
330 
331    Options Database Keys:
332 +  -sf_type               - implementation type, see PetscSFSetType()
333 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default CUDA stream. SF will also
335                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336                             If true, this option only works with -use_gpu_aware_mpi 1.
337 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is CUDA-stream aware and SF won't synchronize streams for send/recv buffers passed to MPI (default: false).
338                                If true, this option only works with -use_gpu_aware_mpi 1.
339 
340 -  -sf_backend cuda | kokkos - Select the device backend SF uses. Currently SF has two backends: cuda and kokkos.
341                               On CUDA devices, one can choose either cuda or kokkos, with cuda being the default. On other devices,
342                               kokkos is the only available backend.
343 
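   Example Usage:
   A sketch of typical use; the specific option values shown are only illustrative:
.vb
   PetscSF sf;

   ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
.ve
   combined with run-time options such as
.vb
   -sf_type neighbor -sf_backend kokkos
.ve
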
344    Level: intermediate
345 @*/
346 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347 {
348   PetscSFType    deft;
349   char           type[256];
350   PetscErrorCode ierr;
351   PetscBool      flg;
352 
353   PetscFunctionBegin;
354   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
355   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
356   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
357   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
358   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
359   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
360 #if defined(PETSC_HAVE_DEVICE)
361   {
362     char        backendstr[32] = {0};
363     PetscBool   isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
364     /* Change the defaults set in PetscSFCreate() with command line options */
365     ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
366     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
367     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
368     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
369     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
370   #if defined(PETSC_HAVE_CUDA)
371     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
374   #elif defined(PETSC_HAVE_KOKKOS)
375     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376   #endif
377   }
378 #endif
379   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
380   ierr = PetscOptionsEnd();CHKERRQ(ierr);
381   PetscFunctionReturn(0);
382 }
383 
384 /*@
385    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
386 
387    Logically Collective
388 
389    Input Arguments:
390 +  sf - star forest
391 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
392 
393    Level: advanced
394 
395 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
396 @*/
397 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
398 {
399   PetscFunctionBegin;
400   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
401   PetscValidLogicalCollectiveBool(sf,flg,2);
402   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
403   sf->rankorder = flg;
404   PetscFunctionReturn(0);
405 }
406 
407 /*@
408    PetscSFSetGraph - Set a parallel star forest
409 
410    Collective
411 
412    Input Arguments:
413 +  sf - star forest
414 .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
415 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
416 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417             during setup in debug mode)
418 .  localmode - copy mode for ilocal
419 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420              during setup in debug mode)
421 -  remotemode - copy mode for iremote
422 
423    Level: intermediate
424 
425    Notes:
426     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
427 
428    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
429    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
430    needed.
431 
432    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
434 
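   Example Usage:
   A minimal sketch in which every process contributes one leaf referencing root 0 on process 0; leaf storage is
   contiguous, so ilocal is passed as NULL (error handling abbreviated):
.vb
   PetscMPIInt rank;
   PetscSFNode remote;

   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
   remote.rank  = 0;
   remote.index = 0;
   ierr = PetscSFSetGraph(sf,rank ? 0 : 1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);CHKERRQ(ierr);
.ve
   Process 0 owns the single root; every process (including process 0) owns exactly one leaf.
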
435 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
436 @*/
437 PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
438 {
439   PetscErrorCode ierr;
440 
441   PetscFunctionBegin;
442   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
443   if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
444   if (nleaves > 0) PetscValidPointer(iremote,6);
445   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
446   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
447 
448   if (sf->nroots >= 0) { /* Reset only if graph already set */
449     ierr = PetscSFReset(sf);CHKERRQ(ierr);
450   }
451 
452   ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
453 
454   sf->nroots  = nroots;
455   sf->nleaves = nleaves;
456 
457   if (nleaves && ilocal) {
458     PetscInt i;
459     PetscInt minleaf = PETSC_MAX_INT;
460     PetscInt maxleaf = PETSC_MIN_INT;
461     int      contiguous = 1;
462     for (i=0; i<nleaves; i++) {
463       minleaf = PetscMin(minleaf,ilocal[i]);
464       maxleaf = PetscMax(maxleaf,ilocal[i]);
465       contiguous &= (ilocal[i] == i);
466     }
467     sf->minleaf = minleaf;
468     sf->maxleaf = maxleaf;
469     if (contiguous) {
470       if (localmode == PETSC_OWN_POINTER) {
471         ierr = PetscFree(ilocal);CHKERRQ(ierr);
472       }
473       ilocal = NULL;
474     }
475   } else {
476     sf->minleaf = 0;
477     sf->maxleaf = nleaves - 1;
478   }
479 
480   if (ilocal) {
481     switch (localmode) {
482     case PETSC_COPY_VALUES:
483       ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
484       ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
485       sf->mine = sf->mine_alloc;
486       break;
487     case PETSC_OWN_POINTER:
488       sf->mine_alloc = (PetscInt*)ilocal;
489       sf->mine       = sf->mine_alloc;
490       break;
491     case PETSC_USE_POINTER:
492       sf->mine_alloc = NULL;
493       sf->mine       = (PetscInt*)ilocal;
494       break;
495     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
496     }
497   }
498 
499   switch (remotemode) {
500   case PETSC_COPY_VALUES:
501     ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
502     ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
503     sf->remote = sf->remote_alloc;
504     break;
505   case PETSC_OWN_POINTER:
506     sf->remote_alloc = (PetscSFNode*)iremote;
507     sf->remote       = sf->remote_alloc;
508     break;
509   case PETSC_USE_POINTER:
510     sf->remote_alloc = NULL;
511     sf->remote       = (PetscSFNode*)iremote;
512     break;
513   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
514   }
515 
516   ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
517   sf->graphset = PETSC_TRUE;
518   PetscFunctionReturn(0);
519 }
520 
521 /*@
522   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
523 
524   Collective
525 
526   Input Parameters:
527 + sf      - The PetscSF
528 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
530 
531   Notes:
532   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map, and
533   let n and N be the local and global sizes of x, respectively.
534 
535   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
536   sequential vectors y on all ranks.
537 
538   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
539   sequential vector y on rank 0.
540 
541   In the above cases, entries of x are roots and entries of y are leaves.
542 
543   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
544   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
545   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. Note that this does
546   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
547   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
548 
549   In this case, roots and leaves are symmetric.
550 
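  Example Usage:
  A sketch, assuming comm, n, rootdata, and leafdata are provided by the caller, of building an allgather-type SF
  from a layout with n local roots and broadcasting the root values to every process (error handling abbreviated):
.vb
  PetscLayout map;
  PetscSF     sf;

  ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
.ve
  Here rootdata has n entries and leafdata has N entries on every process.
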
551   Level: intermediate
552  @*/
553 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
554 {
555   MPI_Comm       comm;
556   PetscInt       n,N,res[2];
557   PetscMPIInt    rank,size;
558   PetscSFType    type;
559   PetscErrorCode ierr;
560 
561   PetscFunctionBegin;
562   ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
563   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
564   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
565   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
566 
567   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568     type = PETSCSFALLTOALL;
569     ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
570     ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
571     ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
572     ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
573   } else {
574     ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
575     ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
576     res[0] = n;
577     res[1] = -n;
578     /* Check whether n is the same on all ranks so that we can optimize for that case */
579     ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
580     if (res[0] == -res[1]) { /* same n */
581       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
582     } else {
583       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
584     }
585     ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
586   }
587   ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);
588 
589   sf->pattern = pattern;
590   sf->mine    = NULL; /* Contiguous */
591 
592   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
593      Also set other easy stuff.
594    */
595   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
596     sf->nleaves      = N;
597     sf->nroots       = n;
598     sf->nranks       = size;
599     sf->minleaf      = 0;
600     sf->maxleaf      = N - 1;
601   } else if (pattern == PETSCSF_PATTERN_GATHER) {
602     sf->nleaves      = rank ? 0 : N;
603     sf->nroots       = n;
604     sf->nranks       = rank ? 0 : size;
605     sf->minleaf      = 0;
606     sf->maxleaf      = rank ? -1 : N - 1;
607   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
608     sf->nleaves      = size;
609     sf->nroots       = size;
610     sf->nranks       = size;
611     sf->minleaf      = 0;
612     sf->maxleaf      = size - 1;
613   }
614   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
615   sf->graphset = PETSC_TRUE;
616   PetscFunctionReturn(0);
617 }
618 
619 /*@
620    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
621 
622    Collective
623 
624    Input Arguments:
625 .  sf - star forest to invert
626 
627    Output Arguments:
628 .  isf - inverse of sf
629 
630    Level: advanced
631 
632    Notes:
633    All roots must have degree 1.
634 
635    The local space may be a permutation, but cannot be sparse.
636 
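   Example Usage:
   A sketch, assuming leafdata and rootdata are buffers sized for the leaf and root spaces of sf, that pushes data
   from the leaves of sf to its roots by broadcasting over the inverse SF (error handling abbreviated):
.vb
   PetscSF isf;

   ierr = PetscSFCreateInverseSF(sf,&isf);CHKERRQ(ierr);
   ierr = PetscSFBcastBegin(isf,MPIU_SCALAR,leafdata,rootdata);CHKERRQ(ierr);
   ierr = PetscSFBcastEnd(isf,MPIU_SCALAR,leafdata,rootdata);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&isf);CHKERRQ(ierr);
.ve
   The leaves of sf become the roots of isf and vice versa.
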
637 .seealso: PetscSFSetGraph()
638 @*/
639 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
640 {
641   PetscErrorCode ierr;
642   PetscMPIInt    rank;
643   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
644   const PetscInt *ilocal;
645   PetscSFNode    *roots,*leaves;
646 
647   PetscFunctionBegin;
648   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
649   PetscSFCheckGraphSet(sf,1);
650   PetscValidPointer(isf,2);
651 
652   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
653   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
654 
655   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
656   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
657   for (i=0; i<maxlocal; i++) {
658     leaves[i].rank  = rank;
659     leaves[i].index = i;
660   }
661   for (i=0; i <nroots; i++) {
662     roots[i].rank  = -1;
663     roots[i].index = -1;
664   }
665   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
666   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
667 
668   /* Check whether our leaves are sparse */
669   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
670   if (count == nroots) newilocal = NULL;
671   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
672     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
673     for (i=0,count=0; i<nroots; i++) {
674       if (roots[i].rank >= 0) {
675         newilocal[count]   = i;
676         roots[count].rank  = roots[i].rank;
677         roots[count].index = roots[i].index;
678         count++;
679       }
680     }
681   }
682 
683   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
684   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
685   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 /*@
690    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
691 
692    Collective
693 
694    Input Arguments:
695 +  sf - communication object to duplicate
696 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
697 
698    Output Arguments:
699 .  newsf - new communication object
700 
701    Level: beginner
702 
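   Example Usage:
   A brief sketch duplicating both the configuration and the graph of an existing SF (error handling abbreviated):
.vb
   PetscSF newsf;

   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&newsf);CHKERRQ(ierr);
   ierr = PetscSFSetUp(newsf);CHKERRQ(ierr);
.ve
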
703 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
704 @*/
705 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
706 {
707   PetscSFType    type;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
712   PetscValidLogicalCollectiveEnum(sf,opt,2);
713   PetscValidPointer(newsf,3);
714   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
715   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
716   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
717   if (opt == PETSCSF_DUPLICATE_GRAPH) {
718     PetscSFCheckGraphSet(sf,1);
719     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
720       PetscInt          nroots,nleaves;
721       const PetscInt    *ilocal;
722       const PetscSFNode *iremote;
723       ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
724       ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
725     } else {
726       ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
727     }
728   }
729 #if defined(PETSC_HAVE_DEVICE)
730   (*newsf)->backend              = sf->backend;
731   (*newsf)->use_default_stream   = sf->use_default_stream;
732   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
733   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
734 #endif
735   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
736   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
737   PetscFunctionReturn(0);
738 }
739 
740 /*@C
741    PetscSFGetGraph - Get the graph specifying a parallel star forest
742 
743    Not Collective
744 
745    Input Arguments:
746 .  sf - star forest
747 
748    Output Arguments:
749 +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
750 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
751 .  ilocal - locations of leaves in leafdata buffers
752 -  iremote - remote locations of root vertices for each leaf on the current process
753 
754    Notes:
755    We are not currently requiring that the graph be set, thus nroots is returned as -1 if the graph has not been set yet
756 
757    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
759 
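   Example Usage:
   A sketch that loops over the local leaves and prints the graph, accounting for the contiguous case in which
   ilocal is returned as NULL:
.vb
   PetscInt          i,nleaves,leaf;
   const PetscInt    *ilocal;
   const PetscSFNode *iremote;

   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
   for (i=0; i<nleaves; i++) {
     leaf = ilocal ? ilocal[i] : i;
     ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"leaf %D at local location %D references root %D on rank %D\n",i,leaf,iremote[i].index,iremote[i].rank);CHKERRQ(ierr);
   }
   ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);CHKERRQ(ierr);
.ve
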
760    Level: intermediate
761 
762 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763 @*/
764 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765 {
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
770   if (sf->ops->GetGraph) {
771     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
772   } else {
773     if (nroots) *nroots = sf->nroots;
774     if (nleaves) *nleaves = sf->nleaves;
775     if (ilocal) *ilocal = sf->mine;
776     if (iremote) *iremote = sf->remote;
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 /*@
782    PetscSFGetLeafRange - Get the active leaf ranges
783 
784    Not Collective
785 
786    Input Arguments:
787 .  sf - star forest
788 
789    Output Arguments:
790 +  minleaf - minimum active leaf on this process; returns 0 if there are no leaves
791 -  maxleaf - maximum active leaf on this process; returns -1 if there are no leaves
792 
793    Level: developer
794 
795 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
796 @*/
797 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798 {
799   PetscFunctionBegin;
800   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
801   PetscSFCheckGraphSet(sf,1);
802   if (minleaf) *minleaf = sf->minleaf;
803   if (maxleaf) *maxleaf = sf->maxleaf;
804   PetscFunctionReturn(0);
805 }
806 
807 /*@C
808    PetscSFViewFromOptions - View from Options
809 
810    Collective on PetscSF
811 
812    Input Parameters:
813 +  A - the star forest
814 .  obj - Optional object
815 -  name - command line option
816 
817    Level: intermediate
818 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819 @*/
820 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821 {
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
826   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 /*@C
831    PetscSFView - view a star forest
832 
833    Collective
834 
835    Input Arguments:
836 +  sf - star forest
837 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
838 
839    Level: beginner
840 
841 .seealso: PetscSFCreate(), PetscSFSetGraph()
842 @*/
843 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
844 {
845   PetscErrorCode    ierr;
846   PetscBool         iascii;
847   PetscViewerFormat format;
848 
849   PetscFunctionBegin;
850   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
851   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
852   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
853   PetscCheckSameComm(sf,1,viewer,2);
854   if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
855   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
856   if (iascii) {
857     PetscMPIInt rank;
858     PetscInt    ii,i,j;
859 
860     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
861     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
862     if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
863     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
864       if (!sf->graphset) {
865         ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
866         ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
867         PetscFunctionReturn(0);
868       }
869       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
870       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
871       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
872       for (i=0; i<sf->nleaves; i++) {
873         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
874       }
875       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
876       ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
877       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
878         PetscMPIInt *tmpranks,*perm;
879         ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
880         ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
881         for (i=0; i<sf->nranks; i++) perm[i] = i;
882         ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
883         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
884         for (ii=0; ii<sf->nranks; ii++) {
885           i = perm[ii];
886           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
887           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
888             ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
889           }
890         }
891         ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
892       }
893       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
894       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
895     }
896     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 /*@C
902    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
903 
904    Not Collective
905 
906    Input Arguments:
907 .  sf - star forest
908 
909    Output Arguments:
910 +  nranks - number of ranks referenced by local part
911 .  ranks - array of ranks
912 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
913 .  rmine - concatenated array holding local indices referencing each remote rank
914 -  rremote - concatenated array holding remote indices referenced for each remote rank
915 
916    Level: developer
917 
918 .seealso: PetscSFGetLeafRanks()
919 @*/
920 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
921 {
922   PetscErrorCode ierr;
923 
924   PetscFunctionBegin;
925   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
926   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927   if (sf->ops->GetRootRanks) {
928     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
929   } else {
930     /* The generic implementation */
931     if (nranks)  *nranks  = sf->nranks;
932     if (ranks)   *ranks   = sf->ranks;
933     if (roffset) *roffset = sf->roffset;
934     if (rmine)   *rmine   = sf->rmine;
935     if (rremote) *rremote = sf->rremote;
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 /*@C
941    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
942 
943    Not Collective
944 
945    Input Arguments:
946 .  sf - star forest
947 
948    Output Arguments:
949 +  niranks - number of leaf ranks referencing roots on this process
950 .  iranks - array of ranks
951 .  ioffset - offset in irootloc for each rank (length niranks+1)
952 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
953 
954    Level: developer
955 
956 .seealso: PetscSFGetRootRanks()
957 @*/
958 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
959 {
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
964   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
965   if (sf->ops->GetLeafRanks) {
966     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
967   } else {
968     PetscSFType type;
969     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
970     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976   PetscInt i;
977   for (i=0; i<n; i++) {
978     if (needle == list[i]) return PETSC_TRUE;
979   }
980   return PETSC_FALSE;
981 }
982 
983 /*@C
984    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
985 
986    Collective
987 
988    Input Arguments:
989 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
990 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
991 
992    Level: developer
993 
994 .seealso: PetscSFGetRootRanks()
995 @*/
996 PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
997 {
998   PetscErrorCode     ierr;
999   PetscTable         table;
1000   PetscTablePosition pos;
1001   PetscMPIInt        size,groupsize,*groupranks;
1002   PetscInt           *rcount,*ranks;
1003   PetscInt           i, irank = -1,orank = -1;
1004 
1005   PetscFunctionBegin;
1006   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1007   PetscSFCheckGraphSet(sf,1);
1008   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
1009   ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
1010   for (i=0; i<sf->nleaves; i++) {
1011     /* Log 1-based rank */
1012     ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
1013   }
1014   ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
1015   ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
1016   ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
1017   ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
1018   for (i=0; i<sf->nranks; i++) {
1019     ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
1020     ranks[i]--;             /* Convert back to 0-based */
1021   }
1022   ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
1023 
1024   /* We expect that dgroup is reliably "small" while nranks could be large */
1025   {
1026     MPI_Group group = MPI_GROUP_NULL;
1027     PetscMPIInt *dgroupranks;
1028     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1029     ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
1030     ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
1031     ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
1032     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1033     if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
1034     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1035     ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
1036   }
1037 
1038   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1040     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1041       if (InList(ranks[i],groupsize,groupranks)) break;
1042     }
1043     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1045     }
1046     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1047       PetscInt tmprank,tmpcount;
1048 
1049       tmprank             = ranks[i];
1050       tmpcount            = rcount[i];
1051       ranks[i]            = ranks[sf->ndranks];
1052       rcount[i]           = rcount[sf->ndranks];
1053       ranks[sf->ndranks]  = tmprank;
1054       rcount[sf->ndranks] = tmpcount;
1055       sf->ndranks++;
1056     }
1057   }
1058   ierr = PetscFree(groupranks);CHKERRQ(ierr);
1059   ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
1060   ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
1061   sf->roffset[0] = 0;
1062   for (i=0; i<sf->nranks; i++) {
1063     ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
1064     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1065     rcount[i]        = 0;
1066   }
1067   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1068     /* short circuit */
1069     if (orank != sf->remote[i].rank) {
1070       /* Search for index of iremote[i].rank in sf->ranks */
1071       ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
1072       if (irank < 0) {
1073         ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
1074         if (irank >= 0) irank += sf->ndranks;
1075       }
1076       orank = sf->remote[i].rank;
1077     }
1078     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1079     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1080     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1081     rcount[irank]++;
1082   }
1083   ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /*@C
1088    PetscSFGetGroups - gets incoming and outgoing process groups
1089 
1090    Collective
1091 
1092    Input Argument:
1093 .  sf - star forest
1094 
1095    Output Arguments:
1096 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1097 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1098 
1099    Level: developer
1100 
1101 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1102 @*/
1103 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1104 {
1105   PetscErrorCode ierr;
1106   MPI_Group      group = MPI_GROUP_NULL;
1107 
1108   PetscFunctionBegin;
1109   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1110   if (sf->ingroup == MPI_GROUP_NULL) {
1111     PetscInt       i;
1112     const PetscInt *indegree;
1113     PetscMPIInt    rank,*outranks,*inranks;
1114     PetscSFNode    *remote;
1115     PetscSF        bgcount;
1116 
1117     /* Compute the number of incoming ranks */
1118     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1119     for (i=0; i<sf->nranks; i++) {
1120       remote[i].rank  = sf->ranks[i];
1121       remote[i].index = 0;
1122     }
1123     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1124     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1125     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1126     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1127     /* Enumerate the incoming ranks */
1128     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1129     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1130     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1131     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1132     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1133     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1134     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
1135     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1136     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1137     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1138   }
1139   *incoming = sf->ingroup;
1140 
1141   if (sf->outgroup == MPI_GROUP_NULL) {
1142     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1143     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
1144     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1145   }
1146   *outgoing = sf->outgroup;
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 /*@
1151    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1152 
1153    Collective
1154 
1155    Input Argument:
1156 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1157 
1158    Output Arguments:
1159 .  multi - star forest with split roots, such that each root has degree exactly 1
1160 
1161    Level: developer
1162 
1163    Notes:
1164 
1165    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1166    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1167    edge, it is a candidate for future optimization that might involve its removal.
1168 
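   Example Usage:
   A brief sketch; the returned multi-SF is owned by sf and must not be destroyed by the caller (error handling abbreviated):
.vb
   PetscSF multi;

   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
.ve
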
1169 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1170 @*/
1171 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1177   PetscValidPointer(multi,2);
1178   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1179     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1180     *multi = sf->multi;
1181     PetscFunctionReturn(0);
1182   }
1183   if (!sf->multi) {
1184     const PetscInt *indegree;
1185     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1186     PetscSFNode    *remote;
1187     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1188     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1189     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1190     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1191     inoffset[0] = 0;
1192     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1193     for (i=0; i<maxlocal; i++) outones[i] = 1;
1194     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1195     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1196     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1197     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1198       for (i=0; i<sf->nroots; i++) {
1199         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1200       }
1201     }
1202     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1203     for (i=0; i<sf->nleaves; i++) {
1204       remote[i].rank  = sf->remote[i].rank;
1205       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1206     }
1207     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1208     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1209     if (sf->rankorder) {        /* Sort the ranks */
1210       PetscMPIInt rank;
1211       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1212       PetscSFNode *newremote;
1213       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1214       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1215       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1216       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1217       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1218       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1219       /* Sort the incoming ranks at each vertex, build the inverse map */
1220       for (i=0; i<sf->nroots; i++) {
1221         PetscInt j;
1222         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1223         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1224         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1225       }
1226       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1227       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1228       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1229       for (i=0; i<sf->nleaves; i++) {
1230         newremote[i].rank  = sf->remote[i].rank;
1231         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1232       }
1233       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1234       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1235     }
1236     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1237   }
1238   *multi = sf->multi;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 /*@C
1243    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1244 
1245    Collective
1246 
1247    Input Arguments:
1248 +  sf - original star forest
1249 .  nselected  - number of selected roots on this process
1250 -  selected   - indices of the selected roots on this process
1251 
1252    Output Arguments:
1253 .  esf - new star forest
1254 
1255    Level: advanced
1256 
1257    Note:
1258    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1259    be done by calling PetscSFGetGraph().
1260 
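   Example Usage:
   A sketch that keeps only the edges referencing the first two roots owned by this process, assuming nroots >= 2
   (error handling abbreviated):
.vb
   PetscSF        esf;
   const PetscInt selected[] = {0,1};

   ierr = PetscSFCreateEmbeddedSF(sf,2,selected,&esf);CHKERRQ(ierr);
   ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);
.ve
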
1261 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1262 @*/
1263 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1264 {
1265   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266   const PetscInt    *ilocal;
1267   signed char       *rootdata,*leafdata,*leafmem;
1268   const PetscSFNode *iremote;
1269   PetscSFNode       *new_iremote;
1270   MPI_Comm          comm;
1271   PetscErrorCode    ierr;
1272 
1273   PetscFunctionBegin;
1274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1275   PetscSFCheckGraphSet(sf,1);
1276   if (nselected) PetscValidPointer(selected,3);
1277   PetscValidPointer(esf,4);
1278 
1279   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1280   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1281   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1282   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1283 
1284   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has duplicate or out-of-range indices */
1285     PetscBool dups;
1286     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1287     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288     for (i=0; i<nselected; i++)
1289       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1290   }
1291 
1292   if (sf->ops->CreateEmbeddedSF) {
1293     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1294   } else {
1295     /* A generic version of creating embedded sf */
1296     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1297     maxlocal = maxleaf - minleaf + 1;
1298     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1299     leafdata = leafmem - minleaf;
1300     /* Tag selected roots and bcast to leaves */
1301     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1303     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1304 
1305     /* Build esf with leaves that are still connected */
1306     esf_nleaves = 0;
1307     for (i=0; i<nleaves; i++) {
1308       j = ilocal ? ilocal[i] : i;
1309       /* In theory, esf_nleaves += leafdata[j] should work, but it failed due to SFWindow bugs
1310          in PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311       */
1312       esf_nleaves += (leafdata[j] ? 1 : 0);
1313     }
1314     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1315     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1316     for (i=n=0; i<nleaves; i++) {
1317       j = ilocal ? ilocal[i] : i;
1318       if (leafdata[j]) {
1319         new_ilocal[n]        = j;
1320         new_iremote[n].rank  = iremote[i].rank;
1321         new_iremote[n].index = iremote[i].index;
1322         ++n;
1323       }
1324     }
1325     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1326     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1327     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1328     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1329   }
1330   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
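/*
   Illustrative usage sketch (not part of the library; the variable names are made up):
   given an already set-up PetscSF 'sf', build an embedded SF that keeps only the edges
   whose roots are listed in 'keep', then query which leaves survived.

     PetscErrorCode ierr;
     PetscSF        esf;
     PetscInt       nleaves;
     const PetscInt *ilocal;
     const PetscInt keep[2] = {0, 2};   // local indices of the roots to keep

     ierr = PetscSFCreateEmbeddedSF(sf,2,keep,&esf);CHKERRQ(ierr);
     ierr = PetscSFGetGraph(esf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);   // surviving leaves
     ierr = PetscSFDestroy(&esf);CHKERRQ(ierr);

   Root and leaf indices are not remapped, so buffers laid out for 'sf' can be reused with 'esf'.
*/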
1333 
1334 /*@C
1335   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1336 
1337   Collective
1338 
1339   Input Arguments:
1340 + sf - original star forest
1341 . nselected  - number of selected leaves on this process
1342 - selected   - indices of the selected leaves on this process
1343 
1344   Output Arguments:
1345 .  newsf - new star forest
1346 
1347   Level: advanced
1348 
1349 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1350 @*/
1351 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1352 {
1353   const PetscSFNode *iremote;
1354   PetscSFNode       *new_iremote;
1355   const PetscInt    *ilocal;
1356   PetscInt          i,nroots,*leaves,*new_ilocal;
1357   MPI_Comm          comm;
1358   PetscErrorCode    ierr;
1359 
1360   PetscFunctionBegin;
1361   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1362   PetscSFCheckGraphSet(sf,1);
1363   if (nselected) PetscValidPointer(selected,3);
1364   PetscValidPointer(newsf,4);
1365 
1366   /* Remove duplicates from selected[] and put the results in leaves[] */
1367   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1368   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1369   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1370   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1371   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1372 
1373   /* Optimize only when sf is already set up, so that we can reuse its communication pattern */
1374   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1378     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1379     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1380     for (i=0; i<nselected; ++i) {
1381       const PetscInt l     = leaves[i];
1382       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1383       new_iremote[i].rank  = iremote[l].rank;
1384       new_iremote[i].index = iremote[l].index;
1385     }
1386     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1387     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1388   }
1389   ierr = PetscFree(leaves);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
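/*
   Illustrative usage sketch (variable names are made up): keep only the leaves whose
   local leaf numbers are listed in 'keep'; as with the root-based variant, indices
   are not remapped.

     PetscSF        lsf;
     const PetscInt keep[2] = {1, 3};   // local leaf numbers to keep

     ierr = PetscSFCreateEmbeddedLeafSF(sf,2,keep,&lsf);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&lsf);CHKERRQ(ierr);
*/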
1392 
1393 /*@C
1394    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1395 
1396    Collective on PetscSF
1397 
1398    Input Arguments:
1399 +  sf - star forest on which to communicate
1400 .  unit - data type associated with each node
1401 .  rootdata - buffer to broadcast
1402 -  op - operation to use for reduction
1403 
1404    Output Arguments:
1405 .  leafdata - buffer to be reduced with values from each leaf's respective root
1406 
1407    Level: intermediate
1408 
1409    Notes:
1410     When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1411     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1412     use PetscSFBcastAndOpWithMemTypeBegin() instead.
1413 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
1414 @*/
1415 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1416 {
1417   PetscErrorCode ierr;
1418   PetscMemType   rootmtype,leafmtype;
1419 
1420   PetscFunctionBegin;
1421   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1422   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1423   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1424   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1425   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1426   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1427   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
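/*
   Illustrative usage sketch (assumes 'sf', 'rootdata' and 'leafdata' already exist and
   hold MPIU_INT entries): add each root value into the corresponding leaf entries.

     ierr = PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
     // ...independent local work may overlap with the communication here...
     ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);

   With op = MPIU_REPLACE this degenerates to a plain broadcast, which is essentially
   what PetscSFBcastBegin()/PetscSFBcastEnd() provide.
*/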
1430 
1431 /*@C
1432    PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()
1433 
1434    Collective on PetscSF
1435 
1436    Input Arguments:
1437 +  sf - star forest on which to communicate
1438 .  unit - data type associated with each node
1439 .  rootmtype - memory type of rootdata
1440 .  rootdata - buffer to broadcast
1441 .  leafmtype - memory type of leafdata
1442 -  op - operation to use for reduction
1443 
1444    Output Arguments:
1445 .  leafdata - buffer to be reduced with values from each leaf's respective root
1446 
1447    Level: intermediate
1448 
1449 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
1450 @*/
1451 PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1452 {
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1457   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1458   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1459   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1460   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
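/*
   Illustrative usage sketch: the same broadcast-and-add as above, but stating explicitly
   that both buffers live in host memory, so no pointer-attribute query is needed.

     ierr = PetscSFBcastAndOpWithMemTypeBegin(sf,MPIU_INT,PETSC_MEMTYPE_HOST,rootdata,
                                              PETSC_MEMTYPE_HOST,leafdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,MPI_SUM);CHKERRQ(ierr);
*/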
1463 
1464 /*@C
1465    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1466 
1467    Collective
1468 
1469    Input Arguments:
1470 +  sf - star forest
1471 .  unit - data type
1472 .  rootdata - buffer to broadcast
1473 -  op - operation to use for reduction
1474 
1475    Output Arguments:
1476 .  leafdata - buffer to be reduced with values from each leaf's respective root
1477 
1478    Level: intermediate
1479 
1480 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1481 @*/
1482 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1483 {
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1488   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1489   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1490   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 /*@C
1495    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1496 
1497    Collective
1498 
1499    Input Arguments:
1500 +  sf - star forest
1501 .  unit - data type
1502 .  leafdata - values to reduce
1503 -  op - reduction operation
1504 
1505    Output Arguments:
1506 .  rootdata - result of reduction of values from all leaves of each root
1507 
1508    Level: intermediate
1509 
1510    Notes:
1511     When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1512     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1513     use PetscSFReduceWithMemTypeBegin() instead.
1514 
1515 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1516 @*/
1517 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1518 {
1519   PetscErrorCode ierr;
1520   PetscMemType   rootmtype,leafmtype;
1521 
1522   PetscFunctionBegin;
1523   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1524   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1525   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1526   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1527   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1528   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1529   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
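/*
   Illustrative usage sketch (assumes 'sf', 'leafdata' and 'rootdata' already exist and
   hold MPIU_INT entries): sum the leaf values into their respective roots.

     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
*/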
1532 
1533 /*@C
1534    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1535 
1536    Collective
1537 
1538    Input Arguments:
1539 +  sf - star forest
1540 .  unit - data type
1541 .  leafmtype - memory type of leafdata
1542 .  leafdata - values to reduce
1543 .  rootmtype - memory type of rootdata
1544 -  op - reduction operation
1545 
1546    Output Arguments:
1547 .  rootdata - result of reduction of values from all leaves of each root
1548 
1549    Level: intermediate
1550 
1551 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1552 @*/
1553 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1554 {
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1559   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1560   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1561   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1562   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
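/*
   Illustrative usage sketch: the same reduction as above, with the memory types given
   explicitly so the pointer-attribute query is skipped.

     ierr = PetscSFReduceWithMemTypeBegin(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata,
                                          PETSC_MEMTYPE_HOST,rootdata,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
*/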
1565 
1566 /*@C
1567    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1568 
1569    Collective
1570 
1571    Input Arguments:
1572 +  sf - star forest
1573 .  unit - data type
1574 .  leafdata - values to reduce
1575 -  op - reduction operation
1576 
1577    Output Arguments:
1578 .  rootdata - result of reduction of values from all leaves of each root
1579 
1580    Level: intermediate
1581 
1582 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1583 @*/
1584 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1585 {
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1590   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1591   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1592   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 /*@C
1597    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1598 
1599    Collective
1600 
1601    Input Arguments:
1602 +  sf - star forest
1603 .  unit - data type
1604 .  leafdata - leaf values to use in reduction
1605 -  op - operation to use for reduction
1606 
1607    Output Arguments:
1608 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1609 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1610 
1611    Level: advanced
1612 
1613    Note:
1614    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1615    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1616    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1617    integers.
1618 
1619 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1620 @*/
1621 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1622 {
1623   PetscErrorCode ierr;
1624   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1625 
1626   PetscFunctionBegin;
1627   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1628   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1629   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1630   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1631   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1632   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1633   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1634   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1635   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 /*@C
1640    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1641 
1642    Collective
1643 
1644    Input Arguments:
1645 +  sf - star forest
1646 .  unit - data type
1647 .  leafdata - leaf values to use in reduction
1648 -  op - operation to use for reduction
1649 
1650    Output Arguments:
1651 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1652 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1653 
1654    Level: advanced
1655 
1656 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1657 @*/
1658 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1659 {
1660   PetscErrorCode ierr;
1661 
1662   PetscFunctionBegin;
1663   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1664   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1665   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1666   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
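/*
   Illustrative usage sketch (variable names are made up): a fetch-and-add in which each
   leaf adds its contribution from 'leafcount' into the root value, while receiving in
   'leafupdate' the root value as it was just before that update.  This is a common way
   to hand out disjoint per-leaf offsets inside a root-owned range.

     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafcount,leafupdate,MPI_SUM);CHKERRQ(ierr);
     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafcount,leafupdate,MPI_SUM);CHKERRQ(ierr);
*/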
1669 
1670 /*@C
1671    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1672 
1673    Collective
1674 
1675    Input Arguments:
1676 .  sf - star forest
1677 
1678    Output Arguments:
1679 .  degree - degree of each root vertex
1680 
1681    Level: advanced
1682 
1683    Notes:
1684    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1685 
1686 .seealso: PetscSFGatherBegin()
1687 @*/
1688 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1689 {
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1694   PetscSFCheckGraphSet(sf,1);
1695   PetscValidPointer(degree,2);
1696   if (!sf->degreeknown) {
1697     PetscInt i, nroots = sf->nroots, maxlocal;
1698     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1699     maxlocal = sf->maxleaf-sf->minleaf+1;
1700     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1701     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1702     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1703     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1704     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1705   }
1706   *degree = NULL;
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 /*@C
1711    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1712 
1713    Collective
1714 
1715    Input Arguments:
1716 .  sf - star forest
1717 
1718    Output Arguments:
1719 .  degree - degree of each root vertex
1720 
1721    Level: developer
1722 
1723    Notes:
1724    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1725 
1726 .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherEnd()
1727 @*/
1728 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1729 {
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1734   PetscSFCheckGraphSet(sf,1);
1735   PetscValidPointer(degree,2);
1736   if (!sf->degreeknown) {
1737     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1738     ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1739     ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
1740     sf->degreeknown = PETSC_TRUE;
1741   }
1742   *degree = sf->degree;
1743   PetscFunctionReturn(0);
1744 }
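/*
   Illustrative usage sketch: count how many leaves reference each local root.

     const PetscInt *degree;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     // degree[i] is now the number of leaves of root i; the array is owned by sf

   The 'degree' array is typically used to size the multi-root buffers passed to
   PetscSFGatherBegin()/PetscSFScatterBegin() and to
   PetscSFComputeMultiRootOriginalNumbering().
*/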
1745 
1746 
1747 /*@C
1748    PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1749    Each multi-root is assigned the index of the corresponding original root.
1750 
1751    Collective
1752 
1753    Input Arguments:
1754 +  sf - star forest
1755 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1756 
1757    Output Arguments:
1758 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1759 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1760 
1761    Level: developer
1762 
1763    Notes:
1764    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1765 
1766 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1767 @*/
1768 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1769 {
1770   PetscSF             msf;
1771   PetscInt            i, j, k, nroots, nmroots;
1772   PetscErrorCode      ierr;
1773 
1774   PetscFunctionBegin;
1775   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1776   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1777   if (nroots) PetscValidIntPointer(degree,2);
1778   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1779   PetscValidPointer(multiRootsOrigNumbering,4);
1780   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1781   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1782   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1783   for (i=0,j=0,k=0; i<nroots; i++) {
1784     if (!degree[i]) continue;
1785     for (j=0; j<degree[i]; j++,k++) {
1786       (*multiRootsOrigNumbering)[k] = i;
1787     }
1788   }
1789   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check failed");
1790   if (nMultiRoots) *nMultiRoots = nmroots;
1791   PetscFunctionReturn(0);
1792 }
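/*
   Illustrative usage sketch (variable names are made up): recover, for each multi-root,
   the index of the original root it came from.

     const PetscInt *degree;
     PetscInt       nmr,*morig;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmr,&morig);CHKERRQ(ierr);
     // morig[k] is the original root index of multi-root k, for k = 0..nmr-1
     ierr = PetscFree(morig);CHKERRQ(ierr);
*/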
1793 
1794 /*@C
1795    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1796 
1797    Collective
1798 
1799    Input Arguments:
1800 +  sf - star forest
1801 .  unit - data type
1802 -  leafdata - leaf data to gather to roots
1803 
1804    Output Argument:
1805 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1806 
1807    Level: intermediate
1808 
1809 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1810 @*/
1811 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1812 {
1813   PetscErrorCode ierr;
1814   PetscSF        multi = NULL;
1815 
1816   PetscFunctionBegin;
1817   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1818   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1819   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1820   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 /*@C
1825    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1826 
1827    Collective
1828 
1829    Input Arguments:
1830 +  sf - star forest
1831 .  unit - data type
1832 -  leafdata - leaf data to gather to roots
1833 
1834    Output Argument:
1835 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1836 
1837    Level: intermediate
1838 
1839 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1840 @*/
1841 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1842 {
1843   PetscErrorCode ierr;
1844   PetscSF        multi = NULL;
1845 
1846   PetscFunctionBegin;
1847   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1848   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1849   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
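/*
   Illustrative usage sketch (assumes 'sf', 'nroots' and 'leafdata' already exist): gather
   every leaf value to its root; root i receives degree[i] entries, so the multi-root
   buffer is sized by the sum of the degrees.

     const PetscInt *degree;
     PetscInt       i,ndata,*multirootdata;

     ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
     ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
     for (i=0,ndata=0; i<nroots; i++) ndata += degree[i];
     ierr = PetscMalloc1(ndata,&multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherBegin(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscSFGatherEnd(sf,MPIU_INT,leafdata,multirootdata);CHKERRQ(ierr);
     ierr = PetscFree(multirootdata);CHKERRQ(ierr);
*/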
1852 
1853 /*@C
1854    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1855 
1856    Collective
1857 
1858    Input Arguments:
1859 +  sf - star forest
1860 .  unit - data type
1861 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1862 
1863    Output Argument:
1864 .  leafdata - leaf data to be updated with personal data from each respective root
1865 
1866    Level: intermediate
1867 
1868 .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
1869 @*/
1870 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1871 {
1872   PetscErrorCode ierr;
1873   PetscSF        multi = NULL;
1874 
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1877   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1878   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1879   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 /*@C
1884    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1885 
1886    Collective
1887 
1888    Input Arguments:
1889 +  sf - star forest
1890 .  unit - data type
1891 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1892 
1893    Output Argument:
1894 .  leafdata - leaf data to be updated with personal data from each respective root
1895 
1896    Level: intermediate
1897 
1898 .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
1899 @*/
1900 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1901 {
1902   PetscErrorCode ierr;
1903   PetscSF        multi = NULL;
1904 
1905   PetscFunctionBegin;
1906   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1907   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1908   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
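/*
   Illustrative usage sketch: the reverse of the gather above; each leaf receives one
   unit taken from its personal slot in the multi-root buffer.

     ierr = PetscSFScatterBegin(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
     ierr = PetscSFScatterEnd(sf,MPIU_INT,multirootdata,leafdata);CHKERRQ(ierr);
*/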
1911 
1912 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1913 {
1914   PetscInt        i, n, nleaves;
1915   const PetscInt *ilocal = NULL;
1916   PetscHSetI      seen;
1917   PetscErrorCode  ierr;
1918 
1919   PetscFunctionBegin;
1920   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1921   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1922   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1923   for (i = 0; i < nleaves; i++) {
1924     const PetscInt leaf = ilocal ? ilocal[i] : i;
1925     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1926   }
1927   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1928   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 /*@
1934   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1935 
1936   Input Parameters:
1937 + sfA - The first PetscSF
1938 - sfB - The second PetscSF
1939 
1940   Output Parameters:
1941 . sfBA - The composite SF
1942 
1943   Level: developer
1944 
1945   Notes:
1946   Currently, the two SFs must be defined on congruent communicators and they must be true star
1947   forests, i.e. the same leaf is not connected with different roots.
1948 
1949   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1950   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1951   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1952   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1953 
1954 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1955 @*/
1956 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1957 {
1958   PetscErrorCode    ierr;
1959   const PetscSFNode *remotePointsA,*remotePointsB;
1960   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1961   const PetscInt    *localPointsA,*localPointsB;
1962   PetscInt          *localPointsBA;
1963   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1964   PetscBool         denseB;
1965 
1966   PetscFunctionBegin;
1967   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1968   PetscSFCheckGraphSet(sfA,1);
1969   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1970   PetscSFCheckGraphSet(sfB,2);
1971   PetscCheckSameComm(sfA,1,sfB,2);
1972   PetscValidPointer(sfBA,3);
1973   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1974   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1975 
1976   ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
1977   ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
1978   if (localPointsA) {
1979     ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
1980     for (i=0; i<numRootsB; i++) {
1981       reorderedRemotePointsA[i].rank = -1;
1982       reorderedRemotePointsA[i].index = -1;
1983     }
1984     for (i=0; i<numLeavesA; i++) {
1985       if (localPointsA[i] >= numRootsB) continue;
1986       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1987     }
1988     remotePointsA = reorderedRemotePointsA;
1989   }
1990   ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
1991   ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
1992   ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1993   ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1994   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1995 
1996   denseB = (PetscBool)!localPointsB;
1997   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1998     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1999     else numLeavesBA++;
2000   }
2001   if (denseB) {
2002     localPointsBA  = NULL;
2003     remotePointsBA = leafdataB;
2004   } else {
2005     ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
2006     ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
2007     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2008       const PetscInt l = localPointsB ? localPointsB[i] : i;
2009 
2010       if (leafdataB[l-minleaf].rank == -1) continue;
2011       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2012       localPointsBA[numLeavesBA] = l;
2013       numLeavesBA++;
2014     }
2015     ierr = PetscFree(leafdataB);CHKERRQ(ierr);
2016   }
2017   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2018   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
2019   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
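/*
   Illustrative usage sketch (variable names are made up): compose two SFs so that a
   single Bcast on the result behaves like a Bcast on sfA followed by a Bcast on sfB,
   restricted to the connected nodes.  'rootdataA' lives on sfA's root space and
   'leafdataB' on sfB's leaf space.

     PetscSF sfBA;

     ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
     ierr = PetscSFBcastBegin(sfBA,MPIU_INT,rootdataA,leafdataB);CHKERRQ(ierr);
     ierr = PetscSFBcastEnd(sfBA,MPIU_INT,rootdataA,leafdataB);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/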
2022 
2023 /*@
2024   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2025 
2026   Input Parameters:
2027 + sfA - The first PetscSF
2028 - sfB - The second PetscSF
2029 
2030   Output Parameters:
2031 . sfBA - The composite SF.
2032 
2033   Level: developer
2034 
2035   Notes:
2036   Currently, the two SFs must be defined on congruent communicators and they must be true star
2037   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2038   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2039 
2040   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2041   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2042   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2043   a Reduce on sfB, on connected roots.
2044 
2045 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2046 @*/
2047 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2048 {
2049   PetscErrorCode    ierr;
2050   const PetscSFNode *remotePointsA,*remotePointsB;
2051   PetscSFNode       *remotePointsBA;
2052   const PetscInt    *localPointsA,*localPointsB;
2053   PetscSFNode       *reorderedRemotePointsA = NULL;
2054   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2055   MPI_Op            op;
2056 #if defined(PETSC_USE_64BIT_INDICES)
2057   PetscBool         iswin;
2058 #endif
2059 
2060   PetscFunctionBegin;
2061   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2062   PetscSFCheckGraphSet(sfA,1);
2063   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2064   PetscSFCheckGraphSet(sfB,2);
2065   PetscCheckSameComm(sfA,1,sfB,2);
2066   PetscValidPointer(sfBA,3);
2067   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
2068   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
2069 
2070   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
2071   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
2072 
2073   /* TODO: Check roots of sfB have degree of 1 */
2074   /* Once we implement it, we can replace the MPI_MAXLOC
2075      with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
2076      We use MPI_MAXLOC only to have a deterministic output from this routine if
2077      the root condition is not met.
2078    */
2079   op = MPI_MAXLOC;
2080 #if defined(PETSC_USE_64BIT_INDICES)
2081   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2082   ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
2083   if (iswin) op = MPIU_REPLACE;
2084 #endif
2085 
2086   ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
2087   ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
2088   for (i=0; i<maxleaf - minleaf + 1; i++) {
2089     reorderedRemotePointsA[i].rank = -1;
2090     reorderedRemotePointsA[i].index = -1;
2091   }
2092   if (localPointsA) {
2093     for (i=0; i<numLeavesA; i++) {
2094       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2095       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2096     }
2097   } else {
2098     for (i=0; i<numLeavesA; i++) {
2099       if (i > maxleaf || i < minleaf) continue;
2100       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2101     }
2102   }
2103 
2104   ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
2105   ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
2106   for (i=0; i<numRootsB; i++) {
2107     remotePointsBA[i].rank = -1;
2108     remotePointsBA[i].index = -1;
2109   }
2110 
2111   ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2112   ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2113   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
2114   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2115     if (remotePointsBA[i].rank == -1) continue;
2116     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2117     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2118     localPointsBA[numLeavesBA] = i;
2119     numLeavesBA++;
2120   }
2121   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2122   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
2123   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
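/*
   Illustrative usage sketch: compose sfA with the inverse of sfB; a Bcast on the result
   behaves like a Bcast on sfA followed by a Reduce on sfB, restricted to the connected
   roots.

     PetscSF sfBA;

     ierr = PetscSFComposeInverse(sfA,sfB,&sfBA);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
*/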
2126 
2127 /*
2128   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2129 
2130   Input Parameters:
2131 . sf - The global PetscSF
2132 
2133   Output Parameters:
2134 . out - The local PetscSF
2135  */
2136 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2137 {
2138   MPI_Comm           comm;
2139   PetscMPIInt        myrank;
2140   const PetscInt     *ilocal;
2141   const PetscSFNode  *iremote;
2142   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2143   PetscSFNode        *liremote;
2144   PetscSF            lsf;
2145   PetscErrorCode     ierr;
2146 
2147   PetscFunctionBegin;
2148   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2149   if (sf->ops->CreateLocalSF) {
2150     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
2151   } else {
2152     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2153     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2154     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2155 
2156     /* Find out local edges and build a local SF */
2157     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2158     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2159     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2160     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2161 
2162     for (i=j=0; i<nleaves; i++) {
2163       if (iremote[i].rank == (PetscInt)myrank) {
2164         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2165         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2166         liremote[j].index = iremote[i].index;
2167         j++;
2168       }
2169     }
2170     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2171     ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
2172     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2173     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2174     *out = lsf;
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2180 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2181 {
2182   PetscErrorCode     ierr;
2183   PetscMemType       rootmtype,leafmtype;
2184 
2185   PetscFunctionBegin;
2186   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2187   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2188   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2189   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2190   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2191   if (sf->ops->BcastToZero) {
2192     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2193   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2194   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2195   PetscFunctionReturn(0);
2196 }
2197 
2198