1 
2 /*
3    This private file should not be included in users' code.
4    Defines the fields shared by all vector scatter implementations.
5 
6 */
7 
8 #ifndef __VECSCATTERIMPL_H
9 #define __VECSCATTERIMPL_H
10 
11 #include <petscvec.h>
12 #include <petsc/private/petscimpl.h>
13 #include <petsc/private/vecimpl.h>
14 #include <petscviewer.h>
15 
16 PETSC_EXTERN PetscBool VecScatterRegisterAllCalled;
17 PETSC_EXTERN PetscErrorCode VecScatterRegisterAll(void);
18 
19 typedef enum { VEC_SCATTER_SEQ_GENERAL,VEC_SCATTER_SEQ_STRIDE,
20                VEC_SCATTER_MPI_GENERAL,VEC_SCATTER_MPI_TOALL,
21                VEC_SCATTER_MPI_TOONE} VecScatterFormat;
22 
23 #define VECSCATTER_IMPL_HEADER \
24       VecScatterFormat format;
25 
26 typedef struct {
27   VECSCATTER_IMPL_HEADER
28 } VecScatter_Common;
29 
30 /* A plan to optimize individual memory copies (e.g., pack/unpack to/from send/recv buffers, or local scatters)
31    in VecScatter. Currently, a scatter to a neighbor processor may be transformed into 1) multiple (including one)
32    contiguous memory copies, e.g., memcpy; OR 2) one strided memory copies.
33 
34     For brevity, we call them memory copies. In reality, the optimization applies not only to INSERT_VALUES, but also to ADD_VALUES, etc.
35  */
36 typedef struct {
37   PetscInt  n;                /* number of processors */
38   PetscBool *optimized;       /* [n] is the scatter to procs[i] optimized? */
39   PetscInt  *copy_offsets;    /* [n+1] we number all copies. Scatter to procs[i] is optimized into copies in [copy_offsets[i],copy_offsets[i+1]) */
40   PetscInt  *copy_starts;     /* [*] j-th copy starts at index copy_starts[j] of the vector */
41   PetscInt  *copy_lengths;    /* [*] with length copy_lengths[j] in units of PetscScalar */
42   PetscInt  *stride_first;    /* [n] if optimized[i] is TRUE but copy_offsets[i] = copy_offsets[i+1], then scatter to procs[i] is strided. The first */
43   PetscInt  *stride_step;     /* [n]   index is stride_first[i], step is stride_step[i], */
44   PetscInt  *stride_n;        /* [n]   and total stride_n[i] steps */
45   PetscBool same_copy_starts; /* used only by VecScatterMemcpyPlanCreate_SGToSG(). If true, to's copy_starts[] values
46                                  are as same as from's. Used to quickly test if we are doing a self-copy */
47 } VecScatterMemcpyPlan;
48 
49 /*
50    These scatters are for the purely local case.
51 */
52 typedef struct {
53   VECSCATTER_IMPL_HEADER
54   PetscInt       n;                    /* number of components to scatter */
55   PetscInt       *vslots;              /* locations of components */
56   /*
57        The next three fields are used in parallel scatters, they contain
58        optimization in the special case that the "to" vector and the "from"
59        vector are the same, so one only needs copy components that truly
60        copies instead of just y[idx[i]] = y[jdx[i]] where idx[i] == jdx[i].
61   */
62   PetscBool      nonmatching_computed;
63   PetscInt       n_nonmatching;        /* number of "from"s  != "to"s */
64   PetscInt       *slots_nonmatching;   /* locations of "from"s  != "to"s */
65   VecScatterMemcpyPlan memcpy_plan;    /* a plan to optimize pack/unpack with memcpy */
66 } VecScatter_Seq_General;
67 
68 typedef struct {
69   VECSCATTER_IMPL_HEADER
70   PetscInt       n;
71   PetscInt       first;
72   PetscInt       step;
73 } VecScatter_Seq_Stride;
74 
75 /*
76    This scatter is for a global vector copied (completely) to each processor (or all to one)
77 */
78 typedef struct {
79   VECSCATTER_IMPL_HEADER
80   PetscMPIInt    *count;        /* elements of vector on each processor */
81   PetscMPIInt    *displx;
82   PetscScalar    *work1;
83   PetscScalar    *work2;
84 } VecScatter_MPI_ToAll;
85 
86 /*
87    This is the general parallel scatter
88 */
89 typedef struct {
90   VECSCATTER_IMPL_HEADER
91   PetscInt               n;        /* number of processors to send/receive */
92   PetscInt               *starts;  /* starting point in indices and values for each proc*/
93   PetscInt               *indices; /* list of all components sent or received */
94   PetscMPIInt            *procs;   /* processors we are communicating with in scatter */
95   VecScatterMemcpyPlan   memcpy_plan; /* a plan to optimize pack/unpack/scatter */
96   MPI_Request            *requests,*rev_requests;
97   PetscScalar            *values;  /* buffer for all sends or receives */
98   VecScatter_Seq_General local;    /* any part that happens to be local */
99   MPI_Status             *sstatus,*rstatus;
100   PetscInt               bs;
101   PetscBool              contiq;
102 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)   /* these uses windows for communication only within each node */
103   PetscMPIInt            msize,sharedcnt;           /* total to entries that are going to processes with the same shared memory space */
104   PetscScalar            *sharedspace;              /* space each process puts data to be read from other processes; allocated by MPI */
105   PetscScalar            **sharedspaces;            /* [msize] space other processes put data to be read from this processes. */
106   PetscInt               *sharedspacesoffset;       /* [msize] offset into sharedspaces, that I will read from */
107   PetscInt               *sharedspacestarts;        /* [msize+1] for each shared memory partner this maps to the part of sharedspaceindices of that partner */
108   PetscInt               *sharedspaceindices;       /* [] for each shared memory partner contains indices where values are to be copied to */
109   MPI_Win                sharedwin;                 /* Window that owns sharedspace */
110   PetscInt               notdone;                   /* used by VecScatterEndMPI3Node() */
111 #endif
112 } VecScatter_MPI_General;
113 
114 /* Routines to create, copy, destroy or execute a memcpy plan */
115 
116 /* Create a memcpy plan based on a list of indices */
117 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt,const PetscInt*,const PetscInt*,PetscInt,VecScatterMemcpyPlan*);
118 /* Create a memcpy plan for a SG (sequential general vector) to SG scatter */
119 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt,VecScatter_Seq_General*,VecScatter_Seq_General*);
120 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan*,VecScatterMemcpyPlan*);
121 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan*);
122 /* Create/copy/destroy a memcpy plan for a P (parallel vector) to P scatter */
123 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General*,VecScatter_MPI_General*);
124 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General*,const VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter_MPI_General*);
125 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General*,VecScatter_MPI_General*);
126 
127 /* Pack data from x to y according to the i-th memcpy plan in xplan */
VecScatterMemcpyPlanExecute_Pack(PetscInt i,const PetscScalar * PETSC_RESTRICT x,const VecScatterMemcpyPlan * xplan,PetscScalar * PETSC_RESTRICT y,InsertMode addv,PetscInt bs)128 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Pack(PetscInt i,const PetscScalar *PETSC_RESTRICT x,const VecScatterMemcpyPlan *xplan,PetscScalar *PETSC_RESTRICT y,InsertMode addv,PetscInt bs)
129 {
130   PetscErrorCode    ierr;
131   PetscInt          j,k,len,step = 0,n = 0;
132   const PetscScalar *xv = NULL;
133   PetscBool         strided;
134 
135   PetscFunctionBegin;
136   strided = (xplan->copy_offsets[i] == xplan->copy_offsets[i+1]) ? PETSC_TRUE : PETSC_FALSE;
137   if (strided) {
138     xv   = x+xplan->stride_first[i];
139     step = xplan->stride_step[i];
140     n    = xplan->stride_n[i];
141   }
142 
143   if (addv == INSERT_VALUES) {
144     if (strided) {
145       for (j=0; j<n; j++)
146         for (k=0; k<bs; k++) y[j*bs+k] = xv[j*step+k];
147     } else {
148       for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
149         len  = xplan->copy_lengths[j];
150         ierr = PetscArraycpy(y,x+xplan->copy_starts[j],len);CHKERRQ(ierr);
151         y   += len;
152       }
153     }
154   } else if (addv == ADD_VALUES) {
155     if (strided) {
156       for (j=0; j<n; j++)
157         for (k=0; k<bs; k++) y[j*bs+k] += xv[j*step+k];
158     } else {
159       for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
160         len  = xplan->copy_lengths[j];
161         xv   = x+xplan->copy_starts[j];
162         for (k=0; k<len; k++) y[k] += xv[k];
163         y   += len;
164       }
165     }
166   }
167 #if !defined(PETSC_USE_COMPLEX)
168   else if (addv == MAX_VALUES) {
169     if (strided) {
170       for (j=0; j<n; j++)
171         for (k=0; k<bs; k++) y[j*bs+k] = PetscMax(y[j*bs+k],xv[j*step+k]);
172     } else {
173       for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
174         len  = xplan->copy_lengths[j];
175         xv   = x+xplan->copy_starts[j];
176         for (k=0; k<len; k++) y[k] = PetscMax(y[k],xv[k]);
177         y   += len;
178       }
179     }
180   }
181 #endif
182   else {
183     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in packing",addv);
184   }
185   PetscFunctionReturn(0);
186 }
187 
188 /* Unpack data from x to y according to the i-th memcpy plan in yplan */
VecScatterMemcpyPlanExecute_Unpack(PetscInt i,const PetscScalar * PETSC_RESTRICT x,PetscScalar * PETSC_RESTRICT y,const VecScatterMemcpyPlan * yplan,InsertMode addv,PetscInt bs)189 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Unpack(PetscInt i,const PetscScalar *PETSC_RESTRICT x,PetscScalar *PETSC_RESTRICT y,const VecScatterMemcpyPlan *yplan,InsertMode addv,PetscInt bs)
190 {
191   PetscErrorCode ierr;
192   PetscInt       j,k,len,step = 0,n = 0;
193   PetscScalar    *yv = NULL;
194   PetscBool      strided;
195 
196   PetscFunctionBegin;
197   strided = (yplan->copy_offsets[i] == yplan->copy_offsets[i+1]) ? PETSC_TRUE : PETSC_FALSE;
198   if (strided) {
199     yv   = y+yplan->stride_first[i];
200     step = yplan->stride_step[i];
201     n    = yplan->stride_n[i];
202   }
203 
204   if (addv == INSERT_VALUES) {
205     if (strided) {
206       for (j=0; j<n; j++)
207         for (k=0; k<bs; k++) yv[j*step+k] = x[j*bs+k];
208     } else {
209       for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
210         len  = yplan->copy_lengths[j];
211         ierr = PetscArraycpy(y+yplan->copy_starts[j],x,len);CHKERRQ(ierr);
212         x   += len;
213       }
214     }
215   } else if (addv == ADD_VALUES) {
216     if (strided) {
217       for (j=0; j<n; j++)
218         for (k=0; k<bs; k++) yv[j*step+k] += x[j*bs+k];
219     } else {
220       for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
221         len  = yplan->copy_lengths[j];
222         yv   = y+yplan->copy_starts[j];
223         for (k=0; k<len; k++) yv[k] += x[k];
224         x   += len;
225       }
226     }
227   }
228 #if !defined(PETSC_USE_COMPLEX)
229   else if (addv == MAX_VALUES) {
230     if (strided) {
231       for (j=0; j<n; j++)
232         for (k=0; k<bs; k++) yv[j*step+k] = PetscMax(yv[j*step+k],x[j*bs+k]);
233     } else {
234       for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
235         len  = yplan->copy_lengths[j];
236         yv   = y+yplan->copy_starts[j];
237         for (k=0; k<len; k++) yv[k] = PetscMax(yv[k],x[k]);
238         x   += len;
239       }
240     }
241   }
242 #endif
243   else {
244     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in unpacking",addv);
245   }
246   PetscFunctionReturn(0);
247 }
248 
249 /* Scatter data from piece-wise contiguous x to (conforming) piece-wise contiguous y according to the i-th memcpy plan in xplan and yplan respectively */
VecScatterMemcpyPlanExecute_Scatter(PetscInt i,const PetscScalar * PETSC_RESTRICT x,const VecScatterMemcpyPlan * xplan,PetscScalar * PETSC_RESTRICT y,const VecScatterMemcpyPlan * yplan,InsertMode addv)250 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Scatter(PetscInt i,const PetscScalar *PETSC_RESTRICT x,const VecScatterMemcpyPlan *xplan,PetscScalar *PETSC_RESTRICT y,const VecScatterMemcpyPlan *yplan,InsertMode addv)
251 {
252   PetscErrorCode    ierr;
253   PetscInt          j,k,len;
254   const PetscScalar *xv;
255   PetscScalar       *yv;
256 
257   PetscFunctionBegin;
258   if (addv == INSERT_VALUES) {
259     for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
260       ierr = PetscArraycpy(y+yplan->copy_starts[j],x+xplan->copy_starts[j],xplan->copy_lengths[j]);CHKERRQ(ierr);
261     }
262   } else if (addv == ADD_VALUES) {
263     for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
264       len = xplan->copy_lengths[j];
265       xv  = x+xplan->copy_starts[j];
266       yv  = y+yplan->copy_starts[j];
267       for (k=0; k<len; k++) yv[k] += xv[k];
268     }
269   }
270 #if !defined(PETSC_USE_COMPLEX)
271   else if (addv == MAX_VALUES) {
272     for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
273       len = xplan->copy_lengths[j];
274       xv  = x+xplan->copy_starts[j];
275       yv  = y+yplan->copy_starts[j];
276       for (k=0; k<len; k++) yv[k] = PetscMax(yv[k],xv[k]);
277     }
278   }
279 #endif
280   else {
281     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in scattering",addv);
282   }
283   PetscFunctionReturn(0);
284 }
285 
286 PETSC_EXTERN PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter,PetscBool,PetscInt*,PetscInt*);
287 PETSC_EXTERN PetscErrorCode VecScatterGetRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
288 PETSC_EXTERN PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
289 PETSC_EXTERN PetscErrorCode VecScatterRestoreRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
290 PETSC_EXTERN PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
291 
292 typedef struct _VecScatterOps *VecScatterOps;
293 struct _VecScatterOps {
294   PetscErrorCode (*begin)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
295   PetscErrorCode (*end)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
296   PetscErrorCode (*copy)(VecScatter,VecScatter);
297   PetscErrorCode (*destroy)(VecScatter);
298   PetscErrorCode (*setup)(VecScatter);
299   PetscErrorCode (*view)(VecScatter,PetscViewer);
300   PetscErrorCode (*viewfromoptions)(VecScatter,const char prefix[],const char name[]);
301   PetscErrorCode (*remap)(VecScatter,const PetscInt *,const PetscInt*);
302   PetscErrorCode (*getmerged)(VecScatter,PetscBool *);
303   PetscErrorCode (*getremotecount)(VecScatter,PetscBool,PetscInt*,PetscInt*);
304   PetscErrorCode (*getremote)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
305   PetscErrorCode (*getremoteordered)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
306   PetscErrorCode (*restoreremote)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
307   PetscErrorCode (*restoreremoteordered)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
308 };
309 
310 struct _p_VecScatter {
311   PETSCHEADER(struct _VecScatterOps);
312   PetscInt          to_n,from_n;
313   PetscBool         inuse;                /* prevents corruption from mixing two scatters */
314   PetscBool         beginandendtogether;  /* indicates that the scatter begin and end  function are called together, VecScatterEnd() is then treated as a nop */
315   PetscBool         packongpu;            /* For GPU vectors, pack needed entries on GPU, then copy packed data to CPU, then do MPI. Otherwise, we might copy a segment encompassing needed entries */
316   void              *fromdata,*todata;
317   void              *spptr;
318   PetscBool         is_duplicate;         /* IS has duplicate indices, would cause writing error in the case StoP of VecScatterEndMPI3Node */
319   Vec               to_v,from_v;          /* used in VecScatterCreate() */
320   IS                to_is,from_is;        /* used in VecScatterCreate() */
321   const PetscScalar *xdata;               /* vector data to read from */
322   PetscScalar       *ydata;               /* vector data to write to */
323 
324   void              *data;                /* implementation specific data */
325 };
326 
327 PETSC_INTERN PetscErrorCode VecScatterCreate_Seq(VecScatter);
328 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI1(VecScatter);
329 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI3(VecScatter);
330 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI3Node(VecScatter);
331 PETSC_INTERN PetscErrorCode VecScatterCreate_SF(VecScatter);
332 
333 PETSC_INTERN PetscErrorCode VecScatterSetUp_vectype_private(VecScatter,PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter));
334 
335 PETSC_INTERN PetscErrorCode VecScatterDestroy_SGToSG(VecScatter);
336 PETSC_INTERN PetscErrorCode VecScatterDestroy_SGToSS(VecScatter);
337 PETSC_INTERN PetscErrorCode VecScatterDestroy_SSToSG(VecScatter);
338 PETSC_INTERN PetscErrorCode VecScatterDestroy_SSToSS(VecScatter);
339 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSG(VecScatter,Vec,Vec,InsertMode,ScatterMode);
340 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter,Vec,Vec,InsertMode,ScatterMode);
341 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSS(VecScatter,Vec,Vec,InsertMode,ScatterMode);
342 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter,Vec,Vec,InsertMode,ScatterMode);
343 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSG(VecScatter,Vec,Vec,InsertMode,ScatterMode);
344 PETSC_INTERN PetscErrorCode VecScatterView_SSToSG(VecScatter,PetscViewer);
345 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSS(VecScatter,Vec,Vec,InsertMode,ScatterMode);
346 PETSC_INTERN PetscErrorCode VecScatterCopy_SGToSG(VecScatter,VecScatter);
347 PETSC_INTERN PetscErrorCode VecScatterView_SGToSG(VecScatter,PetscViewer);
348 PETSC_INTERN PetscErrorCode VecScatterCopy_SGToSS(VecScatter,VecScatter);
349 PETSC_INTERN PetscErrorCode VecScatterView_SGToSS(VecScatter,PetscViewer);
350 PETSC_INTERN PetscErrorCode VecScatterCopy_SSToSS(VecScatter,VecScatter);
351 PETSC_INTERN PetscErrorCode VecScatterView_SSToSS(VecScatter,PetscViewer);
352 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt,VecScatter_Seq_General*,VecScatter_Seq_General*);
353 PETSC_INTERN PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);
354 
355 #define VEC_SEQ_ID 0
356 #define VEC_MPI_ID 1
357 #define IS_GENERAL_ID 0
358 #define IS_STRIDE_ID  1
359 #define IS_BLOCK_ID   2
360 
361 PETSC_EXTERN PetscMPIInt Petsc_Reduction_keyval;
362 
363 #endif
364