1
2 /*
3 This private file should not be included in users' code.
4 Defines the fields shared by all vector scatter implementations.
5
6 */
7
8 #ifndef __VECSCATTERIMPL_H
9 #define __VECSCATTERIMPL_H
10
11 #include <petscvec.h>
12 #include <petsc/private/petscimpl.h>
13 #include <petsc/private/vecimpl.h>
14 #include <petscviewer.h>
15
16 PETSC_EXTERN PetscBool VecScatterRegisterAllCalled;
17 PETSC_EXTERN PetscErrorCode VecScatterRegisterAll(void);
18
19 typedef enum { VEC_SCATTER_SEQ_GENERAL,VEC_SCATTER_SEQ_STRIDE,
20 VEC_SCATTER_MPI_GENERAL,VEC_SCATTER_MPI_TOALL,
21 VEC_SCATTER_MPI_TOONE} VecScatterFormat;
22
23 #define VECSCATTER_IMPL_HEADER \
24 VecScatterFormat format;
25
26 typedef struct {
27 VECSCATTER_IMPL_HEADER
28 } VecScatter_Common;
29
30 /* A plan to optimize individual memory copies (e.g., pack/unpack to/from send/recv buffers, or local scatters)
31 in VecScatter. Currently, a scatter to a neighbor processor may be transformed into 1) multiple (including one)
32 contiguous memory copies, e.g., memcpy; OR 2) one strided memory copies.
33
34 For brevity, we call them memory copies. In reality, the optimization applies not only to INSERT_VALUES, but also to ADD_VALUES, etc.
35 */
36 typedef struct {
37 PetscInt n; /* number of processors */
38 PetscBool *optimized; /* [n] is the scatter to procs[i] optimized? */
39 PetscInt *copy_offsets; /* [n+1] we number all copies. Scatter to procs[i] is optimized into copies in [copy_offsets[i],copy_offsets[i+1]) */
40 PetscInt *copy_starts; /* [*] j-th copy starts at index copy_starts[j] of the vector */
41 PetscInt *copy_lengths; /* [*] with length copy_lengths[j] in units of PetscScalar */
42 PetscInt *stride_first; /* [n] if optimized[i] is TRUE but copy_offsets[i] = copy_offsets[i+1], then scatter to procs[i] is strided. The first */
43 PetscInt *stride_step; /* [n] index is stride_first[i], step is stride_step[i], */
44 PetscInt *stride_n; /* [n] and total stride_n[i] steps */
45 PetscBool same_copy_starts; /* used only by VecScatterMemcpyPlanCreate_SGToSG(). If true, to's copy_starts[] values
46 are as same as from's. Used to quickly test if we are doing a self-copy */
47 } VecScatterMemcpyPlan;
48
49 /*
50 These scatters are for the purely local case.
51 */
52 typedef struct {
53 VECSCATTER_IMPL_HEADER
54 PetscInt n; /* number of components to scatter */
55 PetscInt *vslots; /* locations of components */
56 /*
57 The next three fields are used in parallel scatters, they contain
58 optimization in the special case that the "to" vector and the "from"
59 vector are the same, so one only needs copy components that truly
60 copies instead of just y[idx[i]] = y[jdx[i]] where idx[i] == jdx[i].
61 */
62 PetscBool nonmatching_computed;
63 PetscInt n_nonmatching; /* number of "from"s != "to"s */
64 PetscInt *slots_nonmatching; /* locations of "from"s != "to"s */
65 VecScatterMemcpyPlan memcpy_plan; /* a plan to optimize pack/unpack with memcpy */
66 } VecScatter_Seq_General;
67
68 typedef struct {
69 VECSCATTER_IMPL_HEADER
70 PetscInt n;
71 PetscInt first;
72 PetscInt step;
73 } VecScatter_Seq_Stride;
74
75 /*
76 This scatter is for a global vector copied (completely) to each processor (or all to one)
77 */
78 typedef struct {
79 VECSCATTER_IMPL_HEADER
80 PetscMPIInt *count; /* elements of vector on each processor */
81 PetscMPIInt *displx;
82 PetscScalar *work1;
83 PetscScalar *work2;
84 } VecScatter_MPI_ToAll;
85
86 /*
87 This is the general parallel scatter
88 */
89 typedef struct {
90 VECSCATTER_IMPL_HEADER
91 PetscInt n; /* number of processors to send/receive */
92 PetscInt *starts; /* starting point in indices and values for each proc*/
93 PetscInt *indices; /* list of all components sent or received */
94 PetscMPIInt *procs; /* processors we are communicating with in scatter */
95 VecScatterMemcpyPlan memcpy_plan; /* a plan to optimize pack/unpack/scatter */
96 MPI_Request *requests,*rev_requests;
97 PetscScalar *values; /* buffer for all sends or receives */
98 VecScatter_Seq_General local; /* any part that happens to be local */
99 MPI_Status *sstatus,*rstatus;
100 PetscInt bs;
101 PetscBool contiq;
102 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) /* these uses windows for communication only within each node */
103 PetscMPIInt msize,sharedcnt; /* total to entries that are going to processes with the same shared memory space */
104 PetscScalar *sharedspace; /* space each process puts data to be read from other processes; allocated by MPI */
105 PetscScalar **sharedspaces; /* [msize] space other processes put data to be read from this processes. */
106 PetscInt *sharedspacesoffset; /* [msize] offset into sharedspaces, that I will read from */
107 PetscInt *sharedspacestarts; /* [msize+1] for each shared memory partner this maps to the part of sharedspaceindices of that partner */
108 PetscInt *sharedspaceindices; /* [] for each shared memory partner contains indices where values are to be copied to */
109 MPI_Win sharedwin; /* Window that owns sharedspace */
110 PetscInt notdone; /* used by VecScatterEndMPI3Node() */
111 #endif
112 } VecScatter_MPI_General;
113
114 /* Routines to create, copy, destroy or execute a memcpy plan */
115
116 /* Create a memcpy plan based on a list of indices */
117 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_Index(PetscInt,const PetscInt*,const PetscInt*,PetscInt,VecScatterMemcpyPlan*);
118 /* Create a memcpy plan for a SG (sequential general vector) to SG scatter */
119 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt,VecScatter_Seq_General*,VecScatter_Seq_General*);
120 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCopy(const VecScatterMemcpyPlan*,VecScatterMemcpyPlan*);
121 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanDestroy(VecScatterMemcpyPlan*);
122 /* Create/copy/destroy a memcpy plan for a P (parallel vector) to P scatter */
123 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General*,VecScatter_MPI_General*);
124 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General*,const VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter_MPI_General*);
125 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General*,VecScatter_MPI_General*);
126
127 /* Pack data from x to y according to the i-th memcpy plan in xplan */
VecScatterMemcpyPlanExecute_Pack(PetscInt i,const PetscScalar * PETSC_RESTRICT x,const VecScatterMemcpyPlan * xplan,PetscScalar * PETSC_RESTRICT y,InsertMode addv,PetscInt bs)128 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Pack(PetscInt i,const PetscScalar *PETSC_RESTRICT x,const VecScatterMemcpyPlan *xplan,PetscScalar *PETSC_RESTRICT y,InsertMode addv,PetscInt bs)
129 {
130 PetscErrorCode ierr;
131 PetscInt j,k,len,step = 0,n = 0;
132 const PetscScalar *xv = NULL;
133 PetscBool strided;
134
135 PetscFunctionBegin;
136 strided = (xplan->copy_offsets[i] == xplan->copy_offsets[i+1]) ? PETSC_TRUE : PETSC_FALSE;
137 if (strided) {
138 xv = x+xplan->stride_first[i];
139 step = xplan->stride_step[i];
140 n = xplan->stride_n[i];
141 }
142
143 if (addv == INSERT_VALUES) {
144 if (strided) {
145 for (j=0; j<n; j++)
146 for (k=0; k<bs; k++) y[j*bs+k] = xv[j*step+k];
147 } else {
148 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
149 len = xplan->copy_lengths[j];
150 ierr = PetscArraycpy(y,x+xplan->copy_starts[j],len);CHKERRQ(ierr);
151 y += len;
152 }
153 }
154 } else if (addv == ADD_VALUES) {
155 if (strided) {
156 for (j=0; j<n; j++)
157 for (k=0; k<bs; k++) y[j*bs+k] += xv[j*step+k];
158 } else {
159 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
160 len = xplan->copy_lengths[j];
161 xv = x+xplan->copy_starts[j];
162 for (k=0; k<len; k++) y[k] += xv[k];
163 y += len;
164 }
165 }
166 }
167 #if !defined(PETSC_USE_COMPLEX)
168 else if (addv == MAX_VALUES) {
169 if (strided) {
170 for (j=0; j<n; j++)
171 for (k=0; k<bs; k++) y[j*bs+k] = PetscMax(y[j*bs+k],xv[j*step+k]);
172 } else {
173 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
174 len = xplan->copy_lengths[j];
175 xv = x+xplan->copy_starts[j];
176 for (k=0; k<len; k++) y[k] = PetscMax(y[k],xv[k]);
177 y += len;
178 }
179 }
180 }
181 #endif
182 else {
183 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in packing",addv);
184 }
185 PetscFunctionReturn(0);
186 }
187
188 /* Unpack data from x to y according to the i-th memcpy plan in yplan */
VecScatterMemcpyPlanExecute_Unpack(PetscInt i,const PetscScalar * PETSC_RESTRICT x,PetscScalar * PETSC_RESTRICT y,const VecScatterMemcpyPlan * yplan,InsertMode addv,PetscInt bs)189 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Unpack(PetscInt i,const PetscScalar *PETSC_RESTRICT x,PetscScalar *PETSC_RESTRICT y,const VecScatterMemcpyPlan *yplan,InsertMode addv,PetscInt bs)
190 {
191 PetscErrorCode ierr;
192 PetscInt j,k,len,step = 0,n = 0;
193 PetscScalar *yv = NULL;
194 PetscBool strided;
195
196 PetscFunctionBegin;
197 strided = (yplan->copy_offsets[i] == yplan->copy_offsets[i+1]) ? PETSC_TRUE : PETSC_FALSE;
198 if (strided) {
199 yv = y+yplan->stride_first[i];
200 step = yplan->stride_step[i];
201 n = yplan->stride_n[i];
202 }
203
204 if (addv == INSERT_VALUES) {
205 if (strided) {
206 for (j=0; j<n; j++)
207 for (k=0; k<bs; k++) yv[j*step+k] = x[j*bs+k];
208 } else {
209 for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
210 len = yplan->copy_lengths[j];
211 ierr = PetscArraycpy(y+yplan->copy_starts[j],x,len);CHKERRQ(ierr);
212 x += len;
213 }
214 }
215 } else if (addv == ADD_VALUES) {
216 if (strided) {
217 for (j=0; j<n; j++)
218 for (k=0; k<bs; k++) yv[j*step+k] += x[j*bs+k];
219 } else {
220 for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
221 len = yplan->copy_lengths[j];
222 yv = y+yplan->copy_starts[j];
223 for (k=0; k<len; k++) yv[k] += x[k];
224 x += len;
225 }
226 }
227 }
228 #if !defined(PETSC_USE_COMPLEX)
229 else if (addv == MAX_VALUES) {
230 if (strided) {
231 for (j=0; j<n; j++)
232 for (k=0; k<bs; k++) yv[j*step+k] = PetscMax(yv[j*step+k],x[j*bs+k]);
233 } else {
234 for (j=yplan->copy_offsets[i]; j<yplan->copy_offsets[i+1]; j++) {
235 len = yplan->copy_lengths[j];
236 yv = y+yplan->copy_starts[j];
237 for (k=0; k<len; k++) yv[k] = PetscMax(yv[k],x[k]);
238 x += len;
239 }
240 }
241 }
242 #endif
243 else {
244 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in unpacking",addv);
245 }
246 PetscFunctionReturn(0);
247 }
248
249 /* Scatter data from piece-wise contiguous x to (conforming) piece-wise contiguous y according to the i-th memcpy plan in xplan and yplan respectively */
VecScatterMemcpyPlanExecute_Scatter(PetscInt i,const PetscScalar * PETSC_RESTRICT x,const VecScatterMemcpyPlan * xplan,PetscScalar * PETSC_RESTRICT y,const VecScatterMemcpyPlan * yplan,InsertMode addv)250 PETSC_STATIC_INLINE PetscErrorCode VecScatterMemcpyPlanExecute_Scatter(PetscInt i,const PetscScalar *PETSC_RESTRICT x,const VecScatterMemcpyPlan *xplan,PetscScalar *PETSC_RESTRICT y,const VecScatterMemcpyPlan *yplan,InsertMode addv)
251 {
252 PetscErrorCode ierr;
253 PetscInt j,k,len;
254 const PetscScalar *xv;
255 PetscScalar *yv;
256
257 PetscFunctionBegin;
258 if (addv == INSERT_VALUES) {
259 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
260 ierr = PetscArraycpy(y+yplan->copy_starts[j],x+xplan->copy_starts[j],xplan->copy_lengths[j]);CHKERRQ(ierr);
261 }
262 } else if (addv == ADD_VALUES) {
263 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
264 len = xplan->copy_lengths[j];
265 xv = x+xplan->copy_starts[j];
266 yv = y+yplan->copy_starts[j];
267 for (k=0; k<len; k++) yv[k] += xv[k];
268 }
269 }
270 #if !defined(PETSC_USE_COMPLEX)
271 else if (addv == MAX_VALUES) {
272 for (j=xplan->copy_offsets[i]; j<xplan->copy_offsets[i+1]; j++) {
273 len = xplan->copy_lengths[j];
274 xv = x+xplan->copy_starts[j];
275 yv = y+yplan->copy_starts[j];
276 for (k=0; k<len; k++) yv[k] = PetscMax(yv[k],xv[k]);
277 }
278 }
279 #endif
280 else {
281 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d in scattering",addv);
282 }
283 PetscFunctionReturn(0);
284 }
285
286 PETSC_EXTERN PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter,PetscBool,PetscInt*,PetscInt*);
287 PETSC_EXTERN PetscErrorCode VecScatterGetRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
288 PETSC_EXTERN PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
289 PETSC_EXTERN PetscErrorCode VecScatterRestoreRemote_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
290 PETSC_EXTERN PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
291
292 typedef struct _VecScatterOps *VecScatterOps;
293 struct _VecScatterOps {
294 PetscErrorCode (*begin)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
295 PetscErrorCode (*end)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
296 PetscErrorCode (*copy)(VecScatter,VecScatter);
297 PetscErrorCode (*destroy)(VecScatter);
298 PetscErrorCode (*setup)(VecScatter);
299 PetscErrorCode (*view)(VecScatter,PetscViewer);
300 PetscErrorCode (*viewfromoptions)(VecScatter,const char prefix[],const char name[]);
301 PetscErrorCode (*remap)(VecScatter,const PetscInt *,const PetscInt*);
302 PetscErrorCode (*getmerged)(VecScatter,PetscBool *);
303 PetscErrorCode (*getremotecount)(VecScatter,PetscBool,PetscInt*,PetscInt*);
304 PetscErrorCode (*getremote)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
305 PetscErrorCode (*getremoteordered)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
306 PetscErrorCode (*restoreremote)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
307 PetscErrorCode (*restoreremoteordered)(VecScatter,PetscBool,PetscInt*,const PetscInt**,const PetscInt**,const PetscMPIInt**,PetscInt*);
308 };
309
310 struct _p_VecScatter {
311 PETSCHEADER(struct _VecScatterOps);
312 PetscInt to_n,from_n;
313 PetscBool inuse; /* prevents corruption from mixing two scatters */
314 PetscBool beginandendtogether; /* indicates that the scatter begin and end function are called together, VecScatterEnd() is then treated as a nop */
315 PetscBool packongpu; /* For GPU vectors, pack needed entries on GPU, then copy packed data to CPU, then do MPI. Otherwise, we might copy a segment encompassing needed entries */
316 void *fromdata,*todata;
317 void *spptr;
318 PetscBool is_duplicate; /* IS has duplicate indices, would cause writing error in the case StoP of VecScatterEndMPI3Node */
319 Vec to_v,from_v; /* used in VecScatterCreate() */
320 IS to_is,from_is; /* used in VecScatterCreate() */
321 const PetscScalar *xdata; /* vector data to read from */
322 PetscScalar *ydata; /* vector data to write to */
323
324 void *data; /* implementation specific data */
325 };
326
327 PETSC_INTERN PetscErrorCode VecScatterCreate_Seq(VecScatter);
328 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI1(VecScatter);
329 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI3(VecScatter);
330 PETSC_INTERN PetscErrorCode VecScatterCreate_MPI3Node(VecScatter);
331 PETSC_INTERN PetscErrorCode VecScatterCreate_SF(VecScatter);
332
333 PETSC_INTERN PetscErrorCode VecScatterSetUp_vectype_private(VecScatter,PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter),PetscErrorCode (*)(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter));
334
335 PETSC_INTERN PetscErrorCode VecScatterDestroy_SGToSG(VecScatter);
336 PETSC_INTERN PetscErrorCode VecScatterDestroy_SGToSS(VecScatter);
337 PETSC_INTERN PetscErrorCode VecScatterDestroy_SSToSG(VecScatter);
338 PETSC_INTERN PetscErrorCode VecScatterDestroy_SSToSS(VecScatter);
339 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSG(VecScatter,Vec,Vec,InsertMode,ScatterMode);
340 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter,Vec,Vec,InsertMode,ScatterMode);
341 PETSC_INTERN PetscErrorCode VecScatterBegin_SGToSS(VecScatter,Vec,Vec,InsertMode,ScatterMode);
342 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter,Vec,Vec,InsertMode,ScatterMode);
343 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSG(VecScatter,Vec,Vec,InsertMode,ScatterMode);
344 PETSC_INTERN PetscErrorCode VecScatterView_SSToSG(VecScatter,PetscViewer);
345 PETSC_INTERN PetscErrorCode VecScatterBegin_SSToSS(VecScatter,Vec,Vec,InsertMode,ScatterMode);
346 PETSC_INTERN PetscErrorCode VecScatterCopy_SGToSG(VecScatter,VecScatter);
347 PETSC_INTERN PetscErrorCode VecScatterView_SGToSG(VecScatter,PetscViewer);
348 PETSC_INTERN PetscErrorCode VecScatterCopy_SGToSS(VecScatter,VecScatter);
349 PETSC_INTERN PetscErrorCode VecScatterView_SGToSS(VecScatter,PetscViewer);
350 PETSC_INTERN PetscErrorCode VecScatterCopy_SSToSS(VecScatter,VecScatter);
351 PETSC_INTERN PetscErrorCode VecScatterView_SSToSS(VecScatter,PetscViewer);
352 PETSC_INTERN PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt,VecScatter_Seq_General*,VecScatter_Seq_General*);
353 PETSC_INTERN PetscErrorCode GetInputISType_private(VecScatter,PetscInt,PetscInt,PetscInt*,IS*,PetscInt*,IS*);
354
355 #define VEC_SEQ_ID 0
356 #define VEC_MPI_ID 1
357 #define IS_GENERAL_ID 0
358 #define IS_STRIDE_ID 1
359 #define IS_BLOCK_ID 2
360
361 PETSC_EXTERN PetscMPIInt Petsc_Reduction_keyval;
362
363 #endif
364