1
2 /*
3 Defines parallel vector scatters using MPI1.
4 */
5
6 #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8 #include <petsc/private/vecscatterimpl.h>
9
10
VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)11 PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
12 {
13 VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
14 VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
15 PetscErrorCode ierr;
16 PetscInt i;
17 PetscMPIInt rank;
18 PetscViewerFormat format;
19 PetscBool iascii;
20
21 PetscFunctionBegin;
22 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
23 if (iascii) {
24 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);CHKERRQ(ierr);
25 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
26 if (format == PETSC_VIEWER_ASCII_INFO) {
27 PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28
29 ierr = MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
30 ierr = MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
31 itmp = to->starts[to->n+1];
32 ierr = MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
33 itmp = from->starts[from->n+1];
34 ierr = MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
35 ierr = MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
36
37 ierr = PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");CHKERRQ(ierr);
38 ierr = PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);CHKERRQ(ierr);
39 ierr = PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);CHKERRQ(ierr);
40 ierr = PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
41 ierr = PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
42 ierr = PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
43
44 } else {
45 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
46 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);CHKERRQ(ierr);
47 if (to->n) {
48 for (i=0; i<to->n; i++) {
49 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);CHKERRQ(ierr);
50 if (to->memcpy_plan.optimized[i]) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]);CHKERRQ(ierr); }
51 }
52 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");CHKERRQ(ierr);
53 for (i=0; i<to->starts[to->n]; i++) {
54 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);CHKERRQ(ierr);
55 }
56 }
57
58 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);CHKERRQ(ierr);
59 if (from->n) {
60 for (i=0; i<from->n; i++) {
61 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);CHKERRQ(ierr);
62 if (from->memcpy_plan.optimized[i]) { ierr = PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]);CHKERRQ(ierr); }
63 }
64
65 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");CHKERRQ(ierr);
66 for (i=0; i<from->starts[from->n]; i++) {
67 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);CHKERRQ(ierr);
68 }
69 }
70 if (to->local.n) {
71 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);CHKERRQ(ierr);
72 if (to->local.memcpy_plan.optimized[0]) {
73 ierr = PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);CHKERRQ(ierr);
74 }
75 for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
76 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);CHKERRQ(ierr);
77 }
78 }
79
80 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
81 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
82 }
83 }
84 PetscFunctionReturn(0);
85 }
86
87 /* -------------------------------------------------------------------------------------*/
VecScatterDestroy_PtoP_MPI1(VecScatter ctx)88 PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
89 {
90 VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
91 VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
92 PetscErrorCode ierr;
93 PetscInt i;
94
95 PetscFunctionBegin;
96 /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
97 if (to->requests) {
98 for (i=0; i<to->n; i++) {
99 ierr = MPI_Request_free(to->requests + i);CHKERRQ(ierr);
100 }
101 }
102 if (to->rev_requests) {
103 for (i=0; i<to->n; i++) {
104 ierr = MPI_Request_free(to->rev_requests + i);CHKERRQ(ierr);
105 }
106 }
107 if (from->requests) {
108 for (i=0; i<from->n; i++) {
109 ierr = MPI_Request_free(from->requests + i);CHKERRQ(ierr);
110 }
111 }
112
113 if (from->rev_requests) {
114 for (i=0; i<from->n; i++) {
115 ierr = MPI_Request_free(from->rev_requests + i);CHKERRQ(ierr);
116 }
117 }
118
119 ierr = PetscFree(to->local.vslots);CHKERRQ(ierr);
120 ierr = PetscFree(from->local.vslots);CHKERRQ(ierr);
121 ierr = PetscFree(to->local.slots_nonmatching);CHKERRQ(ierr);
122 ierr = PetscFree(from->local.slots_nonmatching);CHKERRQ(ierr);
123 ierr = PetscFree(to->rev_requests);CHKERRQ(ierr);
124 ierr = PetscFree(from->rev_requests);CHKERRQ(ierr);
125 ierr = PetscFree(to->requests);CHKERRQ(ierr);
126 ierr = PetscFree(from->requests);CHKERRQ(ierr);
127 ierr = PetscFree4(to->values,to->indices,to->starts,to->procs);CHKERRQ(ierr);
128 ierr = PetscFree2(to->sstatus,to->rstatus);CHKERRQ(ierr);
129 ierr = PetscFree4(from->values,from->indices,from->starts,from->procs);CHKERRQ(ierr);
130 ierr = VecScatterMemcpyPlanDestroy_PtoP(to,from);CHKERRQ(ierr);
131 ierr = PetscFree(from);CHKERRQ(ierr);
132 ierr = PetscFree(to);CHKERRQ(ierr);
133 PetscFunctionReturn(0);
134 }
135
136 /* --------------------------------------------------------------------------------------*/
137
VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)138 PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
139 {
140 VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
141 VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
142 PetscErrorCode ierr;
143 PetscInt ny,bs = in_from->bs;
144
145 PetscFunctionBegin;
146 out->ops->begin = in->ops->begin;
147 out->ops->end = in->ops->end;
148 out->ops->copy = in->ops->copy;
149 out->ops->destroy = in->ops->destroy;
150 out->ops->view = in->ops->view;
151
152 /* allocate entire send scatter context */
153 ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
154 ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
155
156 ny = in_to->starts[in_to->n];
157 out_to->n = in_to->n;
158 out_to->format = in_to->format;
159
160 ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
161 ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
162 ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
163 ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
164 ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
165 ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
166
167 out->todata = (void*)out_to;
168 out_to->local.n = in_to->local.n;
169 out_to->local.nonmatching_computed = PETSC_FALSE;
170 out_to->local.n_nonmatching = 0;
171 out_to->local.slots_nonmatching = NULL;
172 if (in_to->local.n) {
173 ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
174 ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
175 ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
176 ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
177 } else {
178 out_to->local.vslots = NULL;
179 out_from->local.vslots = NULL;
180 }
181
182 /* allocate entire receive context */
183 out_from->format = in_from->format;
184 ny = in_from->starts[in_from->n];
185 out_from->n = in_from->n;
186
187 ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
188 ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
189 ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
190 ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
191 ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
192
193 out->fromdata = (void*)out_from;
194 out_from->local.n = in_from->local.n;
195 out_from->local.nonmatching_computed = PETSC_FALSE;
196 out_from->local.n_nonmatching = 0;
197 out_from->local.slots_nonmatching = NULL;
198
199 /*
200 set up the request arrays for use with isend_init() and irecv_init()
201 */
202 {
203 PetscMPIInt tag;
204 MPI_Comm comm;
205 PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
206 PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
207 PetscInt i;
208 MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
209 MPI_Request *rev_swaits,*rev_rwaits;
210 PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
211
212 ierr = PetscMalloc1(in_to->n,&out_to->rev_requests);CHKERRQ(ierr);
213 ierr = PetscMalloc1(in_from->n,&out_from->rev_requests);CHKERRQ(ierr);
214
215 rev_rwaits = out_to->rev_requests;
216 rev_swaits = out_from->rev_requests;
217
218 out_from->bs = out_to->bs = bs;
219 tag = ((PetscObject)out)->tag;
220 ierr = PetscObjectGetComm((PetscObject)out,&comm);CHKERRQ(ierr);
221
222 /* Register the receives that you will use later (sends for scatter reverse) */
223 for (i=0; i<out_from->n; i++) {
224 ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
225 ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);CHKERRQ(ierr);
226 }
227 for (i=0; i<out_to->n; i++) {
228 ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
229 /* Register receives for scatter reverse */
230 ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);CHKERRQ(ierr);
231 }
232 }
233
234 ierr = VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);CHKERRQ(ierr);
235 PetscFunctionReturn(0);
236 }
237
VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)238 PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
239 {
240 VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
241 VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
242 PetscErrorCode ierr;
243 PetscInt ny,bs = in_from->bs;
244 PetscMPIInt size;
245
246 PetscFunctionBegin;
247 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);CHKERRQ(ierr);
248
249 out->ops->begin = in->ops->begin;
250 out->ops->end = in->ops->end;
251 out->ops->copy = in->ops->copy;
252 out->ops->destroy = in->ops->destroy;
253 out->ops->view = in->ops->view;
254
255 /* allocate entire send scatter context */
256 ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
257 ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
258
259 ny = in_to->starts[in_to->n];
260 out_to->n = in_to->n;
261 out_to->format = in_to->format;
262
263 ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
264 ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
265 ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
266 ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
267 ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
268 ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
269
270 out->todata = (void*)out_to;
271 out_to->local.n = in_to->local.n;
272 out_to->local.nonmatching_computed = PETSC_FALSE;
273 out_to->local.n_nonmatching = 0;
274 out_to->local.slots_nonmatching = NULL;
275 if (in_to->local.n) {
276 ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
277 ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
278 ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
279 ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
280 } else {
281 out_to->local.vslots = NULL;
282 out_from->local.vslots = NULL;
283 }
284
285 /* allocate entire receive context */
286 out_from->format = in_from->format;
287 ny = in_from->starts[in_from->n];
288 out_from->n = in_from->n;
289
290 ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
291 ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
292 ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
293 ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
294 ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
295
296 out->fromdata = (void*)out_from;
297 out_from->local.n = in_from->local.n;
298 out_from->local.nonmatching_computed = PETSC_FALSE;
299 out_from->local.n_nonmatching = 0;
300 out_from->local.slots_nonmatching = NULL;
301
302 ierr = VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);CHKERRQ(ierr);
303 PetscFunctionReturn(0);
304 }
305
306 /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General * to,VecScatter_MPI_General * from)307 PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
308 {
309 PetscErrorCode ierr;
310
311 PetscFunctionBegin;
312 ierr = VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);CHKERRQ(ierr);
313 ierr = VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);CHKERRQ(ierr);
314 ierr = VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);CHKERRQ(ierr);
315 PetscFunctionReturn(0);
316 }
317
VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General * in_to,const VecScatter_MPI_General * in_from,VecScatter_MPI_General * out_to,VecScatter_MPI_General * out_from)318 PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
319 {
320 PetscErrorCode ierr;
321
322 PetscFunctionBegin;
323 ierr = VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);CHKERRQ(ierr);
324 ierr = VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);CHKERRQ(ierr);
325 ierr = VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);CHKERRQ(ierr);
326 ierr = VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);CHKERRQ(ierr);
327 PetscFunctionReturn(0);
328 }
329
VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General * to,VecScatter_MPI_General * from)330 PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
331 {
332 PetscErrorCode ierr;
333
334 PetscFunctionBegin;
335 ierr = VecScatterMemcpyPlanDestroy(&to->memcpy_plan);CHKERRQ(ierr);
336 ierr = VecScatterMemcpyPlanDestroy(&from->memcpy_plan);CHKERRQ(ierr);
337 ierr = VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);CHKERRQ(ierr);
338 ierr = VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);CHKERRQ(ierr);
339 PetscFunctionReturn(0);
340 }
341
/* --------------------------------------------------------------------------------------------------
   Pack routines gather message data into send buffers; unpack routines scatter it out of receive buffers.

   These could be generated automatically.

   Fortran kernels etc. could be used.
*/
Pack_MPI1_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)349 PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
350 {
351 PetscInt i;
352 for (i=0; i<n; i++) y[i] = x[indicesx[i]];
353 }
354
UnPack_MPI1_1(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)355 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
356 {
357 PetscInt i;
358
359 PetscFunctionBegin;
360 switch (addv) {
361 case INSERT_VALUES:
362 case INSERT_ALL_VALUES:
363 for (i=0; i<n; i++) y[indicesy[i]] = x[i];
364 break;
365 case ADD_VALUES:
366 case ADD_ALL_VALUES:
367 for (i=0; i<n; i++) y[indicesy[i]] += x[i];
368 break;
369 #if !defined(PETSC_USE_COMPLEX)
370 case MAX_VALUES:
371 for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
372 #else
373 case MAX_VALUES:
374 #endif
375 case NOT_SET_VALUES:
376 break;
377 default:
378 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
379 }
380 PetscFunctionReturn(0);
381 }
382
Scatter_MPI1_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)383 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
384 {
385 PetscInt i;
386
387 PetscFunctionBegin;
388 switch (addv) {
389 case INSERT_VALUES:
390 case INSERT_ALL_VALUES:
391 for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
392 break;
393 case ADD_VALUES:
394 case ADD_ALL_VALUES:
395 for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
396 break;
397 #if !defined(PETSC_USE_COMPLEX)
398 case MAX_VALUES:
399 for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
400 #else
401 case MAX_VALUES:
402 #endif
403 case NOT_SET_VALUES:
404 break;
405 default:
406 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
407 }
408 PetscFunctionReturn(0);
409 }
410
411 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)412 PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
413 {
414 PetscInt i,idx;
415
416 for (i=0; i<n; i++) {
417 idx = *indicesx++;
418 y[0] = x[idx];
419 y[1] = x[idx+1];
420 y += 2;
421 }
422 }
423
UnPack_MPI1_2(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)424 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
425 {
426 PetscInt i,idy;
427
428 PetscFunctionBegin;
429 switch (addv) {
430 case INSERT_VALUES:
431 case INSERT_ALL_VALUES:
432 for (i=0; i<n; i++) {
433 idy = *indicesy++;
434 y[idy] = x[0];
435 y[idy+1] = x[1];
436 x += 2;
437 }
438 break;
439 case ADD_VALUES:
440 case ADD_ALL_VALUES:
441 for (i=0; i<n; i++) {
442 idy = *indicesy++;
443 y[idy] += x[0];
444 y[idy+1] += x[1];
445 x += 2;
446 }
447 break;
448 #if !defined(PETSC_USE_COMPLEX)
449 case MAX_VALUES:
450 for (i=0; i<n; i++) {
451 idy = *indicesy++;
452 y[idy] = PetscMax(y[idy],x[0]);
453 y[idy+1] = PetscMax(y[idy+1],x[1]);
454 x += 2;
455 }
456 #else
457 case MAX_VALUES:
458 #endif
459 case NOT_SET_VALUES:
460 break;
461 default:
462 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
463 }
464 PetscFunctionReturn(0);
465 }
466
Scatter_MPI1_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)467 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
468 {
469 PetscInt i,idx,idy;
470
471 PetscFunctionBegin;
472 switch (addv) {
473 case INSERT_VALUES:
474 case INSERT_ALL_VALUES:
475 for (i=0; i<n; i++) {
476 idx = *indicesx++;
477 idy = *indicesy++;
478 y[idy] = x[idx];
479 y[idy+1] = x[idx+1];
480 }
481 break;
482 case ADD_VALUES:
483 case ADD_ALL_VALUES:
484 for (i=0; i<n; i++) {
485 idx = *indicesx++;
486 idy = *indicesy++;
487 y[idy] += x[idx];
488 y[idy+1] += x[idx+1];
489 }
490 break;
491 #if !defined(PETSC_USE_COMPLEX)
492 case MAX_VALUES:
493 for (i=0; i<n; i++) {
494 idx = *indicesx++;
495 idy = *indicesy++;
496 y[idy] = PetscMax(y[idy],x[idx]);
497 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
498 }
499 #else
500 case MAX_VALUES:
501 #endif
502 case NOT_SET_VALUES:
503 break;
504 default:
505 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
506 }
507 PetscFunctionReturn(0);
508 }
509 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)510 PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
511 {
512 PetscInt i,idx;
513
514 for (i=0; i<n; i++) {
515 idx = *indicesx++;
516 y[0] = x[idx];
517 y[1] = x[idx+1];
518 y[2] = x[idx+2];
519 y += 3;
520 }
521 }
UnPack_MPI1_3(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)522 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
523 {
524 PetscInt i,idy;
525
526 PetscFunctionBegin;
527 switch (addv) {
528 case INSERT_VALUES:
529 case INSERT_ALL_VALUES:
530 for (i=0; i<n; i++) {
531 idy = *indicesy++;
532 y[idy] = x[0];
533 y[idy+1] = x[1];
534 y[idy+2] = x[2];
535 x += 3;
536 }
537 break;
538 case ADD_VALUES:
539 case ADD_ALL_VALUES:
540 for (i=0; i<n; i++) {
541 idy = *indicesy++;
542 y[idy] += x[0];
543 y[idy+1] += x[1];
544 y[idy+2] += x[2];
545 x += 3;
546 }
547 break;
548 #if !defined(PETSC_USE_COMPLEX)
549 case MAX_VALUES:
550 for (i=0; i<n; i++) {
551 idy = *indicesy++;
552 y[idy] = PetscMax(y[idy],x[0]);
553 y[idy+1] = PetscMax(y[idy+1],x[1]);
554 y[idy+2] = PetscMax(y[idy+2],x[2]);
555 x += 3;
556 }
557 #else
558 case MAX_VALUES:
559 #endif
560 case NOT_SET_VALUES:
561 break;
562 default:
563 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
564 }
565 PetscFunctionReturn(0);
566 }
567
Scatter_MPI1_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)568 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
569 {
570 PetscInt i,idx,idy;
571
572 PetscFunctionBegin;
573 switch (addv) {
574 case INSERT_VALUES:
575 case INSERT_ALL_VALUES:
576 for (i=0; i<n; i++) {
577 idx = *indicesx++;
578 idy = *indicesy++;
579 y[idy] = x[idx];
580 y[idy+1] = x[idx+1];
581 y[idy+2] = x[idx+2];
582 }
583 break;
584 case ADD_VALUES:
585 case ADD_ALL_VALUES:
586 for (i=0; i<n; i++) {
587 idx = *indicesx++;
588 idy = *indicesy++;
589 y[idy] += x[idx];
590 y[idy+1] += x[idx+1];
591 y[idy+2] += x[idx+2];
592 }
593 break;
594 #if !defined(PETSC_USE_COMPLEX)
595 case MAX_VALUES:
596 for (i=0; i<n; i++) {
597 idx = *indicesx++;
598 idy = *indicesy++;
599 y[idy] = PetscMax(y[idy],x[idx]);
600 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
601 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
602 }
603 #else
604 case MAX_VALUES:
605 #endif
606 case NOT_SET_VALUES:
607 break;
608 default:
609 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
610 }
611 PetscFunctionReturn(0);
612 }
613 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)614 PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
615 {
616 PetscInt i,idx;
617
618 for (i=0; i<n; i++) {
619 idx = *indicesx++;
620 y[0] = x[idx];
621 y[1] = x[idx+1];
622 y[2] = x[idx+2];
623 y[3] = x[idx+3];
624 y += 4;
625 }
626 }
UnPack_MPI1_4(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)627 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
628 {
629 PetscInt i,idy;
630
631 PetscFunctionBegin;
632 switch (addv) {
633 case INSERT_VALUES:
634 case INSERT_ALL_VALUES:
635 for (i=0; i<n; i++) {
636 idy = *indicesy++;
637 y[idy] = x[0];
638 y[idy+1] = x[1];
639 y[idy+2] = x[2];
640 y[idy+3] = x[3];
641 x += 4;
642 }
643 break;
644 case ADD_VALUES:
645 case ADD_ALL_VALUES:
646 for (i=0; i<n; i++) {
647 idy = *indicesy++;
648 y[idy] += x[0];
649 y[idy+1] += x[1];
650 y[idy+2] += x[2];
651 y[idy+3] += x[3];
652 x += 4;
653 }
654 break;
655 #if !defined(PETSC_USE_COMPLEX)
656 case MAX_VALUES:
657 for (i=0; i<n; i++) {
658 idy = *indicesy++;
659 y[idy] = PetscMax(y[idy],x[0]);
660 y[idy+1] = PetscMax(y[idy+1],x[1]);
661 y[idy+2] = PetscMax(y[idy+2],x[2]);
662 y[idy+3] = PetscMax(y[idy+3],x[3]);
663 x += 4;
664 }
665 #else
666 case MAX_VALUES:
667 #endif
668 case NOT_SET_VALUES:
669 break;
670 default:
671 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
672 }
673 PetscFunctionReturn(0);
674 }
675
Scatter_MPI1_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)676 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
677 {
678 PetscInt i,idx,idy;
679
680 PetscFunctionBegin;
681 switch (addv) {
682 case INSERT_VALUES:
683 case INSERT_ALL_VALUES:
684 for (i=0; i<n; i++) {
685 idx = *indicesx++;
686 idy = *indicesy++;
687 y[idy] = x[idx];
688 y[idy+1] = x[idx+1];
689 y[idy+2] = x[idx+2];
690 y[idy+3] = x[idx+3];
691 }
692 break;
693 case ADD_VALUES:
694 case ADD_ALL_VALUES:
695 for (i=0; i<n; i++) {
696 idx = *indicesx++;
697 idy = *indicesy++;
698 y[idy] += x[idx];
699 y[idy+1] += x[idx+1];
700 y[idy+2] += x[idx+2];
701 y[idy+3] += x[idx+3];
702 }
703 break;
704 #if !defined(PETSC_USE_COMPLEX)
705 case MAX_VALUES:
706 for (i=0; i<n; i++) {
707 idx = *indicesx++;
708 idy = *indicesy++;
709 y[idy] = PetscMax(y[idy],x[idx]);
710 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
711 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
712 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
713 }
714 #else
715 case MAX_VALUES:
716 #endif
717 case NOT_SET_VALUES:
718 break;
719 default:
720 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
721 }
722 PetscFunctionReturn(0);
723 }
724 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)725 PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
726 {
727 PetscInt i,idx;
728
729 for (i=0; i<n; i++) {
730 idx = *indicesx++;
731 y[0] = x[idx];
732 y[1] = x[idx+1];
733 y[2] = x[idx+2];
734 y[3] = x[idx+3];
735 y[4] = x[idx+4];
736 y += 5;
737 }
738 }
739
UnPack_MPI1_5(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)740 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
741 {
742 PetscInt i,idy;
743
744 PetscFunctionBegin;
745 switch (addv) {
746 case INSERT_VALUES:
747 case INSERT_ALL_VALUES:
748 for (i=0; i<n; i++) {
749 idy = *indicesy++;
750 y[idy] = x[0];
751 y[idy+1] = x[1];
752 y[idy+2] = x[2];
753 y[idy+3] = x[3];
754 y[idy+4] = x[4];
755 x += 5;
756 }
757 break;
758 case ADD_VALUES:
759 case ADD_ALL_VALUES:
760 for (i=0; i<n; i++) {
761 idy = *indicesy++;
762 y[idy] += x[0];
763 y[idy+1] += x[1];
764 y[idy+2] += x[2];
765 y[idy+3] += x[3];
766 y[idy+4] += x[4];
767 x += 5;
768 }
769 break;
770 #if !defined(PETSC_USE_COMPLEX)
771 case MAX_VALUES:
772 for (i=0; i<n; i++) {
773 idy = *indicesy++;
774 y[idy] = PetscMax(y[idy],x[0]);
775 y[idy+1] = PetscMax(y[idy+1],x[1]);
776 y[idy+2] = PetscMax(y[idy+2],x[2]);
777 y[idy+3] = PetscMax(y[idy+3],x[3]);
778 y[idy+4] = PetscMax(y[idy+4],x[4]);
779 x += 5;
780 }
781 #else
782 case MAX_VALUES:
783 #endif
784 case NOT_SET_VALUES:
785 break;
786 default:
787 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
788 }
789 PetscFunctionReturn(0);
790 }
791
Scatter_MPI1_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)792 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
793 {
794 PetscInt i,idx,idy;
795
796 PetscFunctionBegin;
797 switch (addv) {
798 case INSERT_VALUES:
799 case INSERT_ALL_VALUES:
800 for (i=0; i<n; i++) {
801 idx = *indicesx++;
802 idy = *indicesy++;
803 y[idy] = x[idx];
804 y[idy+1] = x[idx+1];
805 y[idy+2] = x[idx+2];
806 y[idy+3] = x[idx+3];
807 y[idy+4] = x[idx+4];
808 }
809 break;
810 case ADD_VALUES:
811 case ADD_ALL_VALUES:
812 for (i=0; i<n; i++) {
813 idx = *indicesx++;
814 idy = *indicesy++;
815 y[idy] += x[idx];
816 y[idy+1] += x[idx+1];
817 y[idy+2] += x[idx+2];
818 y[idy+3] += x[idx+3];
819 y[idy+4] += x[idx+4];
820 }
821 break;
822 #if !defined(PETSC_USE_COMPLEX)
823 case MAX_VALUES:
824 for (i=0; i<n; i++) {
825 idx = *indicesx++;
826 idy = *indicesy++;
827 y[idy] = PetscMax(y[idy],x[idx]);
828 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
829 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
830 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
831 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
832 }
833 #else
834 case MAX_VALUES:
835 #endif
836 case NOT_SET_VALUES:
837 break;
838 default:
839 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840 }
841 PetscFunctionReturn(0);
842 }
843 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)844 PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
845 {
846 PetscInt i,idx;
847
848 for (i=0; i<n; i++) {
849 idx = *indicesx++;
850 y[0] = x[idx];
851 y[1] = x[idx+1];
852 y[2] = x[idx+2];
853 y[3] = x[idx+3];
854 y[4] = x[idx+4];
855 y[5] = x[idx+5];
856 y += 6;
857 }
858 }
859
UnPack_MPI1_6(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)860 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
861 {
862 PetscInt i,idy;
863
864 PetscFunctionBegin;
865 switch (addv) {
866 case INSERT_VALUES:
867 case INSERT_ALL_VALUES:
868 for (i=0; i<n; i++) {
869 idy = *indicesy++;
870 y[idy] = x[0];
871 y[idy+1] = x[1];
872 y[idy+2] = x[2];
873 y[idy+3] = x[3];
874 y[idy+4] = x[4];
875 y[idy+5] = x[5];
876 x += 6;
877 }
878 break;
879 case ADD_VALUES:
880 case ADD_ALL_VALUES:
881 for (i=0; i<n; i++) {
882 idy = *indicesy++;
883 y[idy] += x[0];
884 y[idy+1] += x[1];
885 y[idy+2] += x[2];
886 y[idy+3] += x[3];
887 y[idy+4] += x[4];
888 y[idy+5] += x[5];
889 x += 6;
890 }
891 break;
892 #if !defined(PETSC_USE_COMPLEX)
893 case MAX_VALUES:
894 for (i=0; i<n; i++) {
895 idy = *indicesy++;
896 y[idy] = PetscMax(y[idy],x[0]);
897 y[idy+1] = PetscMax(y[idy+1],x[1]);
898 y[idy+2] = PetscMax(y[idy+2],x[2]);
899 y[idy+3] = PetscMax(y[idy+3],x[3]);
900 y[idy+4] = PetscMax(y[idy+4],x[4]);
901 y[idy+5] = PetscMax(y[idy+5],x[5]);
902 x += 6;
903 }
904 #else
905 case MAX_VALUES:
906 #endif
907 case NOT_SET_VALUES:
908 break;
909 default:
910 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
911 }
912 PetscFunctionReturn(0);
913 }
914
Scatter_MPI1_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)915 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
916 {
917 PetscInt i,idx,idy;
918
919 PetscFunctionBegin;
920 switch (addv) {
921 case INSERT_VALUES:
922 case INSERT_ALL_VALUES:
923 for (i=0; i<n; i++) {
924 idx = *indicesx++;
925 idy = *indicesy++;
926 y[idy] = x[idx];
927 y[idy+1] = x[idx+1];
928 y[idy+2] = x[idx+2];
929 y[idy+3] = x[idx+3];
930 y[idy+4] = x[idx+4];
931 y[idy+5] = x[idx+5];
932 }
933 break;
934 case ADD_VALUES:
935 case ADD_ALL_VALUES:
936 for (i=0; i<n; i++) {
937 idx = *indicesx++;
938 idy = *indicesy++;
939 y[idy] += x[idx];
940 y[idy+1] += x[idx+1];
941 y[idy+2] += x[idx+2];
942 y[idy+3] += x[idx+3];
943 y[idy+4] += x[idx+4];
944 y[idy+5] += x[idx+5];
945 }
946 break;
947 #if !defined(PETSC_USE_COMPLEX)
948 case MAX_VALUES:
949 for (i=0; i<n; i++) {
950 idx = *indicesx++;
951 idy = *indicesy++;
952 y[idy] = PetscMax(y[idy],x[idx]);
953 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
954 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
955 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
956 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
957 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
958 }
959 #else
960 case MAX_VALUES:
961 #endif
962 case NOT_SET_VALUES:
963 break;
964 default:
965 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
966 }
967 PetscFunctionReturn(0);
968 }
969 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)970 PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
971 {
972 PetscInt i,idx;
973
974 for (i=0; i<n; i++) {
975 idx = *indicesx++;
976 y[0] = x[idx];
977 y[1] = x[idx+1];
978 y[2] = x[idx+2];
979 y[3] = x[idx+3];
980 y[4] = x[idx+4];
981 y[5] = x[idx+5];
982 y[6] = x[idx+6];
983 y += 7;
984 }
985 }
986
UnPack_MPI1_7(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)987 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
988 {
989 PetscInt i,idy;
990
991 PetscFunctionBegin;
992 switch (addv) {
993 case INSERT_VALUES:
994 case INSERT_ALL_VALUES:
995 for (i=0; i<n; i++) {
996 idy = *indicesy++;
997 y[idy] = x[0];
998 y[idy+1] = x[1];
999 y[idy+2] = x[2];
1000 y[idy+3] = x[3];
1001 y[idy+4] = x[4];
1002 y[idy+5] = x[5];
1003 y[idy+6] = x[6];
1004 x += 7;
1005 }
1006 break;
1007 case ADD_VALUES:
1008 case ADD_ALL_VALUES:
1009 for (i=0; i<n; i++) {
1010 idy = *indicesy++;
1011 y[idy] += x[0];
1012 y[idy+1] += x[1];
1013 y[idy+2] += x[2];
1014 y[idy+3] += x[3];
1015 y[idy+4] += x[4];
1016 y[idy+5] += x[5];
1017 y[idy+6] += x[6];
1018 x += 7;
1019 }
1020 break;
1021 #if !defined(PETSC_USE_COMPLEX)
1022 case MAX_VALUES:
1023 for (i=0; i<n; i++) {
1024 idy = *indicesy++;
1025 y[idy] = PetscMax(y[idy],x[0]);
1026 y[idy+1] = PetscMax(y[idy+1],x[1]);
1027 y[idy+2] = PetscMax(y[idy+2],x[2]);
1028 y[idy+3] = PetscMax(y[idy+3],x[3]);
1029 y[idy+4] = PetscMax(y[idy+4],x[4]);
1030 y[idy+5] = PetscMax(y[idy+5],x[5]);
1031 y[idy+6] = PetscMax(y[idy+6],x[6]);
1032 x += 7;
1033 }
1034 #else
1035 case MAX_VALUES:
1036 #endif
1037 case NOT_SET_VALUES:
1038 break;
1039 default:
1040 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1041 }
1042 PetscFunctionReturn(0);
1043 }
1044
Scatter_MPI1_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1045 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1046 {
1047 PetscInt i,idx,idy;
1048
1049 PetscFunctionBegin;
1050 switch (addv) {
1051 case INSERT_VALUES:
1052 case INSERT_ALL_VALUES:
1053 for (i=0; i<n; i++) {
1054 idx = *indicesx++;
1055 idy = *indicesy++;
1056 y[idy] = x[idx];
1057 y[idy+1] = x[idx+1];
1058 y[idy+2] = x[idx+2];
1059 y[idy+3] = x[idx+3];
1060 y[idy+4] = x[idx+4];
1061 y[idy+5] = x[idx+5];
1062 y[idy+6] = x[idx+6];
1063 }
1064 break;
1065 case ADD_VALUES:
1066 case ADD_ALL_VALUES:
1067 for (i=0; i<n; i++) {
1068 idx = *indicesx++;
1069 idy = *indicesy++;
1070 y[idy] += x[idx];
1071 y[idy+1] += x[idx+1];
1072 y[idy+2] += x[idx+2];
1073 y[idy+3] += x[idx+3];
1074 y[idy+4] += x[idx+4];
1075 y[idy+5] += x[idx+5];
1076 y[idy+6] += x[idx+6];
1077 }
1078 break;
1079 #if !defined(PETSC_USE_COMPLEX)
1080 case MAX_VALUES:
1081 for (i=0; i<n; i++) {
1082 idx = *indicesx++;
1083 idy = *indicesy++;
1084 y[idy] = PetscMax(y[idy],x[idx]);
1085 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1086 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1087 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1088 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1089 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1090 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1091 }
1092 #else
1093 case MAX_VALUES:
1094 #endif
1095 case NOT_SET_VALUES:
1096 break;
1097 default:
1098 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1099 }
1100 PetscFunctionReturn(0);
1101 }
1102 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1103 PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1104 {
1105 PetscInt i,idx;
1106
1107 for (i=0; i<n; i++) {
1108 idx = *indicesx++;
1109 y[0] = x[idx];
1110 y[1] = x[idx+1];
1111 y[2] = x[idx+2];
1112 y[3] = x[idx+3];
1113 y[4] = x[idx+4];
1114 y[5] = x[idx+5];
1115 y[6] = x[idx+6];
1116 y[7] = x[idx+7];
1117 y += 8;
1118 }
1119 }
1120
UnPack_MPI1_8(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1121 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1122 {
1123 PetscInt i,idy;
1124
1125 PetscFunctionBegin;
1126 switch (addv) {
1127 case INSERT_VALUES:
1128 case INSERT_ALL_VALUES:
1129 for (i=0; i<n; i++) {
1130 idy = *indicesy++;
1131 y[idy] = x[0];
1132 y[idy+1] = x[1];
1133 y[idy+2] = x[2];
1134 y[idy+3] = x[3];
1135 y[idy+4] = x[4];
1136 y[idy+5] = x[5];
1137 y[idy+6] = x[6];
1138 y[idy+7] = x[7];
1139 x += 8;
1140 }
1141 break;
1142 case ADD_VALUES:
1143 case ADD_ALL_VALUES:
1144 for (i=0; i<n; i++) {
1145 idy = *indicesy++;
1146 y[idy] += x[0];
1147 y[idy+1] += x[1];
1148 y[idy+2] += x[2];
1149 y[idy+3] += x[3];
1150 y[idy+4] += x[4];
1151 y[idy+5] += x[5];
1152 y[idy+6] += x[6];
1153 y[idy+7] += x[7];
1154 x += 8;
1155 }
1156 break;
1157 #if !defined(PETSC_USE_COMPLEX)
1158 case MAX_VALUES:
1159 for (i=0; i<n; i++) {
1160 idy = *indicesy++;
1161 y[idy] = PetscMax(y[idy],x[0]);
1162 y[idy+1] = PetscMax(y[idy+1],x[1]);
1163 y[idy+2] = PetscMax(y[idy+2],x[2]);
1164 y[idy+3] = PetscMax(y[idy+3],x[3]);
1165 y[idy+4] = PetscMax(y[idy+4],x[4]);
1166 y[idy+5] = PetscMax(y[idy+5],x[5]);
1167 y[idy+6] = PetscMax(y[idy+6],x[6]);
1168 y[idy+7] = PetscMax(y[idy+7],x[7]);
1169 x += 8;
1170 }
1171 #else
1172 case MAX_VALUES:
1173 #endif
1174 case NOT_SET_VALUES:
1175 break;
1176 default:
1177 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1178 }
1179 PetscFunctionReturn(0);
1180 }
1181
Scatter_MPI1_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1182 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1183 {
1184 PetscInt i,idx,idy;
1185
1186 PetscFunctionBegin;
1187 switch (addv) {
1188 case INSERT_VALUES:
1189 case INSERT_ALL_VALUES:
1190 for (i=0; i<n; i++) {
1191 idx = *indicesx++;
1192 idy = *indicesy++;
1193 y[idy] = x[idx];
1194 y[idy+1] = x[idx+1];
1195 y[idy+2] = x[idx+2];
1196 y[idy+3] = x[idx+3];
1197 y[idy+4] = x[idx+4];
1198 y[idy+5] = x[idx+5];
1199 y[idy+6] = x[idx+6];
1200 y[idy+7] = x[idx+7];
1201 }
1202 break;
1203 case ADD_VALUES:
1204 case ADD_ALL_VALUES:
1205 for (i=0; i<n; i++) {
1206 idx = *indicesx++;
1207 idy = *indicesy++;
1208 y[idy] += x[idx];
1209 y[idy+1] += x[idx+1];
1210 y[idy+2] += x[idx+2];
1211 y[idy+3] += x[idx+3];
1212 y[idy+4] += x[idx+4];
1213 y[idy+5] += x[idx+5];
1214 y[idy+6] += x[idx+6];
1215 y[idy+7] += x[idx+7];
1216 }
1217 break;
1218 #if !defined(PETSC_USE_COMPLEX)
1219 case MAX_VALUES:
1220 for (i=0; i<n; i++) {
1221 idx = *indicesx++;
1222 idy = *indicesy++;
1223 y[idy] = PetscMax(y[idy],x[idx]);
1224 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1225 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1226 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1227 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1228 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1229 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1230 y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1231 }
1232 #else
1233 case MAX_VALUES:
1234 #endif
1235 case NOT_SET_VALUES:
1236 break;
1237 default:
1238 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1239 }
1240 PetscFunctionReturn(0);
1241 }
1242
Pack_MPI1_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1243 PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1244 {
1245 PetscInt i,idx;
1246
1247 for (i=0; i<n; i++) {
1248 idx = *indicesx++;
1249 y[0] = x[idx];
1250 y[1] = x[idx+1];
1251 y[2] = x[idx+2];
1252 y[3] = x[idx+3];
1253 y[4] = x[idx+4];
1254 y[5] = x[idx+5];
1255 y[6] = x[idx+6];
1256 y[7] = x[idx+7];
1257 y[8] = x[idx+8];
1258 y += 9;
1259 }
1260 }
1261
UnPack_MPI1_9(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1262 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1263 {
1264 PetscInt i,idy;
1265
1266 PetscFunctionBegin;
1267 switch (addv) {
1268 case INSERT_VALUES:
1269 case INSERT_ALL_VALUES:
1270 for (i=0; i<n; i++) {
1271 idy = *indicesy++;
1272 y[idy] = x[0];
1273 y[idy+1] = x[1];
1274 y[idy+2] = x[2];
1275 y[idy+3] = x[3];
1276 y[idy+4] = x[4];
1277 y[idy+5] = x[5];
1278 y[idy+6] = x[6];
1279 y[idy+7] = x[7];
1280 y[idy+8] = x[8];
1281 x += 9;
1282 }
1283 break;
1284 case ADD_VALUES:
1285 case ADD_ALL_VALUES:
1286 for (i=0; i<n; i++) {
1287 idy = *indicesy++;
1288 y[idy] += x[0];
1289 y[idy+1] += x[1];
1290 y[idy+2] += x[2];
1291 y[idy+3] += x[3];
1292 y[idy+4] += x[4];
1293 y[idy+5] += x[5];
1294 y[idy+6] += x[6];
1295 y[idy+7] += x[7];
1296 y[idy+8] += x[8];
1297 x += 9;
1298 }
1299 break;
1300 #if !defined(PETSC_USE_COMPLEX)
1301 case MAX_VALUES:
1302 for (i=0; i<n; i++) {
1303 idy = *indicesy++;
1304 y[idy] = PetscMax(y[idy],x[0]);
1305 y[idy+1] = PetscMax(y[idy+1],x[1]);
1306 y[idy+2] = PetscMax(y[idy+2],x[2]);
1307 y[idy+3] = PetscMax(y[idy+3],x[3]);
1308 y[idy+4] = PetscMax(y[idy+4],x[4]);
1309 y[idy+5] = PetscMax(y[idy+5],x[5]);
1310 y[idy+6] = PetscMax(y[idy+6],x[6]);
1311 y[idy+7] = PetscMax(y[idy+7],x[7]);
1312 y[idy+8] = PetscMax(y[idy+8],x[8]);
1313 x += 9;
1314 }
1315 #else
1316 case MAX_VALUES:
1317 #endif
1318 case NOT_SET_VALUES:
1319 break;
1320 default:
1321 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1322 }
1323 PetscFunctionReturn(0);
1324 }
1325
Scatter_MPI1_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1326 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1327 {
1328 PetscInt i,idx,idy;
1329
1330 PetscFunctionBegin;
1331 switch (addv) {
1332 case INSERT_VALUES:
1333 case INSERT_ALL_VALUES:
1334 for (i=0; i<n; i++) {
1335 idx = *indicesx++;
1336 idy = *indicesy++;
1337 y[idy] = x[idx];
1338 y[idy+1] = x[idx+1];
1339 y[idy+2] = x[idx+2];
1340 y[idy+3] = x[idx+3];
1341 y[idy+4] = x[idx+4];
1342 y[idy+5] = x[idx+5];
1343 y[idy+6] = x[idx+6];
1344 y[idy+7] = x[idx+7];
1345 y[idy+8] = x[idx+8];
1346 }
1347 break;
1348 case ADD_VALUES:
1349 case ADD_ALL_VALUES:
1350 for (i=0; i<n; i++) {
1351 idx = *indicesx++;
1352 idy = *indicesy++;
1353 y[idy] += x[idx];
1354 y[idy+1] += x[idx+1];
1355 y[idy+2] += x[idx+2];
1356 y[idy+3] += x[idx+3];
1357 y[idy+4] += x[idx+4];
1358 y[idy+5] += x[idx+5];
1359 y[idy+6] += x[idx+6];
1360 y[idy+7] += x[idx+7];
1361 y[idy+8] += x[idx+8];
1362 }
1363 break;
1364 #if !defined(PETSC_USE_COMPLEX)
1365 case MAX_VALUES:
1366 for (i=0; i<n; i++) {
1367 idx = *indicesx++;
1368 idy = *indicesy++;
1369 y[idy] = PetscMax(y[idy],x[idx]);
1370 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1371 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1372 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1373 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1374 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1375 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1376 y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1377 y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1378 }
1379 #else
1380 case MAX_VALUES:
1381 #endif
1382 case NOT_SET_VALUES:
1383 break;
1384 default:
1385 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1386 }
1387 PetscFunctionReturn(0);
1388 }
1389
Pack_MPI1_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1390 PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1391 {
1392 PetscInt i,idx;
1393
1394 for (i=0; i<n; i++) {
1395 idx = *indicesx++;
1396 y[0] = x[idx];
1397 y[1] = x[idx+1];
1398 y[2] = x[idx+2];
1399 y[3] = x[idx+3];
1400 y[4] = x[idx+4];
1401 y[5] = x[idx+5];
1402 y[6] = x[idx+6];
1403 y[7] = x[idx+7];
1404 y[8] = x[idx+8];
1405 y[9] = x[idx+9];
1406 y += 10;
1407 }
1408 }
1409
UnPack_MPI1_10(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1410 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1411 {
1412 PetscInt i,idy;
1413
1414 PetscFunctionBegin;
1415 switch (addv) {
1416 case INSERT_VALUES:
1417 case INSERT_ALL_VALUES:
1418 for (i=0; i<n; i++) {
1419 idy = *indicesy++;
1420 y[idy] = x[0];
1421 y[idy+1] = x[1];
1422 y[idy+2] = x[2];
1423 y[idy+3] = x[3];
1424 y[idy+4] = x[4];
1425 y[idy+5] = x[5];
1426 y[idy+6] = x[6];
1427 y[idy+7] = x[7];
1428 y[idy+8] = x[8];
1429 y[idy+9] = x[9];
1430 x += 10;
1431 }
1432 break;
1433 case ADD_VALUES:
1434 case ADD_ALL_VALUES:
1435 for (i=0; i<n; i++) {
1436 idy = *indicesy++;
1437 y[idy] += x[0];
1438 y[idy+1] += x[1];
1439 y[idy+2] += x[2];
1440 y[idy+3] += x[3];
1441 y[idy+4] += x[4];
1442 y[idy+5] += x[5];
1443 y[idy+6] += x[6];
1444 y[idy+7] += x[7];
1445 y[idy+8] += x[8];
1446 y[idy+9] += x[9];
1447 x += 10;
1448 }
1449 break;
1450 #if !defined(PETSC_USE_COMPLEX)
1451 case MAX_VALUES:
1452 for (i=0; i<n; i++) {
1453 idy = *indicesy++;
1454 y[idy] = PetscMax(y[idy],x[0]);
1455 y[idy+1] = PetscMax(y[idy+1],x[1]);
1456 y[idy+2] = PetscMax(y[idy+2],x[2]);
1457 y[idy+3] = PetscMax(y[idy+3],x[3]);
1458 y[idy+4] = PetscMax(y[idy+4],x[4]);
1459 y[idy+5] = PetscMax(y[idy+5],x[5]);
1460 y[idy+6] = PetscMax(y[idy+6],x[6]);
1461 y[idy+7] = PetscMax(y[idy+7],x[7]);
1462 y[idy+8] = PetscMax(y[idy+8],x[8]);
1463 y[idy+9] = PetscMax(y[idy+9],x[9]);
1464 x += 10;
1465 }
1466 #else
1467 case MAX_VALUES:
1468 #endif
1469 case NOT_SET_VALUES:
1470 break;
1471 default:
1472 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1473 }
1474 PetscFunctionReturn(0);
1475 }
1476
Scatter_MPI1_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1477 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1478 {
1479 PetscInt i,idx,idy;
1480
1481 PetscFunctionBegin;
1482 switch (addv) {
1483 case INSERT_VALUES:
1484 case INSERT_ALL_VALUES:
1485 for (i=0; i<n; i++) {
1486 idx = *indicesx++;
1487 idy = *indicesy++;
1488 y[idy] = x[idx];
1489 y[idy+1] = x[idx+1];
1490 y[idy+2] = x[idx+2];
1491 y[idy+3] = x[idx+3];
1492 y[idy+4] = x[idx+4];
1493 y[idy+5] = x[idx+5];
1494 y[idy+6] = x[idx+6];
1495 y[idy+7] = x[idx+7];
1496 y[idy+8] = x[idx+8];
1497 y[idy+9] = x[idx+9];
1498 }
1499 break;
1500 case ADD_VALUES:
1501 case ADD_ALL_VALUES:
1502 for (i=0; i<n; i++) {
1503 idx = *indicesx++;
1504 idy = *indicesy++;
1505 y[idy] += x[idx];
1506 y[idy+1] += x[idx+1];
1507 y[idy+2] += x[idx+2];
1508 y[idy+3] += x[idx+3];
1509 y[idy+4] += x[idx+4];
1510 y[idy+5] += x[idx+5];
1511 y[idy+6] += x[idx+6];
1512 y[idy+7] += x[idx+7];
1513 y[idy+8] += x[idx+8];
1514 y[idy+9] += x[idx+9];
1515 }
1516 break;
1517 #if !defined(PETSC_USE_COMPLEX)
1518 case MAX_VALUES:
1519 for (i=0; i<n; i++) {
1520 idx = *indicesx++;
1521 idy = *indicesy++;
1522 y[idy] = PetscMax(y[idy],x[idx]);
1523 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1524 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1525 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1526 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1527 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1528 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1529 y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1530 y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1531 y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1532 }
1533 #else
1534 case MAX_VALUES:
1535 #endif
1536 case NOT_SET_VALUES:
1537 break;
1538 default:
1539 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1540 }
1541 PetscFunctionReturn(0);
1542 }
1543
Pack_MPI1_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1544 PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1545 {
1546 PetscInt i,idx;
1547
1548 for (i=0; i<n; i++) {
1549 idx = *indicesx++;
1550 y[0] = x[idx];
1551 y[1] = x[idx+1];
1552 y[2] = x[idx+2];
1553 y[3] = x[idx+3];
1554 y[4] = x[idx+4];
1555 y[5] = x[idx+5];
1556 y[6] = x[idx+6];
1557 y[7] = x[idx+7];
1558 y[8] = x[idx+8];
1559 y[9] = x[idx+9];
1560 y[10] = x[idx+10];
1561 y += 11;
1562 }
1563 }
1564
UnPack_MPI1_11(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1565 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1566 {
1567 PetscInt i,idy;
1568
1569 PetscFunctionBegin;
1570 switch (addv) {
1571 case INSERT_VALUES:
1572 case INSERT_ALL_VALUES:
1573 for (i=0; i<n; i++) {
1574 idy = *indicesy++;
1575 y[idy] = x[0];
1576 y[idy+1] = x[1];
1577 y[idy+2] = x[2];
1578 y[idy+3] = x[3];
1579 y[idy+4] = x[4];
1580 y[idy+5] = x[5];
1581 y[idy+6] = x[6];
1582 y[idy+7] = x[7];
1583 y[idy+8] = x[8];
1584 y[idy+9] = x[9];
1585 y[idy+10] = x[10];
1586 x += 11;
1587 }
1588 break;
1589 case ADD_VALUES:
1590 case ADD_ALL_VALUES:
1591 for (i=0; i<n; i++) {
1592 idy = *indicesy++;
1593 y[idy] += x[0];
1594 y[idy+1] += x[1];
1595 y[idy+2] += x[2];
1596 y[idy+3] += x[3];
1597 y[idy+4] += x[4];
1598 y[idy+5] += x[5];
1599 y[idy+6] += x[6];
1600 y[idy+7] += x[7];
1601 y[idy+8] += x[8];
1602 y[idy+9] += x[9];
1603 y[idy+10] += x[10];
1604 x += 11;
1605 }
1606 break;
1607 #if !defined(PETSC_USE_COMPLEX)
1608 case MAX_VALUES:
1609 for (i=0; i<n; i++) {
1610 idy = *indicesy++;
1611 y[idy] = PetscMax(y[idy],x[0]);
1612 y[idy+1] = PetscMax(y[idy+1],x[1]);
1613 y[idy+2] = PetscMax(y[idy+2],x[2]);
1614 y[idy+3] = PetscMax(y[idy+3],x[3]);
1615 y[idy+4] = PetscMax(y[idy+4],x[4]);
1616 y[idy+5] = PetscMax(y[idy+5],x[5]);
1617 y[idy+6] = PetscMax(y[idy+6],x[6]);
1618 y[idy+7] = PetscMax(y[idy+7],x[7]);
1619 y[idy+8] = PetscMax(y[idy+8],x[8]);
1620 y[idy+9] = PetscMax(y[idy+9],x[9]);
1621 y[idy+10] = PetscMax(y[idy+10],x[10]);
1622 x += 11;
1623 }
1624 #else
1625 case MAX_VALUES:
1626 #endif
1627 case NOT_SET_VALUES:
1628 break;
1629 default:
1630 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1631 }
1632 PetscFunctionReturn(0);
1633 }
1634
Scatter_MPI1_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1635 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1636 {
1637 PetscInt i,idx,idy;
1638
1639 PetscFunctionBegin;
1640 switch (addv) {
1641 case INSERT_VALUES:
1642 case INSERT_ALL_VALUES:
1643 for (i=0; i<n; i++) {
1644 idx = *indicesx++;
1645 idy = *indicesy++;
1646 y[idy] = x[idx];
1647 y[idy+1] = x[idx+1];
1648 y[idy+2] = x[idx+2];
1649 y[idy+3] = x[idx+3];
1650 y[idy+4] = x[idx+4];
1651 y[idy+5] = x[idx+5];
1652 y[idy+6] = x[idx+6];
1653 y[idy+7] = x[idx+7];
1654 y[idy+8] = x[idx+8];
1655 y[idy+9] = x[idx+9];
1656 y[idy+10] = x[idx+10];
1657 }
1658 break;
1659 case ADD_VALUES:
1660 case ADD_ALL_VALUES:
1661 for (i=0; i<n; i++) {
1662 idx = *indicesx++;
1663 idy = *indicesy++;
1664 y[idy] += x[idx];
1665 y[idy+1] += x[idx+1];
1666 y[idy+2] += x[idx+2];
1667 y[idy+3] += x[idx+3];
1668 y[idy+4] += x[idx+4];
1669 y[idy+5] += x[idx+5];
1670 y[idy+6] += x[idx+6];
1671 y[idy+7] += x[idx+7];
1672 y[idy+8] += x[idx+8];
1673 y[idy+9] += x[idx+9];
1674 y[idy+10] += x[idx+10];
1675 }
1676 break;
1677 #if !defined(PETSC_USE_COMPLEX)
1678 case MAX_VALUES:
1679 for (i=0; i<n; i++) {
1680 idx = *indicesx++;
1681 idy = *indicesy++;
1682 y[idy] = PetscMax(y[idy],x[idx]);
1683 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1684 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1685 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1686 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1687 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1688 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1689 y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1690 y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1691 y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1692 y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1693 }
1694 #else
1695 case MAX_VALUES:
1696 #endif
1697 case NOT_SET_VALUES:
1698 break;
1699 default:
1700 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1701 }
1702 PetscFunctionReturn(0);
1703 }
1704
1705 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1706 PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1707 {
1708 PetscInt i,idx;
1709
1710 for (i=0; i<n; i++) {
1711 idx = *indicesx++;
1712 y[0] = x[idx];
1713 y[1] = x[idx+1];
1714 y[2] = x[idx+2];
1715 y[3] = x[idx+3];
1716 y[4] = x[idx+4];
1717 y[5] = x[idx+5];
1718 y[6] = x[idx+6];
1719 y[7] = x[idx+7];
1720 y[8] = x[idx+8];
1721 y[9] = x[idx+9];
1722 y[10] = x[idx+10];
1723 y[11] = x[idx+11];
1724 y += 12;
1725 }
1726 }
1727
UnPack_MPI1_12(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1728 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1729 {
1730 PetscInt i,idy;
1731
1732 PetscFunctionBegin;
1733 switch (addv) {
1734 case INSERT_VALUES:
1735 case INSERT_ALL_VALUES:
1736 for (i=0; i<n; i++) {
1737 idy = *indicesy++;
1738 y[idy] = x[0];
1739 y[idy+1] = x[1];
1740 y[idy+2] = x[2];
1741 y[idy+3] = x[3];
1742 y[idy+4] = x[4];
1743 y[idy+5] = x[5];
1744 y[idy+6] = x[6];
1745 y[idy+7] = x[7];
1746 y[idy+8] = x[8];
1747 y[idy+9] = x[9];
1748 y[idy+10] = x[10];
1749 y[idy+11] = x[11];
1750 x += 12;
1751 }
1752 break;
1753 case ADD_VALUES:
1754 case ADD_ALL_VALUES:
1755 for (i=0; i<n; i++) {
1756 idy = *indicesy++;
1757 y[idy] += x[0];
1758 y[idy+1] += x[1];
1759 y[idy+2] += x[2];
1760 y[idy+3] += x[3];
1761 y[idy+4] += x[4];
1762 y[idy+5] += x[5];
1763 y[idy+6] += x[6];
1764 y[idy+7] += x[7];
1765 y[idy+8] += x[8];
1766 y[idy+9] += x[9];
1767 y[idy+10] += x[10];
1768 y[idy+11] += x[11];
1769 x += 12;
1770 }
1771 break;
1772 #if !defined(PETSC_USE_COMPLEX)
1773 case MAX_VALUES:
1774 for (i=0; i<n; i++) {
1775 idy = *indicesy++;
1776 y[idy] = PetscMax(y[idy],x[0]);
1777 y[idy+1] = PetscMax(y[idy+1],x[1]);
1778 y[idy+2] = PetscMax(y[idy+2],x[2]);
1779 y[idy+3] = PetscMax(y[idy+3],x[3]);
1780 y[idy+4] = PetscMax(y[idy+4],x[4]);
1781 y[idy+5] = PetscMax(y[idy+5],x[5]);
1782 y[idy+6] = PetscMax(y[idy+6],x[6]);
1783 y[idy+7] = PetscMax(y[idy+7],x[7]);
1784 y[idy+8] = PetscMax(y[idy+8],x[8]);
1785 y[idy+9] = PetscMax(y[idy+9],x[9]);
1786 y[idy+10] = PetscMax(y[idy+10],x[10]);
1787 y[idy+11] = PetscMax(y[idy+11],x[11]);
1788 x += 12;
1789 }
1790 #else
1791 case MAX_VALUES:
1792 #endif
1793 case NOT_SET_VALUES:
1794 break;
1795 default:
1796 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1797 }
1798 PetscFunctionReturn(0);
1799 }
1800
Scatter_MPI1_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1801 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1802 {
1803 PetscInt i,idx,idy;
1804
1805 PetscFunctionBegin;
1806 switch (addv) {
1807 case INSERT_VALUES:
1808 case INSERT_ALL_VALUES:
1809 for (i=0; i<n; i++) {
1810 idx = *indicesx++;
1811 idy = *indicesy++;
1812 y[idy] = x[idx];
1813 y[idy+1] = x[idx+1];
1814 y[idy+2] = x[idx+2];
1815 y[idy+3] = x[idx+3];
1816 y[idy+4] = x[idx+4];
1817 y[idy+5] = x[idx+5];
1818 y[idy+6] = x[idx+6];
1819 y[idy+7] = x[idx+7];
1820 y[idy+8] = x[idx+8];
1821 y[idy+9] = x[idx+9];
1822 y[idy+10] = x[idx+10];
1823 y[idy+11] = x[idx+11];
1824 }
1825 break;
1826 case ADD_VALUES:
1827 case ADD_ALL_VALUES:
1828 for (i=0; i<n; i++) {
1829 idx = *indicesx++;
1830 idy = *indicesy++;
1831 y[idy] += x[idx];
1832 y[idy+1] += x[idx+1];
1833 y[idy+2] += x[idx+2];
1834 y[idy+3] += x[idx+3];
1835 y[idy+4] += x[idx+4];
1836 y[idy+5] += x[idx+5];
1837 y[idy+6] += x[idx+6];
1838 y[idy+7] += x[idx+7];
1839 y[idy+8] += x[idx+8];
1840 y[idy+9] += x[idx+9];
1841 y[idy+10] += x[idx+10];
1842 y[idy+11] += x[idx+11];
1843 }
1844 break;
1845 #if !defined(PETSC_USE_COMPLEX)
1846 case MAX_VALUES:
1847 for (i=0; i<n; i++) {
1848 idx = *indicesx++;
1849 idy = *indicesy++;
1850 y[idy] = PetscMax(y[idy],x[idx]);
1851 y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1852 y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1853 y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1854 y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1855 y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1856 y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1857 y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1858 y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1859 y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1860 y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1861 y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1862 }
1863 #else
1864 case MAX_VALUES:
1865 #endif
1866 case NOT_SET_VALUES:
1867 break;
1868 default:
1869 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1870 }
1871 PetscFunctionReturn(0);
1872 }
1873
1874 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1875 PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1876 {
1877 PetscInt i,idx;
1878 PetscErrorCode ierr;
1879
1880 for (i=0; i<n; i++) {
1881 idx = *indicesx++;
1882 ierr = PetscArraycpy(y,x + idx,bs);CHKERRV(ierr);
1883 y += bs;
1884 }
1885 }
1886
UnPack_MPI1_bs(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1887 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1888 {
1889 PetscInt i,idy,j;
1890 PetscErrorCode ierr;
1891
1892 PetscFunctionBegin;
1893 switch (addv) {
1894 case INSERT_VALUES:
1895 case INSERT_ALL_VALUES:
1896 for (i=0; i<n; i++) {
1897 idy = *indicesy++;
1898 ierr = PetscArraycpy(y + idy,x,bs);CHKERRQ(ierr);
1899 x += bs;
1900 }
1901 break;
1902 case ADD_VALUES:
1903 case ADD_ALL_VALUES:
1904 for (i=0; i<n; i++) {
1905 idy = *indicesy++;
1906 for (j=0; j<bs; j++) y[idy+j] += x[j];
1907 x += bs;
1908 }
1909 break;
1910 #if !defined(PETSC_USE_COMPLEX)
1911 case MAX_VALUES:
1912 for (i=0; i<n; i++) {
1913 idy = *indicesy++;
1914 for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1915 x += bs;
1916 }
1917 #else
1918 case MAX_VALUES:
1919 #endif
1920 case NOT_SET_VALUES:
1921 break;
1922 default:
1923 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1924 }
1925 PetscFunctionReturn(0);
1926 }
1927
Scatter_MPI1_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1928 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1929 {
1930 PetscInt i,idx,idy,j;
1931 PetscErrorCode ierr;
1932
1933 PetscFunctionBegin;
1934 switch (addv) {
1935 case INSERT_VALUES:
1936 case INSERT_ALL_VALUES:
1937 for (i=0; i<n; i++) {
1938 idx = *indicesx++;
1939 idy = *indicesy++;
1940 ierr = PetscArraycpy(y + idy, x + idx,bs);CHKERRQ(ierr);
1941 }
1942 break;
1943 case ADD_VALUES:
1944 case ADD_ALL_VALUES:
1945 for (i=0; i<n; i++) {
1946 idx = *indicesx++;
1947 idy = *indicesy++;
1948 for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
1949 }
1950 break;
1951 #if !defined(PETSC_USE_COMPLEX)
1952 case MAX_VALUES:
1953 for (i=0; i<n; i++) {
1954 idx = *indicesx++;
1955 idy = *indicesy++;
1956 for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1957 }
1958 #else
1959 case MAX_VALUES:
1960 #endif
1961 case NOT_SET_VALUES:
1962 break;
1963 default:
1964 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1965 }
1966 PetscFunctionReturn(0);
1967 }
1968
1969 /* Create the VecScatterBegin/End_P for our chosen block sizes */
1970 #define BS 1
1971 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1972 #define BS 2
1973 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1974 #define BS 3
1975 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1976 #define BS 4
1977 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1978 #define BS 5
1979 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1980 #define BS 6
1981 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1982 #define BS 7
1983 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1984 #define BS 8
1985 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1986 #define BS 9
1987 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1988 #define BS 10
1989 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1990 #define BS 11
1991 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1992 #define BS 12
1993 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1994 #define BS bs
1995 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1996
1997 /* ==========================================================================================*/
1998
1999 /* create parallel to sequential scatter context */
2000
2001 PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2002
2003 /*
2004 bs indicates how many elements there are in each block. Normally this would be 1.
2005
2006 contains check that PetscMPIInt can handle the sizes needed
2007 */
/*
   VecScatterCreateLocal_PtoS_MPI1 - Fills ctx->todata and ctx->fromdata with the
   VecScatter_MPI_General descriptions of a scatter whose source entries are the global
   indices bs*inidx[i] of xin and whose destination entries are bs*inidy[i].

   Parameters:
     nx, inidx - number of source block indices and the indices; each is multiplied by bs and
                 its owning rank is looked up in xin->map->range
     ny, inidy - number of destination block indices and the indices; each is multiplied by bs
                 and must be smaller than VecGetSize(yin).  ny itself is only used to size
                 from->values and from->indices
     xin       - supplies the communicator and the ownership ranges
     yin       - only its size is read here
     bs        - block size
     ctx       - scatter context; a new tag is drawn from it and its todata/fromdata are set

   Outline:
     1. find the owner of every source index and count entries per owner
     2. exchange message counts/lengths, then the requested indices themselves
     3. build "to" (what this rank sends) from the indices other ranks requested
     4. build "from" (what this rank receives) from the off-process entries
     5. record entries owned by this rank as a local (no message) part
     6. hand both structures to VecScatterCreateCommon_PtoS_MPI1()

   Returns 0 on success or a PETSc error code.

   NOTE(review): the routine exchanges messages on xin's communicator, so presumably every
   rank of that communicator has to call it - confirm.
*/
VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2008 PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2009 {
2010 VecScatter_MPI_General *from,*to;
2011 PetscMPIInt size,rank,imdex,tag,n;
2012 PetscInt *source = NULL,*owners = NULL,nxr;
2013 PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2014 PetscMPIInt *nprocs = NULL,nrecvs;
2015 PetscInt i,j,idx,nsends;
2016 PetscMPIInt *owner = NULL;
2017 PetscInt *starts = NULL,count,slen;
2018 PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2019 PetscMPIInt *onodes1,*olengths1;
2020 MPI_Comm comm;
2021 MPI_Request *send_waits = NULL,*recv_waits = NULL;
2022 MPI_Status recv_status,*send_status;
2023 PetscErrorCode ierr;
2024
2025 PetscFunctionBegin;
2026 ierr = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
2027 ierr = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
2028 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2029 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
/* owners[j] .. owners[j+1] is used below as the index range owned by rank j */
2030 owners = xin->map->range;
/* lengthy is used for the destination bounds checks; lengthx is fetched but not used afterwards */
2031 ierr = VecGetSize(yin,&lengthy);CHKERRQ(ierr);
2032 ierr = VecGetSize(xin,&lengthx);CHKERRQ(ierr);
2033
2034 /* first count number of contributors to each processor */
2035 /* owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
2036 ierr = PetscMalloc2(size,&nprocs,nx,&owner);CHKERRQ(ierr);
2037 ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);
2038
/* The owner search resumes at the rank found for the previous index and only restarts
   from rank 0 when the current index lies below that rank's range.  nsends counts the
   distinct ranks that own at least one requested entry. */
2039 j = 0;
2040 nsends = 0;
2041 for (i=0; i<nx; i++) {
2042 idx = bs*inidx[i];
2043 if (idx < owners[j]) j = 0;
2044 for (; j<size; j++) {
2045 if (idx < owners[j+1]) {
2046 if (!nprocs[j]++) nsends++;
2047 owner[i] = j;
2048 break;
2049 }
2050 }
2051 if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2052 }
2053
/* Entries owned by this rank are remembered in nprocslocal and taken out of the message
   bookkeeping; they are handled by the local part built near the end of the routine. */
2054 nprocslocal = nprocs[rank];
2055 nprocs[rank] = 0;
2056 if (nprocslocal) nsends--;
2057 /* inform other processors of number of messages and max length*/
/* onodes1[k]/olengths1[k] are used below as the source rank and entry count of incoming message k */
2058 ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
2059 ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
2060 ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
2061 recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2062
2063 /* post receives: */
/* all incoming index lists are stored back to back in rvalues; source is allocated here but not referenced again except to be freed */
2064 ierr = PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);CHKERRQ(ierr);
2065 count = 0;
2066 for (i=0; i<nrecvs; i++) {
2067 ierr = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
2068 count += olengths1[i];
2069 }
2070
2071 /* do sends:
2072 1) starts[i] gives the starting index in svalues for stuff going to
2073 the ith processor
2074 */
/* nxr = number of entries owned by some other rank, i.e. the length of svalues */
2075 nxr = 0;
2076 for (i=0; i<nx; i++) {
2077 if (owner[i] != rank) nxr++;
2078 }
2079 ierr = PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);CHKERRQ(ierr);
2080
/* starts[] is advanced as a write cursor while svalues is filled, which is why it is
   recomputed from nprocs[] right afterwards before being used as message offsets */
2081 starts[0] = 0;
2082 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2083 for (i=0; i<nx; i++) {
2084 if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2085 }
2086 starts[0] = 0;
2087 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2088 count = 0;
2089 for (i=0; i<size; i++) {
2090 if (nprocs[i]) {
2091 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
2092 }
2093 }
2094
2095 /* wait on receives */
/* the loop only waits for each receive and adds up the received lengths; the data is consumed further below */
2096 count = nrecvs;
2097 slen = 0;
2098 while (count) {
2099 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
2100 /* unpack receives into our local space */
2101 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
2102 slen += n;
2103 count--;
2104 }
2105
2106 if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2107
2108 /* allocate entire send scatter context */
/* one outgoing message per rank that requested entries from this rank */
2109 ierr = PetscNewLog(ctx,&to);CHKERRQ(ierr);
2110 to->n = nrecvs;
2111
2112 ierr = PetscMalloc1(nrecvs,&to->requests);CHKERRQ(ierr);
2113 ierr = PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);CHKERRQ(ierr);
/* the status arrays are sized for the larger of the two message counts and live only on "to" */
2114 ierr = PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);CHKERRQ(ierr);
2115
2116 ctx->todata = (void*)to;
2117 to->starts[0] = 0;
2118
2119 if (nrecvs) {
2120 /* move the data into the send scatter */
/* received indices are global; subtracting owners[rank] turns them into offsets local to this rank */
2121 base = owners[rank];
2122 rsvalues = rvalues;
2123 for (i=0; i<nrecvs; i++) {
2124 to->starts[i+1] = to->starts[i] + olengths1[i];
2125 to->procs[i] = onodes1[i];
2126 values = rsvalues;
2127 rsvalues += olengths1[i];
2128 for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2129 }
2130 }
2131 ierr = PetscFree(olengths1);CHKERRQ(ierr);
2132 ierr = PetscFree(onodes1);CHKERRQ(ierr);
2133 ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
2134
2135 /* allocate entire receive scatter context */
/* one incoming message per remote rank that owns requested entries */
2136 ierr = PetscNewLog(ctx,&from);CHKERRQ(ierr);
2137 from->n = nsends;
2138
2139 ierr = PetscMalloc1(nsends,&from->requests);CHKERRQ(ierr);
/* NOTE(review): from->indices is sized with ny-nprocslocal but filled from the loop over nx
   below; presumably callers always pass nx == ny - confirm */
2140 ierr = PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);CHKERRQ(ierr);
2141 ctx->fromdata = (void*)from;
2142
2143 /* move data into receive scatter */
/* lowner[r] = position of rank r in from->procs; start[] is a per-message write cursor into from->indices */
2144 ierr = PetscMalloc2(size,&lowner,nsends+1,&start);CHKERRQ(ierr);
2145 count = 0; from->starts[0] = start[0] = 0;
2146 for (i=0; i<size; i++) {
2147 if (nprocs[i]) {
2148 lowner[i] = count;
2149 from->procs[count++] = i;
2150 from->starts[count] = start[count] = start[count-1] + nprocs[i];
2151 }
2152 }
2153
/* the destination index is stored first and range-checked (upper bound only) afterwards */
2154 for (i=0; i<nx; i++) {
2155 if (owner[i] != rank) {
2156 from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2157 if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2158 }
2159 }
2160 ierr = PetscFree2(lowner,start);CHKERRQ(ierr);
2161 ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);
2162
2163 /* wait on sends */
/* svalues is the send buffer of the MPI_Isend calls above, so it is freed only after they complete */
2164 if (nsends) {
2165 ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
2166 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
2167 ierr = PetscFree(send_status);CHKERRQ(ierr);
2168 }
2169 ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
2170
2171 if (nprocslocal) {
2172 PetscInt nt = from->local.n = to->local.n = nprocslocal;
2173 /* we have a scatter to ourselves */
2174 ierr = PetscMalloc1(nt,&to->local.vslots);CHKERRQ(ierr);
2175 ierr = PetscMalloc1(nt,&from->local.vslots);CHKERRQ(ierr);
/* owner[] has already been freed, so local entries are recognised again by testing against
   this rank's ownership range; nt is reused as the fill counter */
2176 nt = 0;
2177 for (i=0; i<nx; i++) {
2178 idx = bs*inidx[i];
2179 if (idx >= owners[rank] && idx < owners[rank+1]) {
2180 to->local.vslots[nt] = idx - owners[rank];
2181 from->local.vslots[nt++] = bs*inidy[i];
2182 if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2183 }
2184 }
2185 ierr = PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));CHKERRQ(ierr);
2186 } else {
2187 from->local.n = 0;
2188 from->local.vslots = NULL;
2189 to->local.n = 0;
2190 to->local.vslots = NULL;
2191 }
2192
/* remaining fields of the local part start out empty / not computed */
2193 from->local.nonmatching_computed = PETSC_FALSE;
2194 from->local.n_nonmatching = 0;
2195 from->local.slots_nonmatching = NULL;
2196 to->local.nonmatching_computed = PETSC_FALSE;
2197 to->local.n_nonmatching = 0;
2198 to->local.slots_nonmatching = NULL;
2199
2200 from->format = VEC_SCATTER_MPI_GENERAL;
2201 to->format = VEC_SCATTER_MPI_GENERAL;
2202 from->bs = bs;
2203 to->bs = bs;
2204
/* creates the persistent requests and installs the begin/end/view/copy/destroy callbacks */
2205 ierr = VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);CHKERRQ(ierr);
2206 PetscFunctionReturn(0);
2207 }
2208
2209 /*
2210 bs indicates how many elements there are in each block. Normally this would be 1.
2211 */
VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General * from,VecScatter_MPI_General * to,VecScatter ctx)2212 PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2213 {
2214 MPI_Comm comm;
2215 PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2216 PetscInt bs = to->bs;
2217 PetscMPIInt size;
2218 PetscInt i, n;
2219 PetscErrorCode ierr;
2220
2221 PetscFunctionBegin;
2222 ierr = PetscObjectGetComm((PetscObject)ctx,&comm);CHKERRQ(ierr);
2223 ierr = PetscObjectGetNewTag((PetscObject)ctx,&tagr);CHKERRQ(ierr);
2224 ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;
2225
2226 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2227 /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2228 to->contiq = PETSC_FALSE;
2229 n = from->starts[from->n];
2230 from->contiq = PETSC_TRUE;
2231 for (i=1; i<n; i++) {
2232 if (from->indices[i] != from->indices[i-1] + bs) {
2233 from->contiq = PETSC_FALSE;
2234 break;
2235 }
2236 }
2237
2238 {
2239 PetscInt *sstarts = to->starts, *rstarts = from->starts;
2240 PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2241 MPI_Request *swaits = to->requests,*rwaits = from->requests;
2242 MPI_Request *rev_swaits,*rev_rwaits;
2243 PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2244
2245 /* allocate additional wait variables for the "reverse" scatter */
2246 ierr = PetscMalloc1(to->n,&rev_rwaits);CHKERRQ(ierr);
2247 ierr = PetscMalloc1(from->n,&rev_swaits);CHKERRQ(ierr);
2248 to->rev_requests = rev_rwaits;
2249 from->rev_requests = rev_swaits;
2250
2251 for (i=0; i<from->n; i++) {
2252 ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);CHKERRQ(ierr);
2253 }
2254
2255 for (i=0; i<to->n; i++) {
2256 ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
2257 }
2258 /* Register receives for scatter and reverse */
2259 for (i=0; i<from->n; i++) {
2260 ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
2261 }
2262 for (i=0; i<to->n; i++) {
2263 ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);CHKERRQ(ierr);
2264 }
2265 ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2266 }
2267 ierr = PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);CHKERRQ(ierr);
2268
2269 if (PetscDefined(USE_DEBUG)) {
2270 ierr = MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
2271 ierr = MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
2272 if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2273 }
2274
2275 switch (bs) {
2276 case 12:
2277 ctx->ops->begin = VecScatterBeginMPI1_12;
2278 ctx->ops->end = VecScatterEndMPI1_12;
2279 break;
2280 case 11:
2281 ctx->ops->begin = VecScatterBeginMPI1_11;
2282 ctx->ops->end = VecScatterEndMPI1_11;
2283 break;
2284 case 10:
2285 ctx->ops->begin = VecScatterBeginMPI1_10;
2286 ctx->ops->end = VecScatterEndMPI1_10;
2287 break;
2288 case 9:
2289 ctx->ops->begin = VecScatterBeginMPI1_9;
2290 ctx->ops->end = VecScatterEndMPI1_9;
2291 break;
2292 case 8:
2293 ctx->ops->begin = VecScatterBeginMPI1_8;
2294 ctx->ops->end = VecScatterEndMPI1_8;
2295 break;
2296 case 7:
2297 ctx->ops->begin = VecScatterBeginMPI1_7;
2298 ctx->ops->end = VecScatterEndMPI1_7;
2299 break;
2300 case 6:
2301 ctx->ops->begin = VecScatterBeginMPI1_6;
2302 ctx->ops->end = VecScatterEndMPI1_6;
2303 break;
2304 case 5:
2305 ctx->ops->begin = VecScatterBeginMPI1_5;
2306 ctx->ops->end = VecScatterEndMPI1_5;
2307 break;
2308 case 4:
2309 ctx->ops->begin = VecScatterBeginMPI1_4;
2310 ctx->ops->end = VecScatterEndMPI1_4;
2311 break;
2312 case 3:
2313 ctx->ops->begin = VecScatterBeginMPI1_3;
2314 ctx->ops->end = VecScatterEndMPI1_3;
2315 break;
2316 case 2:
2317 ctx->ops->begin = VecScatterBeginMPI1_2;
2318 ctx->ops->end = VecScatterEndMPI1_2;
2319 break;
2320 case 1:
2321 ctx->ops->begin = VecScatterBeginMPI1_1;
2322 ctx->ops->end = VecScatterEndMPI1_1;
2323 break;
2324 default:
2325 ctx->ops->begin = VecScatterBeginMPI1_bs;
2326 ctx->ops->end = VecScatterEndMPI1_bs;
2327
2328 }
2329 ctx->ops->view = VecScatterView_MPI_MPI1;
2330 /* try to optimize PtoP vecscatter with memcpy's */
2331 ierr = VecScatterMemcpyPlanCreate_PtoP(to,from);CHKERRQ(ierr);
2332 PetscFunctionReturn(0);
2333 }
2334
2335
2336 /* ------------------------------------------------------------------------------------*/
2337 /*
2338 Scatter from local Seq vectors to a parallel vector.
2339 Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2340 reverses the result.
2341 */
VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2342 PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2343 {
2344 PetscErrorCode ierr;
2345 MPI_Request *waits;
2346 VecScatter_MPI_General *to,*from;
2347
2348 PetscFunctionBegin;
2349 ierr = VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);CHKERRQ(ierr);
2350 to = (VecScatter_MPI_General*)ctx->fromdata;
2351 from = (VecScatter_MPI_General*)ctx->todata;
2352 ctx->todata = (void*)to;
2353 ctx->fromdata = (void*)from;
2354 /* these two are special, they are ALWAYS stored in to struct */
2355 to->sstatus = from->sstatus;
2356 to->rstatus = from->rstatus;
2357
2358 from->sstatus = NULL;
2359 from->rstatus = NULL;
2360
2361 waits = from->rev_requests;
2362 from->rev_requests = from->requests;
2363 from->requests = waits;
2364 waits = to->rev_requests;
2365 to->rev_requests = to->requests;
2366 to->requests = waits;
2367 PetscFunctionReturn(0);
2368 }
2369
2370 /* ---------------------------------------------------------------------------------*/
/*
   VecScatterCreateLocal_PtoP_MPI1 - Builds a scatter between two parallel vectors.

   Every rank sends each (bs*inidx[i], bs*inidy[i]) pair to the rank that owns the source
   index according to xin->map->range.  The receiving rank converts the source index to a
   local offset and then builds the actual scatter from the pairs it received by calling
   VecScatterCreateLocal_StoP_MPI1().

   Parameters:
     nx, inidx - number of source indices and the indices (multiplied by bs before use)
     ny, inidy - destination indices (multiplied by bs before use); ny is only forwarded
                 in the single-process shortcut
     xin, yin  - source and destination vectors
     bs        - block size
     ctx       - scatter context being set up

   When the communicator has a single process the arguments are forwarded unchanged to
   VecScatterCreateLocal_StoP_MPI1().

   Returns 0 on success or a PETSc error code.
*/
VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2371 PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2372 {
2373 PetscErrorCode ierr;
2374 PetscMPIInt size,rank,tag,imdex,n;
2375 PetscInt *owners = xin->map->range;
2376 PetscMPIInt *nprocs = NULL;
2377 PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2378 PetscMPIInt *owner = NULL;
2379 PetscInt *starts = NULL,count,slen;
2380 PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2381 PetscMPIInt *onodes1,*olengths1,nrecvs;
2382 MPI_Comm comm;
2383 MPI_Request *send_waits = NULL,*recv_waits = NULL;
2384 MPI_Status recv_status,*send_status = NULL;
2385 PetscBool duplicate = PETSC_FALSE;
2386 PetscBool found = PETSC_FALSE;
2387
2388 PetscFunctionBegin;
2389 ierr = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
2390 ierr = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
2391 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2392 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
/* single process: nothing to redistribute */
2393 if (size == 1) {
2394 ierr = VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
2395 PetscFunctionReturn(0);
2396 }
2397
2398 /*
2399 Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2400 They then call the StoPScatterCreate()
2401 */
2402 /* first count number of contributors to each processor */
2403 ierr = PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);CHKERRQ(ierr);
2404 ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);
2405
/* The owner search continues from the rank found for the previous index and restarts at
   rank 0 only when the indices decrease.
   NOTE(review): the "not found" error goes through PetscUnlikelyDebug(), which presumably is
   active only in debug builds; otherwise owner[i] would be left unset for an out-of-range
   index - confirm. */
2406 lastidx = -1;
2407 j = 0;
2408 for (i=0; i<nx; i++) {
2409 /* if indices are NOT locally sorted, need to start search at the beginning */
2410 if (lastidx > (idx = bs*inidx[i])) j = 0;
2411 lastidx = idx;
2412 for (; j<size; j++) {
2413 if (idx >= owners[j] && idx < owners[j+1]) {
2414 nprocs[j]++;
2415 owner[i] = j;
2416 found = PETSC_TRUE;
2417 break;
2418 }
2419 }
2420 if (PetscUnlikelyDebug(!found)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2421 found = PETSC_FALSE;
2422 }
/* nsends = number of ranks (this rank included, if it owns any entry) that get a message */
2423 nsends = 0;
2424 for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2425
2426 /* inform other processors of number of messages and max length*/
/* onodes1[k]/olengths1[k] are used below as the source rank and pair count of incoming message k */
2427 ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
2428 ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
2429 ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
2430 recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2431
2432 /* post receives: */
/* every entry travels as two integers (source index, destination index), hence the factors of 2 */
2433 ierr = PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
2434
2435 count = 0;
2436 for (i=0; i<nrecvs; i++) {
2437 ierr = MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
2438 count += olengths1[i];
2439 }
2440 ierr = PetscFree(onodes1);CHKERRQ(ierr);
2441
2442 /* do sends:
2443 1) starts[i] gives the starting index in svalues for stuff going to
2444 the ith processor
2445 */
/* starts[] is advanced as a write cursor while the pairs are packed, then rebuilt from
   nprocs[] before being used as message offsets */
2446 starts[0]= 0;
2447 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2448 for (i=0; i<nx; i++) {
2449 svalues[2*starts[owner[i]]] = bs*inidx[i];
2450 svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2451 }
2452
2453 starts[0] = 0;
2454 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2455 count = 0;
2456 for (i=0; i<size; i++) {
2457 if (nprocs[i]) {
2458 ierr = MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
2459 count++;
2460 }
2461 }
2462 ierr = PetscFree3(nprocs,owner,starts);CHKERRQ(ierr);
2463
2464 /* wait on receives */
/* n counts integers, two per entry, so n/2 entries arrived with each message */
2465 count = nrecvs;
2466 slen = 0;
2467 while (count) {
2468 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
2469 /* unpack receives into our local space */
2470 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
2471 slen += n/2;
2472 count--;
2473 }
2474 if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2475
/* split the received pairs: the source index becomes an offset local to this rank
   (owners[rank] is subtracted), the destination index is kept as received */
2476 ierr = PetscMalloc2(slen,&local_inidx,slen,&local_inidy);CHKERRQ(ierr);
2477 base = owners[rank];
2478 count = 0;
2479 rsvalues = rvalues;
2480 for (i=0; i<nrecvs; i++) {
2481 values = rsvalues;
2482 rsvalues += 2*olengths1[i];
2483 for (j=0; j<olengths1[i]; j++) {
2484 local_inidx[count] = values[2*j] - base;
2485 local_inidy[count++] = values[2*j+1];
2486 }
2487 }
2488 ierr = PetscFree(olengths1);CHKERRQ(ierr);
2489
2490 /* wait on sends */
/* svalues is the send buffer of the MPI_Isend calls above, so it is freed only after they complete */
2491 if (nsends) {ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);}
2492 ierr = PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);CHKERRQ(ierr);
2493
2494 /*
2495 should sort and remove duplicates from local_inidx,local_inidy
2496 */
2497
/* NOTE(review): the section below is compiled only when do_it_slow is defined.  It uses
   "start" and "last", which are not declared in this function, so it would presumably not
   compile if enabled.  It is also the only place that sets "duplicate". */
2498 #if defined(do_it_slow)
2499 /* sort on the from index */
2500 ierr = PetscSortIntWithArray(slen,local_inidx,local_inidy);CHKERRQ(ierr);
2501 start = 0;
2502 while (start < slen) {
2503 count = start+1;
2504 last = local_inidx[start];
2505 while (count < slen && last == local_inidx[count]) count++;
2506 if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2507 /* sort on to index */
2508 ierr = PetscSortInt(count-start,local_inidy+start);CHKERRQ(ierr);
2509 }
2510 /* remove duplicates; not most efficient way, but probably good enough */
2511 i = start;
2512 while (i < count-1) {
2513 if (local_inidy[i] != local_inidy[i+1]) i++;
2514 else { /* found a duplicate */
2515 duplicate = PETSC_TRUE;
2516 for (j=i; j<slen-1; j++) {
2517 local_inidx[j] = local_inidx[j+1];
2518 local_inidy[j] = local_inidy[j+1];
2519 }
2520 slen--;
2521 count--;
2522 }
2523 }
2524 start = count;
2525 }
2526 #endif
2527 if (duplicate) {
2528 ierr = PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");CHKERRQ(ierr);
2529 }
/* NOTE(review): the indices handed over here were already multiplied by bs when they were
   packed, and the StoP/PtoS path multiplies its index arguments by bs again; this looks
   correct only when bs == 1 on this path - confirm against the callers. */
2530 ierr = VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
2531 ierr = PetscFree2(local_inidx,local_inidy);CHKERRQ(ierr);
2532 PetscFunctionReturn(0);
2533 }
2534
VecScatterSetUp_MPI1(VecScatter ctx)2535 PetscErrorCode VecScatterSetUp_MPI1(VecScatter ctx)
2536 {
2537 PetscErrorCode ierr;
2538
2539 PetscFunctionBegin;
2540 ierr = VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI1,VecScatterCreateLocal_StoP_MPI1,VecScatterCreateLocal_PtoP_MPI1);CHKERRQ(ierr);
2541 PetscFunctionReturn(0);
2542 }
2543
VecScatterCreate_MPI1(VecScatter ctx)2544 PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
2545 {
2546 PetscErrorCode ierr;
2547
2548 PetscFunctionBegin;
2549 ctx->ops->setup = VecScatterSetUp_MPI1;
2550 ierr = PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);CHKERRQ(ierr);
2551 ierr = PetscInfo(ctx,"Using MPI1 for vector scatter\n");CHKERRQ(ierr);
2552 PetscFunctionReturn(0);
2553 }
2554
2555