1 
2 /*
3     Defines parallel vector scatters using MPI1.
4 */
5 
6 #include <../src/vec/vec/impls/dvecimpl.h>         /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8 #include <petsc/private/vecscatterimpl.h>
9 
10 
VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)11 PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
12 {
13   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
14   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
15   PetscErrorCode         ierr;
16   PetscInt               i;
17   PetscMPIInt            rank;
18   PetscViewerFormat      format;
19   PetscBool              iascii;
20 
21   PetscFunctionBegin;
22   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
23   if (iascii) {
24     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);CHKERRQ(ierr);
25     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
26     if (format ==  PETSC_VIEWER_ASCII_INFO) {
27       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28 
29       ierr = MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
30       ierr = MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
31       itmp = to->starts[to->n+1];
32       ierr = MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
33       itmp = from->starts[from->n+1];
34       ierr = MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
35       ierr = MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
36 
37       ierr = PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");CHKERRQ(ierr);
38       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);CHKERRQ(ierr);
39       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);CHKERRQ(ierr);
40       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
41       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
42       ierr = PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
43 
44     } else {
45       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
46       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);CHKERRQ(ierr);
47       if (to->n) {
48         for (i=0; i<to->n; i++) {
49           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);CHKERRQ(ierr);
50           if (to->memcpy_plan.optimized[i]) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]);CHKERRQ(ierr); }
51         }
52         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");CHKERRQ(ierr);
53         for (i=0; i<to->starts[to->n]; i++) {
54           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);CHKERRQ(ierr);
55         }
56       }
57 
58       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);CHKERRQ(ierr);
59       if (from->n) {
60         for (i=0; i<from->n; i++) {
61           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);CHKERRQ(ierr);
62           if (from->memcpy_plan.optimized[i]) { ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]);CHKERRQ(ierr); }
63         }
64 
65         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");CHKERRQ(ierr);
66         for (i=0; i<from->starts[from->n]; i++) {
67           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);CHKERRQ(ierr);
68         }
69       }
70       if (to->local.n) {
71         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);CHKERRQ(ierr);
72         if (to->local.memcpy_plan.optimized[0]) {
73           ierr = PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);CHKERRQ(ierr);
74         }
75         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
76           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);CHKERRQ(ierr);
77         }
78       }
79 
80       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
81       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
82     }
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /* -------------------------------------------------------------------------------------*/
VecScatterDestroy_PtoP_MPI1(VecScatter ctx)88 PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
89 {
90   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
91   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
92   PetscErrorCode         ierr;
93   PetscInt               i;
94 
95   PetscFunctionBegin;
96   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
97   if (to->requests) {
98     for (i=0; i<to->n; i++) {
99       ierr = MPI_Request_free(to->requests + i);CHKERRQ(ierr);
100     }
101   }
102   if (to->rev_requests) {
103     for (i=0; i<to->n; i++) {
104       ierr = MPI_Request_free(to->rev_requests + i);CHKERRQ(ierr);
105     }
106   }
107   if (from->requests) {
108     for (i=0; i<from->n; i++) {
109       ierr = MPI_Request_free(from->requests + i);CHKERRQ(ierr);
110     }
111   }
112 
113   if (from->rev_requests) {
114     for (i=0; i<from->n; i++) {
115       ierr = MPI_Request_free(from->rev_requests + i);CHKERRQ(ierr);
116     }
117   }
118 
119   ierr = PetscFree(to->local.vslots);CHKERRQ(ierr);
120   ierr = PetscFree(from->local.vslots);CHKERRQ(ierr);
121   ierr = PetscFree(to->local.slots_nonmatching);CHKERRQ(ierr);
122   ierr = PetscFree(from->local.slots_nonmatching);CHKERRQ(ierr);
123   ierr = PetscFree(to->rev_requests);CHKERRQ(ierr);
124   ierr = PetscFree(from->rev_requests);CHKERRQ(ierr);
125   ierr = PetscFree(to->requests);CHKERRQ(ierr);
126   ierr = PetscFree(from->requests);CHKERRQ(ierr);
127   ierr = PetscFree4(to->values,to->indices,to->starts,to->procs);CHKERRQ(ierr);
128   ierr = PetscFree2(to->sstatus,to->rstatus);CHKERRQ(ierr);
129   ierr = PetscFree4(from->values,from->indices,from->starts,from->procs);CHKERRQ(ierr);
130   ierr = VecScatterMemcpyPlanDestroy_PtoP(to,from);CHKERRQ(ierr);
131   ierr = PetscFree(from);CHKERRQ(ierr);
132   ierr = PetscFree(to);CHKERRQ(ierr);
133   PetscFunctionReturn(0);
134 }
135 
136 /* --------------------------------------------------------------------------------------*/
137 
VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)138 PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
139 {
140   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
141   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
142   PetscErrorCode         ierr;
143   PetscInt               ny,bs = in_from->bs;
144 
145   PetscFunctionBegin;
146   out->ops->begin   = in->ops->begin;
147   out->ops->end     = in->ops->end;
148   out->ops->copy    = in->ops->copy;
149   out->ops->destroy = in->ops->destroy;
150   out->ops->view    = in->ops->view;
151 
152   /* allocate entire send scatter context */
153   ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
154   ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
155 
156   ny                = in_to->starts[in_to->n];
157   out_to->n         = in_to->n;
158   out_to->format    = in_to->format;
159 
160   ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
161   ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
162   ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
163   ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
164   ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
165   ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
166 
167   out->todata                        = (void*)out_to;
168   out_to->local.n                    = in_to->local.n;
169   out_to->local.nonmatching_computed = PETSC_FALSE;
170   out_to->local.n_nonmatching        = 0;
171   out_to->local.slots_nonmatching    = NULL;
172   if (in_to->local.n) {
173     ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
174     ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
175     ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
176     ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
177   } else {
178     out_to->local.vslots   = NULL;
179     out_from->local.vslots = NULL;
180   }
181 
182   /* allocate entire receive context */
183   out_from->format    = in_from->format;
184   ny                  = in_from->starts[in_from->n];
185   out_from->n         = in_from->n;
186 
187   ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
188   ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
189   ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
190   ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
191   ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
192 
193   out->fromdata                        = (void*)out_from;
194   out_from->local.n                    = in_from->local.n;
195   out_from->local.nonmatching_computed = PETSC_FALSE;
196   out_from->local.n_nonmatching        = 0;
197   out_from->local.slots_nonmatching    = NULL;
198 
199   /*
200       set up the request arrays for use with isend_init() and irecv_init()
201   */
202   {
203     PetscMPIInt tag;
204     MPI_Comm    comm;
205     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
206     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
207     PetscInt    i;
208     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
209     MPI_Request *rev_swaits,*rev_rwaits;
210     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
211 
212     ierr = PetscMalloc1(in_to->n,&out_to->rev_requests);CHKERRQ(ierr);
213     ierr = PetscMalloc1(in_from->n,&out_from->rev_requests);CHKERRQ(ierr);
214 
215     rev_rwaits = out_to->rev_requests;
216     rev_swaits = out_from->rev_requests;
217 
218     out_from->bs = out_to->bs = bs;
219     tag          = ((PetscObject)out)->tag;
220     ierr         = PetscObjectGetComm((PetscObject)out,&comm);CHKERRQ(ierr);
221 
222     /* Register the receives that you will use later (sends for scatter reverse) */
223     for (i=0; i<out_from->n; i++) {
224       ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
225       ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);CHKERRQ(ierr);
226     }
227     for (i=0; i<out_to->n; i++) {
228       ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
229       /* Register receives for scatter reverse */
230       ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);CHKERRQ(ierr);
231     }
232   }
233 
234   ierr = VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)238 PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
239 {
240   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
241   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
242   PetscErrorCode         ierr;
243   PetscInt               ny,bs = in_from->bs;
244   PetscMPIInt            size;
245 
246   PetscFunctionBegin;
247   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);CHKERRQ(ierr);
248 
249   out->ops->begin     = in->ops->begin;
250   out->ops->end       = in->ops->end;
251   out->ops->copy      = in->ops->copy;
252   out->ops->destroy   = in->ops->destroy;
253   out->ops->view      = in->ops->view;
254 
255   /* allocate entire send scatter context */
256   ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
257   ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
258 
259   ny                = in_to->starts[in_to->n];
260   out_to->n         = in_to->n;
261   out_to->format    = in_to->format;
262 
263   ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
264   ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
265   ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
266   ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
267   ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
268   ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
269 
270   out->todata                        = (void*)out_to;
271   out_to->local.n                    = in_to->local.n;
272   out_to->local.nonmatching_computed = PETSC_FALSE;
273   out_to->local.n_nonmatching        = 0;
274   out_to->local.slots_nonmatching    = NULL;
275   if (in_to->local.n) {
276     ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
277     ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
278     ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
279     ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
280   } else {
281     out_to->local.vslots   = NULL;
282     out_from->local.vslots = NULL;
283   }
284 
285   /* allocate entire receive context */
286   out_from->format    = in_from->format;
287   ny                  = in_from->starts[in_from->n];
288   out_from->n         = in_from->n;
289 
290   ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
291   ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
292   ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
293   ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
294   ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
295 
296   out->fromdata                        = (void*)out_from;
297   out_from->local.n                    = in_from->local.n;
298   out_from->local.nonmatching_computed = PETSC_FALSE;
299   out_from->local.n_nonmatching        = 0;
300   out_from->local.slots_nonmatching    = NULL;
301 
302   ierr = VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General * to,VecScatter_MPI_General * from)307 PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
308 {
309   PetscErrorCode ierr;
310 
311   PetscFunctionBegin;
312   ierr = VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);CHKERRQ(ierr);
313   ierr = VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);CHKERRQ(ierr);
314   ierr = VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General * in_to,const VecScatter_MPI_General * in_from,VecScatter_MPI_General * out_to,VecScatter_MPI_General * out_from)318 PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
319 {
320   PetscErrorCode ierr;
321 
322   PetscFunctionBegin;
323   ierr = VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);CHKERRQ(ierr);
324   ierr = VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);CHKERRQ(ierr);
325   ierr = VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);CHKERRQ(ierr);
326   ierr = VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General * to,VecScatter_MPI_General * from)330 PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
331 {
332   PetscErrorCode ierr;
333 
334   PetscFunctionBegin;
335   ierr = VecScatterMemcpyPlanDestroy(&to->memcpy_plan);CHKERRQ(ierr);
336   ierr = VecScatterMemcpyPlanDestroy(&from->memcpy_plan);CHKERRQ(ierr);
337   ierr = VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);CHKERRQ(ierr);
338   ierr = VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 /* --------------------------------------------------------------------------------------------------
343     Packs and unpacks the message data into send or from receive buffers.
344 
345     These could be generated automatically.
346 
347     Fortran kernels etc. could be used.
348 */
Pack_MPI1_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)349 PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
350 {
351   PetscInt i;
352   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
353 }
354 
UnPack_MPI1_1(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)355 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
356 {
357   PetscInt i;
358 
359   PetscFunctionBegin;
360   switch (addv) {
361   case INSERT_VALUES:
362   case INSERT_ALL_VALUES:
363     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
364     break;
365   case ADD_VALUES:
366   case ADD_ALL_VALUES:
367     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
368     break;
369 #if !defined(PETSC_USE_COMPLEX)
370   case MAX_VALUES:
371     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
372 #else
373   case MAX_VALUES:
374 #endif
375   case NOT_SET_VALUES:
376     break;
377   default:
378     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
379   }
380   PetscFunctionReturn(0);
381 }
382 
Scatter_MPI1_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)383 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
384 {
385   PetscInt i;
386 
387   PetscFunctionBegin;
388   switch (addv) {
389   case INSERT_VALUES:
390   case INSERT_ALL_VALUES:
391     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
392     break;
393   case ADD_VALUES:
394   case ADD_ALL_VALUES:
395     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
396     break;
397 #if !defined(PETSC_USE_COMPLEX)
398   case MAX_VALUES:
399     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
400 #else
401   case MAX_VALUES:
402 #endif
403   case NOT_SET_VALUES:
404     break;
405   default:
406     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)412 PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
413 {
414   PetscInt i,idx;
415 
416   for (i=0; i<n; i++) {
417     idx  = *indicesx++;
418     y[0] = x[idx];
419     y[1] = x[idx+1];
420     y   += 2;
421   }
422 }
423 
UnPack_MPI1_2(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)424 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
425 {
426   PetscInt i,idy;
427 
428   PetscFunctionBegin;
429   switch (addv) {
430   case INSERT_VALUES:
431   case INSERT_ALL_VALUES:
432     for (i=0; i<n; i++) {
433       idy      = *indicesy++;
434       y[idy]   = x[0];
435       y[idy+1] = x[1];
436       x       += 2;
437     }
438     break;
439   case ADD_VALUES:
440   case ADD_ALL_VALUES:
441     for (i=0; i<n; i++) {
442       idy       = *indicesy++;
443       y[idy]   += x[0];
444       y[idy+1] += x[1];
445       x        += 2;
446     }
447     break;
448 #if !defined(PETSC_USE_COMPLEX)
449   case MAX_VALUES:
450     for (i=0; i<n; i++) {
451       idy      = *indicesy++;
452       y[idy]   = PetscMax(y[idy],x[0]);
453       y[idy+1] = PetscMax(y[idy+1],x[1]);
454       x       += 2;
455     }
456 #else
457   case MAX_VALUES:
458 #endif
459   case NOT_SET_VALUES:
460     break;
461   default:
462     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
463   }
464   PetscFunctionReturn(0);
465 }
466 
Scatter_MPI1_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)467 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
468 {
469   PetscInt i,idx,idy;
470 
471   PetscFunctionBegin;
472   switch (addv) {
473   case INSERT_VALUES:
474   case INSERT_ALL_VALUES:
475     for (i=0; i<n; i++) {
476       idx      = *indicesx++;
477       idy      = *indicesy++;
478       y[idy]   = x[idx];
479       y[idy+1] = x[idx+1];
480     }
481     break;
482   case ADD_VALUES:
483   case ADD_ALL_VALUES:
484     for (i=0; i<n; i++) {
485       idx       = *indicesx++;
486       idy       = *indicesy++;
487       y[idy]   += x[idx];
488       y[idy+1] += x[idx+1];
489     }
490     break;
491 #if !defined(PETSC_USE_COMPLEX)
492   case MAX_VALUES:
493     for (i=0; i<n; i++) {
494       idx      = *indicesx++;
495       idy      = *indicesy++;
496       y[idy]   = PetscMax(y[idy],x[idx]);
497       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
498     }
499 #else
500   case MAX_VALUES:
501 #endif
502   case NOT_SET_VALUES:
503     break;
504   default:
505     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
506   }
507   PetscFunctionReturn(0);
508 }
509 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)510 PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
511 {
512   PetscInt i,idx;
513 
514   for (i=0; i<n; i++) {
515     idx  = *indicesx++;
516     y[0] = x[idx];
517     y[1] = x[idx+1];
518     y[2] = x[idx+2];
519     y   += 3;
520   }
521 }
UnPack_MPI1_3(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)522 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
523 {
524   PetscInt i,idy;
525 
526   PetscFunctionBegin;
527   switch (addv) {
528   case INSERT_VALUES:
529   case INSERT_ALL_VALUES:
530     for (i=0; i<n; i++) {
531       idy      = *indicesy++;
532       y[idy]   = x[0];
533       y[idy+1] = x[1];
534       y[idy+2] = x[2];
535       x       += 3;
536     }
537     break;
538   case ADD_VALUES:
539   case ADD_ALL_VALUES:
540     for (i=0; i<n; i++) {
541       idy       = *indicesy++;
542       y[idy]   += x[0];
543       y[idy+1] += x[1];
544       y[idy+2] += x[2];
545       x        += 3;
546     }
547     break;
548 #if !defined(PETSC_USE_COMPLEX)
549   case MAX_VALUES:
550     for (i=0; i<n; i++) {
551       idy      = *indicesy++;
552       y[idy]   = PetscMax(y[idy],x[0]);
553       y[idy+1] = PetscMax(y[idy+1],x[1]);
554       y[idy+2] = PetscMax(y[idy+2],x[2]);
555       x       += 3;
556     }
557 #else
558   case MAX_VALUES:
559 #endif
560   case NOT_SET_VALUES:
561     break;
562   default:
563     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
564   }
565   PetscFunctionReturn(0);
566 }
567 
Scatter_MPI1_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)568 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
569 {
570   PetscInt i,idx,idy;
571 
572   PetscFunctionBegin;
573   switch (addv) {
574   case INSERT_VALUES:
575   case INSERT_ALL_VALUES:
576     for (i=0; i<n; i++) {
577       idx      = *indicesx++;
578       idy      = *indicesy++;
579       y[idy]   = x[idx];
580       y[idy+1] = x[idx+1];
581       y[idy+2] = x[idx+2];
582     }
583     break;
584   case ADD_VALUES:
585   case ADD_ALL_VALUES:
586     for (i=0; i<n; i++) {
587       idx       = *indicesx++;
588       idy       = *indicesy++;
589       y[idy]   += x[idx];
590       y[idy+1] += x[idx+1];
591       y[idy+2] += x[idx+2];
592     }
593     break;
594 #if !defined(PETSC_USE_COMPLEX)
595   case MAX_VALUES:
596     for (i=0; i<n; i++) {
597       idx      = *indicesx++;
598       idy      = *indicesy++;
599       y[idy]   = PetscMax(y[idy],x[idx]);
600       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
601       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
602     }
603 #else
604   case MAX_VALUES:
605 #endif
606   case NOT_SET_VALUES:
607     break;
608   default:
609     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
610   }
611   PetscFunctionReturn(0);
612 }
613 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)614 PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
615 {
616   PetscInt i,idx;
617 
618   for (i=0; i<n; i++) {
619     idx  = *indicesx++;
620     y[0] = x[idx];
621     y[1] = x[idx+1];
622     y[2] = x[idx+2];
623     y[3] = x[idx+3];
624     y   += 4;
625   }
626 }
UnPack_MPI1_4(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)627 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
628 {
629   PetscInt i,idy;
630 
631   PetscFunctionBegin;
632   switch (addv) {
633   case INSERT_VALUES:
634   case INSERT_ALL_VALUES:
635     for (i=0; i<n; i++) {
636       idy      = *indicesy++;
637       y[idy]   = x[0];
638       y[idy+1] = x[1];
639       y[idy+2] = x[2];
640       y[idy+3] = x[3];
641       x       += 4;
642     }
643     break;
644   case ADD_VALUES:
645   case ADD_ALL_VALUES:
646     for (i=0; i<n; i++) {
647       idy       = *indicesy++;
648       y[idy]   += x[0];
649       y[idy+1] += x[1];
650       y[idy+2] += x[2];
651       y[idy+3] += x[3];
652       x        += 4;
653     }
654     break;
655 #if !defined(PETSC_USE_COMPLEX)
656   case MAX_VALUES:
657     for (i=0; i<n; i++) {
658       idy      = *indicesy++;
659       y[idy]   = PetscMax(y[idy],x[0]);
660       y[idy+1] = PetscMax(y[idy+1],x[1]);
661       y[idy+2] = PetscMax(y[idy+2],x[2]);
662       y[idy+3] = PetscMax(y[idy+3],x[3]);
663       x       += 4;
664     }
665 #else
666   case MAX_VALUES:
667 #endif
668   case NOT_SET_VALUES:
669     break;
670   default:
671     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
672   }
673   PetscFunctionReturn(0);
674 }
675 
Scatter_MPI1_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)676 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
677 {
678   PetscInt i,idx,idy;
679 
680   PetscFunctionBegin;
681   switch (addv) {
682   case INSERT_VALUES:
683   case INSERT_ALL_VALUES:
684     for (i=0; i<n; i++) {
685       idx      = *indicesx++;
686       idy      = *indicesy++;
687       y[idy]   = x[idx];
688       y[idy+1] = x[idx+1];
689       y[idy+2] = x[idx+2];
690       y[idy+3] = x[idx+3];
691     }
692     break;
693   case ADD_VALUES:
694   case ADD_ALL_VALUES:
695     for (i=0; i<n; i++) {
696       idx       = *indicesx++;
697       idy       = *indicesy++;
698       y[idy]   += x[idx];
699       y[idy+1] += x[idx+1];
700       y[idy+2] += x[idx+2];
701       y[idy+3] += x[idx+3];
702     }
703     break;
704 #if !defined(PETSC_USE_COMPLEX)
705   case MAX_VALUES:
706     for (i=0; i<n; i++) {
707       idx      = *indicesx++;
708       idy      = *indicesy++;
709       y[idy]   = PetscMax(y[idy],x[idx]);
710       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
711       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
712       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
713     }
714 #else
715   case MAX_VALUES:
716 #endif
717   case NOT_SET_VALUES:
718     break;
719   default:
720     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
721   }
722   PetscFunctionReturn(0);
723 }
724 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)725 PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
726 {
727   PetscInt i,idx;
728 
729   for (i=0; i<n; i++) {
730     idx  = *indicesx++;
731     y[0] = x[idx];
732     y[1] = x[idx+1];
733     y[2] = x[idx+2];
734     y[3] = x[idx+3];
735     y[4] = x[idx+4];
736     y   += 5;
737   }
738 }
739 
UnPack_MPI1_5(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)740 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
741 {
742   PetscInt i,idy;
743 
744   PetscFunctionBegin;
745   switch (addv) {
746   case INSERT_VALUES:
747   case INSERT_ALL_VALUES:
748     for (i=0; i<n; i++) {
749       idy      = *indicesy++;
750       y[idy]   = x[0];
751       y[idy+1] = x[1];
752       y[idy+2] = x[2];
753       y[idy+3] = x[3];
754       y[idy+4] = x[4];
755       x       += 5;
756     }
757     break;
758   case ADD_VALUES:
759   case ADD_ALL_VALUES:
760     for (i=0; i<n; i++) {
761       idy       = *indicesy++;
762       y[idy]   += x[0];
763       y[idy+1] += x[1];
764       y[idy+2] += x[2];
765       y[idy+3] += x[3];
766       y[idy+4] += x[4];
767       x        += 5;
768     }
769     break;
770 #if !defined(PETSC_USE_COMPLEX)
771   case MAX_VALUES:
772     for (i=0; i<n; i++) {
773       idy      = *indicesy++;
774       y[idy]   = PetscMax(y[idy],x[0]);
775       y[idy+1] = PetscMax(y[idy+1],x[1]);
776       y[idy+2] = PetscMax(y[idy+2],x[2]);
777       y[idy+3] = PetscMax(y[idy+3],x[3]);
778       y[idy+4] = PetscMax(y[idy+4],x[4]);
779       x       += 5;
780     }
781 #else
782   case MAX_VALUES:
783 #endif
784   case NOT_SET_VALUES:
785     break;
786   default:
787     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
788   }
789   PetscFunctionReturn(0);
790 }
791 
Scatter_MPI1_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)792 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
793 {
794   PetscInt i,idx,idy;
795 
796   PetscFunctionBegin;
797   switch (addv) {
798   case INSERT_VALUES:
799   case INSERT_ALL_VALUES:
800     for (i=0; i<n; i++) {
801       idx      = *indicesx++;
802       idy      = *indicesy++;
803       y[idy]   = x[idx];
804       y[idy+1] = x[idx+1];
805       y[idy+2] = x[idx+2];
806       y[idy+3] = x[idx+3];
807       y[idy+4] = x[idx+4];
808     }
809     break;
810   case ADD_VALUES:
811   case ADD_ALL_VALUES:
812     for (i=0; i<n; i++) {
813       idx       = *indicesx++;
814       idy       = *indicesy++;
815       y[idy]   += x[idx];
816       y[idy+1] += x[idx+1];
817       y[idy+2] += x[idx+2];
818       y[idy+3] += x[idx+3];
819       y[idy+4] += x[idx+4];
820     }
821     break;
822 #if !defined(PETSC_USE_COMPLEX)
823   case MAX_VALUES:
824     for (i=0; i<n; i++) {
825       idx      = *indicesx++;
826       idy      = *indicesy++;
827       y[idy]   = PetscMax(y[idy],x[idx]);
828       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
829       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
830       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
831       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
832     }
833 #else
834   case MAX_VALUES:
835 #endif
836   case NOT_SET_VALUES:
837     break;
838   default:
839     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840   }
841   PetscFunctionReturn(0);
842 }
843 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)844 PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
845 {
846   PetscInt i,idx;
847 
848   for (i=0; i<n; i++) {
849     idx  = *indicesx++;
850     y[0] = x[idx];
851     y[1] = x[idx+1];
852     y[2] = x[idx+2];
853     y[3] = x[idx+3];
854     y[4] = x[idx+4];
855     y[5] = x[idx+5];
856     y   += 6;
857   }
858 }
859 
UnPack_MPI1_6(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)860 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
861 {
862   PetscInt i,idy;
863 
864   PetscFunctionBegin;
865   switch (addv) {
866   case INSERT_VALUES:
867   case INSERT_ALL_VALUES:
868     for (i=0; i<n; i++) {
869       idy      = *indicesy++;
870       y[idy]   = x[0];
871       y[idy+1] = x[1];
872       y[idy+2] = x[2];
873       y[idy+3] = x[3];
874       y[idy+4] = x[4];
875       y[idy+5] = x[5];
876       x       += 6;
877     }
878     break;
879   case ADD_VALUES:
880   case ADD_ALL_VALUES:
881     for (i=0; i<n; i++) {
882       idy       = *indicesy++;
883       y[idy]   += x[0];
884       y[idy+1] += x[1];
885       y[idy+2] += x[2];
886       y[idy+3] += x[3];
887       y[idy+4] += x[4];
888       y[idy+5] += x[5];
889       x        += 6;
890     }
891     break;
892 #if !defined(PETSC_USE_COMPLEX)
893   case MAX_VALUES:
894     for (i=0; i<n; i++) {
895       idy      = *indicesy++;
896       y[idy]   = PetscMax(y[idy],x[0]);
897       y[idy+1] = PetscMax(y[idy+1],x[1]);
898       y[idy+2] = PetscMax(y[idy+2],x[2]);
899       y[idy+3] = PetscMax(y[idy+3],x[3]);
900       y[idy+4] = PetscMax(y[idy+4],x[4]);
901       y[idy+5] = PetscMax(y[idy+5],x[5]);
902       x       += 6;
903     }
904 #else
905   case MAX_VALUES:
906 #endif
907   case NOT_SET_VALUES:
908     break;
909   default:
910     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
911   }
912   PetscFunctionReturn(0);
913 }
914 
Scatter_MPI1_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)915 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
916 {
917   PetscInt i,idx,idy;
918 
919   PetscFunctionBegin;
920   switch (addv) {
921   case INSERT_VALUES:
922   case INSERT_ALL_VALUES:
923     for (i=0; i<n; i++) {
924       idx      = *indicesx++;
925       idy      = *indicesy++;
926       y[idy]   = x[idx];
927       y[idy+1] = x[idx+1];
928       y[idy+2] = x[idx+2];
929       y[idy+3] = x[idx+3];
930       y[idy+4] = x[idx+4];
931       y[idy+5] = x[idx+5];
932     }
933     break;
934   case ADD_VALUES:
935   case ADD_ALL_VALUES:
936     for (i=0; i<n; i++) {
937       idx       = *indicesx++;
938       idy       = *indicesy++;
939       y[idy]   += x[idx];
940       y[idy+1] += x[idx+1];
941       y[idy+2] += x[idx+2];
942       y[idy+3] += x[idx+3];
943       y[idy+4] += x[idx+4];
944       y[idy+5] += x[idx+5];
945     }
946     break;
947 #if !defined(PETSC_USE_COMPLEX)
948   case MAX_VALUES:
949     for (i=0; i<n; i++) {
950       idx      = *indicesx++;
951       idy      = *indicesy++;
952       y[idy]   = PetscMax(y[idy],x[idx]);
953       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
954       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
955       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
956       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
957       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
958     }
959 #else
960   case MAX_VALUES:
961 #endif
962   case NOT_SET_VALUES:
963     break;
964   default:
965     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
966   }
967   PetscFunctionReturn(0);
968 }
969 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)970 PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
971 {
972   PetscInt i,idx;
973 
974   for (i=0; i<n; i++) {
975     idx  = *indicesx++;
976     y[0] = x[idx];
977     y[1] = x[idx+1];
978     y[2] = x[idx+2];
979     y[3] = x[idx+3];
980     y[4] = x[idx+4];
981     y[5] = x[idx+5];
982     y[6] = x[idx+6];
983     y   += 7;
984   }
985 }
986 
UnPack_MPI1_7(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)987 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
988 {
989   PetscInt i,idy;
990 
991   PetscFunctionBegin;
992   switch (addv) {
993   case INSERT_VALUES:
994   case INSERT_ALL_VALUES:
995     for (i=0; i<n; i++) {
996       idy      = *indicesy++;
997       y[idy]   = x[0];
998       y[idy+1] = x[1];
999       y[idy+2] = x[2];
1000       y[idy+3] = x[3];
1001       y[idy+4] = x[4];
1002       y[idy+5] = x[5];
1003       y[idy+6] = x[6];
1004       x       += 7;
1005     }
1006     break;
1007   case ADD_VALUES:
1008   case ADD_ALL_VALUES:
1009     for (i=0; i<n; i++) {
1010       idy       = *indicesy++;
1011       y[idy]   += x[0];
1012       y[idy+1] += x[1];
1013       y[idy+2] += x[2];
1014       y[idy+3] += x[3];
1015       y[idy+4] += x[4];
1016       y[idy+5] += x[5];
1017       y[idy+6] += x[6];
1018       x        += 7;
1019     }
1020     break;
1021 #if !defined(PETSC_USE_COMPLEX)
1022   case MAX_VALUES:
1023     for (i=0; i<n; i++) {
1024       idy      = *indicesy++;
1025       y[idy]   = PetscMax(y[idy],x[0]);
1026       y[idy+1] = PetscMax(y[idy+1],x[1]);
1027       y[idy+2] = PetscMax(y[idy+2],x[2]);
1028       y[idy+3] = PetscMax(y[idy+3],x[3]);
1029       y[idy+4] = PetscMax(y[idy+4],x[4]);
1030       y[idy+5] = PetscMax(y[idy+5],x[5]);
1031       y[idy+6] = PetscMax(y[idy+6],x[6]);
1032       x       += 7;
1033     }
1034 #else
1035   case MAX_VALUES:
1036 #endif
1037   case NOT_SET_VALUES:
1038     break;
1039   default:
1040     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1041   }
1042   PetscFunctionReturn(0);
1043 }
1044 
Scatter_MPI1_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1045 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1046 {
1047   PetscInt i,idx,idy;
1048 
1049   PetscFunctionBegin;
1050   switch (addv) {
1051   case INSERT_VALUES:
1052   case INSERT_ALL_VALUES:
1053     for (i=0; i<n; i++) {
1054       idx      = *indicesx++;
1055       idy      = *indicesy++;
1056       y[idy]   = x[idx];
1057       y[idy+1] = x[idx+1];
1058       y[idy+2] = x[idx+2];
1059       y[idy+3] = x[idx+3];
1060       y[idy+4] = x[idx+4];
1061       y[idy+5] = x[idx+5];
1062       y[idy+6] = x[idx+6];
1063     }
1064     break;
1065   case ADD_VALUES:
1066   case ADD_ALL_VALUES:
1067     for (i=0; i<n; i++) {
1068       idx       = *indicesx++;
1069       idy       = *indicesy++;
1070       y[idy]   += x[idx];
1071       y[idy+1] += x[idx+1];
1072       y[idy+2] += x[idx+2];
1073       y[idy+3] += x[idx+3];
1074       y[idy+4] += x[idx+4];
1075       y[idy+5] += x[idx+5];
1076       y[idy+6] += x[idx+6];
1077     }
1078     break;
1079 #if !defined(PETSC_USE_COMPLEX)
1080   case MAX_VALUES:
1081     for (i=0; i<n; i++) {
1082       idx      = *indicesx++;
1083       idy      = *indicesy++;
1084       y[idy]   = PetscMax(y[idy],x[idx]);
1085       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1086       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1087       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1088       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1089       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1090       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1091     }
1092 #else
1093   case MAX_VALUES:
1094 #endif
1095   case NOT_SET_VALUES:
1096     break;
1097   default:
1098     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1103 PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1104 {
1105   PetscInt i,idx;
1106 
1107   for (i=0; i<n; i++) {
1108     idx  = *indicesx++;
1109     y[0] = x[idx];
1110     y[1] = x[idx+1];
1111     y[2] = x[idx+2];
1112     y[3] = x[idx+3];
1113     y[4] = x[idx+4];
1114     y[5] = x[idx+5];
1115     y[6] = x[idx+6];
1116     y[7] = x[idx+7];
1117     y   += 8;
1118   }
1119 }
1120 
UnPack_MPI1_8(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1121 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1122 {
1123   PetscInt i,idy;
1124 
1125   PetscFunctionBegin;
1126   switch (addv) {
1127   case INSERT_VALUES:
1128   case INSERT_ALL_VALUES:
1129     for (i=0; i<n; i++) {
1130       idy      = *indicesy++;
1131       y[idy]   = x[0];
1132       y[idy+1] = x[1];
1133       y[idy+2] = x[2];
1134       y[idy+3] = x[3];
1135       y[idy+4] = x[4];
1136       y[idy+5] = x[5];
1137       y[idy+6] = x[6];
1138       y[idy+7] = x[7];
1139       x       += 8;
1140     }
1141     break;
1142   case ADD_VALUES:
1143   case ADD_ALL_VALUES:
1144     for (i=0; i<n; i++) {
1145       idy       = *indicesy++;
1146       y[idy]   += x[0];
1147       y[idy+1] += x[1];
1148       y[idy+2] += x[2];
1149       y[idy+3] += x[3];
1150       y[idy+4] += x[4];
1151       y[idy+5] += x[5];
1152       y[idy+6] += x[6];
1153       y[idy+7] += x[7];
1154       x        += 8;
1155     }
1156     break;
1157 #if !defined(PETSC_USE_COMPLEX)
1158   case MAX_VALUES:
1159     for (i=0; i<n; i++) {
1160       idy      = *indicesy++;
1161       y[idy]   = PetscMax(y[idy],x[0]);
1162       y[idy+1] = PetscMax(y[idy+1],x[1]);
1163       y[idy+2] = PetscMax(y[idy+2],x[2]);
1164       y[idy+3] = PetscMax(y[idy+3],x[3]);
1165       y[idy+4] = PetscMax(y[idy+4],x[4]);
1166       y[idy+5] = PetscMax(y[idy+5],x[5]);
1167       y[idy+6] = PetscMax(y[idy+6],x[6]);
1168       y[idy+7] = PetscMax(y[idy+7],x[7]);
1169       x       += 8;
1170     }
1171 #else
1172   case MAX_VALUES:
1173 #endif
1174   case NOT_SET_VALUES:
1175     break;
1176   default:
1177     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
Scatter_MPI1_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1182 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1183 {
1184   PetscInt i,idx,idy;
1185 
1186   PetscFunctionBegin;
1187   switch (addv) {
1188   case INSERT_VALUES:
1189   case INSERT_ALL_VALUES:
1190     for (i=0; i<n; i++) {
1191       idx      = *indicesx++;
1192       idy      = *indicesy++;
1193       y[idy]   = x[idx];
1194       y[idy+1] = x[idx+1];
1195       y[idy+2] = x[idx+2];
1196       y[idy+3] = x[idx+3];
1197       y[idy+4] = x[idx+4];
1198       y[idy+5] = x[idx+5];
1199       y[idy+6] = x[idx+6];
1200       y[idy+7] = x[idx+7];
1201     }
1202     break;
1203   case ADD_VALUES:
1204   case ADD_ALL_VALUES:
1205     for (i=0; i<n; i++) {
1206       idx       = *indicesx++;
1207       idy       = *indicesy++;
1208       y[idy]   += x[idx];
1209       y[idy+1] += x[idx+1];
1210       y[idy+2] += x[idx+2];
1211       y[idy+3] += x[idx+3];
1212       y[idy+4] += x[idx+4];
1213       y[idy+5] += x[idx+5];
1214       y[idy+6] += x[idx+6];
1215       y[idy+7] += x[idx+7];
1216     }
1217     break;
1218 #if !defined(PETSC_USE_COMPLEX)
1219   case MAX_VALUES:
1220     for (i=0; i<n; i++) {
1221       idx      = *indicesx++;
1222       idy      = *indicesy++;
1223       y[idy]   = PetscMax(y[idy],x[idx]);
1224       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1225       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1226       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1227       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1228       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1229       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1230       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1231     }
1232 #else
1233   case MAX_VALUES:
1234 #endif
1235   case NOT_SET_VALUES:
1236     break;
1237   default:
1238     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
Pack_MPI1_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1243 PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1244 {
1245   PetscInt i,idx;
1246 
1247   for (i=0; i<n; i++) {
1248     idx   = *indicesx++;
1249     y[0]  = x[idx];
1250     y[1]  = x[idx+1];
1251     y[2]  = x[idx+2];
1252     y[3]  = x[idx+3];
1253     y[4]  = x[idx+4];
1254     y[5]  = x[idx+5];
1255     y[6]  = x[idx+6];
1256     y[7]  = x[idx+7];
1257     y[8]  = x[idx+8];
1258     y    += 9;
1259   }
1260 }
1261 
UnPack_MPI1_9(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1262 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1263 {
1264   PetscInt i,idy;
1265 
1266   PetscFunctionBegin;
1267   switch (addv) {
1268   case INSERT_VALUES:
1269   case INSERT_ALL_VALUES:
1270     for (i=0; i<n; i++) {
1271       idy       = *indicesy++;
1272       y[idy]    = x[0];
1273       y[idy+1]  = x[1];
1274       y[idy+2]  = x[2];
1275       y[idy+3]  = x[3];
1276       y[idy+4]  = x[4];
1277       y[idy+5]  = x[5];
1278       y[idy+6]  = x[6];
1279       y[idy+7]  = x[7];
1280       y[idy+8]  = x[8];
1281       x        += 9;
1282     }
1283     break;
1284   case ADD_VALUES:
1285   case ADD_ALL_VALUES:
1286     for (i=0; i<n; i++) {
1287       idy        = *indicesy++;
1288       y[idy]    += x[0];
1289       y[idy+1]  += x[1];
1290       y[idy+2]  += x[2];
1291       y[idy+3]  += x[3];
1292       y[idy+4]  += x[4];
1293       y[idy+5]  += x[5];
1294       y[idy+6]  += x[6];
1295       y[idy+7]  += x[7];
1296       y[idy+8]  += x[8];
1297       x         += 9;
1298     }
1299     break;
1300 #if !defined(PETSC_USE_COMPLEX)
1301   case MAX_VALUES:
1302     for (i=0; i<n; i++) {
1303       idy       = *indicesy++;
1304       y[idy]    = PetscMax(y[idy],x[0]);
1305       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1306       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1307       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1308       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1309       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1310       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1311       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1312       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1313       x        += 9;
1314     }
1315 #else
1316   case MAX_VALUES:
1317 #endif
1318   case NOT_SET_VALUES:
1319     break;
1320   default:
1321     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
Scatter_MPI1_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1326 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1327 {
1328   PetscInt i,idx,idy;
1329 
1330   PetscFunctionBegin;
1331   switch (addv) {
1332   case INSERT_VALUES:
1333   case INSERT_ALL_VALUES:
1334     for (i=0; i<n; i++) {
1335       idx       = *indicesx++;
1336       idy       = *indicesy++;
1337       y[idy]    = x[idx];
1338       y[idy+1]  = x[idx+1];
1339       y[idy+2]  = x[idx+2];
1340       y[idy+3]  = x[idx+3];
1341       y[idy+4]  = x[idx+4];
1342       y[idy+5]  = x[idx+5];
1343       y[idy+6]  = x[idx+6];
1344       y[idy+7]  = x[idx+7];
1345       y[idy+8]  = x[idx+8];
1346     }
1347     break;
1348   case ADD_VALUES:
1349   case ADD_ALL_VALUES:
1350     for (i=0; i<n; i++) {
1351       idx        = *indicesx++;
1352       idy        = *indicesy++;
1353       y[idy]    += x[idx];
1354       y[idy+1]  += x[idx+1];
1355       y[idy+2]  += x[idx+2];
1356       y[idy+3]  += x[idx+3];
1357       y[idy+4]  += x[idx+4];
1358       y[idy+5]  += x[idx+5];
1359       y[idy+6]  += x[idx+6];
1360       y[idy+7]  += x[idx+7];
1361       y[idy+8]  += x[idx+8];
1362     }
1363     break;
1364 #if !defined(PETSC_USE_COMPLEX)
1365   case MAX_VALUES:
1366     for (i=0; i<n; i++) {
1367       idx       = *indicesx++;
1368       idy       = *indicesy++;
1369       y[idy]    = PetscMax(y[idy],x[idx]);
1370       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1371       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1372       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1373       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1374       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1375       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1376       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1377       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1378     }
1379 #else
1380   case MAX_VALUES:
1381 #endif
1382   case NOT_SET_VALUES:
1383     break;
1384   default:
1385     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
Pack_MPI1_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1390 PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1391 {
1392   PetscInt i,idx;
1393 
1394   for (i=0; i<n; i++) {
1395     idx   = *indicesx++;
1396     y[0]  = x[idx];
1397     y[1]  = x[idx+1];
1398     y[2]  = x[idx+2];
1399     y[3]  = x[idx+3];
1400     y[4]  = x[idx+4];
1401     y[5]  = x[idx+5];
1402     y[6]  = x[idx+6];
1403     y[7]  = x[idx+7];
1404     y[8]  = x[idx+8];
1405     y[9]  = x[idx+9];
1406     y    += 10;
1407   }
1408 }
1409 
UnPack_MPI1_10(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1410 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1411 {
1412   PetscInt i,idy;
1413 
1414   PetscFunctionBegin;
1415   switch (addv) {
1416   case INSERT_VALUES:
1417   case INSERT_ALL_VALUES:
1418     for (i=0; i<n; i++) {
1419       idy       = *indicesy++;
1420       y[idy]    = x[0];
1421       y[idy+1]  = x[1];
1422       y[idy+2]  = x[2];
1423       y[idy+3]  = x[3];
1424       y[idy+4]  = x[4];
1425       y[idy+5]  = x[5];
1426       y[idy+6]  = x[6];
1427       y[idy+7]  = x[7];
1428       y[idy+8]  = x[8];
1429       y[idy+9]  = x[9];
1430       x        += 10;
1431     }
1432     break;
1433   case ADD_VALUES:
1434   case ADD_ALL_VALUES:
1435     for (i=0; i<n; i++) {
1436       idy        = *indicesy++;
1437       y[idy]    += x[0];
1438       y[idy+1]  += x[1];
1439       y[idy+2]  += x[2];
1440       y[idy+3]  += x[3];
1441       y[idy+4]  += x[4];
1442       y[idy+5]  += x[5];
1443       y[idy+6]  += x[6];
1444       y[idy+7]  += x[7];
1445       y[idy+8]  += x[8];
1446       y[idy+9]  += x[9];
1447       x         += 10;
1448     }
1449     break;
1450 #if !defined(PETSC_USE_COMPLEX)
1451   case MAX_VALUES:
1452     for (i=0; i<n; i++) {
1453       idy       = *indicesy++;
1454       y[idy]    = PetscMax(y[idy],x[0]);
1455       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1456       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1457       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1458       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1459       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1460       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1461       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1462       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1463       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1464       x        += 10;
1465     }
1466 #else
1467   case MAX_VALUES:
1468 #endif
1469   case NOT_SET_VALUES:
1470     break;
1471   default:
1472     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
Scatter_MPI1_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1477 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1478 {
1479   PetscInt i,idx,idy;
1480 
1481   PetscFunctionBegin;
1482   switch (addv) {
1483   case INSERT_VALUES:
1484   case INSERT_ALL_VALUES:
1485     for (i=0; i<n; i++) {
1486       idx       = *indicesx++;
1487       idy       = *indicesy++;
1488       y[idy]    = x[idx];
1489       y[idy+1]  = x[idx+1];
1490       y[idy+2]  = x[idx+2];
1491       y[idy+3]  = x[idx+3];
1492       y[idy+4]  = x[idx+4];
1493       y[idy+5]  = x[idx+5];
1494       y[idy+6]  = x[idx+6];
1495       y[idy+7]  = x[idx+7];
1496       y[idy+8]  = x[idx+8];
1497       y[idy+9]  = x[idx+9];
1498     }
1499     break;
1500   case ADD_VALUES:
1501   case ADD_ALL_VALUES:
1502     for (i=0; i<n; i++) {
1503       idx        = *indicesx++;
1504       idy        = *indicesy++;
1505       y[idy]    += x[idx];
1506       y[idy+1]  += x[idx+1];
1507       y[idy+2]  += x[idx+2];
1508       y[idy+3]  += x[idx+3];
1509       y[idy+4]  += x[idx+4];
1510       y[idy+5]  += x[idx+5];
1511       y[idy+6]  += x[idx+6];
1512       y[idy+7]  += x[idx+7];
1513       y[idy+8]  += x[idx+8];
1514       y[idy+9]  += x[idx+9];
1515     }
1516     break;
1517 #if !defined(PETSC_USE_COMPLEX)
1518   case MAX_VALUES:
1519     for (i=0; i<n; i++) {
1520       idx       = *indicesx++;
1521       idy       = *indicesy++;
1522       y[idy]    = PetscMax(y[idy],x[idx]);
1523       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1524       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1525       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1526       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1527       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1528       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1529       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1530       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1531       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1532     }
1533 #else
1534   case MAX_VALUES:
1535 #endif
1536   case NOT_SET_VALUES:
1537     break;
1538   default:
1539     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
Pack_MPI1_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1544 PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1545 {
1546   PetscInt i,idx;
1547 
1548   for (i=0; i<n; i++) {
1549     idx   = *indicesx++;
1550     y[0]  = x[idx];
1551     y[1]  = x[idx+1];
1552     y[2]  = x[idx+2];
1553     y[3]  = x[idx+3];
1554     y[4]  = x[idx+4];
1555     y[5]  = x[idx+5];
1556     y[6]  = x[idx+6];
1557     y[7]  = x[idx+7];
1558     y[8]  = x[idx+8];
1559     y[9]  = x[idx+9];
1560     y[10] = x[idx+10];
1561     y    += 11;
1562   }
1563 }
1564 
UnPack_MPI1_11(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1565 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1566 {
1567   PetscInt i,idy;
1568 
1569   PetscFunctionBegin;
1570   switch (addv) {
1571   case INSERT_VALUES:
1572   case INSERT_ALL_VALUES:
1573     for (i=0; i<n; i++) {
1574       idy       = *indicesy++;
1575       y[idy]    = x[0];
1576       y[idy+1]  = x[1];
1577       y[idy+2]  = x[2];
1578       y[idy+3]  = x[3];
1579       y[idy+4]  = x[4];
1580       y[idy+5]  = x[5];
1581       y[idy+6]  = x[6];
1582       y[idy+7]  = x[7];
1583       y[idy+8]  = x[8];
1584       y[idy+9]  = x[9];
1585       y[idy+10] = x[10];
1586       x        += 11;
1587     }
1588     break;
1589   case ADD_VALUES:
1590   case ADD_ALL_VALUES:
1591     for (i=0; i<n; i++) {
1592       idy        = *indicesy++;
1593       y[idy]    += x[0];
1594       y[idy+1]  += x[1];
1595       y[idy+2]  += x[2];
1596       y[idy+3]  += x[3];
1597       y[idy+4]  += x[4];
1598       y[idy+5]  += x[5];
1599       y[idy+6]  += x[6];
1600       y[idy+7]  += x[7];
1601       y[idy+8]  += x[8];
1602       y[idy+9]  += x[9];
1603       y[idy+10] += x[10];
1604       x         += 11;
1605     }
1606     break;
1607 #if !defined(PETSC_USE_COMPLEX)
1608   case MAX_VALUES:
1609     for (i=0; i<n; i++) {
1610       idy       = *indicesy++;
1611       y[idy]    = PetscMax(y[idy],x[0]);
1612       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1613       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1614       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1615       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1616       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1617       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1618       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1619       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1620       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1621       y[idy+10] = PetscMax(y[idy+10],x[10]);
1622       x        += 11;
1623     }
1624 #else
1625   case MAX_VALUES:
1626 #endif
1627   case NOT_SET_VALUES:
1628     break;
1629   default:
1630     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
Scatter_MPI1_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1635 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1636 {
1637   PetscInt i,idx,idy;
1638 
1639   PetscFunctionBegin;
1640   switch (addv) {
1641   case INSERT_VALUES:
1642   case INSERT_ALL_VALUES:
1643     for (i=0; i<n; i++) {
1644       idx       = *indicesx++;
1645       idy       = *indicesy++;
1646       y[idy]    = x[idx];
1647       y[idy+1]  = x[idx+1];
1648       y[idy+2]  = x[idx+2];
1649       y[idy+3]  = x[idx+3];
1650       y[idy+4]  = x[idx+4];
1651       y[idy+5]  = x[idx+5];
1652       y[idy+6]  = x[idx+6];
1653       y[idy+7]  = x[idx+7];
1654       y[idy+8]  = x[idx+8];
1655       y[idy+9]  = x[idx+9];
1656       y[idy+10] = x[idx+10];
1657     }
1658     break;
1659   case ADD_VALUES:
1660   case ADD_ALL_VALUES:
1661     for (i=0; i<n; i++) {
1662       idx        = *indicesx++;
1663       idy        = *indicesy++;
1664       y[idy]    += x[idx];
1665       y[idy+1]  += x[idx+1];
1666       y[idy+2]  += x[idx+2];
1667       y[idy+3]  += x[idx+3];
1668       y[idy+4]  += x[idx+4];
1669       y[idy+5]  += x[idx+5];
1670       y[idy+6]  += x[idx+6];
1671       y[idy+7]  += x[idx+7];
1672       y[idy+8]  += x[idx+8];
1673       y[idy+9]  += x[idx+9];
1674       y[idy+10] += x[idx+10];
1675     }
1676     break;
1677 #if !defined(PETSC_USE_COMPLEX)
1678   case MAX_VALUES:
1679     for (i=0; i<n; i++) {
1680       idx       = *indicesx++;
1681       idy       = *indicesy++;
1682       y[idy]    = PetscMax(y[idy],x[idx]);
1683       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1684       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1685       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1686       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1687       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1688       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1689       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1690       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1691       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1692       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1693     }
1694 #else
1695   case MAX_VALUES:
1696 #endif
1697   case NOT_SET_VALUES:
1698     break;
1699   default:
1700     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1701   }
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1706 PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1707 {
1708   PetscInt i,idx;
1709 
1710   for (i=0; i<n; i++) {
1711     idx   = *indicesx++;
1712     y[0]  = x[idx];
1713     y[1]  = x[idx+1];
1714     y[2]  = x[idx+2];
1715     y[3]  = x[idx+3];
1716     y[4]  = x[idx+4];
1717     y[5]  = x[idx+5];
1718     y[6]  = x[idx+6];
1719     y[7]  = x[idx+7];
1720     y[8]  = x[idx+8];
1721     y[9]  = x[idx+9];
1722     y[10] = x[idx+10];
1723     y[11] = x[idx+11];
1724     y    += 12;
1725   }
1726 }
1727 
UnPack_MPI1_12(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1728 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1729 {
1730   PetscInt i,idy;
1731 
1732   PetscFunctionBegin;
1733   switch (addv) {
1734   case INSERT_VALUES:
1735   case INSERT_ALL_VALUES:
1736     for (i=0; i<n; i++) {
1737       idy       = *indicesy++;
1738       y[idy]    = x[0];
1739       y[idy+1]  = x[1];
1740       y[idy+2]  = x[2];
1741       y[idy+3]  = x[3];
1742       y[idy+4]  = x[4];
1743       y[idy+5]  = x[5];
1744       y[idy+6]  = x[6];
1745       y[idy+7]  = x[7];
1746       y[idy+8]  = x[8];
1747       y[idy+9]  = x[9];
1748       y[idy+10] = x[10];
1749       y[idy+11] = x[11];
1750       x        += 12;
1751     }
1752     break;
1753   case ADD_VALUES:
1754   case ADD_ALL_VALUES:
1755     for (i=0; i<n; i++) {
1756       idy        = *indicesy++;
1757       y[idy]    += x[0];
1758       y[idy+1]  += x[1];
1759       y[idy+2]  += x[2];
1760       y[idy+3]  += x[3];
1761       y[idy+4]  += x[4];
1762       y[idy+5]  += x[5];
1763       y[idy+6]  += x[6];
1764       y[idy+7]  += x[7];
1765       y[idy+8]  += x[8];
1766       y[idy+9]  += x[9];
1767       y[idy+10] += x[10];
1768       y[idy+11] += x[11];
1769       x         += 12;
1770     }
1771     break;
1772 #if !defined(PETSC_USE_COMPLEX)
1773   case MAX_VALUES:
1774     for (i=0; i<n; i++) {
1775       idy       = *indicesy++;
1776       y[idy]    = PetscMax(y[idy],x[0]);
1777       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1778       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1779       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1780       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1781       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1782       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1783       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1784       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1785       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1786       y[idy+10] = PetscMax(y[idy+10],x[10]);
1787       y[idy+11] = PetscMax(y[idy+11],x[11]);
1788       x        += 12;
1789     }
1790 #else
1791   case MAX_VALUES:
1792 #endif
1793   case NOT_SET_VALUES:
1794     break;
1795   default:
1796     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1797   }
1798   PetscFunctionReturn(0);
1799 }
1800 
Scatter_MPI1_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1801 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1802 {
1803   PetscInt i,idx,idy;
1804 
1805   PetscFunctionBegin;
1806   switch (addv) {
1807   case INSERT_VALUES:
1808   case INSERT_ALL_VALUES:
1809     for (i=0; i<n; i++) {
1810       idx       = *indicesx++;
1811       idy       = *indicesy++;
1812       y[idy]    = x[idx];
1813       y[idy+1]  = x[idx+1];
1814       y[idy+2]  = x[idx+2];
1815       y[idy+3]  = x[idx+3];
1816       y[idy+4]  = x[idx+4];
1817       y[idy+5]  = x[idx+5];
1818       y[idy+6]  = x[idx+6];
1819       y[idy+7]  = x[idx+7];
1820       y[idy+8]  = x[idx+8];
1821       y[idy+9]  = x[idx+9];
1822       y[idy+10] = x[idx+10];
1823       y[idy+11] = x[idx+11];
1824     }
1825     break;
1826   case ADD_VALUES:
1827   case ADD_ALL_VALUES:
1828     for (i=0; i<n; i++) {
1829       idx        = *indicesx++;
1830       idy        = *indicesy++;
1831       y[idy]    += x[idx];
1832       y[idy+1]  += x[idx+1];
1833       y[idy+2]  += x[idx+2];
1834       y[idy+3]  += x[idx+3];
1835       y[idy+4]  += x[idx+4];
1836       y[idy+5]  += x[idx+5];
1837       y[idy+6]  += x[idx+6];
1838       y[idy+7]  += x[idx+7];
1839       y[idy+8]  += x[idx+8];
1840       y[idy+9]  += x[idx+9];
1841       y[idy+10] += x[idx+10];
1842       y[idy+11] += x[idx+11];
1843     }
1844     break;
1845 #if !defined(PETSC_USE_COMPLEX)
1846   case MAX_VALUES:
1847     for (i=0; i<n; i++) {
1848       idx       = *indicesx++;
1849       idy       = *indicesy++;
1850       y[idy]    = PetscMax(y[idy],x[idx]);
1851       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1852       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1853       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1854       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1855       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1856       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1857       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1858       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1859       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1860       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1861       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1862     }
1863 #else
1864   case MAX_VALUES:
1865 #endif
1866   case NOT_SET_VALUES:
1867     break;
1868   default:
1869     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 /* ----------------------------------------------------------------------------------------------- */
Pack_MPI1_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1875 PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1876 {
1877   PetscInt       i,idx;
1878   PetscErrorCode ierr;
1879 
1880   for (i=0; i<n; i++) {
1881     idx   = *indicesx++;
1882     ierr = PetscArraycpy(y,x + idx,bs);CHKERRV(ierr);
1883     y    += bs;
1884   }
1885 }
1886 
UnPack_MPI1_bs(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1887 PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1888 {
1889   PetscInt i,idy,j;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   switch (addv) {
1894   case INSERT_VALUES:
1895   case INSERT_ALL_VALUES:
1896     for (i=0; i<n; i++) {
1897       idy       = *indicesy++;
1898       ierr = PetscArraycpy(y + idy,x,bs);CHKERRQ(ierr);
1899       x        += bs;
1900     }
1901     break;
1902   case ADD_VALUES:
1903   case ADD_ALL_VALUES:
1904     for (i=0; i<n; i++) {
1905       idy        = *indicesy++;
1906       for (j=0; j<bs; j++) y[idy+j] += x[j];
1907       x         += bs;
1908     }
1909     break;
1910 #if !defined(PETSC_USE_COMPLEX)
1911   case MAX_VALUES:
1912     for (i=0; i<n; i++) {
1913       idy = *indicesy++;
1914       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1915       x  += bs;
1916     }
1917 #else
1918   case MAX_VALUES:
1919 #endif
1920   case NOT_SET_VALUES:
1921     break;
1922   default:
1923     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
Scatter_MPI1_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1928 PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1929 {
1930   PetscInt i,idx,idy,j;
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   switch (addv) {
1935   case INSERT_VALUES:
1936   case INSERT_ALL_VALUES:
1937     for (i=0; i<n; i++) {
1938       idx       = *indicesx++;
1939       idy       = *indicesy++;
1940       ierr = PetscArraycpy(y + idy, x + idx,bs);CHKERRQ(ierr);
1941     }
1942     break;
1943   case ADD_VALUES:
1944   case ADD_ALL_VALUES:
1945     for (i=0; i<n; i++) {
1946       idx        = *indicesx++;
1947       idy        = *indicesy++;
1948       for (j=0; j<bs; j++)  y[idy+j] += x[idx+j];
1949     }
1950     break;
1951 #if !defined(PETSC_USE_COMPLEX)
1952   case MAX_VALUES:
1953     for (i=0; i<n; i++) {
1954       idx       = *indicesx++;
1955       idy       = *indicesy++;
1956       for (j=0; j<bs; j++)  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1957     }
1958 #else
1959   case MAX_VALUES:
1960 #endif
1961   case NOT_SET_VALUES:
1962     break;
1963   default:
1964     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
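/*
   Create the VecScatterBegin/End routines for our chosen block sizes: the template header below
   is included once for each value of BS (the fixed sizes 1 through 12, then the run-time block
   size bs).
*/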
1970 #define BS 1
1971 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1972 #define BS 2
1973 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1974 #define BS 3
1975 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1976 #define BS 4
1977 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1978 #define BS 5
1979 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1980 #define BS 6
1981 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1982 #define BS 7
1983 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1984 #define BS 8
1985 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1986 #define BS 9
1987 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1988 #define BS 10
1989 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1990 #define BS 11
1991 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1992 #define BS 12
1993 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1994 #define BS bs
1995 #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1996 
1997 /* ==========================================================================================*/
1998 
1999 /*              create parallel to sequential scatter context                           */
2000 
2001 PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2002 
2003 /*
2004    bs indicates how many elements there are in each block. Normally this would be 1.
2005 
2006    contains check that PetscMPIInt can handle the sizes needed
2007 */
/*
   VecScatterCreateLocal_PtoS_MPI1 - Builds ctx->todata and ctx->fromdata for a scatter whose
   source block indices (inidx) are looked up in the ownership ranges of xin and whose destination
   block indices (inidy) refer to yin.

   Input Parameters:
+  nx,inidx - number of source block indices and the indices; each is multiplied by bs before use
.  ny,inidy - number of destination block indices and the indices; each is multiplied by bs before use
.  xin      - vector supplying the communicator and the ownership ranges (xin->map->range)
.  yin      - vector whose global size bounds the destination indices
.  bs       - number of elements in each block
-  ctx      - scatter context that receives the new "to" and "from" structures

   Notes:
   Outline: (1) determine the owning rank of every requested source entry, (2) send every owner the
   list of entries wanted from it, (3) turn the lists received into the send side ("to") and the
   locally known destinations into the receive side ("from"), (4) record the entries owned by this
   rank as a local scatter, (5) finish in VecScatterCreateCommon_PtoS_MPI1().

   The destination indices are read with loops over nx, so nx and ny are presumably equal - TODO confirm.

   NOTE(review): no explicit check that the message counts fit in a PetscMPIInt is visible in this body.
*/
PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
{
  VecScatter_MPI_General *from,*to;
  PetscMPIInt            size,rank,imdex,tag,n;
  PetscInt               *source = NULL,*owners = NULL,nxr;
  PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
  PetscMPIInt            *nprocs = NULL,nrecvs;
  PetscInt               i,j,idx,nsends;
  PetscMPIInt            *owner = NULL;
  PetscInt               *starts = NULL,count,slen;
  PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
  PetscMPIInt            *onodes1,*olengths1;
  MPI_Comm               comm;
  MPI_Request            *send_waits = NULL,*recv_waits = NULL;
  MPI_Status             recv_status,*send_status;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  ierr   = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
  ierr   = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  owners = xin->map->range;
  ierr   = VecGetSize(yin,&lengthy);CHKERRQ(ierr);
  /* lengthx is obtained here but not used again in this routine */
  ierr   = VecGetSize(xin,&lengthx);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /*  owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
  ierr = PetscMalloc2(size,&nprocs,nx,&owner);CHKERRQ(ierr);
  ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);

  /* j is kept between iterations so the search resumes at the previous owner;
     it is reset only when the new index lies below that owner's range */
  j      = 0;
  nsends = 0;
  for (i=0; i<nx; i++) {
    idx = bs*inidx[i];
    if (idx < owners[j]) j = 0;
    for (; j<size; j++) {
      if (idx < owners[j+1]) {
        if (!nprocs[j]++) nsends++;
        owner[i] = j;
        break;
      }
    }
    if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
  }

  /* entries owned by this rank are handled as a local scatter, not as a message */
  nprocslocal  = nprocs[rank];
  nprocs[rank] = 0;
  if (nprocslocal) nsends--;
  /* inform other processors of number of messages and max length*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
  /* order the incoming messages by sending rank, keeping each length with its rank */
  ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
  recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

  /* post receives:   */
  /* source is allocated and freed together with rvalues but is not otherwise used here */
  ierr  = PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    ierr   = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
    count += olengths1[i];
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  /* nxr: number of requested entries that live on other ranks */
  nxr = 0;
  for (i=0; i<nx; i++) {
    if (owner[i] != rank) nxr++;
  }
  ierr = PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);CHKERRQ(ierr);

  starts[0]  = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
  /* starts[] doubles as a running insertion cursor while svalues is grouped by destination rank */
  for (i=0; i<nx; i++) {
    if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
  }
  /* the cursors were advanced above, so rebuild the offsets before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }

  /*  wait on receives */
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    slen += n;
    count--;
  }

  /* sanity check: the entries actually received must match the announced lengths */
  if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

  /* allocate entire send scatter context */
  ierr  = PetscNewLog(ctx,&to);CHKERRQ(ierr);
  to->n = nrecvs;

  ierr  = PetscMalloc1(nrecvs,&to->requests);CHKERRQ(ierr);
  ierr  = PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);CHKERRQ(ierr);
  /* both status arrays are stored in the "to" structure */
  ierr  = PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);CHKERRQ(ierr);

  ctx->todata   = (void*)to;
  to->starts[0] = 0;

  if (nrecvs) {
    /* move the data into the send scatter */
    /* received indices are global; subtracting owners[rank] makes them local offsets */
    base     = owners[rank];
    rsvalues = rvalues;
    for (i=0; i<nrecvs; i++) {
      to->starts[i+1] = to->starts[i] + olengths1[i];
      to->procs[i]    = onodes1[i];
      values = rsvalues;
      rsvalues += olengths1[i];
      for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
    }
  }
  ierr = PetscFree(olengths1);CHKERRQ(ierr);
  ierr = PetscFree(onodes1);CHKERRQ(ierr);
  ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);

  /* allocate entire receive scatter context */
  ierr = PetscNewLog(ctx,&from);CHKERRQ(ierr);
  from->n = nsends;

  ierr = PetscMalloc1(nsends,&from->requests);CHKERRQ(ierr);
  ierr = PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);CHKERRQ(ierr);
  ctx->fromdata = (void*)from;

  /* move data into receive scatter */
  /* lowner[r]: position of rank r in from->procs; start[]: running insertion cursor per message */
  ierr = PetscMalloc2(size,&lowner,nsends+1,&start);CHKERRQ(ierr);
  count = 0; from->starts[0] = start[0] = 0;
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      lowner[i]            = count;
      from->procs[count++] = i;
      from->starts[count]  = start[count] = start[count-1] + nprocs[i];
    }
  }

  for (i=0; i<nx; i++) {
    if (owner[i] != rank) {
      from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
      if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
    }
  }
  ierr = PetscFree2(lowner,start);CHKERRQ(ierr);
  ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  /* svalues is the send buffer, so it is released only after the sends have completed */
  ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);

  if (nprocslocal) {
    PetscInt nt = from->local.n = to->local.n = nprocslocal;
    /* we have a scatter to ourselves */
    ierr = PetscMalloc1(nt,&to->local.vslots);CHKERRQ(ierr);
    ierr = PetscMalloc1(nt,&from->local.vslots);CHKERRQ(ierr);
    /* nt is reused as the fill counter for the two slot arrays */
    nt   = 0;
    for (i=0; i<nx; i++) {
      idx = bs*inidx[i];
      if (idx >= owners[rank] && idx < owners[rank+1]) {
        to->local.vslots[nt]     = idx - owners[rank];
        from->local.vslots[nt++] = bs*inidy[i];
        if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
      }
    }
    ierr = PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));CHKERRQ(ierr);
  } else {
    from->local.n      = 0;
    from->local.vslots = NULL;
    to->local.n        = 0;
    to->local.vslots   = NULL;
  }

  /* the non-matching slot lists start out empty and marked as not yet computed */
  from->local.nonmatching_computed = PETSC_FALSE;
  from->local.n_nonmatching        = 0;
  from->local.slots_nonmatching    = NULL;
  to->local.nonmatching_computed   = PETSC_FALSE;
  to->local.n_nonmatching          = 0;
  to->local.slots_nonmatching      = NULL;

  from->format = VEC_SCATTER_MPI_GENERAL;
  to->format   = VEC_SCATTER_MPI_GENERAL;
  from->bs     = bs;
  to->bs       = bs;

  /* register the persistent requests and install the begin/end/view/destroy operations */
  ierr = VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2208 
2209 /*
2210    bs indicates how many elements there are in each block. Normally this would be 1.
2211 */
/*
   VecScatterCreateCommon_PtoS_MPI1 - Completes a scatter context whose "to" and "from" structures
   have already been filled in.

   Input Parameters:
+  from - receive side; its starts, indices, procs, values and requests arrays are read here
.  to   - send side; its starts, procs, values and requests arrays are read here, and to->bs gives the block size
-  ctx  - scatter context whose operations (destroy, copy, begin, end, view) are installed

   Notes:
   Persistent MPI requests are created for the forward scatter (using the tag already stored in
   ctx) and for the reverse scatter (using a newly obtained tag), and the begin/end routines that
   match the block size are selected.
*/
PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
{
  MPI_Comm       comm;
  PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
  PetscInt       bs   = to->bs;
  PetscMPIInt    size;
  PetscInt       i, n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ctx,&comm);CHKERRQ(ierr);
  /* tagr keeps reverse-scatter messages separate from forward ones */
  ierr = PetscObjectGetNewTag((PetscObject)ctx,&tagr);CHKERRQ(ierr);
  ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;

  /* size is queried here but not used again in this routine */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
  to->contiq = PETSC_FALSE;
  n = from->starts[from->n];
  from->contiq = PETSC_TRUE;
  /* contiguous means every receive index is exactly bs past the previous one */
  for (i=1; i<n; i++) {
    if (from->indices[i] != from->indices[i-1] + bs) {
      from->contiq = PETSC_FALSE;
      break;
    }
  }

  {
    PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
    PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
    MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
    MPI_Request *rev_swaits,*rev_rwaits;
    PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

    /* allocate additional wait variables for the "reverse" scatter */
    ierr = PetscMalloc1(to->n,&rev_rwaits);CHKERRQ(ierr);
    ierr = PetscMalloc1(from->n,&rev_swaits);CHKERRQ(ierr);
    to->rev_requests   = rev_rwaits;
    from->rev_requests = rev_swaits;

    /* reverse scatter: send out of the "from" buffer with tagr */
    for (i=0; i<from->n; i++) {
      ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);CHKERRQ(ierr);
    }

    /* forward scatter: send out of the "to" buffer with tag */
    for (i=0; i<to->n; i++) {
      ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }
    /* Register receives for scatter and reverse */
    for (i=0; i<from->n; i++) {
      ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    for (i=0; i<to->n; i++) {
      ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);CHKERRQ(ierr);
    }
    ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
  }
  ierr = PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);CHKERRQ(ierr);

  /* when PetscDefined(USE_DEBUG) holds, verify that every rank of ctx's communicator uses the
     same block size; i and n are reused to hold the global minimum and maximum */
  if (PetscDefined(USE_DEBUG)) {
    ierr = MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
    ierr = MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
    if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
  }

  /* block sizes 1 through 12 have dedicated begin/end routines; any other size uses the _bs pair */
  switch (bs) {
  case 12:
    ctx->ops->begin = VecScatterBeginMPI1_12;
    ctx->ops->end   = VecScatterEndMPI1_12;
    break;
  case 11:
    ctx->ops->begin = VecScatterBeginMPI1_11;
    ctx->ops->end   = VecScatterEndMPI1_11;
    break;
  case 10:
    ctx->ops->begin = VecScatterBeginMPI1_10;
    ctx->ops->end   = VecScatterEndMPI1_10;
    break;
  case 9:
    ctx->ops->begin = VecScatterBeginMPI1_9;
    ctx->ops->end   = VecScatterEndMPI1_9;
    break;
  case 8:
    ctx->ops->begin = VecScatterBeginMPI1_8;
    ctx->ops->end   = VecScatterEndMPI1_8;
    break;
  case 7:
    ctx->ops->begin = VecScatterBeginMPI1_7;
    ctx->ops->end   = VecScatterEndMPI1_7;
    break;
  case 6:
    ctx->ops->begin = VecScatterBeginMPI1_6;
    ctx->ops->end   = VecScatterEndMPI1_6;
    break;
  case 5:
    ctx->ops->begin = VecScatterBeginMPI1_5;
    ctx->ops->end   = VecScatterEndMPI1_5;
    break;
  case 4:
    ctx->ops->begin = VecScatterBeginMPI1_4;
    ctx->ops->end   = VecScatterEndMPI1_4;
    break;
  case 3:
    ctx->ops->begin = VecScatterBeginMPI1_3;
    ctx->ops->end   = VecScatterEndMPI1_3;
    break;
  case 2:
    ctx->ops->begin = VecScatterBeginMPI1_2;
    ctx->ops->end   = VecScatterEndMPI1_2;
    break;
  case 1:
    ctx->ops->begin = VecScatterBeginMPI1_1;
    ctx->ops->end   = VecScatterEndMPI1_1;
    break;
  default:
    ctx->ops->begin = VecScatterBeginMPI1_bs;
    ctx->ops->end   = VecScatterEndMPI1_bs;

  }
  ctx->ops->view = VecScatterView_MPI_MPI1;
  /* try to optimize PtoP vecscatter with memcpy's */
  ierr = VecScatterMemcpyPlanCreate_PtoP(to,from);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2334 
2335 
2336 /* ------------------------------------------------------------------------------------*/
2337 /*
2338          Scatter from local Seq vectors to a parallel vector.
2339          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2340          reverses the result.
2341 */
VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2342 PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2343 {
2344   PetscErrorCode         ierr;
2345   MPI_Request            *waits;
2346   VecScatter_MPI_General *to,*from;
2347 
2348   PetscFunctionBegin;
2349   ierr          = VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);CHKERRQ(ierr);
2350   to            = (VecScatter_MPI_General*)ctx->fromdata;
2351   from          = (VecScatter_MPI_General*)ctx->todata;
2352   ctx->todata   = (void*)to;
2353   ctx->fromdata = (void*)from;
2354   /* these two are special, they are ALWAYS stored in to struct */
2355   to->sstatus   = from->sstatus;
2356   to->rstatus   = from->rstatus;
2357 
2358   from->sstatus = NULL;
2359   from->rstatus = NULL;
2360 
2361   waits              = from->rev_requests;
2362   from->rev_requests = from->requests;
2363   from->requests     = waits;
2364   waits              = to->rev_requests;
2365   to->rev_requests   = to->requests;
2366   to->requests       = waits;
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 /* ---------------------------------------------------------------------------------*/
/*
   VecScatterCreateLocal_PtoP_MPI1 - Builds the scatter by first shipping every (source index,
   destination index) pair to the rank that owns the source entry of xin, and then calling
   VecScatterCreateLocal_StoP_MPI1() with the pairs each rank received.

   Input Parameters:
+  nx,inidx - number of source block indices and the indices; each is multiplied by bs before use
.  ny,inidy - number of destination block indices and the indices; each is multiplied by bs before use
.  xin      - vector supplying the communicator and the ownership ranges (xin->map->range)
.  yin      - destination vector, passed through to VecScatterCreateLocal_StoP_MPI1()
.  bs       - number of elements in each block
-  ctx      - scatter context to fill in

   Notes:
   On a single-rank communicator the routine forwards directly to VecScatterCreateLocal_StoP_MPI1().

   NOTE(review): the indices are multiplied by bs before they are sent, and the received values are
   then passed on together with the same bs; VecScatterCreateLocal_PtoS_MPI1() multiplies its index
   arguments by bs again. This looks consistent only for bs == 1 - confirm against the callers.
*/
PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag,imdex,n;
  PetscInt       *owners = xin->map->range;
  PetscMPIInt    *nprocs = NULL;
  PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
  PetscMPIInt    *owner   = NULL;
  PetscInt       *starts  = NULL,count,slen;
  PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
  PetscMPIInt    *onodes1,*olengths1,nrecvs;
  MPI_Comm       comm;
  MPI_Request    *send_waits = NULL,*recv_waits = NULL;
  MPI_Status     recv_status,*send_status = NULL;
  PetscBool      duplicate = PETSC_FALSE;
  PetscBool      found = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* with a single rank nothing has to be shipped */
  if (size == 1) {
    ierr = VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*
     Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
     They then call the StoPScatterCreate()
  */
  /*  first count number of contributors to each processor */
  ierr = PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);CHKERRQ(ierr);
  ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);

  lastidx = -1;
  j       = 0;
  for (i=0; i<nx; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = bs*inidx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++;
        owner[i] = j;
        found = PETSC_TRUE;
        break;
      }
    }
    /* NOTE(review): PetscUnlikelyDebug presumably makes this check active only in debug builds;
       if so, owner[i] stays unset for an out-of-range index otherwise - confirm */
    if (PetscUnlikelyDebug(!found)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
    found = PETSC_FALSE;
  }
  /* one message per rank that owns at least one requested entry (this rank included) */
  nsends = 0;
  for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

  /* inform other processors of number of messages and max length*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
  ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
  recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

  /* post receives:   */
  /* every entry travels as a pair (source index, destination index), hence the factors of 2 */
  ierr = PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);

  count = 0;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
    count += olengths1[i];
  }
  ierr = PetscFree(onodes1);CHKERRQ(ierr);

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  starts[0]= 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
  /* starts[] doubles as a running insertion cursor while the pairs are grouped by destination rank */
  for (i=0; i<nx; i++) {
    svalues[2*starts[owner[i]]]       = bs*inidx[i];
    svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
  }

  /* the cursors were advanced above, so rebuild the offsets before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      ierr = MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
      count++;
    }
  }
  ierr = PetscFree3(nprocs,owner,starts);CHKERRQ(ierr);

  /*  wait on receives */
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    /* n counts integers; each pair contributes two of them */
    slen += n/2;
    count--;
  }
  if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

  /* split the received pairs: source indices become offsets relative to owners[rank],
     destination indices are kept as received */
  ierr     = PetscMalloc2(slen,&local_inidx,slen,&local_inidy);CHKERRQ(ierr);
  base     = owners[rank];
  count    = 0;
  rsvalues = rvalues;
  for (i=0; i<nrecvs; i++) {
    values    = rsvalues;
    rsvalues += 2*olengths1[i];
    for (j=0; j<olengths1[i]; j++) {
      local_inidx[count]   = values[2*j] - base;
      local_inidy[count++] = values[2*j+1];
    }
  }
  ierr = PetscFree(olengths1);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);}
  ierr = PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);CHKERRQ(ierr);

  /*
     should sort and remove duplicates from local_inidx,local_inidy
  */

  /* The block below is compiled only when do_it_slow is defined; it is the only place that sets
     duplicate. NOTE(review): it uses start and last, which are not declared in this routine. */
#if defined(do_it_slow)
  /* sort on the from index */
  ierr  = PetscSortIntWithArray(slen,local_inidx,local_inidy);CHKERRQ(ierr);
  start = 0;
  while (start < slen) {
    count = start+1;
    last  = local_inidx[start];
    while (count < slen && last == local_inidx[count]) count++;
    if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
      /* sort on to index */
      ierr = PetscSortInt(count-start,local_inidy+start);CHKERRQ(ierr);
    }
    /* remove duplicates; not most efficient way, but probably good enough */
    i = start;
    while (i < count-1) {
      if (local_inidy[i] != local_inidy[i+1]) i++;
      else { /* found a duplicate */
        duplicate = PETSC_TRUE;
        for (j=i; j<slen-1; j++) {
          local_inidx[j] = local_inidx[j+1];
          local_inidy[j] = local_inidy[j+1];
        }
        slen--;
        count--;
      }
    }
    start = count;
  }
#endif
  if (duplicate) {
    ierr = PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");CHKERRQ(ierr);
  }
  /* every source index is now local to this rank, so the remaining setup is delegated */
  ierr = VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
  ierr = PetscFree2(local_inidx,local_inidy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2534 
/*
   VecScatterSetUp_MPI1 - Setup operation of the MPI1 scatter type.

   Forwards to VecScatterSetUp_vectype_private(), handing it the three MPI1 creation routines
   (PtoS, StoP and PtoP) defined in this file.
*/
PetscErrorCode VecScatterSetUp_MPI1(VecScatter ctx)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI1,VecScatterCreateLocal_StoP_MPI1,VecScatterCreateLocal_PtoP_MPI1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2543 
/*
   VecScatterCreate_MPI1 - Turns ctx into an MPI1 scatter.

   Installs VecScatterSetUp_MPI1() as the setup operation, changes the object's type name to
   VECSCATTERMPI1 and emits an informational message. No communication data is built here.
*/
PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
{
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ctx->ops->setup = VecScatterSetUp_MPI1;
  ierr = PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);CHKERRQ(ierr);
  ierr = PetscInfo(ctx,"Using MPI1 for vector scatter\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2554 
2555