1 /*
2     Defines parallel vector scatters.
3 */
4 
5 #include <petsc/private/vecscatterimpl.h>
6 
7 
VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)8 PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
9 {
10   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
11   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
12   PetscErrorCode         ierr;
13   PetscInt               i;
14   PetscMPIInt            rank;
15   PetscViewerFormat      format;
16   PetscBool              iascii;
17 
18   PetscFunctionBegin;
19   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
20   if (iascii) {
21     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);CHKERRQ(ierr);
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format ==  PETSC_VIEWER_ASCII_INFO) {
24       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
25 
26       ierr = MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
27       ierr = MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
28       itmp = to->starts[to->n+1];
29       ierr = MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
30       itmp = from->starts[from->n+1];
31       ierr = MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
32       ierr = MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
33 
34       ierr = PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");CHKERRQ(ierr);
35       ierr = PetscViewerASCIIPrintf(viewer,"  Blocksize %D\n",to->bs);CHKERRQ(ierr);
36       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);CHKERRQ(ierr);
37       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);CHKERRQ(ierr);
38       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
39       ierr = PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
40       ierr = PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));CHKERRQ(ierr);
41     } else {
42       ierr = PetscViewerASCIIPrintf(viewer,"  VecScatter Blocksize %D\n",to->bs);CHKERRQ(ierr);
43       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
44       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);CHKERRQ(ierr);
45       if (to->n) {
46         for (i=0; i<to->n; i++) {
47           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);CHKERRQ(ierr);
48         }
49         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote sends (in order by process sent to)\n",rank);CHKERRQ(ierr);
50         for (i=0; i<to->starts[to->n]; i++) {
51           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);CHKERRQ(ierr);
52         }
53       }
54 
55       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);CHKERRQ(ierr);
56       if (from->n) {
57         for (i=0; i<from->n; i++) {
58           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);CHKERRQ(ierr);
59         }
60 
61         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote receives (in order by process received from)\n",rank);CHKERRQ(ierr);
62         for (i=0; i<from->starts[from->n]; i++) {
63           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);CHKERRQ(ierr);
64         }
65       }
66       if (to->local.n) {
67         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);CHKERRQ(ierr);
68         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
69           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);CHKERRQ(ierr);
70         }
71       }
72 
73      for (i=0; i<to->msize; i++) {
74        if (to->sharedspacestarts[i+1] > to->sharedspacestarts[i]) {
75           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory to local memory partner %d count %d\n",rank,i,to->sharedspacestarts[i+1]-to->sharedspacestarts[i]);CHKERRQ(ierr);
76         }
77       }
78      for (i=0; i<from->msize; i++) {
79        if (from->sharedspacestarts[i+1] > from->sharedspacestarts[i]) {
80           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory from local memory partner %d count %d\n",rank,i,from->sharedspacestarts[i+1]-from->sharedspacestarts[i]);CHKERRQ(ierr);
81         }
82       }
83 
84       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
85       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
86 
87       ierr = PetscViewerASCIIPrintf(viewer,"Method used to implement the VecScatter: ");CHKERRQ(ierr);
88     }
89   }
90   PetscFunctionReturn(0);
91 }
92 
93 /* -----------------------------------------------------------------------------------*/
94 /*
95       The next routine determines what part of  the local part of the scatter is an
96   exact copy of values into their current location. We check this here and
97   then know that we need not perform that portion of the scatter when the vector is
98   scattering to itself with INSERT_VALUES.
99 
100      This is currently not used but would speed up, for example DMLocalToLocalBegin/End()
101 
102 */
VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General * to,VecScatter_Seq_General * from)103 PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
104 {
105   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
106   PetscErrorCode ierr;
107   PetscInt       *nto_slots,*nfrom_slots,j = 0;
108 
109   PetscFunctionBegin;
110   for (i=0; i<n; i++) {
111     if (to_slots[i] != from_slots[i]) n_nonmatching++;
112   }
113 
114   if (!n_nonmatching) {
115     to->nonmatching_computed = PETSC_TRUE;
116     to->n_nonmatching        = from->n_nonmatching = 0;
117     ierr = PetscInfo1(scatter,"Reduced %D to 0\n", n);CHKERRQ(ierr);
118   } else if (n_nonmatching == n) {
119     to->nonmatching_computed = PETSC_FALSE;
120     ierr = PetscInfo(scatter,"All values non-matching\n");CHKERRQ(ierr);
121   } else {
122     to->nonmatching_computed= PETSC_TRUE;
123     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;
124 
125     ierr = PetscMalloc1(n_nonmatching,&nto_slots);CHKERRQ(ierr);
126     ierr = PetscMalloc1(n_nonmatching,&nfrom_slots);CHKERRQ(ierr);
127 
128     to->slots_nonmatching   = nto_slots;
129     from->slots_nonmatching = nfrom_slots;
130     for (i=0; i<n; i++) {
131       if (to_slots[i] != from_slots[i]) {
132         nto_slots[j]   = to_slots[i];
133         nfrom_slots[j] = from_slots[i];
134         j++;
135       }
136     }
137     ierr = PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);CHKERRQ(ierr);
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 /* --------------------------------------------------------------------------------------*/
143 
144 /* -------------------------------------------------------------------------------------*/
VecScatterDestroy_PtoP_MPI3(VecScatter ctx)145 PetscErrorCode VecScatterDestroy_PtoP_MPI3(VecScatter ctx)
146 {
147   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
148   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
149   PetscErrorCode         ierr;
150   PetscInt               i;
151 
152   PetscFunctionBegin;
153   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
154   if (to->requests) {
155     for (i=0; i<to->n; i++) {
156       ierr = MPI_Request_free(to->requests + i);CHKERRQ(ierr);
157     }
158   }
159   if (to->rev_requests) {
160     for (i=0; i<to->n; i++) {
161       ierr = MPI_Request_free(to->rev_requests + i);CHKERRQ(ierr);
162     }
163   }
164   if (from->requests) {
165     for (i=0; i<from->n; i++) {
166       ierr = MPI_Request_free(from->requests + i);CHKERRQ(ierr);
167     }
168   }
169   if (from->rev_requests) {
170     for (i=0; i<from->n; i++) {
171       ierr = MPI_Request_free(from->rev_requests + i);CHKERRQ(ierr);
172     }
173   }
174   if (to->sharedwin != MPI_WIN_NULL) {ierr = MPI_Win_free(&to->sharedwin);CHKERRQ(ierr);}
175   if (from->sharedwin != MPI_WIN_NULL) {ierr = MPI_Win_free(&from->sharedwin);CHKERRQ(ierr);}
176   ierr = PetscFree(to->sharedspaces);CHKERRQ(ierr);
177   ierr = PetscFree(to->sharedspacesoffset);CHKERRQ(ierr);
178   ierr = PetscFree(to->sharedspaceindices);CHKERRQ(ierr);
179   ierr = PetscFree(to->sharedspacestarts);CHKERRQ(ierr);
180 
181   ierr = PetscFree(from->sharedspaceindices);CHKERRQ(ierr);
182   ierr = PetscFree(from->sharedspaces);CHKERRQ(ierr);
183   ierr = PetscFree(from->sharedspacesoffset);CHKERRQ(ierr);
184   ierr = PetscFree(from->sharedspacestarts);CHKERRQ(ierr);
185 
186   ierr = PetscFree(to->local.vslots);CHKERRQ(ierr);
187   ierr = PetscFree(from->local.vslots);CHKERRQ(ierr);
188   ierr = PetscFree(to->local.slots_nonmatching);CHKERRQ(ierr);
189   ierr = PetscFree(from->local.slots_nonmatching);CHKERRQ(ierr);
190   ierr = PetscFree(to->rev_requests);CHKERRQ(ierr);
191   ierr = PetscFree(from->rev_requests);CHKERRQ(ierr);
192   ierr = PetscFree(to->requests);CHKERRQ(ierr);
193   ierr = PetscFree(from->requests);CHKERRQ(ierr);
194   ierr = PetscFree4(to->values,to->indices,to->starts,to->procs);CHKERRQ(ierr);
195   ierr = PetscFree2(to->sstatus,to->rstatus);CHKERRQ(ierr);
196   ierr = PetscFree4(from->values,from->indices,from->starts,from->procs);CHKERRQ(ierr);
197   ierr = VecScatterMemcpyPlanDestroy_PtoP(to,from);CHKERRQ(ierr);
198   ierr = PetscFree(from);CHKERRQ(ierr);
199   ierr = PetscFree(to);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 
203 /* --------------------------------------------------------------------------------------*/
204 
VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)205 PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
206 {
207   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
208   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
209   PetscErrorCode         ierr;
210   PetscInt               ny,bs = in_from->bs,jj;
211   PetscShmComm           scomm;
212   MPI_Comm               mscomm;
213   MPI_Info               info;
214 
215   PetscFunctionBegin;
216   out->ops->begin   = in->ops->begin;
217   out->ops->end     = in->ops->end;
218   out->ops->copy    = in->ops->copy;
219   out->ops->destroy = in->ops->destroy;
220   out->ops->view    = in->ops->view;
221 
222   /* allocate entire send scatter context */
223   ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
224   out_to->sharedwin       = MPI_WIN_NULL;
225   ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
226   out_from->sharedwin       = MPI_WIN_NULL;
227 
228   ny                = in_to->starts[in_to->n];
229   out_to->n         = in_to->n;
230   out_to->format    = in_to->format;
231 
232   ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
233   ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
234   ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
235   ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
236   ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
237   ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
238 
239   out->todata                        = (void*)out_to;
240   out_to->local.n                    = in_to->local.n;
241   out_to->local.nonmatching_computed = PETSC_FALSE;
242   out_to->local.n_nonmatching        = 0;
243   out_to->local.slots_nonmatching    = NULL;
244   if (in_to->local.n) {
245     ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
246     ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
247     ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
248     ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
249   } else {
250     out_to->local.vslots   = NULL;
251     out_from->local.vslots = NULL;
252   }
253 
254   /* allocate entire receive context */
255   ierr = VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);CHKERRQ(ierr);
256   ierr = VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);CHKERRQ(ierr);
257   ierr = VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);CHKERRQ(ierr);
258   ierr = VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);CHKERRQ(ierr);
259 
260   out_from->format    = in_from->format;
261   ny                  = in_from->starts[in_from->n];
262   out_from->n         = in_from->n;
263 
264   ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
265   ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
266   ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
267   ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
268   ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
269 
270   out->fromdata                        = (void*)out_from;
271   out_from->local.n                    = in_from->local.n;
272   out_from->local.nonmatching_computed = PETSC_FALSE;
273   out_from->local.n_nonmatching        = 0;
274   out_from->local.slots_nonmatching    = NULL;
275 
276   /*
277       set up the request arrays for use with isend_init() and irecv_init()
278   */
279   {
280     PetscMPIInt tag;
281     MPI_Comm    comm;
282     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
283     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
284     PetscInt    i;
285     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
286     MPI_Request *rev_swaits,*rev_rwaits;
287     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
288 
289     ierr = PetscMalloc1(in_to->n,&out_to->rev_requests);CHKERRQ(ierr);
290     ierr = PetscMalloc1(in_from->n,&out_from->rev_requests);CHKERRQ(ierr);
291 
292     rev_rwaits = out_to->rev_requests;
293     rev_swaits = out_from->rev_requests;
294 
295     out_from->bs = out_to->bs = bs;
296     tag          = ((PetscObject)out)->tag;
297     ierr         = PetscObjectGetComm((PetscObject)out,&comm);CHKERRQ(ierr);
298 
299     /* Register the receives that you will use later (sends for scatter reverse) */
300     for (i=0; i<out_from->n; i++) {
301       ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
302       ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);CHKERRQ(ierr);
303     }
304 
305     for (i=0; i<out_to->n; i++) {
306       ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
307     }
308 
309     /* Register receives for scatter reverse */
310     for (i=0; i<out_to->n; i++) {
311       ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);CHKERRQ(ierr);
312     }
313   }
314 
315     /* since the to and from data structures are not symmetric for shared memory copies we insure they always listed in "standard" form */
316   if (!in_to->sharedwin) {
317     VecScatter_MPI_General *totmp = in_to,*out_totmp = out_to;
318     in_to       = in_from;
319     in_from     = totmp;
320     out_to      = out_from;
321     out_from    = out_totmp;
322   }
323 
324   /* copy the to parts for the shared memory copies between processes */
325   out_to->sharedcnt = in_to->sharedcnt;
326   out_to->msize     = in_to->msize;
327   ierr = PetscMalloc1(out_to->msize+1,&out_to->sharedspacestarts);CHKERRQ(ierr);
328   ierr = PetscArraycpy(out_to->sharedspacestarts,in_to->sharedspacestarts,out_to->msize+1);CHKERRQ(ierr);
329   ierr = PetscMalloc1(out_to->sharedcnt,&out_to->sharedspaceindices);CHKERRQ(ierr);
330   ierr = PetscArraycpy(out_to->sharedspaceindices,in_to->sharedspaceindices,out_to->sharedcnt);CHKERRQ(ierr);
331 
332   ierr = PetscShmCommGet(PetscObjectComm((PetscObject)in),&scomm);CHKERRQ(ierr);
333   ierr = PetscShmCommGetMpiShmComm(scomm,&mscomm);CHKERRQ(ierr);
334   ierr = MPI_Info_create(&info);CHKERRQ(ierr);
335   ierr = MPI_Info_set(info, "alloc_shared_noncontig", "true");CHKERRQ(ierr);
336   ierr = MPIU_Win_allocate_shared(bs*out_to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&out_to->sharedspace,&out_to->sharedwin);CHKERRQ(ierr);
337   ierr = MPI_Info_free(&info);CHKERRQ(ierr);
338 
339   /* copy the to parts for the shared memory copies between processes */
340   out_from->sharedcnt = in_from->sharedcnt;
341   out_from->msize     = in_from->msize;
342   ierr = PetscMalloc1(out_from->msize+1,&out_from->sharedspacestarts);CHKERRQ(ierr);
343   ierr = PetscArraycpy(out_from->sharedspacestarts,in_from->sharedspacestarts,out_from->msize+1);CHKERRQ(ierr);
344   ierr = PetscMalloc1(out_from->sharedcnt,&out_from->sharedspaceindices);CHKERRQ(ierr);
345   ierr = PetscArraycpy(out_from->sharedspaceindices,in_from->sharedspaceindices,out_from->sharedcnt);CHKERRQ(ierr);
346   ierr = PetscMalloc1(out_from->msize,&out_from->sharedspacesoffset);CHKERRQ(ierr);
347   ierr = PetscArraycpy(out_from->sharedspacesoffset,in_from->sharedspacesoffset,out_from->msize);CHKERRQ(ierr);
348   ierr = PetscCalloc1(out_from->msize,&out_from->sharedspaces);CHKERRQ(ierr);
349   for (jj=0; jj<out_from->msize; jj++) {
350     MPI_Aint    isize;
351     PetscMPIInt disp_unit;
352     ierr = MPIU_Win_shared_query(out_to->sharedwin,jj,&isize,&disp_unit,&out_from->sharedspaces[jj]);CHKERRQ(ierr);
353   }
354   PetscFunctionReturn(0);
355 }
356 
VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)357 PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
358 {
359   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
360   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
361   PetscErrorCode         ierr;
362   PetscInt               ny,bs = in_from->bs;
363   PetscMPIInt            size;
364 
365   PetscFunctionBegin;
366   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);CHKERRQ(ierr);
367 
368   out->ops->begin     = in->ops->begin;
369   out->ops->end       = in->ops->end;
370   out->ops->copy      = in->ops->copy;
371   out->ops->destroy   = in->ops->destroy;
372   out->ops->view      = in->ops->view;
373 
374   /* allocate entire send scatter context */
375   ierr = PetscNewLog(out,&out_to);CHKERRQ(ierr);
376   out_to->sharedwin       = MPI_WIN_NULL;
377   ierr = PetscNewLog(out,&out_from);CHKERRQ(ierr);
378   out_from->sharedwin       = MPI_WIN_NULL;
379 
380   ny                = in_to->starts[in_to->n];
381   out_to->n         = in_to->n;
382   out_to->format    = in_to->format;
383 
384   ierr = PetscMalloc1(out_to->n,&out_to->requests);CHKERRQ(ierr);
385   ierr = PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);CHKERRQ(ierr);
386   ierr = PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);CHKERRQ(ierr);
387   ierr = PetscArraycpy(out_to->indices,in_to->indices,ny);CHKERRQ(ierr);
388   ierr = PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);CHKERRQ(ierr);
389   ierr = PetscArraycpy(out_to->procs,in_to->procs,out_to->n);CHKERRQ(ierr);
390 
391   out->todata                        = (void*)out_to;
392   out_to->local.n                    = in_to->local.n;
393   out_to->local.nonmatching_computed = PETSC_FALSE;
394   out_to->local.n_nonmatching        = 0;
395   out_to->local.slots_nonmatching    = NULL;
396   if (in_to->local.n) {
397     ierr = PetscMalloc1(in_to->local.n,&out_to->local.vslots);CHKERRQ(ierr);
398     ierr = PetscMalloc1(in_from->local.n,&out_from->local.vslots);CHKERRQ(ierr);
399     ierr = PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);CHKERRQ(ierr);
400     ierr = PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);CHKERRQ(ierr);
401   } else {
402     out_to->local.vslots   = NULL;
403     out_from->local.vslots = NULL;
404   }
405 
406   /* allocate entire receive context */
407   ierr = VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);CHKERRQ(ierr);
408 
409   out_from->format    = in_from->format;
410   ny                  = in_from->starts[in_from->n];
411   out_from->n         = in_from->n;
412 
413   ierr = PetscMalloc1(out_from->n,&out_from->requests);CHKERRQ(ierr);
414   ierr = PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);CHKERRQ(ierr);
415   ierr = PetscArraycpy(out_from->indices,in_from->indices,ny);CHKERRQ(ierr);
416   ierr = PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);CHKERRQ(ierr);
417   ierr = PetscArraycpy(out_from->procs,in_from->procs,out_from->n);CHKERRQ(ierr);
418 
419   out->fromdata                        = (void*)out_from;
420   out_from->local.n                    = in_from->local.n;
421   out_from->local.nonmatching_computed = PETSC_FALSE;
422   out_from->local.n_nonmatching        = 0;
423   out_from->local.slots_nonmatching    = NULL;
424 
425   PetscFunctionReturn(0);
426 }
427 /* --------------------------------------------------------------------------------------------------
428     Packs and unpacks the message data into send or from receive buffers.
429 
430     These could be generated automatically.
431 
432     Fortran kernels etc. could be used.
433 */
Pack_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)434 PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
435 {
436   PetscInt i;
437   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
438 }
439 
UnPack_1(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)440 PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
441 {
442   PetscInt i;
443 
444   PetscFunctionBegin;
445   switch (addv) {
446   case INSERT_VALUES:
447   case INSERT_ALL_VALUES:
448     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
449     break;
450   case ADD_VALUES:
451   case ADD_ALL_VALUES:
452     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
453     break;
454 #if !defined(PETSC_USE_COMPLEX)
455   case MAX_VALUES:
456     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
457 #else
458   case MAX_VALUES:
459 #endif
460   case NOT_SET_VALUES:
461     break;
462   default:
463     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
464   }
465   PetscFunctionReturn(0);
466 }
467 
Scatter_1(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)468 PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
469 {
470   PetscInt i;
471 
472   PetscFunctionBegin;
473   switch (addv) {
474   case INSERT_VALUES:
475   case INSERT_ALL_VALUES:
476     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
477     break;
478   case ADD_VALUES:
479   case ADD_ALL_VALUES:
480     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
481     break;
482 #if !defined(PETSC_USE_COMPLEX)
483   case MAX_VALUES:
484     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
485 #else
486   case MAX_VALUES:
487 #endif
488   case NOT_SET_VALUES:
489     break;
490   default:
491     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 /* ----------------------------------------------------------------------------------------------- */
Pack_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)497 PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
498 {
499   PetscInt i,idx;
500 
501   for (i=0; i<n; i++) {
502     idx  = *indicesx++;
503     y[0] = x[idx];
504     y[1] = x[idx+1];
505     y   += 2;
506   }
507 }
508 
UnPack_2(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)509 PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
510 {
511   PetscInt i,idy;
512 
513   PetscFunctionBegin;
514   switch (addv) {
515   case INSERT_VALUES:
516   case INSERT_ALL_VALUES:
517     for (i=0; i<n; i++) {
518       idy      = *indicesy++;
519       y[idy]   = x[0];
520       y[idy+1] = x[1];
521       x       += 2;
522     }
523     break;
524   case ADD_VALUES:
525   case ADD_ALL_VALUES:
526     for (i=0; i<n; i++) {
527       idy       = *indicesy++;
528       y[idy]   += x[0];
529       y[idy+1] += x[1];
530       x        += 2;
531     }
532     break;
533 #if !defined(PETSC_USE_COMPLEX)
534   case MAX_VALUES:
535     for (i=0; i<n; i++) {
536       idy      = *indicesy++;
537       y[idy]   = PetscMax(y[idy],x[0]);
538       y[idy+1] = PetscMax(y[idy+1],x[1]);
539       x       += 2;
540     }
541 #else
542   case MAX_VALUES:
543 #endif
544   case NOT_SET_VALUES:
545     break;
546   default:
547     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
548   }
549   PetscFunctionReturn(0);
550 }
551 
Scatter_2(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)552 PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
553 {
554   PetscInt i,idx,idy;
555 
556   PetscFunctionBegin;
557   switch (addv) {
558   case INSERT_VALUES:
559   case INSERT_ALL_VALUES:
560     for (i=0; i<n; i++) {
561       idx      = *indicesx++;
562       idy      = *indicesy++;
563       y[idy]   = x[idx];
564       y[idy+1] = x[idx+1];
565     }
566     break;
567   case ADD_VALUES:
568   case ADD_ALL_VALUES:
569     for (i=0; i<n; i++) {
570       idx       = *indicesx++;
571       idy       = *indicesy++;
572       y[idy]   += x[idx];
573       y[idy+1] += x[idx+1];
574     }
575     break;
576 #if !defined(PETSC_USE_COMPLEX)
577   case MAX_VALUES:
578     for (i=0; i<n; i++) {
579       idx      = *indicesx++;
580       idy      = *indicesy++;
581       y[idy]   = PetscMax(y[idy],x[idx]);
582       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
583     }
584 #else
585   case MAX_VALUES:
586 #endif
587   case NOT_SET_VALUES:
588     break;
589   default:
590     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
591   }
592   PetscFunctionReturn(0);
593 }
594 /* ----------------------------------------------------------------------------------------------- */
Pack_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)595 PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
596 {
597   PetscInt i,idx;
598 
599   for (i=0; i<n; i++) {
600     idx  = *indicesx++;
601     y[0] = x[idx];
602     y[1] = x[idx+1];
603     y[2] = x[idx+2];
604     y   += 3;
605   }
606 }
UnPack_3(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)607 PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
608 {
609   PetscInt i,idy;
610 
611   PetscFunctionBegin;
612   switch (addv) {
613   case INSERT_VALUES:
614   case INSERT_ALL_VALUES:
615     for (i=0; i<n; i++) {
616       idy      = *indicesy++;
617       y[idy]   = x[0];
618       y[idy+1] = x[1];
619       y[idy+2] = x[2];
620       x       += 3;
621     }
622     break;
623   case ADD_VALUES:
624   case ADD_ALL_VALUES:
625     for (i=0; i<n; i++) {
626       idy       = *indicesy++;
627       y[idy]   += x[0];
628       y[idy+1] += x[1];
629       y[idy+2] += x[2];
630       x        += 3;
631     }
632     break;
633 #if !defined(PETSC_USE_COMPLEX)
634   case MAX_VALUES:
635     for (i=0; i<n; i++) {
636       idy      = *indicesy++;
637       y[idy]   = PetscMax(y[idy],x[0]);
638       y[idy+1] = PetscMax(y[idy+1],x[1]);
639       y[idy+2] = PetscMax(y[idy+2],x[2]);
640       x       += 3;
641     }
642 #else
643   case MAX_VALUES:
644 #endif
645   case NOT_SET_VALUES:
646     break;
647   default:
648     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
649   }
650   PetscFunctionReturn(0);
651 }
652 
Scatter_3(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)653 PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
654 {
655   PetscInt i,idx,idy;
656 
657   PetscFunctionBegin;
658   switch (addv) {
659   case INSERT_VALUES:
660   case INSERT_ALL_VALUES:
661     for (i=0; i<n; i++) {
662       idx      = *indicesx++;
663       idy      = *indicesy++;
664       y[idy]   = x[idx];
665       y[idy+1] = x[idx+1];
666       y[idy+2] = x[idx+2];
667     }
668     break;
669   case ADD_VALUES:
670   case ADD_ALL_VALUES:
671     for (i=0; i<n; i++) {
672       idx       = *indicesx++;
673       idy       = *indicesy++;
674       y[idy]   += x[idx];
675       y[idy+1] += x[idx+1];
676       y[idy+2] += x[idx+2];
677     }
678     break;
679 #if !defined(PETSC_USE_COMPLEX)
680   case MAX_VALUES:
681     for (i=0; i<n; i++) {
682       idx      = *indicesx++;
683       idy      = *indicesy++;
684       y[idy]   = PetscMax(y[idy],x[idx]);
685       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
686       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
687     }
688 #else
689   case MAX_VALUES:
690 #endif
691   case NOT_SET_VALUES:
692     break;
693   default:
694     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
695   }
696   PetscFunctionReturn(0);
697 }
698 /* ----------------------------------------------------------------------------------------------- */
Pack_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)699 PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
700 {
701   PetscInt i,idx;
702 
703   for (i=0; i<n; i++) {
704     idx  = *indicesx++;
705     y[0] = x[idx];
706     y[1] = x[idx+1];
707     y[2] = x[idx+2];
708     y[3] = x[idx+3];
709     y   += 4;
710   }
711 }
UnPack_4(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)712 PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
713 {
714   PetscInt i,idy;
715 
716   PetscFunctionBegin;
717   switch (addv) {
718   case INSERT_VALUES:
719   case INSERT_ALL_VALUES:
720     for (i=0; i<n; i++) {
721       idy      = *indicesy++;
722       y[idy]   = x[0];
723       y[idy+1] = x[1];
724       y[idy+2] = x[2];
725       y[idy+3] = x[3];
726       x       += 4;
727     }
728     break;
729   case ADD_VALUES:
730   case ADD_ALL_VALUES:
731     for (i=0; i<n; i++) {
732       idy       = *indicesy++;
733       y[idy]   += x[0];
734       y[idy+1] += x[1];
735       y[idy+2] += x[2];
736       y[idy+3] += x[3];
737       x        += 4;
738     }
739     break;
740 #if !defined(PETSC_USE_COMPLEX)
741   case MAX_VALUES:
742     for (i=0; i<n; i++) {
743       idy      = *indicesy++;
744       y[idy]   = PetscMax(y[idy],x[0]);
745       y[idy+1] = PetscMax(y[idy+1],x[1]);
746       y[idy+2] = PetscMax(y[idy+2],x[2]);
747       y[idy+3] = PetscMax(y[idy+3],x[3]);
748       x       += 4;
749     }
750 #else
751   case MAX_VALUES:
752 #endif
753   case NOT_SET_VALUES:
754     break;
755   default:
756     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
757   }
758   PetscFunctionReturn(0);
759 }
760 
Scatter_4(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)761 PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
762 {
763   PetscInt i,idx,idy;
764 
765   PetscFunctionBegin;
766   switch (addv) {
767   case INSERT_VALUES:
768   case INSERT_ALL_VALUES:
769     for (i=0; i<n; i++) {
770       idx      = *indicesx++;
771       idy      = *indicesy++;
772       y[idy]   = x[idx];
773       y[idy+1] = x[idx+1];
774       y[idy+2] = x[idx+2];
775       y[idy+3] = x[idx+3];
776     }
777     break;
778   case ADD_VALUES:
779   case ADD_ALL_VALUES:
780     for (i=0; i<n; i++) {
781       idx       = *indicesx++;
782       idy       = *indicesy++;
783       y[idy]   += x[idx];
784       y[idy+1] += x[idx+1];
785       y[idy+2] += x[idx+2];
786       y[idy+3] += x[idx+3];
787     }
788     break;
789 #if !defined(PETSC_USE_COMPLEX)
790   case MAX_VALUES:
791     for (i=0; i<n; i++) {
792       idx      = *indicesx++;
793       idy      = *indicesy++;
794       y[idy]   = PetscMax(y[idy],x[idx]);
795       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
796       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
797       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
798     }
799 #else
800   case MAX_VALUES:
801 #endif
802   case NOT_SET_VALUES:
803     break;
804   default:
805     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
806   }
807   PetscFunctionReturn(0);
808 }
809 /* ----------------------------------------------------------------------------------------------- */
Pack_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)810 PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
811 {
812   PetscInt i,idx;
813 
814   for (i=0; i<n; i++) {
815     idx  = *indicesx++;
816     y[0] = x[idx];
817     y[1] = x[idx+1];
818     y[2] = x[idx+2];
819     y[3] = x[idx+3];
820     y[4] = x[idx+4];
821     y   += 5;
822   }
823 }
824 
UnPack_5(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)825 PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
826 {
827   PetscInt i,idy;
828 
829   PetscFunctionBegin;
830   switch (addv) {
831   case INSERT_VALUES:
832   case INSERT_ALL_VALUES:
833     for (i=0; i<n; i++) {
834       idy      = *indicesy++;
835       y[idy]   = x[0];
836       y[idy+1] = x[1];
837       y[idy+2] = x[2];
838       y[idy+3] = x[3];
839       y[idy+4] = x[4];
840       x       += 5;
841     }
842     break;
843   case ADD_VALUES:
844   case ADD_ALL_VALUES:
845     for (i=0; i<n; i++) {
846       idy       = *indicesy++;
847       y[idy]   += x[0];
848       y[idy+1] += x[1];
849       y[idy+2] += x[2];
850       y[idy+3] += x[3];
851       y[idy+4] += x[4];
852       x        += 5;
853     }
854     break;
855 #if !defined(PETSC_USE_COMPLEX)
856   case MAX_VALUES:
857     for (i=0; i<n; i++) {
858       idy      = *indicesy++;
859       y[idy]   = PetscMax(y[idy],x[0]);
860       y[idy+1] = PetscMax(y[idy+1],x[1]);
861       y[idy+2] = PetscMax(y[idy+2],x[2]);
862       y[idy+3] = PetscMax(y[idy+3],x[3]);
863       y[idy+4] = PetscMax(y[idy+4],x[4]);
864       x       += 5;
865     }
866 #else
867   case MAX_VALUES:
868 #endif
869   case NOT_SET_VALUES:
870     break;
871   default:
872     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
873   }
874   PetscFunctionReturn(0);
875 }
876 
Scatter_5(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)877 PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
878 {
879   PetscInt i,idx,idy;
880 
881   PetscFunctionBegin;
882   switch (addv) {
883   case INSERT_VALUES:
884   case INSERT_ALL_VALUES:
885     for (i=0; i<n; i++) {
886       idx      = *indicesx++;
887       idy      = *indicesy++;
888       y[idy]   = x[idx];
889       y[idy+1] = x[idx+1];
890       y[idy+2] = x[idx+2];
891       y[idy+3] = x[idx+3];
892       y[idy+4] = x[idx+4];
893     }
894     break;
895   case ADD_VALUES:
896   case ADD_ALL_VALUES:
897     for (i=0; i<n; i++) {
898       idx       = *indicesx++;
899       idy       = *indicesy++;
900       y[idy]   += x[idx];
901       y[idy+1] += x[idx+1];
902       y[idy+2] += x[idx+2];
903       y[idy+3] += x[idx+3];
904       y[idy+4] += x[idx+4];
905     }
906     break;
907 #if !defined(PETSC_USE_COMPLEX)
908   case MAX_VALUES:
909     for (i=0; i<n; i++) {
910       idx      = *indicesx++;
911       idy      = *indicesy++;
912       y[idy]   = PetscMax(y[idy],x[idx]);
913       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
914       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
915       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
916       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
917     }
918 #else
919   case MAX_VALUES:
920 #endif
921   case NOT_SET_VALUES:
922     break;
923   default:
924     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
925   }
926   PetscFunctionReturn(0);
927 }
928 /* ----------------------------------------------------------------------------------------------- */
Pack_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)929 PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
930 {
931   PetscInt i,idx;
932 
933   for (i=0; i<n; i++) {
934     idx  = *indicesx++;
935     y[0] = x[idx];
936     y[1] = x[idx+1];
937     y[2] = x[idx+2];
938     y[3] = x[idx+3];
939     y[4] = x[idx+4];
940     y[5] = x[idx+5];
941     y   += 6;
942   }
943 }
944 
UnPack_6(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)945 PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
946 {
947   PetscInt i,idy;
948 
949   PetscFunctionBegin;
950   switch (addv) {
951   case INSERT_VALUES:
952   case INSERT_ALL_VALUES:
953     for (i=0; i<n; i++) {
954       idy      = *indicesy++;
955       y[idy]   = x[0];
956       y[idy+1] = x[1];
957       y[idy+2] = x[2];
958       y[idy+3] = x[3];
959       y[idy+4] = x[4];
960       y[idy+5] = x[5];
961       x       += 6;
962     }
963     break;
964   case ADD_VALUES:
965   case ADD_ALL_VALUES:
966     for (i=0; i<n; i++) {
967       idy       = *indicesy++;
968       y[idy]   += x[0];
969       y[idy+1] += x[1];
970       y[idy+2] += x[2];
971       y[idy+3] += x[3];
972       y[idy+4] += x[4];
973       y[idy+5] += x[5];
974       x        += 6;
975     }
976     break;
977 #if !defined(PETSC_USE_COMPLEX)
978   case MAX_VALUES:
979     for (i=0; i<n; i++) {
980       idy      = *indicesy++;
981       y[idy]   = PetscMax(y[idy],x[0]);
982       y[idy+1] = PetscMax(y[idy+1],x[1]);
983       y[idy+2] = PetscMax(y[idy+2],x[2]);
984       y[idy+3] = PetscMax(y[idy+3],x[3]);
985       y[idy+4] = PetscMax(y[idy+4],x[4]);
986       y[idy+5] = PetscMax(y[idy+5],x[5]);
987       x       += 6;
988     }
989 #else
990   case MAX_VALUES:
991 #endif
992   case NOT_SET_VALUES:
993     break;
994   default:
995     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
996   }
997   PetscFunctionReturn(0);
998 }
999 
Scatter_6(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1000 PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1001 {
1002   PetscInt i,idx,idy;
1003 
1004   PetscFunctionBegin;
1005   switch (addv) {
1006   case INSERT_VALUES:
1007   case INSERT_ALL_VALUES:
1008     for (i=0; i<n; i++) {
1009       idx      = *indicesx++;
1010       idy      = *indicesy++;
1011       y[idy]   = x[idx];
1012       y[idy+1] = x[idx+1];
1013       y[idy+2] = x[idx+2];
1014       y[idy+3] = x[idx+3];
1015       y[idy+4] = x[idx+4];
1016       y[idy+5] = x[idx+5];
1017     }
1018     break;
1019   case ADD_VALUES:
1020   case ADD_ALL_VALUES:
1021     for (i=0; i<n; i++) {
1022       idx       = *indicesx++;
1023       idy       = *indicesy++;
1024       y[idy]   += x[idx];
1025       y[idy+1] += x[idx+1];
1026       y[idy+2] += x[idx+2];
1027       y[idy+3] += x[idx+3];
1028       y[idy+4] += x[idx+4];
1029       y[idy+5] += x[idx+5];
1030     }
1031     break;
1032 #if !defined(PETSC_USE_COMPLEX)
1033   case MAX_VALUES:
1034     for (i=0; i<n; i++) {
1035       idx      = *indicesx++;
1036       idy      = *indicesy++;
1037       y[idy]   = PetscMax(y[idy],x[idx]);
1038       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1039       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1040       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1041       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1042       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1043     }
1044 #else
1045   case MAX_VALUES:
1046 #endif
1047   case NOT_SET_VALUES:
1048     break;
1049   default:
1050     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 /* ----------------------------------------------------------------------------------------------- */
Pack_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1055 PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1056 {
1057   PetscInt i,idx;
1058 
1059   for (i=0; i<n; i++) {
1060     idx  = *indicesx++;
1061     y[0] = x[idx];
1062     y[1] = x[idx+1];
1063     y[2] = x[idx+2];
1064     y[3] = x[idx+3];
1065     y[4] = x[idx+4];
1066     y[5] = x[idx+5];
1067     y[6] = x[idx+6];
1068     y   += 7;
1069   }
1070 }
1071 
UnPack_7(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1072 PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1073 {
1074   PetscInt i,idy;
1075 
1076   PetscFunctionBegin;
1077   switch (addv) {
1078   case INSERT_VALUES:
1079   case INSERT_ALL_VALUES:
1080     for (i=0; i<n; i++) {
1081       idy      = *indicesy++;
1082       y[idy]   = x[0];
1083       y[idy+1] = x[1];
1084       y[idy+2] = x[2];
1085       y[idy+3] = x[3];
1086       y[idy+4] = x[4];
1087       y[idy+5] = x[5];
1088       y[idy+6] = x[6];
1089       x       += 7;
1090     }
1091     break;
1092   case ADD_VALUES:
1093   case ADD_ALL_VALUES:
1094     for (i=0; i<n; i++) {
1095       idy       = *indicesy++;
1096       y[idy]   += x[0];
1097       y[idy+1] += x[1];
1098       y[idy+2] += x[2];
1099       y[idy+3] += x[3];
1100       y[idy+4] += x[4];
1101       y[idy+5] += x[5];
1102       y[idy+6] += x[6];
1103       x        += 7;
1104     }
1105     break;
1106 #if !defined(PETSC_USE_COMPLEX)
1107   case MAX_VALUES:
1108     for (i=0; i<n; i++) {
1109       idy      = *indicesy++;
1110       y[idy]   = PetscMax(y[idy],x[0]);
1111       y[idy+1] = PetscMax(y[idy+1],x[1]);
1112       y[idy+2] = PetscMax(y[idy+2],x[2]);
1113       y[idy+3] = PetscMax(y[idy+3],x[3]);
1114       y[idy+4] = PetscMax(y[idy+4],x[4]);
1115       y[idy+5] = PetscMax(y[idy+5],x[5]);
1116       y[idy+6] = PetscMax(y[idy+6],x[6]);
1117       x       += 7;
1118     }
1119 #else
1120   case MAX_VALUES:
1121 #endif
1122   case NOT_SET_VALUES:
1123     break;
1124   default:
1125     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
Scatter_7(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1130 PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1131 {
1132   PetscInt i,idx,idy;
1133 
1134   PetscFunctionBegin;
1135   switch (addv) {
1136   case INSERT_VALUES:
1137   case INSERT_ALL_VALUES:
1138     for (i=0; i<n; i++) {
1139       idx      = *indicesx++;
1140       idy      = *indicesy++;
1141       y[idy]   = x[idx];
1142       y[idy+1] = x[idx+1];
1143       y[idy+2] = x[idx+2];
1144       y[idy+3] = x[idx+3];
1145       y[idy+4] = x[idx+4];
1146       y[idy+5] = x[idx+5];
1147       y[idy+6] = x[idx+6];
1148     }
1149     break;
1150   case ADD_VALUES:
1151   case ADD_ALL_VALUES:
1152     for (i=0; i<n; i++) {
1153       idx       = *indicesx++;
1154       idy       = *indicesy++;
1155       y[idy]   += x[idx];
1156       y[idy+1] += x[idx+1];
1157       y[idy+2] += x[idx+2];
1158       y[idy+3] += x[idx+3];
1159       y[idy+4] += x[idx+4];
1160       y[idy+5] += x[idx+5];
1161       y[idy+6] += x[idx+6];
1162     }
1163     break;
1164 #if !defined(PETSC_USE_COMPLEX)
1165   case MAX_VALUES:
1166     for (i=0; i<n; i++) {
1167       idx      = *indicesx++;
1168       idy      = *indicesy++;
1169       y[idy]   = PetscMax(y[idy],x[idx]);
1170       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1171       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1172       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1173       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1174       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1175       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1176     }
1177 #else
1178   case MAX_VALUES:
1179 #endif
1180   case NOT_SET_VALUES:
1181     break;
1182   default:
1183     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 /* ----------------------------------------------------------------------------------------------- */
Pack_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1188 PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1189 {
1190   PetscInt i,idx;
1191 
1192   for (i=0; i<n; i++) {
1193     idx  = *indicesx++;
1194     y[0] = x[idx];
1195     y[1] = x[idx+1];
1196     y[2] = x[idx+2];
1197     y[3] = x[idx+3];
1198     y[4] = x[idx+4];
1199     y[5] = x[idx+5];
1200     y[6] = x[idx+6];
1201     y[7] = x[idx+7];
1202     y   += 8;
1203   }
1204 }
1205 
UnPack_8(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1206 PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1207 {
1208   PetscInt i,idy;
1209 
1210   PetscFunctionBegin;
1211   switch (addv) {
1212   case INSERT_VALUES:
1213   case INSERT_ALL_VALUES:
1214     for (i=0; i<n; i++) {
1215       idy      = *indicesy++;
1216       y[idy]   = x[0];
1217       y[idy+1] = x[1];
1218       y[idy+2] = x[2];
1219       y[idy+3] = x[3];
1220       y[idy+4] = x[4];
1221       y[idy+5] = x[5];
1222       y[idy+6] = x[6];
1223       y[idy+7] = x[7];
1224       x       += 8;
1225     }
1226     break;
1227   case ADD_VALUES:
1228   case ADD_ALL_VALUES:
1229     for (i=0; i<n; i++) {
1230       idy       = *indicesy++;
1231       y[idy]   += x[0];
1232       y[idy+1] += x[1];
1233       y[idy+2] += x[2];
1234       y[idy+3] += x[3];
1235       y[idy+4] += x[4];
1236       y[idy+5] += x[5];
1237       y[idy+6] += x[6];
1238       y[idy+7] += x[7];
1239       x        += 8;
1240     }
1241     break;
1242 #if !defined(PETSC_USE_COMPLEX)
1243   case MAX_VALUES:
1244     for (i=0; i<n; i++) {
1245       idy      = *indicesy++;
1246       y[idy]   = PetscMax(y[idy],x[0]);
1247       y[idy+1] = PetscMax(y[idy+1],x[1]);
1248       y[idy+2] = PetscMax(y[idy+2],x[2]);
1249       y[idy+3] = PetscMax(y[idy+3],x[3]);
1250       y[idy+4] = PetscMax(y[idy+4],x[4]);
1251       y[idy+5] = PetscMax(y[idy+5],x[5]);
1252       y[idy+6] = PetscMax(y[idy+6],x[6]);
1253       y[idy+7] = PetscMax(y[idy+7],x[7]);
1254       x       += 8;
1255     }
1256 #else
1257   case MAX_VALUES:
1258 #endif
1259   case NOT_SET_VALUES:
1260     break;
1261   default:
1262     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
Scatter_8(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1267 PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1268 {
1269   PetscInt i,idx,idy;
1270 
1271   PetscFunctionBegin;
1272   switch (addv) {
1273   case INSERT_VALUES:
1274   case INSERT_ALL_VALUES:
1275     for (i=0; i<n; i++) {
1276       idx      = *indicesx++;
1277       idy      = *indicesy++;
1278       y[idy]   = x[idx];
1279       y[idy+1] = x[idx+1];
1280       y[idy+2] = x[idx+2];
1281       y[idy+3] = x[idx+3];
1282       y[idy+4] = x[idx+4];
1283       y[idy+5] = x[idx+5];
1284       y[idy+6] = x[idx+6];
1285       y[idy+7] = x[idx+7];
1286     }
1287     break;
1288   case ADD_VALUES:
1289   case ADD_ALL_VALUES:
1290     for (i=0; i<n; i++) {
1291       idx       = *indicesx++;
1292       idy       = *indicesy++;
1293       y[idy]   += x[idx];
1294       y[idy+1] += x[idx+1];
1295       y[idy+2] += x[idx+2];
1296       y[idy+3] += x[idx+3];
1297       y[idy+4] += x[idx+4];
1298       y[idy+5] += x[idx+5];
1299       y[idy+6] += x[idx+6];
1300       y[idy+7] += x[idx+7];
1301     }
1302     break;
1303 #if !defined(PETSC_USE_COMPLEX)
1304   case MAX_VALUES:
1305     for (i=0; i<n; i++) {
1306       idx      = *indicesx++;
1307       idy      = *indicesy++;
1308       y[idy]   = PetscMax(y[idy],x[idx]);
1309       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1310       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1311       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1312       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1313       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1314       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1315       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1316     }
1317 #else
1318   case MAX_VALUES:
1319 #endif
1320   case NOT_SET_VALUES:
1321     break;
1322   default:
1323     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1324   }
1325   PetscFunctionReturn(0);
1326 }
1327 
Pack_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1328 PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1329 {
1330   PetscInt i,idx;
1331 
1332   for (i=0; i<n; i++) {
1333     idx   = *indicesx++;
1334     y[0]  = x[idx];
1335     y[1]  = x[idx+1];
1336     y[2]  = x[idx+2];
1337     y[3]  = x[idx+3];
1338     y[4]  = x[idx+4];
1339     y[5]  = x[idx+5];
1340     y[6]  = x[idx+6];
1341     y[7]  = x[idx+7];
1342     y[8]  = x[idx+8];
1343     y    += 9;
1344   }
1345 }
1346 
UnPack_9(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1347 PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1348 {
1349   PetscInt i,idy;
1350 
1351   PetscFunctionBegin;
1352   switch (addv) {
1353   case INSERT_VALUES:
1354   case INSERT_ALL_VALUES:
1355     for (i=0; i<n; i++) {
1356       idy       = *indicesy++;
1357       y[idy]    = x[0];
1358       y[idy+1]  = x[1];
1359       y[idy+2]  = x[2];
1360       y[idy+3]  = x[3];
1361       y[idy+4]  = x[4];
1362       y[idy+5]  = x[5];
1363       y[idy+6]  = x[6];
1364       y[idy+7]  = x[7];
1365       y[idy+8]  = x[8];
1366       x        += 9;
1367     }
1368     break;
1369   case ADD_VALUES:
1370   case ADD_ALL_VALUES:
1371     for (i=0; i<n; i++) {
1372       idy        = *indicesy++;
1373       y[idy]    += x[0];
1374       y[idy+1]  += x[1];
1375       y[idy+2]  += x[2];
1376       y[idy+3]  += x[3];
1377       y[idy+4]  += x[4];
1378       y[idy+5]  += x[5];
1379       y[idy+6]  += x[6];
1380       y[idy+7]  += x[7];
1381       y[idy+8]  += x[8];
1382       x         += 9;
1383     }
1384     break;
1385 #if !defined(PETSC_USE_COMPLEX)
1386   case MAX_VALUES:
1387     for (i=0; i<n; i++) {
1388       idy       = *indicesy++;
1389       y[idy]    = PetscMax(y[idy],x[0]);
1390       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1391       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1392       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1393       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1394       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1395       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1396       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1397       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1398       x        += 9;
1399     }
1400 #else
1401   case MAX_VALUES:
1402 #endif
1403   case NOT_SET_VALUES:
1404     break;
1405   default:
1406     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
Scatter_9(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1411 PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1412 {
1413   PetscInt i,idx,idy;
1414 
1415   PetscFunctionBegin;
1416   switch (addv) {
1417   case INSERT_VALUES:
1418   case INSERT_ALL_VALUES:
1419     for (i=0; i<n; i++) {
1420       idx       = *indicesx++;
1421       idy       = *indicesy++;
1422       y[idy]    = x[idx];
1423       y[idy+1]  = x[idx+1];
1424       y[idy+2]  = x[idx+2];
1425       y[idy+3]  = x[idx+3];
1426       y[idy+4]  = x[idx+4];
1427       y[idy+5]  = x[idx+5];
1428       y[idy+6]  = x[idx+6];
1429       y[idy+7]  = x[idx+7];
1430       y[idy+8]  = x[idx+8];
1431     }
1432     break;
1433   case ADD_VALUES:
1434   case ADD_ALL_VALUES:
1435     for (i=0; i<n; i++) {
1436       idx        = *indicesx++;
1437       idy        = *indicesy++;
1438       y[idy]    += x[idx];
1439       y[idy+1]  += x[idx+1];
1440       y[idy+2]  += x[idx+2];
1441       y[idy+3]  += x[idx+3];
1442       y[idy+4]  += x[idx+4];
1443       y[idy+5]  += x[idx+5];
1444       y[idy+6]  += x[idx+6];
1445       y[idy+7]  += x[idx+7];
1446       y[idy+8]  += x[idx+8];
1447     }
1448     break;
1449 #if !defined(PETSC_USE_COMPLEX)
1450   case MAX_VALUES:
1451     for (i=0; i<n; i++) {
1452       idx       = *indicesx++;
1453       idy       = *indicesy++;
1454       y[idy]    = PetscMax(y[idy],x[idx]);
1455       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1456       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1457       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1458       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1459       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1460       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1461       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1462       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1463     }
1464 #else
1465   case MAX_VALUES:
1466 #endif
1467   case NOT_SET_VALUES:
1468     break;
1469   default:
1470     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
Pack_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1475 PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1476 {
1477   PetscInt i,idx;
1478 
1479   for (i=0; i<n; i++) {
1480     idx   = *indicesx++;
1481     y[0]  = x[idx];
1482     y[1]  = x[idx+1];
1483     y[2]  = x[idx+2];
1484     y[3]  = x[idx+3];
1485     y[4]  = x[idx+4];
1486     y[5]  = x[idx+5];
1487     y[6]  = x[idx+6];
1488     y[7]  = x[idx+7];
1489     y[8]  = x[idx+8];
1490     y[9]  = x[idx+9];
1491     y    += 10;
1492   }
1493 }
1494 
UnPack_10(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1495 PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1496 {
1497   PetscInt i,idy;
1498 
1499   PetscFunctionBegin;
1500   switch (addv) {
1501   case INSERT_VALUES:
1502   case INSERT_ALL_VALUES:
1503     for (i=0; i<n; i++) {
1504       idy       = *indicesy++;
1505       y[idy]    = x[0];
1506       y[idy+1]  = x[1];
1507       y[idy+2]  = x[2];
1508       y[idy+3]  = x[3];
1509       y[idy+4]  = x[4];
1510       y[idy+5]  = x[5];
1511       y[idy+6]  = x[6];
1512       y[idy+7]  = x[7];
1513       y[idy+8]  = x[8];
1514       y[idy+9]  = x[9];
1515       x        += 10;
1516     }
1517     break;
1518   case ADD_VALUES:
1519   case ADD_ALL_VALUES:
1520     for (i=0; i<n; i++) {
1521       idy        = *indicesy++;
1522       y[idy]    += x[0];
1523       y[idy+1]  += x[1];
1524       y[idy+2]  += x[2];
1525       y[idy+3]  += x[3];
1526       y[idy+4]  += x[4];
1527       y[idy+5]  += x[5];
1528       y[idy+6]  += x[6];
1529       y[idy+7]  += x[7];
1530       y[idy+8]  += x[8];
1531       y[idy+9]  += x[9];
1532       x         += 10;
1533     }
1534     break;
1535 #if !defined(PETSC_USE_COMPLEX)
1536   case MAX_VALUES:
1537     for (i=0; i<n; i++) {
1538       idy       = *indicesy++;
1539       y[idy]    = PetscMax(y[idy],x[0]);
1540       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1541       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1542       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1543       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1544       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1545       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1546       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1547       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1548       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1549       x        += 10;
1550     }
1551 #else
1552   case MAX_VALUES:
1553 #endif
1554   case NOT_SET_VALUES:
1555     break;
1556   default:
1557     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
Scatter_10(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1562 PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1563 {
1564   PetscInt i,idx,idy;
1565 
1566   PetscFunctionBegin;
1567   switch (addv) {
1568   case INSERT_VALUES:
1569   case INSERT_ALL_VALUES:
1570     for (i=0; i<n; i++) {
1571       idx       = *indicesx++;
1572       idy       = *indicesy++;
1573       y[idy]    = x[idx];
1574       y[idy+1]  = x[idx+1];
1575       y[idy+2]  = x[idx+2];
1576       y[idy+3]  = x[idx+3];
1577       y[idy+4]  = x[idx+4];
1578       y[idy+5]  = x[idx+5];
1579       y[idy+6]  = x[idx+6];
1580       y[idy+7]  = x[idx+7];
1581       y[idy+8]  = x[idx+8];
1582       y[idy+9]  = x[idx+9];
1583     }
1584     break;
1585   case ADD_VALUES:
1586   case ADD_ALL_VALUES:
1587     for (i=0; i<n; i++) {
1588       idx        = *indicesx++;
1589       idy        = *indicesy++;
1590       y[idy]    += x[idx];
1591       y[idy+1]  += x[idx+1];
1592       y[idy+2]  += x[idx+2];
1593       y[idy+3]  += x[idx+3];
1594       y[idy+4]  += x[idx+4];
1595       y[idy+5]  += x[idx+5];
1596       y[idy+6]  += x[idx+6];
1597       y[idy+7]  += x[idx+7];
1598       y[idy+8]  += x[idx+8];
1599       y[idy+9]  += x[idx+9];
1600     }
1601     break;
1602 #if !defined(PETSC_USE_COMPLEX)
1603   case MAX_VALUES:
1604     for (i=0; i<n; i++) {
1605       idx       = *indicesx++;
1606       idy       = *indicesy++;
1607       y[idy]    = PetscMax(y[idy],x[idx]);
1608       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1609       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1610       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1611       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1612       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1613       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1614       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1615       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1616       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1617     }
1618 #else
1619   case MAX_VALUES:
1620 #endif
1621   case NOT_SET_VALUES:
1622     break;
1623   default:
1624     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
Pack_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1629 PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1630 {
1631   PetscInt i,idx;
1632 
1633   for (i=0; i<n; i++) {
1634     idx   = *indicesx++;
1635     y[0]  = x[idx];
1636     y[1]  = x[idx+1];
1637     y[2]  = x[idx+2];
1638     y[3]  = x[idx+3];
1639     y[4]  = x[idx+4];
1640     y[5]  = x[idx+5];
1641     y[6]  = x[idx+6];
1642     y[7]  = x[idx+7];
1643     y[8]  = x[idx+8];
1644     y[9]  = x[idx+9];
1645     y[10] = x[idx+10];
1646     y    += 11;
1647   }
1648 }
1649 
UnPack_11(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1650 PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1651 {
1652   PetscInt i,idy;
1653 
1654   PetscFunctionBegin;
1655   switch (addv) {
1656   case INSERT_VALUES:
1657   case INSERT_ALL_VALUES:
1658     for (i=0; i<n; i++) {
1659       idy       = *indicesy++;
1660       y[idy]    = x[0];
1661       y[idy+1]  = x[1];
1662       y[idy+2]  = x[2];
1663       y[idy+3]  = x[3];
1664       y[idy+4]  = x[4];
1665       y[idy+5]  = x[5];
1666       y[idy+6]  = x[6];
1667       y[idy+7]  = x[7];
1668       y[idy+8]  = x[8];
1669       y[idy+9]  = x[9];
1670       y[idy+10] = x[10];
1671       x        += 11;
1672     }
1673     break;
1674   case ADD_VALUES:
1675   case ADD_ALL_VALUES:
1676     for (i=0; i<n; i++) {
1677       idy        = *indicesy++;
1678       y[idy]    += x[0];
1679       y[idy+1]  += x[1];
1680       y[idy+2]  += x[2];
1681       y[idy+3]  += x[3];
1682       y[idy+4]  += x[4];
1683       y[idy+5]  += x[5];
1684       y[idy+6]  += x[6];
1685       y[idy+7]  += x[7];
1686       y[idy+8]  += x[8];
1687       y[idy+9]  += x[9];
1688       y[idy+10] += x[10];
1689       x         += 11;
1690     }
1691     break;
1692 #if !defined(PETSC_USE_COMPLEX)
1693   case MAX_VALUES:
1694     for (i=0; i<n; i++) {
1695       idy       = *indicesy++;
1696       y[idy]    = PetscMax(y[idy],x[0]);
1697       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1698       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1699       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1700       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1701       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1702       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1703       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1704       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1705       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1706       y[idy+10] = PetscMax(y[idy+10],x[10]);
1707       x        += 11;
1708     }
1709 #else
1710   case MAX_VALUES:
1711 #endif
1712   case NOT_SET_VALUES:
1713     break;
1714   default:
1715     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
Scatter_11(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1720 PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1721 {
1722   PetscInt i,idx,idy;
1723 
1724   PetscFunctionBegin;
1725   switch (addv) {
1726   case INSERT_VALUES:
1727   case INSERT_ALL_VALUES:
1728     for (i=0; i<n; i++) {
1729       idx       = *indicesx++;
1730       idy       = *indicesy++;
1731       y[idy]    = x[idx];
1732       y[idy+1]  = x[idx+1];
1733       y[idy+2]  = x[idx+2];
1734       y[idy+3]  = x[idx+3];
1735       y[idy+4]  = x[idx+4];
1736       y[idy+5]  = x[idx+5];
1737       y[idy+6]  = x[idx+6];
1738       y[idy+7]  = x[idx+7];
1739       y[idy+8]  = x[idx+8];
1740       y[idy+9]  = x[idx+9];
1741       y[idy+10] = x[idx+10];
1742     }
1743     break;
1744   case ADD_VALUES:
1745   case ADD_ALL_VALUES:
1746     for (i=0; i<n; i++) {
1747       idx        = *indicesx++;
1748       idy        = *indicesy++;
1749       y[idy]    += x[idx];
1750       y[idy+1]  += x[idx+1];
1751       y[idy+2]  += x[idx+2];
1752       y[idy+3]  += x[idx+3];
1753       y[idy+4]  += x[idx+4];
1754       y[idy+5]  += x[idx+5];
1755       y[idy+6]  += x[idx+6];
1756       y[idy+7]  += x[idx+7];
1757       y[idy+8]  += x[idx+8];
1758       y[idy+9]  += x[idx+9];
1759       y[idy+10] += x[idx+10];
1760     }
1761     break;
1762 #if !defined(PETSC_USE_COMPLEX)
1763   case MAX_VALUES:
1764     for (i=0; i<n; i++) {
1765       idx       = *indicesx++;
1766       idy       = *indicesy++;
1767       y[idy]    = PetscMax(y[idy],x[idx]);
1768       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1769       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1770       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1771       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1772       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1773       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1774       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1775       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1776       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1777       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1778     }
1779 #else
1780   case MAX_VALUES:
1781 #endif
1782   case NOT_SET_VALUES:
1783     break;
1784   default:
1785     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 /* ----------------------------------------------------------------------------------------------- */
Pack_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1791 PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1792 {
1793   PetscInt i,idx;
1794 
1795   for (i=0; i<n; i++) {
1796     idx   = *indicesx++;
1797     y[0]  = x[idx];
1798     y[1]  = x[idx+1];
1799     y[2]  = x[idx+2];
1800     y[3]  = x[idx+3];
1801     y[4]  = x[idx+4];
1802     y[5]  = x[idx+5];
1803     y[6]  = x[idx+6];
1804     y[7]  = x[idx+7];
1805     y[8]  = x[idx+8];
1806     y[9]  = x[idx+9];
1807     y[10] = x[idx+10];
1808     y[11] = x[idx+11];
1809     y    += 12;
1810   }
1811 }
1812 
UnPack_12(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1813 PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1814 {
1815   PetscInt i,idy;
1816 
1817   PetscFunctionBegin;
1818   switch (addv) {
1819   case INSERT_VALUES:
1820   case INSERT_ALL_VALUES:
1821     for (i=0; i<n; i++) {
1822       idy       = *indicesy++;
1823       y[idy]    = x[0];
1824       y[idy+1]  = x[1];
1825       y[idy+2]  = x[2];
1826       y[idy+3]  = x[3];
1827       y[idy+4]  = x[4];
1828       y[idy+5]  = x[5];
1829       y[idy+6]  = x[6];
1830       y[idy+7]  = x[7];
1831       y[idy+8]  = x[8];
1832       y[idy+9]  = x[9];
1833       y[idy+10] = x[10];
1834       y[idy+11] = x[11];
1835       x        += 12;
1836     }
1837     break;
1838   case ADD_VALUES:
1839   case ADD_ALL_VALUES:
1840     for (i=0; i<n; i++) {
1841       idy        = *indicesy++;
1842       y[idy]    += x[0];
1843       y[idy+1]  += x[1];
1844       y[idy+2]  += x[2];
1845       y[idy+3]  += x[3];
1846       y[idy+4]  += x[4];
1847       y[idy+5]  += x[5];
1848       y[idy+6]  += x[6];
1849       y[idy+7]  += x[7];
1850       y[idy+8]  += x[8];
1851       y[idy+9]  += x[9];
1852       y[idy+10] += x[10];
1853       y[idy+11] += x[11];
1854       x         += 12;
1855     }
1856     break;
1857 #if !defined(PETSC_USE_COMPLEX)
1858   case MAX_VALUES:
1859     for (i=0; i<n; i++) {
1860       idy       = *indicesy++;
1861       y[idy]    = PetscMax(y[idy],x[0]);
1862       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1863       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1864       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1865       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1866       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1867       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1868       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1869       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1870       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1871       y[idy+10] = PetscMax(y[idy+10],x[10]);
1872       y[idy+11] = PetscMax(y[idy+11],x[11]);
1873       x        += 12;
1874     }
1875 #else
1876   case MAX_VALUES:
1877 #endif
1878   case NOT_SET_VALUES:
1879     break;
1880   default:
1881     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1882   }
1883   PetscFunctionReturn(0);
1884 }
1885 
Scatter_12(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1886 PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1887 {
1888   PetscInt i,idx,idy;
1889 
1890   PetscFunctionBegin;
1891   switch (addv) {
1892   case INSERT_VALUES:
1893   case INSERT_ALL_VALUES:
1894     for (i=0; i<n; i++) {
1895       idx       = *indicesx++;
1896       idy       = *indicesy++;
1897       y[idy]    = x[idx];
1898       y[idy+1]  = x[idx+1];
1899       y[idy+2]  = x[idx+2];
1900       y[idy+3]  = x[idx+3];
1901       y[idy+4]  = x[idx+4];
1902       y[idy+5]  = x[idx+5];
1903       y[idy+6]  = x[idx+6];
1904       y[idy+7]  = x[idx+7];
1905       y[idy+8]  = x[idx+8];
1906       y[idy+9]  = x[idx+9];
1907       y[idy+10] = x[idx+10];
1908       y[idy+11] = x[idx+11];
1909     }
1910     break;
1911   case ADD_VALUES:
1912   case ADD_ALL_VALUES:
1913     for (i=0; i<n; i++) {
1914       idx        = *indicesx++;
1915       idy        = *indicesy++;
1916       y[idy]    += x[idx];
1917       y[idy+1]  += x[idx+1];
1918       y[idy+2]  += x[idx+2];
1919       y[idy+3]  += x[idx+3];
1920       y[idy+4]  += x[idx+4];
1921       y[idy+5]  += x[idx+5];
1922       y[idy+6]  += x[idx+6];
1923       y[idy+7]  += x[idx+7];
1924       y[idy+8]  += x[idx+8];
1925       y[idy+9]  += x[idx+9];
1926       y[idy+10] += x[idx+10];
1927       y[idy+11] += x[idx+11];
1928     }
1929     break;
1930 #if !defined(PETSC_USE_COMPLEX)
1931   case MAX_VALUES:
1932     for (i=0; i<n; i++) {
1933       idx       = *indicesx++;
1934       idy       = *indicesy++;
1935       y[idy]    = PetscMax(y[idy],x[idx]);
1936       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1937       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1938       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1939       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1940       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1941       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1942       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1943       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1944       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1945       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1946       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1947     }
1948 #else
1949   case MAX_VALUES:
1950 #endif
1951   case NOT_SET_VALUES:
1952     break;
1953   default:
1954     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1955   }
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 /* ----------------------------------------------------------------------------------------------- */
Pack_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,PetscScalar * y,PetscInt bs)1960 PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1961 {
1962   PetscInt       i,idx;
1963   PetscErrorCode ierr;
1964 
1965   for (i=0; i<n; i++) {
1966     idx   = *indicesx++;
1967     ierr = PetscArraycpy(y,x + idx,bs);CHKERRV(ierr);
1968     y    += bs;
1969   }
1970 }
1971 
UnPack_bs(PetscInt n,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)1972 PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1973 {
1974   PetscInt i,idy,j;
1975   PetscErrorCode ierr;
1976 
1977   PetscFunctionBegin;
1978   switch (addv) {
1979   case INSERT_VALUES:
1980   case INSERT_ALL_VALUES:
1981     for (i=0; i<n; i++) {
1982       idy       = *indicesy++;
1983       ierr = PetscArraycpy(y + idy,x,bs);CHKERRQ(ierr);
1984       x        += bs;
1985     }
1986     break;
1987   case ADD_VALUES:
1988   case ADD_ALL_VALUES:
1989     for (i=0; i<n; i++) {
1990       idy        = *indicesy++;
1991       for (j=0; j<bs; j++) y[idy+j] += x[j];
1992       x         += bs;
1993     }
1994     break;
1995 #if !defined(PETSC_USE_COMPLEX)
1996   case MAX_VALUES:
1997     for (i=0; i<n; i++) {
1998       idy = *indicesy++;
1999       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2000       x  += bs;
2001     }
2002 #else
2003   case MAX_VALUES:
2004 #endif
2005   case NOT_SET_VALUES:
2006     break;
2007   default:
2008     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
Scatter_bs(PetscInt n,const PetscInt * indicesx,const PetscScalar * x,const PetscInt * indicesy,PetscScalar * y,InsertMode addv,PetscInt bs)2013 PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2014 {
2015   PetscInt i,idx,idy,j;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   switch (addv) {
2020   case INSERT_VALUES:
2021   case INSERT_ALL_VALUES:
2022     for (i=0; i<n; i++) {
2023       idx       = *indicesx++;
2024       idy       = *indicesy++;
2025       ierr = PetscArraycpy(y + idy, x + idx,bs);CHKERRQ(ierr);
2026     }
2027     break;
2028   case ADD_VALUES:
2029   case ADD_ALL_VALUES:
2030     for (i=0; i<n; i++) {
2031       idx        = *indicesx++;
2032       idy        = *indicesy++;
2033       for (j=0; j<bs; j++)  y[idy+j] += x[idx+j];
2034     }
2035     break;
2036 #if !defined(PETSC_USE_COMPLEX)
2037   case MAX_VALUES:
2038     for (i=0; i<n; i++) {
2039       idx       = *indicesx++;
2040       idy       = *indicesy++;
2041       for (j=0; j<bs; j++)  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2042     }
2043 #else
2044   case MAX_VALUES:
2045 #endif
2046   case NOT_SET_VALUES:
2047     break;
2048   default:
2049     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2050   }
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 /* Create the VecScatterBegin/End_P for our chosen block sizes */
2055 #define BS 1
2056 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2057 #define BS 2
2058 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2059 #define BS 3
2060 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2061 #define BS 4
2062 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2063 #define BS 5
2064 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2065 #define BS 6
2066 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2067 #define BS 7
2068 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2069 #define BS 8
2070 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2071 #define BS 9
2072 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2073 #define BS 10
2074 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2075 #define BS 11
2076 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2077 #define BS 12
2078 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2079 #define BS bs
2080 #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2081 
2082 /* ==========================================================================================*/
2083 /*              create parallel to sequential scatter context                                */
2084 
2085 extern PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2086 
2087 /*
2088    bs indicates how many elements there are in each block. Normally this would be 1.
2089 
2090    contains check that PetscMPIInt can handle the sizes needed
2091 */
2092 #include <petscbt.h>
VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2093 PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2094 {
2095   VecScatter_MPI_General *from,*to;
2096   PetscMPIInt            size,rank,mrank,imdex,tag,n;
2097   PetscInt               *source = NULL,*owners = NULL,nxr;
2098   PetscInt               *lowner = NULL,*start = NULL,*lsharedowner = NULL,*sharedstart = NULL,lengthy,lengthx;
2099   PetscMPIInt            *nprocs = NULL,nrecvs,nrecvshared;
2100   PetscInt               i,j,idx = 0,nsends;
2101   PetscMPIInt            *owner = NULL;
2102   PetscInt               *starts = NULL,count,slen,slenshared;
2103   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2104   PetscMPIInt            *onodes1,*olengths1;
2105   MPI_Comm               comm;
2106   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2107   MPI_Status             recv_status,*send_status;
2108   PetscErrorCode         ierr;
2109   PetscShmComm           scomm;
2110   PetscMPIInt            jj;
2111   MPI_Info               info;
2112   MPI_Comm               mscomm;
2113   MPI_Win                sharedoffsetwin;  /* Window that owns sharedspaceoffset */
2114   PetscInt               *sharedspaceoffset;
2115   VecScatterType         type;
2116   PetscBool              mpi3node;
2117   PetscFunctionBegin;
2118   ierr   = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
2119   ierr   = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
2120   ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2121   ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2122 
2123   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
2124   ierr = PetscStrcmp(type,"mpi3node",&mpi3node);CHKERRQ(ierr);
2125 
2126   ierr = PetscShmCommGet(comm,&scomm);CHKERRQ(ierr);
2127   ierr = PetscShmCommGetMpiShmComm(scomm,&mscomm);CHKERRQ(ierr);
2128 
2129   ierr = MPI_Info_create(&info);CHKERRQ(ierr);
2130   ierr = MPI_Info_set(info, "alloc_shared_noncontig", "true");CHKERRQ(ierr);
2131 
2132   if (mpi3node) {
2133     /* Check if (parallel) inidx has duplicate indices.
2134      Current VecScatterEndMPI3Node() (case StoP) writes the sequential vector to the parallel vector.
2135      Writing to the same shared location without variable locking
2136      leads to incorrect scattering. See src/vec/vscat/examples/runex2_5 and runex3_5 */
2137 
2138     PetscInt    *mem,**optr;
2139     MPI_Win     swin;
2140     PetscMPIInt msize;
2141 
2142     ierr = MPI_Comm_size(mscomm,&msize);CHKERRQ(ierr);
2143     ierr = MPI_Comm_rank(mscomm,&mrank);CHKERRQ(ierr);
2144 
2145     ierr = PetscMalloc1(msize,&optr);CHKERRQ(ierr);
2146     ierr = MPIU_Win_allocate_shared((nx+1)*sizeof(PetscInt),sizeof(PetscInt),MPI_INFO_NULL,mscomm,&mem,&swin);CHKERRQ(ierr);
2147 
2148     /* Write local nx and inidx into mem */
2149     mem[0] = nx;
2150     for (i=1; i<=nx; i++) mem[i] = inidx[i-1];
2151     ierr = MPI_Barrier(mscomm);CHKERRQ(ierr); /* sync shared memory */
2152 
2153     if (!mrank) {
2154       PetscBT        table;
2155       /* sz and dsp_unit are not used. Replacing them with NULL would cause MPI_Win_shared_query() crash */
2156       MPI_Aint       sz;
2157       PetscMPIInt    dsp_unit;
2158       PetscInt       N = xin->map->N;
2159 
2160       ierr = PetscBTCreate(N,&table);CHKERRQ(ierr); /* may not be scalable */
2161       ierr = PetscBTMemzero(N,table);CHKERRQ(ierr);
2162 
2163       jj = 0;
2164       while (jj<msize && !ctx->is_duplicate) {
2165         if (jj == mrank) {jj++; continue;}
2166         ierr = MPIU_Win_shared_query(swin,jj,&sz,&dsp_unit,&optr[jj]);CHKERRQ(ierr);
2167         for (j=1; j<=optr[jj][0]; j++) { /* optr[jj][0] = nx */
2168           if (PetscBTLookupSet(table,optr[jj][j])) {
2169             ctx->is_duplicate = PETSC_TRUE; /* only mrank=0 has this info., will be checked in VecScatterEndMPI3Node() */
2170             break;
2171           }
2172         }
2173         jj++;
2174       }
2175       ierr = PetscBTDestroy(&table);CHKERRQ(ierr);
2176     }
2177 
2178     if (swin != MPI_WIN_NULL) {ierr = MPI_Win_free(&swin);CHKERRQ(ierr);}
2179     ierr = PetscFree(optr);CHKERRQ(ierr);
2180   }
2181 
2182   owners = xin->map->range;
2183   ierr   = VecGetSize(yin,&lengthy);CHKERRQ(ierr);
2184   ierr   = VecGetSize(xin,&lengthx);CHKERRQ(ierr);
2185 
2186   /*  first count number of contributors to each processor */
2187   ierr = PetscMalloc2(size,&nprocs,nx,&owner);CHKERRQ(ierr);
2188   ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);
2189 
2190   j      = 0;
2191   nsends = 0;
2192   for (i=0; i<nx; i++) {
2193     ierr = PetscIntMultError(bs,inidx[i],&idx);CHKERRQ(ierr);
2194     if (idx < owners[j]) j = 0;
2195     for (; j<size; j++) {
2196       if (idx < owners[j+1]) {
2197         if (!nprocs[j]++) nsends++;
2198         owner[i] = j;
2199         break;
2200       }
2201     }
2202     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2203   }
2204 
2205   nprocslocal  = nprocs[rank];
2206   nprocs[rank] = 0;
2207   if (nprocslocal) nsends--;
2208   /* inform other processors of number of messages and max length*/
2209   ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
2210   ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
2211   ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
2212   recvtotal = 0;
2213   for (i=0; i<nrecvs; i++) {
2214     ierr = PetscIntSumError(recvtotal,olengths1[i],&recvtotal);CHKERRQ(ierr);
2215   }
2216 
2217   /* post receives:   */
2218   ierr  = PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);CHKERRQ(ierr);
2219   count = 0;
2220   for (i=0; i<nrecvs; i++) {
2221     ierr   = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
2222     count += olengths1[i];
2223   }
2224 
2225   /* do sends:
2226      1) starts[i] gives the starting index in svalues for stuff going to
2227      the ith processor
2228   */
2229   nxr = 0;
2230   for (i=0; i<nx; i++) {
2231     if (owner[i] != rank) nxr++;
2232   }
2233   ierr = PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);CHKERRQ(ierr);
2234 
2235   starts[0]  = 0;
2236   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2237   for (i=0; i<nx; i++) {
2238     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2239   }
2240   starts[0] = 0;
2241   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2242   count = 0;
2243   for (i=0; i<size; i++) {
2244     if (nprocs[i]) {
2245       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
2246     }
2247   }
2248 
2249   /*  wait on receives; this is everyone who we need to deliver data to */
2250   count = nrecvs;
2251   slen  = 0;
2252   nrecvshared = 0;
2253   slenshared  = 0;
2254   while (count) {
2255     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
2256     /* unpack receives into our local space */
2257     ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
2258     ierr = PetscShmCommGlobalToLocal(scomm,onodes1[imdex],&jj);CHKERRQ(ierr);
2259     if (jj > -1) {
2260       ierr = PetscInfo3(NULL,"[%d] Sending values to shared memory partner %d,global rank %d\n",rank,jj,onodes1[imdex]);CHKERRQ(ierr);
2261       nrecvshared++;
2262       slenshared += n;
2263     }
2264     slen += n;
2265     count--;
2266   }
2267 
2268   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2269 
2270   /* allocate entire send scatter context */
2271   ierr          = PetscNewLog(ctx,&to);CHKERRQ(ierr);
2272   to->sharedwin = MPI_WIN_NULL;
2273   to->n         = nrecvs-nrecvshared;
2274 
2275   ierr = PetscMalloc1(nrecvs-nrecvshared,&to->requests);CHKERRQ(ierr);
2276   ierr = PetscMalloc4(bs*(slen-slenshared),&to->values,slen-slenshared,&to->indices,nrecvs-nrecvshared+1,&to->starts,nrecvs-nrecvshared,&to->procs);CHKERRQ(ierr);
2277   ierr = PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);CHKERRQ(ierr);
2278 
2279   ierr = MPI_Comm_size(mscomm,&to->msize);CHKERRQ(ierr);
2280   ierr = PetscMalloc1(slenshared,&to->sharedspaceindices);CHKERRQ(ierr);
2281   ierr = PetscCalloc1(to->msize+1,&to->sharedspacestarts);CHKERRQ(ierr);
2282 
2283   ctx->todata              = (void*)to;
2284   to->starts[0]            = 0;
2285   to->sharedspacestarts[0] = 0;
2286 
2287   /* Allocate shared memory space for shared memory partner communication */
2288   ierr = MPIU_Win_allocate_shared(to->msize*sizeof(PetscInt),sizeof(PetscInt),info,mscomm,&sharedspaceoffset,&sharedoffsetwin);CHKERRQ(ierr);
2289   for (i=0; i<to->msize; i++) sharedspaceoffset[i] = -1; /* mark with -1 for later error checking */
2290   if (nrecvs) {
2291     /* move the data into the send scatter */
2292     base     = owners[rank];
2293     rsvalues = rvalues;
2294     to->n    = 0;
2295     for (i=0; i<nrecvs; i++) {
2296       values = rsvalues;
2297       ierr = PetscShmCommGlobalToLocal(scomm,(PetscMPIInt)onodes1[i],&jj);CHKERRQ(ierr);
2298       if (jj > -1) {
2299         to->sharedspacestarts[jj]   = to->sharedcnt;
2300         to->sharedspacestarts[jj+1] = to->sharedcnt + olengths1[i];
2301         for (j=0; j<olengths1[i]; j++) to->sharedspaceindices[to->sharedcnt + j] = values[j] - base;
2302         sharedspaceoffset[jj]      = to->sharedcnt;
2303         to->sharedcnt             += olengths1[i];
2304         ierr = PetscInfo4(NULL,"[%d] Sending %d values to shared memory partner %d,global rank %d\n",rank,olengths1[i],jj,onodes1[i]);CHKERRQ(ierr);
2305       } else {
2306         to->starts[to->n+1] = to->starts[to->n] + olengths1[i];
2307         to->procs[to->n]    = onodes1[i];
2308         for (j=0; j<olengths1[i]; j++) to->indices[to->starts[to->n] + j] = values[j] - base;
2309         to->n++;
2310       }
2311       rsvalues += olengths1[i];
2312     }
2313   }
2314   ierr = PetscFree(olengths1);CHKERRQ(ierr);
2315   ierr = PetscFree(onodes1);CHKERRQ(ierr);
2316   ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
2317 
2318   if (mpi3node) {
2319     /* to->sharedspace is used only as a flag */
2320     i = 0;
2321     if (to->sharedcnt) i = 1;
2322     ierr = MPIU_Win_allocate_shared(i*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);CHKERRQ(ierr);
2323   } else { /* mpi3 */
2324     ierr = MPIU_Win_allocate_shared(bs*to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);CHKERRQ(ierr);
2325   }
2326   if (to->sharedwin == MPI_WIN_NULL) SETERRQ(PETSC_COMM_SELF,100,"what the");
2327   ierr = MPI_Info_free(&info);CHKERRQ(ierr);
2328 
2329   /* allocate entire receive scatter context */
2330   ierr = PetscNewLog(ctx,&from);CHKERRQ(ierr);
2331   from->sharedwin       = MPI_WIN_NULL;
2332   from->msize           = to->msize;
2333   /* we don't actually know the correct value for from->n at this point so we use the upper bound */
2334   from->n = nsends;
2335 
2336   ierr = PetscMalloc1(nsends,&from->requests);CHKERRQ(ierr);
2337   ierr = PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);CHKERRQ(ierr);
2338   ctx->fromdata = (void*)from;
2339 
2340   ierr  = PetscCalloc1(to->msize+1,&from->sharedspacestarts);CHKERRQ(ierr);
2341   /* move data into receive scatter */
2342   ierr = PetscMalloc2(size,&lowner,nsends+1,&start);CHKERRQ(ierr);
2343   ierr = PetscMalloc2(size,&lsharedowner,to->msize+1,&sharedstart);CHKERRQ(ierr);
2344   count = 0; from->starts[0] = start[0] = 0;
2345   from->sharedcnt = 0;
2346   for (i=0; i<size; i++) {
2347     lsharedowner[i] = -1;
2348     lowner[i]       = -1;
2349     if (nprocs[i]) {
2350       ierr = PetscShmCommGlobalToLocal(scomm,i,&jj);CHKERRQ(ierr);
2351       if (jj > -1) {
2352         from->sharedspacestarts[jj] = sharedstart[jj] = from->sharedcnt;
2353         from->sharedspacestarts[jj+1] = sharedstart[jj+1] = from->sharedspacestarts[jj] + nprocs[i];
2354         from->sharedcnt += nprocs[i];
2355         lsharedowner[i] = jj;
2356       } else {
2357         lowner[i]            = count++;
2358         from->procs[count-1]  = i;
2359         from->starts[count] = start[count] = start[count-1] + nprocs[i];
2360       }
2361     }
2362   }
2363   from->n = count;
2364 
2365   for (i=0; i<nx; i++) {
2366     if (owner[i] != rank && lowner[owner[i]] > -1) {
2367       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2368       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2369     }
2370   }
2371 
2372   /* copy over appropriate parts of inidy[] into from->sharedspaceindices */
2373   if (mpi3node) {
2374     /* in addition, copy over appropriate parts of inidx[] into 2nd part of from->sharedspaceindices */
2375     PetscInt *sharedspaceindicesx;
2376     ierr = PetscMalloc1(2*from->sharedcnt,&from->sharedspaceindices);CHKERRQ(ierr);
2377     sharedspaceindicesx = from->sharedspaceindices + from->sharedcnt;
2378     for (i=0; i<nx; i++) {
2379       if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2380         if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"won");
2381 
2382         from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]] = bs*inidy[i];
2383         sharedspaceindicesx[sharedstart[lsharedowner[owner[i]]]] = bs*inidx[i]-owners[owner[i]]; /* local inidx */
2384         sharedstart[lsharedowner[owner[i]]]++;
2385       }
2386     }
2387   } else { /* mpi3 */
2388     ierr = PetscMalloc1(from->sharedcnt,&from->sharedspaceindices);CHKERRQ(ierr);
2389     for (i=0; i<nx; i++) {
2390       if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2391         if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"won");
2392         from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]++] = bs*inidy[i];
2393       }
2394     }
2395   }
2396 
2397   ierr = PetscFree2(lowner,start);CHKERRQ(ierr);
2398   ierr = PetscFree2(lsharedowner,sharedstart);CHKERRQ(ierr);
2399   ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);
2400 
2401   /* wait on sends */
2402   if (nsends) {
2403     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
2404     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
2405     ierr = PetscFree(send_status);CHKERRQ(ierr);
2406   }
2407   ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
2408 
2409   if (nprocslocal) {
2410     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2411     /* we have a scatter to ourselves */
2412     ierr = PetscMalloc1(nt,&to->local.vslots);CHKERRQ(ierr);
2413     ierr = PetscMalloc1(nt,&from->local.vslots);CHKERRQ(ierr);
2414     nt   = 0;
2415     for (i=0; i<nx; i++) {
2416       idx = bs*inidx[i];
2417       if (idx >= owners[rank] && idx < owners[rank+1]) {
2418         to->local.vslots[nt]     = idx - owners[rank];
2419         from->local.vslots[nt++] = bs*inidy[i];
2420         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2421       }
2422     }
2423     ierr = PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));CHKERRQ(ierr);
2424   } else {
2425     from->local.n      = 0;
2426     from->local.vslots = NULL;
2427     to->local.n        = 0;
2428     to->local.vslots   = NULL;
2429   }
2430 
2431   /* Get the shared memory address for all processes we will be copying data from */
2432   ierr = PetscCalloc1(to->msize,&from->sharedspaces);CHKERRQ(ierr);
2433   ierr = PetscCalloc1(to->msize,&from->sharedspacesoffset);CHKERRQ(ierr);
2434   ierr = MPI_Comm_rank(mscomm,&mrank);CHKERRQ(ierr);
2435   for (jj=0; jj<to->msize; jj++) {
2436     MPI_Aint    isize;
2437     PetscMPIInt disp_unit;
2438     PetscInt    *ptr;
2439     ierr = MPIU_Win_shared_query(to->sharedwin,jj,&isize,&disp_unit,&from->sharedspaces[jj]);CHKERRQ(ierr);
2440     ierr = MPIU_Win_shared_query(sharedoffsetwin,jj,&isize,&disp_unit,&ptr);CHKERRQ(ierr);
2441     from->sharedspacesoffset[jj] = ptr[mrank];
2442   }
2443   ierr = MPI_Win_free(&sharedoffsetwin);CHKERRQ(ierr);
2444 
2445   if (mpi3node && !from->sharedspace) {
2446     /* comput from->notdone to be used by VecScatterEndMPI3Node() */
2447     PetscInt notdone = 0;
2448     for (i=0; i<from->msize; i++) {
2449       if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
2450         notdone++;
2451       }
2452     }
2453     from->notdone = notdone;
2454   }
2455 
2456   from->local.nonmatching_computed = PETSC_FALSE;
2457   from->local.n_nonmatching        = 0;
2458   from->local.slots_nonmatching    = NULL;
2459   to->local.nonmatching_computed   = PETSC_FALSE;
2460   to->local.n_nonmatching          = 0;
2461   to->local.slots_nonmatching      = NULL;
2462 
2463   from->format = VEC_SCATTER_MPI_GENERAL;
2464   to->format   = VEC_SCATTER_MPI_GENERAL;
2465   from->bs     = bs;
2466   to->bs       = bs;
2467 
2468   ierr = VecScatterCreateCommon_PtoS_MPI3(from,to,ctx);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 /*
2473    bs indicates how many elements there are in each block. Normally this would be 1.
2474 */
VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General * from,VecScatter_MPI_General * to,VecScatter ctx)2475 PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2476 {
2477   MPI_Comm       comm;
2478   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2479   PetscInt       bs   = to->bs;
2480   PetscMPIInt    size;
2481   PetscInt       i, n;
2482   PetscErrorCode ierr;
2483   VecScatterType type;
2484   PetscBool      mpi3node;
2485 
2486   PetscFunctionBegin;
2487   ierr = PetscObjectGetComm((PetscObject)ctx,&comm);CHKERRQ(ierr);
2488   ierr = PetscObjectGetNewTag((PetscObject)ctx,&tagr);CHKERRQ(ierr);
2489   ctx->ops->destroy = VecScatterDestroy_PtoP_MPI3;
2490 
2491   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2492   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2493   to->contiq = PETSC_FALSE;
2494   n = from->starts[from->n];
2495   from->contiq = PETSC_TRUE;
2496   for (i=1; i<n; i++) {
2497     if (from->indices[i] != from->indices[i-1] + bs) {
2498       from->contiq = PETSC_FALSE;
2499       break;
2500     }
2501   }
2502 
2503   ctx->ops->copy      = VecScatterCopy_PtoP_AllToAll;
2504   {
2505     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2506     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2507     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2508     MPI_Request *rev_swaits,*rev_rwaits;
2509     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2510 
2511     /* allocate additional wait variables for the "reverse" scatter */
2512     ierr = PetscMalloc1(to->n,&rev_rwaits);CHKERRQ(ierr);
2513     ierr = PetscMalloc1(from->n,&rev_swaits);CHKERRQ(ierr);
2514     to->rev_requests   = rev_rwaits;
2515     from->rev_requests = rev_swaits;
2516 
2517     for (i=0; i<from->n; i++) {
2518       ierr = MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);CHKERRQ(ierr);
2519     }
2520 
2521     for (i=0; i<to->n; i++) {
2522       ierr = MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
2523     }
2524     /* Register receives for scatter and reverse */
2525     for (i=0; i<from->n; i++) {
2526       ierr = MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
2527     }
2528     for (i=0; i<to->n; i++) {
2529       ierr = MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);CHKERRQ(ierr);
2530     }
2531     ctx->ops->copy = VecScatterCopy_PtoP_X;
2532   }
2533   ierr = PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);CHKERRQ(ierr);
2534 
2535   if (PetscDefined(USE_DEBUG)) {
2536     ierr = MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
2537     ierr = MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));CHKERRQ(ierr);
2538     if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2539   }
2540 
2541   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
2542   ierr = PetscStrcmp(type,"mpi3node",&mpi3node);CHKERRQ(ierr);
2543 
2544   if (mpi3node) {
2545     switch (bs) {
2546     case 12:
2547       ctx->ops->begin = VecScatterBeginMPI3Node_12;
2548       ctx->ops->end   = VecScatterEndMPI3Node_12;
2549       break;
2550     case 11:
2551       ctx->ops->begin = VecScatterBeginMPI3Node_11;
2552       ctx->ops->end   = VecScatterEndMPI3Node_11;
2553       break;
2554     case 10:
2555       ctx->ops->begin = VecScatterBeginMPI3Node_10;
2556       ctx->ops->end   = VecScatterEndMPI3Node_10;
2557       break;
2558     case 9:
2559       ctx->ops->begin = VecScatterBeginMPI3Node_9;
2560       ctx->ops->end   = VecScatterEndMPI3Node_9;
2561       break;
2562     case 8:
2563       ctx->ops->begin = VecScatterBeginMPI3Node_8;
2564       ctx->ops->end   = VecScatterEndMPI3Node_8;
2565       break;
2566     case 7:
2567       ctx->ops->begin = VecScatterBeginMPI3Node_7;
2568       ctx->ops->end   = VecScatterEndMPI3Node_7;
2569       break;
2570     case 6:
2571       ctx->ops->begin = VecScatterBeginMPI3Node_6;
2572       ctx->ops->end   = VecScatterEndMPI3Node_6;
2573       break;
2574     case 5:
2575       ctx->ops->begin = VecScatterBeginMPI3Node_5;
2576       ctx->ops->end   = VecScatterEndMPI3Node_5;
2577       break;
2578     case 4:
2579       ctx->ops->begin = VecScatterBeginMPI3Node_4;
2580       ctx->ops->end   = VecScatterEndMPI3Node_4;
2581       break;
2582     case 3:
2583       ctx->ops->begin = VecScatterBeginMPI3Node_3;
2584       ctx->ops->end   = VecScatterEndMPI3Node_3;
2585       break;
2586     case 2:
2587       ctx->ops->begin = VecScatterBeginMPI3Node_2;
2588       ctx->ops->end   = VecScatterEndMPI3Node_2;
2589       break;
2590     case 1:
2591       ctx->ops->begin = VecScatterBeginMPI3Node_1;
2592       ctx->ops->end   = VecScatterEndMPI3Node_1;
2593       break;
2594     default:
2595       ctx->ops->begin = VecScatterBegin_bs;
2596       ctx->ops->end   = VecScatterEnd_bs;
2597     }
2598   } else { /* !mpi3node */
2599 
2600     switch (bs) {
2601     case 12:
2602       ctx->ops->begin = VecScatterBegin_12;
2603       ctx->ops->end   = VecScatterEnd_12;
2604       break;
2605     case 11:
2606       ctx->ops->begin = VecScatterBegin_11;
2607       ctx->ops->end   = VecScatterEnd_11;
2608       break;
2609     case 10:
2610       ctx->ops->begin = VecScatterBegin_10;
2611       ctx->ops->end   = VecScatterEnd_10;
2612       break;
2613     case 9:
2614       ctx->ops->begin = VecScatterBegin_9;
2615       ctx->ops->end   = VecScatterEnd_9;
2616       break;
2617     case 8:
2618       ctx->ops->begin = VecScatterBegin_8;
2619       ctx->ops->end   = VecScatterEnd_8;
2620       break;
2621     case 7:
2622       ctx->ops->begin = VecScatterBegin_7;
2623       ctx->ops->end   = VecScatterEnd_7;
2624       break;
2625     case 6:
2626       ctx->ops->begin = VecScatterBegin_6;
2627       ctx->ops->end   = VecScatterEnd_6;
2628       break;
2629     case 5:
2630       ctx->ops->begin = VecScatterBegin_5;
2631       ctx->ops->end   = VecScatterEnd_5;
2632       break;
2633     case 4:
2634       ctx->ops->begin = VecScatterBegin_4;
2635       ctx->ops->end   = VecScatterEnd_4;
2636       break;
2637     case 3:
2638       ctx->ops->begin = VecScatterBegin_3;
2639       ctx->ops->end   = VecScatterEnd_3;
2640       break;
2641     case 2:
2642       ctx->ops->begin = VecScatterBegin_2;
2643       ctx->ops->end   = VecScatterEnd_2;
2644       break;
2645     case 1:
2646       if (mpi3node) {
2647         ctx->ops->begin = VecScatterBeginMPI3Node_1;
2648         ctx->ops->end   = VecScatterEndMPI3Node_1;
2649       } else {
2650         ctx->ops->begin = VecScatterBegin_1;
2651         ctx->ops->end   = VecScatterEnd_1;
2652       }
2653       break;
2654     default:
2655       ctx->ops->begin = VecScatterBegin_bs;
2656       ctx->ops->end   = VecScatterEnd_bs;
2657     }
2658   }
2659 
2660   ctx->ops->view = VecScatterView_MPI;
2661 
2662   /* try to optimize PtoP vecscatter with memcpy's */
2663   ierr = VecScatterMemcpyPlanCreate_PtoP(to,from);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 /* ------------------------------------------------------------------------------------*/
2668 /*
2669          Scatter from local Seq vectors to a parallel vector.
2670          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2671          reverses the result.
2672 */
VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2673 PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2674 {
2675   PetscErrorCode         ierr;
2676   MPI_Request            *waits;
2677   VecScatter_MPI_General *to,*from;
2678 
2679   PetscFunctionBegin;
2680   ierr          = VecScatterCreateLocal_PtoS_MPI3(ny,inidy,nx,inidx,yin,xin,bs,ctx);CHKERRQ(ierr);
2681   to            = (VecScatter_MPI_General*)ctx->fromdata;
2682   from          = (VecScatter_MPI_General*)ctx->todata;
2683   ctx->todata   = (void*)to;
2684   ctx->fromdata = (void*)from;
2685   /* these two are special, they are ALWAYS stored in to struct */
2686   to->sstatus   = from->sstatus;
2687   to->rstatus   = from->rstatus;
2688 
2689   from->sstatus = NULL;
2690   from->rstatus = NULL;
2691   waits              = from->rev_requests;
2692   from->rev_requests = from->requests;
2693   from->requests     = waits;
2694   waits              = to->rev_requests;
2695   to->rev_requests   = to->requests;
2696   to->requests       = waits;
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 /* ---------------------------------------------------------------------------------*/
VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt * inidx,PetscInt ny,const PetscInt * inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)2701 PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2702 {
2703   PetscErrorCode ierr;
2704   PetscMPIInt    size,rank,tag,imdex,n;
2705   PetscInt       *owners = xin->map->range;
2706   PetscMPIInt    *nprocs = NULL;
2707   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2708   PetscMPIInt    *owner   = NULL;
2709   PetscInt       *starts  = NULL,count,slen;
2710   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2711   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2712   MPI_Comm       comm;
2713   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2714   MPI_Status     recv_status,*send_status = NULL;
2715   PetscBool      duplicate = PETSC_FALSE;
2716   PetscBool      found = PETSC_FALSE;
2717 
2718   PetscFunctionBegin;
2719   ierr = PetscObjectGetNewTag((PetscObject)ctx,&tag);CHKERRQ(ierr);
2720   ierr = PetscObjectGetComm((PetscObject)xin,&comm);CHKERRQ(ierr);
2721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2722   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2723   if (size == 1) {
2724     ierr = VecScatterCreateLocal_StoP_MPI3(nx,inidx,ny,inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
2725     PetscFunctionReturn(0);
2726   }
2727 
2728   /*
2729      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2730      They then call the StoPScatterCreate()
2731   */
2732   /*  first count number of contributors to each processor */
2733   ierr = PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);CHKERRQ(ierr);
2734   ierr = PetscArrayzero(nprocs,size);CHKERRQ(ierr);
2735 
2736   lastidx = -1;
2737   j       = 0;
2738   for (i=0; i<nx; i++) {
2739     /* if indices are NOT locally sorted, need to start search at the beginning */
2740     if (lastidx > (idx = bs*inidx[i])) j = 0;
2741     lastidx = idx;
2742     for (; j<size; j++) {
2743       if (idx >= owners[j] && idx < owners[j+1]) {
2744         nprocs[j]++;
2745         owner[i] = j;
2746         found = PETSC_TRUE;
2747         break;
2748       }
2749     }
2750     if (PetscUnlikelyDebug(!found)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2751     found = PETSC_FALSE;
2752   }
2753   nsends = 0;
2754   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2755 
2756   /* inform other processors of number of messages and max length*/
2757   ierr = PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);CHKERRQ(ierr);
2758   ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
2759   ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
2760   recvtotal = 0;
2761   for (i=0; i<nrecvs; i++) {
2762     ierr = PetscIntSumError(recvtotal,olengths1[i],&recvtotal);CHKERRQ(ierr);
2763   }
2764 
2765   /* post receives:   */
2766   ierr = PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);CHKERRQ(ierr);
2767 
2768   count = 0;
2769   for (i=0; i<nrecvs; i++) {
2770     ierr = MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
2771     ierr = PetscIntSumError(count,olengths1[i],&count);CHKERRQ(ierr);
2772   }
2773   ierr = PetscFree(onodes1);CHKERRQ(ierr);
2774 
2775   /* do sends:
2776       1) starts[i] gives the starting index in svalues for stuff going to
2777          the ith processor
2778   */
2779   starts[0]= 0;
2780   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2781   for (i=0; i<nx; i++) {
2782     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2783     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2784   }
2785 
2786   starts[0] = 0;
2787   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2788   count = 0;
2789   for (i=0; i<size; i++) {
2790     if (nprocs[i]) {
2791       ierr = MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
2792       count++;
2793     }
2794   }
2795   ierr = PetscFree3(nprocs,owner,starts);CHKERRQ(ierr);
2796 
2797   /*  wait on receives */
2798   count = nrecvs;
2799   slen  = 0;
2800   while (count) {
2801     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
2802     /* unpack receives into our local space */
2803     ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
2804     slen += n/2;
2805     count--;
2806   }
2807   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2808 
2809   ierr     = PetscMalloc2(slen,&local_inidx,slen,&local_inidy);CHKERRQ(ierr);
2810   base     = owners[rank];
2811   count    = 0;
2812   rsvalues = rvalues;
2813   for (i=0; i<nrecvs; i++) {
2814     values    = rsvalues;
2815     rsvalues += 2*olengths1[i];
2816     for (j=0; j<olengths1[i]; j++) {
2817       local_inidx[count]   = values[2*j] - base;
2818       local_inidy[count++] = values[2*j+1];
2819     }
2820   }
2821   ierr = PetscFree(olengths1);CHKERRQ(ierr);
2822 
2823   /* wait on sends */
2824   if (nsends) {ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);}
2825   ierr = PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);CHKERRQ(ierr);
2826 
2827   /*
2828      should sort and remove duplicates from local_inidx,local_inidy
2829   */
2830 #if defined(do_it_slow)
2831   /* sort on the from index */
2832   ierr  = PetscSortIntWithArray(slen,local_inidx,local_inidy);CHKERRQ(ierr);
2833   start = 0;
2834   while (start < slen) {
2835     count = start+1;
2836     last  = local_inidx[start];
2837     while (count < slen && last == local_inidx[count]) count++;
2838     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2839       /* sort on to index */
2840       ierr = PetscSortInt(count-start,local_inidy+start);CHKERRQ(ierr);
2841     }
2842     /* remove duplicates; not most efficient way, but probably good enough */
2843     i = start;
2844     while (i < count-1) {
2845       if (local_inidy[i] != local_inidy[i+1]) i++;
2846       else { /* found a duplicate */
2847         duplicate = PETSC_TRUE;
2848         for (j=i; j<slen-1; j++) {
2849           local_inidx[j] = local_inidx[j+1];
2850           local_inidy[j] = local_inidy[j+1];
2851         }
2852         slen--;
2853         count--;
2854       }
2855     }
2856     start = count;
2857   }
2858 #endif
2859   if (duplicate) {
2860     ierr = PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");CHKERRQ(ierr);
2861   }
2862   ierr = VecScatterCreateLocal_StoP_MPI3(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);CHKERRQ(ierr);
2863   ierr = PetscFree2(local_inidx,local_inidy);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
VecScatterSetUp_MPI3(VecScatter ctx)2867 PetscErrorCode VecScatterSetUp_MPI3(VecScatter ctx)
2868 {
2869   PetscErrorCode ierr;
2870 
2871   PetscFunctionBegin;
2872   ierr = VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI3,VecScatterCreateLocal_StoP_MPI3,VecScatterCreateLocal_PtoP_MPI3);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
VecScatterCreate_MPI3(VecScatter ctx)2876 PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
2877 {
2878   PetscErrorCode    ierr;
2879 
2880   PetscFunctionBegin;
2881   ctx->ops->setup = VecScatterSetUp_MPI3;
2882   ierr = PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);CHKERRQ(ierr);
2883   ierr = PetscInfo(ctx,"Using MPI3 for vector scatter\n");CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
VecScatterCreate_MPI3Node(VecScatter ctx)2887 PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
2888 {
2889   PetscErrorCode    ierr;
2890 
2891   PetscFunctionBegin;
2892   ctx->ops->setup = VecScatterSetUp_MPI3;
2893   ierr = PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);CHKERRQ(ierr);
2894   ierr = PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");CHKERRQ(ierr);
2895   PetscFunctionReturn(0);
2896 }
2897