1 
2 /*
3      Code for creating scatters between vectors. This file
4   includes the code for scattering between sequential vectors and
5   some special cases for parallel scatters.
6 */
7 
8 #include <petsc/private/vecscatterimpl.h>    /*I   "petscvec.h"    I*/
9 
10 #if defined(PETSC_HAVE_CUDA)
11 #include <petsc/private/cudavecimpl.h>
12 #endif
13 
14 /*
15      Checks if any indices are less than zero and generates an error
16 */
VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt * idx)17 static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
18 {
19   PetscInt i;
20 
21   PetscFunctionBegin;
22   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
23   for (i=0; i<n; i++) {
24     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
25     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
26   }
27   PetscFunctionReturn(0);
28 }
29 
VecScatterDestroy_SGToSG(VecScatter ctx)30 PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
31 {
32   PetscErrorCode ierr;
33 
34   PetscFunctionBegin;
35   ierr = PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);CHKERRQ(ierr);
36   ierr = VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);CHKERRQ(ierr);
37   ierr = VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);CHKERRQ(ierr);
38   ierr = PetscFree2(ctx->todata,ctx->fromdata);CHKERRQ(ierr);
39   PetscFunctionReturn(0);
40 }
41 
VecScatterDestroy_SGToSS(VecScatter ctx)42 PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
43 {
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);CHKERRQ(ierr);
48   ierr = VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);CHKERRQ(ierr);
49   ierr = PetscFree2(ctx->todata,ctx->fromdata);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
VecScatterDestroy_SSToSG(VecScatter ctx)53 PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
54 {
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);CHKERRQ(ierr);
59   ierr = VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);CHKERRQ(ierr);
60   ierr = PetscFree2(ctx->todata,ctx->fromdata);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 
VecScatterDestroy_SSToSS(VecScatter ctx)64 PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
65 {
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = PetscFree2(ctx->todata,ctx->fromdata);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 /* --------------------------------------------------------------------------------------*/
74 /*
75         Scatter: sequential general to sequential general
76 */
VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)77 PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
78 {
79   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
80   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
81   PetscErrorCode         ierr;
82   PetscInt               i,n = gen_from->n,*fslots,*tslots;
83   PetscScalar            *xv,*yv;
84 #if defined(PETSC_HAVE_CUDA)
85   PetscBool              is_veccuda,isy_veccuda;
86 #endif
87 
88   PetscFunctionBegin;
89 #if defined(PETSC_HAVE_CUDA)
90   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
91   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
92   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
93     /* create the scatter indices if not done already */
94     if (!ctx->spptr) {
95       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
96       fslots = gen_from->vslots;
97       tslots = gen_to->vslots;
98       ierr = VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
99     }
100     /* next do the scatter */
101     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
102     PetscFunctionReturn(0);
103   }
104 #endif
105 
106   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
107   if (mode & SCATTER_REVERSE) {
108     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
109     gen_from = (VecScatter_Seq_General*)ctx->todata;
110   }
111   fslots = gen_from->vslots;
112   tslots = gen_to->vslots;
113 
114   if (gen_from->memcpy_plan.optimized[0]) { ierr = VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv);CHKERRQ(ierr); }
115   else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]]; }
116   else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
117 #if !defined(PETSC_USE_COMPLEX)
118   else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
119 #endif
120   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
122   PetscFunctionReturn(0);
123 }
124 
125 /*
126     Scatter: sequential general to sequential stride 1
127 */
VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)128 PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
129 {
130   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
131   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
132   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
133   PetscErrorCode         ierr;
134   PetscInt               first = gen_to->first;
135   PetscScalar            *xv,*yv;
136 #if defined(PETSC_HAVE_CUDA)
137   PetscBool              is_veccuda,isy_veccuda;
138 #endif
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_HAVE_CUDA)
142   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
143   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
144   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
145     /* create the scatter indices if not done already */
146     if (!ctx->spptr) {
147       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
148       PetscInt *tslots = 0;
149       ierr = VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
150     }
151     /* next do the scatter */
152     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
153     PetscFunctionReturn(0);
154   }
155 #endif
156 
157   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
158   if (mode & SCATTER_REVERSE) {
159     PetscScalar *xxv = xv + first;
160     if (gen_from->memcpy_plan.optimized[0]) { ierr = VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1);CHKERRQ(ierr); }
161     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]]  = xxv[i]; }
162     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
163 #if !defined(PETSC_USE_COMPLEX)
164     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]]  = PetscMax(yv[fslots[i]],xxv[i]); }
165 #endif
166     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
167   } else {
168     PetscScalar *yyv = yv + first;
169     if (gen_from->memcpy_plan.optimized[0]) { ierr = VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1);CHKERRQ(ierr); }
170     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[fslots[i]]; }
171     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
172 #if !defined(PETSC_USE_COMPLEX)
173     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[fslots[i]]); }
174 #endif
175     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
176   }
177   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 /*
182    Scatter: sequential general to sequential stride
183 */
VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)184 PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
185 {
186   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
187   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
188   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
189   PetscErrorCode         ierr;
190   PetscInt               first = gen_to->first,step = gen_to->step;
191   PetscScalar            *xv,*yv;
192 #if defined(PETSC_HAVE_CUDA)
193   PetscBool              is_veccuda,isy_veccuda;
194 #endif
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_HAVE_CUDA)
198   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
199   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
200   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
201     /* create the scatter indices if not done already */
202     if (!ctx->spptr) {
203       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
204       PetscInt * tslots = 0;
205       ierr = VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
206     }
207     /* next do the scatter */
208     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
209     PetscFunctionReturn(0);
210   }
211 #endif
212 
213   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
214   if (mode & SCATTER_REVERSE) {
215     if (addv == INSERT_VALUES) {
216       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
217     } else if (addv == ADD_VALUES) {
218       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
219 #if !defined(PETSC_USE_COMPLEX)
220     } else if (addv == MAX_VALUES) {
221       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
222 #endif
223     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
224   } else {
225     if (addv == INSERT_VALUES) {
226       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
227     } else if (addv == ADD_VALUES) {
228       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
229 #if !defined(PETSC_USE_COMPLEX)
230     } else if (addv == MAX_VALUES) {
231       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
232 #endif
233     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
234   }
235   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 /*
240     Scatter: sequential stride 1 to sequential general
241 */
VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)242 PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
243 {
244   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
245   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
246   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
247   PetscErrorCode         ierr;
248   PetscInt               first = gen_from->first;
249   PetscScalar            *xv,*yv;
250 #if defined(PETSC_HAVE_CUDA)
251   PetscBool              is_veccuda,isy_veccuda;
252 #endif
253 
254   PetscFunctionBegin;
255 #if defined(PETSC_HAVE_CUDA)
256   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
257   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
258   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
259     /* create the scatter indices if not done already */
260     if (!ctx->spptr) {
261       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
262       PetscInt *fslots = 0;
263       ierr = VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
264     }
265     /* next do the scatter */
266     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
267     PetscFunctionReturn(0);
268   }
269 #endif
270 
271   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
272   if (mode & SCATTER_REVERSE) {
273     PetscScalar *yyv = yv + first;
274     if (gen_to->memcpy_plan.optimized[0]) { ierr = VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1);CHKERRQ(ierr); }
275     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[tslots[i]]; }
276     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
277 #if !defined(PETSC_USE_COMPLEX)
278     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[tslots[i]]); }
279 #endif
280     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
281   } else {
282     PetscScalar *xxv = xv + first;
283     if (gen_to->memcpy_plan.optimized[0]) { ierr = VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1);CHKERRQ(ierr); }
284     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xxv[i]; }
285     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
286 #if !defined(PETSC_USE_COMPLEX)
287     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xxv[i]); }
288 #endif
289     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
290   }
291   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 /*
296    Scatter: sequential stride to sequential general
297 */
VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)298 PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
299 {
300   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
301   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
302   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
303   PetscErrorCode         ierr;
304   PetscInt               first = gen_from->first,step = gen_from->step;
305   PetscScalar            *xv,*yv;
306 #if defined(PETSC_HAVE_CUDA)
307   PetscBool              is_veccuda,isy_veccuda;
308 #endif
309 
310   PetscFunctionBegin;
311 #if defined(PETSC_HAVE_CUDA)
312   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
313   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
314   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
315     /* create the scatter indices if not done already */
316     if (!ctx->spptr) {
317       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
318       PetscInt *fslots = 0;
319       ierr = VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
320     }
321     /* next do the scatter */
322     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
323     PetscFunctionReturn(0);
324   }
325 #endif
326 
327   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
328   if (mode & SCATTER_REVERSE) {
329     if (addv == INSERT_VALUES) {
330       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
331     } else if (addv == ADD_VALUES) {
332       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
333 #if !defined(PETSC_USE_COMPLEX)
334     } else if (addv == MAX_VALUES) {
335       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
336 #endif
337     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
338   } else {
339     if (addv == INSERT_VALUES) {
340       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
341     } else if (addv == ADD_VALUES) {
342       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
343 #if !defined(PETSC_USE_COMPLEX)
344     } else if (addv == MAX_VALUES) {
345       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
346 #endif
347     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
348   }
349   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)353 PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
354 {
355   PetscErrorCode         ierr;
356   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
357   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
358   PetscInt               i;
359   PetscBool              isascii;
360 
361   PetscFunctionBegin;
362   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
363   if (isascii) {
364     ierr = PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");CHKERRQ(ierr);
365     for (i=0; i<in_to->n; i++) {
366       ierr = PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);CHKERRQ(ierr);
367     }
368     if (in_to->memcpy_plan.optimized[0]) {
369       ierr = PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);CHKERRQ(ierr);
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 /*
375      Scatter: sequential stride to sequential stride
376 */
VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)377 PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
378 {
379   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
380   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
381   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
382   PetscErrorCode        ierr;
383   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
384   PetscScalar           *xv,*yv;
385 #if defined(PETSC_HAVE_CUDA)
386   PetscBool              is_veccuda,isy_veccuda;
387 #endif
388 
389   PetscFunctionBegin;
390 #if defined(PETSC_HAVE_CUDA)
391   ierr = PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
392   ierr = PetscObjectTypeCompareAny((PetscObject)y,&isy_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");CHKERRQ(ierr);
393   if (is_veccuda && isy_veccuda && x->offloadmask == PETSC_OFFLOAD_GPU) {
394     /* create the scatter indices if not done already */
395     if (!ctx->spptr) {
396       PetscInt *tslots = 0,*fslots = 0;
397       ierr = VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));CHKERRQ(ierr);
398     }
399     /* next do the scatter */
400     ierr = VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);CHKERRQ(ierr);
401     PetscFunctionReturn(0);
402   }
403 #endif
404 
405   ierr = VecGetArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
406   if (mode & SCATTER_REVERSE) {
407     from_first = gen_to->first;
408     to_first   = gen_from->first;
409     from_step  = gen_to->step;
410     to_step    = gen_from->step;
411   }
412 
413   if (addv == INSERT_VALUES) {
414     if (to_step == 1 && from_step == 1) {
415       ierr = PetscArraycpy(yv+to_first,xv+from_first,n);CHKERRQ(ierr);
416     } else  {
417       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
418     }
419   } else if (addv == ADD_VALUES) {
420     if (to_step == 1 && from_step == 1) {
421       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
422       for (i=0; i<n; i++) yyv[i] += xxv[i];
423     } else {
424       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
425     }
426 #if !defined(PETSC_USE_COMPLEX)
427   } else if (addv == MAX_VALUES) {
428     if (to_step == 1 && from_step == 1) {
429       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
430       for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
431     } else {
432       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
433     }
434 #endif
435   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
436   ierr = VecRestoreArrayPair(x,y,&xv,&yv);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 /* --------------------------------------------------------------------------*/
441 
442 
VecScatterCopy_SGToSG(VecScatter in,VecScatter out)443 PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
444 {
445   PetscErrorCode         ierr;
446   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
447   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
448 
449   PetscFunctionBegin;
450   out->ops->begin   = in->ops->begin;
451   out->ops->end     = in->ops->end;
452   out->ops->copy    = in->ops->copy;
453   out->ops->destroy = in->ops->destroy;
454   out->ops->view    = in->ops->view;
455 
456   ierr                         = PetscMalloc2(1,&out_to,1,&out_from);CHKERRQ(ierr);
457   ierr                         = PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);CHKERRQ(ierr);
458   out_to->n                    = in_to->n;
459   out_to->format               = in_to->format;
460   out_to->nonmatching_computed = PETSC_FALSE;
461   out_to->slots_nonmatching    = NULL;
462   ierr = PetscArraycpy(out_to->vslots,in_to->vslots,out_to->n);CHKERRQ(ierr);
463   ierr = VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);CHKERRQ(ierr);
464 
465   out_from->n                    = in_from->n;
466   out_from->format               = in_from->format;
467   out_from->nonmatching_computed = PETSC_FALSE;
468   out_from->slots_nonmatching    = NULL;
469   ierr = PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);CHKERRQ(ierr);
470   ierr = VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);CHKERRQ(ierr);
471 
472   out->todata   = (void*)out_to;
473   out->fromdata = (void*)out_from;
474   PetscFunctionReturn(0);
475 }
476 
VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)477 PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
478 {
479   PetscErrorCode         ierr;
480   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
481   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
482   PetscInt               i;
483   PetscBool              isascii;
484 
485   PetscFunctionBegin;
486   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
487   if (isascii) {
488     ierr = PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");CHKERRQ(ierr);
489     for (i=0; i<in_to->n; i++) {
490       ierr = PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);CHKERRQ(ierr);
491     }
492     if (in_from->memcpy_plan.optimized[0]) {
493       ierr = PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);CHKERRQ(ierr);
494     }
495   }
496   PetscFunctionReturn(0);
497 }
498 
499 
VecScatterCopy_SGToSS(VecScatter in,VecScatter out)500 PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
501 {
502   PetscErrorCode         ierr;
503   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
504   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
505 
506   PetscFunctionBegin;
507   out->ops->begin   = in->ops->begin;
508   out->ops->end     = in->ops->end;
509   out->ops->copy    = in->ops->copy;
510   out->ops->destroy = in->ops->destroy;
511   out->ops->view    = in->ops->view;
512 
513   ierr           = PetscMalloc2(1,&out_to,1,&out_from);CHKERRQ(ierr);
514   ierr           = PetscMalloc1(in_from->n,&out_from->vslots);CHKERRQ(ierr);
515   out_to->n      = in_to->n;
516   out_to->format = in_to->format;
517   out_to->first  = in_to->first;
518   out_to->step   = in_to->step;
519   out_to->format = in_to->format;
520 
521   out_from->n                    = in_from->n;
522   out_from->format               = in_from->format;
523   out_from->nonmatching_computed = PETSC_FALSE;
524   out_from->slots_nonmatching    = NULL;
525   ierr = PetscArraycpy(out_from->vslots,in_from->vslots,out_from->n);CHKERRQ(ierr);
526   ierr = VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);CHKERRQ(ierr);
527 
528   out->todata   = (void*)out_to;
529   out->fromdata = (void*)out_from;
530   PetscFunctionReturn(0);
531 }
532 
VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)533 PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
534 {
535   PetscErrorCode         ierr;
536   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
537   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
538   PetscInt               i;
539   PetscBool              isascii;
540 
541   PetscFunctionBegin;
542   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
543   if (isascii) {
544     ierr = PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");CHKERRQ(ierr);
545     for (i=0; i<in_to->n; i++) {
546       ierr = PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);CHKERRQ(ierr);
547     }
548     if (in_from->memcpy_plan.optimized[0]) {
549       ierr = PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);CHKERRQ(ierr);
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 /* --------------------------------------------------------------------------*/
556 /*
557     Scatter: parallel to sequential vector, sequential strides for both.
558 */
VecScatterCopy_SSToSS(VecScatter in,VecScatter out)559 PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
560 {
561   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
562   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
563   PetscErrorCode        ierr;
564 
565   PetscFunctionBegin;
566   out->ops->begin   = in->ops->begin;
567   out->ops->end     = in->ops->end;
568   out->ops->copy    = in->ops->copy;
569   out->ops->destroy = in->ops->destroy;
570   out->ops->view    = in->ops->view;
571 
572   ierr            = PetscMalloc2(1,&out_to,1,&out_from);CHKERRQ(ierr);
573   out_to->n       = in_to->n;
574   out_to->format  = in_to->format;
575   out_to->first   = in_to->first;
576   out_to->step    = in_to->step;
577   out_to->format  = in_to->format;
578   out_from->n     = in_from->n;
579   out_from->format= in_from->format;
580   out_from->first = in_from->first;
581   out_from->step  = in_from->step;
582   out_from->format= in_from->format;
583   out->todata     = (void*)out_to;
584   out->fromdata   = (void*)out_from;
585   PetscFunctionReturn(0);
586 }
587 
VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)588 PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
589 {
590   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
591   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
592   PetscErrorCode        ierr;
593   PetscBool             isascii;
594 
595   PetscFunctionBegin;
596   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
597   if (isascii) {
598     ierr = PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 /* --------------------------------------------------------------------------------------*/
604 /* Create a memcpy plan for a sequential general (SG) to SG scatter */
VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General * to,VecScatter_Seq_General * from)605 PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
606 {
607   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
608   PetscInt       j,n_copies;
609   PetscErrorCode ierr;
610   PetscBool      same_copy_starts;
611 
612   PetscFunctionBegin;
613   ierr                = PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));CHKERRQ(ierr);
614   ierr                = PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));CHKERRQ(ierr);
615   to->memcpy_plan.n   = 1;
616   from->memcpy_plan.n = 1;
617 
618   /* malloc and init the two fields to false and zero */
619   ierr = PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);CHKERRQ(ierr);
620   ierr = PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);CHKERRQ(ierr);
621 
622   /* count number of copies, which runs from 1 to n */
623   n_copies = 1;
624   for (i=0; i<n-1; i++) {
625     if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
626   }
627 
628   /* if average copy size >= 256 bytes, use memcpy instead of load/store */
629   if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
630     ierr = PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);CHKERRQ(ierr);
631     ierr = PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);CHKERRQ(ierr);
632 
633     /* set up copy_starts[] & copy_lenghts[] of to and from */
634     to->memcpy_plan.copy_starts[0]   = to_slots[0];
635     from->memcpy_plan.copy_starts[0] = from_slots[0];
636 
637     if (n_copies != 1) { /* one copy is trival and we can save some work */
638       j = 0;  /* j-th copy */
639       for (i=0; i<n-1; i++) {
640         if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
641           to->memcpy_plan.copy_lengths[j]    = to_slots[i]+bs-to->memcpy_plan.copy_starts[j];
642           from->memcpy_plan.copy_lengths[j]  = from_slots[i]+bs-from->memcpy_plan.copy_starts[j];
643           to->memcpy_plan.copy_starts[j+1]   = to_slots[i+1];
644           from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
645           j++;
646         }
647       }
648     }
649 
650     /* set up copy_lengths[] of the last copy */
651     to->memcpy_plan.copy_lengths[n_copies-1]   = to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1];
652     from->memcpy_plan.copy_lengths[n_copies-1] = from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1];
653 
654     /* check if to and from have the same copy_starts[] values */
655     same_copy_starts = PETSC_TRUE;
656     for (i=0; i<n_copies; i++) {
657       if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
658     }
659 
660     to->memcpy_plan.optimized[0]        = PETSC_TRUE;
661     from->memcpy_plan.optimized[0]      = PETSC_TRUE;
662     to->memcpy_plan.copy_offsets[1]     = n_copies;
663     from->memcpy_plan.copy_offsets[1]   = n_copies;
664     to->memcpy_plan.same_copy_starts    = same_copy_starts;
665     from->memcpy_plan.same_copy_starts  = same_copy_starts;
666   }
667 
668   /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
669   PetscFunctionReturn(0);
670 }
671 
672 /* -------------------------------------------------- */
VecScatterSetUp_Seq(VecScatter ctx)673 PetscErrorCode VecScatterSetUp_Seq(VecScatter ctx)
674 {
675   PetscErrorCode    ierr;
676   PetscInt          ix_type=-1,iy_type=-1;
677   IS                tix = NULL,tiy = NULL,ix=ctx->from_is,iy=ctx->to_is;
678 
679   PetscFunctionBegin;
680   ierr = GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);CHKERRQ(ierr);
681   if (tix) ix = tix;
682   if (tiy) iy = tiy;
683 
684   if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
685     PetscInt               nx,ny;
686     const PetscInt         *idx,*idy;
687     VecScatter_Seq_General *to = NULL,*from = NULL;
688 
689     ierr = ISGetLocalSize(ix,&nx);CHKERRQ(ierr);
690     ierr = ISGetLocalSize(iy,&ny);CHKERRQ(ierr);
691     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
692     ierr  = ISGetIndices(ix,&idx);CHKERRQ(ierr);
693     ierr  = ISGetIndices(iy,&idy);CHKERRQ(ierr);
694     ierr  = PetscMalloc2(1,&to,1,&from);CHKERRQ(ierr);
695     ierr  = PetscMalloc2(nx,&to->vslots,nx,&from->vslots);CHKERRQ(ierr);
696     to->n = nx;
697     ierr = VecScatterCheckIndices_Private(ctx->to_n,ny,idy);CHKERRQ(ierr);
698     ierr    = PetscArraycpy(to->vslots,idy,nx);CHKERRQ(ierr);
699     from->n = nx;
700     ierr = VecScatterCheckIndices_Private(ctx->from_n,nx,idx);CHKERRQ(ierr);
701     ierr              = PetscArraycpy(from->vslots,idx,nx);CHKERRQ(ierr);
702     to->format        = VEC_SCATTER_SEQ_GENERAL;
703     from->format      = VEC_SCATTER_SEQ_GENERAL;
704     ctx->todata       = (void*)to;
705     ctx->fromdata     = (void*)from;
706     ierr              = VecScatterMemcpyPlanCreate_SGToSG(1,to,from);CHKERRQ(ierr);
707     ctx->ops->begin   = VecScatterBegin_SGToSG;
708     ctx->ops->end     = NULL;
709     ctx->ops->destroy = VecScatterDestroy_SGToSG;
710     ctx->ops->copy    = VecScatterCopy_SGToSG;
711     ctx->ops->view    = VecScatterView_SGToSG;
712     ierr              = PetscInfo(ctx->from_v,"Special case: sequential vector general scatter\n");CHKERRQ(ierr);
713     goto functionend;
714   } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
715     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
716     VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
717 
718     ierr = ISGetLocalSize(ix,&nx);CHKERRQ(ierr);
719     ierr = ISGetLocalSize(iy,&ny);CHKERRQ(ierr);
720     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
721     ierr              = ISStrideGetInfo(iy,&to_first,&to_step);CHKERRQ(ierr);
722     ierr              = ISStrideGetInfo(ix,&from_first,&from_step);CHKERRQ(ierr);
723     ierr              = PetscMalloc2(1,&to8,1,&from8);CHKERRQ(ierr);
724     to8->n            = nx;
725     to8->first        = to_first;
726     to8->step         = to_step;
727     from8->n          = nx;
728     from8->first      = from_first;
729     from8->step       = from_step;
730     to8->format       = VEC_SCATTER_SEQ_STRIDE;
731     from8->format     = VEC_SCATTER_SEQ_STRIDE;
732     ctx->todata       = (void*)to8;
733     ctx->fromdata     = (void*)from8;
734     ctx->ops->begin   = VecScatterBegin_SSToSS;
735     ctx->ops->end     = NULL;
736     ctx->ops->destroy = VecScatterDestroy_SSToSS;
737     ctx->ops->copy    = VecScatterCopy_SSToSS;
738     ctx->ops->view    = VecScatterView_SSToSS;
739     ierr          = PetscInfo(ctx->from_v,"Special case: sequential vector stride to stride\n");CHKERRQ(ierr);
740     goto functionend;
741   } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
742     PetscInt               nx,ny,first,step;
743     const PetscInt         *idx;
744     VecScatter_Seq_General *from9 = NULL;
745     VecScatter_Seq_Stride  *to9   = NULL;
746 
747     ierr = ISGetLocalSize(ix,&nx);CHKERRQ(ierr);
748     ierr = ISGetIndices(ix,&idx);CHKERRQ(ierr);
749     ierr = ISGetLocalSize(iy,&ny);CHKERRQ(ierr);
750     ierr = ISStrideGetInfo(iy,&first,&step);CHKERRQ(ierr);
751     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
752     ierr       = PetscMalloc2(1,&to9,1,&from9);CHKERRQ(ierr);
753     ierr       = PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));CHKERRQ(ierr);
754     ierr       = PetscMalloc1(nx,&from9->vslots);CHKERRQ(ierr);
755     to9->n     = nx;
756     to9->first = first;
757     to9->step  = step;
758     from9->n   = nx;
759     ierr           = VecScatterCheckIndices_Private(ctx->from_n,nx,idx);CHKERRQ(ierr);
760     ierr           = PetscArraycpy(from9->vslots,idx,nx);CHKERRQ(ierr);
761     ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
762     if (step == 1) {
763       PetscInt tmp[2];
764       tmp[0] = 0; tmp[1] = nx;
765       ierr = VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);CHKERRQ(ierr);
766       ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
767     } else {
768       ctx->ops->begin = VecScatterBegin_SGToSS;
769     }
770     ctx->ops->destroy = VecScatterDestroy_SGToSS;
771     ctx->ops->end     = NULL;
772     ctx->ops->copy    = VecScatterCopy_SGToSS;
773     ctx->ops->view    = VecScatterView_SGToSS;
774     to9->format       = VEC_SCATTER_SEQ_STRIDE;
775     from9->format     = VEC_SCATTER_SEQ_GENERAL;
776     ierr = PetscInfo(ctx->from_v,"Special case: sequential vector general to stride\n");CHKERRQ(ierr);
777     goto functionend;
778   } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
779     PetscInt               nx,ny,first,step;
780     const PetscInt         *idy;
781     VecScatter_Seq_General *to10 = NULL;
782     VecScatter_Seq_Stride  *from10 = NULL;
783 
784     ierr = ISGetLocalSize(ix,&nx);CHKERRQ(ierr);
785     ierr = ISGetIndices(iy,&idy);CHKERRQ(ierr);
786     ierr = ISGetLocalSize(iy,&ny);CHKERRQ(ierr);
787     ierr = ISStrideGetInfo(ix,&first,&step);CHKERRQ(ierr);
788     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
789     ierr = PetscMalloc2(1,&to10,1,&from10);CHKERRQ(ierr);
790     ierr = PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));CHKERRQ(ierr);
791     ierr = PetscMalloc1(nx,&to10->vslots);CHKERRQ(ierr);
792     from10->n     = nx;
793     from10->first = first;
794     from10->step  = step;
795     to10->n       = nx;
796     ierr = VecScatterCheckIndices_Private(ctx->to_n,ny,idy);CHKERRQ(ierr);
797     ierr = PetscArraycpy(to10->vslots,idy,nx);CHKERRQ(ierr);
798     ctx->todata   = (void*)to10;
799     ctx->fromdata = (void*)from10;
800     if (step == 1) {
801       PetscInt tmp[2];
802       tmp[0] = 0; tmp[1] = nx;
803       ierr = VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);CHKERRQ(ierr);
804       ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
805     } else {
806       ctx->ops->begin = VecScatterBegin_SSToSG;
807     }
808     ctx->ops->destroy = VecScatterDestroy_SSToSG;
809     ctx->ops->end     = NULL;
810     ctx->ops->copy    = NULL;
811     ctx->ops->view    = VecScatterView_SSToSG;
812     to10->format      = VEC_SCATTER_SEQ_GENERAL;
813     from10->format    = VEC_SCATTER_SEQ_STRIDE;
814     ierr = PetscInfo(ctx->from_v,"Special case: sequential vector stride to general\n");CHKERRQ(ierr);
815     goto functionend;
816   } else {
817     PetscInt               nx,ny;
818     const PetscInt         *idx,*idy;
819     VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
820     PetscBool              idnx,idny;
821 
822     ierr = ISGetLocalSize(ix,&nx);CHKERRQ(ierr);
823     ierr = ISGetLocalSize(iy,&ny);CHKERRQ(ierr);
824     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
825 
826     ierr = ISIdentity(ix,&idnx);CHKERRQ(ierr);
827     ierr = ISIdentity(iy,&idny);CHKERRQ(ierr);
828     if (idnx && idny) {
829       VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
830       ierr          = PetscMalloc2(1,&to13,1,&from13);CHKERRQ(ierr);
831       to13->n       = nx;
832       to13->first   = 0;
833       to13->step    = 1;
834       from13->n     = nx;
835       from13->first = 0;
836       from13->step  = 1;
837       to13->format    = VEC_SCATTER_SEQ_STRIDE;
838       from13->format  = VEC_SCATTER_SEQ_STRIDE;
839       ctx->todata   = (void*)to13;
840       ctx->fromdata = (void*)from13;
841       ctx->ops->begin    = VecScatterBegin_SSToSS;
842       ctx->ops->end      = NULL;
843       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
844       ctx->ops->copy     = VecScatterCopy_SSToSS;
845       ctx->ops->view     = VecScatterView_SSToSS;
846       ierr = PetscInfo(ctx->from_v,"Special case: sequential copy\n");CHKERRQ(ierr);
847       goto functionend;
848     }
849 
850     ierr = ISGetIndices(iy,&idy);CHKERRQ(ierr);
851     ierr = ISGetIndices(ix,&idx);CHKERRQ(ierr);
852     ierr = PetscMalloc2(1,&to11,1,&from11);CHKERRQ(ierr);
853     ierr = PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);CHKERRQ(ierr);
854     to11->n = nx;
855     ierr = VecScatterCheckIndices_Private(ctx->to_n,ny,idy);CHKERRQ(ierr);
856     ierr = PetscArraycpy(to11->vslots,idy,nx);CHKERRQ(ierr);
857     from11->n = nx;
858     ierr = VecScatterCheckIndices_Private(ctx->from_n,nx,idx);CHKERRQ(ierr);
859     ierr = PetscArraycpy(from11->vslots,idx,nx);CHKERRQ(ierr);
860     to11->format        = VEC_SCATTER_SEQ_GENERAL;
861     from11->format      = VEC_SCATTER_SEQ_GENERAL;
862     ctx->todata       = (void*)to11;
863     ctx->fromdata     = (void*)from11;
864     ctx->ops->begin   = VecScatterBegin_SGToSG;
865     ctx->ops->end     = NULL;
866     ctx->ops->destroy = VecScatterDestroy_SGToSG;
867     ctx->ops->copy    = VecScatterCopy_SGToSG;
868     ctx->ops->view    = VecScatterView_SGToSG;
869     ierr = VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);CHKERRQ(ierr);
870     ierr = ISRestoreIndices(ix,&idx);CHKERRQ(ierr);
871     ierr = ISRestoreIndices(iy,&idy);CHKERRQ(ierr);
872     ierr = PetscInfo(ctx->from_v,"Sequential vector scatter with block indices\n");CHKERRQ(ierr);
873     goto functionend;
874   }
875   functionend:
876   ierr = ISDestroy(&tix);CHKERRQ(ierr);
877   ierr = ISDestroy(&tiy);CHKERRQ(ierr);
878   ierr = VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");CHKERRQ(ierr);
879   PetscFunctionReturn(0);
880 }
881 
VecScatterCreate_Seq(VecScatter ctx)882 PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
883 {
884   PetscErrorCode    ierr;
885 
886   PetscFunctionBegin;
887   ctx->ops->setup = VecScatterSetUp_Seq;
888   ierr = PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892