/*
 Implementation of the sequential cuda vectors.

 This file contains the code that can be compiled with a C
 compiler.  The companion file veccuda2.cu contains the code that
 must be compiled with nvcc or a C++ compiler.
 */

#define PETSC_SKIP_SPINLOCK

#include <petscconf.h>
#include <petsc/private/vecimpl.h>          /*I <petscvec.h> I*/
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/cudavecimpl.h>
VecCUDAGetArrays_Private(Vec v,const PetscScalar ** x,const PetscScalar ** x_d,PetscOffloadMask * flg)16 PetscErrorCode VecCUDAGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
17 {
18   PetscCheckTypeNames(v,VECSEQCUDA,VECMPICUDA);
19   PetscFunctionBegin;
20   if (x) {
21     Vec_Seq *h = (Vec_Seq*)v->data;
22 
23     *x = h->array;
24   }
25   if (x_d) {
26     Vec_CUDA *d = (Vec_CUDA*)v->spptr;
27 
28     *x_d = d ? d->GPUarray : NULL;
29   }
30   if (flg) *flg = v->offloadmask;
31   PetscFunctionReturn(0);
32 }
33 
34 /*
35     Allocates space for the vector array on the Host if it does not exist.
36     Does NOT change the PetscCUDAFlag for the vector
37     Does NOT zero the CUDA array
38  */
VecCUDAAllocateCheckHost(Vec v)39 PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
40 {
41   PetscErrorCode ierr;
42   PetscScalar    *array;
43   Vec_Seq        *s = (Vec_Seq*)v->data;
44   PetscInt       n = v->map->n;
45 
46   PetscFunctionBegin;
47   if (!s) {
48     ierr = PetscNewLog((PetscObject)v,&s);CHKERRQ(ierr);
49     v->data = s;
50   }
51   if (!s->array) {
52     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
53       ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
54       v->pinned_memory = PETSC_TRUE;
55     }
56     ierr = PetscMalloc1(n,&array);CHKERRQ(ierr);
57     ierr = PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));CHKERRQ(ierr);
58     s->array           = array;
59     s->array_allocated = array;
60     if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
61       ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
62     }
63     if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
64       v->offloadmask = PETSC_OFFLOAD_CPU;
65     }
66   }
67   PetscFunctionReturn(0);
68 }
69 
VecCopy_SeqCUDA_Private(Vec xin,Vec yin)70 PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
71 {
72   PetscScalar       *ya;
73   const PetscScalar *xa;
74   PetscErrorCode    ierr;
75 
76   PetscFunctionBegin;
77   ierr = VecCUDAAllocateCheckHost(xin);CHKERRQ(ierr);
78   ierr = VecCUDAAllocateCheckHost(yin);CHKERRQ(ierr);
79   if (xin != yin) {
80     ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
81     ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
82     ierr = PetscArraycpy(ya,xa,xin->map->n);CHKERRQ(ierr);
83     ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
84     ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
85   }
86   PetscFunctionReturn(0);
87 }
88 
VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)89 PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
90 {
91   PetscErrorCode ierr;
92   PetscInt       n = xin->map->n,i;
93   PetscScalar    *xx;
94 
95   PetscFunctionBegin;
96   ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
97   for (i=0; i<n; i++) { ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr); }
98   ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
99   PetscFunctionReturn(0);
100 }
101 
VecDestroy_SeqCUDA_Private(Vec v)102 PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
103 {
104   Vec_Seq        *vs = (Vec_Seq*)v->data;
105   PetscErrorCode ierr;
106 
107   PetscFunctionBegin;
108   ierr = PetscObjectSAWsViewOff(v);CHKERRQ(ierr);
109 #if defined(PETSC_USE_LOG)
110   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
111 #endif
112   if (vs) {
113     if (vs->array_allocated) {
114       if (v->pinned_memory) {
115         ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
116       }
117       ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);
118       if (v->pinned_memory) {
119         ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
120         v->pinned_memory = PETSC_FALSE;
121       }
122     }
123     ierr = PetscFree(vs);CHKERRQ(ierr);
124   }
125   PetscFunctionReturn(0);
126 }
127 
VecResetArray_SeqCUDA_Private(Vec vin)128 PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
129 {
130   Vec_Seq *v = (Vec_Seq*)vin->data;
131 
132   PetscFunctionBegin;
133   v->array         = v->unplacedarray;
134   v->unplacedarray = 0;
135   PetscFunctionReturn(0);
136 }
137 
VecCUDAAllocateCheck_Public(Vec v)138 PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
139 {
140   PetscErrorCode ierr;
141 
142   PetscFunctionBegin;
143   ierr = VecCUDAAllocateCheck(v);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
VecCUDACopyToGPU_Public(Vec v)147 PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
148 {
149   PetscErrorCode ierr;
150 
151   PetscFunctionBegin;
152   ierr = VecCUDACopyToGPU(v);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 /*
157     VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
158 
159    Input Parameters:
160  +  v    - the vector
161  .  ci   - the requested indices, this should be created with CUDAIndicesCreate()
162  -  mode - vec scatter mode used in VecScatterBegin/End
163 */
VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)164 PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
165 {
166   PetscErrorCode ierr;
167 
168   PetscFunctionBegin;
169   ierr = VecCUDACopyToGPUSome(v,ci,mode);CHKERRQ(ierr);
170   PetscFunctionReturn(0);
171 }
172 
173 /*
174   VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
175 
176   Input Parameters:
177  +  v    - the vector
178  .  ci   - the requested indices, this should be created with CUDAIndicesCreate()
179  -  mode - vec scatter mode used in VecScatterBegin/End
180 */
VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)181 PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
182 {
183   PetscErrorCode ierr;
184 
185   PetscFunctionBegin;
186   ierr = VecCUDACopyFromGPUSome(v,ci,mode);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)190 PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
191 {
192   PetscErrorCode ierr;
193 
194   PetscFunctionBegin;
195   ierr = VecSetRandom_SeqCUDA_Private(xin,r);CHKERRQ(ierr);
196   xin->offloadmask = PETSC_OFFLOAD_CPU;
197   PetscFunctionReturn(0);
198 }
199 
VecResetArray_SeqCUDA(Vec vin)200 PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
201 {
202   PetscErrorCode ierr;
203 
204   PetscFunctionBegin;
205   ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
206   ierr = VecResetArray_SeqCUDA_Private(vin);CHKERRQ(ierr);
207   vin->offloadmask = PETSC_OFFLOAD_CPU;
208   PetscFunctionReturn(0);
209 }
210 
VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar * a)211 PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
212 {
213   PetscErrorCode ierr;
214 
215   PetscFunctionBegin;
216   ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
217   ierr = VecPlaceArray_Seq(vin,a);CHKERRQ(ierr);
218   vin->offloadmask = PETSC_OFFLOAD_CPU;
219   PetscFunctionReturn(0);
220 }
221 
/*
   VecReplaceArray_SeqCUDA - permanently replaces the vector's host array with
   the user-supplied array a; the previously allocated array (if any) is freed
   and the vector takes ownership of a (stored in both array and
   array_allocated).
*/
PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
{
  PetscErrorCode ierr;
  Vec_Seq        *vs = (Vec_Seq*)vin->data;

  PetscFunctionBegin;
  if (vs->array != vs->array_allocated) {
    /* make sure the users array has the latest values */
    ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
  }
  if (vs->array_allocated) {
    /* pinned host memory must be freed with the CUDA host allocator active */
    if (vin->pinned_memory) {
      ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
    }
    ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);
    if (vin->pinned_memory) {
      ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
    }
  }
  /* the replacement array was not allocated by us, so it is not pinned */
  vin->pinned_memory = PETSC_FALSE;
  vs->array_allocated = vs->array = (PetscScalar*)a;
  vin->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
246 
247 /*@
248  VecCreateSeqCUDA - Creates a standard, sequential array-style vector.
249 
250  Collective
251 
252  Input Parameter:
253  +  comm - the communicator, should be PETSC_COMM_SELF
254  -  n - the vector length
255 
256  Output Parameter:
257  .  v - the vector
258 
259  Notes:
260  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
261  same type as an existing vector.
262 
263  Level: intermediate
264 
265  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
266  @*/
VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec * v)267 PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
268 {
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   ierr = VecCreate(comm,v);CHKERRQ(ierr);
273   ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
274   ierr = VecSetType(*v,VECSEQCUDA);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
/*
   VecDuplicate_SeqCUDA - creates a new vector of the same type and layout as
   win; values are NOT copied.  Attached objects and composed functions are
   duplicated so the new vector behaves like the original.
*/
PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);CHKERRQ(ierr);
  /* share the layout object instead of rebuilding it */
  ierr = PetscLayoutReference(win->map,&(*V)->map);CHKERRQ(ierr);
  /* carry over composed objects and composed functions */
  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);CHKERRQ(ierr);
  (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
  PetscFunctionReturn(0);
}
290 
/*
   VecCreate_SeqCUDA - type constructor for VECSEQCUDA; allocates both device
   and host storage, zeros both, and marks both copies valid.
*/
PetscErrorCode VecCreate_SeqCUDA(Vec V)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(V->map);CHKERRQ(ierr);
  /* device allocation first; the private creator below takes its GPU array */
  ierr = VecCUDAAllocateCheck(V);CHKERRQ(ierr);
  ierr = VecCreate_SeqCUDA_Private(V,((Vec_CUDA*)V->spptr)->GPUarray_allocated);CHKERRQ(ierr);
  ierr = VecCUDAAllocateCheckHost(V);CHKERRQ(ierr);
  /* zero the device copy (VecSet dispatches to the CUDA op) ... */
  ierr = VecSet(V,0.0);CHKERRQ(ierr);
  /* ... and the host copy explicitly, so both are consistent */
  ierr = VecSet_Seq(V,0.0);CHKERRQ(ierr);
  V->offloadmask = PETSC_OFFLOAD_BOTH;
  PetscFunctionReturn(0);
}
306 
307 /*@C
308    VecCreateSeqCUDAWithArray - Creates a CUDA sequential array-style vector,
309    where the user provides the array space to store the vector values. The array
310    provided must be a GPU array.
311 
312    Collective
313 
314    Input Parameter:
315 +  comm - the communicator, should be PETSC_COMM_SELF
316 .  bs - the block size
317 .  n - the vector length
318 -  array - GPU memory where the vector elements are to be stored.
319 
320    Output Parameter:
321 .  V - the vector
322 
323    Notes:
324    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
325    same type as an existing vector.
326 
327    If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
328    at a later stage to SET the array for storing the vector values.
329 
330    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
331    The user should not free the array until the vector is destroyed.
332 
333    Level: intermediate
334 
335 .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
336           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
337           VecCreateMPIWithArray()
338 @*/
VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec * V)339 PetscErrorCode  VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
340 {
341   PetscErrorCode ierr;
342 
343   PetscFunctionBegin;
344   ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
345   ierr = VecCreate(comm,V);CHKERRQ(ierr);
346   ierr = VecSetSizes(*V,n,n);CHKERRQ(ierr);
347   ierr = VecSetBlockSize(*V,bs);CHKERRQ(ierr);
348   ierr = VecCreate_SeqCUDA_Private(*V,array);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /*@C
353    VecCreateSeqCUDAWithArrays - Creates a CUDA sequential array-style vector,
354    where the user provides the array space to store the vector values.
355 
356    Collective
357 
358    Input Parameter:
359 +  comm - the communicator, should be PETSC_COMM_SELF
360 .  bs - the block size
361 .  n - the vector length
362 -  cpuarray - CPU memory where the vector elements are to be stored.
363 -  gpuarray - GPU memory where the vector elements are to be stored.
364 
365    Output Parameter:
366 .  V - the vector
367 
368    Notes:
369    If both cpuarray and gpuarray are provided, the caller must ensure that
370    the provided arrays have identical values.
371 
372    PETSc does NOT free the provided arrays when the vector is destroyed via
373    VecDestroy(). The user should not free the array until the vector is
374    destroyed.
375 
376    Level: intermediate
377 
378 .seealso: VecCreateMPICUDAWithArrays(), VecCreate(), VecCreateSeqWithArray(),
379           VecCUDAPlaceArray(), VecCreateSeqCUDAWithArray(),
380           VecCUDAAllocateCheckHost()
381 @*/
VecCreateSeqCUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec * V)382 PetscErrorCode  VecCreateSeqCUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
383 {
384   PetscErrorCode ierr;
385 
386   PetscFunctionBegin;
387   // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
388   ierr = VecCreateSeqCUDAWithArray(comm,bs,n,gpuarray,V);CHKERRQ(ierr);
389 
390   if (cpuarray && gpuarray) {
391     Vec_Seq *s = (Vec_Seq*)((*V)->data);
392     s->array = (PetscScalar*)cpuarray;
393     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
394   } else if (cpuarray) {
395     Vec_Seq *s = (Vec_Seq*)((*V)->data);
396     s->array = (PetscScalar*)cpuarray;
397     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
398   } else if (gpuarray) {
399     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
400   } else {
401     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
402   }
403 
404   PetscFunctionReturn(0);
405 }
406 
VecGetArrayWrite_SeqCUDA(Vec v,PetscScalar ** vv)407 PetscErrorCode VecGetArrayWrite_SeqCUDA(Vec v,PetscScalar **vv)
408 {
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecCUDAAllocateCheckHost(v);CHKERRQ(ierr);
413   v->offloadmask = PETSC_OFFLOAD_CPU;
414   *vv = *((PetscScalar**)v->data);
415   PetscFunctionReturn(0);
416 }
417 
/*
   VecBindToCPU_SeqCUDA - switches the vector's operation table between the
   plain sequential (CPU) implementations and the CUDA implementations.

   Input Parameters:
+  V   - the vector
-  pin - PETSC_TRUE to bind to the CPU, PETSC_FALSE to use the GPU ops

   Note: when binding to the CPU the data is first copied down from the GPU.
*/
PetscErrorCode VecBindToCPU_SeqCUDA(Vec V,PetscBool pin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  V->boundtocpu = pin;
  if (pin) {
    ierr = VecCUDACopyFromGPU(V);CHKERRQ(ierr);
    V->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    /* install the plain sequential implementations */
    V->ops->dot                    = VecDot_Seq;
    V->ops->norm                   = VecNorm_Seq;
    V->ops->tdot                   = VecTDot_Seq;
    V->ops->scale                  = VecScale_Seq;
    V->ops->copy                   = VecCopy_Seq;
    V->ops->set                    = VecSet_Seq;
    V->ops->swap                   = VecSwap_Seq;
    V->ops->axpy                   = VecAXPY_Seq;
    V->ops->axpby                  = VecAXPBY_Seq;
    V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    V->ops->pointwisemult          = VecPointwiseMult_Seq;
    V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    V->ops->setrandom              = VecSetRandom_Seq;
    V->ops->dot_local              = VecDot_Seq;
    V->ops->tdot_local             = VecTDot_Seq;
    V->ops->norm_local             = VecNorm_Seq;
    V->ops->mdot_local             = VecMDot_Seq;
    V->ops->mtdot_local            = VecMTDot_Seq;
    V->ops->maxpy                  = VecMAXPY_Seq;
    V->ops->mdot                   = VecMDot_Seq;
    V->ops->mtdot                  = VecMTDot_Seq;
    V->ops->aypx                   = VecAYPX_Seq;
    V->ops->waxpy                  = VecWAXPY_Seq;
    V->ops->dotnorm2               = NULL; /* fall back to the default implementation */
    V->ops->placearray             = VecPlaceArray_Seq;
    V->ops->replacearray           = VecReplaceArray_SeqCUDA; /* still must handle pinned memory */
    V->ops->resetarray             = VecResetArray_Seq;
    V->ops->duplicate              = VecDuplicate_Seq;
    V->ops->conjugate              = VecConjugate_Seq;
    V->ops->getlocalvector         = NULL;
    V->ops->restorelocalvector     = NULL;
    V->ops->getlocalvectorread     = NULL;
    V->ops->restorelocalvectorread = NULL;
    V->ops->getarraywrite          = NULL;
  } else {
    /* install the CUDA implementations */
    V->ops->dot                    = VecDot_SeqCUDA;
    V->ops->norm                   = VecNorm_SeqCUDA;
    V->ops->tdot                   = VecTDot_SeqCUDA;
    V->ops->scale                  = VecScale_SeqCUDA;
    V->ops->copy                   = VecCopy_SeqCUDA;
    V->ops->set                    = VecSet_SeqCUDA;
    V->ops->swap                   = VecSwap_SeqCUDA;
    V->ops->axpy                   = VecAXPY_SeqCUDA;
    V->ops->axpby                  = VecAXPBY_SeqCUDA;
    V->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
    V->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
    V->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
    V->ops->setrandom              = VecSetRandom_SeqCUDA;
    V->ops->dot_local              = VecDot_SeqCUDA;
    V->ops->tdot_local             = VecTDot_SeqCUDA;
    V->ops->norm_local             = VecNorm_SeqCUDA;
    V->ops->mdot_local             = VecMDot_SeqCUDA;
    /* NOTE(review): mtdot/mtdot_local are not reassigned here, so whatever
       was installed previously remains in effect — confirm this is intended */
    V->ops->maxpy                  = VecMAXPY_SeqCUDA;
    V->ops->mdot                   = VecMDot_SeqCUDA;
    V->ops->aypx                   = VecAYPX_SeqCUDA;
    V->ops->waxpy                  = VecWAXPY_SeqCUDA;
    V->ops->dotnorm2               = VecDotNorm2_SeqCUDA;
    V->ops->placearray             = VecPlaceArray_SeqCUDA;
    V->ops->replacearray           = VecReplaceArray_SeqCUDA;
    V->ops->resetarray             = VecResetArray_SeqCUDA;
    V->ops->destroy                = VecDestroy_SeqCUDA;
    V->ops->duplicate              = VecDuplicate_SeqCUDA;
    V->ops->conjugate              = VecConjugate_SeqCUDA;
    V->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
    V->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
    V->ops->getlocalvectorread     = VecGetLocalVector_SeqCUDA;
    V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
    V->ops->getarraywrite          = VecGetArrayWrite_SeqCUDA;
  }
  PetscFunctionReturn(0);
}
498 
/*
   VecCreate_SeqCUDA_Private - common constructor body for VECSEQCUDA; builds
   the Seq layer with no host array, installs the CUDA ops, and (only if a GPU
   array is supplied) creates the Vec_CUDA structure and records the array.

   Input Parameters:
+  V     - the vector being built
-  array - GPU array to install, or NULL to defer device allocation
*/
PetscErrorCode VecCreate_SeqCUDA_Private(Vec V,const PetscScalar *array)
{
  PetscErrorCode ierr;
  Vec_CUDA       *veccuda;
  PetscMPIInt    size;
  PetscBool      option_set;

  PetscFunctionBegin;
  /* NOTE(review): an MPI return code is checked with CHKERRQ here; newer
     PETsc versions use CHKERRMPI for MPI calls — confirm against the target
     PETSc release */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
  if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
  /* build the Seq layer with a NULL host array; host storage is allocated lazily */
  ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);CHKERRQ(ierr);
  ierr = VecBindToCPU_SeqCUDA(V,PETSC_FALSE);CHKERRQ(ierr);
  V->ops->bindtocpu = VecBindToCPU_SeqCUDA;

  /* Later, functions check for the Vec_CUDA structure existence, so do not create it without array */
  if (array) {
    if (!V->spptr) {
      PetscReal pinned_memory_min;
      ierr = PetscMalloc(sizeof(Vec_CUDA),&V->spptr);CHKERRQ(ierr);
      veccuda = (Vec_CUDA*)V->spptr;
      veccuda->stream = 0; /* using default stream */
      veccuda->GPUarray_allocated = 0; /* we do not own the user-supplied array */
      V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

      pinned_memory_min = 0;
      /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
         Note: This same code duplicated in VecCUDAAllocateCheck() and VecCreate_MPICUDA_Private(). Is there a good way to avoid this? */
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECCUDA Options","Vec");CHKERRQ(ierr);
      ierr = PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);CHKERRQ(ierr);
      if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
    veccuda = (Vec_CUDA*)V->spptr;
    veccuda->GPUarray = (PetscScalar*)array;
    /* device data is (assumed) valid; host copy does not exist yet */
    V->offloadmask = PETSC_OFFLOAD_GPU;

  }
  PetscFunctionReturn(0);
}
539