1 /*
2 Implementation of the sequential cuda vectors.
3
4 This file contains the code that can be compiled with a C
5 compiler. The companion file veccuda2.cu contains the code that
6 must be compiled with nvcc or a C++ compiler.
7 */
8
9 #define PETSC_SKIP_SPINLOCK
10
11 #include <petscconf.h>
12 #include <petsc/private/vecimpl.h> /*I <petscvec.h> I*/
13 #include <../src/vec/vec/impls/dvecimpl.h>
14 #include <petsc/private/cudavecimpl.h>
15
VecCUDAGetArrays_Private(Vec v,const PetscScalar ** x,const PetscScalar ** x_d,PetscOffloadMask * flg)16 PetscErrorCode VecCUDAGetArrays_Private(Vec v,const PetscScalar** x,const PetscScalar** x_d,PetscOffloadMask* flg)
17 {
18 PetscCheckTypeNames(v,VECSEQCUDA,VECMPICUDA);
19 PetscFunctionBegin;
20 if (x) {
21 Vec_Seq *h = (Vec_Seq*)v->data;
22
23 *x = h->array;
24 }
25 if (x_d) {
26 Vec_CUDA *d = (Vec_CUDA*)v->spptr;
27
28 *x_d = d ? d->GPUarray : NULL;
29 }
30 if (flg) *flg = v->offloadmask;
31 PetscFunctionReturn(0);
32 }
33
34 /*
35 Allocates space for the vector array on the Host if it does not exist.
36 Does NOT change the PetscCUDAFlag for the vector
37 Does NOT zero the CUDA array
38 */
VecCUDAAllocateCheckHost(Vec v)39 PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
40 {
41 PetscErrorCode ierr;
42 PetscScalar *array;
43 Vec_Seq *s = (Vec_Seq*)v->data;
44 PetscInt n = v->map->n;
45
46 PetscFunctionBegin;
47 if (!s) {
48 ierr = PetscNewLog((PetscObject)v,&s);CHKERRQ(ierr);
49 v->data = s;
50 }
51 if (!s->array) {
52 if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
53 ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
54 v->pinned_memory = PETSC_TRUE;
55 }
56 ierr = PetscMalloc1(n,&array);CHKERRQ(ierr);
57 ierr = PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));CHKERRQ(ierr);
58 s->array = array;
59 s->array_allocated = array;
60 if (n*sizeof(PetscScalar) > v->minimum_bytes_pinned_memory) {
61 ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
62 }
63 if (v->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
64 v->offloadmask = PETSC_OFFLOAD_CPU;
65 }
66 }
67 PetscFunctionReturn(0);
68 }
69
VecCopy_SeqCUDA_Private(Vec xin,Vec yin)70 PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
71 {
72 PetscScalar *ya;
73 const PetscScalar *xa;
74 PetscErrorCode ierr;
75
76 PetscFunctionBegin;
77 ierr = VecCUDAAllocateCheckHost(xin);CHKERRQ(ierr);
78 ierr = VecCUDAAllocateCheckHost(yin);CHKERRQ(ierr);
79 if (xin != yin) {
80 ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
81 ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
82 ierr = PetscArraycpy(ya,xa,xin->map->n);CHKERRQ(ierr);
83 ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
84 ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
85 }
86 PetscFunctionReturn(0);
87 }
88
VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)89 PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
90 {
91 PetscErrorCode ierr;
92 PetscInt n = xin->map->n,i;
93 PetscScalar *xx;
94
95 PetscFunctionBegin;
96 ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
97 for (i=0; i<n; i++) { ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr); }
98 ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
99 PetscFunctionReturn(0);
100 }
101
VecDestroy_SeqCUDA_Private(Vec v)102 PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
103 {
104 Vec_Seq *vs = (Vec_Seq*)v->data;
105 PetscErrorCode ierr;
106
107 PetscFunctionBegin;
108 ierr = PetscObjectSAWsViewOff(v);CHKERRQ(ierr);
109 #if defined(PETSC_USE_LOG)
110 PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
111 #endif
112 if (vs) {
113 if (vs->array_allocated) {
114 if (v->pinned_memory) {
115 ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
116 }
117 ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);
118 if (v->pinned_memory) {
119 ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
120 v->pinned_memory = PETSC_FALSE;
121 }
122 }
123 ierr = PetscFree(vs);CHKERRQ(ierr);
124 }
125 PetscFunctionReturn(0);
126 }
127
VecResetArray_SeqCUDA_Private(Vec vin)128 PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
129 {
130 Vec_Seq *v = (Vec_Seq*)vin->data;
131
132 PetscFunctionBegin;
133 v->array = v->unplacedarray;
134 v->unplacedarray = 0;
135 PetscFunctionReturn(0);
136 }
137
VecCUDAAllocateCheck_Public(Vec v)138 PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
139 {
140 PetscErrorCode ierr;
141
142 PetscFunctionBegin;
143 ierr = VecCUDAAllocateCheck(v);CHKERRQ(ierr);
144 PetscFunctionReturn(0);
145 }
146
VecCUDACopyToGPU_Public(Vec v)147 PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
148 {
149 PetscErrorCode ierr;
150
151 PetscFunctionBegin;
152 ierr = VecCUDACopyToGPU(v);CHKERRQ(ierr);
153 PetscFunctionReturn(0);
154 }
155
156 /*
157 VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
158
159 Input Parameters:
160 + v - the vector
161 . ci - the requested indices, this should be created with CUDAIndicesCreate()
162 - mode - vec scatter mode used in VecScatterBegin/End
163 */
VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)164 PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
165 {
166 PetscErrorCode ierr;
167
168 PetscFunctionBegin;
169 ierr = VecCUDACopyToGPUSome(v,ci,mode);CHKERRQ(ierr);
170 PetscFunctionReturn(0);
171 }
172
173 /*
174 VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
175
176 Input Parameters:
177 + v - the vector
178 . ci - the requested indices, this should be created with CUDAIndicesCreate()
179 - mode - vec scatter mode used in VecScatterBegin/End
180 */
VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)181 PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci,ScatterMode mode)
182 {
183 PetscErrorCode ierr;
184
185 PetscFunctionBegin;
186 ierr = VecCUDACopyFromGPUSome(v,ci,mode);CHKERRQ(ierr);
187 PetscFunctionReturn(0);
188 }
189
VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)190 PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
191 {
192 PetscErrorCode ierr;
193
194 PetscFunctionBegin;
195 ierr = VecSetRandom_SeqCUDA_Private(xin,r);CHKERRQ(ierr);
196 xin->offloadmask = PETSC_OFFLOAD_CPU;
197 PetscFunctionReturn(0);
198 }
199
VecResetArray_SeqCUDA(Vec vin)200 PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
201 {
202 PetscErrorCode ierr;
203
204 PetscFunctionBegin;
205 ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
206 ierr = VecResetArray_SeqCUDA_Private(vin);CHKERRQ(ierr);
207 vin->offloadmask = PETSC_OFFLOAD_CPU;
208 PetscFunctionReturn(0);
209 }
210
VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar * a)211 PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
212 {
213 PetscErrorCode ierr;
214
215 PetscFunctionBegin;
216 ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
217 ierr = VecPlaceArray_Seq(vin,a);CHKERRQ(ierr);
218 vin->offloadmask = PETSC_OFFLOAD_CPU;
219 PetscFunctionReturn(0);
220 }
221
VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar * a)222 PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
223 {
224 PetscErrorCode ierr;
225 Vec_Seq *vs = (Vec_Seq*)vin->data;
226
227 PetscFunctionBegin;
228 if (vs->array != vs->array_allocated) {
229 /* make sure the users array has the latest values */
230 ierr = VecCUDACopyFromGPU(vin);CHKERRQ(ierr);
231 }
232 if (vs->array_allocated) {
233 if (vin->pinned_memory) {
234 ierr = PetscMallocSetCUDAHost();CHKERRQ(ierr);
235 }
236 ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);
237 if (vin->pinned_memory) {
238 ierr = PetscMallocResetCUDAHost();CHKERRQ(ierr);
239 }
240 }
241 vin->pinned_memory = PETSC_FALSE;
242 vs->array_allocated = vs->array = (PetscScalar*)a;
243 vin->offloadmask = PETSC_OFFLOAD_CPU;
244 PetscFunctionReturn(0);
245 }
246
247 /*@
248 VecCreateSeqCUDA - Creates a standard, sequential array-style vector.
249
250 Collective
251
252 Input Parameter:
253 + comm - the communicator, should be PETSC_COMM_SELF
254 - n - the vector length
255
256 Output Parameter:
257 . v - the vector
258
259 Notes:
260 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
261 same type as an existing vector.
262
263 Level: intermediate
264
265 .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
266 @*/
VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec * v)267 PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
268 {
269 PetscErrorCode ierr;
270
271 PetscFunctionBegin;
272 ierr = VecCreate(comm,v);CHKERRQ(ierr);
273 ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
274 ierr = VecSetType(*v,VECSEQCUDA);CHKERRQ(ierr);
275 PetscFunctionReturn(0);
276 }
277
VecDuplicate_SeqCUDA(Vec win,Vec * V)278 PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
279 {
280 PetscErrorCode ierr;
281
282 PetscFunctionBegin;
283 ierr = VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);CHKERRQ(ierr);
284 ierr = PetscLayoutReference(win->map,&(*V)->map);CHKERRQ(ierr);
285 ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);CHKERRQ(ierr);
286 ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);CHKERRQ(ierr);
287 (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
288 PetscFunctionReturn(0);
289 }
290
VecCreate_SeqCUDA(Vec V)291 PetscErrorCode VecCreate_SeqCUDA(Vec V)
292 {
293 PetscErrorCode ierr;
294
295 PetscFunctionBegin;
296 ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
297 ierr = PetscLayoutSetUp(V->map);CHKERRQ(ierr);
298 ierr = VecCUDAAllocateCheck(V);CHKERRQ(ierr);
299 ierr = VecCreate_SeqCUDA_Private(V,((Vec_CUDA*)V->spptr)->GPUarray_allocated);CHKERRQ(ierr);
300 ierr = VecCUDAAllocateCheckHost(V);CHKERRQ(ierr);
301 ierr = VecSet(V,0.0);CHKERRQ(ierr);
302 ierr = VecSet_Seq(V,0.0);CHKERRQ(ierr);
303 V->offloadmask = PETSC_OFFLOAD_BOTH;
304 PetscFunctionReturn(0);
305 }
306
307 /*@C
308 VecCreateSeqCUDAWithArray - Creates a CUDA sequential array-style vector,
309 where the user provides the array space to store the vector values. The array
310 provided must be a GPU array.
311
312 Collective
313
314 Input Parameter:
315 + comm - the communicator, should be PETSC_COMM_SELF
316 . bs - the block size
317 . n - the vector length
318 - array - GPU memory where the vector elements are to be stored.
319
320 Output Parameter:
321 . V - the vector
322
323 Notes:
324 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
325 same type as an existing vector.
326
327 If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
328 at a later stage to SET the array for storing the vector values.
329
330 PETSc does NOT free the array when the vector is destroyed via VecDestroy().
331 The user should not free the array until the vector is destroyed.
332
333 Level: intermediate
334
335 .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
336 VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
337 VecCreateMPIWithArray()
338 @*/
VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec * V)339 PetscErrorCode VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
340 {
341 PetscErrorCode ierr;
342
343 PetscFunctionBegin;
344 ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
345 ierr = VecCreate(comm,V);CHKERRQ(ierr);
346 ierr = VecSetSizes(*V,n,n);CHKERRQ(ierr);
347 ierr = VecSetBlockSize(*V,bs);CHKERRQ(ierr);
348 ierr = VecCreate_SeqCUDA_Private(*V,array);CHKERRQ(ierr);
349 PetscFunctionReturn(0);
350 }
351
352 /*@C
353 VecCreateSeqCUDAWithArrays - Creates a CUDA sequential array-style vector,
354 where the user provides the array space to store the vector values.
355
356 Collective
357
358 Input Parameter:
359 + comm - the communicator, should be PETSC_COMM_SELF
360 . bs - the block size
361 . n - the vector length
362 - cpuarray - CPU memory where the vector elements are to be stored.
363 - gpuarray - GPU memory where the vector elements are to be stored.
364
365 Output Parameter:
366 . V - the vector
367
368 Notes:
369 If both cpuarray and gpuarray are provided, the caller must ensure that
370 the provided arrays have identical values.
371
372 PETSc does NOT free the provided arrays when the vector is destroyed via
373 VecDestroy(). The user should not free the array until the vector is
374 destroyed.
375
376 Level: intermediate
377
378 .seealso: VecCreateMPICUDAWithArrays(), VecCreate(), VecCreateSeqWithArray(),
379 VecCUDAPlaceArray(), VecCreateSeqCUDAWithArray(),
380 VecCUDAAllocateCheckHost()
381 @*/
VecCreateSeqCUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec * V)382 PetscErrorCode VecCreateSeqCUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *V)
383 {
384 PetscErrorCode ierr;
385
386 PetscFunctionBegin;
387 // set V's gpuarray to be gpuarray, do not allocate memory on host yet.
388 ierr = VecCreateSeqCUDAWithArray(comm,bs,n,gpuarray,V);CHKERRQ(ierr);
389
390 if (cpuarray && gpuarray) {
391 Vec_Seq *s = (Vec_Seq*)((*V)->data);
392 s->array = (PetscScalar*)cpuarray;
393 (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
394 } else if (cpuarray) {
395 Vec_Seq *s = (Vec_Seq*)((*V)->data);
396 s->array = (PetscScalar*)cpuarray;
397 (*V)->offloadmask = PETSC_OFFLOAD_CPU;
398 } else if (gpuarray) {
399 (*V)->offloadmask = PETSC_OFFLOAD_GPU;
400 } else {
401 (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
402 }
403
404 PetscFunctionReturn(0);
405 }
406
VecGetArrayWrite_SeqCUDA(Vec v,PetscScalar ** vv)407 PetscErrorCode VecGetArrayWrite_SeqCUDA(Vec v,PetscScalar **vv)
408 {
409 PetscErrorCode ierr;
410
411 PetscFunctionBegin;
412 ierr = VecCUDAAllocateCheckHost(v);CHKERRQ(ierr);
413 v->offloadmask = PETSC_OFFLOAD_CPU;
414 *vv = *((PetscScalar**)v->data);
415 PetscFunctionReturn(0);
416 }
417
VecBindToCPU_SeqCUDA(Vec V,PetscBool pin)418 PetscErrorCode VecBindToCPU_SeqCUDA(Vec V,PetscBool pin)
419 {
420 PetscErrorCode ierr;
421
422 PetscFunctionBegin;
423 V->boundtocpu = pin;
424 if (pin) {
425 ierr = VecCUDACopyFromGPU(V);CHKERRQ(ierr);
426 V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
427 V->ops->dot = VecDot_Seq;
428 V->ops->norm = VecNorm_Seq;
429 V->ops->tdot = VecTDot_Seq;
430 V->ops->scale = VecScale_Seq;
431 V->ops->copy = VecCopy_Seq;
432 V->ops->set = VecSet_Seq;
433 V->ops->swap = VecSwap_Seq;
434 V->ops->axpy = VecAXPY_Seq;
435 V->ops->axpby = VecAXPBY_Seq;
436 V->ops->axpbypcz = VecAXPBYPCZ_Seq;
437 V->ops->pointwisemult = VecPointwiseMult_Seq;
438 V->ops->pointwisedivide = VecPointwiseDivide_Seq;
439 V->ops->setrandom = VecSetRandom_Seq;
440 V->ops->dot_local = VecDot_Seq;
441 V->ops->tdot_local = VecTDot_Seq;
442 V->ops->norm_local = VecNorm_Seq;
443 V->ops->mdot_local = VecMDot_Seq;
444 V->ops->mtdot_local = VecMTDot_Seq;
445 V->ops->maxpy = VecMAXPY_Seq;
446 V->ops->mdot = VecMDot_Seq;
447 V->ops->mtdot = VecMTDot_Seq;
448 V->ops->aypx = VecAYPX_Seq;
449 V->ops->waxpy = VecWAXPY_Seq;
450 V->ops->dotnorm2 = NULL;
451 V->ops->placearray = VecPlaceArray_Seq;
452 V->ops->replacearray = VecReplaceArray_SeqCUDA;
453 V->ops->resetarray = VecResetArray_Seq;
454 V->ops->duplicate = VecDuplicate_Seq;
455 V->ops->conjugate = VecConjugate_Seq;
456 V->ops->getlocalvector = NULL;
457 V->ops->restorelocalvector = NULL;
458 V->ops->getlocalvectorread = NULL;
459 V->ops->restorelocalvectorread = NULL;
460 V->ops->getarraywrite = NULL;
461 } else {
462 V->ops->dot = VecDot_SeqCUDA;
463 V->ops->norm = VecNorm_SeqCUDA;
464 V->ops->tdot = VecTDot_SeqCUDA;
465 V->ops->scale = VecScale_SeqCUDA;
466 V->ops->copy = VecCopy_SeqCUDA;
467 V->ops->set = VecSet_SeqCUDA;
468 V->ops->swap = VecSwap_SeqCUDA;
469 V->ops->axpy = VecAXPY_SeqCUDA;
470 V->ops->axpby = VecAXPBY_SeqCUDA;
471 V->ops->axpbypcz = VecAXPBYPCZ_SeqCUDA;
472 V->ops->pointwisemult = VecPointwiseMult_SeqCUDA;
473 V->ops->pointwisedivide = VecPointwiseDivide_SeqCUDA;
474 V->ops->setrandom = VecSetRandom_SeqCUDA;
475 V->ops->dot_local = VecDot_SeqCUDA;
476 V->ops->tdot_local = VecTDot_SeqCUDA;
477 V->ops->norm_local = VecNorm_SeqCUDA;
478 V->ops->mdot_local = VecMDot_SeqCUDA;
479 V->ops->maxpy = VecMAXPY_SeqCUDA;
480 V->ops->mdot = VecMDot_SeqCUDA;
481 V->ops->aypx = VecAYPX_SeqCUDA;
482 V->ops->waxpy = VecWAXPY_SeqCUDA;
483 V->ops->dotnorm2 = VecDotNorm2_SeqCUDA;
484 V->ops->placearray = VecPlaceArray_SeqCUDA;
485 V->ops->replacearray = VecReplaceArray_SeqCUDA;
486 V->ops->resetarray = VecResetArray_SeqCUDA;
487 V->ops->destroy = VecDestroy_SeqCUDA;
488 V->ops->duplicate = VecDuplicate_SeqCUDA;
489 V->ops->conjugate = VecConjugate_SeqCUDA;
490 V->ops->getlocalvector = VecGetLocalVector_SeqCUDA;
491 V->ops->restorelocalvector = VecRestoreLocalVector_SeqCUDA;
492 V->ops->getlocalvectorread = VecGetLocalVector_SeqCUDA;
493 V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;
494 V->ops->getarraywrite = VecGetArrayWrite_SeqCUDA;
495 }
496 PetscFunctionReturn(0);
497 }
498
VecCreate_SeqCUDA_Private(Vec V,const PetscScalar * array)499 PetscErrorCode VecCreate_SeqCUDA_Private(Vec V,const PetscScalar *array)
500 {
501 PetscErrorCode ierr;
502 Vec_CUDA *veccuda;
503 PetscMPIInt size;
504 PetscBool option_set;
505
506 PetscFunctionBegin;
507 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
508 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
509 ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
510 ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);CHKERRQ(ierr);
511 ierr = VecBindToCPU_SeqCUDA(V,PETSC_FALSE);CHKERRQ(ierr);
512 V->ops->bindtocpu = VecBindToCPU_SeqCUDA;
513
514 /* Later, functions check for the Vec_CUDA structure existence, so do not create it without array */
515 if (array) {
516 if (!V->spptr) {
517 PetscReal pinned_memory_min;
518 ierr = PetscMalloc(sizeof(Vec_CUDA),&V->spptr);CHKERRQ(ierr);
519 veccuda = (Vec_CUDA*)V->spptr;
520 veccuda->stream = 0; /* using default stream */
521 veccuda->GPUarray_allocated = 0;
522 V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
523
524 pinned_memory_min = 0;
525 /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
526 Note: This same code duplicated in VecCUDAAllocateCheck() and VecCreate_MPICUDA_Private(). Is there a good way to avoid this? */
527 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)V),((PetscObject)V)->prefix,"VECCUDA Options","Vec");CHKERRQ(ierr);
528 ierr = PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&option_set);CHKERRQ(ierr);
529 if (option_set) V->minimum_bytes_pinned_memory = pinned_memory_min;
530 ierr = PetscOptionsEnd();CHKERRQ(ierr);
531 }
532 veccuda = (Vec_CUDA*)V->spptr;
533 veccuda->GPUarray = (PetscScalar*)array;
534 V->offloadmask = PETSC_OFFLOAD_GPU;
535
536 }
537 PetscFunctionReturn(0);
538 }
539