1 /*
2 Implements the sequential ViennaCL vectors.
3 */
4
5 #include <petscconf.h>
6 #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/dvecimpl.h>
8 #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
9
10 #include <vector>
11
12 #include "viennacl/linalg/inner_prod.hpp"
13 #include "viennacl/linalg/norm_1.hpp"
14 #include "viennacl/linalg/norm_2.hpp"
15 #include "viennacl/linalg/norm_inf.hpp"
16
17 #ifdef VIENNACL_WITH_OPENCL
18 #include "viennacl/ocl/backend.hpp"
19 #endif
20
21
VecViennaCLGetArray(Vec v,ViennaCLVector ** a)22 PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
23 {
24 PetscErrorCode ierr;
25
26 PetscFunctionBegin;
27 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
28 *a = 0;
29 ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
30 *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31 ViennaCLWaitForGPU();
32 PetscFunctionReturn(0);
33 }
34
VecViennaCLRestoreArray(Vec v,ViennaCLVector ** a)35 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
36 {
37 PetscErrorCode ierr;
38
39 PetscFunctionBegin;
40 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
41 v->offloadmask = PETSC_OFFLOAD_GPU;
42
43 ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
44 PetscFunctionReturn(0);
45 }
46
VecViennaCLGetArrayRead(Vec v,const ViennaCLVector ** a)47 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48 {
49 PetscErrorCode ierr;
50
51 PetscFunctionBegin;
52 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
53 *a = 0;
54 ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
55 *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56 ViennaCLWaitForGPU();
57 PetscFunctionReturn(0);
58 }
59
VecViennaCLRestoreArrayRead(Vec v,const ViennaCLVector ** a)60 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61 {
62 PetscFunctionBegin;
63 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
64 PetscFunctionReturn(0);
65 }
66
VecViennaCLGetArrayWrite(Vec v,ViennaCLVector ** a)67 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68 {
69 PetscErrorCode ierr;
70
71 PetscFunctionBegin;
72 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
73 *a = 0;
74 ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
75 *a = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76 ViennaCLWaitForGPU();
77 PetscFunctionReturn(0);
78 }
79
VecViennaCLRestoreArrayWrite(Vec v,ViennaCLVector ** a)80 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81 {
82 PetscErrorCode ierr;
83
84 PetscFunctionBegin;
85 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
86 v->offloadmask = PETSC_OFFLOAD_GPU;
87
88 ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
89 PetscFunctionReturn(0);
90 }
91
92
93
PetscViennaCLInit()94 PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95 {
96 PetscErrorCode ierr;
97 char string[20];
98 PetscBool flg,flg_cuda,flg_opencl,flg_openmp;
99
100 PetscFunctionBegin;
101 /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102 ierr = PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);CHKERRQ(ierr);
103 if (flg) {
104 try {
105 ierr = PetscStrcasecmp(string,"cuda",&flg_cuda);CHKERRQ(ierr);
106 ierr = PetscStrcasecmp(string,"opencl",&flg_opencl);CHKERRQ(ierr);
107 ierr = PetscStrcasecmp(string,"openmp",&flg_openmp);CHKERRQ(ierr);
108
109 /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110 if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111 #if defined(PETSC_HAVE_CUDA)
112 else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113 #endif
114 #if defined(PETSC_HAVE_OPENCL)
115 else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116 #endif
117 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118 } catch (std::exception const & ex) {
119 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120 }
121 }
122
123 #if defined(PETSC_HAVE_OPENCL)
124 /* ViennaCL OpenCL device type configuration */
125 ierr = PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);CHKERRQ(ierr);
126 if (flg) {
127 try {
128 ierr = PetscStrcasecmp(string,"cpu",&flg);CHKERRQ(ierr);
129 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
130
131 ierr = PetscStrcasecmp(string,"gpu",&flg);CHKERRQ(ierr);
132 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
133
134 ierr = PetscStrcasecmp(string,"accelerator",&flg);CHKERRQ(ierr);
135 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136 } catch (std::exception const & ex) {
137 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138 }
139 }
140 #endif
141
142 /* Print available backends */
143 ierr = PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);CHKERRQ(ierr);
144 if (flg) {
145 ierr = PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");CHKERRQ(ierr);
146 #if defined(PETSC_HAVE_CUDA)
147 ierr = PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");CHKERRQ(ierr);
148 #endif
149 #if defined(PETSC_HAVE_OPENCL)
150 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");CHKERRQ(ierr);
151 #endif
152 #if defined(PETSC_HAVE_OPENMP)
153 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");CHKERRQ(ierr);
154 #else
155 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");CHKERRQ(ierr);
156 #endif
157 ierr = PetscPrintf(PETSC_COMM_WORLD, "\n");CHKERRQ(ierr);
158
159 /* Print selected backends */
160 ierr = PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: ");CHKERRQ(ierr);
161 #if defined(PETSC_HAVE_CUDA)
162 if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163 ierr = PetscPrintf(PETSC_COMM_WORLD, "CUDA ");CHKERRQ(ierr);
164 }
165 #endif
166 #if defined(PETSC_HAVE_OPENCL)
167 if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");CHKERRQ(ierr);
169 }
170 #endif
171 #if defined(PETSC_HAVE_OPENMP)
172 if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");CHKERRQ(ierr);
174 }
175 #else
176 if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177 ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");CHKERRQ(ierr);
178 }
179 #endif
180 ierr = PetscPrintf(PETSC_COMM_WORLD, "\n");CHKERRQ(ierr);
181 }
182 PetscFunctionReturn(0);
183 }
184
185 /*
186 Allocates space for the vector array on the Host if it does not exist.
187 Does NOT change the PetscViennaCLFlag for the vector
188 Does NOT zero the ViennaCL array
189 */
VecViennaCLAllocateCheckHost(Vec v)190 PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191 {
192 PetscErrorCode ierr;
193 PetscScalar *array;
194 Vec_Seq *s;
195 PetscInt n = v->map->n;
196
197 PetscFunctionBegin;
198 s = (Vec_Seq*)v->data;
199 ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
200 if (s->array == 0) {
201 ierr = PetscMalloc1(n,&array);CHKERRQ(ierr);
202 ierr = PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));CHKERRQ(ierr);
203 s->array = array;
204 s->array_allocated = array;
205 }
206 PetscFunctionReturn(0);
207 }
208
209
210 /*
211 Allocates space for the vector array on the GPU if it does not exist.
212 Does NOT change the PetscViennaCLFlag for the vector
213 Does NOT zero the ViennaCL array
214
215 */
VecViennaCLAllocateCheck(Vec v)216 PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217 {
218 PetscFunctionBegin;
219 if (!v->spptr) {
220 try {
221 v->spptr = new Vec_ViennaCL;
222 ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223 ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
224
225 } catch(std::exception const & ex) {
226 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227 }
228 }
229 PetscFunctionReturn(0);
230 }
231
232
233 /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
VecViennaCLCopyToGPU(Vec v)234 PetscErrorCode VecViennaCLCopyToGPU(Vec v)
235 {
236 PetscErrorCode ierr;
237
238 PetscFunctionBegin;
239 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
240 ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
241 if (v->map->n > 0) {
242 if (v->offloadmask == PETSC_OFFLOAD_CPU) {
243 ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
244 try {
245 ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
246 viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
247 ViennaCLWaitForGPU();
248 } catch(std::exception const & ex) {
249 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
250 }
251 ierr = PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));CHKERRQ(ierr);
252 ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
253 v->offloadmask = PETSC_OFFLOAD_BOTH;
254 }
255 }
256 PetscFunctionReturn(0);
257 }
258
259
260
261 /*
262 VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
263 */
VecViennaCLCopyFromGPU(Vec v)264 PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
265 {
266 PetscErrorCode ierr;
267
268 PetscFunctionBegin;
269 PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
270 ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
271 if (v->offloadmask == PETSC_OFFLOAD_GPU) {
272 ierr = PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
273 try {
274 ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
275 viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
276 ViennaCLWaitForGPU();
277 } catch(std::exception const & ex) {
278 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
279 }
280 ierr = PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));CHKERRQ(ierr);
281 ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
282 v->offloadmask = PETSC_OFFLOAD_BOTH;
283 }
284 PetscFunctionReturn(0);
285 }
286
287
288 /* Copy on CPU */
VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)289 static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
290 {
291 PetscScalar *ya;
292 const PetscScalar *xa;
293 PetscErrorCode ierr;
294
295 PetscFunctionBegin;
296 ierr = VecViennaCLAllocateCheckHost(xin);CHKERRQ(ierr);
297 ierr = VecViennaCLAllocateCheckHost(yin);CHKERRQ(ierr);
298 if (xin != yin) {
299 ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
300 ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
301 ierr = PetscArraycpy(ya,xa,xin->map->n);CHKERRQ(ierr);
302 ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
303 ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
304 }
305 PetscFunctionReturn(0);
306 }
307
VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)308 static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
309 {
310 PetscErrorCode ierr;
311 PetscInt n = xin->map->n,i;
312 PetscScalar *xx;
313
314 PetscFunctionBegin;
315 ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
316 for (i=0; i<n; i++) {ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr);}
317 ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
318 PetscFunctionReturn(0);
319 }
320
VecDestroy_SeqViennaCL_Private(Vec v)321 static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
322 {
323 Vec_Seq *vs = (Vec_Seq*)v->data;
324 PetscErrorCode ierr;
325
326 PetscFunctionBegin;
327 ierr = PetscObjectSAWsViewOff(v);CHKERRQ(ierr);
328 #if defined(PETSC_USE_LOG)
329 PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
330 #endif
331 if (vs->array_allocated) { ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr); }
332 ierr = PetscFree(vs);CHKERRQ(ierr);
333 PetscFunctionReturn(0);
334 }
335
VecResetArray_SeqViennaCL_Private(Vec vin)336 static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
337 {
338 Vec_Seq *v = (Vec_Seq*)vin->data;
339
340 PetscFunctionBegin;
341 v->array = v->unplacedarray;
342 v->unplacedarray = 0;
343 PetscFunctionReturn(0);
344 }
345
346
347 /*MC
348 VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
349
350 Options Database Keys:
351 . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
352
353 Level: beginner
354
355 .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
356 M*/
357
358
VecAYPX_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)359 PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
360 {
361 const ViennaCLVector *xgpu;
362 ViennaCLVector *ygpu;
363 PetscErrorCode ierr;
364
365 PetscFunctionBegin;
366 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
367 ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
368 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
369 try {
370 if (alpha != 0.0 && xin->map->n > 0) {
371 *ygpu = *xgpu + alpha * *ygpu;
372 ierr = PetscLogGpuFlops(2.0*yin->map->n);CHKERRQ(ierr);
373 } else {
374 *ygpu = *xgpu;
375 }
376 ViennaCLWaitForGPU();
377 } catch(std::exception const & ex) {
378 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
379 }
380 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
381 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
382 ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
383 PetscFunctionReturn(0);
384 }
385
386
VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)387 PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388 {
389 const ViennaCLVector *xgpu;
390 ViennaCLVector *ygpu;
391 PetscErrorCode ierr;
392
393 PetscFunctionBegin;
394 if (alpha != 0.0 && xin->map->n > 0) {
395 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
396 ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
397 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
398 try {
399 *ygpu += alpha * *xgpu;
400 ViennaCLWaitForGPU();
401 } catch(std::exception const & ex) {
402 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
403 }
404 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
405 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
406 ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
407 ierr = PetscLogGpuFlops(2.0*yin->map->n);CHKERRQ(ierr);
408 }
409 PetscFunctionReturn(0);
410 }
411
412
VecPointwiseDivide_SeqViennaCL(Vec win,Vec xin,Vec yin)413 PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
414 {
415 const ViennaCLVector *xgpu,*ygpu;
416 ViennaCLVector *wgpu;
417 PetscErrorCode ierr;
418
419 PetscFunctionBegin;
420 if (xin->map->n > 0) {
421 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
422 ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
423 ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
424 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
425 try {
426 *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
427 ViennaCLWaitForGPU();
428 } catch(std::exception const & ex) {
429 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430 }
431 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
432 ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
433 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
434 ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
435 ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
436 }
437 PetscFunctionReturn(0);
438 }
439
440
VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin,Vec yin)441 PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
442 {
443 const ViennaCLVector *xgpu,*ygpu;
444 ViennaCLVector *wgpu;
445 PetscErrorCode ierr;
446
447 PetscFunctionBegin;
448 if (alpha == 0.0 && xin->map->n > 0) {
449 ierr = VecCopy_SeqViennaCL(yin,win);CHKERRQ(ierr);
450 } else {
451 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
452 ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
453 ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
454 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
455 if (alpha == 1.0) {
456 try {
457 *wgpu = *ygpu + *xgpu;
458 } catch(std::exception const & ex) {
459 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
460 }
461 ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
462 } else if (alpha == -1.0) {
463 try {
464 *wgpu = *ygpu - *xgpu;
465 } catch(std::exception const & ex) {
466 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
467 }
468 ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
469 } else {
470 try {
471 *wgpu = *ygpu + alpha * *xgpu;
472 } catch(std::exception const & ex) {
473 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
474 }
475 ierr = PetscLogGpuFlops(2*win->map->n);CHKERRQ(ierr);
476 }
477 ViennaCLWaitForGPU();
478 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
479 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
480 ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
481 ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
482 }
483 PetscFunctionReturn(0);
484 }
485
486
487 /*
488 * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
489 *
490 * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
491 * hence there is an iterated application of these until the final result is obtained
492 */
VecMAXPY_SeqViennaCL(Vec xin,PetscInt nv,const PetscScalar * alpha,Vec * y)493 PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
494 {
495 PetscErrorCode ierr;
496 PetscInt j;
497
498 PetscFunctionBegin;
499 for (j = 0; j < nv; ++j) {
500 if (j+1 < nv) {
501 VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
502 ++j;
503 } else {
504 ierr = VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);CHKERRQ(ierr);
505 }
506 }
507 ViennaCLWaitForGPU();
508 PetscFunctionReturn(0);
509 }
510
511
VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)512 PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
513 {
514 const ViennaCLVector *xgpu,*ygpu;
515 PetscErrorCode ierr;
516
517 PetscFunctionBegin;
518 if (xin->map->n > 0) {
519 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
520 ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
521 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
522 try {
523 *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
524 } catch(std::exception const & ex) {
525 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
526 }
527 ViennaCLWaitForGPU();
528 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
529 if (xin->map->n >0) {
530 ierr = PetscLogGpuFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
531 }
532 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
533 ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
534 } else *z = 0.0;
535 PetscFunctionReturn(0);
536 }
537
538
539
540 /*
541 * Operation z[j] = dot(x, y[j])
542 *
543 * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
544 */
VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)545 PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
546 {
547 PetscErrorCode ierr;
548 PetscInt n = xin->map->n,i;
549 const ViennaCLVector *xgpu,*ygpu;
550 Vec *yyin = (Vec*)yin;
551 std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
552
553 PetscFunctionBegin;
554 if (xin->map->n > 0) {
555 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
556 for (i=0; i<nv; i++) {
557 ierr = VecViennaCLGetArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
558 ygpu_array[i] = ygpu;
559 }
560 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
561 viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
562 ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
563 viennacl::copy(result.begin(), result.end(), z);
564 for (i=0; i<nv; i++) {
565 ierr = VecViennaCLRestoreArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
566 }
567 ViennaCLWaitForGPU();
568 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
569 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
570 ierr = PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));CHKERRQ(ierr);
571 } else {
572 for (i=0; i<nv; i++) z[i] = 0.0;
573 }
574 PetscFunctionReturn(0);
575 }
576
VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)577 PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
578 {
579 PetscErrorCode ierr;
580
581 PetscFunctionBegin;
582 /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
583 ierr = VecMDot_SeqViennaCL(xin,nv,yin,z);CHKERRQ(ierr);
584 ViennaCLWaitForGPU();
585 PetscFunctionReturn(0);
586 }
587
588
VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)589 PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
590 {
591 ViennaCLVector *xgpu;
592 PetscErrorCode ierr;
593
594 PetscFunctionBegin;
595 if (xin->map->n > 0) {
596 ierr = VecViennaCLGetArrayWrite(xin,&xgpu);CHKERRQ(ierr);
597 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
598 try {
599 *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
600 ViennaCLWaitForGPU();
601 } catch(std::exception const & ex) {
602 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
603 }
604 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
605 ierr = VecViennaCLRestoreArrayWrite(xin,&xgpu);CHKERRQ(ierr);
606 }
607 PetscFunctionReturn(0);
608 }
609
VecScale_SeqViennaCL(Vec xin,PetscScalar alpha)610 PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
611 {
612 ViennaCLVector *xgpu;
613 PetscErrorCode ierr;
614
615 PetscFunctionBegin;
616 if (alpha == 0.0 && xin->map->n > 0) {
617 ierr = VecSet_SeqViennaCL(xin,alpha);CHKERRQ(ierr);
618 ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
619 } else if (alpha != 1.0 && xin->map->n > 0) {
620 ierr = VecViennaCLGetArray(xin,&xgpu);CHKERRQ(ierr);
621 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
622 try {
623 *xgpu *= alpha;
624 ViennaCLWaitForGPU();
625 } catch(std::exception const & ex) {
626 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
627 }
628 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
629 ierr = VecViennaCLRestoreArray(xin,&xgpu);CHKERRQ(ierr);
630 ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
631 }
632 PetscFunctionReturn(0);
633 }
634
635
VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)636 PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
637 {
638 PetscErrorCode ierr;
639
640 PetscFunctionBegin;
641 /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
642 ierr = VecDot_SeqViennaCL(xin, yin, z);CHKERRQ(ierr);
643 ViennaCLWaitForGPU();
644 PetscFunctionReturn(0);
645 }
646
647
VecCopy_SeqViennaCL(Vec xin,Vec yin)648 PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
649 {
650 const ViennaCLVector *xgpu;
651 ViennaCLVector *ygpu;
652 PetscErrorCode ierr;
653
654 PetscFunctionBegin;
655 if (xin != yin && xin->map->n > 0) {
656 if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
657 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
658 ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
659 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
660 try {
661 *ygpu = *xgpu;
662 ViennaCLWaitForGPU();
663 } catch(std::exception const & ex) {
664 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
665 }
666 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
667 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
668 ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
669
670 } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
671 /* copy in CPU if we are on the CPU*/
672 ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
673 ViennaCLWaitForGPU();
674 } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
675 /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
676 if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
677 /* copy in CPU */
678 ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
679 ViennaCLWaitForGPU();
680 } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
681 /* copy in GPU */
682 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
683 ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
684 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
685 try {
686 *ygpu = *xgpu;
687 ViennaCLWaitForGPU();
688 } catch(std::exception const & ex) {
689 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690 }
691 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
692 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
693 ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
694 } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
695 /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
696 default to copy in GPU (this is an arbitrary choice) */
697 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
698 ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
699 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
700 try {
701 *ygpu = *xgpu;
702 ViennaCLWaitForGPU();
703 } catch(std::exception const & ex) {
704 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
705 }
706 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
707 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
708 ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
709 } else {
710 ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
711 ViennaCLWaitForGPU();
712 }
713 }
714 }
715 PetscFunctionReturn(0);
716 }
717
718
VecSwap_SeqViennaCL(Vec xin,Vec yin)719 PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
720 {
721 PetscErrorCode ierr;
722 ViennaCLVector *xgpu,*ygpu;
723
724 PetscFunctionBegin;
725 if (xin != yin && xin->map->n > 0) {
726 ierr = VecViennaCLGetArray(xin,&xgpu);CHKERRQ(ierr);
727 ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
728 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
729 try {
730 viennacl::swap(*xgpu, *ygpu);
731 ViennaCLWaitForGPU();
732 } catch(std::exception const & ex) {
733 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
734 }
735 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
736 ierr = VecViennaCLRestoreArray(xin,&xgpu);CHKERRQ(ierr);
737 ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
738 }
739 PetscFunctionReturn(0);
740 }
741
742
743 // y = alpha * x + beta * y
VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)744 PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
745 {
746 PetscErrorCode ierr;
747 PetscScalar a = alpha,b = beta;
748 const ViennaCLVector *xgpu;
749 ViennaCLVector *ygpu;
750
751 PetscFunctionBegin;
752 if (a == 0.0 && xin->map->n > 0) {
753 ierr = VecScale_SeqViennaCL(yin,beta);CHKERRQ(ierr);
754 } else if (b == 1.0 && xin->map->n > 0) {
755 ierr = VecAXPY_SeqViennaCL(yin,alpha,xin);CHKERRQ(ierr);
756 } else if (a == 1.0 && xin->map->n > 0) {
757 ierr = VecAYPX_SeqViennaCL(yin,beta,xin);CHKERRQ(ierr);
758 } else if (b == 0.0 && xin->map->n > 0) {
759 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
760 ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
761 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
762 try {
763 *ygpu = *xgpu * alpha;
764 ViennaCLWaitForGPU();
765 } catch(std::exception const & ex) {
766 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
767 }
768 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
769 ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
770 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
771 ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
772 } else if (xin->map->n > 0) {
773 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
774 ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
775 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
776 try {
777 *ygpu = *xgpu * alpha + *ygpu * beta;
778 ViennaCLWaitForGPU();
779 } catch(std::exception const & ex) {
780 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
781 }
782 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
783 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
784 ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
785 ierr = PetscLogGpuFlops(3.0*xin->map->n);CHKERRQ(ierr);
786 }
787 PetscFunctionReturn(0);
788 }
789
790
791 /* operation z = alpha * x + beta *y + gamma *z*/
VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)792 PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
793 {
794 PetscErrorCode ierr;
795 PetscInt n = zin->map->n;
796 const ViennaCLVector *xgpu,*ygpu;
797 ViennaCLVector *zgpu;
798
799 PetscFunctionBegin;
800 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
801 ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
802 ierr = VecViennaCLGetArray(zin,&zgpu);CHKERRQ(ierr);
803 if (alpha == 0.0 && xin->map->n > 0) {
804 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
805 try {
806 if (beta == 0.0) {
807 *zgpu = gamma * *zgpu;
808 ViennaCLWaitForGPU();
809 ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
810 } else if (gamma == 0.0) {
811 *zgpu = beta * *ygpu;
812 ViennaCLWaitForGPU();
813 ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
814 } else {
815 *zgpu = beta * *ygpu + gamma * *zgpu;
816 ViennaCLWaitForGPU();
817 ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
818 }
819 } catch(std::exception const & ex) {
820 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821 }
822 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
823 ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
824 } else if (beta == 0.0 && xin->map->n > 0) {
825 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
826 try {
827 if (gamma == 0.0) {
828 *zgpu = alpha * *xgpu;
829 ViennaCLWaitForGPU();
830 ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
831 } else {
832 *zgpu = alpha * *xgpu + gamma * *zgpu;
833 ViennaCLWaitForGPU();
834 ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
835 }
836 } catch(std::exception const & ex) {
837 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
838 }
839 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
840 } else if (gamma == 0.0 && xin->map->n > 0) {
841 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
842 try {
843 *zgpu = alpha * *xgpu + beta * *ygpu;
844 ViennaCLWaitForGPU();
845 } catch(std::exception const & ex) {
846 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
847 }
848 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
849 ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
850 } else if (xin->map->n > 0) {
851 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
852 try {
853 /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
854 if (gamma != 1.0)
855 *zgpu *= gamma;
856 *zgpu += alpha * *xgpu + beta * *ygpu;
857 ViennaCLWaitForGPU();
858 } catch(std::exception const & ex) {
859 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
860 }
861 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
862 ierr = VecViennaCLRestoreArray(zin,&zgpu);CHKERRQ(ierr);
863 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
864 ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
865 ierr = PetscLogGpuFlops(5.0*n);CHKERRQ(ierr);
866 }
867 PetscFunctionReturn(0);
868 }
869
VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)870 PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
871 {
872 PetscErrorCode ierr;
873 PetscInt n = win->map->n;
874 const ViennaCLVector *xgpu,*ygpu;
875 ViennaCLVector *wgpu;
876
877 PetscFunctionBegin;
878 if (xin->map->n > 0) {
879 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
880 ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
881 ierr = VecViennaCLGetArray(win,&wgpu);CHKERRQ(ierr);
882 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
883 try {
884 *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
885 ViennaCLWaitForGPU();
886 } catch(std::exception const & ex) {
887 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888 }
889 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
890 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
891 ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
892 ierr = VecViennaCLRestoreArray(win,&wgpu);CHKERRQ(ierr);
893 ierr = PetscLogGpuFlops(n);CHKERRQ(ierr);
894 }
895 PetscFunctionReturn(0);
896 }
897
898
VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal * z)899 PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
900 {
901 PetscErrorCode ierr;
902 PetscInt n = xin->map->n;
903 PetscBLASInt bn;
904 const ViennaCLVector *xgpu;
905
906 PetscFunctionBegin;
907 if (xin->map->n > 0) {
908 ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
909 ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
910 if (type == NORM_2 || type == NORM_FROBENIUS) {
911 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
912 try {
913 *z = viennacl::linalg::norm_2(*xgpu);
914 ViennaCLWaitForGPU();
915 } catch(std::exception const & ex) {
916 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
917 }
918 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
919 ierr = PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
920 } else if (type == NORM_INFINITY) {
921 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
922 try {
923 *z = viennacl::linalg::norm_inf(*xgpu);
924 ViennaCLWaitForGPU();
925 } catch(std::exception const & ex) {
926 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927 }
928 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
929 } else if (type == NORM_1) {
930 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
931 try {
932 *z = viennacl::linalg::norm_1(*xgpu);
933 ViennaCLWaitForGPU();
934 } catch(std::exception const & ex) {
935 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
936 }
937 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
938 ierr = PetscLogGpuFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
939 } else if (type == NORM_1_AND_2) {
940 ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
941 try {
942 *z = viennacl::linalg::norm_1(*xgpu);
943 *(z+1) = viennacl::linalg::norm_2(*xgpu);
944 ViennaCLWaitForGPU();
945 } catch(std::exception const & ex) {
946 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
947 }
948 ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
949 ierr = PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
950 ierr = PetscLogGpuFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
951 }
952 ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
953 } else if (type == NORM_1_AND_2) {
954 *z = 0.0;
955 *(z+1) = 0.0;
956 } else *z = 0.0;
957 PetscFunctionReturn(0);
958 }
959
960
VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)961 PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
962 {
963 PetscErrorCode ierr;
964
965 PetscFunctionBegin;
966 ierr = VecSetRandom_SeqViennaCL_Private(xin,r);CHKERRQ(ierr);
967 xin->offloadmask = PETSC_OFFLOAD_CPU;
968 PetscFunctionReturn(0);
969 }
970
VecResetArray_SeqViennaCL(Vec vin)971 PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
972 {
973 PetscErrorCode ierr;
974
975 PetscFunctionBegin;
976 PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
977 ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
978 ierr = VecResetArray_SeqViennaCL_Private(vin);CHKERRQ(ierr);
979 vin->offloadmask = PETSC_OFFLOAD_CPU;
980 PetscFunctionReturn(0);
981 }
982
VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)983 PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
984 {
985 PetscErrorCode ierr;
986
987 PetscFunctionBegin;
988 PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
989 ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
990 ierr = VecPlaceArray_Seq(vin,a);CHKERRQ(ierr);
991 vin->offloadmask = PETSC_OFFLOAD_CPU;
992 PetscFunctionReturn(0);
993 }
994
VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)995 PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
996 {
997 PetscErrorCode ierr;
998
999 PetscFunctionBegin;
1000 PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1001 ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
1002 ierr = VecReplaceArray_Seq(vin,a);CHKERRQ(ierr);
1003 vin->offloadmask = PETSC_OFFLOAD_CPU;
1004 PetscFunctionReturn(0);
1005 }
1006
1007
1008 /*@C
1009 VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1010
1011 Collective
1012
1013 Input Parameter:
1014 + comm - the communicator, should be PETSC_COMM_SELF
1015 - n - the vector length
1016
1017 Output Parameter:
1018 . V - the vector
1019
1020 Notes:
1021 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1022 same type as an existing vector.
1023
1024 Level: intermediate
1025
1026 .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1027 @*/
VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec * v)1028 PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1029 {
1030 PetscErrorCode ierr;
1031
1032 PetscFunctionBegin;
1033 ierr = VecCreate(comm,v);CHKERRQ(ierr);
1034 ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
1035 ierr = VecSetType(*v,VECSEQVIENNACL);CHKERRQ(ierr);
1036 PetscFunctionReturn(0);
1037 }
1038
1039
1040 /*@C
1041 VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1042 where the user provides the array space to store the vector values.
1043
1044 Collective
1045
1046 Input Parameter:
1047 + comm - the communicator, should be PETSC_COMM_SELF
1048 . bs - the block size
1049 . n - the vector length
1050 - array - viennacl array where the vector elements are to be stored.
1051
1052 Output Parameter:
1053 . V - the vector
1054
1055 Notes:
1056 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1057 same type as an existing vector.
1058
1059 If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1060 at a later stage to SET the array for storing the vector values.
1061
1062 PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1063 The user should not free the array until the vector is destroyed.
1064
1065 Level: intermediate
1066
1067 .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1068 VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1069 VecCreateMPIWithArray()
1070 @*/
VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector * array,Vec * V)1071 PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1072 {
1073 PetscErrorCode ierr;
1074 PetscMPIInt size;
1075
1076 PetscFunctionBegin;
1077 ierr = VecCreate(comm,V);CHKERRQ(ierr);
1078 ierr = VecSetSizes(*V,n,n);CHKERRQ(ierr);
1079 ierr = VecSetBlockSize(*V,bs);CHKERRQ(ierr);
1080 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1082 ierr = VecCreate_SeqViennaCL_Private(*V,array);CHKERRQ(ierr);
1083 PetscFunctionReturn(0);
1084 }
1085
1086 /*@C
1087 VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1088 the user provides the array space to store the vector values.
1089
1090 Collective
1091
1092 Input Parameter:
1093 + comm - the communicator, should be PETSC_COMM_SELF
1094 . bs - the block size
1095 . n - the vector length
1096 - cpuarray - CPU memory where the vector elements are to be stored.
1097 - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1098
1099 Output Parameter:
1100 . V - the vector
1101
1102 Notes:
1103 If both cpuarray and viennaclvec are provided, the caller must ensure that
1104 the provided arrays have identical values.
1105
1106 PETSc does NOT free the provided arrays when the vector is destroyed via
1107 VecDestroy(). The user should not free the array until the vector is
1108 destroyed.
1109
1110 Level: intermediate
1111
1112 .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1113 VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1114 VecViennaCLAllocateCheckHost()
1115 @*/
VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector * viennaclvec,Vec * V)1116 PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1117 {
1118 PetscErrorCode ierr;
1119 PetscMPIInt size;
1120
1121 PetscFunctionBegin;
1122
1123 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1124 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1125
1126 // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1127 ierr = VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);CHKERRQ(ierr);
1128
1129 if (cpuarray && viennaclvec) {
1130 Vec_Seq *s = (Vec_Seq*)((*V)->data);
1131 s->array = (PetscScalar*)cpuarray;
1132 (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1133 } else if (cpuarray) {
1134 Vec_Seq *s = (Vec_Seq*)((*V)->data);
1135 s->array = (PetscScalar*)cpuarray;
1136 (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1137 } else if (viennaclvec) {
1138 (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1139 } else {
1140 (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1141 }
1142
1143 PetscFunctionReturn(0);
1144 }
1145
1146 /*@C
1147 VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1148 the one provided by the user. This is useful to avoid a copy.
1149
1150 Not Collective
1151
1152 Input Parameters:
1153 + vec - the vector
1154 - array - the ViennaCL vector
1155
1156 Notes:
1157 You can return to the original viennacl vector with a call to
1158 VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1159 and VecPlaceArray() at the same time on the same vector.
1160
1161 Level: intermediate
1162
1163 .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1164 VecCUDAPlaceArray(),
1165
1166 @*/
VecViennaCLPlaceArray(Vec vin,const ViennaCLVector * a)1167 PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1168 {
1169 PetscErrorCode ierr;
1170
1171 PetscFunctionBegin;
1172 PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1173 ierr = VecViennaCLCopyToGPU(vin);CHKERRQ(ierr);
1174 if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1175 ((Vec_Seq*)vin->data)->unplacedarray = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1176 ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1177 vin->offloadmask = PETSC_OFFLOAD_GPU;
1178 ierr = PetscObjectStateIncrease((PetscObject)vin);CHKERRQ(ierr);
1179 PetscFunctionReturn(0);
1180 }
1181
1182 /*@C
1183 VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1184 after the use of VecViennaCLPlaceArray().
1185
1186 Not Collective
1187
1188 Input Parameters:
1189 . vec - the vector
1190
1191 Level: developer
1192
1193 .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1194 @*/
VecViennaCLResetArray(Vec vin)1195 PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1196 {
1197 PetscErrorCode ierr;
1198
1199 PetscFunctionBegin;
1200 PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1201 ierr = VecViennaCLCopyToGPU(vin);CHKERRQ(ierr);
1202 ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1203 ((Vec_Seq*)vin->data)->unplacedarray = 0;
1204 vin->offloadmask = PETSC_OFFLOAD_GPU;
1205 ierr = PetscObjectStateIncrease((PetscObject)vin);CHKERRQ(ierr);
1206 PetscFunctionReturn(0);
1207 }
1208
1209
1210 /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1211 *
1212 * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1213 */
VecDotNorm2_SeqViennaCL(Vec s,Vec t,PetscScalar * dp,PetscScalar * nm)1214 PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1215 {
1216 PetscErrorCode ierr;
1217
1218 PetscFunctionBegin;
1219 ierr = VecDot_SeqViennaCL(s,t,dp);CHKERRQ(ierr);
1220 ierr = VecNorm_SeqViennaCL(t,NORM_2,nm);CHKERRQ(ierr);
1221 *nm *= *nm; //squared norm required
1222 PetscFunctionReturn(0);
1223 }
1224
VecDuplicate_SeqViennaCL(Vec win,Vec * V)1225 PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1226 {
1227 PetscErrorCode ierr;
1228
1229 PetscFunctionBegin;
1230 ierr = VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);CHKERRQ(ierr);
1231 ierr = PetscLayoutReference(win->map,&(*V)->map);CHKERRQ(ierr);
1232 ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);CHKERRQ(ierr);
1233 ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);CHKERRQ(ierr);
1234 (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1235 PetscFunctionReturn(0);
1236 }
1237
VecDestroy_SeqViennaCL(Vec v)1238 PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1239 {
1240 PetscErrorCode ierr;
1241
1242 PetscFunctionBegin;
1243 try {
1244 if (v->spptr) {
1245 delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1246 delete (Vec_ViennaCL*) v->spptr;
1247 }
1248 } catch(char *ex) {
1249 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1250 }
1251 ierr = VecDestroy_SeqViennaCL_Private(v);CHKERRQ(ierr);
1252 PetscFunctionReturn(0);
1253 }
1254
VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)1255 static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1256 {
1257 PetscErrorCode ierr;
1258
1259 PetscFunctionBegin;
1260 V->boundtocpu = flg;
1261 if (flg) {
1262 ierr = VecViennaCLCopyFromGPU(V);CHKERRQ(ierr);
1263 V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1264 V->ops->dot = VecDot_Seq;
1265 V->ops->norm = VecNorm_Seq;
1266 V->ops->tdot = VecTDot_Seq;
1267 V->ops->scale = VecScale_Seq;
1268 V->ops->copy = VecCopy_Seq;
1269 V->ops->set = VecSet_Seq;
1270 V->ops->swap = VecSwap_Seq;
1271 V->ops->axpy = VecAXPY_Seq;
1272 V->ops->axpby = VecAXPBY_Seq;
1273 V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1274 V->ops->pointwisemult = VecPointwiseMult_Seq;
1275 V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1276 V->ops->setrandom = VecSetRandom_Seq;
1277 V->ops->dot_local = VecDot_Seq;
1278 V->ops->tdot_local = VecTDot_Seq;
1279 V->ops->norm_local = VecNorm_Seq;
1280 V->ops->mdot_local = VecMDot_Seq;
1281 V->ops->mtdot_local = VecMTDot_Seq;
1282 V->ops->maxpy = VecMAXPY_Seq;
1283 V->ops->mdot = VecMDot_Seq;
1284 V->ops->mtdot = VecMTDot_Seq;
1285 V->ops->aypx = VecAYPX_Seq;
1286 V->ops->waxpy = VecWAXPY_Seq;
1287 V->ops->dotnorm2 = NULL;
1288 V->ops->placearray = VecPlaceArray_Seq;
1289 V->ops->replacearray = VecReplaceArray_Seq;
1290 V->ops->resetarray = VecResetArray_Seq;
1291 V->ops->duplicate = VecDuplicate_Seq;
1292 } else {
1293 V->ops->dot = VecDot_SeqViennaCL;
1294 V->ops->norm = VecNorm_SeqViennaCL;
1295 V->ops->tdot = VecTDot_SeqViennaCL;
1296 V->ops->scale = VecScale_SeqViennaCL;
1297 V->ops->copy = VecCopy_SeqViennaCL;
1298 V->ops->set = VecSet_SeqViennaCL;
1299 V->ops->swap = VecSwap_SeqViennaCL;
1300 V->ops->axpy = VecAXPY_SeqViennaCL;
1301 V->ops->axpby = VecAXPBY_SeqViennaCL;
1302 V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1303 V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1304 V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1305 V->ops->setrandom = VecSetRandom_SeqViennaCL;
1306 V->ops->dot_local = VecDot_SeqViennaCL;
1307 V->ops->tdot_local = VecTDot_SeqViennaCL;
1308 V->ops->norm_local = VecNorm_SeqViennaCL;
1309 V->ops->mdot_local = VecMDot_SeqViennaCL;
1310 V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1311 V->ops->maxpy = VecMAXPY_SeqViennaCL;
1312 V->ops->mdot = VecMDot_SeqViennaCL;
1313 V->ops->mtdot = VecMTDot_SeqViennaCL;
1314 V->ops->aypx = VecAYPX_SeqViennaCL;
1315 V->ops->waxpy = VecWAXPY_SeqViennaCL;
1316 V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1317 V->ops->placearray = VecPlaceArray_SeqViennaCL;
1318 V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1319 V->ops->resetarray = VecResetArray_SeqViennaCL;
1320 V->ops->destroy = VecDestroy_SeqViennaCL;
1321 V->ops->duplicate = VecDuplicate_SeqViennaCL;
1322 }
1323 PetscFunctionReturn(0);
1324 }
1325
VecCreate_SeqViennaCL(Vec V)1326 PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1327 {
1328 PetscErrorCode ierr;
1329 PetscMPIInt size;
1330
1331 PetscFunctionBegin;
1332 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
1333 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1334 ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
1335 ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);CHKERRQ(ierr);
1336
1337 ierr = VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);CHKERRQ(ierr);
1338 V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1339
1340 ierr = VecViennaCLAllocateCheck(V);CHKERRQ(ierr);
1341 ierr = VecViennaCLAllocateCheckHost(V);CHKERRQ(ierr);
1342 ierr = VecSet(V,0.0);CHKERRQ(ierr);
1343 ierr = VecSet_Seq(V,0.0);CHKERRQ(ierr);
1344 V->offloadmask = PETSC_OFFLOAD_BOTH;
1345 PetscFunctionReturn(0);
1346 }
1347
1348 /*@C
1349 VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1350
1351 Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1352 invoking clReleaseContext().
1353
1354
1355 Input Parameters:
1356 . v - the vector
1357
1358 Output Parameters:
1359 . ctx - pointer to the underlying CL context
1360
1361 Level: intermediate
1362
1363 .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1364 @*/
VecViennaCLGetCLContext(Vec v,PETSC_UINTPTR_T * ctx)1365 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1366 {
1367 #if !defined(PETSC_HAVE_OPENCL)
1368 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1369 #else
1370
1371 PetscFunctionBegin;
1372 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1373
1374 PetscErrorCode ierr;
1375 const ViennaCLVector *v_vcl;
1376 ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1377 try{
1378 viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1379 const cl_context ocl_ctx = vcl_ctx.handle().get();
1380 clRetainContext(ocl_ctx);
1381 *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1382 } catch (std::exception const & ex) {
1383 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1384 }
1385
1386 PetscFunctionReturn(0);
1387 #endif
1388 }
1389
1390 /*@C
1391 VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1392 operations of the Vec are enqueued.
1393
1394 Caller should cast (*queue) to (const cl_command_queue). Caller is
1395 responsible for invoking clReleaseCommandQueue().
1396
1397 Input Parameters:
1398 . v - the vector
1399
1400 Output Parameters:
1401 . ctx - pointer to the CL command queue
1402
1403 Level: intermediate
1404
1405 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1406 @*/
VecViennaCLGetCLQueue(Vec v,PETSC_UINTPTR_T * queue)1407 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1408 {
1409 #if !defined(PETSC_HAVE_OPENCL)
1410 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1411 #else
1412 PetscFunctionBegin;
1413 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1414
1415 PetscErrorCode ierr;
1416 const ViennaCLVector *v_vcl;
1417 ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1418 try{
1419 viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1420 const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1421 const cl_command_queue ocl_queue = vcl_queue.handle().get();
1422 clRetainCommandQueue(ocl_queue);
1423 *queue = (PETSC_UINTPTR_T)(ocl_queue);
1424 } catch (std::exception const & ex) {
1425 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1426 }
1427
1428 PetscFunctionReturn(0);
1429 #endif
1430 }
1431
1432 /*@C
1433 VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.
1434
1435 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1436 invoking clReleaseMemObject().
1437
1438 Input Parameters:
1439 . v - the vector
1440
1441 Output Parameters:
1442 . mem - pointer to the device buffer
1443
1444 Level: intermediate
1445
1446 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1447 @*/
VecViennaCLGetCLMemRead(Vec v,PETSC_UINTPTR_T * mem)1448 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1449 {
1450 #if !defined(PETSC_HAVE_OPENCL)
1451 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1452 #else
1453 PetscFunctionBegin;
1454 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1455
1456 PetscErrorCode ierr;
1457 const ViennaCLVector *v_vcl;
1458 ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1459 try{
1460 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1461 clRetainMemObject(ocl_mem);
1462 *mem = (PETSC_UINTPTR_T)(ocl_mem);
1463 } catch (std::exception const & ex) {
1464 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1465 }
1466 PetscFunctionReturn(0);
1467 #endif
1468 }
1469
1470 /*@C
1471 VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.
1472
1473 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1474 invoking clReleaseMemObject().
1475
1476 The device pointer has to be released by calling
1477 VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1478 the host will be marked as out of date. A subsequent access of the host data
1479 will thus incur a data transfer from the device to the host.
1480
1481 Input Parameters:
1482 . v - the vector
1483
1484 Output Parameters:
1485 . mem - pointer to the device buffer
1486
1487 Level: intermediate
1488
1489 .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1490 @*/
VecViennaCLGetCLMemWrite(Vec v,PETSC_UINTPTR_T * mem)1491 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1492 {
1493 #if !defined(PETSC_HAVE_OPENCL)
1494 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1495 #else
1496 PetscFunctionBegin;
1497 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1498
1499 PetscErrorCode ierr;
1500 ViennaCLVector *v_vcl;
1501 ierr = VecViennaCLGetArrayWrite(v, &v_vcl);CHKERRQ(ierr);
1502 try{
1503 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1504 clRetainMemObject(ocl_mem);
1505 *mem = (PETSC_UINTPTR_T)(ocl_mem);
1506 } catch (std::exception const & ex) {
1507 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1508 }
1509
1510 PetscFunctionReturn(0);
1511 #endif
1512 }
1513
1514 /*@C
1515 VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1516 acquired with VecViennaCLGetCLMemWrite().
1517
1518 This marks the host data as out of date. Subsequent access to the
1519 vector data on the host side with for instance VecGetArray() incurs a
1520 data transfer.
1521
1522 Input Parameters:
1523 . v - the vector
1524
1525 Level: intermediate
1526
1527 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1528 @*/
VecViennaCLRestoreCLMemWrite(Vec v)1529 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1530 {
1531 #if !defined(PETSC_HAVE_OPENCL)
1532 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1533 #else
1534 PetscFunctionBegin;
1535 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1536
1537 PetscErrorCode ierr;
1538 ierr = VecViennaCLRestoreArrayWrite(v, PETSC_NULL);CHKERRQ(ierr);
1539
1540 PetscFunctionReturn(0);
1541 #endif
1542 }
1543
1544
1545 /*@C
1546 VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.
1547
1548 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1549 invoking clReleaseMemObject().
1550
1551 The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1552 Upon restoring the vector data the data on the host will be marked as out of
1553 date. A subsequent access of the host data will thus incur a data transfer
1554 from the device to the host.
1555
1556 Input Parameters:
1557 . v - the vector
1558
1559 Output Parameters:
1560 . mem - pointer to the device buffer
1561
1562 Level: intermediate
1563
1564 .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1565 @*/
VecViennaCLGetCLMem(Vec v,PETSC_UINTPTR_T * mem)1566 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1567 {
1568 #if !defined(PETSC_HAVE_OPENCL)
1569 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1570 #else
1571 PetscFunctionBegin;
1572 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1573
1574 PetscErrorCode ierr;
1575 ViennaCLVector *v_vcl;
1576 ierr = VecViennaCLGetArray(v, &v_vcl);CHKERRQ(ierr);
1577 try{
1578 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1579 clRetainMemObject(ocl_mem);
1580 *mem = (PETSC_UINTPTR_T)(ocl_mem);
1581 } catch (std::exception const & ex) {
1582 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1583 }
1584
1585 PetscFunctionReturn(0);
1586 #endif
1587 }
1588
1589 /*@C
1590 VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1591 acquired with VecViennaCLGetCLMem().
1592
1593 This marks the host data as out of date. Subsequent access to the vector
1594 data on the host side with for instance VecGetArray() incurs a data
1595 transfer.
1596
1597 Input Parameters:
1598 . v - the vector
1599
1600 Level: intermediate
1601
1602 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1603 @*/
VecViennaCLRestoreCLMem(Vec v)1604 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1605 {
1606 #if !defined(PETSC_HAVE_OPENCL)
1607 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1608 #else
1609 PetscFunctionBegin;
1610 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1611
1612 PetscErrorCode ierr;
1613 ierr = VecViennaCLRestoreArray(v, PETSC_NULL);CHKERRQ(ierr);
1614
1615 PetscFunctionReturn(0);
1616 #endif
1617 }
1618
VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector * array)1619 PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1620 {
1621 PetscErrorCode ierr;
1622 Vec_ViennaCL *vecviennacl;
1623 PetscMPIInt size;
1624
1625 PetscFunctionBegin;
1626 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
1627 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1628 ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
1629 ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);CHKERRQ(ierr);
1630 ierr = VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);CHKERRQ(ierr);
1631 V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1632
1633 if (array) {
1634 if (!V->spptr)
1635 V->spptr = new Vec_ViennaCL;
1636 vecviennacl = (Vec_ViennaCL*)V->spptr;
1637 vecviennacl->GPUarray_allocated = 0;
1638 vecviennacl->GPUarray = (ViennaCLVector*)array;
1639 V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1640 }
1641
1642 PetscFunctionReturn(0);
1643 }
1644