1 /*
2    Implements the sequential ViennaCL vectors.
3 */
4 
5 #include <petscconf.h>
6 #include <petsc/private/vecimpl.h>          /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/dvecimpl.h>
8 #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
9 
10 #include <vector>
11 
12 #include "viennacl/linalg/inner_prod.hpp"
13 #include "viennacl/linalg/norm_1.hpp"
14 #include "viennacl/linalg/norm_2.hpp"
15 #include "viennacl/linalg/norm_inf.hpp"
16 
17 #ifdef VIENNACL_WITH_OPENCL
18 #include "viennacl/ocl/backend.hpp"
19 #endif
20 
21 
VecViennaCLGetArray(Vec v,ViennaCLVector ** a)22 PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
28   *a   = 0;
29   ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
30   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
31   ViennaCLWaitForGPU();
32   PetscFunctionReturn(0);
33 }
34 
VecViennaCLRestoreArray(Vec v,ViennaCLVector ** a)35 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
36 {
37   PetscErrorCode ierr;
38 
39   PetscFunctionBegin;
40   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
41   v->offloadmask = PETSC_OFFLOAD_GPU;
42 
43   ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
VecViennaCLGetArrayRead(Vec v,const ViennaCLVector ** a)47 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
48 {
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
53   *a   = 0;
54   ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
55   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
56   ViennaCLWaitForGPU();
57   PetscFunctionReturn(0);
58 }
59 
VecViennaCLRestoreArrayRead(Vec v,const ViennaCLVector ** a)60 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
61 {
62   PetscFunctionBegin;
63   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
64   PetscFunctionReturn(0);
65 }
66 
VecViennaCLGetArrayWrite(Vec v,ViennaCLVector ** a)67 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
68 {
69   PetscErrorCode ierr;
70 
71   PetscFunctionBegin;
72   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
73   *a   = 0;
74   ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
75   *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
76   ViennaCLWaitForGPU();
77   PetscFunctionReturn(0);
78 }
79 
VecViennaCLRestoreArrayWrite(Vec v,ViennaCLVector ** a)80 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
86   v->offloadmask = PETSC_OFFLOAD_GPU;
87 
88   ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 
93 
PetscViennaCLInit()94 PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
95 {
96   PetscErrorCode       ierr;
97   char                 string[20];
98   PetscBool            flg,flg_cuda,flg_opencl,flg_openmp;
99 
100   PetscFunctionBegin;
101   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
102   ierr = PetscOptionsGetString(NULL,NULL,"-viennacl_backend",string,sizeof(string),&flg);CHKERRQ(ierr);
103   if (flg) {
104     try {
105       ierr = PetscStrcasecmp(string,"cuda",&flg_cuda);CHKERRQ(ierr);
106       ierr = PetscStrcasecmp(string,"opencl",&flg_opencl);CHKERRQ(ierr);
107       ierr = PetscStrcasecmp(string,"openmp",&flg_openmp);CHKERRQ(ierr);
108 
109       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
110       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
111 #if defined(PETSC_HAVE_CUDA)
112       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
113 #endif
114 #if defined(PETSC_HAVE_OPENCL)
115       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
116 #endif
117       else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.\n", string);
118     } catch (std::exception const & ex) {
119       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
120     }
121   }
122 
123 #if defined(PETSC_HAVE_OPENCL)
124   /* ViennaCL OpenCL device type configuration */
125   ierr = PetscOptionsGetString(NULL,NULL,"-viennacl_opencl_device_type",string,sizeof(string),&flg);CHKERRQ(ierr);
126   if (flg) {
127     try {
128       ierr = PetscStrcasecmp(string,"cpu",&flg);CHKERRQ(ierr);
129       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
130 
131       ierr = PetscStrcasecmp(string,"gpu",&flg);CHKERRQ(ierr);
132       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
133 
134       ierr = PetscStrcasecmp(string,"accelerator",&flg);CHKERRQ(ierr);
135       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
136     } catch (std::exception const & ex) {
137       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
138     }
139   }
140 #endif
141 
142   /* Print available backends */
143   ierr = PetscOptionsHasName(NULL,NULL,"-viennacl_view",&flg);CHKERRQ(ierr);
144   if (flg) {
145     ierr = PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: ");CHKERRQ(ierr);
146 #if defined(PETSC_HAVE_CUDA)
147     ierr = PetscPrintf(PETSC_COMM_WORLD, "CUDA, ");CHKERRQ(ierr);
148 #endif
149 #if defined(PETSC_HAVE_OPENCL)
150     ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenCL, ");CHKERRQ(ierr);
151 #endif
152 #if defined(PETSC_HAVE_OPENMP)
153     ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");CHKERRQ(ierr);
154 #else
155     ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) ");CHKERRQ(ierr);
156 #endif
157     ierr = PetscPrintf(PETSC_COMM_WORLD, "\n");CHKERRQ(ierr);
158 
159     /* Print selected backends */
160     ierr = PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: ");CHKERRQ(ierr);
161 #if defined(PETSC_HAVE_CUDA)
162     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) {
163       ierr = PetscPrintf(PETSC_COMM_WORLD, "CUDA ");CHKERRQ(ierr);
164     }
165 #endif
166 #if defined(PETSC_HAVE_OPENCL)
167     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) {
168       ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenCL ");CHKERRQ(ierr);
169     }
170 #endif
171 #if defined(PETSC_HAVE_OPENMP)
172     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
173       ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP ");CHKERRQ(ierr);
174     }
175 #else
176     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) {
177       ierr = PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) ");CHKERRQ(ierr);
178     }
179 #endif
180     ierr = PetscPrintf(PETSC_COMM_WORLD, "\n");CHKERRQ(ierr);
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 /*
186     Allocates space for the vector array on the Host if it does not exist.
187     Does NOT change the PetscViennaCLFlag for the vector
188     Does NOT zero the ViennaCL array
189  */
VecViennaCLAllocateCheckHost(Vec v)190 PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
191 {
192   PetscErrorCode ierr;
193   PetscScalar    *array;
194   Vec_Seq        *s;
195   PetscInt       n = v->map->n;
196 
197   PetscFunctionBegin;
198   s    = (Vec_Seq*)v->data;
199   ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
200   if (s->array == 0) {
201     ierr               = PetscMalloc1(n,&array);CHKERRQ(ierr);
202     ierr               = PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));CHKERRQ(ierr);
203     s->array           = array;
204     s->array_allocated = array;
205   }
206   PetscFunctionReturn(0);
207 }
208 
209 
210 /*
211     Allocates space for the vector array on the GPU if it does not exist.
212     Does NOT change the PetscViennaCLFlag for the vector
213     Does NOT zero the ViennaCL array
214 
215  */
VecViennaCLAllocateCheck(Vec v)216 PetscErrorCode VecViennaCLAllocateCheck(Vec v)
217 {
218   PetscFunctionBegin;
219   if (!v->spptr) {
220     try {
221       v->spptr                            = new Vec_ViennaCL;
222       ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
223       ((Vec_ViennaCL*)v->spptr)->GPUarray = ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
224 
225     } catch(std::exception const & ex) {
226       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
227     }
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 
233 /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
VecViennaCLCopyToGPU(Vec v)234 PetscErrorCode VecViennaCLCopyToGPU(Vec v)
235 {
236   PetscErrorCode ierr;
237 
238   PetscFunctionBegin;
239   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
240   ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
241   if (v->map->n > 0) {
242     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
243       ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
244       try {
245         ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
246         viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
247         ViennaCLWaitForGPU();
248       } catch(std::exception const & ex) {
249         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
250       }
251       ierr = PetscLogCpuToGpu((v->map->n)*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
253       v->offloadmask = PETSC_OFFLOAD_BOTH;
254     }
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 
260 
261 /*
262      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
263 */
VecViennaCLCopyFromGPU(Vec v)264 PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
265 {
266   PetscErrorCode ierr;
267 
268   PetscFunctionBegin;
269   PetscCheckTypeNames(v,VECSEQVIENNACL,VECMPIVIENNACL);
270   ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
271   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
272     ierr = PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
273     try {
274       ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
275       viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
276       ViennaCLWaitForGPU();
277     } catch(std::exception const & ex) {
278       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
279     }
280     ierr = PetscLogGpuToCpu((v->map->n)*sizeof(PetscScalar));CHKERRQ(ierr);
281     ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
282     v->offloadmask = PETSC_OFFLOAD_BOTH;
283   }
284   PetscFunctionReturn(0);
285 }
286 
287 
288 /* Copy on CPU */
VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)289 static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
290 {
291   PetscScalar       *ya;
292   const PetscScalar *xa;
293   PetscErrorCode    ierr;
294 
295   PetscFunctionBegin;
296   ierr = VecViennaCLAllocateCheckHost(xin);CHKERRQ(ierr);
297   ierr = VecViennaCLAllocateCheckHost(yin);CHKERRQ(ierr);
298   if (xin != yin) {
299     ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
300     ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
301     ierr = PetscArraycpy(ya,xa,xin->map->n);CHKERRQ(ierr);
302     ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
303     ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
304   }
305   PetscFunctionReturn(0);
306 }
307 
VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)308 static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
309 {
310   PetscErrorCode ierr;
311   PetscInt       n = xin->map->n,i;
312   PetscScalar    *xx;
313 
314   PetscFunctionBegin;
315   ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
316   for (i=0; i<n; i++) {ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr);}
317   ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
VecDestroy_SeqViennaCL_Private(Vec v)321 static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
322 {
323   Vec_Seq        *vs = (Vec_Seq*)v->data;
324   PetscErrorCode ierr;
325 
326   PetscFunctionBegin;
327   ierr = PetscObjectSAWsViewOff(v);CHKERRQ(ierr);
328 #if defined(PETSC_USE_LOG)
329   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
330 #endif
331   if (vs->array_allocated) { ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr); }
332   ierr = PetscFree(vs);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
VecResetArray_SeqViennaCL_Private(Vec vin)336 static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
337 {
338   Vec_Seq *v = (Vec_Seq*)vin->data;
339 
340   PetscFunctionBegin;
341   v->array         = v->unplacedarray;
342   v->unplacedarray = 0;
343   PetscFunctionReturn(0);
344 }
345 
346 
347 /*MC
348    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
349 
350    Options Database Keys:
351 . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
352 
353   Level: beginner
354 
355 .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
356 M*/
357 
358 
VecAYPX_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)359 PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
360 {
361   const ViennaCLVector  *xgpu;
362   ViennaCLVector        *ygpu;
363   PetscErrorCode        ierr;
364 
365   PetscFunctionBegin;
366   ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
367   ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
368   ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
369   try {
370     if (alpha != 0.0 && xin->map->n > 0) {
371       *ygpu = *xgpu + alpha * *ygpu;
372       ierr = PetscLogGpuFlops(2.0*yin->map->n);CHKERRQ(ierr);
373     } else {
374       *ygpu = *xgpu;
375     }
376     ViennaCLWaitForGPU();
377   } catch(std::exception const & ex) {
378     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
379   }
380   ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
381   ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
382   ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 
VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)387 PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
388 {
389   const ViennaCLVector  *xgpu;
390   ViennaCLVector        *ygpu;
391   PetscErrorCode        ierr;
392 
393   PetscFunctionBegin;
394   if (alpha != 0.0 && xin->map->n > 0) {
395     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
396     ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
397     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
398     try {
399       *ygpu += alpha * *xgpu;
400       ViennaCLWaitForGPU();
401     } catch(std::exception const & ex) {
402       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
403     }
404     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
405     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
406     ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
407     ierr = PetscLogGpuFlops(2.0*yin->map->n);CHKERRQ(ierr);
408   }
409   PetscFunctionReturn(0);
410 }
411 
412 
VecPointwiseDivide_SeqViennaCL(Vec win,Vec xin,Vec yin)413 PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
414 {
415   const ViennaCLVector  *xgpu,*ygpu;
416   ViennaCLVector        *wgpu;
417   PetscErrorCode        ierr;
418 
419   PetscFunctionBegin;
420   if (xin->map->n > 0) {
421     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
422     ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
423     ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
424     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
425     try {
426       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
427       ViennaCLWaitForGPU();
428     } catch(std::exception const & ex) {
429       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
430     }
431     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
432     ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
433     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
434     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
435     ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
436   }
437   PetscFunctionReturn(0);
438 }
439 
440 
VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin,Vec yin)441 PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
442 {
443   const ViennaCLVector  *xgpu,*ygpu;
444   ViennaCLVector        *wgpu;
445   PetscErrorCode        ierr;
446 
447   PetscFunctionBegin;
448   if (alpha == 0.0 && xin->map->n > 0) {
449     ierr = VecCopy_SeqViennaCL(yin,win);CHKERRQ(ierr);
450   } else {
451     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
452     ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
453     ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
454     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
455     if (alpha == 1.0) {
456       try {
457         *wgpu = *ygpu + *xgpu;
458       } catch(std::exception const & ex) {
459         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
460       }
461       ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
462     } else if (alpha == -1.0) {
463       try {
464         *wgpu = *ygpu - *xgpu;
465       } catch(std::exception const & ex) {
466         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
467       }
468       ierr = PetscLogGpuFlops(win->map->n);CHKERRQ(ierr);
469     } else {
470       try {
471         *wgpu = *ygpu + alpha * *xgpu;
472       } catch(std::exception const & ex) {
473         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
474       }
475       ierr = PetscLogGpuFlops(2*win->map->n);CHKERRQ(ierr);
476     }
477     ViennaCLWaitForGPU();
478     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
479     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
480     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
481     ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
482   }
483   PetscFunctionReturn(0);
484 }
485 
486 
487 /*
488  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
489  *
490  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
491  * hence there is an iterated application of these until the final result is obtained
492  */
VecMAXPY_SeqViennaCL(Vec xin,PetscInt nv,const PetscScalar * alpha,Vec * y)493 PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
494 {
495   PetscErrorCode ierr;
496   PetscInt       j;
497 
498   PetscFunctionBegin;
499   for (j = 0; j < nv; ++j) {
500     if (j+1 < nv) {
501       VecAXPBYPCZ_SeqViennaCL(xin,alpha[j],alpha[j+1],1.0,y[j],y[j+1]);
502       ++j;
503     } else {
504       ierr = VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);CHKERRQ(ierr);
505     }
506   }
507   ViennaCLWaitForGPU();
508   PetscFunctionReturn(0);
509 }
510 
511 
VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)512 PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
513 {
514   const ViennaCLVector  *xgpu,*ygpu;
515   PetscErrorCode        ierr;
516 
517   PetscFunctionBegin;
518   if (xin->map->n > 0) {
519     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
520     ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
521     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
522     try {
523       *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
524     } catch(std::exception const & ex) {
525       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
526     }
527     ViennaCLWaitForGPU();
528     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
529     if (xin->map->n >0) {
530       ierr = PetscLogGpuFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
531     }
532     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
533     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
534   } else *z = 0.0;
535   PetscFunctionReturn(0);
536 }
537 
538 
539 
540 /*
541  * Operation z[j] = dot(x, y[j])
542  *
543  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
544  */
VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)545 PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
546 {
547   PetscErrorCode       ierr;
548   PetscInt             n = xin->map->n,i;
549   const ViennaCLVector *xgpu,*ygpu;
550   Vec                  *yyin = (Vec*)yin;
551   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
552 
553   PetscFunctionBegin;
554   if (xin->map->n > 0) {
555     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
556     for (i=0; i<nv; i++) {
557       ierr = VecViennaCLGetArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
558       ygpu_array[i] = ygpu;
559     }
560     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
561     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
562     ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
563     viennacl::copy(result.begin(), result.end(), z);
564     for (i=0; i<nv; i++) {
565       ierr = VecViennaCLRestoreArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
566     }
567     ViennaCLWaitForGPU();
568     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
569     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
570     ierr = PetscLogGpuFlops(PetscMax(nv*(2.0*n-1),0.0));CHKERRQ(ierr);
571   } else {
572     for (i=0; i<nv; i++) z[i] = 0.0;
573   }
574   PetscFunctionReturn(0);
575 }
576 
VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)577 PetscErrorCode VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
578 {
579   PetscErrorCode ierr;
580 
581   PetscFunctionBegin;
582   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
583   ierr = VecMDot_SeqViennaCL(xin,nv,yin,z);CHKERRQ(ierr);
584   ViennaCLWaitForGPU();
585   PetscFunctionReturn(0);
586 }
587 
588 
VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)589 PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
590 {
591   ViennaCLVector *xgpu;
592   PetscErrorCode ierr;
593 
594   PetscFunctionBegin;
595   if (xin->map->n > 0) {
596     ierr = VecViennaCLGetArrayWrite(xin,&xgpu);CHKERRQ(ierr);
597     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
598     try {
599       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
600       ViennaCLWaitForGPU();
601     } catch(std::exception const & ex) {
602       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
603     }
604     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
605     ierr = VecViennaCLRestoreArrayWrite(xin,&xgpu);CHKERRQ(ierr);
606   }
607   PetscFunctionReturn(0);
608 }
609 
VecScale_SeqViennaCL(Vec xin,PetscScalar alpha)610 PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
611 {
612   ViennaCLVector *xgpu;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   if (alpha == 0.0 && xin->map->n > 0) {
617     ierr = VecSet_SeqViennaCL(xin,alpha);CHKERRQ(ierr);
618     ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
619   } else if (alpha != 1.0 && xin->map->n > 0) {
620     ierr = VecViennaCLGetArray(xin,&xgpu);CHKERRQ(ierr);
621     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
622     try {
623       *xgpu *= alpha;
624       ViennaCLWaitForGPU();
625     } catch(std::exception const & ex) {
626       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
627     }
628     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
629     ierr = VecViennaCLRestoreArray(xin,&xgpu);CHKERRQ(ierr);
630     ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 
VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)636 PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
637 {
638   PetscErrorCode ierr;
639 
640   PetscFunctionBegin;
641   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
642   ierr = VecDot_SeqViennaCL(xin, yin, z);CHKERRQ(ierr);
643   ViennaCLWaitForGPU();
644   PetscFunctionReturn(0);
645 }
646 
647 
VecCopy_SeqViennaCL(Vec xin,Vec yin)648 PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
649 {
650   const ViennaCLVector *xgpu;
651   ViennaCLVector       *ygpu;
652   PetscErrorCode       ierr;
653 
654   PetscFunctionBegin;
655   if (xin != yin && xin->map->n > 0) {
656     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
657       ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
658       ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
659       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
660       try {
661         *ygpu = *xgpu;
662         ViennaCLWaitForGPU();
663       } catch(std::exception const & ex) {
664         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
665       }
666       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
667       ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
668       ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
669 
670     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
671       /* copy in CPU if we are on the CPU*/
672       ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
673       ViennaCLWaitForGPU();
674     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
675       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
676       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
677         /* copy in CPU */
678         ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
679         ViennaCLWaitForGPU();
680       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
681         /* copy in GPU */
682         ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
683         ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
684         ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
685         try {
686           *ygpu = *xgpu;
687           ViennaCLWaitForGPU();
688         } catch(std::exception const & ex) {
689           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
690         }
691         ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
692         ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
693         ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
694       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
695         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
696            default to copy in GPU (this is an arbitrary choice) */
697         ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
698         ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
699         ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
700         try {
701           *ygpu = *xgpu;
702           ViennaCLWaitForGPU();
703         } catch(std::exception const & ex) {
704           SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
705         }
706         ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
707         ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
708         ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
709       } else {
710         ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
711         ViennaCLWaitForGPU();
712       }
713     }
714   }
715   PetscFunctionReturn(0);
716 }
717 
718 
VecSwap_SeqViennaCL(Vec xin,Vec yin)719 PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
720 {
721   PetscErrorCode ierr;
722   ViennaCLVector *xgpu,*ygpu;
723 
724   PetscFunctionBegin;
725   if (xin != yin && xin->map->n > 0) {
726     ierr = VecViennaCLGetArray(xin,&xgpu);CHKERRQ(ierr);
727     ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
728     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
729     try {
730       viennacl::swap(*xgpu, *ygpu);
731       ViennaCLWaitForGPU();
732     } catch(std::exception const & ex) {
733       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
734     }
735     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
736     ierr = VecViennaCLRestoreArray(xin,&xgpu);CHKERRQ(ierr);
737     ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 
743 // y = alpha * x + beta * y
VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)744 PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
745 {
746   PetscErrorCode       ierr;
747   PetscScalar          a = alpha,b = beta;
748   const ViennaCLVector *xgpu;
749   ViennaCLVector       *ygpu;
750 
751   PetscFunctionBegin;
752   if (a == 0.0 && xin->map->n > 0) {
753     ierr = VecScale_SeqViennaCL(yin,beta);CHKERRQ(ierr);
754   } else if (b == 1.0 && xin->map->n > 0) {
755     ierr = VecAXPY_SeqViennaCL(yin,alpha,xin);CHKERRQ(ierr);
756   } else if (a == 1.0 && xin->map->n > 0) {
757     ierr = VecAYPX_SeqViennaCL(yin,beta,xin);CHKERRQ(ierr);
758   } else if (b == 0.0 && xin->map->n > 0) {
759     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
760     ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
761     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
762     try {
763       *ygpu = *xgpu * alpha;
764       ViennaCLWaitForGPU();
765     } catch(std::exception const & ex) {
766       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
767     }
768     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
769     ierr = PetscLogGpuFlops(xin->map->n);CHKERRQ(ierr);
770     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
771     ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
772   } else if (xin->map->n > 0) {
773     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
774     ierr = VecViennaCLGetArray(yin,&ygpu);CHKERRQ(ierr);
775     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
776     try {
777       *ygpu = *xgpu * alpha + *ygpu * beta;
778       ViennaCLWaitForGPU();
779     } catch(std::exception const & ex) {
780       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
781     }
782     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
783     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
784     ierr = VecViennaCLRestoreArray(yin,&ygpu);CHKERRQ(ierr);
785     ierr = PetscLogGpuFlops(3.0*xin->map->n);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 
791 /* operation  z = alpha * x + beta *y + gamma *z*/
VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)792 PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
793 {
794   PetscErrorCode       ierr;
795   PetscInt             n = zin->map->n;
796   const ViennaCLVector *xgpu,*ygpu;
797   ViennaCLVector       *zgpu;
798 
799   PetscFunctionBegin;
800   ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
801   ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
802   ierr = VecViennaCLGetArray(zin,&zgpu);CHKERRQ(ierr);
803   if (alpha == 0.0 && xin->map->n > 0) {
804     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
805     try {
806       if (beta == 0.0) {
807         *zgpu = gamma * *zgpu;
808         ViennaCLWaitForGPU();
809         ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
810       } else if (gamma == 0.0) {
811         *zgpu = beta * *ygpu;
812         ViennaCLWaitForGPU();
813         ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
814       } else {
815         *zgpu = beta * *ygpu + gamma * *zgpu;
816         ViennaCLWaitForGPU();
817         ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
818       }
819     } catch(std::exception const & ex) {
820       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
821     }
822     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
823     ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
824   } else if (beta == 0.0 && xin->map->n > 0) {
825     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
826     try {
827       if (gamma == 0.0) {
828         *zgpu = alpha * *xgpu;
829         ViennaCLWaitForGPU();
830         ierr = PetscLogGpuFlops(1.0*n);CHKERRQ(ierr);
831       } else {
832         *zgpu = alpha * *xgpu + gamma * *zgpu;
833         ViennaCLWaitForGPU();
834         ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
835       }
836     } catch(std::exception const & ex) {
837       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
838     }
839     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
840   } else if (gamma == 0.0 && xin->map->n > 0) {
841     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
842     try {
843       *zgpu = alpha * *xgpu + beta * *ygpu;
844       ViennaCLWaitForGPU();
845     } catch(std::exception const & ex) {
846       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
847     }
848     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
849     ierr = PetscLogGpuFlops(3.0*n);CHKERRQ(ierr);
850   } else if (xin->map->n > 0) {
851     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
852     try {
853       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
854       if (gamma != 1.0)
855         *zgpu *= gamma;
856       *zgpu += alpha * *xgpu + beta * *ygpu;
857       ViennaCLWaitForGPU();
858     } catch(std::exception const & ex) {
859       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
860     }
861     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
862     ierr = VecViennaCLRestoreArray(zin,&zgpu);CHKERRQ(ierr);
863     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
864     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
865     ierr = PetscLogGpuFlops(5.0*n);CHKERRQ(ierr);
866   }
867   PetscFunctionReturn(0);
868 }
869 
VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)870 PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
871 {
872   PetscErrorCode       ierr;
873   PetscInt             n = win->map->n;
874   const ViennaCLVector *xgpu,*ygpu;
875   ViennaCLVector       *wgpu;
876 
877   PetscFunctionBegin;
878   if (xin->map->n > 0) {
879     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
880     ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
881     ierr = VecViennaCLGetArray(win,&wgpu);CHKERRQ(ierr);
882     ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
883     try {
884       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
885       ViennaCLWaitForGPU();
886     } catch(std::exception const & ex) {
887       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
888     }
889     ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
890     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
891     ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
892     ierr = VecViennaCLRestoreArray(win,&wgpu);CHKERRQ(ierr);
893     ierr = PetscLogGpuFlops(n);CHKERRQ(ierr);
894   }
895   PetscFunctionReturn(0);
896 }
897 
898 
VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal * z)899 PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
900 {
901   PetscErrorCode       ierr;
902   PetscInt             n = xin->map->n;
903   PetscBLASInt         bn;
904   const ViennaCLVector *xgpu;
905 
906   PetscFunctionBegin;
907   if (xin->map->n > 0) {
908     ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
909     ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
910     if (type == NORM_2 || type == NORM_FROBENIUS) {
911       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
912       try {
913         *z = viennacl::linalg::norm_2(*xgpu);
914         ViennaCLWaitForGPU();
915       } catch(std::exception const & ex) {
916         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
917       }
918       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
919       ierr = PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
920     } else if (type == NORM_INFINITY) {
921       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
922       try {
923         *z = viennacl::linalg::norm_inf(*xgpu);
924         ViennaCLWaitForGPU();
925       } catch(std::exception const & ex) {
926         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
927       }
928       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
929     } else if (type == NORM_1) {
930       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
931       try {
932         *z = viennacl::linalg::norm_1(*xgpu);
933         ViennaCLWaitForGPU();
934       } catch(std::exception const & ex) {
935         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
936       }
937       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
938       ierr = PetscLogGpuFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
939     } else if (type == NORM_1_AND_2) {
940       ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
941       try {
942         *z     = viennacl::linalg::norm_1(*xgpu);
943         *(z+1) = viennacl::linalg::norm_2(*xgpu);
944         ViennaCLWaitForGPU();
945       } catch(std::exception const & ex) {
946         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
947       }
948       ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
949       ierr = PetscLogGpuFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
950       ierr = PetscLogGpuFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
951     }
952     ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
953   } else if (type == NORM_1_AND_2) {
954     *z      = 0.0;
955     *(z+1)  = 0.0;
956   } else *z = 0.0;
957   PetscFunctionReturn(0);
958 }
959 
960 
VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)961 PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
962 {
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = VecSetRandom_SeqViennaCL_Private(xin,r);CHKERRQ(ierr);
967   xin->offloadmask = PETSC_OFFLOAD_CPU;
968   PetscFunctionReturn(0);
969 }
970 
VecResetArray_SeqViennaCL(Vec vin)971 PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
972 {
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
977   ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
978   ierr = VecResetArray_SeqViennaCL_Private(vin);CHKERRQ(ierr);
979   vin->offloadmask = PETSC_OFFLOAD_CPU;
980   PetscFunctionReturn(0);
981 }
982 
VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)983 PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
984 {
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
989   ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
990   ierr = VecPlaceArray_Seq(vin,a);CHKERRQ(ierr);
991   vin->offloadmask = PETSC_OFFLOAD_CPU;
992   PetscFunctionReturn(0);
993 }
994 
VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)995 PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
996 {
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1001   ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
1002   ierr = VecReplaceArray_Seq(vin,a);CHKERRQ(ierr);
1003   vin->offloadmask = PETSC_OFFLOAD_CPU;
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 
1008 /*@C
1009    VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
1010 
1011    Collective
1012 
1013    Input Parameter:
1014 +  comm - the communicator, should be PETSC_COMM_SELF
1015 -  n - the vector length
1016 
1017    Output Parameter:
1018 .  V - the vector
1019 
1020    Notes:
1021    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1022    same type as an existing vector.
1023 
1024    Level: intermediate
1025 
1026 .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
1027 @*/
VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec * v)1028 PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
1029 {
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   ierr = VecCreate(comm,v);CHKERRQ(ierr);
1034   ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
1035   ierr = VecSetType(*v,VECSEQVIENNACL);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 
1040 /*@C
1041    VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
1042    where the user provides the array space to store the vector values.
1043 
1044    Collective
1045 
1046    Input Parameter:
1047 +  comm - the communicator, should be PETSC_COMM_SELF
1048 .  bs - the block size
1049 .  n - the vector length
1050 -  array - viennacl array where the vector elements are to be stored.
1051 
1052    Output Parameter:
1053 .  V - the vector
1054 
1055    Notes:
1056    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
1057    same type as an existing vector.
1058 
1059    If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
1060    at a later stage to SET the array for storing the vector values.
1061 
1062    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
1063    The user should not free the array until the vector is destroyed.
1064 
1065    Level: intermediate
1066 
1067 .seealso: VecCreateMPIViennaCLWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
1068           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
1069           VecCreateMPIWithArray()
1070 @*/
VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector * array,Vec * V)1071 PETSC_EXTERN PetscErrorCode  VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector* array,Vec *V)
1072 {
1073   PetscErrorCode ierr;
1074   PetscMPIInt    size;
1075 
1076   PetscFunctionBegin;
1077   ierr = VecCreate(comm,V);CHKERRQ(ierr);
1078   ierr = VecSetSizes(*V,n,n);CHKERRQ(ierr);
1079   ierr = VecSetBlockSize(*V,bs);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1082   ierr = VecCreate_SeqViennaCL_Private(*V,array);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 /*@C
1087    VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
1088    the user provides the array space to store the vector values.
1089 
1090    Collective
1091 
1092    Input Parameter:
1093 +  comm - the communicator, should be PETSC_COMM_SELF
1094 .  bs - the block size
1095 .  n - the vector length
1096 -  cpuarray - CPU memory where the vector elements are to be stored.
1097 -  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1098 
1099    Output Parameter:
1100 .  V - the vector
1101 
1102    Notes:
1103    If both cpuarray and viennaclvec are provided, the caller must ensure that
1104    the provided arrays have identical values.
1105 
1106    PETSc does NOT free the provided arrays when the vector is destroyed via
1107    VecDestroy(). The user should not free the array until the vector is
1108    destroyed.
1109 
1110    Level: intermediate
1111 
1112 .seealso: VecCreateMPIViennaCLWithArrays(), VecCreate(), VecCreateSeqWithArray(),
1113           VecViennaCLPlaceArray(), VecPlaceArray(), VecCreateSeqCUDAWithArrays(),
1114           VecViennaCLAllocateCheckHost()
1115 @*/
VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector * viennaclvec,Vec * V)1116 PetscErrorCode  VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector* viennaclvec,Vec *V)
1117 {
1118   PetscErrorCode ierr;
1119   PetscMPIInt    size;
1120 
1121   PetscFunctionBegin;
1122 
1123   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1124   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
1125 
1126   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1127   ierr = VecCreateSeqViennaCLWithArray(comm,bs,n,viennaclvec,V);CHKERRQ(ierr);
1128 
1129   if (cpuarray && viennaclvec) {
1130     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1131     s->array = (PetscScalar*)cpuarray;
1132     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1133   } else if (cpuarray) {
1134     Vec_Seq *s = (Vec_Seq*)((*V)->data);
1135     s->array = (PetscScalar*)cpuarray;
1136     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1137   } else if (viennaclvec) {
1138     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1139   } else {
1140     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1141   }
1142 
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 /*@C
1147    VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1148    the one provided by the user. This is useful to avoid a copy.
1149 
1150    Not Collective
1151 
1152    Input Parameters:
1153 +  vec - the vector
1154 -  array - the ViennaCL vector
1155 
1156    Notes:
1157    You can return to the original viennacl vector with a call to
1158    VecViennaCLResetArray() It is not possible to use VecViennaCLPlaceArray()
1159    and VecPlaceArray() at the same time on the same vector.
1160 
1161    Level: intermediate
1162 
1163 .seealso: VecPlaceArray(), VecSetValues(), VecViennaCLResetArray(),
1164           VecCUDAPlaceArray(),
1165 
1166 @*/
VecViennaCLPlaceArray(Vec vin,const ViennaCLVector * a)1167 PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin,const ViennaCLVector* a)
1168 {
1169   PetscErrorCode ierr;
1170 
1171   PetscFunctionBegin;
1172   PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1173   ierr = VecViennaCLCopyToGPU(vin);CHKERRQ(ierr);
1174   if (((Vec_Seq*)vin->data)->unplacedarray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1175   ((Vec_Seq*)vin->data)->unplacedarray  = (PetscScalar *) ((Vec_ViennaCL*)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1176   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector*)a;
1177   vin->offloadmask = PETSC_OFFLOAD_GPU;
1178   ierr = PetscObjectStateIncrease((PetscObject)vin);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /*@C
1183    VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1184    after the use of VecViennaCLPlaceArray().
1185 
1186    Not Collective
1187 
1188    Input Parameters:
1189 .  vec - the vector
1190 
1191    Level: developer
1192 
1193 .seealso: VecViennaCLPlaceArray(), VecResetArray(), VecCUDAResetArray(), VecPlaceArray()
1194 @*/
VecViennaCLResetArray(Vec vin)1195 PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1196 {
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   PetscCheckTypeNames(vin,VECSEQVIENNACL,VECMPIVIENNACL);
1201   ierr = VecViennaCLCopyToGPU(vin);CHKERRQ(ierr);
1202   ((Vec_ViennaCL*)vin->spptr)->GPUarray = (ViennaCLVector *) ((Vec_Seq*)vin->data)->unplacedarray;
1203   ((Vec_Seq*)vin->data)->unplacedarray = 0;
1204   vin->offloadmask = PETSC_OFFLOAD_GPU;
1205   ierr = PetscObjectStateIncrease((PetscObject)vin);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 
1210 /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1211  *
1212  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1213  */
VecDotNorm2_SeqViennaCL(Vec s,Vec t,PetscScalar * dp,PetscScalar * nm)1214 PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1215 {
1216   PetscErrorCode                         ierr;
1217 
1218   PetscFunctionBegin;
1219   ierr = VecDot_SeqViennaCL(s,t,dp);CHKERRQ(ierr);
1220   ierr = VecNorm_SeqViennaCL(t,NORM_2,nm);CHKERRQ(ierr);
1221   *nm *= *nm; //squared norm required
1222   PetscFunctionReturn(0);
1223 }
1224 
VecDuplicate_SeqViennaCL(Vec win,Vec * V)1225 PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
1226 {
1227   PetscErrorCode ierr;
1228 
1229   PetscFunctionBegin;
1230   ierr = VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);CHKERRQ(ierr);
1231   ierr = PetscLayoutReference(win->map,&(*V)->map);CHKERRQ(ierr);
1232   ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);CHKERRQ(ierr);
1233   ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);CHKERRQ(ierr);
1234   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1235   PetscFunctionReturn(0);
1236 }
1237 
VecDestroy_SeqViennaCL(Vec v)1238 PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1239 {
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   try {
1244     if (v->spptr) {
1245       delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
1246       delete (Vec_ViennaCL*) v->spptr;
1247     }
1248   } catch(char *ex) {
1249     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
1250   }
1251   ierr = VecDestroy_SeqViennaCL_Private(v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)1255 static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)
1256 {
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   V->boundtocpu = flg;
1261   if (flg) {
1262     ierr = VecViennaCLCopyFromGPU(V);CHKERRQ(ierr);
1263     V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1264     V->ops->dot             = VecDot_Seq;
1265     V->ops->norm            = VecNorm_Seq;
1266     V->ops->tdot            = VecTDot_Seq;
1267     V->ops->scale           = VecScale_Seq;
1268     V->ops->copy            = VecCopy_Seq;
1269     V->ops->set             = VecSet_Seq;
1270     V->ops->swap            = VecSwap_Seq;
1271     V->ops->axpy            = VecAXPY_Seq;
1272     V->ops->axpby           = VecAXPBY_Seq;
1273     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1274     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1275     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1276     V->ops->setrandom       = VecSetRandom_Seq;
1277     V->ops->dot_local       = VecDot_Seq;
1278     V->ops->tdot_local      = VecTDot_Seq;
1279     V->ops->norm_local      = VecNorm_Seq;
1280     V->ops->mdot_local      = VecMDot_Seq;
1281     V->ops->mtdot_local     = VecMTDot_Seq;
1282     V->ops->maxpy           = VecMAXPY_Seq;
1283     V->ops->mdot            = VecMDot_Seq;
1284     V->ops->mtdot           = VecMTDot_Seq;
1285     V->ops->aypx            = VecAYPX_Seq;
1286     V->ops->waxpy           = VecWAXPY_Seq;
1287     V->ops->dotnorm2        = NULL;
1288     V->ops->placearray      = VecPlaceArray_Seq;
1289     V->ops->replacearray    = VecReplaceArray_Seq;
1290     V->ops->resetarray      = VecResetArray_Seq;
1291     V->ops->duplicate       = VecDuplicate_Seq;
1292   } else {
1293     V->ops->dot             = VecDot_SeqViennaCL;
1294     V->ops->norm            = VecNorm_SeqViennaCL;
1295     V->ops->tdot            = VecTDot_SeqViennaCL;
1296     V->ops->scale           = VecScale_SeqViennaCL;
1297     V->ops->copy            = VecCopy_SeqViennaCL;
1298     V->ops->set             = VecSet_SeqViennaCL;
1299     V->ops->swap            = VecSwap_SeqViennaCL;
1300     V->ops->axpy            = VecAXPY_SeqViennaCL;
1301     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1302     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1303     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1304     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1305     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1306     V->ops->dot_local       = VecDot_SeqViennaCL;
1307     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1308     V->ops->norm_local      = VecNorm_SeqViennaCL;
1309     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1310     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1311     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1312     V->ops->mdot            = VecMDot_SeqViennaCL;
1313     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1314     V->ops->aypx            = VecAYPX_SeqViennaCL;
1315     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1316     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1317     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1318     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1319     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1320     V->ops->destroy         = VecDestroy_SeqViennaCL;
1321     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
VecCreate_SeqViennaCL(Vec V)1326 PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1327 {
1328   PetscErrorCode ierr;
1329   PetscMPIInt    size;
1330 
1331   PetscFunctionBegin;
1332   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
1333   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1334   ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
1335   ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);CHKERRQ(ierr);
1336 
1337   ierr = VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);CHKERRQ(ierr);
1338   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1339 
1340   ierr = VecViennaCLAllocateCheck(V);CHKERRQ(ierr);
1341   ierr = VecViennaCLAllocateCheckHost(V);CHKERRQ(ierr);
1342   ierr = VecSet(V,0.0);CHKERRQ(ierr);
1343   ierr = VecSet_Seq(V,0.0);CHKERRQ(ierr);
1344   V->offloadmask = PETSC_OFFLOAD_BOTH;
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 /*@C
1349   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1350 
1351   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1352   invoking clReleaseContext().
1353 
1354 
1355   Input Parameters:
1356 .  v    - the vector
1357 
1358   Output Parameters:
1359 .  ctx - pointer to the underlying CL context
1360 
1361   Level: intermediate
1362 
1363 .seealso: VecViennaCLGetCLQueue(), VecViennaCLGetCLMemRead()
1364 @*/
VecViennaCLGetCLContext(Vec v,PETSC_UINTPTR_T * ctx)1365 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T* ctx)
1366 {
1367 #if !defined(PETSC_HAVE_OPENCL)
1368   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_context.");
1369 #else
1370 
1371   PetscFunctionBegin;
1372   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1373 
1374   PetscErrorCode ierr;
1375   const ViennaCLVector *v_vcl;
1376   ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1377   try{
1378     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1379     const cl_context ocl_ctx = vcl_ctx.handle().get();
1380     clRetainContext(ocl_ctx);
1381     *ctx = (PETSC_UINTPTR_T)(ocl_ctx);
1382   } catch (std::exception const & ex) {
1383     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1384   }
1385 
1386   PetscFunctionReturn(0);
1387 #endif
1388 }
1389 
1390 /*@C
1391   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1392   operations of the Vec are enqueued.
1393 
1394   Caller should cast (*queue) to (const cl_command_queue). Caller is
1395   responsible for invoking clReleaseCommandQueue().
1396 
1397   Input Parameters:
1398 .  v    - the vector
1399 
1400   Output Parameters:
1401 .  ctx - pointer to the CL command queue
1402 
1403   Level: intermediate
1404 
1405 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemRead()
1406 @*/
VecViennaCLGetCLQueue(Vec v,PETSC_UINTPTR_T * queue)1407 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T* queue)
1408 {
1409 #if !defined(PETSC_HAVE_OPENCL)
1410   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1411 #else
1412   PetscFunctionBegin;
1413   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1414 
1415   PetscErrorCode ierr;
1416   const ViennaCLVector *v_vcl;
1417   ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1418   try{
1419     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1420     const viennacl::ocl::command_queue& vcl_queue = vcl_ctx.current_queue();
1421     const cl_command_queue ocl_queue = vcl_queue.handle().get();
1422     clRetainCommandQueue(ocl_queue);
1423     *queue = (PETSC_UINTPTR_T)(ocl_queue);
1424   } catch (std::exception const & ex) {
1425     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1426   }
1427 
1428   PetscFunctionReturn(0);
1429 #endif
1430 }
1431 
1432 /*@C
1433   VecViennaCLGetCLMemRead - Provides access to the the CL buffer inside a Vec.
1434 
1435   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1436   invoking clReleaseMemObject().
1437 
1438   Input Parameters:
1439 .  v    - the vector
1440 
1441   Output Parameters:
1442 .  mem - pointer to the device buffer
1443 
1444   Level: intermediate
1445 
1446 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1447 @*/
VecViennaCLGetCLMemRead(Vec v,PETSC_UINTPTR_T * mem)1448 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T* mem)
1449 {
1450 #if !defined(PETSC_HAVE_OPENCL)
1451   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1452 #else
1453   PetscFunctionBegin;
1454   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1455 
1456   PetscErrorCode ierr;
1457   const ViennaCLVector *v_vcl;
1458   ierr = VecViennaCLGetArrayRead(v, &v_vcl);CHKERRQ(ierr);
1459   try{
1460     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1461     clRetainMemObject(ocl_mem);
1462     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1463   } catch (std::exception const & ex) {
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1465   }
1466   PetscFunctionReturn(0);
1467 #endif
1468 }
1469 
1470 /*@C
1471   VecViennaCLGetCLMemWrite - Provides access to the the CL buffer inside a Vec.
1472 
1473   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1474   invoking clReleaseMemObject().
1475 
1476   The device pointer has to be released by calling
1477   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1478   the host will be marked as out of date.  A subsequent access of the host data
1479   will thus incur a data transfer from the device to the host.
1480 
1481   Input Parameters:
1482 .  v    - the vector
1483 
1484   Output Parameters:
1485 .  mem - pointer to the device buffer
1486 
1487   Level: intermediate
1488 
1489 .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMemWrite()
1490 @*/
VecViennaCLGetCLMemWrite(Vec v,PETSC_UINTPTR_T * mem)1491 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T* mem)
1492 {
1493 #if !defined(PETSC_HAVE_OPENCL)
1494   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1495 #else
1496   PetscFunctionBegin;
1497   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1498 
1499   PetscErrorCode ierr;
1500   ViennaCLVector *v_vcl;
1501   ierr = VecViennaCLGetArrayWrite(v, &v_vcl);CHKERRQ(ierr);
1502   try{
1503     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1504     clRetainMemObject(ocl_mem);
1505     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1506   } catch (std::exception const & ex) {
1507     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1508   }
1509 
1510   PetscFunctionReturn(0);
1511 #endif
1512 }
1513 
1514 /*@C
1515   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1516   acquired with VecViennaCLGetCLMemWrite().
1517 
1518    This marks the host data as out of date.  Subsequent access to the
1519    vector data on the host side with for instance VecGetArray() incurs a
1520    data transfer.
1521 
1522   Input Parameters:
1523 .  v    - the vector
1524 
1525   Level: intermediate
1526 
1527 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMemWrite()
1528 @*/
VecViennaCLRestoreCLMemWrite(Vec v)1529 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1530 {
1531 #if !defined(PETSC_HAVE_OPENCL)
1532   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1533 #else
1534   PetscFunctionBegin;
1535   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1536 
1537   PetscErrorCode ierr;
1538   ierr = VecViennaCLRestoreArrayWrite(v, PETSC_NULL);CHKERRQ(ierr);
1539 
1540   PetscFunctionReturn(0);
1541 #endif
1542 }
1543 
1544 
1545 /*@C
1546   VecViennaCLGetCLMem - Provides access to the the CL buffer inside a Vec.
1547 
1548   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1549   invoking clReleaseMemObject().
1550 
1551   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1552   Upon restoring the vector data the data on the host will be marked as out of
1553   date.  A subsequent access of the host data will thus incur a data transfer
1554   from the device to the host.
1555 
1556   Input Parameters:
1557 .  v    - the vector
1558 
1559   Output Parameters:
1560 .  mem - pointer to the device buffer
1561 
1562   Level: intermediate
1563 
1564 .seealso: VecViennaCLGetCLContext(), VecViennaCLRestoreCLMem()
1565 @*/
VecViennaCLGetCLMem(Vec v,PETSC_UINTPTR_T * mem)1566 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T* mem)
1567 {
1568 #if !defined(PETSC_HAVE_OPENCL)
1569   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1570 #else
1571   PetscFunctionBegin;
1572   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1573 
1574   PetscErrorCode ierr;
1575   ViennaCLVector *v_vcl;
1576   ierr = VecViennaCLGetArray(v, &v_vcl);CHKERRQ(ierr);
1577   try{
1578     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1579     clRetainMemObject(ocl_mem);
1580     *mem = (PETSC_UINTPTR_T)(ocl_mem);
1581   } catch (std::exception const & ex) {
1582     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
1583   }
1584 
1585   PetscFunctionReturn(0);
1586 #endif
1587 }
1588 
1589 /*@C
1590   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1591   acquired with VecViennaCLGetCLMem().
1592 
1593    This marks the host data as out of date. Subsequent access to the vector
1594    data on the host side with for instance VecGetArray() incurs a data
1595    transfer.
1596 
1597   Input Parameters:
1598 .  v    - the vector
1599 
1600   Level: intermediate
1601 
1602 .seealso: VecViennaCLGetCLContext(), VecViennaCLGetCLMem()
1603 @*/
VecViennaCLRestoreCLMem(Vec v)1604 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1605 {
1606 #if !defined(PETSC_HAVE_OPENCL)
1607   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1608 #else
1609   PetscFunctionBegin;
1610   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1611 
1612   PetscErrorCode ierr;
1613   ierr = VecViennaCLRestoreArray(v, PETSC_NULL);CHKERRQ(ierr);
1614 
1615   PetscFunctionReturn(0);
1616 #endif
1617 }
1618 
VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector * array)1619 PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector *array)
1620 {
1621   PetscErrorCode ierr;
1622   Vec_ViennaCL   *vecviennacl;
1623   PetscMPIInt    size;
1624 
1625   PetscFunctionBegin;
1626   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
1627   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
1628   ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
1629   ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);CHKERRQ(ierr);
1630   ierr = VecBindToCPU_SeqAIJViennaCL(V,PETSC_FALSE);CHKERRQ(ierr);
1631   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1632 
1633   if (array) {
1634     if (!V->spptr)
1635       V->spptr = new Vec_ViennaCL;
1636     vecviennacl = (Vec_ViennaCL*)V->spptr;
1637     vecviennacl->GPUarray_allocated = 0;
1638     vecviennacl->GPUarray           = (ViennaCLVector*)array;
1639     V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1640   }
1641 
1642   PetscFunctionReturn(0);
1643 }
1644