/*
    Defines the basic matrix operations for the AIJ (compressed row)
    matrix storage format.
*/
#include <petscconf.h>
#define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
#include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
#include <petscbt.h>
#include <../src/vec/vec/impls/dvecimpl.h>
#include <petsc/private/vecimpl.h>

#include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h>

#include <algorithm>
#include <vector>
#include <string>

#include "viennacl/linalg/prod.hpp"

PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A, MatType type, MatReuse reuse, Mat *newmat);
PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat,MatFactorType,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqDense(Mat);

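/*
   Copies the CSR data of A from the host to the GPU (ViennaCL) representation,
   converting the PetscInt index arrays to the unsigned int type ViennaCL expects.
   Does nothing if the GPU copy is already up to date.
*/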
PetscErrorCode MatViennaCLCopyToGPU(Mat A)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) { // some OpenCL SDKs have issues with buffers of size 0
    if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED || A->offloadmask == PETSC_OFFLOAD_CPU) {
      ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);

      try {
        if (a->compressedrow.use) {
          if (!viennaclstruct->compressed_mat) viennaclstruct->compressed_mat = new ViennaCLCompressedAIJMatrix();

          // Since PetscInt is different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, a->compressedrow.nrows+1);
          for (PetscInt i=0; i<=a->compressedrow.nrows; ++i)
            row_buffer.set(i, (a->compressedrow.i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> row_indices; row_indices.raw_resize(dummy, a->compressedrow.nrows);
          for (PetscInt i=0; i<a->compressedrow.nrows; ++i)
            row_indices.set(i, (a->compressedrow.rindex)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->compressed_mat->set(row_buffer.get(), row_indices.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->compressedrow.nrows, a->nz);
          ierr = PetscLogCpuToGpu(((2*a->compressedrow.nrows)+1+a->nz)*sizeof(PetscInt) + (a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        } else {
          if (!viennaclstruct->mat) viennaclstruct->mat = new ViennaCLAIJMatrix();

          // Since PetscInt is in general different from cl_uint, we have to convert:
          viennacl::backend::mem_handle dummy;

          viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
          for (PetscInt i=0; i<=A->rmap->n; ++i)
            row_buffer.set(i, (a->i)[i]);

          viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
          for (PetscInt i=0; i<a->nz; ++i)
            col_buffer.set(i, (a->j)[i]);

          viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
          ierr = PetscLogCpuToGpu(((A->rmap->n+1)+a->nz)*sizeof(PetscInt)+(a->nz)*sizeof(PetscScalar));CHKERRQ(ierr);
        }
        ViennaCLWaitForGPU();
      } catch(std::exception const & ex) {
        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
      }

      // Create temporary vector for v += A*x:
      if (viennaclstruct->tempvec) {
        if (viennaclstruct->tempvec->size() != static_cast<std::size_t>(A->rmap->n)) {
          delete (ViennaCLVector*)viennaclstruct->tempvec;
          viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
        } else {
          viennaclstruct->tempvec->clear();
        }
      } else {
        viennaclstruct->tempvec = new ViennaCLVector(A->rmap->n);
      }

      A->offloadmask = PETSC_OFFLOAD_BOTH;

      ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

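/*
   Copies matrix data from the GPU back to the host. If Agpu is given and the host
   copy is unallocated, the host CSR representation is rebuilt from the GPU matrix;
   otherwise only the nonzero values are read back into a->a.
*/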
PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu)
{
  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
  PetscInt           m               = A->rmap->n;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (A->offloadmask == PETSC_OFFLOAD_BOTH) PetscFunctionReturn(0);
  if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED && Agpu) {
    try {
      if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
      else {
        if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
        a->nz           = Agpu->nnz();
        a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
        A->preallocated = PETSC_TRUE;
        if (a->singlemalloc) {
          if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
        } else {
          if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
          if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
          if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
        }
        ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);

        a->singlemalloc = PETSC_TRUE;

        /* Setup row lengths */
        ierr = PetscFree(a->imax);CHKERRQ(ierr);
        ierr = PetscFree(a->ilen);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->imax);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&a->ilen);CHKERRQ(ierr);
        ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);

        /* Copy data back from GPU */
        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);

        // copy row array
        viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
        (a->i)[0] = row_buffer[0];
        for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
          (a->i)[i+1] = row_buffer[i+1];
          a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i]; // Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
        }

        // copy column indices
        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
        viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
        for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
          (a->j)[i] = col_buffer[i];

        // copy nonzero entries directly to destination (no conversion required)
        viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);

        ierr = PetscLogGpuToCpu(row_buffer.raw_size()+col_buffer.raw_size()+(Agpu->nnz()*sizeof(PetscScalar)));CHKERRQ(ierr);
        ViennaCLWaitForGPU();
        /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */
      }
    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
    }
  } else if (A->offloadmask == PETSC_OFFLOAD_UNALLOCATED) {
    PetscFunctionReturn(0);
  } else {
    if (!Agpu && A->offloadmask != PETSC_OFFLOAD_GPU) PetscFunctionReturn(0);

    if (a->compressedrow.use) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
    if (!Agpu) {
      viennacl::backend::memory_read(viennaclstruct->mat->handle(), 0, sizeof(PetscScalar)*viennaclstruct->mat->nnz(), a->a);
    } else {
      viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
    }
  }
  A->offloadmask = PETSC_OFFLOAD_BOTH;
  /* This assembly prevents resetting the flag to PETSC_OFFLOAD_CPU and recopying */
  ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

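/* Computes yy = A*xx on the GPU; if the matrix has no nonzeros, yy is simply zeroed. */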
PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode       ierr;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  const ViennaCLVector *xgpu=NULL;
  ViennaCLVector       *ygpu=NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
    ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
    ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
    try {
      if (a->compressedrow.use) {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      } else {
        *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
      }
      ViennaCLWaitForGPU();
    } catch (std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
    }
    ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
    ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
    ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
    ierr = PetscLogGpuFlops(2.0*a->nz - a->nonzerorowcnt);CHKERRQ(ierr);
  } else {
    ierr = VecSet_SeqViennaCL(yy,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

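/* Computes zz = yy + A*xx on the GPU, staging the product A*xx in the tempvec work vector. */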
PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqAIJ           *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode       ierr;
  Mat_SeqAIJViennaCL   *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
  const ViennaCLVector *xgpu=NULL,*ygpu=NULL;
  ViennaCLVector       *zgpu=NULL;

  PetscFunctionBegin;
  /* The line below is necessary due to the operations that modify the matrix on the CPU (axpy, scale, etc) */
  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  if (A->rmap->n > 0 && A->cmap->n > 0 && a->nz) {
    try {
      ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
      ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
      ierr = PetscLogGpuTimeBegin();CHKERRQ(ierr);
      if (a->compressedrow.use) *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->compressed_mat, *xgpu);
      else *viennaclstruct->tempvec = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
      if (zz != yy) *zgpu = *ygpu + *viennaclstruct->tempvec;
      else *zgpu += *viennaclstruct->tempvec;
      ViennaCLWaitForGPU();
      ierr = PetscLogGpuTimeEnd();CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
      ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
    } catch(std::exception const & ex) {
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
    }
    ierr = PetscLogGpuFlops(2.0*a->nz);CHKERRQ(ierr);
  } else {
    ierr = VecCopy_SeqViennaCL(yy,zz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
  if (!A->boundtocpu) {
    ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* --------------------------------------------------------------------------------*/
/*@C
   MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
   (the default sequential PETSc format). This matrix will ultimately be pushed down
   to GPUs and use the ViennaCL library for calculations. For good matrix
   assembly performance the user should preallocate the matrix storage by setting
   the parameter nz (or the array nnz). By setting these parameters accurately,
   performance during matrix assembly can be increased substantially.

   Collective

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or NULL

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If nnz is given then nz is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage. That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero. See the users' manual for details.

   Specify the preallocated storage with either nz or nnz (not both).
   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
   allocation. For large problems you MUST preallocate memory or you
   will get TERRIBLE performance; see the users' manual chapter on matrices.
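
   Example (a minimal sketch; assumes m, n, and ierr are declared by the caller):
$     Mat A;
$     ierr = MatCreateSeqAIJViennaCL(PETSC_COMM_SELF,m,n,5,NULL,&A);CHKERRQ(ierr);
$     /* insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
$     ierr = MatDestroy(&A);CHKERRQ(ierr);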

   Level: intermediate

.seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSPARSE(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays()

@*/
PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
{
  PetscErrorCode     ierr;
  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;

  PetscFunctionBegin;
  try {
    if (viennaclcontainer) {
      delete viennaclcontainer->tempvec;
      delete viennaclcontainer->mat;
      delete viennaclcontainer->compressed_mat;
      delete viennaclcontainer;
    }
    A->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  } catch(std::exception const & ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
  }

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",NULL);CHKERRQ(ierr);

  /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
  A->spptr = NULL;
  ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
  ierr = MatConvert_SeqAIJ_SeqAIJViennaCL(B,MATSEQAIJVIENNACL,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat,PetscBool);
static PetscErrorCode MatDuplicate_SeqAIJViennaCL(Mat A,MatDuplicateOption cpvalues,Mat *B)
{
  PetscErrorCode ierr;
  Mat            C;

  PetscFunctionBegin;
  ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr);
  C    = *B;

  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  C->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  C->spptr = new Mat_SeqAIJViennaCL();
  ((Mat_SeqAIJViennaCL*)C->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)C->spptr)->compressed_mat = NULL;

  ierr = PetscObjectChangeTypeName((PetscObject)C,MATSEQAIJVIENNACL);CHKERRQ(ierr);

  C->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (C->assembled) {
    ierr = MatViennaCLCopyToGPU(C);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

static PetscErrorCode MatSeqAIJGetArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
{
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr   = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix*)NULL);CHKERRQ(ierr);
  *array = a->a;
  A->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSeqAIJRestoreArray_SeqAIJViennaCL(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  *array = NULL;
  PetscFunctionReturn(0);
}

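/* Switches the matrix operations between the CPU (SeqAIJ) and GPU (ViennaCL) implementations. */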
static PetscErrorCode MatBindToCPU_SeqAIJViennaCL(Mat A,PetscBool flg)
{
  Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  A->boundtocpu  = flg;
  aij->inode.use = flg;
  if (flg) {
    /* make sure we have an up-to-date copy on the CPU */
    ierr = MatViennaCLCopyFromGPU(A,(const ViennaCLAIJMatrix*)NULL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJ);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJ);CHKERRQ(ierr);

    A->ops->mult        = MatMult_SeqAIJ;
    A->ops->multadd     = MatMultAdd_SeqAIJ;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
    A->ops->duplicate   = MatDuplicate_SeqAIJ;
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJGetArray_C",MatSeqAIJGetArray_SeqAIJViennaCL);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqAIJRestoreArray_C",MatSeqAIJRestoreArray_SeqAIJViennaCL);CHKERRQ(ierr);

    A->ops->mult        = MatMult_SeqAIJViennaCL;
    A->ops->multadd     = MatMultAdd_SeqAIJViennaCL;
    A->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL;
    A->ops->destroy     = MatDestroy_SeqAIJViennaCL;
    A->ops->duplicate   = MatDuplicate_SeqAIJViennaCL;
  }
  PetscFunctionReturn(0);
}

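/* Converts a SeqAIJ matrix to MATSEQAIJVIENNACL by attaching the ViennaCL container
   and installing the GPU-enabled operations via MatBindToCPU_SeqAIJViennaCL(). */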
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJViennaCL(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MAT_REUSE_MATRIX is not supported. Consider using MAT_INPLACE_MATRIX instead");

  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  }

  B = *newmat;

  B->spptr = new Mat_SeqAIJViennaCL();

  ((Mat_SeqAIJViennaCL*)B->spptr)->tempvec        = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->mat            = NULL;
  ((Mat_SeqAIJViennaCL*)B->spptr)->compressed_mat = NULL;

  ierr = MatBindToCPU_SeqAIJViennaCL(A,PETSC_FALSE);CHKERRQ(ierr);
  A->ops->bindtocpu = MatBindToCPU_SeqAIJViennaCL;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECVIENNACL,&B->defaultvectype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqaij_seqaijviennacl_C",MatConvert_SeqAIJ_SeqAIJViennaCL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqdense_C",MatProductSetFromOptions_SeqAIJ_SeqDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_seqaijviennacl_seqaij_C",MatProductSetFromOptions_SeqAIJ);CHKERRQ(ierr);

  B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;

  /* If the source matrix is already assembled, copy the destination matrix to the GPU */
  if (B->assembled) {
    ierr = MatViennaCLCopyToGPU(B);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

/*MC
   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.

   A matrix type whose data resides on GPUs. These matrices are in CSR format by
   default. All matrix calculations are performed using the ViennaCL library.

   Options Database Keys:
+  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
.  -mat_viennacl_storage_format csr - sets the storage format of matrices during a call to MatSetFromOptions()
-  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices used by MatMult() during a call to MatSetFromOptions()

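   Example (a minimal sketch; assumes A is a declared Mat and m, n, nz are set by the caller):
$     ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
$     ierr = MatSetSizes(A,m,n,m,n);CHKERRQ(ierr);
$     ierr = MatSetType(A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
$     ierr = MatSeqAIJSetPreallocation(A,nz,NULL);CHKERRQ(ierr);
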
   Level: beginner

.seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
M*/

PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ViennaCL(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_LU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_CHOLESKY,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ILU,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERPETSC, MATSEQAIJVIENNACL, MAT_FACTOR_ICC,MatGetFactor_seqaij_petsc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}