1 #include "gpu.h"
2
SCS(accum_by_atrans_gpu)3 void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
4 const cusparseDnVecDescr_t x,
5 cusparseDnVecDescr_t y,
6 cusparseHandle_t cusparse_handle,
7 size_t *buffer_size, void **buffer) {
8 /* y += A'*x
9 x and y MUST be on GPU already
10 */
11 const scs_float onef = 1.0;
12 size_t new_buffer_size = 0;
13
14 CUSPARSE_GEN(SpMV_bufferSize)
15 (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
16 &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
17
18 if (new_buffer_size > *buffer_size) {
19 if (*buffer != SCS_NULL) {
20 cudaFree(*buffer);
21 }
22 cudaMalloc(buffer, *buffer_size);
23 *buffer_size = new_buffer_size;
24 }
25
26 CUSPARSE_GEN(SpMV)
27 (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28 &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
29 }
30
31 /* this is slow, use trans routine if possible */
SCS(accum_by_a_gpu)32 void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
33 cusparseDnVecDescr_t y,
34 cusparseHandle_t cusparse_handle, size_t *buffer_size,
35 void **buffer) {
36 /* y += A*x
37 x and y MUST be on GPU already
38 */
39 const scs_float onef = 1.0;
40 size_t new_buffer_size = 0;
41
42 /* The A matrix idx pointers must be ORDERED */
43 CUSPARSE_GEN(SpMV_bufferSize)
44 (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
45 SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
46
47 if (new_buffer_size > *buffer_size) {
48 if (*buffer != SCS_NULL) {
49 cudaFree(*buffer);
50 }
51 cudaMalloc(buffer, *buffer_size);
52 *buffer_size = new_buffer_size;
53 }
54
55 CUSPARSE_GEN(SpMV)
56 (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57 SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
58 }
59
60 /* This assumes that P has been made full (ie not triangular) and uses the
61 * fact that the GPU is faster for general sparse matrices than for symmetric
62 */
63 /* y += P*x
64 x and y MUST be on GPU already
65 */
SCS(accum_by_p_gpu)66 void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
67 cusparseDnVecDescr_t y,
68 cusparseHandle_t cusparse_handle, size_t *buffer_size,
69 void **buffer) {
70 SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
71 }
72
SCS(free_gpu_matrix)73 void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
74 cudaFree(A->x);
75 cudaFree(A->i);
76 cudaFree(A->p);
77 cusparseDestroySpMat(A->descr);
78 }
79