1 #ifndef VEXCL_BACKEND_CUDA_CUSPARSE_HPP
2 #define VEXCL_BACKEND_CUDA_CUSPARSE_HPP
3 
4 #include <map>
5 #include <memory>
6 
7 #include <cusparse_v2.h>
8 
9 #include <vexcl/vector.hpp>
10 #include <vexcl/cache.hpp>
11 #include <vexcl/backend/cuda/error.hpp>
12 #include <vexcl/backend/cuda/context.hpp>
13 #include <vexcl/detail/backtrace.hpp>
14 
15 namespace vex {
16 namespace backend {
17 namespace cuda {
18 
19 /// Send human-readable representation of CUresult to the output stream.
operator <<(std::ostream & os,cusparseStatus_t rc)20 inline std::ostream& operator<<(std::ostream &os, cusparseStatus_t rc) {
21     os << "CUSPARSE Error (";
22 #define VEXCL_CUDA_ERR2TXT(e) case e: os << static_cast<int>(e) << " - " << #e; break
23     switch(rc) {
24         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_SUCCESS);
25         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_NOT_INITIALIZED);
26         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_ALLOC_FAILED);
27         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_INVALID_VALUE);
28         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_ARCH_MISMATCH);
29         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_MAPPING_ERROR);
30         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_EXECUTION_FAILED);
31         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_INTERNAL_ERROR);
32         VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
33         default:
34             os << "Unknown error";
35     }
36 #undef VEXCL_CUDA_ERR2TXT
37     return os << ")";
38 }
39 
check(cusparseStatus_t rc,const char * file,int line)40 inline void check(cusparseStatus_t rc, const char *file, int line) {
41     if (rc != CUSPARSE_STATUS_SUCCESS) {
42         vex::detail::print_backtrace();
43         throw error(rc, file, line);
44     }
45 }
46 
47 namespace detail {
48 
49 template <>
50 struct deleter_impl<cusparseHandle_t> {
disposevex::backend::cuda::detail::deleter_impl51     static void dispose(cusparseHandle_t handle) {
52         cuda_check( cusparseDestroy(handle) );
53     }
54 };
55 
56 template <>
57 struct deleter_impl<cusparseMatDescr_t> {
disposevex::backend::cuda::detail::deleter_impl58     static void dispose(cusparseMatDescr_t handle) {
59         cuda_check( cusparseDestroyMatDescr(handle) );
60     }
61 };
62 
63 template <>
64 struct deleter_impl<cusparseHybMat_t> {
disposevex::backend::cuda::detail::deleter_impl65     static void dispose(cusparseHybMat_t handle) {
66         cuda_check( cusparseDestroyHybMat(handle) );
67     }
68 };
69 
70 } // namespace detail
71 
cusparse_handle(const command_queue & q)72 inline cusparseHandle_t cusparse_handle(const command_queue &q) {
73     typedef std::shared_ptr<std::remove_pointer<cusparseHandle_t>::type> smart_handle;
74     typedef vex::detail::object_cache<vex::detail::index_by_context, smart_handle> cache_type;
75 
76     static cache_type cache;
77 
78     auto h = cache.find(q);
79 
80     if (h == cache.end()) {
81         select_context(q);
82         cusparseHandle_t handle;
83         cuda_check( cusparseCreate(&handle) );
84         cuda_check( cusparseSetStream(handle, q.raw()) );
85 
86         h = cache.insert(q, smart_handle(handle, detail::deleter(q.context().raw())));
87     }
88 
89     return h->second.get();
90 }
91 
92 template <typename val_t>
93 class spmat_hyb {
94     static_assert(
95             std::is_same<val_t, float>::value ||
96             std::is_same<val_t, double>::value,
97             "Unsupported value type for spmat_cusparse"
98             );
99 
100     public:
101         template <typename row_t, typename col_t>
spmat_hyb(const command_queue & queue,int n,int m,const row_t * row_begin,const col_t * col_begin,const val_t * val_begin)102         spmat_hyb(
103                 const command_queue &queue,
104                 int n, int m,
105                 const row_t *row_begin,
106                 const col_t *col_begin,
107                 const val_t *val_begin
108                 )
109             : handle( cusparse_handle(queue) ),
110               desc  ( create_description(), detail::deleter(queue.context().raw()) ),
111               mat   ( create_matrix(),      detail::deleter(queue.context().raw()) )
112         {
113             cuda_check( cusparseSetMatType(desc.get(), CUSPARSE_MATRIX_TYPE_GENERAL) );
114             cuda_check( cusparseSetMatIndexBase(desc.get(), CUSPARSE_INDEX_BASE_ZERO) );
115 
116             fill_matrix(queue, n, m, row_begin, col_begin, val_begin);
117         }
118 
apply(const vex::vector<val_t> & x,vex::vector<val_t> & y,val_t alpha=1,bool append=false) const119         void apply(const vex::vector<val_t> &x, vex::vector<val_t> &y,
120                  val_t alpha = 1, bool append = false) const
121         {
122             precondition(x.nparts() == 1 && y.nparts() == 1,
123                     "Incompatible vectors");
124 
125             mul(x(0), y(0), alpha, append);
126         }
127 
mul(const device_vector<float> & x,device_vector<float> & y,float alpha=1,bool append=false) const128         void mul(const device_vector<float> &x, device_vector<float> &y,
129                  float alpha = 1, bool append = false) const
130         {
131             float beta = append ? 1.0f : 0.0f;
132 
133             cuda_check(
134                     cusparseShybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
135                         &alpha, desc.get(), mat.get(),
136                         x.raw_ptr(), &beta, y.raw_ptr()
137                         )
138                     );
139         }
140 
mul(const device_vector<double> & x,device_vector<double> & y,double alpha=1,bool append=false) const141         void mul(const device_vector<double> &x, device_vector<double> &y,
142                  double alpha = 1, bool append = false) const
143         {
144             double beta = append ? 1.0 : 0.0;
145 
146             cuda_check(
147                     cusparseDhybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
148                         &alpha, desc.get(), mat.get(),
149                         x.raw_ptr(), &beta, y.raw_ptr()
150                         )
151                     );
152         }
153     private:
154         cusparseHandle_t handle;
155 
156         std::shared_ptr<std::remove_pointer<cusparseMatDescr_t>::type> desc;
157         std::shared_ptr<std::remove_pointer<cusparseHybMat_t>::type>   mat;
158 
create_description()159         static cusparseMatDescr_t create_description() {
160             cusparseMatDescr_t desc;
161             cuda_check( cusparseCreateMatDescr(&desc) );
162             return desc;
163         }
164 
create_matrix()165         static cusparseHybMat_t create_matrix() {
166             cusparseHybMat_t mat;
167             cuda_check( cusparseCreateHybMat(&mat) );
168             return mat;
169         }
170 
171         template <typename row_t, typename col_t>
fill_matrix(const command_queue & q,int n,int m,const row_t * row,const col_t * col,const float * val)172         void fill_matrix(const command_queue &q,
173                 int n, int m, const row_t *row, const col_t *col, const float *val)
174         {
175             device_vector<int>   r(q, n + 1,  row);
176             device_vector<int>   c(q, row[n], col + row[0]);
177             device_vector<float> v(q, row[n], val + row[0]);
178 
179             if (row[0] != 0) vector<int>(q, r) -= row[0];
180 
181             cuda_check(
182                     cusparseScsr2hyb(handle, n, m, desc.get(),
183                         v.raw_ptr(), r.raw_ptr(), c.raw_ptr(), mat.get(), 0,
184                         CUSPARSE_HYB_PARTITION_AUTO
185                         )
186                     );
187         }
188 
189         template <typename row_t, typename col_t>
fill_matrix(const command_queue & q,int n,int m,const row_t * row,const col_t * col,const double * val)190         void fill_matrix(const command_queue &q,
191                 int n, int m, const row_t *row, const col_t *col, const double *val)
192         {
193             device_vector<int>    r(q, n + 1,  row);
194             device_vector<int>    c(q, row[n], col + row[0]);
195             device_vector<double> v(q, row[n], val + row[0]);
196 
197             if (row[0] != 0) vector<int>(q, r) -= row[0];
198 
199             cuda_check(
200                     cusparseDcsr2hyb(handle, n, m, desc.get(),
201                         v.raw_ptr(), r.raw_ptr(), c.raw_ptr(), mat.get(), 0,
202                         CUSPARSE_HYB_PARTITION_AUTO
203                         )
204                     );
205         }
206 
207 };
208 
209 template <typename T>
210 additive_operator< spmat_hyb<T>, vector<T> >
operator *(const spmat_hyb<T> & A,const vector<T> & x)211 operator*(const spmat_hyb<T> &A, const vector<T> &x) {
212     return additive_operator< spmat_hyb<T>, vector<T> >(A, x);
213 }
214 
215 template <typename val_t>
216 class spmat_crs {
217     static_assert(
218             std::is_same<val_t, float>::value ||
219             std::is_same<val_t, double>::value,
220             "Unsupported value type for spmat_cusparse"
221             );
222 
223     public:
224         template <typename row_t, typename col_t>
spmat_crs(const command_queue & queue,int n,int m,const row_t * row_begin,const col_t * col_begin,const val_t * val_begin)225         spmat_crs(
226                 const command_queue &queue,
227                 int n, int m,
228                 const row_t *row_begin,
229                 const col_t *col_begin,
230                 const val_t *val_begin
231                 )
232             : n(n), m(m), nnz(static_cast<unsigned>(row_begin[n] - row_begin[0])),
233               handle( cusparse_handle(queue) ),
234               desc  ( create_description(), detail::deleter(queue.context().raw()) ),
235               row(queue, n+1, row_begin),
236               col(queue, nnz, col_begin + row_begin[0]),
237               val(queue, nnz, val_begin + row_begin[0])
238         {
239             if (row_begin[0] != 0)
240                 vector<int>(queue, row) -= row_begin[0];
241 
242             cuda_check( cusparseSetMatType(desc.get(), CUSPARSE_MATRIX_TYPE_GENERAL) );
243             cuda_check( cusparseSetMatIndexBase(desc.get(), CUSPARSE_INDEX_BASE_ZERO) );
244         }
245 
apply(const vex::vector<val_t> & x,vex::vector<val_t> & y,val_t alpha=1,bool append=false) const246         void apply(const vex::vector<val_t> &x, vex::vector<val_t> &y,
247                  val_t alpha = 1, bool append = false) const
248         {
249             precondition(x.nparts() == 1 && y.nparts() == 1,
250                     "Incompatible vectors");
251 
252             mul(x(0), y(0), alpha, append);
253         }
254 
mul(const device_vector<float> & x,device_vector<float> & y,float alpha=1,bool append=false) const255         void mul(const device_vector<float> &x, device_vector<float> &y,
256                  float alpha = 1, bool append = false) const
257         {
258             float beta = append ? 1.0f : 0.0f;
259 
260             cuda_check(
261                     cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
262                         n, m, nnz, &alpha, desc.get(),
263                         val.raw_ptr(), row.raw_ptr(), col.raw_ptr(),
264                         x.raw_ptr(), &beta, y.raw_ptr()
265                         )
266                  );
267         }
268 
mul(const device_vector<double> & x,device_vector<double> & y,double alpha=1,bool append=false) const269         void mul(const device_vector<double> &x, device_vector<double> &y,
270                  double alpha = 1, bool append = false) const
271         {
272             double beta = append ? 1.0 : 0.0;
273 
274             cuda_check(
275                     cusparseDcsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
276                         n, m, nnz, &alpha, desc.get(),
277                         val.raw_ptr(), row.raw_ptr(), col.raw_ptr(),
278                         x.raw_ptr(), &beta, y.raw_ptr()
279                         )
280                  );
281         }
282     private:
283         unsigned n, m, nnz;
284 
285         cusparseHandle_t handle;
286 
287         std::shared_ptr<std::remove_pointer<cusparseMatDescr_t>::type> desc;
288 
289         device_vector<int>   row;
290         device_vector<int>   col;
291         device_vector<val_t> val;
292 
create_description()293         static cusparseMatDescr_t create_description() {
294             cusparseMatDescr_t desc;
295             cuda_check( cusparseCreateMatDescr(&desc) );
296             return desc;
297         }
298 };
299 
300 template <typename T>
301 additive_operator< spmat_crs<T>, vector<T> >
operator *(const spmat_crs<T> & A,const vector<T> & x)302 operator*(const spmat_crs<T> &A, const vector<T> &x) {
303     return additive_operator< spmat_crs<T>, vector<T> >(A, x);
304 }
305 
306 } // namespace cuda
307 } // namespace backend
308 } // namespace vex
309 
310 #endif
311