1 #ifndef VEXCL_BACKEND_CUDA_CUSPARSE_HPP
2 #define VEXCL_BACKEND_CUDA_CUSPARSE_HPP
3
4 #include <map>
5 #include <memory>
6
7 #include <cusparse_v2.h>
8
9 #include <vexcl/vector.hpp>
10 #include <vexcl/cache.hpp>
11 #include <vexcl/backend/cuda/error.hpp>
12 #include <vexcl/backend/cuda/context.hpp>
13 #include <vexcl/detail/backtrace.hpp>
14
15 namespace vex {
16 namespace backend {
17 namespace cuda {
18
19 /// Send human-readable representation of CUresult to the output stream.
operator <<(std::ostream & os,cusparseStatus_t rc)20 inline std::ostream& operator<<(std::ostream &os, cusparseStatus_t rc) {
21 os << "CUSPARSE Error (";
22 #define VEXCL_CUDA_ERR2TXT(e) case e: os << static_cast<int>(e) << " - " << #e; break
23 switch(rc) {
24 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_SUCCESS);
25 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_NOT_INITIALIZED);
26 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_ALLOC_FAILED);
27 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_INVALID_VALUE);
28 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_ARCH_MISMATCH);
29 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_MAPPING_ERROR);
30 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_EXECUTION_FAILED);
31 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_INTERNAL_ERROR);
32 VEXCL_CUDA_ERR2TXT(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED);
33 default:
34 os << "Unknown error";
35 }
36 #undef VEXCL_CUDA_ERR2TXT
37 return os << ")";
38 }
39
check(cusparseStatus_t rc,const char * file,int line)40 inline void check(cusparseStatus_t rc, const char *file, int line) {
41 if (rc != CUSPARSE_STATUS_SUCCESS) {
42 vex::detail::print_backtrace();
43 throw error(rc, file, line);
44 }
45 }
46
47 namespace detail {
48
49 template <>
50 struct deleter_impl<cusparseHandle_t> {
disposevex::backend::cuda::detail::deleter_impl51 static void dispose(cusparseHandle_t handle) {
52 cuda_check( cusparseDestroy(handle) );
53 }
54 };
55
56 template <>
57 struct deleter_impl<cusparseMatDescr_t> {
disposevex::backend::cuda::detail::deleter_impl58 static void dispose(cusparseMatDescr_t handle) {
59 cuda_check( cusparseDestroyMatDescr(handle) );
60 }
61 };
62
63 template <>
64 struct deleter_impl<cusparseHybMat_t> {
disposevex::backend::cuda::detail::deleter_impl65 static void dispose(cusparseHybMat_t handle) {
66 cuda_check( cusparseDestroyHybMat(handle) );
67 }
68 };
69
70 } // namespace detail
71
cusparse_handle(const command_queue & q)72 inline cusparseHandle_t cusparse_handle(const command_queue &q) {
73 typedef std::shared_ptr<std::remove_pointer<cusparseHandle_t>::type> smart_handle;
74 typedef vex::detail::object_cache<vex::detail::index_by_context, smart_handle> cache_type;
75
76 static cache_type cache;
77
78 auto h = cache.find(q);
79
80 if (h == cache.end()) {
81 select_context(q);
82 cusparseHandle_t handle;
83 cuda_check( cusparseCreate(&handle) );
84 cuda_check( cusparseSetStream(handle, q.raw()) );
85
86 h = cache.insert(q, smart_handle(handle, detail::deleter(q.context().raw())));
87 }
88
89 return h->second.get();
90 }
91
92 template <typename val_t>
93 class spmat_hyb {
94 static_assert(
95 std::is_same<val_t, float>::value ||
96 std::is_same<val_t, double>::value,
97 "Unsupported value type for spmat_cusparse"
98 );
99
100 public:
101 template <typename row_t, typename col_t>
spmat_hyb(const command_queue & queue,int n,int m,const row_t * row_begin,const col_t * col_begin,const val_t * val_begin)102 spmat_hyb(
103 const command_queue &queue,
104 int n, int m,
105 const row_t *row_begin,
106 const col_t *col_begin,
107 const val_t *val_begin
108 )
109 : handle( cusparse_handle(queue) ),
110 desc ( create_description(), detail::deleter(queue.context().raw()) ),
111 mat ( create_matrix(), detail::deleter(queue.context().raw()) )
112 {
113 cuda_check( cusparseSetMatType(desc.get(), CUSPARSE_MATRIX_TYPE_GENERAL) );
114 cuda_check( cusparseSetMatIndexBase(desc.get(), CUSPARSE_INDEX_BASE_ZERO) );
115
116 fill_matrix(queue, n, m, row_begin, col_begin, val_begin);
117 }
118
apply(const vex::vector<val_t> & x,vex::vector<val_t> & y,val_t alpha=1,bool append=false) const119 void apply(const vex::vector<val_t> &x, vex::vector<val_t> &y,
120 val_t alpha = 1, bool append = false) const
121 {
122 precondition(x.nparts() == 1 && y.nparts() == 1,
123 "Incompatible vectors");
124
125 mul(x(0), y(0), alpha, append);
126 }
127
mul(const device_vector<float> & x,device_vector<float> & y,float alpha=1,bool append=false) const128 void mul(const device_vector<float> &x, device_vector<float> &y,
129 float alpha = 1, bool append = false) const
130 {
131 float beta = append ? 1.0f : 0.0f;
132
133 cuda_check(
134 cusparseShybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
135 &alpha, desc.get(), mat.get(),
136 x.raw_ptr(), &beta, y.raw_ptr()
137 )
138 );
139 }
140
mul(const device_vector<double> & x,device_vector<double> & y,double alpha=1,bool append=false) const141 void mul(const device_vector<double> &x, device_vector<double> &y,
142 double alpha = 1, bool append = false) const
143 {
144 double beta = append ? 1.0 : 0.0;
145
146 cuda_check(
147 cusparseDhybmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
148 &alpha, desc.get(), mat.get(),
149 x.raw_ptr(), &beta, y.raw_ptr()
150 )
151 );
152 }
153 private:
154 cusparseHandle_t handle;
155
156 std::shared_ptr<std::remove_pointer<cusparseMatDescr_t>::type> desc;
157 std::shared_ptr<std::remove_pointer<cusparseHybMat_t>::type> mat;
158
create_description()159 static cusparseMatDescr_t create_description() {
160 cusparseMatDescr_t desc;
161 cuda_check( cusparseCreateMatDescr(&desc) );
162 return desc;
163 }
164
create_matrix()165 static cusparseHybMat_t create_matrix() {
166 cusparseHybMat_t mat;
167 cuda_check( cusparseCreateHybMat(&mat) );
168 return mat;
169 }
170
171 template <typename row_t, typename col_t>
fill_matrix(const command_queue & q,int n,int m,const row_t * row,const col_t * col,const float * val)172 void fill_matrix(const command_queue &q,
173 int n, int m, const row_t *row, const col_t *col, const float *val)
174 {
175 device_vector<int> r(q, n + 1, row);
176 device_vector<int> c(q, row[n], col + row[0]);
177 device_vector<float> v(q, row[n], val + row[0]);
178
179 if (row[0] != 0) vector<int>(q, r) -= row[0];
180
181 cuda_check(
182 cusparseScsr2hyb(handle, n, m, desc.get(),
183 v.raw_ptr(), r.raw_ptr(), c.raw_ptr(), mat.get(), 0,
184 CUSPARSE_HYB_PARTITION_AUTO
185 )
186 );
187 }
188
189 template <typename row_t, typename col_t>
fill_matrix(const command_queue & q,int n,int m,const row_t * row,const col_t * col,const double * val)190 void fill_matrix(const command_queue &q,
191 int n, int m, const row_t *row, const col_t *col, const double *val)
192 {
193 device_vector<int> r(q, n + 1, row);
194 device_vector<int> c(q, row[n], col + row[0]);
195 device_vector<double> v(q, row[n], val + row[0]);
196
197 if (row[0] != 0) vector<int>(q, r) -= row[0];
198
199 cuda_check(
200 cusparseDcsr2hyb(handle, n, m, desc.get(),
201 v.raw_ptr(), r.raw_ptr(), c.raw_ptr(), mat.get(), 0,
202 CUSPARSE_HYB_PARTITION_AUTO
203 )
204 );
205 }
206
207 };
208
209 template <typename T>
210 additive_operator< spmat_hyb<T>, vector<T> >
operator *(const spmat_hyb<T> & A,const vector<T> & x)211 operator*(const spmat_hyb<T> &A, const vector<T> &x) {
212 return additive_operator< spmat_hyb<T>, vector<T> >(A, x);
213 }
214
215 template <typename val_t>
216 class spmat_crs {
217 static_assert(
218 std::is_same<val_t, float>::value ||
219 std::is_same<val_t, double>::value,
220 "Unsupported value type for spmat_cusparse"
221 );
222
223 public:
224 template <typename row_t, typename col_t>
spmat_crs(const command_queue & queue,int n,int m,const row_t * row_begin,const col_t * col_begin,const val_t * val_begin)225 spmat_crs(
226 const command_queue &queue,
227 int n, int m,
228 const row_t *row_begin,
229 const col_t *col_begin,
230 const val_t *val_begin
231 )
232 : n(n), m(m), nnz(static_cast<unsigned>(row_begin[n] - row_begin[0])),
233 handle( cusparse_handle(queue) ),
234 desc ( create_description(), detail::deleter(queue.context().raw()) ),
235 row(queue, n+1, row_begin),
236 col(queue, nnz, col_begin + row_begin[0]),
237 val(queue, nnz, val_begin + row_begin[0])
238 {
239 if (row_begin[0] != 0)
240 vector<int>(queue, row) -= row_begin[0];
241
242 cuda_check( cusparseSetMatType(desc.get(), CUSPARSE_MATRIX_TYPE_GENERAL) );
243 cuda_check( cusparseSetMatIndexBase(desc.get(), CUSPARSE_INDEX_BASE_ZERO) );
244 }
245
apply(const vex::vector<val_t> & x,vex::vector<val_t> & y,val_t alpha=1,bool append=false) const246 void apply(const vex::vector<val_t> &x, vex::vector<val_t> &y,
247 val_t alpha = 1, bool append = false) const
248 {
249 precondition(x.nparts() == 1 && y.nparts() == 1,
250 "Incompatible vectors");
251
252 mul(x(0), y(0), alpha, append);
253 }
254
mul(const device_vector<float> & x,device_vector<float> & y,float alpha=1,bool append=false) const255 void mul(const device_vector<float> &x, device_vector<float> &y,
256 float alpha = 1, bool append = false) const
257 {
258 float beta = append ? 1.0f : 0.0f;
259
260 cuda_check(
261 cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
262 n, m, nnz, &alpha, desc.get(),
263 val.raw_ptr(), row.raw_ptr(), col.raw_ptr(),
264 x.raw_ptr(), &beta, y.raw_ptr()
265 )
266 );
267 }
268
mul(const device_vector<double> & x,device_vector<double> & y,double alpha=1,bool append=false) const269 void mul(const device_vector<double> &x, device_vector<double> &y,
270 double alpha = 1, bool append = false) const
271 {
272 double beta = append ? 1.0 : 0.0;
273
274 cuda_check(
275 cusparseDcsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
276 n, m, nnz, &alpha, desc.get(),
277 val.raw_ptr(), row.raw_ptr(), col.raw_ptr(),
278 x.raw_ptr(), &beta, y.raw_ptr()
279 )
280 );
281 }
282 private:
283 unsigned n, m, nnz;
284
285 cusparseHandle_t handle;
286
287 std::shared_ptr<std::remove_pointer<cusparseMatDescr_t>::type> desc;
288
289 device_vector<int> row;
290 device_vector<int> col;
291 device_vector<val_t> val;
292
create_description()293 static cusparseMatDescr_t create_description() {
294 cusparseMatDescr_t desc;
295 cuda_check( cusparseCreateMatDescr(&desc) );
296 return desc;
297 }
298 };
299
300 template <typename T>
301 additive_operator< spmat_crs<T>, vector<T> >
operator *(const spmat_crs<T> & A,const vector<T> & x)302 operator*(const spmat_crs<T> &A, const vector<T> &x) {
303 return additive_operator< spmat_crs<T>, vector<T> >(A, x);
304 }
305
306 } // namespace cuda
307 } // namespace backend
308 } // namespace vex
309
310 #endif
311