1 #ifndef VIENNACL_ELL_MATRIX_HPP_
2 #define VIENNACL_ELL_MATRIX_HPP_
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
21 /** @file viennacl/ell_matrix.hpp
22 @brief Implementation of the ell_matrix class
23
24 Contributed by Volodymyr Kysenko.
25 */
26
27
28 #include "viennacl/forwards.h"
29 #include "viennacl/vector.hpp"
30
31 #include "viennacl/tools/tools.hpp"
32
33 #include "viennacl/linalg/sparse_matrix_operations.hpp"
34
35 namespace viennacl
36 {
37 /** @brief Sparse matrix class using the ELLPACK format for storing the nonzeros.
38 *
39 * This format works best for matrices where the number of nonzeros per row is mostly the same.
40 * Finite element and finite difference methods on nicely shaped domains often result in such a nonzero pattern.
41 * For a matrix
42 *
43 * (1 2 0 0 0)
44 * (2 3 4 0 0)
45 * (0 5 6 0 7)
46 * (0 0 8 9 0)
47 *
 * the entries are laid out in chunks of size 3 as
49 * (1 2 5 8; 2 3 6 9; 0 4 7 0)
50 * Note that this is a 'transposed' representation in order to maximize coalesced memory access.
51 */
template<typename NumericT, unsigned int AlignmentV /* see forwards.h for default argument */>
class ell_matrix
{
public:
  typedef viennacl::backend::mem_handle                                                              handle_type;
  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>     value_type;
  typedef vcl_size_t                                                                                 size_type;

  /** @brief Default constructor. Creates an empty (0 x 0) matrix; buffers are allocated lazily on first copy(). */
  ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {}

  /** @brief Constructor placing the matrix buffers in the provided context (main memory, CUDA, or OpenCL).
    *
    * @param ctx  The ViennaCL context in which the coordinate and element buffers live.
    */
  ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0)
  {
    coords_.switch_active_handle_id(ctx.memory_type());
    elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
    // For OpenCL the target cl_context must be set explicitly on each handle:
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      coords_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
  }

  /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */
  void clear()
  {
    maxnnz_ = 0;

    // Recreate the device buffers from zero-initialized host buffers
    // (one slot per padded row; typesafe_host_array value-initializes its storage).
    viennacl::backend::typesafe_host_array<unsigned int> host_coords_buffer(coords_, internal_size1());
    std::vector<NumericT> host_elements(internal_size1());

    viennacl::backend::memory_create(coords_, host_coords_buffer.element_size() * internal_size1(), viennacl::traits::context(coords_), host_coords_buffer.get());
    viennacl::backend::memory_create(elements_, sizeof(NumericT) * internal_size1(), viennacl::traits::context(elements_), &(host_elements[0]));
  }

  /** @brief Number of rows, padded up to the next multiple of AlignmentV (the row dimension actually stored on the device). */
  vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); }
  /** @brief Number of columns, padded up to the next multiple of AlignmentV. */
  vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); }

  /** @brief Logical number of rows. */
  vcl_size_t size1() const { return rows_; }
  /** @brief Logical number of columns. */
  vcl_size_t size2() const { return cols_; }

  /** @brief Maximum entries per row, padded up to the next multiple of AlignmentV (chunk count of the device layout). */
  vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, AlignmentV); }
  /** @brief Maximum number of nonzeros in any row (width of the ELL slab before padding). */
  vcl_size_t maxnnz() const { return maxnnz_; }

  /** @brief Number of stored entries ignoring row/width padding (includes explicit zeros in rows shorter than maxnnz()). */
  vcl_size_t nnz() const { return rows_ * maxnnz_; }
  /** @brief Total number of entries in the padded device buffers. */
  vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); }

  /** @brief Memory handle holding the matrix values. */
  handle_type & handle() { return elements_; }
  const handle_type & handle() const { return elements_; }

  /** @brief Memory handle holding the column indices. */
  handle_type & handle2() { return coords_; }
  const handle_type & handle2() const { return coords_; }

#if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment
  template<typename CPUMatrixT>
  friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix & gpu_matrix );
#else
  template<typename CPUMatrixT, typename T, unsigned int ALIGN>
  friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix );
#endif

private:
  vcl_size_t rows_;      // logical row count
  vcl_size_t cols_;      // logical column count
  vcl_size_t maxnnz_;    // widest row (entries), before alignment padding

  handle_type coords_;   // column index per stored entry ('transposed' chunk layout)
  handle_type elements_; // value per stored entry, same layout as coords_
};
122
123 template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
copy(const CPUMatrixT & cpu_matrix,ell_matrix<NumericT,AlignmentV> & gpu_matrix)124 void copy(const CPUMatrixT& cpu_matrix, ell_matrix<NumericT, AlignmentV>& gpu_matrix )
125 {
126 assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
127 assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
128
129 if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
130 {
131 //determine max capacity for row
132 vcl_size_t max_entries_per_row = 0;
133 for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
134 {
135 vcl_size_t num_entries = 0;
136 for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
137 ++num_entries;
138
139 max_entries_per_row = std::max(max_entries_per_row, num_entries);
140 }
141
142 //setup GPU matrix
143 gpu_matrix.maxnnz_ = max_entries_per_row;
144 gpu_matrix.rows_ = cpu_matrix.size1();
145 gpu_matrix.cols_ = cpu_matrix.size2();
146
147 vcl_size_t nnz = gpu_matrix.internal_nnz();
148
149 viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz);
150 std::vector<NumericT> elements(nnz, 0);
151
152 // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " "
153 // << gpu_matrix.internal_maxnnz() << "\n";
154
155 for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
156 {
157 vcl_size_t data_index = 0;
158
159 for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
160 {
161 coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
162 elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
163 //std::cout << *col_it << "\n";
164 data_index++;
165 }
166 }
167
168 viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get());
169 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(NumericT) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0]));
170 }
171 }
172
173
174
175 /** @brief Copies a sparse matrix from the host to the compute device. The host type is the std::vector< std::map < > > format .
176 *
177 * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map.
178 * @param gpu_matrix The sparse ell_matrix from ViennaCL
179 */
180 template<typename IndexT, typename NumericT, unsigned int AlignmentV>
copy(std::vector<std::map<IndexT,NumericT>> const & cpu_matrix,ell_matrix<NumericT,AlignmentV> & gpu_matrix)181 void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,
182 ell_matrix<NumericT, AlignmentV> & gpu_matrix)
183 {
184 vcl_size_t max_col = 0;
185 for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
186 {
187 if (cpu_matrix[i].size() > 0)
188 max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
189 }
190
191 viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix);
192 }
193
194
195
196
197
198
199 template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
copy(const ell_matrix<NumericT,AlignmentV> & gpu_matrix,CPUMatrixT & cpu_matrix)200 void copy(const ell_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& cpu_matrix)
201 {
202 assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
203 assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
204
205 if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
206 {
207 std::vector<NumericT> elements(gpu_matrix.internal_nnz());
208 viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz());
209
210 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * elements.size(), &(elements[0]));
211 viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get());
212
213 for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
214 {
215 for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++)
216 {
217 vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;
218
219 NumericT val = elements[offset];
220 if (val <= 0 && val >= 0) // val == 0 without compiler warnings
221 continue;
222
223 if (coords[offset] >= gpu_matrix.size2())
224 {
225 std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl;
226 return;
227 }
228
229 cpu_matrix(row, coords[offset]) = val;
230 }
231 }
232 }
233 }
234
235
236 /** @brief Copies a sparse matrix from the compute device to the host. The host type is the std::vector< std::map < > > format .
237 *
238 * @param gpu_matrix The sparse ell_matrix from ViennaCL
239 * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map.
240 */
241 template<typename NumericT, unsigned int AlignmentV, typename IndexT>
copy(const ell_matrix<NumericT,AlignmentV> & gpu_matrix,std::vector<std::map<IndexT,NumericT>> & cpu_matrix)242 void copy(const ell_matrix<NumericT, AlignmentV> & gpu_matrix,
243 std::vector< std::map<IndexT, NumericT> > & cpu_matrix)
244 {
245 if (cpu_matrix.size() == 0)
246 cpu_matrix.resize(gpu_matrix.size1());
247
248 assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension mismatch!"));
249
250 tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2());
251 viennacl::copy(gpu_matrix, temp);
252 }
253
254 //
255 // Specify available operations:
256 //
257
258 /** \cond */
259
260 namespace linalg
261 {
262 namespace detail
263 {
264 // x = A * y
265 template<typename T, unsigned int A>
266 struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
267 {
applyviennacl::linalg::detail::op_executor268 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
269 {
270 // check for the special case x = A * x
271 if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
272 {
273 viennacl::vector<T> temp(lhs);
274 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
275 lhs = temp;
276 }
277 else
278 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0));
279 }
280 };
281
282 template<typename T, unsigned int A>
283 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
284 {
applyviennacl::linalg::detail::op_executor285 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
286 {
287 // check for the special case x += A * x
288 if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
289 {
290 viennacl::vector<T> temp(lhs);
291 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
292 lhs += temp;
293 }
294 else
295 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1));
296 }
297 };
298
299 template<typename T, unsigned int A>
300 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
301 {
applyviennacl::linalg::detail::op_executor302 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
303 {
304 // check for the special case x -= A * x
305 if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
306 {
307 viennacl::vector<T> temp(lhs);
308 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
309 lhs -= temp;
310 }
311 else
312 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1));
313 }
314 };
315
316
317 // x = A * vec_op
318 template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
319 struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
320 {
applyviennacl::linalg::detail::op_executor321 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
322 {
323 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
324 viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
325 }
326 };
327
328 // x = A * vec_op
329 template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
330 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
331 {
applyviennacl::linalg::detail::op_executor332 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
333 {
334 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
335 viennacl::vector<T> temp_result(lhs);
336 viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
337 lhs += temp_result;
338 }
339 };
340
341 // x = A * vec_op
342 template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
343 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
344 {
applyviennacl::linalg::detail::op_executor345 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
346 {
347 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
348 viennacl::vector<T> temp_result(lhs);
349 viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
350 lhs -= temp_result;
351 }
352 };
353
354 } // namespace detail
355 } // namespace linalg
356
357 /** \endcond */
358 }
359
360 #endif
361
362
363