#ifndef VIENNACL_ELL_MATRIX_HPP_
#define VIENNACL_ELL_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/ell_matrix.hpp
    @brief Implementation of the ell_matrix class

    Contributed by Volodymyr Kysenko.
*/


#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/tools/tools.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{
/** @brief Sparse matrix class using the ELLPACK format for storing the nonzeros.
    *
    * This format works best for matrices where the number of nonzeros per row is mostly the same.
    * Finite element and finite difference methods on nicely shaped domains often result in such a nonzero pattern.
    * For a matrix
    *
    *   (1 2 0 0 0)
    *   (2 3 4 0 0)
    *   (0 5 6 0 7)
    *   (0 0 8 9 0)
    *
    * the entries are laid out in three chunks of four entries each (the k-th chunk holds the k-th nonzero of every row, padded with zeros):
    *   (1 2 5 8; 2 3 6 9; 0 4 7 0)
    * Note that this is a 'transposed' representation in order to maximize coalesced memory access.
    */
template<typename NumericT, unsigned int AlignmentV /* see forwards.h for default argument */>
class ell_matrix
{
public:
  typedef viennacl::backend::mem_handle                                                              handle_type;
  typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>     value_type;
  typedef vcl_size_t                                                                                 size_type;

  ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {}

  ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0)
  {
    coords_.switch_active_handle_id(ctx.memory_type());
    elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
    if (ctx.memory_type() == OPENCL_MEMORY)
    {
      coords_.opencl_handle().context(ctx.opencl_context());
      elements_.opencl_handle().context(ctx.opencl_context());
    }
#endif
  }

  /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */
  void clear()
  {
    maxnnz_ = 0;

    viennacl::backend::typesafe_host_array<unsigned int> host_coords_buffer(coords_, internal_size1());
    std::vector<NumericT> host_elements(internal_size1());

    viennacl::backend::memory_create(coords_,   host_coords_buffer.element_size() * internal_size1(), viennacl::traits::context(coords_),   host_coords_buffer.get());
    viennacl::backend::memory_create(elements_, sizeof(NumericT) * internal_size1(),                  viennacl::traits::context(elements_), &(host_elements[0]));
  }

  vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); }
  vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); }

  vcl_size_t size1() const { return rows_; }
  vcl_size_t size2() const { return cols_; }

  vcl_size_t internal_maxnnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, AlignmentV); }
  vcl_size_t maxnnz() const { return maxnnz_; }

  vcl_size_t nnz() const { return rows_ * maxnnz_; }
  vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); }

  handle_type & handle()       { return elements_; }
  const handle_type & handle() const { return elements_; }

  handle_type & handle2()       { return coords_; }
  const handle_type & handle2() const { return coords_; }

#if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
  template<typename CPUMatrixT>
  friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix & gpu_matrix );
#else
  template<typename CPUMatrixT, typename T, unsigned int ALIGN>
  friend void copy(const CPUMatrixT & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix );
#endif

private:
  vcl_size_t rows_;
  vcl_size_t cols_;
  vcl_size_t maxnnz_;

  handle_type coords_;
  handle_type elements_;
};
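
// Illustration (not compiled): for the 4x5 example matrix in the class documentation and
// AlignmentV == 1 (so no padding rows or slots are added), maxnnz() == 3 and the two internal
// buffers hold one chunk per nonzero slot, each chunk containing one entry per row:
//
//   elements_ : 1 2 5 8 | 2 3 6 9 | 0 4 7 0     (rows with fewer nonzeros are padded with zeros)
//   coords_   : 0 0 1 2 | 1 1 2 3 | . 2 4 .     (column index of each stored value; the index of a
//                                                padded slot is irrelevant, since zero-valued entries
//                                                are skipped when copying back to the host)
//
// The entry for row r in slot k therefore lives at index  internal_size1() * k + r,
// which is exactly the indexing used by the copy() routines below.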

template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
void copy(const CPUMatrixT& cpu_matrix, ell_matrix<NumericT, AlignmentV>& gpu_matrix )
{
  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

  if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
  {
    //determine max capacity for row
    vcl_size_t max_entries_per_row = 0;
    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t num_entries = 0;
      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        ++num_entries;

      max_entries_per_row = std::max(max_entries_per_row, num_entries);
    }

    //setup GPU matrix
    gpu_matrix.maxnnz_ = max_entries_per_row;
    gpu_matrix.rows_ = cpu_matrix.size1();
    gpu_matrix.cols_ = cpu_matrix.size2();

    vcl_size_t nnz = gpu_matrix.internal_nnz();

    viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz);
    std::vector<NumericT> elements(nnz, 0);

    // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " "
    //             << gpu_matrix.internal_maxnnz() << "\n";

    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
    {
      vcl_size_t data_index = 0;

      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
      {
        coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
        elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
        //std::cout << *col_it << "\n";
        data_index++;
      }
    }

    viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(),                   traits::context(gpu_matrix.handle2()), coords.get());
    viennacl::backend::memory_create(gpu_matrix.handle(),  sizeof(NumericT) * elements.size(),  traits::context(gpu_matrix.handle()),  &(elements[0]));
  }
}
/** @brief Copies a sparse matrix from the host to the compute device. The host type is the std::vector< std::map<> > format.
  *
  * @param cpu_matrix   A sparse matrix on the host composed of an STL vector and an STL map.
  * @param gpu_matrix   The sparse ell_matrix from ViennaCL
  */
template<typename IndexT, typename NumericT, unsigned int AlignmentV>
void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,
          ell_matrix<NumericT, AlignmentV> & gpu_matrix)
{
  vcl_size_t max_col = 0;
  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
  {
    if (cpu_matrix[i].size() > 0)
      max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
  }

  viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix);
}
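
// Usage sketch for the overload above (illustrative only, assuming double precision and the
// 4x5 example matrix from the class documentation):
//
//   std::vector< std::map<unsigned int, double> > host_A(4);
//   host_A[0][0] = 1.0; host_A[0][1] = 2.0;
//   host_A[1][0] = 2.0; host_A[1][1] = 3.0; host_A[1][2] = 4.0;
//   host_A[2][1] = 5.0; host_A[2][2] = 6.0; host_A[2][4] = 7.0;
//   host_A[3][2] = 8.0; host_A[3][3] = 9.0;
//
//   viennacl::ell_matrix<double> A;
//   viennacl::copy(host_A, A);   // A.size1() == 4, A.size2() == 5, A.maxnnz() == 3
//
// The number of columns is deduced from the largest column index that actually occurs
// (max_col + 1), so the host matrix should contain at least one entry in its last column.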




template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
void copy(const ell_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& cpu_matrix)
{
  assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
  assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
  {
    std::vector<NumericT> elements(gpu_matrix.internal_nnz());
    viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz());

    viennacl::backend::memory_read(gpu_matrix.handle(),  0, sizeof(NumericT) * elements.size(), &(elements[0]));
    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get());

    for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
    {
      for (vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++)
      {
        vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;

        NumericT val = elements[offset];
        if (val <= 0 && val >= 0) // val == 0 without compiler warnings
          continue;

        if (coords[offset] >= gpu_matrix.size2())
        {
          std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl;
          return;
        }

        cpu_matrix(row, coords[offset]) = val;
      }
    }
  }
}

/** @brief Copies a sparse matrix from the compute device to the host. The host type is the std::vector< std::map<> > format.
  *
  * @param gpu_matrix   The sparse ell_matrix from ViennaCL
  * @param cpu_matrix   A sparse matrix on the host composed of an STL vector and an STL map.
  */
template<typename NumericT, unsigned int AlignmentV, typename IndexT>
void copy(const ell_matrix<NumericT, AlignmentV> & gpu_matrix,
          std::vector< std::map<IndexT, NumericT> > & cpu_matrix)
{
  if (cpu_matrix.size() == 0)
    cpu_matrix.resize(gpu_matrix.size1());

  assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension mismatch!"));

  tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2());
  viennacl::copy(gpu_matrix, temp);
}
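
// Copying back to the host is symmetric (sketch, assuming the matrix A from the example above):
//
//   std::vector< std::map<unsigned int, double> > host_B;
//   viennacl::copy(A, host_B);   // host_B is resized to A.size1(); only nonzero entries are written
//
// Note that entries that are exactly zero on the device are skipped, so explicitly stored zeros
// are not transferred back.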

//
// Specify available operations:
//
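
// The op_executor specializations below are what make expressions such as the following
// (sketch only; requires viennacl/linalg/prod.hpp) dispatch to prod_impl() for ell_matrix:
//
//   viennacl::vector<double> x(5), y(4);
//   y  = viennacl::linalg::prod(A, x);   // y  = A * x
//   y += viennacl::linalg::prod(A, x);   // y += A * x
//   y -= viennacl::linalg::prod(A, x);   // y -= A * x
//
// An expression like x = viennacl::linalg::prod(A, x), where the result aliases the argument,
// is handled by first computing into a temporary vector.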

/** \cond */

namespace linalg
{
namespace detail
{
  // x = A * y
  template<typename T, unsigned int A>
  struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
    {
      // check for the special case x = A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<T> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
        lhs = temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0));
    }
  };

  template<typename T, unsigned int A>
  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
    {
      // check for the special case x += A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<T> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
        lhs += temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1));
    }
  };

  template<typename T, unsigned int A>
  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
    {
      // check for the special case x -= A * x
      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
      {
        viennacl::vector<T> temp(lhs);
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
        lhs -= temp;
      }
      else
        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1));
    }
  };


  // x = A * vec_op
  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
      viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
    }
  };

  // x += A * vec_op
  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
      viennacl::vector<T> temp_result(lhs);
      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
      lhs += temp_result;
    }
  };

  // x -= A * vec_op
  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
  {
    static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
    {
      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
      viennacl::vector<T> temp_result(lhs);
      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
      lhs -= temp_result;
    }
  };

} // namespace detail
} // namespace linalg

/** \endcond */
}

#endif