1 #ifndef VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/matrix_operations.hpp
22     @brief Implementations of dense matrix related operations including matrix-vector products.
23 */
24 
25 #include "viennacl/forwards.h"
26 #include "viennacl/scalar.hpp"
27 #include "viennacl/vector.hpp"
28 #include "viennacl/vector_proxy.hpp"
29 #include "viennacl/tools/tools.hpp"
30 #include "viennacl/meta/enable_if.hpp"
31 #include "viennacl/meta/predicate.hpp"
32 #include "viennacl/meta/result_of.hpp"
33 #include "viennacl/traits/size.hpp"
34 #include "viennacl/traits/start.hpp"
35 #include "viennacl/traits/handle.hpp"
36 #include "viennacl/traits/stride.hpp"
37 #include "viennacl/vector.hpp"
38 #include "viennacl/linalg/host_based/matrix_operations.hpp"
39 
40 #ifdef VIENNACL_WITH_OPENCL
41   #include "viennacl/linalg/opencl/matrix_operations.hpp"
42 #endif
43 
44 #ifdef VIENNACL_WITH_CUDA
45   #include "viennacl/linalg/cuda/matrix_operations.hpp"
46 #endif
47 
48 namespace viennacl
49 {
50   namespace linalg
51   {
52 
53     template<typename DestNumericT, typename SrcNumericT>
convert(matrix_base<DestNumericT> & dest,matrix_base<SrcNumericT> const & src)54     void convert(matrix_base<DestNumericT> & dest, matrix_base<SrcNumericT> const & src)
55     {
56       assert(viennacl::traits::size1(dest) == viennacl::traits::size1(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size1(m1) != size1(m2)"));
57       assert(viennacl::traits::size2(dest) == viennacl::traits::size2(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size2(m1) != size2(m2)"));
58 
59       switch (viennacl::traits::handle(dest).get_active_handle_id())
60       {
61         case viennacl::MAIN_MEMORY:
62           viennacl::linalg::host_based::convert(dest, src);
63           break;
64 #ifdef VIENNACL_WITH_OPENCL
65         case viennacl::OPENCL_MEMORY:
66           viennacl::linalg::opencl::convert(dest, src);
67           break;
68 #endif
69 #ifdef VIENNACL_WITH_CUDA
70         case viennacl::CUDA_MEMORY:
71           viennacl::linalg::cuda::convert(dest, src);
72           break;
73 #endif
74         case viennacl::MEMORY_NOT_INITIALIZED:
75           throw memory_exception("not initialised!");
76         default:
77           throw memory_exception("not implemented");
78       }
79     }
80 
81     template<typename NumericT,
82               typename SizeT, typename DistanceT>
trans(const matrix_expression<const matrix_base<NumericT,SizeT,DistanceT>,const matrix_base<NumericT,SizeT,DistanceT>,op_trans> & proxy,matrix_base<NumericT> & temp_trans)83     void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
84               matrix_base<NumericT> & temp_trans)
85     {
86       switch (viennacl::traits::handle(proxy).get_active_handle_id())
87       {
88         case viennacl::MAIN_MEMORY:
89           viennacl::linalg::host_based::trans(proxy, temp_trans);
90           break;
91 #ifdef VIENNACL_WITH_OPENCL
92         case viennacl::OPENCL_MEMORY:
93           viennacl::linalg::opencl::trans(proxy,temp_trans);
94           break;
95 #endif
96 #ifdef VIENNACL_WITH_CUDA
97         case viennacl::CUDA_MEMORY:
98           viennacl::linalg::cuda::trans(proxy,temp_trans);
99           break;
100 #endif
101         case viennacl::MEMORY_NOT_INITIALIZED:
102           throw memory_exception("not initialised!");
103         default:
104           throw memory_exception("not implemented");
105       }
106     }
107 
108 
109     template<typename NumericT,
110               typename ScalarType1>
am(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha)111     void am(matrix_base<NumericT> & mat1,
112             matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
113     {
114       switch (viennacl::traits::handle(mat1).get_active_handle_id())
115       {
116         case viennacl::MAIN_MEMORY:
117           viennacl::linalg::host_based::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
118           break;
119 #ifdef VIENNACL_WITH_OPENCL
120         case viennacl::OPENCL_MEMORY:
121           viennacl::linalg::opencl::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
122           break;
123 #endif
124 #ifdef VIENNACL_WITH_CUDA
125         case viennacl::CUDA_MEMORY:
126           viennacl::linalg::cuda::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha);
127           break;
128 #endif
129         case viennacl::MEMORY_NOT_INITIALIZED:
130           throw memory_exception("not initialised!");
131         default:
132           throw memory_exception("not implemented");
133       }
134     }
135 
136 
137     template<typename NumericT,
138               typename ScalarType1, typename ScalarType2>
ambm(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,matrix_base<NumericT> const & mat3,ScalarType2 const & beta,vcl_size_t len_beta,bool reciprocal_beta,bool flip_sign_beta)139     void ambm(matrix_base<NumericT> & mat1,
140               matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
141               matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
142     {
143       switch (viennacl::traits::handle(mat1).get_active_handle_id())
144       {
145         case viennacl::MAIN_MEMORY:
146           viennacl::linalg::host_based::ambm(mat1,
147                                              mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
148                                              mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
149           break;
150 #ifdef VIENNACL_WITH_OPENCL
151         case viennacl::OPENCL_MEMORY:
152           viennacl::linalg::opencl::ambm(mat1,
153                                          mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
154                                          mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
155           break;
156 #endif
157 #ifdef VIENNACL_WITH_CUDA
158         case viennacl::CUDA_MEMORY:
159           viennacl::linalg::cuda::ambm(mat1,
160                                        mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
161                                        mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
162           break;
163 #endif
164         case viennacl::MEMORY_NOT_INITIALIZED:
165           throw memory_exception("not initialised!");
166         default:
167           throw memory_exception("not implemented");
168       }
169     }
170 
171 
172     template<typename NumericT,
173               typename ScalarType1, typename ScalarType2>
ambm_m(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,matrix_base<NumericT> const & mat3,ScalarType2 const & beta,vcl_size_t len_beta,bool reciprocal_beta,bool flip_sign_beta)174     void ambm_m(matrix_base<NumericT> & mat1,
175                 matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
176                 matrix_base<NumericT> const & mat3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
177     {
178       switch (viennacl::traits::handle(mat1).get_active_handle_id())
179       {
180         case viennacl::MAIN_MEMORY:
181           viennacl::linalg::host_based::ambm_m(mat1,
182                                                mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
183                                                mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
184           break;
185 #ifdef VIENNACL_WITH_OPENCL
186         case viennacl::OPENCL_MEMORY:
187           viennacl::linalg::opencl::ambm_m(mat1,
188                                            mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
189                                            mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
190           break;
191 #endif
192 #ifdef VIENNACL_WITH_CUDA
193         case viennacl::CUDA_MEMORY:
194           viennacl::linalg::cuda::ambm_m(mat1,
195                                          mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
196                                          mat3,  beta, len_beta,  reciprocal_beta,  flip_sign_beta);
197           break;
198 #endif
199         case viennacl::MEMORY_NOT_INITIALIZED:
200           throw memory_exception("not initialised!");
201         default:
202           throw memory_exception("not implemented");
203       }
204     }
205 
206 
207     template<typename NumericT>
matrix_assign(matrix_base<NumericT> & mat,NumericT s,bool clear=false)208     void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
209     {
210       switch (viennacl::traits::handle(mat).get_active_handle_id())
211       {
212         case viennacl::MAIN_MEMORY:
213           viennacl::linalg::host_based::matrix_assign(mat, s, clear);
214           break;
215 #ifdef VIENNACL_WITH_OPENCL
216         case viennacl::OPENCL_MEMORY:
217           viennacl::linalg::opencl::matrix_assign(mat, s, clear);
218           break;
219 #endif
220 #ifdef VIENNACL_WITH_CUDA
221         case viennacl::CUDA_MEMORY:
222           viennacl::linalg::cuda::matrix_assign(mat, s, clear);
223           break;
224 #endif
225         case viennacl::MEMORY_NOT_INITIALIZED:
226           throw memory_exception("not initialised!");
227         default:
228           throw memory_exception("not implemented");
229       }
230     }
231 
232 
233     template<typename NumericT>
matrix_diagonal_assign(matrix_base<NumericT> & mat,NumericT s)234     void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
235     {
236       switch (viennacl::traits::handle(mat).get_active_handle_id())
237       {
238         case viennacl::MAIN_MEMORY:
239           viennacl::linalg::host_based::matrix_diagonal_assign(mat, s);
240           break;
241 #ifdef VIENNACL_WITH_OPENCL
242         case viennacl::OPENCL_MEMORY:
243           viennacl::linalg::opencl::matrix_diagonal_assign(mat, s);
244           break;
245 #endif
246 #ifdef VIENNACL_WITH_CUDA
247         case viennacl::CUDA_MEMORY:
248           viennacl::linalg::cuda::matrix_diagonal_assign(mat, s);
249           break;
250 #endif
251         case viennacl::MEMORY_NOT_INITIALIZED:
252           throw memory_exception("not initialised!");
253         default:
254           throw memory_exception("not implemented");
255       }
256     }
257 
258 
259     /** @brief Dispatcher interface for A = diag(v, k) */
260     template<typename NumericT>
matrix_diag_from_vector(const vector_base<NumericT> & v,int k,matrix_base<NumericT> & A)261     void matrix_diag_from_vector(const vector_base<NumericT> & v, int k, matrix_base<NumericT> & A)
262     {
263       switch (viennacl::traits::handle(v).get_active_handle_id())
264       {
265         case viennacl::MAIN_MEMORY:
266           viennacl::linalg::host_based::matrix_diag_from_vector(v, k, A);
267           break;
268 #ifdef VIENNACL_WITH_OPENCL
269         case viennacl::OPENCL_MEMORY:
270           viennacl::linalg::opencl::matrix_diag_from_vector(v, k, A);
271           break;
272 #endif
273 #ifdef VIENNACL_WITH_CUDA
274         case viennacl::CUDA_MEMORY:
275           viennacl::linalg::cuda::matrix_diag_from_vector(v, k, A);
276           break;
277 #endif
278         case viennacl::MEMORY_NOT_INITIALIZED:
279           throw memory_exception("not initialised!");
280         default:
281           throw memory_exception("not implemented");
282       }
283     }
284 
285     /** @brief Dispatcher interface for v = diag(A, k) */
286     template<typename NumericT>
matrix_diag_to_vector(const matrix_base<NumericT> & A,int k,vector_base<NumericT> & v)287     void matrix_diag_to_vector(const matrix_base<NumericT> & A, int k, vector_base<NumericT> & v)
288     {
289       switch (viennacl::traits::handle(A).get_active_handle_id())
290       {
291         case viennacl::MAIN_MEMORY:
292           viennacl::linalg::host_based::matrix_diag_to_vector(A, k, v);
293           break;
294 #ifdef VIENNACL_WITH_OPENCL
295         case viennacl::OPENCL_MEMORY:
296           viennacl::linalg::opencl::matrix_diag_to_vector(A, k, v);
297           break;
298 #endif
299 #ifdef VIENNACL_WITH_CUDA
300         case viennacl::CUDA_MEMORY:
301           viennacl::linalg::cuda::matrix_diag_to_vector(A, k, v);
302           break;
303 #endif
304         case viennacl::MEMORY_NOT_INITIALIZED:
305           throw memory_exception("not initialised!");
306         default:
307           throw memory_exception("not implemented");
308       }
309     }
310 
311     template<typename NumericT>
matrix_row(const matrix_base<NumericT> & A,unsigned int i,vector_base<NumericT> & v)312     void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & v)
313     {
314       switch (viennacl::traits::handle(A).get_active_handle_id())
315       {
316         case viennacl::MAIN_MEMORY:
317           viennacl::linalg::host_based::matrix_row(A, i, v);
318           break;
319 #ifdef VIENNACL_WITH_OPENCL
320         case viennacl::OPENCL_MEMORY:
321           viennacl::linalg::opencl::matrix_row(A, i, v);
322           break;
323 #endif
324 #ifdef VIENNACL_WITH_CUDA
325         case viennacl::CUDA_MEMORY:
326           viennacl::linalg::cuda::matrix_row(A, i, v);
327           break;
328 #endif
329         case viennacl::MEMORY_NOT_INITIALIZED:
330           throw memory_exception("not initialised!");
331         default:
332           throw memory_exception("not implemented");
333       }
334     }
335 
336     template<typename NumericT>
matrix_column(const matrix_base<NumericT> & A,unsigned int j,vector_base<NumericT> & v)337     void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & v)
338     {
339       switch (viennacl::traits::handle(A).get_active_handle_id())
340       {
341         case viennacl::MAIN_MEMORY:
342           viennacl::linalg::host_based::matrix_column(A, j, v);
343           break;
344 #ifdef VIENNACL_WITH_OPENCL
345         case viennacl::OPENCL_MEMORY:
346           viennacl::linalg::opencl::matrix_column(A, j, v);
347           break;
348 #endif
349 #ifdef VIENNACL_WITH_CUDA
350         case viennacl::CUDA_MEMORY:
351           viennacl::linalg::cuda::matrix_column(A, j, v);
352           break;
353 #endif
354         case viennacl::MEMORY_NOT_INITIALIZED:
355           throw memory_exception("not initialised!");
356         default:
357           throw memory_exception("not implemented");
358       }
359     }
360 
361     /** @brief Computes the Frobenius norm of a matrix - dispatcher interface
362     *
363     * @param A      The matrix
364     * @param result The result scalar
365     *
366     * Note that if A is strided or off-set, then a copy will be created.
367     */
368     template<typename T>
norm_frobenius_impl(matrix_base<T> const & A,scalar<T> & result)369     void norm_frobenius_impl(matrix_base<T> const & A,
370                              scalar<T> & result)
371     {
372       typedef typename matrix_base<T>::handle_type  HandleType;
373 
374       if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
375         if (A.row_major()) {
376           viennacl::matrix<T, viennacl::row_major> temp_A(A);
377           viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
378           norm_2_impl(temp, result);
379         } else {
380           viennacl::matrix<T, viennacl::column_major> temp_A(A);
381           viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
382           norm_2_impl(temp, result);
383         }
384       } else {
385         viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
386         norm_2_impl(temp, result);
387       }
388 
389     }
390 
391     /** @brief Computes the Frobenius norm of a vector with final reduction on the CPU
392     *
393     * @param A      The matrix
394     * @param result The result scalar
395     *
396     * Note that if A is strided or off-set, then a copy will be created.
397     */
398     template<typename T>
norm_frobenius_cpu(matrix_base<T> const & A,T & result)399     void norm_frobenius_cpu(matrix_base<T> const & A,
400                             T & result)
401     {
402       typedef typename matrix_base<T>::handle_type  HandleType;
403 
404       if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) {
405         if (A.row_major()) {
406           viennacl::matrix<T, viennacl::row_major> temp_A(A);
407           viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
408           norm_2_cpu(temp, result);
409         } else {
410           viennacl::matrix<T, viennacl::column_major> temp_A(A);
411           viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1);
412           norm_2_cpu(temp, result);
413         }
414       } else {
415         viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1);
416         norm_2_cpu(temp, result);
417       }
418 
419     }
420 
421     //
422     /////////////////////////   matrix-vector products /////////////////////////////////
423     //
424 
425 
426 
427     // A * x
428 
429     /** @brief Carries out matrix-vector multiplication
430     *
431     * Implementation of the convenience expression result = prod(mat, vec);
432     *
433     * @param mat    The matrix
434     * @param vec    The vector
435     * @param result The result vector
436     */
437     template<typename NumericT>
prod_impl(const matrix_base<NumericT> & mat,const vector_base<NumericT> & vec,vector_base<NumericT> & result)438     void prod_impl(const matrix_base<NumericT> & mat,
439                    const vector_base<NumericT> & vec,
440                          vector_base<NumericT> & result)
441     {
442       assert( (viennacl::traits::size1(mat) == viennacl::traits::size(result)) && bool("Size check failed at v1 = prod(A, v2): size1(A) != size(v1)"));
443       assert( (viennacl::traits::size2(mat) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = prod(A, v2): size2(A) != size(v2)"));
444 
445       switch (viennacl::traits::handle(mat).get_active_handle_id())
446       {
447         case viennacl::MAIN_MEMORY:
448           viennacl::linalg::host_based::prod_impl(mat, false, vec, result);
449           break;
450 #ifdef VIENNACL_WITH_OPENCL
451         case viennacl::OPENCL_MEMORY:
452           viennacl::linalg::opencl::prod_impl(mat, false, vec, result);
453           break;
454 #endif
455 #ifdef VIENNACL_WITH_CUDA
456         case viennacl::CUDA_MEMORY:
457           viennacl::linalg::cuda::prod_impl(mat, false, vec, result);
458           break;
459 #endif
460         case viennacl::MEMORY_NOT_INITIALIZED:
461           throw memory_exception("not initialised!");
462         default:
463           throw memory_exception("not implemented");
464       }
465     }
466 
467 
468     // trans(A) * x
469 
470     /** @brief Carries out matrix-vector multiplication with a transposed matrix
471     *
472     * Implementation of the convenience expression result = trans(mat) * vec;
473     *
474     * @param mat_trans  The transposed matrix proxy
475     * @param vec        The vector
476     * @param result     The result vector
477     */
478     template<typename NumericT>
prod_impl(const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & mat_trans,const vector_base<NumericT> & vec,vector_base<NumericT> & result)479     void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans,
480                    const vector_base<NumericT> & vec,
481                          vector_base<NumericT> & result)
482     {
483       assert( (viennacl::traits::size1(mat_trans.lhs()) == viennacl::traits::size(vec))    && bool("Size check failed at v1 = trans(A) * v2: size1(A) != size(v2)"));
484       assert( (viennacl::traits::size2(mat_trans.lhs()) == viennacl::traits::size(result)) && bool("Size check failed at v1 = trans(A) * v2: size2(A) != size(v1)"));
485 
486       switch (viennacl::traits::handle(mat_trans.lhs()).get_active_handle_id())
487       {
488         case viennacl::MAIN_MEMORY:
489           viennacl::linalg::host_based::prod_impl(mat_trans.lhs(), true, vec, result);
490           break;
491 #ifdef VIENNACL_WITH_OPENCL
492         case viennacl::OPENCL_MEMORY:
493           viennacl::linalg::opencl::prod_impl(mat_trans.lhs(), true, vec, result);
494           break;
495 #endif
496 #ifdef VIENNACL_WITH_CUDA
497         case viennacl::CUDA_MEMORY:
498           viennacl::linalg::cuda::prod_impl(mat_trans.lhs(), true, vec, result);
499           break;
500 #endif
501         case viennacl::MEMORY_NOT_INITIALIZED:
502           throw memory_exception("not initialised!");
503         default:
504           throw memory_exception("not implemented");
505       }
506     }
507 
508 
509     //
510     /////////////////////////   matrix-matrix products /////////////////////////////////
511     //
512 
513     /** @brief Carries out matrix-matrix multiplication
514     *
515     * Implementation of C = prod(A, B);
516     *
517     */
518     template<typename NumericT, typename ScalarType >
prod_impl(const matrix_base<NumericT> & A,const matrix_base<NumericT> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)519     void prod_impl(const matrix_base<NumericT> & A,
520                    const matrix_base<NumericT> & B,
521                          matrix_base<NumericT> & C,
522                    ScalarType alpha,
523                    ScalarType beta)
524     {
525       assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size check failed at C = prod(A, B): size1(A) != size1(C)"));
526       assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size check failed at C = prod(A, B): size2(A) != size1(B)"));
527       assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size check failed at C = prod(A, B): size2(B) != size2(C)"));
528 
529 
530       switch (viennacl::traits::handle(A).get_active_handle_id())
531       {
532         case viennacl::MAIN_MEMORY:
533           viennacl::linalg::host_based::prod_impl(A, false, B, false, C, alpha, beta);
534           break;
535 #ifdef VIENNACL_WITH_OPENCL
536         case viennacl::OPENCL_MEMORY:
537           viennacl::linalg::opencl::prod_impl(A, false, B, false, C, alpha, beta);
538           break;
539 #endif
540 #ifdef VIENNACL_WITH_CUDA
541         case viennacl::CUDA_MEMORY:
542           viennacl::linalg::cuda::prod_impl(A, false, B, false, C, alpha, beta);
543           break;
544 #endif
545         case viennacl::MEMORY_NOT_INITIALIZED:
546           throw memory_exception("not initialised!");
547         default:
548           throw memory_exception("not implemented");
549       }
550     }
551 
552 
553 
554     /** @brief Carries out matrix-matrix multiplication
555     *
556     * Implementation of C = prod(trans(A), B);
557     *
558     */
559     template<typename NumericT, typename ScalarType >
prod_impl(const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & A,const matrix_base<NumericT> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)560     void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>,
561                                                       const matrix_base<NumericT>,
562                                                       op_trans> & A,
563                    const matrix_base<NumericT> & B,
564                          matrix_base<NumericT> & C,
565                    ScalarType alpha,
566                    ScalarType beta)
567     {
568       assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), B): size2(A) != size1(C)"));
569       assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B) && bool("Size check failed at C = prod(trans(A), B): size1(A) != size1(B)"));
570       assert(viennacl::traits::size2(B)       == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), B): size2(B) != size2(C)"));
571 
572       switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
573       {
574         case viennacl::MAIN_MEMORY:
575           viennacl::linalg::host_based::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
576           break;
577 #ifdef VIENNACL_WITH_OPENCL
578         case viennacl::OPENCL_MEMORY:
579           viennacl::linalg::opencl::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
580           break;
581 #endif
582 #ifdef VIENNACL_WITH_CUDA
583         case viennacl::CUDA_MEMORY:
584           viennacl::linalg::cuda::prod_impl(A.lhs(), true, B, false, C, alpha, beta);
585           break;
586 #endif
587         case viennacl::MEMORY_NOT_INITIALIZED:
588           throw memory_exception("not initialised!");
589         default:
590           throw memory_exception("not implemented");
591       }
592     }
593 
594 
595 
596 
597     /** @brief Carries out matrix-matrix multiplication
598     *
599     * Implementation of C = prod(A, trans(B));
600     *
601     */
602     template<typename NumericT, typename ScalarType >
prod_impl(const matrix_base<NumericT> & A,const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)603     void prod_impl(const matrix_base<NumericT> & A,
604                    const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
605                          matrix_base<NumericT> & C,
606                    ScalarType alpha,
607                    ScalarType beta)
608     {
609       assert(viennacl::traits::size1(A)       == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(A) != size1(C)"));
610       assert(viennacl::traits::size2(A)       == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(A, trans(B)): size2(A) != size2(B)"));
611       assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(A, trans(B)): size1(B) != size2(C)"));
612 
613       switch (viennacl::traits::handle(A).get_active_handle_id())
614       {
615         case viennacl::MAIN_MEMORY:
616           viennacl::linalg::host_based::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
617           break;
618 #ifdef VIENNACL_WITH_OPENCL
619         case viennacl::OPENCL_MEMORY:
620           viennacl::linalg::opencl::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
621           break;
622 #endif
623 #ifdef VIENNACL_WITH_CUDA
624         case viennacl::CUDA_MEMORY:
625           viennacl::linalg::cuda::prod_impl(A, false, B.lhs(), true, C, alpha, beta);
626           break;
627 #endif
628         case viennacl::MEMORY_NOT_INITIALIZED:
629           throw memory_exception("not initialised!");
630         default:
631           throw memory_exception("not implemented");
632       }
633     }
634 
635 
636 
637     /** @brief Carries out matrix-matrix multiplication
638     *
639     * Implementation of C = prod(trans(A), trans(B));
640     *
641     */
642     template<typename NumericT, typename ScalarType >
prod_impl(const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & A,const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)643     void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & A,
644                    const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B,
645                    matrix_base<NumericT> & C,
646                    ScalarType alpha,
647                    ScalarType beta)
648     {
649       assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size2(A) != size1(C)"));
650       assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(A) != size2(B)"));
651       assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C)       && bool("Size check failed at C = prod(trans(A), trans(B)): size1(B) != size2(C)"));
652 
653       switch (viennacl::traits::handle(A.lhs()).get_active_handle_id())
654       {
655         case viennacl::MAIN_MEMORY:
656           viennacl::linalg::host_based::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
657           break;
658 #ifdef VIENNACL_WITH_OPENCL
659         case viennacl::OPENCL_MEMORY:
660           viennacl::linalg::opencl::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
661           break;
662 #endif
663 #ifdef VIENNACL_WITH_CUDA
664         case viennacl::CUDA_MEMORY:
665           viennacl::linalg::cuda::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta);
666           break;
667 #endif
668         case viennacl::MEMORY_NOT_INITIALIZED:
669           throw memory_exception("not initialised!");
670         default:
671           throw memory_exception("not implemented");
672       }
673     }
674 
675 
676     ///////////////////////// summation operations /////////////
677 
678     template<typename NumericT>
row_sum_impl(matrix_base<NumericT> const & A,vector_base<NumericT> & result)679     void row_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
680     {
681       viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size2(), NumericT(1), viennacl::traits::context(A));
682       viennacl::linalg::prod_impl(A, all_ones, result);
683     }
684 
685     template<typename NumericT>
column_sum_impl(matrix_base<NumericT> const & A,vector_base<NumericT> & result)686     void column_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result)
687     {
688       viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size1(), NumericT(1), viennacl::traits::context(A));
689       viennacl::linalg::prod_impl(matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(A, A), all_ones, result);
690     }
691 
692     ///////////////////////// Elementwise operations /////////////
693 
694 
695 
696     /** @brief Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syntax). Don't use this function directly, use element_prod() and element_div().
697     *
698     * @param A      The result matrix (or -range, or -slice)
699     * @param proxy  The proxy object holding B, C, and the operation
700     */
701     template<typename T, typename OP>
element_op(matrix_base<T> & A,matrix_expression<const matrix_base<T>,const matrix_base<T>,OP> const & proxy)702     void element_op(matrix_base<T> & A,
703                     matrix_expression<const matrix_base<T>, const matrix_base<T>, OP> const & proxy)
704     {
705       assert( (viennacl::traits::size1(A) == viennacl::traits::size1(proxy)) && bool("Size check failed at A = element_op(B): size1(A) != size1(B)"));
706       assert( (viennacl::traits::size2(A) == viennacl::traits::size2(proxy)) && bool("Size check failed at A = element_op(B): size2(A) != size2(B)"));
707 
708       switch (viennacl::traits::handle(A).get_active_handle_id())
709       {
710         case viennacl::MAIN_MEMORY:
711           viennacl::linalg::host_based::element_op(A, proxy);
712           break;
713 #ifdef VIENNACL_WITH_OPENCL
714         case viennacl::OPENCL_MEMORY:
715           viennacl::linalg::opencl::element_op(A, proxy);
716           break;
717 #endif
718 #ifdef VIENNACL_WITH_CUDA
719         case viennacl::CUDA_MEMORY:
720           viennacl::linalg::cuda::element_op(A, proxy);
721           break;
722 #endif
723         case viennacl::MEMORY_NOT_INITIALIZED:
724           throw memory_exception("not initialised!");
725         default:
726           throw memory_exception("not implemented");
727       }
728     }
729 
730 
731 #define VIENNACL_MAKE_BINARY_OP(OPNAME)\
732     template<typename T>\
733     viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >\
734     element_##OPNAME(matrix_base<T> const & A, matrix_base<T> const & B)\
735     {\
736       return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >(A, B);\
737     }\
738 \
739     template<typename M1, typename M2, typename OP, typename T>\
740     viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
741                                 const matrix_base<T>,\
742                                 op_element_binary<op_##OPNAME> >\
743     element_##OPNAME(matrix_expression<const M1, const M2, OP> const & proxy, matrix_base<T> const & B)\
744     {\
745       return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\
746                                          const matrix_base<T>,\
747                                          op_element_binary<op_##OPNAME> >(proxy, B);\
748     }\
749 \
750     template<typename T, typename M2, typename M3, typename OP>\
751     viennacl::matrix_expression<const matrix_base<T>,\
752                                 const matrix_expression<const M2, const M3, OP>,\
753                                 op_element_binary<op_##OPNAME> >\
754     element_##OPNAME(matrix_base<T> const & A, matrix_expression<const M2, const M3, OP> const & proxy)\
755     {\
756       return viennacl::matrix_expression<const matrix_base<T>,\
757                                          const matrix_expression<const M2, const M3, OP>,\
758                                          op_element_binary<op_##OPNAME> >(A, proxy);\
759     }\
760 \
761     template<typename M1, typename M2, typename OP1,\
762               typename M3, typename M4, typename OP2>\
763     viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
764                                 const matrix_expression<const M3, const M4, OP2>,\
765                                 op_element_binary<op_##OPNAME> >\
766     element_##OPNAME(matrix_expression<const M1, const M2, OP1> const & proxy1,\
767                  matrix_expression<const M3, const M4, OP2> const & proxy2)\
768     {\
769       return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\
770                                          const matrix_expression<const M3, const M4, OP2>,\
771                                          op_element_binary<op_##OPNAME> >(proxy1, proxy2);\
772     }
773 
774     VIENNACL_MAKE_BINARY_OP(prod)
VIENNACL_MAKE_BINARY_OP(div)775     VIENNACL_MAKE_BINARY_OP(div)
776     VIENNACL_MAKE_BINARY_OP(pow)
777 
778     VIENNACL_MAKE_BINARY_OP(eq)
779     VIENNACL_MAKE_BINARY_OP(neq)
780     VIENNACL_MAKE_BINARY_OP(greater)
781     VIENNACL_MAKE_BINARY_OP(less)
782     VIENNACL_MAKE_BINARY_OP(geq)
783     VIENNACL_MAKE_BINARY_OP(leq)
784 
785 #undef VIENNACL_GENERATE_BINARY_OP_OVERLOADS
786 
787 
788 
789 #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \
790     template<typename T> \
791     viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> > \
792     element_##funcname(matrix_base<T> const & A) \
793     { \
794       return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> >(A, A); \
795     } \
796     template<typename LHS, typename RHS, typename OP> \
797     viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
798                                 const matrix_expression<const LHS, const RHS, OP>, \
799                                 op_element_unary<op_##funcname> > \
800     element_##funcname(matrix_expression<const LHS, const RHS, OP> const & proxy) \
801     { \
802       return viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \
803                                          const matrix_expression<const LHS, const RHS, OP>, \
804                                          op_element_unary<op_##funcname> >(proxy, proxy); \
805     } \
806 
807     VIENNACL_MAKE_UNARY_ELEMENT_OP(abs)
808     VIENNACL_MAKE_UNARY_ELEMENT_OP(acos)
809     VIENNACL_MAKE_UNARY_ELEMENT_OP(asin)
810     VIENNACL_MAKE_UNARY_ELEMENT_OP(atan)
811     VIENNACL_MAKE_UNARY_ELEMENT_OP(ceil)
812     VIENNACL_MAKE_UNARY_ELEMENT_OP(cos)
813     VIENNACL_MAKE_UNARY_ELEMENT_OP(cosh)
814     VIENNACL_MAKE_UNARY_ELEMENT_OP(exp)
815     VIENNACL_MAKE_UNARY_ELEMENT_OP(fabs)
816     VIENNACL_MAKE_UNARY_ELEMENT_OP(floor)
817     VIENNACL_MAKE_UNARY_ELEMENT_OP(log)
818     VIENNACL_MAKE_UNARY_ELEMENT_OP(log10)
819     VIENNACL_MAKE_UNARY_ELEMENT_OP(sin)
820     VIENNACL_MAKE_UNARY_ELEMENT_OP(sinh)
821     VIENNACL_MAKE_UNARY_ELEMENT_OP(sqrt)
822     VIENNACL_MAKE_UNARY_ELEMENT_OP(tan)
823     VIENNACL_MAKE_UNARY_ELEMENT_OP(tanh)
824 
825 #undef VIENNACL_MAKE_UNARY_ELEMENT_OP
826 
827 
828     //
829     /////////////////////////   miscellaneous operations /////////////////////////////////
830     //
831 
832 
833     /** @brief Returns a proxy class for the operation mat += vec1 * vec2^T, i.e. a rank 1 update
834     *
835     * @param vec1    The first vector
836     * @param vec2    The second vector
837     */
838     template<typename NumericT>
839     viennacl::matrix_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_prod>
840     outer_prod(const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2)
841     {
842       return viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>(vec1, vec2);
843     }
844 
845 
846     /** @brief The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update
847     *
848     * Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
849     *
850     * @param mat1             The matrix to be updated
851     * @param alpha            The scaling factor (either a viennacl::scalar<>, float, or double)
852     * @param len_alpha        Length of the buffer for an eventual final reduction step (currently always '1')
853     * @param reciprocal_alpha Use 1/alpha instead of alpha
854     * @param flip_sign_alpha  Use -alpha instead of alpha
855     * @param vec1             The first vector
856     * @param vec2             The second vector
857     */
858     template<typename NumericT, typename S1>
scaled_rank_1_update(matrix_base<NumericT> & mat1,S1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,const vector_base<NumericT> & vec1,const vector_base<NumericT> & vec2)859     void scaled_rank_1_update(matrix_base<NumericT> & mat1,
860                               S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
861                               const vector_base<NumericT> & vec1,
862                               const vector_base<NumericT> & vec2)
863     {
864       switch (viennacl::traits::handle(mat1).get_active_handle_id())
865       {
866         case viennacl::MAIN_MEMORY:
867           viennacl::linalg::host_based::scaled_rank_1_update(mat1,
868                                                              alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
869                                                              vec1, vec2);
870           break;
871 #ifdef VIENNACL_WITH_OPENCL
872         case viennacl::OPENCL_MEMORY:
873           viennacl::linalg::opencl::scaled_rank_1_update(mat1,
874                                                          alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
875                                                          vec1, vec2);
876           break;
877 #endif
878 #ifdef VIENNACL_WITH_CUDA
879         case viennacl::CUDA_MEMORY:
880           viennacl::linalg::cuda::scaled_rank_1_update(mat1,
881                                                        alpha, len_alpha, reciprocal_alpha, flip_sign_alpha,
882                                                        vec1, vec2);
883           break;
884 #endif
885         case viennacl::MEMORY_NOT_INITIALIZED:
886           throw memory_exception("not initialised!");
887         default:
888           throw memory_exception("not implemented");
889       }
890     }
891 
892     /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors.
893     *
894     *
895     * @param A     The matrix from which the vectors will be extracted of.
896     * @param dh    The vector in which the diagonal of the matrix will be stored in.
897     * @param sh    The vector in which the superdiagonal of the matrix will be stored in.
898     */
899 
900     template <typename NumericT, typename VectorType>
bidiag_pack(matrix_base<NumericT> & A,VectorType & dh,VectorType & sh)901     void bidiag_pack(matrix_base<NumericT> & A,
902                      VectorType & dh,
903                      VectorType & sh
904                     )
905     {
906       switch (viennacl::traits::handle(A).get_active_handle_id())
907       {
908         case viennacl::MAIN_MEMORY:
909           viennacl::linalg::host_based::bidiag_pack(A, dh, sh);
910           break;
911 #ifdef VIENNACL_WITH_OPENCL
912         case viennacl::OPENCL_MEMORY:
913           viennacl::linalg::opencl::bidiag_pack(A, dh, sh);
914           break;
915 #endif
916 
917 #ifdef VIENNACL_WITH_CUDA
918         case viennacl::CUDA_MEMORY:
919           viennacl::linalg::cuda::bidiag_pack(A, dh, sh);
920           break;
921 #endif
922 
923         case viennacl::MEMORY_NOT_INITIALIZED:
924           throw memory_exception("not initialised!");
925         default:
926           throw memory_exception("not implemented");
927       }
928 
929 
930     }
931     /** @brief This function copies a row or a column from a matrix to a vector.
932     *
933     *
934     * @param A          The matrix where to copy from.
935     * @param V          The vector to fill with data.
936     * @param row_start  The number of the first row to copy.
937     * @param col_start  The number of the first column to copy.
938     * @param copy_col   Set to TRUE to copy a column, FALSE to copy a row.
939     */
940 
941     template <typename SCALARTYPE>
copy_vec(matrix_base<SCALARTYPE> & A,vector_base<SCALARTYPE> & V,vcl_size_t row_start,vcl_size_t col_start,bool copy_col)942     void copy_vec(matrix_base<SCALARTYPE>& A,
943                   vector_base<SCALARTYPE>& V,
944                   vcl_size_t row_start,
945                   vcl_size_t col_start,
946                   bool copy_col
947     )
948     {
949       switch (viennacl::traits::handle(A).get_active_handle_id())
950       {
951         case viennacl::MAIN_MEMORY:
952           viennacl::linalg::host_based::copy_vec(A, V, row_start, col_start, copy_col);
953           break;
954 #ifdef VIENNACL_WITH_OPENCL
955         case viennacl::OPENCL_MEMORY:
956           viennacl::linalg::opencl::copy_vec(A, V, row_start, col_start, copy_col);
957           break;
958 #endif
959 
960 #ifdef VIENNACL_WITH_CUDA
961         case viennacl::CUDA_MEMORY:
962           viennacl::linalg::cuda::copy_vec(A, V, row_start, col_start, copy_col);
963           break;
964 #endif
965 
966         case viennacl::MEMORY_NOT_INITIALIZED:
967           throw memory_exception("not initialised!");
968         default:
969           throw memory_exception("not implemented");
970       }
971 
972     }
973 
974     /** @brief This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P
975     *
976     * @param A       The matrix to be updated.
977     * @param D       The normalized householder vector.
978     * @param start   The repetition counter.
979     */
980   template <typename NumericT>
house_update_A_left(matrix_base<NumericT> & A,vector_base<NumericT> & D,vcl_size_t start)981   void house_update_A_left(matrix_base<NumericT> & A,
982                            vector_base<NumericT>    & D,
983                            vcl_size_t start)
984   {
985     switch (viennacl::traits::handle(A).get_active_handle_id())
986     {
987       case viennacl::MAIN_MEMORY:
988         viennacl::linalg::host_based::house_update_A_left(A, D, start);
989         break;
990 #ifdef VIENNACL_WITH_OPENCL
991       case viennacl::OPENCL_MEMORY:
992         viennacl::linalg::opencl::house_update_A_left(A, D, start);
993         break;
994 #endif
995 
996 #ifdef VIENNACL_WITH_CUDA
997       case viennacl::CUDA_MEMORY:
998         viennacl::linalg::cuda::house_update_A_left(A, D, start);
999         break;
1000 #endif
1001 
1002       case viennacl::MEMORY_NOT_INITIALIZED:
1003         throw memory_exception("not initialised!");
1004       default:
1005         throw memory_exception("not implemented");
1006     }
1007   }
1008 
1009 
1010   /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P
1011   *
1012   *
1013   * @param A        The matrix to be updated.
1014   * @param D        The normalized householder vector.
1015   */
1016 
1017   template <typename NumericT>
house_update_A_right(matrix_base<NumericT> & A,vector_base<NumericT> & D)1018   void house_update_A_right(matrix_base<NumericT>& A,
1019                             vector_base<NumericT>   & D)
1020   {
1021     switch (viennacl::traits::handle(A).get_active_handle_id())
1022     {
1023       case viennacl::MAIN_MEMORY:
1024         viennacl::linalg::host_based::house_update_A_right(A, D);
1025         break;
1026 #ifdef VIENNACL_WITH_OPENCL
1027       case viennacl::OPENCL_MEMORY:
1028         viennacl::linalg::opencl::house_update_A_right(A, D);
1029         break;
1030 #endif
1031 
1032 #ifdef VIENNACL_WITH_CUDA
1033       case viennacl::CUDA_MEMORY:
1034         viennacl::linalg::cuda::house_update_A_right(A, D);
1035         break;
1036 #endif
1037 
1038       case viennacl::MEMORY_NOT_INITIALIZED:
1039         throw memory_exception("not initialised!");
1040       default:
1041         throw memory_exception("not implemented");
1042     }
1043   }
1044 
1045   /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors.
1046   *
1047   * @param Q        The matrix to be updated.
1048   * @param D        The householder vector.
1049   * @param A_size1  size1 of matrix A
1050   */
1051 
1052   template <typename NumericT>
house_update_QL(matrix_base<NumericT> & Q,vector_base<NumericT> & D,vcl_size_t A_size1)1053   void house_update_QL(matrix_base<NumericT> & Q,
1054                        vector_base<NumericT>    & D,
1055                        vcl_size_t A_size1)
1056   {
1057     switch (viennacl::traits::handle(Q).get_active_handle_id())
1058     {
1059       case viennacl::MAIN_MEMORY:
1060         viennacl::linalg::host_based::house_update_QL(Q, D, A_size1);
1061         break;
1062 #ifdef VIENNACL_WITH_OPENCL
1063       case viennacl::OPENCL_MEMORY:
1064         viennacl::linalg::opencl::house_update_QL(Q, D, A_size1);
1065         break;
1066 #endif
1067 
1068 #ifdef VIENNACL_WITH_CUDA
1069       case viennacl::CUDA_MEMORY:
1070         viennacl::linalg::cuda::house_update_QL(Q, D, A_size1);
1071         break;
1072 #endif
1073 
1074       case viennacl::MEMORY_NOT_INITIALIZED:
1075         throw memory_exception("not initialised!");
1076       default:
1077         throw memory_exception("not implemented");
1078     }
1079   }
1080 
1081 
1082   /** @brief This function updates the matrix Q. It is part of the tql2 algorithm.
1083   *
1084   *
1085   * @param Q       The matrix to be updated.
1086   * @param tmp1    Vector with data from the tql2 algorithm.
1087   * @param tmp2    Vector with data from the tql2 algorithm.
1088   * @param l       Data from the tql2 algorithm.
1089   * @param m       Data from the tql2 algorithm.
1090   */
1091   template<typename NumericT>
givens_next(matrix_base<NumericT> & Q,vector_base<NumericT> & tmp1,vector_base<NumericT> & tmp2,int l,int m)1092   void givens_next(matrix_base<NumericT> & Q,
1093                    vector_base<NumericT> & tmp1,
1094                    vector_base<NumericT> & tmp2,
1095                    int l,
1096                    int m
1097                 )
1098   {
1099     switch (viennacl::traits::handle(Q).get_active_handle_id())
1100     {
1101       case viennacl::MAIN_MEMORY:
1102         viennacl::linalg::host_based::givens_next(Q, tmp1, tmp2, l, m);
1103         break;
1104 #ifdef VIENNACL_WITH_OPENCL
1105       case viennacl::OPENCL_MEMORY:
1106         viennacl::linalg::opencl::givens_next(Q, tmp1, tmp2, l, m);
1107         break;
1108 #endif
1109 
1110 #ifdef VIENNACL_WITH_CUDA
1111       case viennacl::CUDA_MEMORY:
1112         viennacl::linalg::cuda::givens_next(Q, tmp1, tmp2, l, m);
1113         break;
1114 #endif
1115 
1116       case viennacl::MEMORY_NOT_INITIALIZED:
1117         throw memory_exception("not initialised!");
1118       default:
1119         throw memory_exception("not implemented");
1120     }
1121   }
1122 
1123   } //namespace linalg
1124 
1125 
1126 
1127 
1128   //
1129   /////////////////////////  Operator overloads /////////////////////////////////
1130   //
1131 
1132 
1133   //v += A * x
1134   /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
1135   *
1136   * @param v1     The result vector v1 where A * v2 is added to
1137   * @param proxy  An expression template proxy class.
1138   */
1139   template<typename NumericT>
1140   vector<NumericT>
operator +=(vector_base<NumericT> & v1,const viennacl::vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,viennacl::op_prod> & proxy)1141   operator+=(vector_base<NumericT> & v1,
1142              const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
1143   {
1144     assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 += A * v2: size1(A) != size(v1)"));
1145 
1146     vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
1147     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1148     v1 += result;
1149     return v1;
1150   }
1151 
1152   /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
1153   *
1154   * @param v1     The result vector v1 where A * v2 is subtracted from
1155   * @param proxy  An expression template proxy class.
1156   */
1157   template<typename NumericT>
1158   vector<NumericT>
operator -=(vector_base<NumericT> & v1,const viennacl::vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,viennacl::op_prod> & proxy)1159   operator-=(vector_base<NumericT> & v1,
1160              const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy)
1161   {
1162     assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 -= A * v2: size1(A) != size(v1)"));
1163 
1164     vector<NumericT> result(viennacl::traits::size1(proxy.lhs()));
1165     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1166     v1 -= result;
1167     return v1;
1168   }
1169 
1170 
1171 
1172 
1173 
1174   //free functions:
1175   /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
1176   *
1177   * @param v1     The addend vector.
1178   * @param proxy  An expression template proxy class.
1179   */
1180   template<typename NumericT>
1181   viennacl::vector<NumericT>
operator +(const vector_base<NumericT> & v1,const vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,op_prod> & proxy)1182   operator+(const vector_base<NumericT> & v1,
1183             const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1184   {
1185     assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 + A * v2: size1(A) != size(v1)"));
1186 
1187     vector<NumericT> result(viennacl::traits::size(v1));
1188     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1189     result += v1;
1190     return result;
1191   }
1192 
1193   /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
1194   *
1195   * @param v1     The addend vector.
1196   * @param proxy  An expression template proxy class.
1197   */
1198   template<typename NumericT>
1199   viennacl::vector<NumericT>
operator -(const vector_base<NumericT> & v1,const vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,op_prod> & proxy)1200   operator-(const vector_base<NumericT> & v1,
1201             const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1202   {
1203     assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 - A * v2: size1(A) != size(v1)"));
1204 
1205     vector<NumericT> result(viennacl::traits::size(v1));
1206     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1207     result = v1 - result;
1208     return result;
1209   }
1210 
1211 
1212   ////////// transposed_matrix_proxy
1213 
1214 
1215   //v += A^T * x
1216   /** @brief Implementation of the operation v1 += A * v2, where A is a matrix
1217   *
1218   * @param v1     The addend vector where the result is written to.
1219   * @param proxy  An expression template proxy class.
1220   */
1221   template<typename NumericT>
1222   vector<NumericT>
operator +=(vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1223   operator+=(vector_base<NumericT> & v1,
1224              const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
1225                                                               const vector_base<NumericT>,
1226                                                               op_prod> & proxy)
1227   {
1228     assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
1229 
1230     vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
1231     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1232     v1 += result;
1233     return v1;
1234   }
1235 
1236   //v -= A^T * x
1237   /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix
1238   *
1239   * @param v1     The addend vector where the result is written to.
1240   * @param proxy  An expression template proxy class.
1241   */
1242   template<typename NumericT>
1243   vector<NumericT>
operator -=(vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1244   operator-=(vector_base<NumericT> & v1,
1245              const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
1246                                                               const vector_base<NumericT>,
1247                                                               op_prod> & proxy)
1248   {
1249     assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)"));
1250 
1251     vector<NumericT> result(viennacl::traits::size2(proxy.lhs()));
1252     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1253     v1 -= result;
1254     return v1;
1255   }
1256 
1257 
1258   //free functions:
1259   /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix
1260   *
1261   * @param v1     The addend vector.
1262   * @param proxy  An expression template proxy class.
1263   */
1264   template<typename NumericT>
1265   vector<NumericT>
operator +(const vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1266   operator+(const vector_base<NumericT> & v1,
1267             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
1268                                      const vector_base<NumericT>,
1269                                      op_prod> & proxy)
1270   {
1271     assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 + trans(A) * v2: size2(A) != size(v1)"));
1272 
1273     vector<NumericT> result(viennacl::traits::size(v1));
1274     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1275     result += v1;
1276     return result;
1277   }
1278 
1279   /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix
1280   *
1281   * @param v1     The addend vector.
1282   * @param proxy  An expression template proxy class.
1283   */
1284   template<typename NumericT>
1285   vector<NumericT>
operator -(const vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1286   operator-(const vector_base<NumericT> & v1,
1287             const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>,
1288                                      const vector_base<NumericT>,
1289                                      op_prod> & proxy)
1290   {
1291     assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 - trans(A) * v2: size2(A) != size(v1)"));
1292 
1293     vector<NumericT> result(viennacl::traits::size(v1));
1294     viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result);
1295     result = v1 - result;
1296     return result;
1297   }
1298 
1299 
1300 } //namespace viennacl
1301 
1302 
1303 #endif
1304