1 #ifndef VIENNACL_FORWARDS_H
2 #define VIENNACL_FORWARDS_H
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
22 /** @file viennacl/forwards.h
23     @brief This file provides the forward declarations for the main types used within ViennaCL
24 */
25 
26 /**
27  @mainpage Main Page
28 
29  Here you can find all the documentation on how to use the GPU-accelerated linear algebra library ViennaCL.
 The formerly separate \ref usermanual "user manual" is no longer available as a standalone PDF; it is now fully integrated into the HTML-based documentation.
31  Please use the navigation panel on the left to access the desired information.
32 
33  Quick links:
34      - \ref manual-installation "Installation and building the examples"
35      - \ref manual-types        "Basic types"
36      - \ref manual-operations   "Basic operations"
37      - \ref manual-algorithms   "Algorithms"
38 
39 
40  -----------------------------------
41  \htmlonly
 <div style="text-align: right; width: 100%">
43  <a href="http://www.tuwien.ac.at/"><img src="tuwien.png"></a>
44  <a href="http://www.iue.tuwien.ac.at/"><img src="iue.png"></a>
45  <a href="http://www.asc.tuwien.ac.at/"><img src="asc.png"></a>
46  </div>
47  \endhtmlonly
48 */
49 
50 
// Compatibility defines: map the legacy VIENNACL_HAVE_* switches onto the
// current VIENNACL_WITH_* names.
// Each alias is only introduced if the new-style macro is not set already, so
// a user-provided definition (e.g. -DVIENNACL_WITH_UBLAS=1 on the command
// line) is kept as is instead of being redefined with a different (empty)
// replacement list.
#if defined(VIENNACL_HAVE_UBLAS) && !defined(VIENNACL_WITH_UBLAS)
  #define VIENNACL_WITH_UBLAS
#endif

#if defined(VIENNACL_HAVE_EIGEN) && !defined(VIENNACL_WITH_EIGEN)
  #define VIENNACL_WITH_EIGEN
#endif

#if defined(VIENNACL_HAVE_MTL4) && !defined(VIENNACL_WITH_MTL4)
  #define VIENNACL_WITH_MTL4
#endif
63 
64 #include <cstddef>
65 #include <cassert>
66 #include <string>
67 #include <stdexcept>
68 
69 #include "viennacl/meta/enable_if.hpp"
70 #include "viennacl/version.hpp"
71 
72 /** @brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. */
73 namespace viennacl
74 {
75   typedef std::size_t                                       vcl_size_t;
76   typedef std::ptrdiff_t                                    vcl_ptrdiff_t;
77 
78 
79 
80   /** @brief A tag class representing assignment */
81   struct op_assign {};
82   /** @brief A tag class representing inplace addition */
83   struct op_inplace_add {};
84   /** @brief A tag class representing inplace subtraction */
85   struct op_inplace_sub {};
86 
87   /** @brief A tag class representing addition */
88   struct op_add {};
89   /** @brief A tag class representing subtraction */
90   struct op_sub {};
91   /** @brief A tag class representing multiplication by a scalar */
92   struct op_mult {};
93   /** @brief A tag class representing matrix-vector products and element-wise multiplications*/
94   struct op_prod {};
95   /** @brief A tag class representing matrix-matrix products */
96   struct op_mat_mat_prod {};
97   /** @brief A tag class representing division */
98   struct op_div {};
99   /** @brief A tag class representing the power function */
100   struct op_pow {};
101 
102   /** @brief A tag class representing equality */
103  struct op_eq {};
104  /** @brief A tag class representing inequality */
105  struct op_neq {};
106  /** @brief A tag class representing greater-than */
107  struct op_greater {};
108  /** @brief A tag class representing less-than */
109  struct op_less {};
110  /** @brief A tag class representing greater-than-or-equal-to */
111  struct op_geq {};
112  /** @brief A tag class representing less-than-or-equal-to */
113  struct op_leq {};
114 
115   /** @brief A tag class representing the summation of a vector */
116   struct op_sum {};
117 
118   /** @brief A tag class representing the summation of all rows of a matrix */
119   struct op_row_sum {};
120 
121   /** @brief A tag class representing the summation of all columns of a matrix */
122   struct op_col_sum {};
123 
124   /** @brief A tag class representing element-wise casting operations on vectors and matrices */
125   template<typename OP>
126   struct op_element_cast {};
127 
128   /** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */
129   template<typename OP>
130   struct op_element_binary {};
131 
132   /** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */
133   template<typename OP>
134   struct op_element_unary {};
135 
136   /** @brief A tag class representing the modulus function for integers */
137   struct op_abs {};
138   /** @brief A tag class representing the acos() function */
139   struct op_acos {};
140   /** @brief A tag class representing the asin() function */
141   struct op_asin {};
142   /** @brief A tag class for representing the argmax() function */
143   struct op_argmax {};
144   /** @brief A tag class for representing the argmin() function */
145   struct op_argmin {};
146   /** @brief A tag class representing the atan() function */
147   struct op_atan {};
148   /** @brief A tag class representing the atan2() function */
149   struct op_atan2 {};
150   /** @brief A tag class representing the ceil() function */
151   struct op_ceil {};
152   /** @brief A tag class representing the cos() function */
153   struct op_cos {};
154   /** @brief A tag class representing the cosh() function */
155   struct op_cosh {};
156   /** @brief A tag class representing the exp() function */
157   struct op_exp {};
158   /** @brief A tag class representing the fabs() function */
159   struct op_fabs {};
160   /** @brief A tag class representing the fdim() function */
161   struct op_fdim {};
162   /** @brief A tag class representing the floor() function */
163   struct op_floor {};
164   /** @brief A tag class representing the fmax() function */
165   struct op_fmax {};
166   /** @brief A tag class representing the fmin() function */
167   struct op_fmin {};
168   /** @brief A tag class representing the fmod() function */
169   struct op_fmod {};
170   /** @brief A tag class representing the log() function */
171   struct op_log {};
172   /** @brief A tag class representing the log10() function */
173   struct op_log10 {};
174   /** @brief A tag class representing the sin() function */
175   struct op_sin {};
176   /** @brief A tag class representing the sinh() function */
177   struct op_sinh {};
178   /** @brief A tag class representing the sqrt() function */
179   struct op_sqrt {};
180   /** @brief A tag class representing the tan() function */
181   struct op_tan {};
182   /** @brief A tag class representing the tanh() function */
183   struct op_tanh {};
184 
185   /** @brief A tag class representing the (off-)diagonal of a matrix */
186   struct op_matrix_diag {};
187 
188   /** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */
189   struct op_vector_diag {};
190 
191   /** @brief A tag class representing the extraction of a matrix row to a vector */
192   struct op_row {};
193 
194   /** @brief A tag class representing the extraction of a matrix column to a vector */
195   struct op_column {};
196 
197   /** @brief A tag class representing inner products of two vectors */
198   struct op_inner_prod {};
199 
200   /** @brief A tag class representing the 1-norm of a vector */
201   struct op_norm_1 {};
202 
203   /** @brief A tag class representing the 2-norm of a vector */
204   struct op_norm_2 {};
205 
206   /** @brief A tag class representing the inf-norm of a vector */
207   struct op_norm_inf {};
208 
209   /** @brief A tag class representing the maximum of a vector */
210   struct op_max {};
211 
212   /** @brief A tag class representing the minimum of a vector */
213   struct op_min {};
214 
215 
216   /** @brief A tag class representing the Frobenius-norm of a matrix */
217   struct op_norm_frobenius {};
218 
219   /** @brief A tag class representing transposed matrices */
220   struct op_trans {};
221 
222   /** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */
223   struct op_flip_sign {};
224 
225   //forward declaration of basic types:
226   template<class TYPE>
227   class scalar;
228 
229   template<typename LHS, typename RHS, typename OP>
230   class scalar_expression;
231 
232   template<typename SCALARTYPE>
233   class entry_proxy;
234 
235   template<typename SCALARTYPE>
236   class const_entry_proxy;
237 
238   template<typename LHS, typename RHS, typename OP>
239   class vector_expression;
240 
241   template<class SCALARTYPE, unsigned int ALIGNMENT>
242   class vector_iterator;
243 
244   template<class SCALARTYPE, unsigned int ALIGNMENT>
245   class const_vector_iterator;
246 
247   template<typename SCALARTYPE>
248   class implicit_vector_base;
249 
250   template<typename SCALARTYPE>
251   struct zero_vector;
252 
253   template<typename SCALARTYPE>
254   struct unit_vector;
255 
256   template<typename SCALARTYPE>
257   struct one_vector;
258 
259   template<typename SCALARTYPE>
260   struct scalar_vector;
261 
262   template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t>
263   class vector_base;
264 
265   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
266   class vector;
267 
268   template<typename ScalarT>
269   class vector_tuple;
270 
271   //the following forwards are needed for GMRES
272   template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
273   void copy(CPU_ITERATOR const & cpu_begin,
274             CPU_ITERATOR const & cpu_end,
275             vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin);
276 
277   template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST>
278   void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin,
279             const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end,
280             vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin);
281 
282   template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST>
283   void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin,
284             const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end,
285             const_vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin);
286 
287   template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
288   void fast_copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin,
289                  const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end,
290                  CPU_ITERATOR cpu_begin );
291 
292   template<typename CPU_ITERATOR, typename SCALARTYPE, unsigned int ALIGNMENT>
293   void fast_copy(CPU_ITERATOR const & cpu_begin,
294                   CPU_ITERATOR const & cpu_end,
295                   vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin);
296 
297 
298   /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. */
299   struct row_major_tag {};
300   /** @brief Tag class for indicating column-major layout of a matrix. Not passed to the matrix directly, see row_major type. */
301   struct column_major_tag {};
302 
303   /** @brief A tag for row-major storage of a dense matrix. */
304   struct row_major
305   {
306     typedef row_major_tag         orientation_category;
307 
308     /** @brief Returns the memory offset for entry (i,j) of a dense matrix.
309     *
310     * @param i   row index
311     * @param j   column index
312     * @param num_cols  number of entries per column (including alignment)
313     */
mem_indexrow_major314     static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t /* num_rows */, vcl_size_t num_cols)
315     {
316       return i * num_cols + j;
317     }
318   };
319 
320   /** @brief A tag for column-major storage of a dense matrix. */
321   struct column_major
322   {
323     typedef column_major_tag         orientation_category;
324 
325     /** @brief Returns the memory offset for entry (i,j) of a dense matrix.
326     *
327     * @param i   row index
328     * @param j   column index
329     * @param num_rows  number of entries per row (including alignment)
330     */
mem_indexcolumn_major331     static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t /* num_cols */)
332     {
333       return i + j * num_rows;
334     }
335   };
336 
337   struct row_iteration;
338   struct col_iteration;
339 
340   template<typename LHS, typename RHS, typename OP>
341   class matrix_expression;
342 
343   class context;
344 
345   enum memory_types
346   {
347     MEMORY_NOT_INITIALIZED
348     , MAIN_MEMORY
349     , OPENCL_MEMORY
350     , CUDA_MEMORY
351   };
352 
353   namespace backend
354   {
355     class mem_handle;
356   }
357 
358   //
359   // Matrix types:
360   //
361   static const vcl_size_t dense_padding_size = 128;
362 
363   /** @brief A dense matrix class
364   *
365   * @tparam SCALARTYPE   The underlying scalar type (either float or double)
366   * @tparam ALIGNMENT   The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory.
367   */
368   template<typename ROWCOL, typename MATRIXTYPE>
369   class matrix_iterator;
370 
371   template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t>
372   class matrix_base;
373 
374   template<class SCALARTYPE, typename F = row_major, unsigned int ALIGNMENT = 1>
375   class matrix;
376 
377   template<typename SCALARTYPE>
378   class implicit_matrix_base;
379 
380   template<class SCALARTYPE>
381   class identity_matrix;
382 
383   template<class SCALARTYPE>
384   class zero_matrix;
385 
386   template<class SCALARTYPE>
387   class scalar_matrix;
388 
389   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
390   class compressed_matrix;
391 
392   template<class SCALARTYPE>
393   class compressed_compressed_matrix;
394 
395 
396   template<class SCALARTYPE, unsigned int ALIGNMENT = 128>
397   class coordinate_matrix;
398 
399   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
400   class ell_matrix;
401 
402   template<typename ScalarT, typename IndexT = unsigned int>
403   class sliced_ell_matrix;
404 
405   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
406   class hyb_matrix;
407 
408   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
409   class circulant_matrix;
410 
411   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
412   class hankel_matrix;
413 
414   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
415   class toeplitz_matrix;
416 
417   template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
418   class vandermonde_matrix;
419 
420   //
421   // Proxies:
422   //
423   template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t>
424   class basic_range;
425 
426   typedef basic_range<>  range;
427 
428   template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t>
429   class basic_slice;
430 
431   typedef basic_slice<>  slice;
432 
433   template<typename VectorType>
434   class vector_range;
435 
436   template<typename VectorType>
437   class vector_slice;
438 
439   template<typename MatrixType>
440   class matrix_range;
441 
442   template<typename MatrixType>
443   class matrix_slice;
444 
445 
446   /** @brief Helper struct for checking whether a type is a host scalar type (e.g. float, double) */
447   template<typename T>
448   struct is_cpu_scalar
449   {
450     enum { value = false };
451   };
452 
453   /** @brief Helper struct for checking whether a type is a viennacl::scalar<> */
454   template<typename T>
455   struct is_scalar
456   {
457     enum { value = false };
458   };
459 
460   /** @brief Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> */
461   template<typename T>
462   struct is_flip_sign_scalar
463   {
464     enum { value = false };
465   };
466 
467   /** @brief Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy) */
468   template<typename T>
469   struct is_any_scalar
470   {
471     enum { value = (is_scalar<T>::value || is_cpu_scalar<T>::value || is_flip_sign_scalar<T>::value )};
472   };
473 
474   /** @brief Checks for a type being either vector_base or implicit_vector_base */
475   template<typename T>
476   struct is_any_vector { enum { value = 0 }; };
477 
478   /** @brief Checks for either matrix_base or implicit_matrix_base */
479   template<typename T>
480   struct is_any_dense_matrix { enum { value = 0 }; };
481 
482   /** @brief Helper class for checking whether a matrix has a row-major layout. */
483   template<typename T>
484   struct is_row_major
485   {
486     enum { value = false };
487   };
488 
489   /** @brief Helper class for checking whether a matrix is a compressed_matrix (CSR format) */
490   template<typename T>
491   struct is_compressed_matrix
492   {
493     enum { value = false };
494   };
495 
496   /** @brief Helper class for checking whether a matrix is a coordinate_matrix (COO format) */
497   template<typename T>
498   struct is_coordinate_matrix
499   {
500     enum { value = false };
501   };
502 
503   /** @brief Helper class for checking whether a matrix is an ell_matrix (ELL format) */
504   template<typename T>
505   struct is_ell_matrix
506   {
507     enum { value = false };
508   };
509 
510   /** @brief Helper class for checking whether a matrix is a sliced_ell_matrix (SELL-C-\f$ \sigma \f$ format) */
511   template<typename T>
512   struct is_sliced_ell_matrix
513   {
514     enum { value = false };
515   };
516 
517 
518   /** @brief Helper class for checking whether a matrix is a hyb_matrix (hybrid format: ELL plus CSR) */
519   template<typename T>
520   struct is_hyb_matrix
521   {
522     enum { value = false };
523   };
524 
525   /** @brief Helper class for checking whether the provided type is one of the sparse matrix types (compressed_matrix, coordinate_matrix, etc.) */
526   template<typename T>
527   struct is_any_sparse_matrix
528   {
529     enum { value = false };
530   };
531 
532 
533   /** @brief Helper class for checking whether a matrix is a circulant matrix */
534   template<typename T>
535   struct is_circulant_matrix
536   {
537     enum { value = false };
538   };
539 
540   /** @brief Helper class for checking whether a matrix is a Hankel matrix */
541   template<typename T>
542   struct is_hankel_matrix
543   {
544     enum { value = false };
545   };
546 
547   /** @brief Helper class for checking whether a matrix is a Toeplitz matrix */
548   template<typename T>
549   struct is_toeplitz_matrix
550   {
551     enum { value = false };
552   };
553 
554   /** @brief Helper class for checking whether a matrix is a Vandermonde matrix */
555   template<typename T>
556   struct is_vandermonde_matrix
557   {
558     enum { value = false };
559   };
560 
561   /** @brief Helper class for checking whether the provided type is any of the dense structured matrix types (circulant, Hankel, etc.) */
562   template<typename T>
563   struct is_any_dense_structured_matrix
564   {
565     enum { value = viennacl::is_circulant_matrix<T>::value || viennacl::is_hankel_matrix<T>::value || viennacl::is_toeplitz_matrix<T>::value || viennacl::is_vandermonde_matrix<T>::value };
566   };
567 
568 
569 
570 
571   /** @brief Exception class in case of memory errors */
572   class memory_exception : public std::exception
573   {
574   public:
memory_exception()575     memory_exception() : message_() {}
memory_exception(std::string message)576     memory_exception(std::string message) : message_("ViennaCL: Internal memory error: " + message) {}
577 
what()578     virtual const char* what() const throw() { return message_.c_str(); }
579 
~memory_exception()580     virtual ~memory_exception() throw() {}
581   private:
582     std::string message_;
583   };
584 
585   class cuda_not_available_exception : public std::exception
586   {
587   public:
cuda_not_available_exception()588     cuda_not_available_exception() : message_("ViennaCL was compiled without CUDA support, but CUDA functionality required for this operation.") {}
589 
what()590     virtual const char* what() const throw() { return message_.c_str(); }
591 
~cuda_not_available_exception()592     virtual ~cuda_not_available_exception() throw() {}
593   private:
594     std::string message_;
595   };
596 
597   class zero_on_diagonal_exception : public std::runtime_error
598   {
599   public:
zero_on_diagonal_exception(std::string const & what_arg)600     zero_on_diagonal_exception(std::string const & what_arg) : std::runtime_error(what_arg) {}
601   };
602 
603   class unknown_norm_exception : public std::runtime_error
604   {
605   public:
unknown_norm_exception(std::string const & what_arg)606     unknown_norm_exception(std::string const & what_arg) : std::runtime_error(what_arg) {}
607   };
608 
609 
610 
611   namespace tools
612   {
613     //helper for matrix row/col iterators
614     //must be specialized for every viennacl matrix type
615     /** @brief Helper class for incrementing an iterator in a dense matrix. */
616     template<typename ROWCOL, typename MATRIXTYPE>
617     struct MATRIX_ITERATOR_INCREMENTER
618     {
619       typedef typename MATRIXTYPE::ERROR_SPECIALIZATION_FOR_THIS_MATRIX_TYPE_MISSING          ErrorIndicator;
620 
applyMATRIX_ITERATOR_INCREMENTER621       static void apply(const MATRIXTYPE & /*mat*/, unsigned int & /*row*/, unsigned int & /*col*/) {}
622     };
623   }
624 
625   namespace linalg
626   {
627 #if !defined(_MSC_VER) || defined(__CUDACC__)
628 
629     template<class SCALARTYPE, unsigned int ALIGNMENT>
630     void convolve_i(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
631                     viennacl::vector<SCALARTYPE, ALIGNMENT>& input2,
632                     viennacl::vector<SCALARTYPE, ALIGNMENT>& output);
633 
634     template<typename T>
635     viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_prod> >
636     element_prod(vector_base<T> const & v1, vector_base<T> const & v2);
637 
638     template<typename T>
639     viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_div> >
640     element_div(vector_base<T> const & v1, vector_base<T> const & v2);
641 
642 
643 
644     template<typename T>
645     void inner_prod_impl(vector_base<T> const & vec1,
646                          vector_base<T> const & vec2,
647                          scalar<T> & result);
648 
649     template<typename LHS, typename RHS, typename OP, typename T>
650     void inner_prod_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec1,
651                          vector_base<T> const & vec2,
652                          scalar<T> & result);
653 
654     template<typename T, typename LHS, typename RHS, typename OP>
655     void inner_prod_impl(vector_base<T> const & vec1,
656                          viennacl::vector_expression<LHS, RHS, OP> const & vec2,
657                          scalar<T> & result);
658 
659     template<typename LHS1, typename RHS1, typename OP1,
660               typename LHS2, typename RHS2, typename OP2, typename T>
661     void inner_prod_impl(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1,
662                          viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2,
663                          scalar<T> & result);
664 
665     ///////////////////////////
666 
667     template<typename T>
668     void inner_prod_cpu(vector_base<T> const & vec1,
669                         vector_base<T> const & vec2,
670                         T & result);
671 
672     template<typename LHS, typename RHS, typename OP, typename T>
673     void inner_prod_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec1,
674                         vector_base<T> const & vec2,
675                         T & result);
676 
677     template<typename T, typename LHS, typename RHS, typename OP>
678     void inner_prod_cpu(vector_base<T> const & vec1,
679                         viennacl::vector_expression<LHS, RHS, OP> const & vec2,
680                         T & result);
681 
682     template<typename LHS1, typename RHS1, typename OP1,
683               typename LHS2, typename RHS2, typename OP2, typename S3>
684     void inner_prod_cpu(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1,
685                         viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2,
686                         S3 & result);
687 
688 
689 
690     //forward definition of norm_1_impl function
691     template<typename T>
692     void norm_1_impl(vector_base<T> const & vec, scalar<T> & result);
693 
694     template<typename LHS, typename RHS, typename OP, typename T>
695     void norm_1_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
696                      scalar<T> & result);
697 
698 
699     template<typename T>
700     void norm_1_cpu(vector_base<T> const & vec,
701                     T & result);
702 
703     template<typename LHS, typename RHS, typename OP, typename S2>
704     void norm_1_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
705                     S2 & result);
706 
707     //forward definition of norm_2_impl function
708     template<typename T>
709     void norm_2_impl(vector_base<T> const & vec, scalar<T> & result);
710 
711     template<typename LHS, typename RHS, typename OP, typename T>
712     void norm_2_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
713                      scalar<T> & result);
714 
715     template<typename T>
716     void norm_2_cpu(vector_base<T> const & vec, T & result);
717 
718     template<typename LHS, typename RHS, typename OP, typename S2>
719     void norm_2_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
720                     S2 & result);
721 
722 
723     //forward definition of norm_inf_impl function
724     template<typename T>
725     void norm_inf_impl(vector_base<T> const & vec, scalar<T> & result);
726 
727     template<typename LHS, typename RHS, typename OP, typename T>
728     void norm_inf_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
729                       scalar<T> & result);
730 
731 
732     template<typename T>
733     void norm_inf_cpu(vector_base<T> const & vec, T & result);
734 
735     template<typename LHS, typename RHS, typename OP, typename S2>
736     void norm_inf_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
737                       S2 & result);
738 
739     //forward definition of max()-related functions
740     template<typename T>
741     void max_impl(vector_base<T> const & vec, scalar<T> & result);
742 
743     template<typename LHS, typename RHS, typename OP, typename T>
744     void max_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
745                   scalar<T> & result);
746 
747 
748     template<typename T>
749     void max_cpu(vector_base<T> const & vec, T & result);
750 
751     template<typename LHS, typename RHS, typename OP, typename S2>
752     void max_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
753                  S2 & result);
754 
755     //forward definition of min()-related functions
756     template<typename T>
757     void min_impl(vector_base<T> const & vec, scalar<T> & result);
758 
759     template<typename LHS, typename RHS, typename OP, typename T>
760     void min_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
761                   scalar<T> & result);
762 
763 
764     template<typename T>
765     void min_cpu(vector_base<T> const & vec, T & result);
766 
767     template<typename LHS, typename RHS, typename OP, typename S2>
768     void min_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
769                  S2 & result);
770 
771     //forward definition of sum()-related functions
772     template<typename T>
773     void sum_impl(vector_base<T> const & vec, scalar<T> & result);
774 
775     template<typename LHS, typename RHS, typename OP, typename T>
776     void sum_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
777                   scalar<T> & result);
778 
779 
780     template<typename T>
781     void sum_cpu(vector_base<T> const & vec, T & result);
782 
783     template<typename LHS, typename RHS, typename OP, typename S2>
784     void sum_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
785                  S2 & result);
786 
787 
788     // forward definition of frobenius norm:
789     template<typename T>
790     void norm_frobenius_impl(matrix_base<T> const & vec, scalar<T> & result);
791 
792     template<typename T>
793     void norm_frobenius_cpu(matrix_base<T> const & vec, T & result);
794 
795 
796     template<typename T>
797     vcl_size_t index_norm_inf(vector_base<T> const & vec);
798 
799     template<typename LHS, typename RHS, typename OP>
800     vcl_size_t index_norm_inf(viennacl::vector_expression<LHS, RHS, OP> const & vec);
801 
802     //forward definition of prod_impl functions
803 
804     template<typename NumericT>
805     void prod_impl(const matrix_base<NumericT> & mat,
806                    const vector_base<NumericT> & vec,
807                          vector_base<NumericT> & result);
808 
809     template<typename NumericT>
810     void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans,
811                    const vector_base<NumericT> & vec,
812                          vector_base<NumericT> & result);
813 
814     template<typename SparseMatrixType, class SCALARTYPE, unsigned int ALIGNMENT>
815     typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value,
816                                   vector_expression<const SparseMatrixType,
817                                                     const vector<SCALARTYPE, ALIGNMENT>,
818                                                     op_prod >
819                                  >::type
820     prod_impl(const SparseMatrixType & mat,
821               const vector<SCALARTYPE, ALIGNMENT> & vec);
822 
823     // forward definition of summation routines for matrices:
824 
825     template<typename NumericT>
826     void row_sum_impl(const matrix_base<NumericT> & A,
827                             vector_base<NumericT> & result);
828 
829     template<typename NumericT>
830     void column_sum_impl(const matrix_base<NumericT> & A,
831                                vector_base<NumericT> & result);
832 
833 #endif
834 
835     namespace detail
836     {
837       enum row_info_types
838       {
839         SPARSE_ROW_NORM_INF = 0,
840         SPARSE_ROW_NORM_1,
841         SPARSE_ROW_NORM_2,
842         SPARSE_ROW_DIAGONAL
843       };
844 
845     }
846 
847 
848     /** @brief A tag class representing a lower triangular matrix */
849     struct lower_tag
850     {
namelower_tag851       static const char * name() { return "lower"; }
852     };      //lower triangular matrix
853     /** @brief A tag class representing an upper triangular matrix */
854     struct upper_tag
855     {
nameupper_tag856       static const char * name() { return "upper"; }
857     };      //upper triangular matrix
858     /** @brief A tag class representing a lower triangular matrix with unit diagonal*/
859     struct unit_lower_tag
860     {
nameunit_lower_tag861       static const char * name() { return "unit_lower"; }
862     }; //unit lower triangular matrix
863     /** @brief A tag class representing an upper triangular matrix with unit diagonal*/
864     struct unit_upper_tag
865     {
nameunit_upper_tag866       static const char * name() { return "unit_upper"; }
867     }; //unit upper triangular matrix
868 
869     //preconditioner tags
870     class ilut_tag;
871 
872     /** @brief A tag class representing the use of no preconditioner */
873     class no_precond
874     {
875       public:
876         template<typename VectorType>
apply(VectorType &)877         void apply(VectorType &) const {}
878     };
879 
880 
881   } //namespace linalg
882 
  //
  // Documentation-only namespace declarations follow: each namespace is declared here so that it can carry its Doxygen description.
  //
886 
887   /** @brief Namespace providing routines for handling the different memory domains. */
888   namespace backend
889   {
890     /** @brief Provides implementations for handling memory buffers in CPU RAM. */
891     namespace cpu_ram
892     {
893       /** @brief Holds implementation details for handling memory buffers in CPU RAM. Not intended for direct use by library users. */
894       namespace detail {}
895     }
896 
897     /** @brief Provides implementations for handling CUDA memory buffers. */
898     namespace cuda
899     {
900       /** @brief Holds implementation details for handling CUDA memory buffers. Not intended for direct use by library users. */
901       namespace detail {}
902     }
903 
904     /** @brief Implementation details for the generic memory backend interface. */
905     namespace detail {}
906 
907     /** @brief Provides implementations for handling OpenCL memory buffers. */
908     namespace opencl
909     {
910       /** @brief Holds implementation details for handling OpenCL memory buffers. Not intended for direct use by library users. */
911       namespace detail {}
912     }
913   }
914 
915 
916   /** @brief Holds implementation details for functionality in the main viennacl-namespace. Not intended for direct use by library users. */
917   namespace detail
918   {
919     /** @brief Helper namespace for fast Fourier transforms. Not to be used directly by library users. */
920     namespace fft
921     {
922       /** @brief Helper namespace for fast-Fourier transformation. Deprecated. */
923       namespace FFT_DATA_ORDER {}
924     }
925   }
926 
927 
928   /** @brief Provides an OpenCL kernel generator. */
929   namespace device_specific
930   {
931     /** @brief Provides the implementation for tuning the kernels for a particular device. */
932     namespace autotune {}
933 
934     /** @brief Contains implementation details of the kernel generator. */
935     namespace detail {}
936 
937     /** @brief Namespace holding the various device-specific parameters for generating the best kernels. */
938     namespace profiles {}
939 
940     /** @brief Contains various helper routines for kernel generation. */
941     namespace utils {}
942   }
943 
944   /** @brief Provides basic input-output functionality. */
945   namespace io
946   {
947     /** @brief Implementation details for IO functionality. Usually not of interest for a library user. */
948     namespace detail {}
949 
950     /** @brief Namespace holding the various XML tag definitions for the kernel parameter tuning facility. */
951     namespace tag {}
952 
953     /** @brief Namespace holding the various XML strings for the kernel parameter tuning facility. */
954     namespace val {}
955   }
956 
957   /** @brief Provides all linear algebra operations which are not covered by operator overloads. */
958   namespace linalg
959   {
960     /** @brief Holds all CUDA compute kernels used by ViennaCL. */
961     namespace cuda
962     {
963       /** @brief Helper functions for the CUDA linear algebra backend. */
964       namespace detail {}
965     }
966 
967     /** @brief Namespace holding implementation details for linear algebra routines. Usually not of interest for a library user. */
968     namespace detail
969     {
970       /** @brief Implementation namespace for algebraic multigrid preconditioner. */
971       namespace amg {}
972 
973       /** @brief Implementation namespace for sparse approximate inverse preconditioner. */
974       namespace spai {}
975     }
976 
977     /** @brief Holds all compute kernels with conventional host-based execution (buffers in CPU RAM). */
978     namespace host_based
979     {
980       /** @brief Helper functions for the host-based linear algebra backend. */
981       namespace detail {}
982     }
983 
984     /** @brief Namespace containing the OpenCL kernels. Deprecated, will be moved to viennacl::linalg::opencl in future releases. */
985     namespace kernels {}
986 
987     /** @brief Holds all routines providing OpenCL linear algebra operations. */
988     namespace opencl
989     {
990       /** @brief Helper functions for OpenCL-accelerated linear algebra operations. */
991       namespace detail {}
992 
993       /** @brief Contains the OpenCL kernel generation functions for a predefined set of functionality. */
994       namespace kernels
995       {
996         /** @brief Implementation details for the predefined OpenCL kernels. */
997         namespace detail {}
998       }
999     }
1000   }
1001 
1002   /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */
1003   namespace ocl {}
1004 
1005   /** @brief Namespace containing many meta-functions. */
1006   namespace result_of {}
1007 
1008   /** @brief Namespace for various tools used within ViennaCL. */
1009   namespace tools
1010   {
1011     /** @brief Contains implementation details for the tools. Usually not of interest for the library user. */
1012     namespace detail {}
1013   }
1014 
1015   /** @brief Namespace providing traits-information as well as generic wrappers to common routines for vectors and matrices such as size() or clear() */
1016   namespace traits {}
1017 
1018   /** @brief Contains the scheduling functionality which allows for dynamic kernel generation as well as the fusion of multiple statements into a single kernel. */
1019   namespace scheduler
1020   {
1021     /** @brief Implementation details for the scheduler */
1022     namespace detail {}
1023 
1024     /** @brief Helper metafunctions used for the scheduler */
1025     namespace result_of {}
1026   }
1027 
1028 } //namespace viennacl
1029 
1030 #endif
1031 
1032 /*@}*/
1033