1 #ifndef VIENNACL_FORWARDS_H 2 #define VIENNACL_FORWARDS_H 3 4 /* ========================================================================= 5 Copyright (c) 2010-2016, Institute for Microelectronics, 6 Institute for Analysis and Scientific Computing, 7 TU Wien. 8 Portions of this software are copyright by UChicago Argonne, LLC. 9 10 ----------------- 11 ViennaCL - The Vienna Computing Library 12 ----------------- 13 14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 15 16 (A list of authors and contributors can be found in the manual) 17 18 License: MIT (X11), see file LICENSE in the base directory 19 ============================================================================= */ 20 21 22 /** @file viennacl/forwards.h 23 @brief This file provides the forward declarations for the main types used within ViennaCL 24 */ 25 26 /** 27 @mainpage Main Page 28 29 Here you can find all the documentation on how to use the GPU-accelerated linear algebra library ViennaCL. 30 The formerly separate \ref usermanual "user manual" is no longer available as a standalone PDF, but all integrated into the HTML-based documentation. 31 Please use the navigation panel on the left to access the desired information. 32 33 Quick links: 34 - \ref manual-installation "Installation and building the examples" 35 - \ref manual-types "Basic types" 36 - \ref manual-operations "Basic operations" 37 - \ref manual-algorithms "Algorithms" 38 39 40 ----------------------------------- 41 \htmlonly 42 <div style="align: right; width: 100%"> 43 <a href="http://www.tuwien.ac.at/"><img src="tuwien.png"></a> 44 <a href="http://www.iue.tuwien.ac.at/"><img src="iue.png"></a> 45 <a href="http://www.asc.tuwien.ac.at/"><img src="asc.png"></a> 46 </div> 47 \endhtmlonly 48 */ 49 50 51 //compatibility defines: 52 #ifdef VIENNACL_HAVE_UBLAS 53 #define VIENNACL_WITH_UBLAS 54 #endif 55 56 #ifdef VIENNACL_HAVE_EIGEN 57 #define VIENNACL_WITH_EIGEN 58 #endif 59 60 #ifdef VIENNACL_HAVE_MTL4 61 #define VIENNACL_WITH_MTL4 62 #endif 63 64 #include <cstddef> 65 #include <cassert> 66 #include <string> 67 #include <stdexcept> 68 69 #include "viennacl/meta/enable_if.hpp" 70 #include "viennacl/version.hpp" 71 72 /** @brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. */ 73 namespace viennacl 74 { 75 typedef std::size_t vcl_size_t; 76 typedef std::ptrdiff_t vcl_ptrdiff_t; 77 78 79 80 /** @brief A tag class representing assignment */ 81 struct op_assign {}; 82 /** @brief A tag class representing inplace addition */ 83 struct op_inplace_add {}; 84 /** @brief A tag class representing inplace subtraction */ 85 struct op_inplace_sub {}; 86 87 /** @brief A tag class representing addition */ 88 struct op_add {}; 89 /** @brief A tag class representing subtraction */ 90 struct op_sub {}; 91 /** @brief A tag class representing multiplication by a scalar */ 92 struct op_mult {}; 93 /** @brief A tag class representing matrix-vector products and element-wise multiplications*/ 94 struct op_prod {}; 95 /** @brief A tag class representing matrix-matrix products */ 96 struct op_mat_mat_prod {}; 97 /** @brief A tag class representing division */ 98 struct op_div {}; 99 /** @brief A tag class representing the power function */ 100 struct op_pow {}; 101 102 /** @brief A tag class representing equality */ 103 struct op_eq {}; 104 /** @brief A tag class representing inequality */ 105 struct op_neq {}; 106 /** @brief A tag class representing greater-than */ 107 struct op_greater {}; 108 /** @brief A tag class representing less-than */ 109 struct op_less {}; 110 /** @brief A tag class representing greater-than-or-equal-to */ 111 struct op_geq {}; 112 /** @brief A tag class representing less-than-or-equal-to */ 113 struct op_leq {}; 114 115 /** @brief A tag class representing the summation of a vector */ 116 struct op_sum {}; 117 118 /** @brief A tag class representing the summation of all rows of a matrix */ 119 struct op_row_sum {}; 120 121 /** @brief A tag class representing the summation of all columns of a matrix */ 122 struct op_col_sum {}; 123 124 /** @brief A tag class representing element-wise casting operations on vectors and matrices */ 125 template<typename OP> 126 struct op_element_cast {}; 127 128 /** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */ 129 template<typename OP> 130 struct op_element_binary {}; 131 132 /** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */ 133 template<typename OP> 134 struct op_element_unary {}; 135 136 /** @brief A tag class representing the modulus function for integers */ 137 struct op_abs {}; 138 /** @brief A tag class representing the acos() function */ 139 struct op_acos {}; 140 /** @brief A tag class representing the asin() function */ 141 struct op_asin {}; 142 /** @brief A tag class for representing the argmax() function */ 143 struct op_argmax {}; 144 /** @brief A tag class for representing the argmin() function */ 145 struct op_argmin {}; 146 /** @brief A tag class representing the atan() function */ 147 struct op_atan {}; 148 /** @brief A tag class representing the atan2() function */ 149 struct op_atan2 {}; 150 /** @brief A tag class representing the ceil() function */ 151 struct op_ceil {}; 152 /** @brief A tag class representing the cos() function */ 153 struct op_cos {}; 154 /** @brief A tag class representing the cosh() function */ 155 struct op_cosh {}; 156 /** @brief A tag class representing the exp() function */ 157 struct op_exp {}; 158 /** @brief A tag class representing the fabs() function */ 159 struct op_fabs {}; 160 /** @brief A tag class representing the fdim() function */ 161 struct op_fdim {}; 162 /** @brief A tag class representing the floor() function */ 163 struct op_floor {}; 164 /** @brief A tag class representing the fmax() function */ 165 struct op_fmax {}; 166 /** @brief A tag class representing the fmin() function */ 167 struct op_fmin {}; 168 /** @brief A tag class representing the fmod() function */ 169 struct op_fmod {}; 170 /** @brief A tag class representing the log() function */ 171 struct op_log {}; 172 /** @brief A tag class representing the log10() function */ 173 struct op_log10 {}; 174 /** @brief A tag class representing the sin() function */ 175 struct op_sin {}; 176 /** @brief A tag class representing the sinh() function */ 177 struct op_sinh {}; 178 /** @brief A tag class representing the sqrt() function */ 179 struct op_sqrt {}; 180 /** @brief A tag class representing the tan() function */ 181 struct op_tan {}; 182 /** @brief A tag class representing the tanh() function */ 183 struct op_tanh {}; 184 185 /** @brief A tag class representing the (off-)diagonal of a matrix */ 186 struct op_matrix_diag {}; 187 188 /** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */ 189 struct op_vector_diag {}; 190 191 /** @brief A tag class representing the extraction of a matrix row to a vector */ 192 struct op_row {}; 193 194 /** @brief A tag class representing the extraction of a matrix column to a vector */ 195 struct op_column {}; 196 197 /** @brief A tag class representing inner products of two vectors */ 198 struct op_inner_prod {}; 199 200 /** @brief A tag class representing the 1-norm of a vector */ 201 struct op_norm_1 {}; 202 203 /** @brief A tag class representing the 2-norm of a vector */ 204 struct op_norm_2 {}; 205 206 /** @brief A tag class representing the inf-norm of a vector */ 207 struct op_norm_inf {}; 208 209 /** @brief A tag class representing the maximum of a vector */ 210 struct op_max {}; 211 212 /** @brief A tag class representing the minimum of a vector */ 213 struct op_min {}; 214 215 216 /** @brief A tag class representing the Frobenius-norm of a matrix */ 217 struct op_norm_frobenius {}; 218 219 /** @brief A tag class representing transposed matrices */ 220 struct op_trans {}; 221 222 /** @brief A tag class representing sign flips (for scalars only. Vectors and matrices use the standard multiplication by the scalar -1.0) */ 223 struct op_flip_sign {}; 224 225 //forward declaration of basic types: 226 template<class TYPE> 227 class scalar; 228 229 template<typename LHS, typename RHS, typename OP> 230 class scalar_expression; 231 232 template<typename SCALARTYPE> 233 class entry_proxy; 234 235 template<typename SCALARTYPE> 236 class const_entry_proxy; 237 238 template<typename LHS, typename RHS, typename OP> 239 class vector_expression; 240 241 template<class SCALARTYPE, unsigned int ALIGNMENT> 242 class vector_iterator; 243 244 template<class SCALARTYPE, unsigned int ALIGNMENT> 245 class const_vector_iterator; 246 247 template<typename SCALARTYPE> 248 class implicit_vector_base; 249 250 template<typename SCALARTYPE> 251 struct zero_vector; 252 253 template<typename SCALARTYPE> 254 struct unit_vector; 255 256 template<typename SCALARTYPE> 257 struct one_vector; 258 259 template<typename SCALARTYPE> 260 struct scalar_vector; 261 262 template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> 263 class vector_base; 264 265 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 266 class vector; 267 268 template<typename ScalarT> 269 class vector_tuple; 270 271 //the following forwards are needed for GMRES 272 template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR> 273 void copy(CPU_ITERATOR const & cpu_begin, 274 CPU_ITERATOR const & cpu_end, 275 vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin); 276 277 template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST> 278 void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin, 279 const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end, 280 vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin); 281 282 template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST> 283 void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin, 284 const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end, 285 const_vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin); 286 287 template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR> 288 void fast_copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin, 289 const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end, 290 CPU_ITERATOR cpu_begin ); 291 292 template<typename CPU_ITERATOR, typename SCALARTYPE, unsigned int ALIGNMENT> 293 void fast_copy(CPU_ITERATOR const & cpu_begin, 294 CPU_ITERATOR const & cpu_end, 295 vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin); 296 297 298 /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. */ 299 struct row_major_tag {}; 300 /** @brief Tag class for indicating column-major layout of a matrix. Not passed to the matrix directly, see row_major type. */ 301 struct column_major_tag {}; 302 303 /** @brief A tag for row-major storage of a dense matrix. */ 304 struct row_major 305 { 306 typedef row_major_tag orientation_category; 307 308 /** @brief Returns the memory offset for entry (i,j) of a dense matrix. 309 * 310 * @param i row index 311 * @param j column index 312 * @param num_cols number of entries per column (including alignment) 313 */ mem_indexrow_major314 static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t /* num_rows */, vcl_size_t num_cols) 315 { 316 return i * num_cols + j; 317 } 318 }; 319 320 /** @brief A tag for column-major storage of a dense matrix. */ 321 struct column_major 322 { 323 typedef column_major_tag orientation_category; 324 325 /** @brief Returns the memory offset for entry (i,j) of a dense matrix. 326 * 327 * @param i row index 328 * @param j column index 329 * @param num_rows number of entries per row (including alignment) 330 */ mem_indexcolumn_major331 static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t /* num_cols */) 332 { 333 return i + j * num_rows; 334 } 335 }; 336 337 struct row_iteration; 338 struct col_iteration; 339 340 template<typename LHS, typename RHS, typename OP> 341 class matrix_expression; 342 343 class context; 344 345 enum memory_types 346 { 347 MEMORY_NOT_INITIALIZED 348 , MAIN_MEMORY 349 , OPENCL_MEMORY 350 , CUDA_MEMORY 351 }; 352 353 namespace backend 354 { 355 class mem_handle; 356 } 357 358 // 359 // Matrix types: 360 // 361 static const vcl_size_t dense_padding_size = 128; 362 363 /** @brief A dense matrix class 364 * 365 * @tparam SCALARTYPE The underlying scalar type (either float or double) 366 * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. 367 */ 368 template<typename ROWCOL, typename MATRIXTYPE> 369 class matrix_iterator; 370 371 template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> 372 class matrix_base; 373 374 template<class SCALARTYPE, typename F = row_major, unsigned int ALIGNMENT = 1> 375 class matrix; 376 377 template<typename SCALARTYPE> 378 class implicit_matrix_base; 379 380 template<class SCALARTYPE> 381 class identity_matrix; 382 383 template<class SCALARTYPE> 384 class zero_matrix; 385 386 template<class SCALARTYPE> 387 class scalar_matrix; 388 389 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 390 class compressed_matrix; 391 392 template<class SCALARTYPE> 393 class compressed_compressed_matrix; 394 395 396 template<class SCALARTYPE, unsigned int ALIGNMENT = 128> 397 class coordinate_matrix; 398 399 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 400 class ell_matrix; 401 402 template<typename ScalarT, typename IndexT = unsigned int> 403 class sliced_ell_matrix; 404 405 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 406 class hyb_matrix; 407 408 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 409 class circulant_matrix; 410 411 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 412 class hankel_matrix; 413 414 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 415 class toeplitz_matrix; 416 417 template<class SCALARTYPE, unsigned int ALIGNMENT = 1> 418 class vandermonde_matrix; 419 420 // 421 // Proxies: 422 // 423 template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t> 424 class basic_range; 425 426 typedef basic_range<> range; 427 428 template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t> 429 class basic_slice; 430 431 typedef basic_slice<> slice; 432 433 template<typename VectorType> 434 class vector_range; 435 436 template<typename VectorType> 437 class vector_slice; 438 439 template<typename MatrixType> 440 class matrix_range; 441 442 template<typename MatrixType> 443 class matrix_slice; 444 445 446 /** @brief Helper struct for checking whether a type is a host scalar type (e.g. float, double) */ 447 template<typename T> 448 struct is_cpu_scalar 449 { 450 enum { value = false }; 451 }; 452 453 /** @brief Helper struct for checking whether a type is a viennacl::scalar<> */ 454 template<typename T> 455 struct is_scalar 456 { 457 enum { value = false }; 458 }; 459 460 /** @brief Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> */ 461 template<typename T> 462 struct is_flip_sign_scalar 463 { 464 enum { value = false }; 465 }; 466 467 /** @brief Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy) */ 468 template<typename T> 469 struct is_any_scalar 470 { 471 enum { value = (is_scalar<T>::value || is_cpu_scalar<T>::value || is_flip_sign_scalar<T>::value )}; 472 }; 473 474 /** @brief Checks for a type being either vector_base or implicit_vector_base */ 475 template<typename T> 476 struct is_any_vector { enum { value = 0 }; }; 477 478 /** @brief Checks for either matrix_base or implicit_matrix_base */ 479 template<typename T> 480 struct is_any_dense_matrix { enum { value = 0 }; }; 481 482 /** @brief Helper class for checking whether a matrix has a row-major layout. */ 483 template<typename T> 484 struct is_row_major 485 { 486 enum { value = false }; 487 }; 488 489 /** @brief Helper class for checking whether a matrix is a compressed_matrix (CSR format) */ 490 template<typename T> 491 struct is_compressed_matrix 492 { 493 enum { value = false }; 494 }; 495 496 /** @brief Helper class for checking whether a matrix is a coordinate_matrix (COO format) */ 497 template<typename T> 498 struct is_coordinate_matrix 499 { 500 enum { value = false }; 501 }; 502 503 /** @brief Helper class for checking whether a matrix is an ell_matrix (ELL format) */ 504 template<typename T> 505 struct is_ell_matrix 506 { 507 enum { value = false }; 508 }; 509 510 /** @brief Helper class for checking whether a matrix is a sliced_ell_matrix (SELL-C-\f$ \sigma \f$ format) */ 511 template<typename T> 512 struct is_sliced_ell_matrix 513 { 514 enum { value = false }; 515 }; 516 517 518 /** @brief Helper class for checking whether a matrix is a hyb_matrix (hybrid format: ELL plus CSR) */ 519 template<typename T> 520 struct is_hyb_matrix 521 { 522 enum { value = false }; 523 }; 524 525 /** @brief Helper class for checking whether the provided type is one of the sparse matrix types (compressed_matrix, coordinate_matrix, etc.) */ 526 template<typename T> 527 struct is_any_sparse_matrix 528 { 529 enum { value = false }; 530 }; 531 532 533 /** @brief Helper class for checking whether a matrix is a circulant matrix */ 534 template<typename T> 535 struct is_circulant_matrix 536 { 537 enum { value = false }; 538 }; 539 540 /** @brief Helper class for checking whether a matrix is a Hankel matrix */ 541 template<typename T> 542 struct is_hankel_matrix 543 { 544 enum { value = false }; 545 }; 546 547 /** @brief Helper class for checking whether a matrix is a Toeplitz matrix */ 548 template<typename T> 549 struct is_toeplitz_matrix 550 { 551 enum { value = false }; 552 }; 553 554 /** @brief Helper class for checking whether a matrix is a Vandermonde matrix */ 555 template<typename T> 556 struct is_vandermonde_matrix 557 { 558 enum { value = false }; 559 }; 560 561 /** @brief Helper class for checking whether the provided type is any of the dense structured matrix types (circulant, Hankel, etc.) */ 562 template<typename T> 563 struct is_any_dense_structured_matrix 564 { 565 enum { value = viennacl::is_circulant_matrix<T>::value || viennacl::is_hankel_matrix<T>::value || viennacl::is_toeplitz_matrix<T>::value || viennacl::is_vandermonde_matrix<T>::value }; 566 }; 567 568 569 570 571 /** @brief Exception class in case of memory errors */ 572 class memory_exception : public std::exception 573 { 574 public: memory_exception()575 memory_exception() : message_() {} memory_exception(std::string message)576 memory_exception(std::string message) : message_("ViennaCL: Internal memory error: " + message) {} 577 what()578 virtual const char* what() const throw() { return message_.c_str(); } 579 ~memory_exception()580 virtual ~memory_exception() throw() {} 581 private: 582 std::string message_; 583 }; 584 585 class cuda_not_available_exception : public std::exception 586 { 587 public: cuda_not_available_exception()588 cuda_not_available_exception() : message_("ViennaCL was compiled without CUDA support, but CUDA functionality required for this operation.") {} 589 what()590 virtual const char* what() const throw() { return message_.c_str(); } 591 ~cuda_not_available_exception()592 virtual ~cuda_not_available_exception() throw() {} 593 private: 594 std::string message_; 595 }; 596 597 class zero_on_diagonal_exception : public std::runtime_error 598 { 599 public: zero_on_diagonal_exception(std::string const & what_arg)600 zero_on_diagonal_exception(std::string const & what_arg) : std::runtime_error(what_arg) {} 601 }; 602 603 class unknown_norm_exception : public std::runtime_error 604 { 605 public: unknown_norm_exception(std::string const & what_arg)606 unknown_norm_exception(std::string const & what_arg) : std::runtime_error(what_arg) {} 607 }; 608 609 610 611 namespace tools 612 { 613 //helper for matrix row/col iterators 614 //must be specialized for every viennacl matrix type 615 /** @brief Helper class for incrementing an iterator in a dense matrix. */ 616 template<typename ROWCOL, typename MATRIXTYPE> 617 struct MATRIX_ITERATOR_INCREMENTER 618 { 619 typedef typename MATRIXTYPE::ERROR_SPECIALIZATION_FOR_THIS_MATRIX_TYPE_MISSING ErrorIndicator; 620 applyMATRIX_ITERATOR_INCREMENTER621 static void apply(const MATRIXTYPE & /*mat*/, unsigned int & /*row*/, unsigned int & /*col*/) {} 622 }; 623 } 624 625 namespace linalg 626 { 627 #if !defined(_MSC_VER) || defined(__CUDACC__) 628 629 template<class SCALARTYPE, unsigned int ALIGNMENT> 630 void convolve_i(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1, 631 viennacl::vector<SCALARTYPE, ALIGNMENT>& input2, 632 viennacl::vector<SCALARTYPE, ALIGNMENT>& output); 633 634 template<typename T> 635 viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_prod> > 636 element_prod(vector_base<T> const & v1, vector_base<T> const & v2); 637 638 template<typename T> 639 viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_div> > 640 element_div(vector_base<T> const & v1, vector_base<T> const & v2); 641 642 643 644 template<typename T> 645 void inner_prod_impl(vector_base<T> const & vec1, 646 vector_base<T> const & vec2, 647 scalar<T> & result); 648 649 template<typename LHS, typename RHS, typename OP, typename T> 650 void inner_prod_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec1, 651 vector_base<T> const & vec2, 652 scalar<T> & result); 653 654 template<typename T, typename LHS, typename RHS, typename OP> 655 void inner_prod_impl(vector_base<T> const & vec1, 656 viennacl::vector_expression<LHS, RHS, OP> const & vec2, 657 scalar<T> & result); 658 659 template<typename LHS1, typename RHS1, typename OP1, 660 typename LHS2, typename RHS2, typename OP2, typename T> 661 void inner_prod_impl(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1, 662 viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2, 663 scalar<T> & result); 664 665 /////////////////////////// 666 667 template<typename T> 668 void inner_prod_cpu(vector_base<T> const & vec1, 669 vector_base<T> const & vec2, 670 T & result); 671 672 template<typename LHS, typename RHS, typename OP, typename T> 673 void inner_prod_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec1, 674 vector_base<T> const & vec2, 675 T & result); 676 677 template<typename T, typename LHS, typename RHS, typename OP> 678 void inner_prod_cpu(vector_base<T> const & vec1, 679 viennacl::vector_expression<LHS, RHS, OP> const & vec2, 680 T & result); 681 682 template<typename LHS1, typename RHS1, typename OP1, 683 typename LHS2, typename RHS2, typename OP2, typename S3> 684 void inner_prod_cpu(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1, 685 viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2, 686 S3 & result); 687 688 689 690 //forward definition of norm_1_impl function 691 template<typename T> 692 void norm_1_impl(vector_base<T> const & vec, scalar<T> & result); 693 694 template<typename LHS, typename RHS, typename OP, typename T> 695 void norm_1_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 696 scalar<T> & result); 697 698 699 template<typename T> 700 void norm_1_cpu(vector_base<T> const & vec, 701 T & result); 702 703 template<typename LHS, typename RHS, typename OP, typename S2> 704 void norm_1_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 705 S2 & result); 706 707 //forward definition of norm_2_impl function 708 template<typename T> 709 void norm_2_impl(vector_base<T> const & vec, scalar<T> & result); 710 711 template<typename LHS, typename RHS, typename OP, typename T> 712 void norm_2_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 713 scalar<T> & result); 714 715 template<typename T> 716 void norm_2_cpu(vector_base<T> const & vec, T & result); 717 718 template<typename LHS, typename RHS, typename OP, typename S2> 719 void norm_2_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 720 S2 & result); 721 722 723 //forward definition of norm_inf_impl function 724 template<typename T> 725 void norm_inf_impl(vector_base<T> const & vec, scalar<T> & result); 726 727 template<typename LHS, typename RHS, typename OP, typename T> 728 void norm_inf_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 729 scalar<T> & result); 730 731 732 template<typename T> 733 void norm_inf_cpu(vector_base<T> const & vec, T & result); 734 735 template<typename LHS, typename RHS, typename OP, typename S2> 736 void norm_inf_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 737 S2 & result); 738 739 //forward definition of max()-related functions 740 template<typename T> 741 void max_impl(vector_base<T> const & vec, scalar<T> & result); 742 743 template<typename LHS, typename RHS, typename OP, typename T> 744 void max_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 745 scalar<T> & result); 746 747 748 template<typename T> 749 void max_cpu(vector_base<T> const & vec, T & result); 750 751 template<typename LHS, typename RHS, typename OP, typename S2> 752 void max_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 753 S2 & result); 754 755 //forward definition of min()-related functions 756 template<typename T> 757 void min_impl(vector_base<T> const & vec, scalar<T> & result); 758 759 template<typename LHS, typename RHS, typename OP, typename T> 760 void min_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 761 scalar<T> & result); 762 763 764 template<typename T> 765 void min_cpu(vector_base<T> const & vec, T & result); 766 767 template<typename LHS, typename RHS, typename OP, typename S2> 768 void min_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 769 S2 & result); 770 771 //forward definition of sum()-related functions 772 template<typename T> 773 void sum_impl(vector_base<T> const & vec, scalar<T> & result); 774 775 template<typename LHS, typename RHS, typename OP, typename T> 776 void sum_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 777 scalar<T> & result); 778 779 780 template<typename T> 781 void sum_cpu(vector_base<T> const & vec, T & result); 782 783 template<typename LHS, typename RHS, typename OP, typename S2> 784 void sum_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, 785 S2 & result); 786 787 788 // forward definition of frobenius norm: 789 template<typename T> 790 void norm_frobenius_impl(matrix_base<T> const & vec, scalar<T> & result); 791 792 template<typename T> 793 void norm_frobenius_cpu(matrix_base<T> const & vec, T & result); 794 795 796 template<typename T> 797 vcl_size_t index_norm_inf(vector_base<T> const & vec); 798 799 template<typename LHS, typename RHS, typename OP> 800 vcl_size_t index_norm_inf(viennacl::vector_expression<LHS, RHS, OP> const & vec); 801 802 //forward definition of prod_impl functions 803 804 template<typename NumericT> 805 void prod_impl(const matrix_base<NumericT> & mat, 806 const vector_base<NumericT> & vec, 807 vector_base<NumericT> & result); 808 809 template<typename NumericT> 810 void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans, 811 const vector_base<NumericT> & vec, 812 vector_base<NumericT> & result); 813 814 template<typename SparseMatrixType, class SCALARTYPE, unsigned int ALIGNMENT> 815 typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value, 816 vector_expression<const SparseMatrixType, 817 const vector<SCALARTYPE, ALIGNMENT>, 818 op_prod > 819 >::type 820 prod_impl(const SparseMatrixType & mat, 821 const vector<SCALARTYPE, ALIGNMENT> & vec); 822 823 // forward definition of summation routines for matrices: 824 825 template<typename NumericT> 826 void row_sum_impl(const matrix_base<NumericT> & A, 827 vector_base<NumericT> & result); 828 829 template<typename NumericT> 830 void column_sum_impl(const matrix_base<NumericT> & A, 831 vector_base<NumericT> & result); 832 833 #endif 834 835 namespace detail 836 { 837 enum row_info_types 838 { 839 SPARSE_ROW_NORM_INF = 0, 840 SPARSE_ROW_NORM_1, 841 SPARSE_ROW_NORM_2, 842 SPARSE_ROW_DIAGONAL 843 }; 844 845 } 846 847 848 /** @brief A tag class representing a lower triangular matrix */ 849 struct lower_tag 850 { namelower_tag851 static const char * name() { return "lower"; } 852 }; //lower triangular matrix 853 /** @brief A tag class representing an upper triangular matrix */ 854 struct upper_tag 855 { nameupper_tag856 static const char * name() { return "upper"; } 857 }; //upper triangular matrix 858 /** @brief A tag class representing a lower triangular matrix with unit diagonal*/ 859 struct unit_lower_tag 860 { nameunit_lower_tag861 static const char * name() { return "unit_lower"; } 862 }; //unit lower triangular matrix 863 /** @brief A tag class representing an upper triangular matrix with unit diagonal*/ 864 struct unit_upper_tag 865 { nameunit_upper_tag866 static const char * name() { return "unit_upper"; } 867 }; //unit upper triangular matrix 868 869 //preconditioner tags 870 class ilut_tag; 871 872 /** @brief A tag class representing the use of no preconditioner */ 873 class no_precond 874 { 875 public: 876 template<typename VectorType> apply(VectorType &)877 void apply(VectorType &) const {} 878 }; 879 880 881 } //namespace linalg 882 883 // 884 // More namespace comments to follow: 885 // 886 887 /** @brief Namespace providing routines for handling the different memory domains. */ 888 namespace backend 889 { 890 /** @brief Provides implementations for handling memory buffers in CPU RAM. */ 891 namespace cpu_ram 892 { 893 /** @brief Holds implementation details for handling memory buffers in CPU RAM. Not intended for direct use by library users. */ 894 namespace detail {} 895 } 896 897 /** @brief Provides implementations for handling CUDA memory buffers. */ 898 namespace cuda 899 { 900 /** @brief Holds implementation details for handling CUDA memory buffers. Not intended for direct use by library users. */ 901 namespace detail {} 902 } 903 904 /** @brief Implementation details for the generic memory backend interface. */ 905 namespace detail {} 906 907 /** @brief Provides implementations for handling OpenCL memory buffers. */ 908 namespace opencl 909 { 910 /** @brief Holds implementation details for handling OpenCL memory buffers. Not intended for direct use by library users. */ 911 namespace detail {} 912 } 913 } 914 915 916 /** @brief Holds implementation details for functionality in the main viennacl-namespace. Not intended for direct use by library users. */ 917 namespace detail 918 { 919 /** @brief Helper namespace for fast Fourier transforms. Not to be used directly by library users. */ 920 namespace fft 921 { 922 /** @brief Helper namespace for fast-Fourier transformation. Deprecated. */ 923 namespace FFT_DATA_ORDER {} 924 } 925 } 926 927 928 /** @brief Provides an OpenCL kernel generator. */ 929 namespace device_specific 930 { 931 /** @brief Provides the implementation for tuning the kernels for a particular device. */ 932 namespace autotune {} 933 934 /** @brief Contains implementation details of the kernel generator. */ 935 namespace detail {} 936 937 /** @brief Namespace holding the various device-specific parameters for generating the best kernels. */ 938 namespace profiles {} 939 940 /** @brief Contains various helper routines for kernel generation. */ 941 namespace utils {} 942 } 943 944 /** @brief Provides basic input-output functionality. */ 945 namespace io 946 { 947 /** @brief Implementation details for IO functionality. Usually not of interest for a library user. */ 948 namespace detail {} 949 950 /** @brief Namespace holding the various XML tag definitions for the kernel parameter tuning facility. */ 951 namespace tag {} 952 953 /** @brief Namespace holding the various XML strings for the kernel parameter tuning facility. */ 954 namespace val {} 955 } 956 957 /** @brief Provides all linear algebra operations which are not covered by operator overloads. */ 958 namespace linalg 959 { 960 /** @brief Holds all CUDA compute kernels used by ViennaCL. */ 961 namespace cuda 962 { 963 /** @brief Helper functions for the CUDA linear algebra backend. */ 964 namespace detail {} 965 } 966 967 /** @brief Namespace holding implementation details for linear algebra routines. Usually not of interest for a library user. */ 968 namespace detail 969 { 970 /** @brief Implementation namespace for algebraic multigrid preconditioner. */ 971 namespace amg {} 972 973 /** @brief Implementation namespace for sparse approximate inverse preconditioner. */ 974 namespace spai {} 975 } 976 977 /** @brief Holds all compute kernels with conventional host-based execution (buffers in CPU RAM). */ 978 namespace host_based 979 { 980 /** @brief Helper functions for the host-based linear algebra backend. */ 981 namespace detail {} 982 } 983 984 /** @brief Namespace containing the OpenCL kernels. Deprecated, will be moved to viennacl::linalg::opencl in future releases. */ 985 namespace kernels {} 986 987 /** @brief Holds all routines providing OpenCL linear algebra operations. */ 988 namespace opencl 989 { 990 /** @brief Helper functions for OpenCL-accelerated linear algebra operations. */ 991 namespace detail {} 992 993 /** @brief Contains the OpenCL kernel generation functions for a predefined set of functionality. */ 994 namespace kernels 995 { 996 /** @brief Implementation details for the predefined OpenCL kernels. */ 997 namespace detail {} 998 } 999 } 1000 } 1001 1002 /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ 1003 namespace ocl {} 1004 1005 /** @brief Namespace containing many meta-functions. */ 1006 namespace result_of {} 1007 1008 /** @brief Namespace for various tools used within ViennaCL. */ 1009 namespace tools 1010 { 1011 /** @brief Contains implementation details for the tools. Usually not of interest for the library user. */ 1012 namespace detail {} 1013 } 1014 1015 /** @brief Namespace providing traits-information as well as generic wrappers to common routines for vectors and matrices such as size() or clear() */ 1016 namespace traits {} 1017 1018 /** @brief Contains the scheduling functionality which allows for dynamic kernel generation as well as the fusion of multiple statements into a single kernel. */ 1019 namespace scheduler 1020 { 1021 /** @brief Implementation details for the scheduler */ 1022 namespace detail {} 1023 1024 /** @brief Helper metafunctions used for the scheduler */ 1025 namespace result_of {} 1026 } 1027 1028 } //namespace viennacl 1029 1030 #endif 1031 1032 /*@}*/ 1033