1 #ifndef VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_ 2 #define VIENNACL_LINALG_MATRIX_OPERATIONS_HPP_ 3 4 /* ========================================================================= 5 Copyright (c) 2010-2016, Institute for Microelectronics, 6 Institute for Analysis and Scientific Computing, 7 TU Wien. 8 Portions of this software are copyright by UChicago Argonne, LLC. 9 10 ----------------- 11 ViennaCL - The Vienna Computing Library 12 ----------------- 13 14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 15 16 (A list of authors and contributors can be found in the manual) 17 18 License: MIT (X11), see file LICENSE in the base directory 19 ============================================================================= */ 20 21 /** @file viennacl/linalg/matrix_operations.hpp 22 @brief Implementations of dense matrix related operations including matrix-vector products. 23 */ 24 25 #include "viennacl/forwards.h" 26 #include "viennacl/scalar.hpp" 27 #include "viennacl/vector.hpp" 28 #include "viennacl/vector_proxy.hpp" 29 #include "viennacl/tools/tools.hpp" 30 #include "viennacl/meta/enable_if.hpp" 31 #include "viennacl/meta/predicate.hpp" 32 #include "viennacl/meta/result_of.hpp" 33 #include "viennacl/traits/size.hpp" 34 #include "viennacl/traits/start.hpp" 35 #include "viennacl/traits/handle.hpp" 36 #include "viennacl/traits/stride.hpp" 37 #include "viennacl/vector.hpp" 38 #include "viennacl/linalg/host_based/matrix_operations.hpp" 39 40 #ifdef VIENNACL_WITH_OPENCL 41 #include "viennacl/linalg/opencl/matrix_operations.hpp" 42 #endif 43 44 #ifdef VIENNACL_WITH_CUDA 45 #include "viennacl/linalg/cuda/matrix_operations.hpp" 46 #endif 47 48 namespace viennacl 49 { 50 namespace linalg 51 { 52 53 template<typename DestNumericT, typename SrcNumericT> convert(matrix_base<DestNumericT> & dest,matrix_base<SrcNumericT> const & src)54 void convert(matrix_base<DestNumericT> & dest, matrix_base<SrcNumericT> const & src) 55 { 56 assert(viennacl::traits::size1(dest) == viennacl::traits::size1(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size1(m1) != size1(m2)")); 57 assert(viennacl::traits::size2(dest) == viennacl::traits::size2(src) && bool("Incompatible matrix sizes in m1 = m2 (convert): size2(m1) != size2(m2)")); 58 59 switch (viennacl::traits::handle(dest).get_active_handle_id()) 60 { 61 case viennacl::MAIN_MEMORY: 62 viennacl::linalg::host_based::convert(dest, src); 63 break; 64 #ifdef VIENNACL_WITH_OPENCL 65 case viennacl::OPENCL_MEMORY: 66 viennacl::linalg::opencl::convert(dest, src); 67 break; 68 #endif 69 #ifdef VIENNACL_WITH_CUDA 70 case viennacl::CUDA_MEMORY: 71 viennacl::linalg::cuda::convert(dest, src); 72 break; 73 #endif 74 case viennacl::MEMORY_NOT_INITIALIZED: 75 throw memory_exception("not initialised!"); 76 default: 77 throw memory_exception("not implemented"); 78 } 79 } 80 81 template<typename NumericT, 82 typename SizeT, typename DistanceT> trans(const matrix_expression<const matrix_base<NumericT,SizeT,DistanceT>,const matrix_base<NumericT,SizeT,DistanceT>,op_trans> & proxy,matrix_base<NumericT> & temp_trans)83 void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy, 84 matrix_base<NumericT> & temp_trans) 85 { 86 switch (viennacl::traits::handle(proxy).get_active_handle_id()) 87 { 88 case viennacl::MAIN_MEMORY: 89 viennacl::linalg::host_based::trans(proxy, temp_trans); 90 break; 91 #ifdef VIENNACL_WITH_OPENCL 92 case viennacl::OPENCL_MEMORY: 93 viennacl::linalg::opencl::trans(proxy,temp_trans); 94 break; 95 #endif 96 #ifdef VIENNACL_WITH_CUDA 97 case viennacl::CUDA_MEMORY: 98 viennacl::linalg::cuda::trans(proxy,temp_trans); 99 break; 100 #endif 101 case viennacl::MEMORY_NOT_INITIALIZED: 102 throw memory_exception("not initialised!"); 103 default: 104 throw memory_exception("not implemented"); 105 } 106 } 107 108 109 template<typename NumericT, 110 typename ScalarType1> am(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha)111 void am(matrix_base<NumericT> & mat1, 112 matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) 113 { 114 switch (viennacl::traits::handle(mat1).get_active_handle_id()) 115 { 116 case viennacl::MAIN_MEMORY: 117 viennacl::linalg::host_based::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); 118 break; 119 #ifdef VIENNACL_WITH_OPENCL 120 case viennacl::OPENCL_MEMORY: 121 viennacl::linalg::opencl::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); 122 break; 123 #endif 124 #ifdef VIENNACL_WITH_CUDA 125 case viennacl::CUDA_MEMORY: 126 viennacl::linalg::cuda::am(mat1, mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha); 127 break; 128 #endif 129 case viennacl::MEMORY_NOT_INITIALIZED: 130 throw memory_exception("not initialised!"); 131 default: 132 throw memory_exception("not implemented"); 133 } 134 } 135 136 137 template<typename NumericT, 138 typename ScalarType1, typename ScalarType2> ambm(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,matrix_base<NumericT> const & mat3,ScalarType2 const & beta,vcl_size_t len_beta,bool reciprocal_beta,bool flip_sign_beta)139 void ambm(matrix_base<NumericT> & mat1, 140 matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, 141 matrix_base<NumericT> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) 142 { 143 switch (viennacl::traits::handle(mat1).get_active_handle_id()) 144 { 145 case viennacl::MAIN_MEMORY: 146 viennacl::linalg::host_based::ambm(mat1, 147 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 148 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 149 break; 150 #ifdef VIENNACL_WITH_OPENCL 151 case viennacl::OPENCL_MEMORY: 152 viennacl::linalg::opencl::ambm(mat1, 153 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 154 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 155 break; 156 #endif 157 #ifdef VIENNACL_WITH_CUDA 158 case viennacl::CUDA_MEMORY: 159 viennacl::linalg::cuda::ambm(mat1, 160 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 161 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 162 break; 163 #endif 164 case viennacl::MEMORY_NOT_INITIALIZED: 165 throw memory_exception("not initialised!"); 166 default: 167 throw memory_exception("not implemented"); 168 } 169 } 170 171 172 template<typename NumericT, 173 typename ScalarType1, typename ScalarType2> ambm_m(matrix_base<NumericT> & mat1,matrix_base<NumericT> const & mat2,ScalarType1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,matrix_base<NumericT> const & mat3,ScalarType2 const & beta,vcl_size_t len_beta,bool reciprocal_beta,bool flip_sign_beta)174 void ambm_m(matrix_base<NumericT> & mat1, 175 matrix_base<NumericT> const & mat2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, 176 matrix_base<NumericT> const & mat3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) 177 { 178 switch (viennacl::traits::handle(mat1).get_active_handle_id()) 179 { 180 case viennacl::MAIN_MEMORY: 181 viennacl::linalg::host_based::ambm_m(mat1, 182 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 183 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 184 break; 185 #ifdef VIENNACL_WITH_OPENCL 186 case viennacl::OPENCL_MEMORY: 187 viennacl::linalg::opencl::ambm_m(mat1, 188 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 189 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 190 break; 191 #endif 192 #ifdef VIENNACL_WITH_CUDA 193 case viennacl::CUDA_MEMORY: 194 viennacl::linalg::cuda::ambm_m(mat1, 195 mat2, alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 196 mat3, beta, len_beta, reciprocal_beta, flip_sign_beta); 197 break; 198 #endif 199 case viennacl::MEMORY_NOT_INITIALIZED: 200 throw memory_exception("not initialised!"); 201 default: 202 throw memory_exception("not implemented"); 203 } 204 } 205 206 207 template<typename NumericT> matrix_assign(matrix_base<NumericT> & mat,NumericT s,bool clear=false)208 void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false) 209 { 210 switch (viennacl::traits::handle(mat).get_active_handle_id()) 211 { 212 case viennacl::MAIN_MEMORY: 213 viennacl::linalg::host_based::matrix_assign(mat, s, clear); 214 break; 215 #ifdef VIENNACL_WITH_OPENCL 216 case viennacl::OPENCL_MEMORY: 217 viennacl::linalg::opencl::matrix_assign(mat, s, clear); 218 break; 219 #endif 220 #ifdef VIENNACL_WITH_CUDA 221 case viennacl::CUDA_MEMORY: 222 viennacl::linalg::cuda::matrix_assign(mat, s, clear); 223 break; 224 #endif 225 case viennacl::MEMORY_NOT_INITIALIZED: 226 throw memory_exception("not initialised!"); 227 default: 228 throw memory_exception("not implemented"); 229 } 230 } 231 232 233 template<typename NumericT> matrix_diagonal_assign(matrix_base<NumericT> & mat,NumericT s)234 void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s) 235 { 236 switch (viennacl::traits::handle(mat).get_active_handle_id()) 237 { 238 case viennacl::MAIN_MEMORY: 239 viennacl::linalg::host_based::matrix_diagonal_assign(mat, s); 240 break; 241 #ifdef VIENNACL_WITH_OPENCL 242 case viennacl::OPENCL_MEMORY: 243 viennacl::linalg::opencl::matrix_diagonal_assign(mat, s); 244 break; 245 #endif 246 #ifdef VIENNACL_WITH_CUDA 247 case viennacl::CUDA_MEMORY: 248 viennacl::linalg::cuda::matrix_diagonal_assign(mat, s); 249 break; 250 #endif 251 case viennacl::MEMORY_NOT_INITIALIZED: 252 throw memory_exception("not initialised!"); 253 default: 254 throw memory_exception("not implemented"); 255 } 256 } 257 258 259 /** @brief Dispatcher interface for A = diag(v, k) */ 260 template<typename NumericT> matrix_diag_from_vector(const vector_base<NumericT> & v,int k,matrix_base<NumericT> & A)261 void matrix_diag_from_vector(const vector_base<NumericT> & v, int k, matrix_base<NumericT> & A) 262 { 263 switch (viennacl::traits::handle(v).get_active_handle_id()) 264 { 265 case viennacl::MAIN_MEMORY: 266 viennacl::linalg::host_based::matrix_diag_from_vector(v, k, A); 267 break; 268 #ifdef VIENNACL_WITH_OPENCL 269 case viennacl::OPENCL_MEMORY: 270 viennacl::linalg::opencl::matrix_diag_from_vector(v, k, A); 271 break; 272 #endif 273 #ifdef VIENNACL_WITH_CUDA 274 case viennacl::CUDA_MEMORY: 275 viennacl::linalg::cuda::matrix_diag_from_vector(v, k, A); 276 break; 277 #endif 278 case viennacl::MEMORY_NOT_INITIALIZED: 279 throw memory_exception("not initialised!"); 280 default: 281 throw memory_exception("not implemented"); 282 } 283 } 284 285 /** @brief Dispatcher interface for v = diag(A, k) */ 286 template<typename NumericT> matrix_diag_to_vector(const matrix_base<NumericT> & A,int k,vector_base<NumericT> & v)287 void matrix_diag_to_vector(const matrix_base<NumericT> & A, int k, vector_base<NumericT> & v) 288 { 289 switch (viennacl::traits::handle(A).get_active_handle_id()) 290 { 291 case viennacl::MAIN_MEMORY: 292 viennacl::linalg::host_based::matrix_diag_to_vector(A, k, v); 293 break; 294 #ifdef VIENNACL_WITH_OPENCL 295 case viennacl::OPENCL_MEMORY: 296 viennacl::linalg::opencl::matrix_diag_to_vector(A, k, v); 297 break; 298 #endif 299 #ifdef VIENNACL_WITH_CUDA 300 case viennacl::CUDA_MEMORY: 301 viennacl::linalg::cuda::matrix_diag_to_vector(A, k, v); 302 break; 303 #endif 304 case viennacl::MEMORY_NOT_INITIALIZED: 305 throw memory_exception("not initialised!"); 306 default: 307 throw memory_exception("not implemented"); 308 } 309 } 310 311 template<typename NumericT> matrix_row(const matrix_base<NumericT> & A,unsigned int i,vector_base<NumericT> & v)312 void matrix_row(const matrix_base<NumericT> & A, unsigned int i, vector_base<NumericT> & v) 313 { 314 switch (viennacl::traits::handle(A).get_active_handle_id()) 315 { 316 case viennacl::MAIN_MEMORY: 317 viennacl::linalg::host_based::matrix_row(A, i, v); 318 break; 319 #ifdef VIENNACL_WITH_OPENCL 320 case viennacl::OPENCL_MEMORY: 321 viennacl::linalg::opencl::matrix_row(A, i, v); 322 break; 323 #endif 324 #ifdef VIENNACL_WITH_CUDA 325 case viennacl::CUDA_MEMORY: 326 viennacl::linalg::cuda::matrix_row(A, i, v); 327 break; 328 #endif 329 case viennacl::MEMORY_NOT_INITIALIZED: 330 throw memory_exception("not initialised!"); 331 default: 332 throw memory_exception("not implemented"); 333 } 334 } 335 336 template<typename NumericT> matrix_column(const matrix_base<NumericT> & A,unsigned int j,vector_base<NumericT> & v)337 void matrix_column(const matrix_base<NumericT> & A, unsigned int j, vector_base<NumericT> & v) 338 { 339 switch (viennacl::traits::handle(A).get_active_handle_id()) 340 { 341 case viennacl::MAIN_MEMORY: 342 viennacl::linalg::host_based::matrix_column(A, j, v); 343 break; 344 #ifdef VIENNACL_WITH_OPENCL 345 case viennacl::OPENCL_MEMORY: 346 viennacl::linalg::opencl::matrix_column(A, j, v); 347 break; 348 #endif 349 #ifdef VIENNACL_WITH_CUDA 350 case viennacl::CUDA_MEMORY: 351 viennacl::linalg::cuda::matrix_column(A, j, v); 352 break; 353 #endif 354 case viennacl::MEMORY_NOT_INITIALIZED: 355 throw memory_exception("not initialised!"); 356 default: 357 throw memory_exception("not implemented"); 358 } 359 } 360 361 /** @brief Computes the Frobenius norm of a matrix - dispatcher interface 362 * 363 * @param A The matrix 364 * @param result The result scalar 365 * 366 * Note that if A is strided or off-set, then a copy will be created. 367 */ 368 template<typename T> norm_frobenius_impl(matrix_base<T> const & A,scalar<T> & result)369 void norm_frobenius_impl(matrix_base<T> const & A, 370 scalar<T> & result) 371 { 372 typedef typename matrix_base<T>::handle_type HandleType; 373 374 if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) { 375 if (A.row_major()) { 376 viennacl::matrix<T, viennacl::row_major> temp_A(A); 377 viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); 378 norm_2_impl(temp, result); 379 } else { 380 viennacl::matrix<T, viennacl::column_major> temp_A(A); 381 viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); 382 norm_2_impl(temp, result); 383 } 384 } else { 385 viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1); 386 norm_2_impl(temp, result); 387 } 388 389 } 390 391 /** @brief Computes the Frobenius norm of a vector with final reduction on the CPU 392 * 393 * @param A The matrix 394 * @param result The result scalar 395 * 396 * Note that if A is strided or off-set, then a copy will be created. 397 */ 398 template<typename T> norm_frobenius_cpu(matrix_base<T> const & A,T & result)399 void norm_frobenius_cpu(matrix_base<T> const & A, 400 T & result) 401 { 402 typedef typename matrix_base<T>::handle_type HandleType; 403 404 if ((A.start1() > 0) || (A.start2() > 0) || (A.stride1() > 1) || (A.stride2() > 1)) { 405 if (A.row_major()) { 406 viennacl::matrix<T, viennacl::row_major> temp_A(A); 407 viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); 408 norm_2_cpu(temp, result); 409 } else { 410 viennacl::matrix<T, viennacl::column_major> temp_A(A); 411 viennacl::vector_base<T> temp(const_cast<HandleType &>(temp_A.handle()), temp_A.internal_size(), 0, 1); 412 norm_2_cpu(temp, result); 413 } 414 } else { 415 viennacl::vector_base<T> temp(const_cast<HandleType &>(A.handle()), A.internal_size(), 0, 1); 416 norm_2_cpu(temp, result); 417 } 418 419 } 420 421 // 422 ///////////////////////// matrix-vector products ///////////////////////////////// 423 // 424 425 426 427 // A * x 428 429 /** @brief Carries out matrix-vector multiplication 430 * 431 * Implementation of the convenience expression result = prod(mat, vec); 432 * 433 * @param mat The matrix 434 * @param vec The vector 435 * @param result The result vector 436 */ 437 template<typename NumericT> prod_impl(const matrix_base<NumericT> & mat,const vector_base<NumericT> & vec,vector_base<NumericT> & result)438 void prod_impl(const matrix_base<NumericT> & mat, 439 const vector_base<NumericT> & vec, 440 vector_base<NumericT> & result) 441 { 442 assert( (viennacl::traits::size1(mat) == viennacl::traits::size(result)) && bool("Size check failed at v1 = prod(A, v2): size1(A) != size(v1)")); 443 assert( (viennacl::traits::size2(mat) == viennacl::traits::size(vec)) && bool("Size check failed at v1 = prod(A, v2): size2(A) != size(v2)")); 444 445 switch (viennacl::traits::handle(mat).get_active_handle_id()) 446 { 447 case viennacl::MAIN_MEMORY: 448 viennacl::linalg::host_based::prod_impl(mat, false, vec, result); 449 break; 450 #ifdef VIENNACL_WITH_OPENCL 451 case viennacl::OPENCL_MEMORY: 452 viennacl::linalg::opencl::prod_impl(mat, false, vec, result); 453 break; 454 #endif 455 #ifdef VIENNACL_WITH_CUDA 456 case viennacl::CUDA_MEMORY: 457 viennacl::linalg::cuda::prod_impl(mat, false, vec, result); 458 break; 459 #endif 460 case viennacl::MEMORY_NOT_INITIALIZED: 461 throw memory_exception("not initialised!"); 462 default: 463 throw memory_exception("not implemented"); 464 } 465 } 466 467 468 // trans(A) * x 469 470 /** @brief Carries out matrix-vector multiplication with a transposed matrix 471 * 472 * Implementation of the convenience expression result = trans(mat) * vec; 473 * 474 * @param mat_trans The transposed matrix proxy 475 * @param vec The vector 476 * @param result The result vector 477 */ 478 template<typename NumericT> prod_impl(const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & mat_trans,const vector_base<NumericT> & vec,vector_base<NumericT> & result)479 void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans, 480 const vector_base<NumericT> & vec, 481 vector_base<NumericT> & result) 482 { 483 assert( (viennacl::traits::size1(mat_trans.lhs()) == viennacl::traits::size(vec)) && bool("Size check failed at v1 = trans(A) * v2: size1(A) != size(v2)")); 484 assert( (viennacl::traits::size2(mat_trans.lhs()) == viennacl::traits::size(result)) && bool("Size check failed at v1 = trans(A) * v2: size2(A) != size(v1)")); 485 486 switch (viennacl::traits::handle(mat_trans.lhs()).get_active_handle_id()) 487 { 488 case viennacl::MAIN_MEMORY: 489 viennacl::linalg::host_based::prod_impl(mat_trans.lhs(), true, vec, result); 490 break; 491 #ifdef VIENNACL_WITH_OPENCL 492 case viennacl::OPENCL_MEMORY: 493 viennacl::linalg::opencl::prod_impl(mat_trans.lhs(), true, vec, result); 494 break; 495 #endif 496 #ifdef VIENNACL_WITH_CUDA 497 case viennacl::CUDA_MEMORY: 498 viennacl::linalg::cuda::prod_impl(mat_trans.lhs(), true, vec, result); 499 break; 500 #endif 501 case viennacl::MEMORY_NOT_INITIALIZED: 502 throw memory_exception("not initialised!"); 503 default: 504 throw memory_exception("not implemented"); 505 } 506 } 507 508 509 // 510 ///////////////////////// matrix-matrix products ///////////////////////////////// 511 // 512 513 /** @brief Carries out matrix-matrix multiplication 514 * 515 * Implementation of C = prod(A, B); 516 * 517 */ 518 template<typename NumericT, typename ScalarType > prod_impl(const matrix_base<NumericT> & A,const matrix_base<NumericT> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)519 void prod_impl(const matrix_base<NumericT> & A, 520 const matrix_base<NumericT> & B, 521 matrix_base<NumericT> & C, 522 ScalarType alpha, 523 ScalarType beta) 524 { 525 assert( (viennacl::traits::size1(A) == viennacl::traits::size1(C)) && bool("Size check failed at C = prod(A, B): size1(A) != size1(C)")); 526 assert( (viennacl::traits::size2(A) == viennacl::traits::size1(B)) && bool("Size check failed at C = prod(A, B): size2(A) != size1(B)")); 527 assert( (viennacl::traits::size2(B) == viennacl::traits::size2(C)) && bool("Size check failed at C = prod(A, B): size2(B) != size2(C)")); 528 529 530 switch (viennacl::traits::handle(A).get_active_handle_id()) 531 { 532 case viennacl::MAIN_MEMORY: 533 viennacl::linalg::host_based::prod_impl(A, false, B, false, C, alpha, beta); 534 break; 535 #ifdef VIENNACL_WITH_OPENCL 536 case viennacl::OPENCL_MEMORY: 537 viennacl::linalg::opencl::prod_impl(A, false, B, false, C, alpha, beta); 538 break; 539 #endif 540 #ifdef VIENNACL_WITH_CUDA 541 case viennacl::CUDA_MEMORY: 542 viennacl::linalg::cuda::prod_impl(A, false, B, false, C, alpha, beta); 543 break; 544 #endif 545 case viennacl::MEMORY_NOT_INITIALIZED: 546 throw memory_exception("not initialised!"); 547 default: 548 throw memory_exception("not implemented"); 549 } 550 } 551 552 553 554 /** @brief Carries out matrix-matrix multiplication 555 * 556 * Implementation of C = prod(trans(A), B); 557 * 558 */ 559 template<typename NumericT, typename ScalarType > prod_impl(const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & A,const matrix_base<NumericT> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)560 void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, 561 const matrix_base<NumericT>, 562 op_trans> & A, 563 const matrix_base<NumericT> & B, 564 matrix_base<NumericT> & C, 565 ScalarType alpha, 566 ScalarType beta) 567 { 568 assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), B): size2(A) != size1(C)")); 569 assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size1(B) && bool("Size check failed at C = prod(trans(A), B): size1(A) != size1(B)")); 570 assert(viennacl::traits::size2(B) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), B): size2(B) != size2(C)")); 571 572 switch (viennacl::traits::handle(A.lhs()).get_active_handle_id()) 573 { 574 case viennacl::MAIN_MEMORY: 575 viennacl::linalg::host_based::prod_impl(A.lhs(), true, B, false, C, alpha, beta); 576 break; 577 #ifdef VIENNACL_WITH_OPENCL 578 case viennacl::OPENCL_MEMORY: 579 viennacl::linalg::opencl::prod_impl(A.lhs(), true, B, false, C, alpha, beta); 580 break; 581 #endif 582 #ifdef VIENNACL_WITH_CUDA 583 case viennacl::CUDA_MEMORY: 584 viennacl::linalg::cuda::prod_impl(A.lhs(), true, B, false, C, alpha, beta); 585 break; 586 #endif 587 case viennacl::MEMORY_NOT_INITIALIZED: 588 throw memory_exception("not initialised!"); 589 default: 590 throw memory_exception("not implemented"); 591 } 592 } 593 594 595 596 597 /** @brief Carries out matrix-matrix multiplication 598 * 599 * Implementation of C = prod(A, trans(B)); 600 * 601 */ 602 template<typename NumericT, typename ScalarType > prod_impl(const matrix_base<NumericT> & A,const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)603 void prod_impl(const matrix_base<NumericT> & A, 604 const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B, 605 matrix_base<NumericT> & C, 606 ScalarType alpha, 607 ScalarType beta) 608 { 609 assert(viennacl::traits::size1(A) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(A, trans(B)): size1(A) != size1(C)")); 610 assert(viennacl::traits::size2(A) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(A, trans(B)): size2(A) != size2(B)")); 611 assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(A, trans(B)): size1(B) != size2(C)")); 612 613 switch (viennacl::traits::handle(A).get_active_handle_id()) 614 { 615 case viennacl::MAIN_MEMORY: 616 viennacl::linalg::host_based::prod_impl(A, false, B.lhs(), true, C, alpha, beta); 617 break; 618 #ifdef VIENNACL_WITH_OPENCL 619 case viennacl::OPENCL_MEMORY: 620 viennacl::linalg::opencl::prod_impl(A, false, B.lhs(), true, C, alpha, beta); 621 break; 622 #endif 623 #ifdef VIENNACL_WITH_CUDA 624 case viennacl::CUDA_MEMORY: 625 viennacl::linalg::cuda::prod_impl(A, false, B.lhs(), true, C, alpha, beta); 626 break; 627 #endif 628 case viennacl::MEMORY_NOT_INITIALIZED: 629 throw memory_exception("not initialised!"); 630 default: 631 throw memory_exception("not implemented"); 632 } 633 } 634 635 636 637 /** @brief Carries out matrix-matrix multiplication 638 * 639 * Implementation of C = prod(trans(A), trans(B)); 640 * 641 */ 642 template<typename NumericT, typename ScalarType > prod_impl(const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & A,const viennacl::matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans> & B,matrix_base<NumericT> & C,ScalarType alpha,ScalarType beta)643 void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & A, 644 const viennacl::matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & B, 645 matrix_base<NumericT> & C, 646 ScalarType alpha, 647 ScalarType beta) 648 { 649 assert(viennacl::traits::size2(A.lhs()) == viennacl::traits::size1(C) && bool("Size check failed at C = prod(trans(A), trans(B)): size2(A) != size1(C)")); 650 assert(viennacl::traits::size1(A.lhs()) == viennacl::traits::size2(B.lhs()) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(A) != size2(B)")); 651 assert(viennacl::traits::size1(B.lhs()) == viennacl::traits::size2(C) && bool("Size check failed at C = prod(trans(A), trans(B)): size1(B) != size2(C)")); 652 653 switch (viennacl::traits::handle(A.lhs()).get_active_handle_id()) 654 { 655 case viennacl::MAIN_MEMORY: 656 viennacl::linalg::host_based::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); 657 break; 658 #ifdef VIENNACL_WITH_OPENCL 659 case viennacl::OPENCL_MEMORY: 660 viennacl::linalg::opencl::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); 661 break; 662 #endif 663 #ifdef VIENNACL_WITH_CUDA 664 case viennacl::CUDA_MEMORY: 665 viennacl::linalg::cuda::prod_impl(A.lhs(), true, B.lhs(), true, C, alpha, beta); 666 break; 667 #endif 668 case viennacl::MEMORY_NOT_INITIALIZED: 669 throw memory_exception("not initialised!"); 670 default: 671 throw memory_exception("not implemented"); 672 } 673 } 674 675 676 ///////////////////////// summation operations ///////////// 677 678 template<typename NumericT> row_sum_impl(matrix_base<NumericT> const & A,vector_base<NumericT> & result)679 void row_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result) 680 { 681 viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size2(), NumericT(1), viennacl::traits::context(A)); 682 viennacl::linalg::prod_impl(A, all_ones, result); 683 } 684 685 template<typename NumericT> column_sum_impl(matrix_base<NumericT> const & A,vector_base<NumericT> & result)686 void column_sum_impl(matrix_base<NumericT> const & A, vector_base<NumericT> & result) 687 { 688 viennacl::vector<NumericT> all_ones = viennacl::scalar_vector<NumericT>(A.size1(), NumericT(1), viennacl::traits::context(A)); 689 viennacl::linalg::prod_impl(matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>(A, A), all_ones, result); 690 } 691 692 ///////////////////////// Elementwise operations ///////////// 693 694 695 696 /** @brief Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syntax). Don't use this function directly, use element_prod() and element_div(). 697 * 698 * @param A The result matrix (or -range, or -slice) 699 * @param proxy The proxy object holding B, C, and the operation 700 */ 701 template<typename T, typename OP> element_op(matrix_base<T> & A,matrix_expression<const matrix_base<T>,const matrix_base<T>,OP> const & proxy)702 void element_op(matrix_base<T> & A, 703 matrix_expression<const matrix_base<T>, const matrix_base<T>, OP> const & proxy) 704 { 705 assert( (viennacl::traits::size1(A) == viennacl::traits::size1(proxy)) && bool("Size check failed at A = element_op(B): size1(A) != size1(B)")); 706 assert( (viennacl::traits::size2(A) == viennacl::traits::size2(proxy)) && bool("Size check failed at A = element_op(B): size2(A) != size2(B)")); 707 708 switch (viennacl::traits::handle(A).get_active_handle_id()) 709 { 710 case viennacl::MAIN_MEMORY: 711 viennacl::linalg::host_based::element_op(A, proxy); 712 break; 713 #ifdef VIENNACL_WITH_OPENCL 714 case viennacl::OPENCL_MEMORY: 715 viennacl::linalg::opencl::element_op(A, proxy); 716 break; 717 #endif 718 #ifdef VIENNACL_WITH_CUDA 719 case viennacl::CUDA_MEMORY: 720 viennacl::linalg::cuda::element_op(A, proxy); 721 break; 722 #endif 723 case viennacl::MEMORY_NOT_INITIALIZED: 724 throw memory_exception("not initialised!"); 725 default: 726 throw memory_exception("not implemented"); 727 } 728 } 729 730 731 #define VIENNACL_MAKE_BINARY_OP(OPNAME)\ 732 template<typename T>\ 733 viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >\ 734 element_##OPNAME(matrix_base<T> const & A, matrix_base<T> const & B)\ 735 {\ 736 return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_binary<op_##OPNAME> >(A, B);\ 737 }\ 738 \ 739 template<typename M1, typename M2, typename OP, typename T>\ 740 viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\ 741 const matrix_base<T>,\ 742 op_element_binary<op_##OPNAME> >\ 743 element_##OPNAME(matrix_expression<const M1, const M2, OP> const & proxy, matrix_base<T> const & B)\ 744 {\ 745 return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP>,\ 746 const matrix_base<T>,\ 747 op_element_binary<op_##OPNAME> >(proxy, B);\ 748 }\ 749 \ 750 template<typename T, typename M2, typename M3, typename OP>\ 751 viennacl::matrix_expression<const matrix_base<T>,\ 752 const matrix_expression<const M2, const M3, OP>,\ 753 op_element_binary<op_##OPNAME> >\ 754 element_##OPNAME(matrix_base<T> const & A, matrix_expression<const M2, const M3, OP> const & proxy)\ 755 {\ 756 return viennacl::matrix_expression<const matrix_base<T>,\ 757 const matrix_expression<const M2, const M3, OP>,\ 758 op_element_binary<op_##OPNAME> >(A, proxy);\ 759 }\ 760 \ 761 template<typename M1, typename M2, typename OP1,\ 762 typename M3, typename M4, typename OP2>\ 763 viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\ 764 const matrix_expression<const M3, const M4, OP2>,\ 765 op_element_binary<op_##OPNAME> >\ 766 element_##OPNAME(matrix_expression<const M1, const M2, OP1> const & proxy1,\ 767 matrix_expression<const M3, const M4, OP2> const & proxy2)\ 768 {\ 769 return viennacl::matrix_expression<const matrix_expression<const M1, const M2, OP1>,\ 770 const matrix_expression<const M3, const M4, OP2>,\ 771 op_element_binary<op_##OPNAME> >(proxy1, proxy2);\ 772 } 773 774 VIENNACL_MAKE_BINARY_OP(prod) VIENNACL_MAKE_BINARY_OP(div)775 VIENNACL_MAKE_BINARY_OP(div) 776 VIENNACL_MAKE_BINARY_OP(pow) 777 778 VIENNACL_MAKE_BINARY_OP(eq) 779 VIENNACL_MAKE_BINARY_OP(neq) 780 VIENNACL_MAKE_BINARY_OP(greater) 781 VIENNACL_MAKE_BINARY_OP(less) 782 VIENNACL_MAKE_BINARY_OP(geq) 783 VIENNACL_MAKE_BINARY_OP(leq) 784 785 #undef VIENNACL_GENERATE_BINARY_OP_OVERLOADS 786 787 788 789 #define VIENNACL_MAKE_UNARY_ELEMENT_OP(funcname) \ 790 template<typename T> \ 791 viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> > \ 792 element_##funcname(matrix_base<T> const & A) \ 793 { \ 794 return viennacl::matrix_expression<const matrix_base<T>, const matrix_base<T>, op_element_unary<op_##funcname> >(A, A); \ 795 } \ 796 template<typename LHS, typename RHS, typename OP> \ 797 viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \ 798 const matrix_expression<const LHS, const RHS, OP>, \ 799 op_element_unary<op_##funcname> > \ 800 element_##funcname(matrix_expression<const LHS, const RHS, OP> const & proxy) \ 801 { \ 802 return viennacl::matrix_expression<const matrix_expression<const LHS, const RHS, OP>, \ 803 const matrix_expression<const LHS, const RHS, OP>, \ 804 op_element_unary<op_##funcname> >(proxy, proxy); \ 805 } \ 806 807 VIENNACL_MAKE_UNARY_ELEMENT_OP(abs) 808 VIENNACL_MAKE_UNARY_ELEMENT_OP(acos) 809 VIENNACL_MAKE_UNARY_ELEMENT_OP(asin) 810 VIENNACL_MAKE_UNARY_ELEMENT_OP(atan) 811 VIENNACL_MAKE_UNARY_ELEMENT_OP(ceil) 812 VIENNACL_MAKE_UNARY_ELEMENT_OP(cos) 813 VIENNACL_MAKE_UNARY_ELEMENT_OP(cosh) 814 VIENNACL_MAKE_UNARY_ELEMENT_OP(exp) 815 VIENNACL_MAKE_UNARY_ELEMENT_OP(fabs) 816 VIENNACL_MAKE_UNARY_ELEMENT_OP(floor) 817 VIENNACL_MAKE_UNARY_ELEMENT_OP(log) 818 VIENNACL_MAKE_UNARY_ELEMENT_OP(log10) 819 VIENNACL_MAKE_UNARY_ELEMENT_OP(sin) 820 VIENNACL_MAKE_UNARY_ELEMENT_OP(sinh) 821 VIENNACL_MAKE_UNARY_ELEMENT_OP(sqrt) 822 VIENNACL_MAKE_UNARY_ELEMENT_OP(tan) 823 VIENNACL_MAKE_UNARY_ELEMENT_OP(tanh) 824 825 #undef VIENNACL_MAKE_UNARY_ELEMENT_OP 826 827 828 // 829 ///////////////////////// miscellaneous operations ///////////////////////////////// 830 // 831 832 833 /** @brief Returns a proxy class for the operation mat += vec1 * vec2^T, i.e. a rank 1 update 834 * 835 * @param vec1 The first vector 836 * @param vec2 The second vector 837 */ 838 template<typename NumericT> 839 viennacl::matrix_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_prod> 840 outer_prod(const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2) 841 { 842 return viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>(vec1, vec2); 843 } 844 845 846 /** @brief The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update 847 * 848 * Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2); 849 * 850 * @param mat1 The matrix to be updated 851 * @param alpha The scaling factor (either a viennacl::scalar<>, float, or double) 852 * @param len_alpha Length of the buffer for an eventual final reduction step (currently always '1') 853 * @param reciprocal_alpha Use 1/alpha instead of alpha 854 * @param flip_sign_alpha Use -alpha instead of alpha 855 * @param vec1 The first vector 856 * @param vec2 The second vector 857 */ 858 template<typename NumericT, typename S1> scaled_rank_1_update(matrix_base<NumericT> & mat1,S1 const & alpha,vcl_size_t len_alpha,bool reciprocal_alpha,bool flip_sign_alpha,const vector_base<NumericT> & vec1,const vector_base<NumericT> & vec2)859 void scaled_rank_1_update(matrix_base<NumericT> & mat1, 860 S1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, 861 const vector_base<NumericT> & vec1, 862 const vector_base<NumericT> & vec2) 863 { 864 switch (viennacl::traits::handle(mat1).get_active_handle_id()) 865 { 866 case viennacl::MAIN_MEMORY: 867 viennacl::linalg::host_based::scaled_rank_1_update(mat1, 868 alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 869 vec1, vec2); 870 break; 871 #ifdef VIENNACL_WITH_OPENCL 872 case viennacl::OPENCL_MEMORY: 873 viennacl::linalg::opencl::scaled_rank_1_update(mat1, 874 alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 875 vec1, vec2); 876 break; 877 #endif 878 #ifdef VIENNACL_WITH_CUDA 879 case viennacl::CUDA_MEMORY: 880 viennacl::linalg::cuda::scaled_rank_1_update(mat1, 881 alpha, len_alpha, reciprocal_alpha, flip_sign_alpha, 882 vec1, vec2); 883 break; 884 #endif 885 case viennacl::MEMORY_NOT_INITIALIZED: 886 throw memory_exception("not initialised!"); 887 default: 888 throw memory_exception("not implemented"); 889 } 890 } 891 892 /** @brief This function stores the diagonal and the superdiagonal of a matrix in two vectors. 893 * 894 * 895 * @param A The matrix from which the vectors will be extracted of. 896 * @param dh The vector in which the diagonal of the matrix will be stored in. 897 * @param sh The vector in which the superdiagonal of the matrix will be stored in. 898 */ 899 900 template <typename NumericT, typename VectorType> bidiag_pack(matrix_base<NumericT> & A,VectorType & dh,VectorType & sh)901 void bidiag_pack(matrix_base<NumericT> & A, 902 VectorType & dh, 903 VectorType & sh 904 ) 905 { 906 switch (viennacl::traits::handle(A).get_active_handle_id()) 907 { 908 case viennacl::MAIN_MEMORY: 909 viennacl::linalg::host_based::bidiag_pack(A, dh, sh); 910 break; 911 #ifdef VIENNACL_WITH_OPENCL 912 case viennacl::OPENCL_MEMORY: 913 viennacl::linalg::opencl::bidiag_pack(A, dh, sh); 914 break; 915 #endif 916 917 #ifdef VIENNACL_WITH_CUDA 918 case viennacl::CUDA_MEMORY: 919 viennacl::linalg::cuda::bidiag_pack(A, dh, sh); 920 break; 921 #endif 922 923 case viennacl::MEMORY_NOT_INITIALIZED: 924 throw memory_exception("not initialised!"); 925 default: 926 throw memory_exception("not implemented"); 927 } 928 929 930 } 931 /** @brief This function copies a row or a column from a matrix to a vector. 932 * 933 * 934 * @param A The matrix where to copy from. 935 * @param V The vector to fill with data. 936 * @param row_start The number of the first row to copy. 937 * @param col_start The number of the first column to copy. 938 * @param copy_col Set to TRUE to copy a column, FALSE to copy a row. 939 */ 940 941 template <typename SCALARTYPE> copy_vec(matrix_base<SCALARTYPE> & A,vector_base<SCALARTYPE> & V,vcl_size_t row_start,vcl_size_t col_start,bool copy_col)942 void copy_vec(matrix_base<SCALARTYPE>& A, 943 vector_base<SCALARTYPE>& V, 944 vcl_size_t row_start, 945 vcl_size_t col_start, 946 bool copy_col 947 ) 948 { 949 switch (viennacl::traits::handle(A).get_active_handle_id()) 950 { 951 case viennacl::MAIN_MEMORY: 952 viennacl::linalg::host_based::copy_vec(A, V, row_start, col_start, copy_col); 953 break; 954 #ifdef VIENNACL_WITH_OPENCL 955 case viennacl::OPENCL_MEMORY: 956 viennacl::linalg::opencl::copy_vec(A, V, row_start, col_start, copy_col); 957 break; 958 #endif 959 960 #ifdef VIENNACL_WITH_CUDA 961 case viennacl::CUDA_MEMORY: 962 viennacl::linalg::cuda::copy_vec(A, V, row_start, col_start, copy_col); 963 break; 964 #endif 965 966 case viennacl::MEMORY_NOT_INITIALIZED: 967 throw memory_exception("not initialised!"); 968 default: 969 throw memory_exception("not implemented"); 970 } 971 972 } 973 974 /** @brief This function applies a householder transformation to a matrix. A <- P * A with a householder reflection P 975 * 976 * @param A The matrix to be updated. 977 * @param D The normalized householder vector. 978 * @param start The repetition counter. 979 */ 980 template <typename NumericT> house_update_A_left(matrix_base<NumericT> & A,vector_base<NumericT> & D,vcl_size_t start)981 void house_update_A_left(matrix_base<NumericT> & A, 982 vector_base<NumericT> & D, 983 vcl_size_t start) 984 { 985 switch (viennacl::traits::handle(A).get_active_handle_id()) 986 { 987 case viennacl::MAIN_MEMORY: 988 viennacl::linalg::host_based::house_update_A_left(A, D, start); 989 break; 990 #ifdef VIENNACL_WITH_OPENCL 991 case viennacl::OPENCL_MEMORY: 992 viennacl::linalg::opencl::house_update_A_left(A, D, start); 993 break; 994 #endif 995 996 #ifdef VIENNACL_WITH_CUDA 997 case viennacl::CUDA_MEMORY: 998 viennacl::linalg::cuda::house_update_A_left(A, D, start); 999 break; 1000 #endif 1001 1002 case viennacl::MEMORY_NOT_INITIALIZED: 1003 throw memory_exception("not initialised!"); 1004 default: 1005 throw memory_exception("not implemented"); 1006 } 1007 } 1008 1009 1010 /** @brief This function applies a householder transformation to a matrix: A <- A * P with a householder reflection P 1011 * 1012 * 1013 * @param A The matrix to be updated. 1014 * @param D The normalized householder vector. 1015 */ 1016 1017 template <typename NumericT> house_update_A_right(matrix_base<NumericT> & A,vector_base<NumericT> & D)1018 void house_update_A_right(matrix_base<NumericT>& A, 1019 vector_base<NumericT> & D) 1020 { 1021 switch (viennacl::traits::handle(A).get_active_handle_id()) 1022 { 1023 case viennacl::MAIN_MEMORY: 1024 viennacl::linalg::host_based::house_update_A_right(A, D); 1025 break; 1026 #ifdef VIENNACL_WITH_OPENCL 1027 case viennacl::OPENCL_MEMORY: 1028 viennacl::linalg::opencl::house_update_A_right(A, D); 1029 break; 1030 #endif 1031 1032 #ifdef VIENNACL_WITH_CUDA 1033 case viennacl::CUDA_MEMORY: 1034 viennacl::linalg::cuda::house_update_A_right(A, D); 1035 break; 1036 #endif 1037 1038 case viennacl::MEMORY_NOT_INITIALIZED: 1039 throw memory_exception("not initialised!"); 1040 default: 1041 throw memory_exception("not implemented"); 1042 } 1043 } 1044 1045 /** @brief This function updates the matrix Q, which is needed for the computation of the eigenvectors. 1046 * 1047 * @param Q The matrix to be updated. 1048 * @param D The householder vector. 1049 * @param A_size1 size1 of matrix A 1050 */ 1051 1052 template <typename NumericT> house_update_QL(matrix_base<NumericT> & Q,vector_base<NumericT> & D,vcl_size_t A_size1)1053 void house_update_QL(matrix_base<NumericT> & Q, 1054 vector_base<NumericT> & D, 1055 vcl_size_t A_size1) 1056 { 1057 switch (viennacl::traits::handle(Q).get_active_handle_id()) 1058 { 1059 case viennacl::MAIN_MEMORY: 1060 viennacl::linalg::host_based::house_update_QL(Q, D, A_size1); 1061 break; 1062 #ifdef VIENNACL_WITH_OPENCL 1063 case viennacl::OPENCL_MEMORY: 1064 viennacl::linalg::opencl::house_update_QL(Q, D, A_size1); 1065 break; 1066 #endif 1067 1068 #ifdef VIENNACL_WITH_CUDA 1069 case viennacl::CUDA_MEMORY: 1070 viennacl::linalg::cuda::house_update_QL(Q, D, A_size1); 1071 break; 1072 #endif 1073 1074 case viennacl::MEMORY_NOT_INITIALIZED: 1075 throw memory_exception("not initialised!"); 1076 default: 1077 throw memory_exception("not implemented"); 1078 } 1079 } 1080 1081 1082 /** @brief This function updates the matrix Q. It is part of the tql2 algorithm. 1083 * 1084 * 1085 * @param Q The matrix to be updated. 1086 * @param tmp1 Vector with data from the tql2 algorithm. 1087 * @param tmp2 Vector with data from the tql2 algorithm. 1088 * @param l Data from the tql2 algorithm. 1089 * @param m Data from the tql2 algorithm. 1090 */ 1091 template<typename NumericT> givens_next(matrix_base<NumericT> & Q,vector_base<NumericT> & tmp1,vector_base<NumericT> & tmp2,int l,int m)1092 void givens_next(matrix_base<NumericT> & Q, 1093 vector_base<NumericT> & tmp1, 1094 vector_base<NumericT> & tmp2, 1095 int l, 1096 int m 1097 ) 1098 { 1099 switch (viennacl::traits::handle(Q).get_active_handle_id()) 1100 { 1101 case viennacl::MAIN_MEMORY: 1102 viennacl::linalg::host_based::givens_next(Q, tmp1, tmp2, l, m); 1103 break; 1104 #ifdef VIENNACL_WITH_OPENCL 1105 case viennacl::OPENCL_MEMORY: 1106 viennacl::linalg::opencl::givens_next(Q, tmp1, tmp2, l, m); 1107 break; 1108 #endif 1109 1110 #ifdef VIENNACL_WITH_CUDA 1111 case viennacl::CUDA_MEMORY: 1112 viennacl::linalg::cuda::givens_next(Q, tmp1, tmp2, l, m); 1113 break; 1114 #endif 1115 1116 case viennacl::MEMORY_NOT_INITIALIZED: 1117 throw memory_exception("not initialised!"); 1118 default: 1119 throw memory_exception("not implemented"); 1120 } 1121 } 1122 1123 } //namespace linalg 1124 1125 1126 1127 1128 // 1129 ///////////////////////// Operator overloads ///////////////////////////////// 1130 // 1131 1132 1133 //v += A * x 1134 /** @brief Implementation of the operation v1 += A * v2, where A is a matrix 1135 * 1136 * @param v1 The result vector v1 where A * v2 is added to 1137 * @param proxy An expression template proxy class. 1138 */ 1139 template<typename NumericT> 1140 vector<NumericT> operator +=(vector_base<NumericT> & v1,const viennacl::vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,viennacl::op_prod> & proxy)1141 operator+=(vector_base<NumericT> & v1, 1142 const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy) 1143 { 1144 assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 += A * v2: size1(A) != size(v1)")); 1145 1146 vector<NumericT> result(viennacl::traits::size1(proxy.lhs())); 1147 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1148 v1 += result; 1149 return v1; 1150 } 1151 1152 /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix 1153 * 1154 * @param v1 The result vector v1 where A * v2 is subtracted from 1155 * @param proxy An expression template proxy class. 1156 */ 1157 template<typename NumericT> 1158 vector<NumericT> operator -=(vector_base<NumericT> & v1,const viennacl::vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,viennacl::op_prod> & proxy)1159 operator-=(vector_base<NumericT> & v1, 1160 const viennacl::vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, viennacl::op_prod> & proxy) 1161 { 1162 assert(viennacl::traits::size1(proxy.lhs()) == v1.size() && bool("Size check failed for v1 -= A * v2: size1(A) != size(v1)")); 1163 1164 vector<NumericT> result(viennacl::traits::size1(proxy.lhs())); 1165 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1166 v1 -= result; 1167 return v1; 1168 } 1169 1170 1171 1172 1173 1174 //free functions: 1175 /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix 1176 * 1177 * @param v1 The addend vector. 1178 * @param proxy An expression template proxy class. 1179 */ 1180 template<typename NumericT> 1181 viennacl::vector<NumericT> operator +(const vector_base<NumericT> & v1,const vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,op_prod> & proxy)1182 operator+(const vector_base<NumericT> & v1, 1183 const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) 1184 { 1185 assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 + A * v2: size1(A) != size(v1)")); 1186 1187 vector<NumericT> result(viennacl::traits::size(v1)); 1188 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1189 result += v1; 1190 return result; 1191 } 1192 1193 /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix 1194 * 1195 * @param v1 The addend vector. 1196 * @param proxy An expression template proxy class. 1197 */ 1198 template<typename NumericT> 1199 viennacl::vector<NumericT> operator -(const vector_base<NumericT> & v1,const vector_expression<const matrix_base<NumericT>,const vector_base<NumericT>,op_prod> & proxy)1200 operator-(const vector_base<NumericT> & v1, 1201 const vector_expression< const matrix_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy) 1202 { 1203 assert(viennacl::traits::size1(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed for v1 - A * v2: size1(A) != size(v1)")); 1204 1205 vector<NumericT> result(viennacl::traits::size(v1)); 1206 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1207 result = v1 - result; 1208 return result; 1209 } 1210 1211 1212 ////////// transposed_matrix_proxy 1213 1214 1215 //v += A^T * x 1216 /** @brief Implementation of the operation v1 += A * v2, where A is a matrix 1217 * 1218 * @param v1 The addend vector where the result is written to. 1219 * @param proxy An expression template proxy class. 1220 */ 1221 template<typename NumericT> 1222 vector<NumericT> operator +=(vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1223 operator+=(vector_base<NumericT> & v1, 1224 const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, 1225 const vector_base<NumericT>, 1226 op_prod> & proxy) 1227 { 1228 assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)")); 1229 1230 vector<NumericT> result(viennacl::traits::size2(proxy.lhs())); 1231 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1232 v1 += result; 1233 return v1; 1234 } 1235 1236 //v -= A^T * x 1237 /** @brief Implementation of the operation v1 -= A * v2, where A is a matrix 1238 * 1239 * @param v1 The addend vector where the result is written to. 1240 * @param proxy An expression template proxy class. 1241 */ 1242 template<typename NumericT> 1243 vector<NumericT> operator -=(vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1244 operator-=(vector_base<NumericT> & v1, 1245 const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, 1246 const vector_base<NumericT>, 1247 op_prod> & proxy) 1248 { 1249 assert(viennacl::traits::size2(proxy.lhs()) == v1.size() && bool("Size check failed in v1 += trans(A) * v2: size2(A) != size(v1)")); 1250 1251 vector<NumericT> result(viennacl::traits::size2(proxy.lhs())); 1252 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1253 v1 -= result; 1254 return v1; 1255 } 1256 1257 1258 //free functions: 1259 /** @brief Implementation of the operation 'result = v1 + A * v2', where A is a matrix 1260 * 1261 * @param v1 The addend vector. 1262 * @param proxy An expression template proxy class. 1263 */ 1264 template<typename NumericT> 1265 vector<NumericT> operator +(const vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1266 operator+(const vector_base<NumericT> & v1, 1267 const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, 1268 const vector_base<NumericT>, 1269 op_prod> & proxy) 1270 { 1271 assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 + trans(A) * v2: size2(A) != size(v1)")); 1272 1273 vector<NumericT> result(viennacl::traits::size(v1)); 1274 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1275 result += v1; 1276 return result; 1277 } 1278 1279 /** @brief Implementation of the operation 'result = v1 - A * v2', where A is a matrix 1280 * 1281 * @param v1 The addend vector. 1282 * @param proxy An expression template proxy class. 1283 */ 1284 template<typename NumericT> 1285 vector<NumericT> operator -(const vector_base<NumericT> & v1,const vector_expression<const matrix_expression<const matrix_base<NumericT>,const matrix_base<NumericT>,op_trans>,const vector_base<NumericT>,op_prod> & proxy)1286 operator-(const vector_base<NumericT> & v1, 1287 const vector_expression< const matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans>, 1288 const vector_base<NumericT>, 1289 op_prod> & proxy) 1290 { 1291 assert(viennacl::traits::size2(proxy.lhs()) == viennacl::traits::size(v1) && bool("Size check failed in v1 - trans(A) * v2: size2(A) != size(v1)")); 1292 1293 vector<NumericT> result(viennacl::traits::size(v1)); 1294 viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), result); 1295 result = v1 - result; 1296 return result; 1297 } 1298 1299 1300 } //namespace viennacl 1301 1302 1303 #endif 1304