1 /* 2 * Copyright (C) 2011 David Saunders 3 * 2011-2012 Matthew Wezowicz 4 * 5 * Written by Matthew Wezowicz <mwezz@udel.edu> 6 * 7 * ========LICENCE======== 8 * This file is part of the library LinBox. 9 * 10 * LinBox is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * ========LICENCE======== 24 *. 25 */ 26 27 /*! @file matrix/matrixdomain/opencl-domain.h 28 * @ingroup matrixdomain 29 * @ingroup opencl 30 * @brief NO DOC 31 * @warning An <code>OpenCLMatrixDomain<Field></code> should be templated by a 32 * Givaro::Modular<double> or Givaro::Modular<float> field only. 33 */ 34 35 #ifndef __LINBOX_opencl_matrix_domain_H 36 #define __LINBOX_opencl_matrix_domain_H 37 38 #include <vector> 39 #include <iostream> 40 #include <pthread.h> 41 42 #ifdef __LINBOX_HAVE_OCL 43 #include <CL/cl.h> 44 #endif 45 46 #include "linbox/linbox-config.h" 47 #include "linbox/util/debug.h" 48 #include "linbox/matrix/matrixdomain/blas-matrix-domain.h" 49 50 namespace LinBox{ 51 52 /** 53 * Generic submatrix view adapter used internally in the OpenCLMatrixDomain 54 */ 55 template <class _Matrix> 56 class SubmatrixAdapter{ 57 public: 58 //Access to underlying types 59 typedef typename _Matrix::Field Field; 60 typedef typename Field::Element Element; 61 typedef SubmatrixAdapter<_Matrix> Self_t; 62 63 private: 64 _Matrix* _Mat; //!< Parent Matrix (ie raw vector) 65 size_t _row; //!< row dimension of Submatrix 66 size_t _col; //!< col dimension of Submatrix 67 size_t _r0; //!< upper left corner row of Submatrix in \p _Mat 68 size_t _c0; //!< upper left corner row of Submatrix in \p _Mat 69 size_t _stride; //!< number of columns in \p _Mat (or stride of \p _Mat) 70 size_t _off; //!< offset from start of parent matrix 71 72 public: 73 /** NULL constructor. */ SubmatrixAdapter()74 SubmatrixAdapter() : 75 _Mat(NULL), 76 _row(0), 77 _col(0), 78 _r0(0), 79 _c0(0), 80 _stride(0), 81 _off(0){} 82 83 /** Constructor from an existing @refMatrix 84 * \param M Pointer to @ref Matrix of which to construct submatrix 85 */ SubmatrixAdapter(const _Matrix & M)86 SubmatrixAdapter(const _Matrix& M) : 87 _Mat(&(const_cast<_Matrix&>(M))), 88 _row(M.rowdim()), 89 _col(M.coldim()), 90 _r0(0), 91 _c0(0), 92 _stride(M.coldim()), 93 _off(0){} 94 95 /** Constructor from an existing @ref Matrix and dimensions. 96 * \param M Pointer to @ref Matrix of which to construct submatrix 97 * \param row Starting row 98 * \param col Starting column 99 * \param Rowdim Row dimension 100 * \param Coldim Column dimension 101 */ SubmatrixAdapter(const _Matrix & M,size_t row,size_t col,size_t Rowdim,size_t Coldim)102 SubmatrixAdapter( 103 const _Matrix& M, 104 size_t row, 105 size_t col, 106 size_t Rowdim, 107 size_t Coldim) : 108 //Init list starts here 109 _Mat(&(const_cast<_Matrix&>(M))), 110 _row(Rowdim), 111 _col(Coldim), 112 _r0(row), 113 _c0(col), 114 _stride(M.coldim()), 115 _off(row * _stride + col){} 116 117 //! BB constructor to reduce warnings in clang SubmatrixAdapter(const _Matrix & M,int row,int col,int Rowdim,int Coldim)118 SubmatrixAdapter( 119 const _Matrix& M, 120 int row, 121 int col, 122 int Rowdim, 123 int Coldim) : 124 //Init list starts here 125 _Mat(&(const_cast<_Matrix&>(M))), 126 _row((size_t) Rowdim), 127 _col((size_t) Coldim), 128 _r0((size_t) row), 129 _c0((size_t) col), 130 _stride( M.coldim()), 131 _off((size_t) row * _stride + (size_t)col){} 132 133 /** Constructor from an existing @ref SubmatrixAdapter 134 * \param SM Pointer to @ref SubmatrixAdapter of which to construct submatrix 135 */ SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM)136 SubmatrixAdapter(const SubmatrixAdapter<_Matrix>& SM) : 137 _Mat(SM._Mat), 138 _row(SM._row), 139 _col(SM._col), 140 _r0(SM._r0), 141 _c0(SM._c0), 142 _stride(SM._stride), 143 _off(SM._off){} 144 145 /** Constructor from an existing submatrix and dimensions 146 * @param SM Constant reference to SubmatrixAdapter from which to 147 * construct submatrix 148 * @param rowbeg Starting row 149 * @param colbeg Starting column 150 * @param Rowdim Row dimension 151 * @param Coldim Column dimension 152 */ SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM,size_t row,size_t col,size_t Rowdim,size_t Coldim)153 SubmatrixAdapter( 154 const SubmatrixAdapter<_Matrix>& SM, 155 size_t row, 156 size_t col, 157 size_t Rowdim, 158 size_t Coldim) : 159 //Init list starts here 160 _Mat(SM._Mat), 161 _row(Rowdim), 162 _col(Coldim), 163 _r0(SM._r0 + row), 164 _c0(SM._c0 + col), 165 _stride(SM._stride), 166 _off(SM._off + (row * _stride + col)){} 167 168 //! BB constructor to reduce warnings in clang SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM,int row,int col,int Rowdim,int Coldim)169 SubmatrixAdapter( 170 const SubmatrixAdapter<_Matrix>& SM, 171 int row, 172 int col, 173 int Rowdim, 174 int Coldim) : 175 //Init list starts here 176 _Mat(SM._Mat), 177 _row((size_t)Rowdim), 178 _col((size_t)Coldim), 179 _r0(SM._r0 + (size_t)row), 180 _c0(SM._c0 + (size_t)col), 181 _stride(SM._stride), 182 _off(SM._off + ((size_t)row * _stride + (size_t)col)){} 183 184 185 /** Get the number of rows in the matrix 186 * @return Number of rows in matrix 187 */ rowdim()188 size_t rowdim() const{ 189 return _row; 190 } 191 192 /** Get the number of columns in the matrix 193 * @return Number of columns in matrix 194 */ coldim()195 size_t coldim() const{ 196 return _col; 197 } 198 199 /*! Get the stride of the matrix. 200 * @return stride of submatrix (number of cols of parent matrix) 201 */ getStride()202 size_t getStride() const{ 203 return _stride; 204 } 205 206 /** Set the entry at (i, j). 207 * @param i Row index of entry, 0...rowdim() - 1 208 * @param j Column index of entry, 0...coldim() - 1 209 * @param a_ij Element to set 210 */ setEntry(size_t i,size_t j,const Element & a_ij)211 const Element& setEntry(size_t i, size_t j, const Element& a_ij){ 212 return _Mat->setEntry(_r0 + i, _c0 + j, a_ij); 213 } 214 215 /** Get a writeable reference to an entry in the matrix. 216 * @param i Row index of entry, 0...rowdim() - 1 217 * @param j Column index of entry, 0...coldim() - 1 218 * @return Reference to matrix entry 219 */ refEntry(size_t i,size_t j)220 Element& refEntry(size_t i, size_t j){ 221 return _Mat->refEntry(_r0 + i, _c0 + j); 222 } 223 224 /** Get a read-only individual entry from the matrix. 225 * @param i Row index of entry, 0...rowdim() - 1 226 * @param j Column index of entry, 0...coldim() - 1 227 * @return Const reference to matrix entry 228 */ getEntry(size_t i,size_t j)229 const Element& getEntry(size_t i, size_t j) const{ 230 return _Mat->getEntry(_r0 + i, _c0 + j); 231 } 232 233 /** Get an entry and store it in the given value. 234 * This form is more in the Linbox style and is provided for interface 235 * compatibility with other parts of the library 236 * @param x Element in which to store result 237 * @param i Row index of entry, 0...rowdim() - 1 238 * @param j Column index of entry, 0...coldim() - 1 239 * @return Reference to x 240 */ getEntry(Element & x,size_t i,size_t j)241 Element& getEntry(Element& x, size_t i, size_t j){ 242 return _Mat->getEntry(x, _r0 + i, _c0 + j); 243 } 244 245 /** Access the parent matrix 246 * @return Reference to _Mat 247 */ getMatrix()248 _Matrix& getMatrix(){ 249 return *_Mat; 250 } 251 }; 252 253 /** 254 * Interface for all functionnalities provided 255 * for BlasMatrix using GPUs. 256 * @internal 257 * Done through specialization of some of the member funcions 258 * defined below. Otherwise, by default the single processor 259 * BlasMatrixDomain funcions are invoked. 260 */ 261 template <class Field_> 262 class OpenCLMatrixDomain { 263 264 public: 265 typedef Field_ Field; 266 typedef typename Field::Element Element; 267 typedef BlasMatrix<Field,typename Vector<Field>::Dense > Matrix; 268 #ifdef __LINBOX_HAVE_OCL 269 friend class OpenCLMatrixDomainFactory; 270 #endif 271 272 protected: 273 274 const Field& _F; 275 276 #ifdef __LINBOX_HAVE_OCL 277 //OpenCL specific variables 278 cl_context context; 279 cl_device_id device; 280 cl_command_queue commandQue; 281 cl_int errcode; 282 283 //Storage for memory levels 284 unsigned long memCapacity; 285 unsigned long maxBufferSize; 286 287 //Container type flag 288 bool GPUcontainer; 289 bool CPUcontainer; 290 bool setupCorrect; 291 bool doubleSupported; 292 293 //Storage for kernels and flags for availability 294 cl_kernel dpKernels[20]; 295 bool dpKernelsAvailable[20]; 296 cl_kernel spKernels[20]; 297 bool spKernelsAvailable[20]; 298 299 //ID number assigned by OpenCLMatrixDomainFactory 300 unsigned int IDnum; 301 302 //Mutex 303 pthread_mutex_t* deviceLock; 304 305 /** 306 * @internal 307 * Initializes the OpenCL compute environment 308 */ 309 void oclMatrixDomainAcquire(); 310 311 /** 312 * @internal 313 * Releases OpenCL cumpute resources 314 */ 315 void oclMatrixDomainRelease(unsigned int IDnum); 316 317 /** 318 * @internal 319 * Checks to see if the memory levels required are possible 320 */ 321 template <class Operand1, class Operand2, class Operand3> 322 bool oclMemCheck( 323 Operand1& D, 324 const Operand2& A, 325 const Operand3& B, 326 const Operand1& C) const; 327 328 template <class Operand1> 329 bool oclMemCheck( 330 Operand1& D, 331 Operand1& A, 332 Operand1& B, 333 Operand1& C) const; 334 335 /** 336 * @internal 337 * OpenCL memory management functions 338 */ 339 template <typename T, class Operand1> 340 cl_mem oclCreateMatrixBuffer(Operand1 &matrix) const; 341 342 template <typename T, class Operand1> 343 cl_mem oclCreateAndLoadMatrixBuffer(const Operand1 &matrix) const; 344 345 template <typename T, class Operand2> 346 Operand2& oclReadMatrixBuffer(cl_mem buffer, Operand2 &matrix) const; 347 348 template <typename T, class Operand1> 349 cl_mem oclPadMatrix( 350 cl_mem matrixBuffer, 351 int matrixBufferSize, 352 int newDimX, 353 const Operand1 &matrix) const; 354 355 template <typename T, class Operand1> 356 Operand1& oclDepadMatrix( 357 cl_mem matrixBuffer, 358 int matrixBufferSize, 359 int outputSize, 360 int newDimX, 361 Operand1& matrix) const; 362 363 /** 364 * @internal 365 * Functions to call the passed kernel on the passed buffers 366 */ 367 template <typename T, typename U> 368 void oclCallKernel( 369 cl_mem bufferC, 370 cl_mem bufferA, 371 cl_mem bufferB, 372 int widthA, 373 int heightA, 374 int widthB, 375 T p, 376 cl_kernel selectedKernel) const; 377 378 template <typename T, typename U> 379 void oclCallKernel( 380 cl_mem bufferD, 381 cl_mem bufferA, 382 cl_mem bufferB, 383 cl_mem bufferC, 384 int widthA, 385 int heightA, 386 int widthB, 387 T p, 388 cl_kernel selectedKernel) const; 389 390 template <typename T, typename U> 391 void oclCallKernel( 392 cl_mem bufferD, 393 cl_mem bufferA, 394 cl_mem bufferB, 395 cl_mem bufferC, 396 T alpha, 397 T beta, 398 int widthA, 399 int heightA, 400 int widthB, 401 T p, 402 cl_kernel selectedKernel) const; 403 404 /** 405 * @internal 406 * Functions to partition the matrices into submatrix views 407 */ 408 template <class Operand1, class Operand2, class Operand3> 409 std::vector<int> oclPartition( 410 Operand1& C, 411 const Operand2& A, 412 const Operand3& B, 413 std::vector<SubmatrixAdapter<Operand1> >& VC, 414 std::vector<SubmatrixAdapter<Operand2> >& VA, 415 std::vector<SubmatrixAdapter<Operand3> >& VB) const; 416 417 template <class Operand1, class Operand2, class Operand3> 418 std::vector<int> oclPartition( 419 Operand1& D, 420 const Operand2& A, 421 const Operand3& B, 422 const Operand1& C, 423 std::vector<SubmatrixAdapter<Operand1> >& VD, 424 std::vector<SubmatrixAdapter<Operand2> >& VA, 425 std::vector<SubmatrixAdapter<Operand3> >& VB, 426 std::vector<SubmatrixAdapter<Operand1> >& VC) const; 427 428 void printClErrstring(cl_int err) const; 429 #else 430 bool setupCorrect; 431 #endif 432 public: 433 434 //! Constructor of OpenCLDomain. OpenCLMatrixDomain(const Field & F)435 OpenCLMatrixDomain(const Field& F ) : _F(F), setupCorrect(false){ 436 437 #ifndef NDEBUG 438 if(!Givaro::Protected::probab_prime(_F.characteristic())){ 439 std::cout << " *** WARNING *** " << std::endl; 440 std::cout << " You are using a OpenCL Matrix Domain" 441 << " where your field is not prime " 442 << std::endl; 443 } 444 #endif 445 446 #ifdef __LINBOX_HAVE_OCL 447 //Initialize OpenCL environment 448 oclMatrixDomainAcquire(); 449 #endif 450 } 451 452 //! Copy constructor OpenCLMatrixDomain(const OpenCLMatrixDomain<Field> & OMD)453 OpenCLMatrixDomain(const OpenCLMatrixDomain<Field> & OMD) : 454 _F(OMD._F), 455 setupCorrect(false){ 456 457 #ifndef NDEBUG 458 if(!Givaro::Protected::probab_prime(_F.characteristic())){ 459 std::cout << " *** WARNING *** " << std::endl; 460 std::cout << " You are using a OpenCL Matrix Domain" 461 << " where your field is not prime " 462 << std::endl; 463 } 464 #endif 465 466 #ifdef __LINBOX_HAVE_OCL 467 //Initialize OpenCL environment 468 oclMatrixDomainAcquire(); 469 #endif 470 } 471 472 //! Deconstructor ~OpenCLMatrixDomain()473 ~OpenCLMatrixDomain(){ 474 #ifdef __LINBOX_HAVE_OCL 475 oclMatrixDomainRelease(IDnum); 476 #endif 477 } 478 479 //! Field accessor field()480 const Field& field() const{ 481 return _F; 482 } 483 484 /* 485 * Basics operation available matrix respecting BlasMatrix interface 486 */ 487 488 //! multiplication. 489 //! C = A*B 490 template <class Operand1, class Operand2, class Operand3> mul(Operand1 & C,const Operand2 & A,const Operand3 & B)491 Operand1& mul(Operand1& C, const Operand2& A, const Operand3& B) const{ 492 return BlasMatrixDomainMul<Field,Operand1,Operand2,Operand3>()(_F,C,A,B); 493 } 494 495 //! addition. 496 //! C = A+B 497 template <class Operand1, class Operand2, class Operand3> add(Operand1 & C,const Operand2 & A,const Operand3 & B)498 Operand1& add(Operand1& C, const Operand2& A, const Operand3& B) const{ 499 return BlasMatrixDomainAdd<Field,Operand1,Operand2,Operand3>()(_F,C,A,B); 500 } 501 502 //! copy. 503 //! B = A 504 template <class Operand1, class Operand2> copy(Operand1 & B,const Operand2 & A)505 Operand1& copy(Operand1& B, const Operand2& A) const{ 506 return BlasMatrixDomainCopy<Field,Operand1,Operand2>()(_F,B,A); 507 } 508 509 //! substraction 510 //! C = A-B 511 template <class Operand1, class Operand2, class Operand3> sub(Operand1 & C,const Operand2 & A,const Operand3 & B)512 Operand1& sub(Operand1& C, const Operand2& A, const Operand3& B) const{ 513 return BlasMatrixDomainSub<Field,Operand1,Operand2,Operand3>()(_F,C,A,B); 514 } 515 516 //! substraction (in place) 517 //! C -= B 518 template <class Operand1, class Operand3> subin(Operand1 & C,const Operand3 & B)519 Operand1& subin(Operand1& C, const Operand3& B) const{ 520 return BlasMatrixDomainSubin<Field,Operand1,Operand3>()(_F,C,B); 521 } 522 523 //! addition (in place) 524 //! C += B 525 template <class Operand1, class Operand3> addin(Operand1 & C,const Operand3 & B)526 Operand1& addin(Operand1& C, const Operand3& B) const{ 527 return BlasMatrixDomainAddin<Field,Operand1,Operand3>()(_F,C,B); 528 } 529 530 //! multiplication with scaling. 531 //! C = alpha.A*B 532 template <class Operand1, class Operand2, class Operand3> mul(Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)533 Operand1& mul( 534 Operand1& C, 535 const Element& alpha, 536 const Operand2& A, 537 const Operand3& B) const{ 538 539 return muladdin(_F.zero,C,alpha,A,B); 540 } 541 542 //! In place multiplication. 543 //! A = A*B 544 template <class Operand1, class Operand2> mulin_left(Operand1 & A,const Operand2 & B)545 Operand1& mulin_left(Operand1& A, const Operand2& B) const{ 546 return BlasMatrixDomainMulin<Field,Operand1,Operand2>()(_F,A,B); 547 } 548 549 //! In place multiplication. 550 //! B = A*B 551 template <class Operand1, class Operand2> mulin_right(const Operand1 & A,Operand2 & B)552 Operand2& mulin_right(const Operand1& A, Operand2& B) const{ 553 return BlasMatrixDomainMulin<Field,Operand2,Operand1>()(_F,A,B); 554 } 555 556 //! axpy. 557 //! D = A*B + C 558 template <class Operand1, class Operand2, class Operand3> axpy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)559 Operand1& axpy( 560 Operand1& D, 561 const Operand2& A, 562 const Operand3& B, 563 const Operand1& C) const{ 564 565 return muladd(D,_F.one,C,_F.one,A,B); 566 } 567 568 //! axpyin. 569 //! C += A*B 570 template <class Operand1, class Operand2, class Operand3> axpyin(Operand1 & C,const Operand2 & A,const Operand3 & B)571 Operand1& axpyin(Operand1& C, const Operand2& A, const Operand3& B) const{ 572 return muladdin(_F.one,C,_F.one,A,B); 573 } 574 575 //! maxpy. 576 //! D = C - A*B 577 template <class Operand1, class Operand2, class Operand3> maxpy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)578 Operand1& maxpy( 579 Operand1& D, 580 const Operand2& A, 581 const Operand3& B, 582 const Operand1& C) const{ 583 584 return muladd(D,_F.one,C,_F.mOne,A,B); 585 } 586 587 //! maxpyin. 588 //! C -= A*B 589 template <class Operand1, class Operand2, class Operand3> maxpyin(Operand1 & C,const Operand2 & A,const Operand3 & B)590 Operand1& maxpyin(Operand1& C, const Operand2& A, const Operand3& B) const{ 591 return muladdin(_F.one,C,_F.mOne,A,B); 592 } 593 594 //! axmy. 595 //! D= A*B - C 596 template <class Operand1, class Operand2, class Operand3> axmy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)597 Operand1& axmy( 598 Operand1& D, 599 const Operand2& A, 600 const Operand3& B, 601 const Operand1& C) const{ 602 603 return muladd(D,_F.mOne,C,_F.one,A,B); 604 } 605 606 //! axmyin. 607 //! C = A*B - C 608 template <class Operand1, class Operand2, class Operand3> axmyin(Operand1 & C,const Operand2 & A,const Operand3 & B)609 Operand1& axmyin(Operand1& C, const Operand2& A, const Operand3& B) const{ 610 return muladdin(_F.mOne,C,_F.one,A,B); 611 } 612 613 //! general matrix-matrix multiplication and addition with scaling. 614 //! D= beta.C + alpha.A*B 615 template <class Operand1, class Operand2, class Operand3> muladd(Operand1 & D,const Element & beta,const Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)616 Operand1& muladd( 617 Operand1& D, 618 const Element& beta, 619 const Operand1& C, 620 const Element& alpha, 621 const Operand2& A, 622 const Operand3& B) const{ 623 624 return BlasMatrixDomainMulAdd<Operand1,Operand2,Operand3>()( 625 D, 626 beta, 627 C, 628 alpha, 629 A, 630 B); 631 } 632 633 //! muladdin. 634 //! C= beta.C + alpha.A*B. 635 template <class Operand1, class Operand2, class Operand3> muladdin(const Element & beta,Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)636 Operand1& muladdin( 637 const Element& beta, 638 Operand1& C, 639 const Element& alpha, 640 const Operand2& A, 641 const Operand3& B) const{ 642 643 return BlasMatrixDomainMulAdd<Operand1,Operand2,Operand3>()( 644 _F, 645 beta, 646 C, 647 alpha, 648 A, 649 B); 650 } 651 652 /*! 653 * @name Solutions available for matrix respecting BlasMatrix interface 654 */ 655 //@{ 656 657 //! Inversion 658 template <class Matrix> inv(Matrix & Ainv,const Matrix & A)659 Matrix& inv( Matrix &Ainv, const Matrix &A) const{ 660 BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A); 661 return Ainv; 662 } 663 664 //! Inversion (in place) 665 template <class Matrix> invin(Matrix & Ainv,Matrix & A)666 Matrix& invin( Matrix &Ainv, Matrix &A) const{ 667 BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A); 668 return Ainv; 669 } 670 671 //! Inversion (the matrix A is modified) 672 template <class Matrix> invin(Matrix & A)673 Matrix& invin(Matrix &A) const{ 674 Matrix tmp(A.rowdim(), A.coldim()); 675 tmp = A; 676 BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,A,tmp); 677 return A; 678 } 679 680 /*! Division. 681 * C = A B^{-1} ==> C . B = A 682 */ 683 template <class Matrix> div(Matrix & C,const Matrix & A,const Matrix & B)684 Matrix& div(Matrix &C, const Matrix &A, const Matrix &B) const{ 685 return this->right_solve(C,B,A); 686 } 687 688 689 //! Inversion w singular check 690 template <class Matrix> inv(Matrix & Ainv,const Matrix & A,int & nullity)691 Matrix& inv(Matrix &Ainv, const Matrix &A, int& nullity) const{ 692 nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A); 693 return Ainv; 694 } 695 696 //! Inversion (the matrix A is modified) w singular check 697 template <class Matrix> invin(Matrix & Ainv,Matrix & A,int & nullity)698 Matrix& invin(Matrix &Ainv, Matrix &A, int& nullity) const{ 699 nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A); 700 return Ainv; 701 } 702 703 //! Rank 704 template <class Matrix> rank(const Matrix & A)705 unsigned int rank(const Matrix &A) const{ 706 return BlasMatrixDomainRank<Field,Matrix>()(_F,A); 707 } 708 709 //! in-place Rank (the matrix is modified) 710 template <class Matrix> rankInPlace(Matrix & A)711 unsigned int rankInPlace(Matrix &A) const{ 712 return BlasMatrixDomainRank<Field, Matrix>()(_F,A); 713 } 714 715 //! determinant 716 template <class Matrix> det(const Matrix & A)717 Element det(const Matrix &A) const{ 718 return BlasMatrixDomainDet<Field, Matrix>()(_F,A); 719 } 720 721 //! in-place Determinant (the matrix is modified) 722 template <class Matrix> detInPlace(Matrix & A)723 Element detInPlace(Matrix &A) const{ 724 return BlasMatrixDomainDet<Field, Matrix>()(_F,A); 725 } 726 //@} 727 728 /*! 729 * @name Solvers for Matrix (respecting BlasMatrix interface) 730 * with Operand as right or left hand side 731 */ 732 //@{ 733 //! linear solve with matrix right hand side. 734 //! AX=B 735 template <class Operand, class Matrix> left_solve(Operand & X,const Matrix & A,const Operand & B)736 Operand& left_solve (Operand& X, const Matrix& A, const Operand& B) const{ 737 return BlasMatrixDomainLeftSolve<Field,Operand,Matrix>()(_F,X,A,B); 738 } 739 740 //! linear solve with matrix right hand side, the result is stored in-place in B. 741 //! @pre A must be square 742 //! AX=B , (B<-X) 743 template <class Operand,class Matrix> left_solve(const Matrix & A,Operand & B)744 Operand& left_solve (const Matrix& A, Operand& B) const{ 745 return BlasMatrixDomainLeftSolve<Field,Operand,Matrix>()(_F,A,B); 746 } 747 748 //! linear solve with matrix right hand side. 749 //! XA=B 750 template <class Operand, class Matrix> right_solve(Operand & X,const Matrix & A,const Operand & B)751 Operand& right_solve (Operand& X, const Matrix& A, const Operand& B) const{ 752 return BlasMatrixDomainRightSolve<Field,Operand,Matrix>()(_F,X,A,B); 753 } 754 755 //! linear solve with matrix right hand side, the result is stored in-place in B. 756 //! @pre A must be square 757 //! XA=B , (B<-X) 758 template <class Operand, class Matrix> right_solve(const Matrix & A,Operand & B)759 Operand& right_solve (const Matrix& A, Operand& B) const{ 760 return BlasMatrixDomainRightSolve<Field,Operand,Matrix>()(_F,A,B); 761 } 762 763 //! minimal polynomial computation. 764 template <class Polynomial, class Matrix> minpoly(Polynomial & P,const Matrix & A)765 Polynomial& minpoly( Polynomial& P, const Matrix& A ) const{ 766 return BlasMatrixDomainMinpoly<Field, Polynomial, Matrix>()(_F,P,A); 767 } 768 769 //! characteristic polynomial computation. 770 template <class Polynomial, class Matrix > charpoly(Polynomial & P,const Matrix & A)771 Polynomial& charpoly( Polynomial& P, const Matrix& A ) const{ 772 773 commentator().start ("Givaro::Modular Dense Charpoly ", "MDCharpoly"); 774 std::list<Polynomial> P_list; 775 P_list.clear(); 776 BlasMatrixDomainCharpoly<Field, std::list<Polynomial>, Matrix >()( 777 _F, 778 P_list, 779 A); 780 781 Polynomial tmp(A.rowdim() + 1); 782 typename std::list<Polynomial>::iterator it = P_list.begin(); 783 P = *(it++); 784 while(it != P_list.end()){ 785 // Waiting for an implementation of a domain of polynomials 786 mulpoly(tmp, P, *it); 787 P = tmp; 788 //delete &(*it); 789 ++it; 790 } 791 commentator().stop ("done", NULL, "MDCharpoly"); 792 793 return P; 794 } 795 796 //! characteristic polynomial computation. 797 template <class Polynomial, class Matrix > charpoly(std::list<Polynomial> & P,const Matrix & A)798 std::list<Polynomial>& charpoly( 799 std::list<Polynomial>& P, 800 const Matrix& A ) const{ 801 802 return BlasMatrixDomainCharpoly< 803 Field, 804 std::list<Polynomial>, 805 Matrix >()(_F,P,A); 806 } 807 808 809 //private: 810 //! @todo Temporary: waiting for an implementation of a domain of polynomial 811 template<class Polynomial> mulpoly(Polynomial & res,const Polynomial & P1,const Polynomial & P2)812 Polynomial& mulpoly( 813 Polynomial &res, 814 const Polynomial & P1, 815 const Polynomial & P2) const{ 816 817 res.resize(P1.size() + P2.size() - 1); 818 819 for(int i = 0; i < res.size(); i++){ 820 _F.assign(res[i],_F.zero); 821 } 822 823 for(int i = 0; i < P1.size(); i++){ 824 for(int j = 0; j < P2.size(); j++){ 825 _F.axpyin(res[i + j],P1[i],P2[j]); 826 } 827 } 828 829 return res; 830 } 831 //@} 832 833 template<class Matrix1, class Matrix2> areEqual(const Matrix1 & A,const Matrix2 & B)834 bool areEqual(const Matrix1 & A, const Matrix2 & B){ 835 if((A.rowdim() != B.rowdim()) || (A.coldim() != B.coldim())){ 836 return false ; 837 } 838 839 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 840 for(size_t j = 0 ; j < A.coldim() ; ++j){ 841 if(!_F.areEqual(A.getEntry(i,j),B.getEntry(i,j))){ //!@bug use refs 842 return false ; 843 } 844 } 845 } 846 847 return true ; 848 } 849 850 template<class Matrix> setIdentity(Matrix & I)851 void setIdentity(Matrix & I){ 852 for(size_t i = 0 ; i < I.rowdim() ; ++i){ 853 for(size_t j = 0 ; j < I.coldim() ; ++j){ 854 if(i == j){ 855 I.setEntry(i,j,_F.one); 856 } 857 else{ 858 I.setEntry(i,j,_F.zero); 859 } 860 } 861 } 862 } 863 864 template<class Matrix> setZero(Matrix & I)865 void setZero(Matrix & I){ 866 // use Iterator 867 for(size_t i = 0 ; i < I.rowdim() ; ++i){ 868 for(size_t j = 0 ; j < I.coldim() ; ++j){ 869 I.setEntry(i,j,_F.zero); 870 } 871 } 872 } 873 874 template<class Matrix1> isZero(const Matrix1 & A)875 bool isZero(const Matrix1 & A){ 876 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 877 for(size_t j = 0 ; j < A.coldim() ; ++j){ 878 if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs 879 return false; 880 } 881 } 882 } 883 884 return true ; 885 } 886 887 template<class Matrix1> isIdentity(const Matrix1 & A)888 bool isIdentity(const Matrix1 & A){ 889 if(A.rowdim() != A.coldim()){ 890 return false; 891 } 892 893 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 894 if(!_F.isOne(A.getEntry(i,i))){ 895 return false; 896 } 897 } 898 899 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 900 for(size_t j = 0 ; j < i ; ++j){ 901 if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs 902 return false; 903 } 904 } 905 } 906 907 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 908 for(size_t j = i + 1 ; j < A.coldim() ; ++j){ 909 if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs 910 return false; 911 } 912 } 913 } 914 915 return true ; 916 } 917 918 template<class Matrix1> isIdentityGeneralized(const Matrix1 & A)919 bool isIdentityGeneralized(const Matrix1 & A){ 920 size_t mn = std::min(A.rowdim(),A.coldim()); 921 for(size_t i = 0 ; i < mn ; ++i){ 922 if(!_F.isOne(A.getEntry(i,i))){ 923 return false; 924 } 925 } 926 927 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 928 for(size_t j = 0 ; j < std::min(i,mn) ; ++j){ 929 if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs 930 return false; 931 } 932 } 933 } 934 935 for(size_t i = 0 ; i < A.rowdim() ; ++i){ 936 for(size_t j = i+1 ; j < A.coldim() ; ++j){ 937 if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs 938 return false; 939 } 940 } 941 } 942 943 return true; 944 } 945 946 //public: 947 948 /** Print matrix. 949 * @param os Output stream to which matrix is written. 950 * @param A Matrix. 951 * @returns reference to os. 952 */ 953 template <class Matrix> write(std::ostream & os,const Matrix & A)954 inline std::ostream &write(std::ostream &os, const Matrix &A) const{ 955 return A.write(os, _F); 956 } 957 958 template <class Matrix> write(std::ostream & os,const Matrix & A,bool maple_format)959 inline std::ostream &write(std::ostream &os, 960 const Matrix &A, 961 bool maple_format) const{ 962 963 return A.write(os, _F, maple_format); 964 } 965 966 /** Read matrix 967 * @param is Input stream from which matrix is read. 968 * @param A Matrix. 969 * @returns reference to is. 970 */ 971 template <class Matrix> read(std::istream & is,Matrix & A)972 inline std::istream &read(std::istream &is, Matrix &A) const{ 973 return A.read (is, _F); 974 } 975 976 }; /* end of class OpenCLMatrixDomain */ 977 978 } /* end of namespace LinBox */ 979 980 #ifdef __LINBOX_HAVE_OCL 981 #include "linbox/matrix/matrixdomain/opencl-domain-factory.h" 982 #include "linbox/matrix/matrixdomain/opencl-domain-util.inl" 983 #include "linbox/matrix/matrixdomain/opencl-domain-memory.inl" 984 #include "linbox/matrix/matrixdomain/opencl-domain.inl" 985 #endif 986 987 #endif // __LINBOX_opencl_matrix_domain_H 988 989 // Local Variables: 990 // mode: C++ 991 // tab-width: 4 992 // indent-tabs-mode: nil 993 // c-basic-offset: 4 994 // End: 995 // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 996