1 /*
2  * Copyright (C) 2011      David Saunders
3  *               2011-2012 Matthew Wezowicz
4  *
5  * Written by Matthew Wezowicz <mwezz@udel.edu>
6  *
7  * ========LICENCE========
8  * This file is part of the library LinBox.
9  *
10  * LinBox is free software: you can redistribute it and/or modify
11  * it under the terms of the  GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23  * ========LICENCE========
24  *.
25  */
26 
27 /*! @file matrix/matrixdomain/opencl-domain.h
28  * @ingroup matrixdomain
29  * @ingroup opencl
30  * @brief NO DOC
31  * @warning An <code>OpenCLMatrixDomain<Field></code> should be templated by a
32  * Givaro::Modular<double> or Givaro::Modular<float> field only.
33  */
34 
35 #ifndef __LINBOX_opencl_matrix_domain_H
36 #define __LINBOX_opencl_matrix_domain_H
37 
38 #include <vector>
39 #include <iostream>
40 #include <pthread.h>
41 
42 #ifdef __LINBOX_HAVE_OCL
43 #include <CL/cl.h>
44 #endif
45 
46 #include "linbox/linbox-config.h"
47 #include "linbox/util/debug.h"
48 #include "linbox/matrix/matrixdomain/blas-matrix-domain.h"
49 
50 namespace LinBox{
51 
52 	/**
53 	 * Generic submatrix view adapter used internally in the OpenCLMatrixDomain
54 	 */
55 	template <class _Matrix>
56 	class SubmatrixAdapter{
57 	public:
58 		//Access to underlying types
59 		typedef typename _Matrix::Field     Field;
60 		typedef typename Field::Element     Element;
61 		typedef SubmatrixAdapter<_Matrix>   Self_t;
62 
63 	private:
64 		_Matrix* _Mat;  //!< Parent Matrix (ie raw vector)
65 		size_t _row;    //!< row dimension of Submatrix
66 		size_t _col;    //!< col dimension of Submatrix
67 		size_t _r0;     //!< upper left corner row of Submatrix in \p _Mat
68 		size_t _c0;     //!< upper left corner row of Submatrix in \p _Mat
69 		size_t _stride; //!< number of columns in \p _Mat (or stride of \p _Mat)
70 		size_t _off;    //!< offset from start of parent matrix
71 
72 	public:
73 		/** NULL constructor.  */
SubmatrixAdapter()74 		SubmatrixAdapter() :
75 			_Mat(NULL),
76 			_row(0),
77 			_col(0),
78 			_r0(0),
79 			_c0(0),
80 			_stride(0),
81 			_off(0){}
82 
83 		/** Constructor from an existing @refMatrix
84 		 * \param M Pointer to @ref Matrix of which to construct submatrix
85 		 */
SubmatrixAdapter(const _Matrix & M)86 		SubmatrixAdapter(const _Matrix& M) :
87 			_Mat(&(const_cast<_Matrix&>(M))),
88 			_row(M.rowdim()),
89 			_col(M.coldim()),
90 			_r0(0),
91 			_c0(0),
92 			_stride(M.coldim()),
93 			_off(0){}
94 
95 		/** Constructor from an existing @ref Matrix and dimensions.
96 		 * \param M Pointer to @ref Matrix of which to construct submatrix
97 		 * \param row Starting row
98 		 * \param col Starting column
99 		 * \param Rowdim Row dimension
100 		 * \param Coldim Column dimension
101 		 */
SubmatrixAdapter(const _Matrix & M,size_t row,size_t col,size_t Rowdim,size_t Coldim)102 		SubmatrixAdapter(
103 			const _Matrix& M,
104 			size_t row,
105 			size_t col,
106 			size_t Rowdim,
107 			size_t Coldim) :
108 			//Init list starts here
109 			_Mat(&(const_cast<_Matrix&>(M))),
110 			_row(Rowdim),
111 			_col(Coldim),
112 			_r0(row),
113 			_c0(col),
114 			_stride(M.coldim()),
115 			_off(row * _stride + col){}
116 
117 		//! BB constructor to reduce warnings in clang
SubmatrixAdapter(const _Matrix & M,int row,int col,int Rowdim,int Coldim)118 		SubmatrixAdapter(
119 			const _Matrix& M,
120 			int row,
121 			int col,
122 			int Rowdim,
123 			int Coldim) :
124 			//Init list starts here
125 			_Mat(&(const_cast<_Matrix&>(M))),
126 			_row((size_t) Rowdim),
127 			_col((size_t) Coldim),
128 			_r0((size_t) row),
129 			_c0((size_t) col),
130 			_stride( M.coldim()),
131 			_off((size_t) row * _stride + (size_t)col){}
132 
133 		/** Constructor from an existing @ref SubmatrixAdapter
134 		 * \param SM Pointer to @ref SubmatrixAdapter of which to construct submatrix
135 		 */
SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM)136 		SubmatrixAdapter(const SubmatrixAdapter<_Matrix>& SM) :
137 			_Mat(SM._Mat),
138 			_row(SM._row),
139 			_col(SM._col),
140 			_r0(SM._r0),
141 			_c0(SM._c0),
142 			_stride(SM._stride),
143 			_off(SM._off){}
144 
145 		/** Constructor from an existing submatrix and dimensions
146 		 * @param SM Constant reference to SubmatrixAdapter from which to
147 		 *           construct submatrix
148 		 * @param rowbeg Starting row
149 		 * @param colbeg Starting column
150 		 * @param Rowdim Row dimension
151 		 * @param Coldim Column dimension
152 		 */
SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM,size_t row,size_t col,size_t Rowdim,size_t Coldim)153 		SubmatrixAdapter(
154 			const SubmatrixAdapter<_Matrix>& SM,
155 			size_t row,
156 			size_t col,
157 			size_t Rowdim,
158 			size_t Coldim) :
159 			//Init list starts here
160 			_Mat(SM._Mat),
161 			_row(Rowdim),
162 			_col(Coldim),
163 			_r0(SM._r0 + row),
164 			_c0(SM._c0 + col),
165 			_stride(SM._stride),
166 			_off(SM._off + (row * _stride + col)){}
167 
168 		//! BB constructor to reduce warnings in clang
SubmatrixAdapter(const SubmatrixAdapter<_Matrix> & SM,int row,int col,int Rowdim,int Coldim)169 		SubmatrixAdapter(
170 			const SubmatrixAdapter<_Matrix>& SM,
171 			int row,
172 			int col,
173 			int Rowdim,
174 			int Coldim) :
175 			//Init list starts here
176 			_Mat(SM._Mat),
177 			_row((size_t)Rowdim),
178 			_col((size_t)Coldim),
179 			_r0(SM._r0 + (size_t)row),
180 			_c0(SM._c0 + (size_t)col),
181 			_stride(SM._stride),
182 			_off(SM._off + ((size_t)row * _stride + (size_t)col)){}
183 
184 
185 		/** Get the number of rows in the matrix
186 		 * @return Number of rows in matrix
187 		 */
rowdim()188 		size_t rowdim() const{
189 			return _row;
190 		}
191 
192 		/** Get the number of columns in the matrix
193 		 * @return Number of columns in matrix
194 		 */
coldim()195 		size_t coldim() const{
196 			return _col;
197 		}
198 
199 		/*! Get the stride of the matrix.
200 		 * @return stride of submatrix (number of cols of parent matrix)
201 		 */
getStride()202 		size_t getStride() const{
203 			return _stride;
204 		}
205 
206 		/** Set the entry at (i, j).
207 		 * @param i Row index of entry, 0...rowdim() - 1
208 		 * @param j Column index of entry, 0...coldim() - 1
209 		 * @param a_ij Element to set
210 		 */
setEntry(size_t i,size_t j,const Element & a_ij)211 		const Element& setEntry(size_t i, size_t j, const Element& a_ij){
212 			return _Mat->setEntry(_r0 + i, _c0 + j, a_ij);
213 		}
214 
215 		/** Get a writeable reference to an entry in the matrix.
216 		 * @param i Row index of entry,  0...rowdim() - 1
217 		 * @param j Column index of entry, 0...coldim() - 1
218 		 * @return Reference to matrix entry
219 		 */
refEntry(size_t i,size_t j)220 		Element& refEntry(size_t i, size_t j){
221 			return _Mat->refEntry(_r0 + i, _c0 + j);
222 		}
223 
224 		/** Get a read-only individual entry from the matrix.
225 		 * @param i Row index of entry,  0...rowdim() - 1
226 		 * @param j Column index of entry, 0...coldim() - 1
227 		 * @return Const reference to matrix entry
228 		 */
getEntry(size_t i,size_t j)229 		const Element& getEntry(size_t i, size_t j) const{
230 			return _Mat->getEntry(_r0 + i, _c0 + j);
231 		}
232 
233 		/** Get an entry and store it in the given value.
234 		 * This form is more in the Linbox style and is provided for interface
235 		 * compatibility with other parts of the library
236 		 * @param x Element in which to store result
237 		 * @param i Row index of entry,  0...rowdim() - 1
238 		 * @param j Column index of entry, 0...coldim() - 1
239 		 * @return Reference to x
240 		 */
getEntry(Element & x,size_t i,size_t j)241 		Element& getEntry(Element& x, size_t i, size_t j){
242 			return _Mat->getEntry(x, _r0 + i, _c0 + j);
243 		}
244 
245 		/** Access the parent matrix
246 		 * @return Reference to _Mat
247 		 */
getMatrix()248 		_Matrix& getMatrix(){
249 			return *_Mat;
250 		}
251 	};
252 
253 	/**
254 	 * Interface for all functionnalities provided
255 	 * for BlasMatrix using GPUs.
256 	 * @internal
257 	 * Done through specialization of some of the member funcions
258 	 * defined below.  Otherwise, by default the single processor
259  	 * BlasMatrixDomain funcions are invoked.
260 	 */
261 	template <class Field_>
262 	class OpenCLMatrixDomain {
263 
264 	public:
265 		typedef Field_                          Field;
266 		typedef typename Field::Element         Element;
267 		typedef BlasMatrix<Field,typename Vector<Field>::Dense >               Matrix;
268 #ifdef __LINBOX_HAVE_OCL
269 		friend class OpenCLMatrixDomainFactory;
270 #endif
271 
272 	protected:
273 
274 		const Field& _F;
275 
276 #ifdef __LINBOX_HAVE_OCL
277 		//OpenCL specific variables
278 		cl_context context;
279 		cl_device_id device;
280 		cl_command_queue commandQue;
281 		cl_int errcode;
282 
283 		//Storage for memory levels
284 		unsigned long memCapacity;
285 		unsigned long maxBufferSize;
286 
287 		//Container type flag
288 		bool GPUcontainer;
289 		bool CPUcontainer;
290 		bool setupCorrect;
291 		bool doubleSupported;
292 
293 		//Storage for kernels and flags for availability
294 		cl_kernel dpKernels[20];
295 		bool dpKernelsAvailable[20];
296 		cl_kernel spKernels[20];
297 		bool spKernelsAvailable[20];
298 
299 		//ID number assigned by OpenCLMatrixDomainFactory
300 		unsigned int IDnum;
301 
302 		//Mutex
303 		pthread_mutex_t* deviceLock;
304 
305 		/**
306 		 * @internal
307 		 * Initializes the OpenCL compute environment
308 		 */
309 		void oclMatrixDomainAcquire();
310 
311 		/**
312 		 * @internal
313 		 * Releases OpenCL cumpute resources
314 		 */
315 		void oclMatrixDomainRelease(unsigned int IDnum);
316 
317 		/**
318 		 * @internal
319 		 * Checks to see if the memory levels required are possible
320 		 */
321 		template <class Operand1, class Operand2, class Operand3>
322 		bool oclMemCheck(
323 			Operand1& D,
324 			const Operand2& A,
325 			const Operand3& B,
326 			const Operand1& C) const;
327 
328 		template <class Operand1>
329 		bool oclMemCheck(
330 			Operand1& D,
331 			Operand1& A,
332 			Operand1& B,
333 			Operand1& C) const;
334 
335 		/**
336 		 * @internal
337 		 * OpenCL memory management functions
338 		 */
339 		template <typename T, class Operand1>
340 		cl_mem oclCreateMatrixBuffer(Operand1 &matrix) const;
341 
342 		template <typename T, class Operand1>
343 		cl_mem oclCreateAndLoadMatrixBuffer(const Operand1 &matrix) const;
344 
345 		template <typename T, class Operand2>
346 		Operand2& oclReadMatrixBuffer(cl_mem buffer, Operand2 &matrix) const;
347 
348 		template <typename T, class Operand1>
349 		cl_mem oclPadMatrix(
350 			cl_mem matrixBuffer,
351 			int matrixBufferSize,
352 			int newDimX,
353 			const Operand1 &matrix) const;
354 
355 		template <typename T, class Operand1>
356 		Operand1& oclDepadMatrix(
357 			cl_mem matrixBuffer,
358 			int matrixBufferSize,
359 			int outputSize,
360 			int newDimX,
361 			Operand1& matrix) const;
362 
363 		/**
364 		 * @internal
365 		 * Functions to call the passed kernel on the passed buffers
366 		 */
367 		template <typename T, typename U>
368 		void oclCallKernel(
369 			cl_mem bufferC,
370 			cl_mem bufferA,
371 			cl_mem bufferB,
372 			int widthA,
373 			int heightA,
374 			int widthB,
375 			T p,
376 			cl_kernel selectedKernel) const;
377 
378 		template <typename T, typename U>
379 		void oclCallKernel(
380 			cl_mem bufferD,
381 			cl_mem bufferA,
382 			cl_mem bufferB,
383 			cl_mem bufferC,
384 			int widthA,
385 			int heightA,
386 			int widthB,
387 			T p,
388 			cl_kernel selectedKernel) const;
389 
390 		template <typename T, typename U>
391 		void oclCallKernel(
392 			cl_mem bufferD,
393 			cl_mem bufferA,
394 			cl_mem bufferB,
395 			cl_mem bufferC,
396 			T alpha,
397 			T beta,
398 			int widthA,
399 			int heightA,
400 			int widthB,
401 			T p,
402 			cl_kernel selectedKernel) const;
403 
404 		/**
405 		 * @internal
406 		 * Functions to partition the matrices into submatrix views
407 		 */
408 		template <class Operand1, class Operand2, class Operand3>
409 		std::vector<int> oclPartition(
410 			Operand1& C,
411 			const Operand2& A,
412 			const Operand3& B,
413 			std::vector<SubmatrixAdapter<Operand1> >& VC,
414 			std::vector<SubmatrixAdapter<Operand2> >& VA,
415 			std::vector<SubmatrixAdapter<Operand3> >& VB) const;
416 
417 		template <class Operand1, class Operand2, class Operand3>
418 		std::vector<int> oclPartition(
419 			Operand1& D,
420 			const Operand2& A,
421 			const Operand3& B,
422 			const Operand1& C,
423 			std::vector<SubmatrixAdapter<Operand1> >& VD,
424 			std::vector<SubmatrixAdapter<Operand2> >& VA,
425 			std::vector<SubmatrixAdapter<Operand3> >& VB,
426 			std::vector<SubmatrixAdapter<Operand1> >& VC) const;
427 
428 		void printClErrstring(cl_int err) const;
429 #else
430 		bool setupCorrect;
431 #endif
432 	public:
433 
434 		//! Constructor of OpenCLDomain.
OpenCLMatrixDomain(const Field & F)435 		OpenCLMatrixDomain(const Field& F ) : _F(F), setupCorrect(false){
436 
437 #ifndef NDEBUG
438 			if(!Givaro::Protected::probab_prime(_F.characteristic())){
439 				std::cout << " *** WARNING *** " << std::endl;
440 				std::cout << " You are using a OpenCL Matrix Domain"
441 				          << " where your field is not prime "
442 				          << std::endl;
443 			}
444 #endif
445 
446 #ifdef __LINBOX_HAVE_OCL
447 			//Initialize OpenCL environment
448 			oclMatrixDomainAcquire();
449 #endif
450 		}
451 
452 		//! Copy constructor
OpenCLMatrixDomain(const OpenCLMatrixDomain<Field> & OMD)453 		OpenCLMatrixDomain(const OpenCLMatrixDomain<Field> & OMD) :
454 			_F(OMD._F),
455 			setupCorrect(false){
456 
457 #ifndef NDEBUG
458 			if(!Givaro::Protected::probab_prime(_F.characteristic())){
459 				std::cout << " *** WARNING *** " << std::endl;
460 				std::cout << " You are using a OpenCL Matrix Domain"
461 				          << " where your field is not prime "
462 				          << std::endl;
463 			}
464 #endif
465 
466 #ifdef __LINBOX_HAVE_OCL
467 			//Initialize OpenCL environment
468 			oclMatrixDomainAcquire();
469 #endif
470 		}
471 
472 		//! Deconstructor
~OpenCLMatrixDomain()473 		~OpenCLMatrixDomain(){
474 #ifdef __LINBOX_HAVE_OCL
475 			oclMatrixDomainRelease(IDnum);
476 #endif
477 		}
478 
479 		//! Field accessor
field()480 		const Field& field() const{
481 			return _F;
482 		}
483 
484 		/*
485 		 * Basics operation available matrix respecting BlasMatrix interface
486 		 */
487 
488 		//! multiplication.
489 		//! C = A*B
490 		template <class Operand1, class Operand2, class Operand3>
mul(Operand1 & C,const Operand2 & A,const Operand3 & B)491 		Operand1& mul(Operand1& C, const Operand2& A, const Operand3& B) const{
492 			return BlasMatrixDomainMul<Field,Operand1,Operand2,Operand3>()(_F,C,A,B);
493 		}
494 
495 		//! addition.
496 		//! C = A+B
497 		template <class Operand1, class Operand2, class Operand3>
add(Operand1 & C,const Operand2 & A,const Operand3 & B)498 		Operand1& add(Operand1& C, const Operand2& A, const Operand3& B) const{
499 			return BlasMatrixDomainAdd<Field,Operand1,Operand2,Operand3>()(_F,C,A,B);
500 		}
501 
502 		//! copy.
503 		//! B = A
504 		template <class Operand1, class Operand2>
copy(Operand1 & B,const Operand2 & A)505 		Operand1& copy(Operand1& B, const Operand2& A) const{
506 			return BlasMatrixDomainCopy<Field,Operand1,Operand2>()(_F,B,A);
507 		}
508 
509 		//! substraction
510 		//! C = A-B
511 		template <class Operand1, class Operand2, class Operand3>
sub(Operand1 & C,const Operand2 & A,const Operand3 & B)512 		Operand1& sub(Operand1& C, const Operand2& A, const Operand3& B) const{
513 			return BlasMatrixDomainSub<Field,Operand1,Operand2,Operand3>()(_F,C,A,B);
514 		}
515 
516 		//! substraction (in place)
517 		//! C -= B
518 		template <class Operand1, class Operand3>
subin(Operand1 & C,const Operand3 & B)519 		Operand1& subin(Operand1& C, const Operand3& B) const{
520 			return BlasMatrixDomainSubin<Field,Operand1,Operand3>()(_F,C,B);
521 		}
522 
523 		//! addition (in place)
524 		//! C += B
525 		template <class Operand1, class Operand3>
addin(Operand1 & C,const Operand3 & B)526 		Operand1& addin(Operand1& C, const Operand3& B) const{
527 			return BlasMatrixDomainAddin<Field,Operand1,Operand3>()(_F,C,B);
528 		}
529 
530 		//! multiplication with scaling.
531 		//! C = alpha.A*B
532 		template <class Operand1, class Operand2, class Operand3>
mul(Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)533 		Operand1& mul(
534 			Operand1& C,
535 			const Element& alpha,
536 			const Operand2& A,
537 			const Operand3& B) const{
538 
539 			return muladdin(_F.zero,C,alpha,A,B);
540 		}
541 
542 		//! In place multiplication.
543 		//! A = A*B
544 		template <class Operand1, class Operand2>
mulin_left(Operand1 & A,const Operand2 & B)545 		Operand1& mulin_left(Operand1& A, const Operand2& B) const{
546 			return BlasMatrixDomainMulin<Field,Operand1,Operand2>()(_F,A,B);
547 		}
548 
549 		//! In place multiplication.
550 		//! B = A*B
551 		template <class Operand1, class Operand2>
mulin_right(const Operand1 & A,Operand2 & B)552 		Operand2& mulin_right(const Operand1& A, Operand2& B) const{
553 			return BlasMatrixDomainMulin<Field,Operand2,Operand1>()(_F,A,B);
554 		}
555 
556 		//! axpy.
557 		//! D = A*B + C
558 		template <class Operand1, class Operand2, class Operand3>
axpy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)559 		Operand1& axpy(
560 			Operand1& D,
561 			const Operand2& A,
562 			const Operand3& B,
563 			const Operand1& C) const{
564 
565 			return muladd(D,_F.one,C,_F.one,A,B);
566 		}
567 
568 		//! axpyin.
569 		//! C += A*B
570 		template <class Operand1, class Operand2, class Operand3>
axpyin(Operand1 & C,const Operand2 & A,const Operand3 & B)571 		Operand1& axpyin(Operand1& C, const Operand2& A, const Operand3& B) const{
572 			return muladdin(_F.one,C,_F.one,A,B);
573 		}
574 
575 		//! maxpy.
576 		//! D = C - A*B
577 		template <class Operand1, class Operand2, class Operand3>
maxpy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)578 		Operand1& maxpy(
579 			Operand1& D,
580 			const Operand2& A,
581 			const Operand3& B,
582 			const Operand1& C) const{
583 
584 			return muladd(D,_F.one,C,_F.mOne,A,B);
585 		}
586 
587 		//! maxpyin.
588 		//! C -= A*B
589 		template <class Operand1, class Operand2, class Operand3>
maxpyin(Operand1 & C,const Operand2 & A,const Operand3 & B)590 		Operand1& maxpyin(Operand1& C, const Operand2& A, const Operand3& B) const{
591 			return muladdin(_F.one,C,_F.mOne,A,B);
592 		}
593 
594 		//! axmy.
595 		//! D= A*B - C
596 		template <class Operand1, class Operand2, class Operand3>
axmy(Operand1 & D,const Operand2 & A,const Operand3 & B,const Operand1 & C)597 		Operand1& axmy(
598 			Operand1& D,
599 			const Operand2& A,
600 			const Operand3& B,
601 			const Operand1& C) const{
602 
603 			return muladd(D,_F.mOne,C,_F.one,A,B);
604 		}
605 
606 		//! axmyin.
607 		//! C = A*B - C
608 		template <class Operand1, class Operand2, class Operand3>
axmyin(Operand1 & C,const Operand2 & A,const Operand3 & B)609 		Operand1& axmyin(Operand1& C, const Operand2& A, const Operand3& B) const{
610 			return muladdin(_F.mOne,C,_F.one,A,B);
611 		}
612 
613 		//!  general matrix-matrix multiplication and addition with scaling.
614 		//! D= beta.C + alpha.A*B
615 		template <class Operand1, class Operand2, class Operand3>
muladd(Operand1 & D,const Element & beta,const Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)616 		Operand1& muladd(
617 			Operand1& D,
618 			const Element& beta,
619 			const Operand1& C,
620 			const Element& alpha,
621 			const Operand2& A,
622 			const Operand3& B) const{
623 
624 			return BlasMatrixDomainMulAdd<Operand1,Operand2,Operand3>()(
625 				D,
626 				beta,
627 				C,
628 				alpha,
629 				A,
630 				B);
631 		}
632 
633 		//! muladdin.
634 		//! C= beta.C + alpha.A*B.
635 		template <class Operand1, class Operand2, class Operand3>
muladdin(const Element & beta,Operand1 & C,const Element & alpha,const Operand2 & A,const Operand3 & B)636 		Operand1& muladdin(
637 			const Element& beta,
638 			Operand1& C,
639 			const Element& alpha,
640 			const Operand2& A,
641 			const Operand3& B) const{
642 
643 			return BlasMatrixDomainMulAdd<Operand1,Operand2,Operand3>()(
644 				_F,
645 				beta,
646 				C,
647 				alpha,
648 				A,
649 				B);
650 		}
651 
652 		/*!
653 		 * @name Solutions available for matrix respecting BlasMatrix interface
654 		 */
655 		//@{
656 
657 		//! Inversion
658 		template <class Matrix>
inv(Matrix & Ainv,const Matrix & A)659 		Matrix& inv( Matrix &Ainv, const Matrix &A) const{
660 			BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A);
661 			return Ainv;
662 		}
663 
664 		//! Inversion (in place)
665 		template <class Matrix>
invin(Matrix & Ainv,Matrix & A)666 		Matrix& invin( Matrix &Ainv, Matrix &A) const{
667 			BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A);
668 			return Ainv;
669 		}
670 
671 		//! Inversion (the matrix A is modified)
672 		template <class Matrix>
invin(Matrix & A)673 		Matrix& invin(Matrix &A) const{
674 			Matrix tmp(A.rowdim(), A.coldim());
675 			tmp = A;
676 			BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,A,tmp);
677 			return A;
678 		}
679 
680 		/*! Division.
681 		 * C = A B^{-1}  ==>  C . B = A
682 		 */
683 		template <class Matrix>
div(Matrix & C,const Matrix & A,const Matrix & B)684 		Matrix& div(Matrix &C, const Matrix &A, const Matrix &B) const{
685 			return this->right_solve(C,B,A);
686 		}
687 
688 
689 		//! Inversion w singular check
690 		template <class Matrix>
inv(Matrix & Ainv,const Matrix & A,int & nullity)691 		Matrix& inv(Matrix &Ainv, const Matrix &A, int& nullity) const{
692 			nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A);
693 			return Ainv;
694 		}
695 
696 		//! Inversion (the matrix A is modified) w singular check
697 		template <class Matrix>
invin(Matrix & Ainv,Matrix & A,int & nullity)698 		Matrix& invin(Matrix &Ainv, Matrix &A, int& nullity) const{
699 			nullity = BlasMatrixDomainInv<Field,Matrix,Matrix>()(_F,Ainv,A);
700 			return Ainv;
701 		}
702 
703 		//! Rank
704 		template <class Matrix>
rank(const Matrix & A)705 		unsigned int rank(const Matrix &A) const{
706 			return BlasMatrixDomainRank<Field,Matrix>()(_F,A);
707 		}
708 
709 		//! in-place Rank (the matrix is modified)
710 		template <class Matrix>
rankInPlace(Matrix & A)711 		unsigned int rankInPlace(Matrix &A) const{
712 			return BlasMatrixDomainRank<Field, Matrix>()(_F,A);
713 		}
714 
715 		//! determinant
716 		template <class Matrix>
det(const Matrix & A)717 		Element det(const Matrix &A) const{
718 			return BlasMatrixDomainDet<Field, Matrix>()(_F,A);
719 		}
720 
721 		//! in-place Determinant (the matrix is modified)
722 		template <class Matrix>
detInPlace(Matrix & A)723 		Element detInPlace(Matrix &A) const{
724 			return BlasMatrixDomainDet<Field, Matrix>()(_F,A);
725 		}
726 		//@}
727 
728 		/*!
729 		 * @name Solvers for Matrix (respecting BlasMatrix interface)
730 		 * with Operand as right or left hand side
731 		 */
732 		//@{
733 		//! linear solve with matrix right hand side.
734 		//! AX=B
735 		template <class Operand, class Matrix>
left_solve(Operand & X,const Matrix & A,const Operand & B)736 		Operand& left_solve (Operand& X, const Matrix& A, const Operand& B) const{
737 			return BlasMatrixDomainLeftSolve<Field,Operand,Matrix>()(_F,X,A,B);
738 		}
739 
740 		//! linear solve with matrix right hand side, the result is stored in-place in B.
741 		//! @pre A must be square
742 		//! AX=B , (B<-X)
743 		template <class Operand,class Matrix>
left_solve(const Matrix & A,Operand & B)744 		Operand& left_solve (const Matrix& A, Operand& B) const{
745 			return BlasMatrixDomainLeftSolve<Field,Operand,Matrix>()(_F,A,B);
746 		}
747 
748 		//! linear solve with matrix right hand side.
749 		//! XA=B
750 		template <class Operand, class Matrix>
right_solve(Operand & X,const Matrix & A,const Operand & B)751 		Operand& right_solve (Operand& X, const Matrix& A, const Operand& B) const{
752 			return BlasMatrixDomainRightSolve<Field,Operand,Matrix>()(_F,X,A,B);
753 		}
754 
755 		//! linear solve with matrix right hand side, the result is stored in-place in B.
756 		//! @pre A must be square
757 		//! XA=B , (B<-X)
758 		template <class Operand, class Matrix>
right_solve(const Matrix & A,Operand & B)759 		Operand& right_solve (const Matrix& A, Operand& B) const{
760 			return BlasMatrixDomainRightSolve<Field,Operand,Matrix>()(_F,A,B);
761 		}
762 
763 		//! minimal polynomial computation.
764 		template <class Polynomial, class Matrix>
minpoly(Polynomial & P,const Matrix & A)765 		Polynomial& minpoly( Polynomial& P, const Matrix& A ) const{
766 			return BlasMatrixDomainMinpoly<Field, Polynomial, Matrix>()(_F,P,A);
767 		}
768 
769 		//! characteristic polynomial computation.
770 		template <class Polynomial,  class Matrix >
charpoly(Polynomial & P,const Matrix & A)771 		Polynomial& charpoly( Polynomial& P, const Matrix& A ) const{
772 
773 			commentator().start ("Givaro::Modular Dense Charpoly ", "MDCharpoly");
774 			std::list<Polynomial> P_list;
775 			P_list.clear();
776 			BlasMatrixDomainCharpoly<Field, std::list<Polynomial>, Matrix >()(
777 				_F,
778 				P_list,
779 				A);
780 
781 			Polynomial tmp(A.rowdim() + 1);
782 			typename std::list<Polynomial>::iterator it = P_list.begin();
783 			P = *(it++);
784 			while(it != P_list.end()){
785 				// Waiting for an implementation of a domain of polynomials
786 				mulpoly(tmp, P, *it);
787 				P = tmp;
788 				//delete &(*it);
789 				++it;
790 			}
791 			commentator().stop ("done", NULL, "MDCharpoly");
792 
793 			return P;
794 		}
795 
796 		//! characteristic polynomial computation.
797 		template <class Polynomial, class Matrix >
charpoly(std::list<Polynomial> & P,const Matrix & A)798 		std::list<Polynomial>& charpoly(
799 			std::list<Polynomial>& P,
800 			const Matrix& A ) const{
801 
802 			return BlasMatrixDomainCharpoly<
803 				Field,
804 				std::list<Polynomial>,
805 				Matrix >()(_F,P,A);
806 		}
807 
808 
809 		//private:
810 		//! @todo Temporary: waiting for an implementation of a domain of polynomial
811 		template<class Polynomial>
mulpoly(Polynomial & res,const Polynomial & P1,const Polynomial & P2)812 		Polynomial& mulpoly(
813 			Polynomial &res,
814 			const Polynomial & P1,
815 			const Polynomial & P2) const{
816 
817 			res.resize(P1.size() + P2.size() - 1);
818 
819 			for(int i = 0; i < res.size(); i++){
820 				_F.assign(res[i],_F.zero);
821 			}
822 
823 			for(int i = 0; i < P1.size(); i++){
824 				for(int j = 0; j < P2.size(); j++){
825 					_F.axpyin(res[i + j],P1[i],P2[j]);
826 				}
827 			}
828 
829 			return res;
830 		}
831 		//@}
832 
833 		template<class Matrix1, class Matrix2>
areEqual(const Matrix1 & A,const Matrix2 & B)834 		bool areEqual(const Matrix1 & A, const Matrix2 & B){
835 			if((A.rowdim() != B.rowdim()) || (A.coldim() != B.coldim())){
836 				return false ;
837 			}
838 
839 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
840 				for(size_t j = 0 ; j < A.coldim() ; ++j){
841 					if(!_F.areEqual(A.getEntry(i,j),B.getEntry(i,j))){ //!@bug use refs
842 						return false ;
843 					}
844 				}
845 			}
846 
847 			return true ;
848 		}
849 
850 		template<class Matrix>
setIdentity(Matrix & I)851 		void setIdentity(Matrix & I){
852 			for(size_t i = 0 ; i < I.rowdim() ; ++i){
853 				for(size_t j = 0 ; j < I.coldim() ; ++j){
854 					if(i == j){
855 						I.setEntry(i,j,_F.one);
856 					}
857 					else{
858 						I.setEntry(i,j,_F.zero);
859 					}
860 				}
861 			}
862 		}
863 
864 		template<class Matrix>
setZero(Matrix & I)865 		void setZero(Matrix & I){
866 			// use Iterator
867 			for(size_t i = 0 ; i < I.rowdim() ; ++i){
868 				for(size_t j = 0 ; j < I.coldim() ; ++j){
869 					I.setEntry(i,j,_F.zero);
870 				}
871 			}
872 		}
873 
874 		template<class Matrix1>
isZero(const Matrix1 & A)875 		bool isZero(const Matrix1 & A){
876 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
877 				for(size_t j = 0 ; j < A.coldim() ; ++j){
878 					if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs
879 						return false;
880 					}
881 				}
882 			}
883 
884 			return true ;
885 		}
886 
887 		template<class Matrix1>
isIdentity(const Matrix1 & A)888 		bool isIdentity(const Matrix1 & A){
889 			if(A.rowdim() != A.coldim()){
890 				return false;
891 			}
892 
893 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
894 				if(!_F.isOne(A.getEntry(i,i))){
895 					return false;
896 				}
897 			}
898 
899 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
900 				for(size_t j = 0 ; j < i ; ++j){
901 					if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs
902 						return false;
903 					}
904 				}
905 			}
906 
907 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
908 				for(size_t j = i + 1 ; j < A.coldim() ; ++j){
909 					if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs
910 						return false;
911 					}
912 				}
913 			}
914 
915 			return true ;
916 		}
917 
918 		template<class Matrix1>
isIdentityGeneralized(const Matrix1 & A)919 		bool isIdentityGeneralized(const Matrix1 & A){
920 			size_t mn = std::min(A.rowdim(),A.coldim());
921 			for(size_t i = 0 ; i < mn ; ++i){
922 				if(!_F.isOne(A.getEntry(i,i))){
923 					return false;
924 				}
925 			}
926 
927 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
928 				for(size_t j = 0 ; j < std::min(i,mn) ; ++j){
929 					if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs
930 						return false;
931 					}
932 				}
933 			}
934 
935 			for(size_t i = 0 ; i < A.rowdim() ; ++i){
936 				for(size_t j = i+1 ; j < A.coldim() ; ++j){
937 					if(!_F.isZero(A.getEntry(i,j))){ //!@bug use refs
938 						return false;
939 					}
940 				}
941 			}
942 
943 			return true;
944 		}
945 
946 	//public:
947 
948 		/** Print matrix.
949 		 * @param  os  Output stream to which matrix is written.
950 		 * @param  A   Matrix.
951 		 * @returns reference to os.
952 		 */
953 		template <class Matrix>
write(std::ostream & os,const Matrix & A)954 		inline std::ostream &write(std::ostream &os, const Matrix &A) const{
955 			return A.write(os, _F);
956 		}
957 
958 		template <class Matrix>
write(std::ostream & os,const Matrix & A,bool maple_format)959 		inline std::ostream &write(std::ostream &os,
960 		                           const Matrix &A,
961 		                           bool maple_format) const{
962 
963 			return A.write(os, _F, maple_format);
964 		}
965 
966 		/** Read matrix
967 		 * @param  is  Input stream from which matrix is read.
968 		 * @param  A   Matrix.
969 		 * @returns reference to is.
970 		 */
971 		template <class Matrix>
read(std::istream & is,Matrix & A)972 		inline std::istream &read(std::istream &is, Matrix &A) const{
973 			return A.read (is, _F);
974 		}
975 
976 	}; /* end of class OpenCLMatrixDomain */
977 
978 } /* end of namespace LinBox */
979 
980 #ifdef __LINBOX_HAVE_OCL
981 	#include "linbox/matrix/matrixdomain/opencl-domain-factory.h"
982 	#include "linbox/matrix/matrixdomain/opencl-domain-util.inl"
983 	#include "linbox/matrix/matrixdomain/opencl-domain-memory.inl"
984 	#include "linbox/matrix/matrixdomain/opencl-domain.inl"
985 #endif
986 
987 #endif // __LINBOX_opencl_matrix_domain_H
988 
989 // Local Variables:
990 // mode: C++
991 // tab-width: 4
992 // indent-tabs-mode: nil
993 // c-basic-offset: 4
994 // End:
995 // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
996