1 #pragma once
2 /*
3   HMat-OSS (HMatrix library, open source software)
4 
5   Copyright (C) 2014-2015 Airbus Group SAS
6 
7   This program is free software; you can redistribute it and/or
8   modify it under the terms of the GNU General Public License
9   as published by the Free Software Foundation; either version 2
10   of the License, or (at your option) any later version.
11 
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16 
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
20 
21   http://github.com/jeromerobert/hmat-oss
22 */
23 
24 /*! \file
25   \ingroup HMatrix
26   \brief Scalar Array type used by the HMatrix library.
27 */
28 #pragma once
29 
30 #include <cstddef>
31 #include "assert.h"
32 #include "data_types.hpp"
33 #include "hmat/hmat.h"
34 
35 namespace hmat {
36 
37 // Forward declaration
38 template<typename T> class Vector;
39 
40 enum class Factorization {
41     NONE = hmat_factorization_none,
42     LU = hmat_factorization_lu,
43     LDLT = hmat_factorization_ldlt,
44     LLT = hmat_factorization_llt,
45 };
46 
47 enum class Side {
48     LEFT,
49     RIGHT,
50 };
51 
52 enum class Uplo {
53     UPPER,
54     LOWER,
55 };
56 
57 enum class Diag {
58     NONUNIT,
59     UNIT,
60 };
61 
62 template<typename T>
63 class FactorizationData {
64 public:
65     Factorization algo;
66     union {  // This union is not to save space but to show that each
67              // algorithm should define its own auxiliary data.
68         int *pivots;
69         Vector<T> *diagonal;
70     } data;
71 };
72 
73 // Convert from hmat_factorization_t into Factorization
74 Factorization convert_int_to_factorization(int);
75 
76 // Convert from Factorization into hmat_factorization_t
77 int convert_factorization_to_int(Factorization);
78 
79 /*! \brief Templated dense Matrix type.
80 
81   The template parameter represents the scalar type of the matrix elements.  The
82   supported types are \a S_t, \a D_t, \a C_t and \a Z_t, as defined in
83   @data_types.hpp.
84  */
85 template<typename T> class ScalarArray {
86   friend class ScalarArray<D_t>; // needed for some methods that manipulate both ScalarArray<T> and ScalarArray<D_t>
87 
88 private:
89   /*! True if the matrix owns its memory, ie has to free it upon destruction */
90   char ownsMemory:1;
91 protected:
92   /// Fortran style pointer (columnwise)
93   T* m;
94   /*! flag for column orthogonality in m[] (it is a pointer because it is copied with m in subsets) */
95   int *is_ortho;
96   /*! True if we own 'is_ortho' (there are cases where we own the flag and not the memory, with the constructor taking a 'T*' as input) */
97   char ownsFlag:1;
98 public:
99   /// Number of rows
100   int rows;
101   /// Number of columns
102   int cols;
103   /*! Leading dimension, as in BLAS */
104   int lda;
105 
106   /** \brief Initialize the ScalarArray with existing ScalarArray.
107 
108       In this case the matrix doesn't own the data (the memory is not
109       freed at the object destruction).
110 
111       \param d a ScalarArray
112    */
ScalarArray(const ScalarArray & d)113   ScalarArray(const ScalarArray& d) : ownsMemory(false), m(d.m), is_ortho(d.is_ortho), ownsFlag(false), rows(d.rows), cols(d.cols), lda(d.lda) {}
114   /** \brief Initialize the matrix with existing data.
115 
116       In this case the matrix doesn't own the data (the memory is not
117       freed at the object destruction).
118 
119       \param _m Pointer to the data
120       \param _rows Number of rows
121       \param _cols Number of cols
122       \param lda Leading dimension, as in BLAS
123    */
124   ScalarArray(T* _m, int _rows, int _cols, int _lda=-1);
125   /** \brief Create an empty matrix, filled with 0s.
126 
127      In this case, the memory is freed when the object is destroyed.
128 
129      \param _rows Number of rows
130      \param _cols Number of columns
131      \param zeroinit Fill the array with zeros
132    */
133   ScalarArray(int _rows, int _cols, bool zeroinit=true);
134   /** \brief Initialize the ScalarArray with subset of existing ScalarArray.
135    */
ScalarArray(const ScalarArray & d,const int rowsOffset,const int rowsSize,const int colsOffset,const int colsSize)136   ScalarArray(const ScalarArray& d, const int rowsOffset, const int rowsSize, const int colsOffset, const int colsSize): ownsMemory(false), m(d.m+rowsOffset+(size_t)colsOffset*d.lda), is_ortho(d.is_ortho), ownsFlag(false), rows(rowsSize), cols(colsSize), lda(d.lda){}
137 
138   ~ScalarArray();
139 
140   /** This <- 0.
141    */
142   void clear();
143   /** \brief Returns number of allocated zeros
144    */
145   size_t storedZeros() const;
146   /** \brief this *= alpha.
147 
148       \param alpha The scaling factor.
149    */
150   void scale(T alpha);
151   /** \brief Transpose in place.
152    */
153   void transpose();
154   /** \brief Conjugate in place.
155    */
156   void conjugate();
157   /** Return a copy of this.
158    */
159   ScalarArray<T>* copy(ScalarArray<T>* result = NULL) const;
160   /** \brief Return a new matrix that is a transposed version of this.
161 
162     This new matrix is created in \a result (if provided)
163    */
164   ScalarArray<T>* copyAndTranspose(ScalarArray<T>* result = NULL) const;
165   /**  Returns a pointer to a new ScalarArray representing a subset of row indices.
166        Columns and leading dimension are unchanged.
167        \param rowOffset offset to apply on the rows
168        \param rowSize new number of rows
169        \return pointer to a new ScalarArray.
170    */
171   ScalarArray<T> rowsSubset(const int rowOffset, const int rowSize) const ;
172 
173   /** this = alpha * op(A) * op(B) + beta * this
174 
175       Standard "GEMM" call, as in BLAS.
176 
177       \param transA 'N' or 'T', as in BLAS
178       \param transB 'N' or 'T', as in BLAS
179       \param alpha alpha
180       \param a the matrix A
181       \param b the matrix B
182       \param beta beta
183    */
184   void gemm(char transA, char transB, T alpha, const ScalarArray<T>* a,
185             const ScalarArray<T>* b, T beta);
186   /*! Copy a matrix A into 'this' at offset (rowOffset, colOffset) (indices start at 0).
187 
188     \param a the matrix A
189     \param rowOffset the row offset
190     \param colOffset the column offset
191    */
192   void copyMatrixAtOffset(const ScalarArray<T>* a, int rowOffset, int colOffset);
193 
194   /*!
195    * \brief resize Change the number of column.
196    * When increasing the existing values are kept and the added column contains
197    * undertermined values.
198    * \param col_num the new number of columns
199    */
200   void resize(int col_num);
201   /*! \brief add term by term a random value
202 
203     \param epsilon  x *= (1 + a),  a = epsilon*(1.0-2.0*rand()/(double)RAND_MAX)
204    */
205   void addRand(double epsilon);
206   /*! \brief this += alpha * A
207 
208     \param a the Matrix A
209    */
210   void axpy(T alpha, const ScalarArray<T>* a);
211   /*! \brief Return square of the Frobenius norm of the matrix.
212 
213     \return the matrix norm.
214    */
215   double normSqr() const;
216   /*! \brief Return the Frobenius norm of the matrix.
217 
218     \return the matrix norm.
219    */
220   double norm() const;
221   /*! \brief Return square of the Frobenius norm of the matrix 'this' x B^T.
222 
223     \return the matrix norm.
224    */
225   double norm_abt_Sqr(const ScalarArray<T> &b) const ;
226 
227   /*! \brief Compute dot product between a[i,*] and b[j,*]
228     */
229   T dot_aibj(int i, const ScalarArray<T> &b, int j) const ;
230 
231   /*! \brief Write the matrix to a binary file.
232 
233     \param filename output filename
234    */
235   void toFile(const char *filename) const;
236 
237   void fromFile(const char * filename);
238   /** Simpler accessors for the data.
239 
240       There are 2 types to allow matrix modification or not.
241    */
get(int i=0,int j=0)242   inline T& get(int i=0, int j=0) {
243     // here I might modify the data with this
244     setOrtho(0);
245     return m[i + ((size_t) lda) * j];
246   }
get(int i=0,int j=0) const247   inline const T& get(int i=0, int j=0) const {
248     // here this is not supposed to allow content modification (unless casted into non-const)
249     return m[i + ((size_t) lda) * j];
250   }
251 
252   /** Simpler accessors for the pointer on the data (i,j) in the scalar array.
253 
254       There are 2 types to allow matrix modification or not (const or not).
255    */
ptr(int i=0,int j=0) const256   inline T* ptr(int i=0, int j=0) const {
257     // here I might modify the data with this pointer
258     setOrtho(0);
259     return &m[i + ((size_t) lda) * j];
260   }
const_ptr(int i=0,int j=0) const261   inline const T * const_ptr(int i=0, int j=0) const {
262     // here this pointer is not supposed to allow content modification (unless casted into non-const)
263     return &m[i + ((size_t) lda) * j];
264   }
265 
266   /*! Check the matrix for the presence of NaN numbers.
267 
268     If a NaN is found, an assertion is triggered.
269    */
270   void checkNan() const;
271 
272   /*! Returns true if the matrix contains only zero values.
273    */
274   bool isZero() const ;
275 
276   size_t memorySize() const;
277 
278   /*! \brief Return a short string describing the content of this ScalarArray for debug (like: "ScalarArray [320 x 100] norm=22.34758")
279     */
description() const280   std::string description() const {
281     std::ostringstream convert;   // stream used for the conversion
282     convert << "ScalarArray [" << rows << " x " << cols << "] norm=" << norm() ;
283     return convert.str();
284   }
285   /*! \brief performs the rank 1 operation this := alpha*x*y**T + this,
286 
287      where alpha is a scalar, x and y are 2 Vector<T> of size 'm' and 'n', and this is a ScalarArray of size m x n
288   */
289   void rankOneUpdate(const T alpha, const ScalarArray<T> &x, const ScalarArray<T> &y);
290 
291   /*! \brief performs the rank 1 operation this := alpha*x*y + this,
292 
293      where alpha is a scalar, x is a Vector<T> of size 'm x 1' , y is a ScalarArray of size 1 x n (a 'horizontal'
294      vector), and 'this' is a ScalarArray of size m x n
295   */
296   void rankOneUpdateT(const T alpha, const ScalarArray<T> &x, const ScalarArray<T> &ty);
297   /*! \brief Write the ScalarArray data 'm' in a stream (FILE*, unix fd, ...)
298     */
299   void writeArray(hmat_iostream writeFunc, void * userData) const;
300 
301   /*! \brief Read the ScalarArray data 'm' from a stream (FILE*, unix fd, ...)
302     */
303   void readArray(hmat_iostream writeFunc, void * userData) ;
304 
305   /*! \brief LU decomposition (in-place)
306     */
307   void luDecomposition(int *pivots);
308 
309   /*! \brief LLT decomposition (in-place)
310     */
311   void lltDecomposition();
312 
313   /*! \brief LDLT decomposition (in-place)
314     */
315   void ldltDecomposition(Vector<T>& diagonal);
316 
317   /*! \brief Solve the system L X = B, with B = X on entry, and L = this.
318 
319     This function requires the matrix to be factored by
320     HMatrix::luDecomposition() beforehand.
321 
322     \param x B on entry, the solution on exit.
323    */
324   void solveLowerTriangularLeft(ScalarArray<T>* x, const FactorizationData<T>& context, Diag diag, Uplo uplo) const;
325 
326   /*! \brief Solve the system X U = B, with B = X on entry, and U = this.
327 
328     This function requires the matrix to be factored by
329     HMatrix::luDecomposition() beforehand.
330 
331     \param x B on entry, the solution on exit.
332    */
333   void solveUpperTriangularRight(ScalarArray<T>* x, const FactorizationData<T>& context, Diag diag, Uplo uplo) const;
334 
335   /*! \brief Solve the system U X = B, with B = X on entry, and U = this.
336 
337     This function requires the matrix to be factored by
338     HMatrix::luDecomposition() beforehand.
339 
340     \param x B on entry, the solution on exit.
341    */
342   void solveUpperTriangularLeft(ScalarArray<T>* x, const FactorizationData<T>& context, Diag diag, Uplo uplo) const;
343 
344   /*! \brief Solve the system U X = B, with B = X on entry, and U = this.
345 
346     This function requires the matrix to be factored by
347     HMatrix::luDecomposition() beforehand.
348 
349     \param x B on entry, the solution on exit.
350    */
351   void solve(ScalarArray<T>* x, const FactorizationData<T>& context) const;
352 
353   /*! \brief Compute the inverse of this in place.
354    */
355   void inverse();
356 
357   /** Makes an SVD of 'this' with LAPACK.
358 
359    If workAroundFailures is true, then the lapack exception thrown by failures in lapack SVD
360    are caught, and a fake result is returned that allows the computation to proceed.
361    If rows<cols, u is identity, sigma is filled with 1, v is 'this^T'
362    If rows>=cols, u is 'this', sigma is filled with 1, v is identity
363    Hence we still have this=U.S.V^T
364       \param u
365       \param sigma
366       \param v
367       \param workAroundFailures: handles the failures in lapack SVD (defaut is false)
368       \return 0 for success, lapack error code otherwise.
369    */
370   int svdDecomposition(ScalarArray<T>** u, Vector<double>** sigma, ScalarArray<T>** v, bool workAroundFailures=false) const;
371 
372   /** Makes an truncated SVD of 'this' with accuracy 'epsilon'.
373 
374    'workAroundFailures' has the same meaning as for svdDecomposition().
375 
376       \param u
377       \param v
378       \param epsilon the accuracy of the approximation
379       \param workAroundFailures: handles the failures in lapack SVD (defaut is false)
380       \return the rank of the approximation
381    */
382   int truncatedSvdDecomposition(ScalarArray<T>** u, ScalarArray<T>** v, double epsilon, bool workAroundFailures=false) const;
383 
384   /*! \brief Orthogonalization between columns of 'this'
385 
386     Columns [0, initialPivot[ of 'this' are supposed orthogonals. We normalize them, then we modify the others
387     columns [initialPivot, cols[ to make them orthogonals to these, by removing the colinear
388     components. This routine is part of QR factorization (classical or MGS style) so it modifies
389     and return the R factor as well in 'resultR'. Only the initialPivot first lines of resultR
390     are modified.
391     This is done using blas3 if env. var. HMAT_MGS_BLAS3 is set.
392     */
393   void orthoColumns(ScalarArray<T> *resultR, int initialPivot) ;
394 
395   /** QR matrix decomposition.
396 
397     Warning: m is modified!
398     tau is now stored in the last column of 'this'
399     \param resultR the R upper triangular bloc (also available in 'this')
400     \param initialPivot the number of initial columns orthogonal in the array
401     \return
402   */
403   void qrDecomposition(ScalarArray<T> *resultR, int initialPivot=0);
404 
405   /** Do the product by Q.
406 
407       this=qr has to be factored using \a qrDecomposition.
408       The arguments side and trans have the same meaning as in the
409       LAPACK xORMQR function. Beware, only the 'L', 'N' case has been
410       tested !
411 
412       \param side either 'L' or 'R', as in xORMQR
413       \param trans either 'N' or 'T' as in xORMQR
414       \param c as in xORMQR
415       \return 0 for success
416    */
417   int productQ(char side, char trans, ScalarArray<T>* c) const;
418 
419 
420   /** Multiplication used in RkMatrix::truncate()
421 
422        A B -> computing "AB^t" with A=this and B full upper triangular
423        (non-unitary diagonal)
424 
425    */
426   void myTrmm(const ScalarArray<T>* bTri);
427 
428   /** modified Gram-Schmidt algorithm of A='this'
429 
430       Computes a QR-decomposition of a matrix A=[a_1,...,a_n] thanks to the
431       modified Gram-Schmidt procedure with column pivoting.
432 
433       The matrix A is overwritten with a matrix Q=[q_1,...,q_r] whose columns are
434       orthonormal and are a basis of Im(A).
435       A pivoting strategy is used to improve stability:
436       Each new qj vector is computed from the vector with the maximal 2-norm
437       amongst the remaining a_k vectors.
438 
439       To further improve stability for each newly computed q_j vector its
440       component is removed from the remaining columns a_k of A.
441 
442       Stopping criterion:
443       whenever the maximal norm of the remaining vectors is smaller than
444       prec * max(||ai||) the algorithm stops and the numerical rank at precision
445       prec is the number of q_j vectors computed.
446 
447       Eventually the computed decomposition is:
448       [a_{perm[0]},...,a_{perm[rank-1]}] = [q_1,...,q_{rank-1}] * [r]
449       where [r] is an upper triangular matrix.
450 
451       If some columns [0..j] are already orthogonal in A, it can be interesting to use
452       these as pivots (and spare orthogonalisation within these columns). The
453       optionnal parameter initialPivot indicates the number of columns [0..initialPivot-1]
454       orthogonal.
455 
456       \param prec is a small parameter describing a relative precision thus
457       0 < prec < 1.
458       WARNING: the lowest precision allowed is 1e-6.
459       \param initialPivot the number of initial columns orthogonal in the array
460       \return rank
461 
462       NB: On exit the orthonormal matrix stored in A is 'full' and not represented
463       as a product of Householder reflectors. OR/ZU-MQR from LAPACK is NOT
464       the way to apply the matrix: one has to use matrix-vector product instead.
465   */
466   int modifiedGramSchmidt(ScalarArray<T> *r, double prec, int initialPivot=0 );
467 
468   /*! \brief B <- B*D or B <- B*D^-1  (or with D on the left).
469 
470     B = this, and D a diagonal matrix (given as a Vector or 1 column ScalarArray).
471 
472      \param d  D
473      \param inverse true : B<-B*D^-1, false B<-B*D
474      \param left true : B<-D*B, false B<-B*D
475   */
476   void multiplyWithDiagOrDiagInv(const ScalarArray<T>* d, bool inverse, Side side) ;
477 
478   /*! \brief B <- B*D
479 
480     B = this, and D a 'double' diagonal matrix (given as a Vector or 1 column ScalarArray).
481 
482      \param d  D
483   */
484   void multiplyWithDiag(const ScalarArray<double>* d) ;
485 
486   /*! \brief Computes if 'this' has orthogonal columns
487 
488     \return true or false
489     */
490   bool testOrtho() const ;
491 
492   /*! \brief Set orthogonality flag
493    */
setOrtho(const int flag) const494   inline void setOrtho(const int flag) const {
495     *is_ortho = flag;
496     static char *test = getenv("HMAT_TEST_ORTHO");
497     if (flag && test) assert(getOrtho() == testOrtho());
498   }
499   /*! \brief Get orthogonality flag
500    */
getOrtho() const501   inline int getOrtho() const {
502     return *is_ortho;
503   }
504 
505 };
506 
507   /*! \brief Templated Vector class = a ScalarArray with 1 column
508 
509     As for \a ScalarArray, the template parameter is the scalar type.
510    */
511   template<typename T> class Vector : public ScalarArray<T> {
512 
513   public:
Vector(T * _m,int _rows)514     Vector(T* _m, int _rows):ScalarArray<T>(_m, _rows, 1){}
Vector(int _rows)515     Vector(int _rows):ScalarArray<T>(_rows, 1){}
516     /** \brief Create Vector with column 'col' of existing ScalarArray
517      */
Vector(const ScalarArray<T> & d,int _col)518     Vector(const ScalarArray<T> &d, int _col):ScalarArray<T>(d, 0, d.rows, _col, 1){}
519     //~Vector(){}
520     /** L2 norm of the vector.
521      */
522     int absoluteMaxIndex(int startIndex = 0) const;
523     /** Compute the dot product of two \Vector.
524 
525         For real-valued vectors, this is the usual dot product. For
526         complex-valued ones, this is defined as:
527            <x, y> = \bar{x}^t \times y
528         as in BLAS
529 
530         \warning DOES NOT work with vectors with >INT_MAX elements
531 
532         \param x
533         \param y
534         \return <x, y>
535      */
536     static T dot(const Vector<T>* x, const Vector<T>* y);
537 
538     /** Simpler accessors for the vector data.
539      */
operator [](std::size_t i)540     inline T& operator[](std::size_t i){
541       return this->get(i);
542     }
operator [](std::size_t i) const543     inline const T& operator[] (std::size_t i) const {
544       return this->get(i);
545     }
546   private:
547     /// Disallow the copy
548     Vector<T>(const Vector<T>& o);
549   };
550 
551 }  // end namespace hmat
552 
553