1 // Gmsh - Copyright (C) 1997-2021 C. Geuzaine, J.-F. Remacle
2 //
3 // See the LICENSE.txt file in the Gmsh root directory for license information.
4 // Please report all issues on https://gitlab.onelab.info/gmsh/gmsh/issues.
5 
6 #include <complex>
7 #include <string.h>
8 #include <algorithm>
9 #include "GmshConfig.h"
10 #include "fullMatrix.h"
11 #include "GmshMessage.h"
12 
13 #if !defined(F77NAME)
14 #define F77NAME(x) (x##_)
15 #endif
16 
17 // Specialisation of fullVector/Matrix operations using BLAS and LAPACK
18 
19 #if defined(HAVE_BLAS) && !defined(HAVE_EIGEN)
20 
21 extern "C" {
22 void F77NAME(daxpy)(int *n, double *alpha, double *x, int *incx, double *y,
23                     int *incy);
24 void F77NAME(zaxpy)(int *n, std::complex<double> *alpha,
25                     std::complex<double> *x, int *incx, std::complex<double> *y,
26                     int *incy);
27 void F77NAME(dcopy)(int *n, double *a, int *inca, double *b, int *incb);
28 void F77NAME(zcopy)(int *n, std::complex<double> *a, int *inca,
29                     std::complex<double> *b, int *incb);
30 void F77NAME(dgemm)(const char *transa, const char *transb, int *m, int *n,
31                     int *k, double *alpha, double *a, int *lda, double *b,
32                     int *ldb, double *beta, double *c, int *ldc);
33 void F77NAME(zgemm)(const char *transa, const char *transb, int *m, int *n,
34                     int *k, std::complex<double> *alpha,
35                     std::complex<double> *a, int *lda, std::complex<double> *b,
36                     int *ldb, std::complex<double> *beta,
37                     std::complex<double> *c, int *ldc);
38 void F77NAME(dgemv)(const char *trans, int *m, int *n, double *alpha, double *a,
39                     int *lda, double *x, int *incx, double *beta, double *y,
40                     int *incy);
41 void F77NAME(zgemv)(const char *trans, int *m, int *n,
42                     std::complex<double> *alpha, std::complex<double> *a,
43                     int *lda, std::complex<double> *x, int *incx,
44                     std::complex<double> *beta, std::complex<double> *y,
45                     int *incy);
46 void F77NAME(dscal)(int *n, double *alpha, double *x, int *incx);
47 void F77NAME(zscal)(int *n, std::complex<double> *alpha,
48                     std::complex<double> *x, int *incx);
49 }
50 
setAll(const fullVector<double> & m)51 template <> void fullVector<double>::setAll(const fullVector<double> &m)
52 {
53   int stride = 1;
54   F77NAME(dcopy)(&_r, m._data, &stride, _data, &stride);
55 }
56 
57 template <>
setAll(const fullVector<std::complex<double>> & m)58 void fullVector<std::complex<double> >::setAll(
59   const fullVector<std::complex<double> > &m)
60 {
61   int stride = 1;
62   F77NAME(zcopy)(&_r, m._data, &stride, _data, &stride);
63 }
64 
65 template <>
axpy(const fullVector<double> & x,double alpha)66 void fullVector<double>::axpy(const fullVector<double> &x, double alpha)
67 {
68   int M = _r, INCX = 1, INCY = 1;
69   F77NAME(daxpy)(&M, &alpha, x._data, &INCX, _data, &INCY);
70 }
71 
72 template <>
axpy(const fullVector<std::complex<double>> & x,std::complex<double> alpha)73 void fullVector<std::complex<double> >::axpy(
74   const fullVector<std::complex<double> > &x, std::complex<double> alpha)
75 {
76   int M = _r, INCX = 1, INCY = 1;
77   F77NAME(zaxpy)(&M, &alpha, x._data, &INCX, _data, &INCY);
78 }
79 
setAll(const fullMatrix<int> & m)80 template <> void fullMatrix<int>::setAll(const fullMatrix<int> &m)
81 {
82   if(_r != m._r || _c != m._c) {
83     Msg::Error("fullMatrix size does not match");
84     return;
85   }
86   int N = _r * _c;
87   for(int i = 0; i < N; ++i) _data[i] = m._data[i];
88 }
89 
setAll(const fullMatrix<double> & m)90 template <> void fullMatrix<double>::setAll(const fullMatrix<double> &m)
91 {
92   if(_r != m._r || _c != m._c) {
93     Msg::Error("fullMatrix size does not match");
94     return;
95   }
96   int N = _r * _c;
97   int stride = 1;
98   F77NAME(dcopy)(&N, m._data, &stride, _data, &stride);
99 }
100 
101 template <>
setAll(const fullMatrix<std::complex<double>> & m)102 void fullMatrix<std::complex<double> >::setAll(
103   const fullMatrix<std::complex<double> > &m)
104 {
105   if(_r != m._r || _c != m._c) {
106     Msg::Error("fullMatrix size does not match");
107     return;
108   }
109   int N = _r * _c;
110   int stride = 1;
111   F77NAME(zcopy)(&N, m._data, &stride, _data, &stride);
112 }
113 
scale(const double s)114 template <> void fullMatrix<double>::scale(const double s)
115 {
116   int N = _r * _c;
117   int stride = 1;
118   double ss = s;
119   F77NAME(dscal)(&N, &ss, _data, &stride);
120 }
121 
scale(const std::complex<double> s)122 template <> void fullMatrix<std::complex<double> >::scale(const std::complex<double> s)
123 {
124   int N = _r * _c;
125   int stride = 1;
126   std::complex<double> ss(s);
127   F77NAME(zscal)(&N, &ss, _data, &stride);
128 }
129 
scale(const int s)130 template <> void fullMatrix<int>::scale(const int s)
131 {
132   for(int i = 0; i < _r * _c; ++i) _data[i] *= s;
133 }
134 
135 template <>
mult(const fullMatrix<double> & b,fullMatrix<double> & c) const136 void fullMatrix<double>::mult(const fullMatrix<double> &b,
137                               fullMatrix<double> &c) const
138 {
139   int M = c.size1(), N = c.size2(), K = _c;
140   int LDA = _r, LDB = b.size1(), LDC = c.size1();
141   double alpha = 1., beta = 0.;
142   F77NAME(dgemm)
143   ("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, &beta, c._data,
144    &LDC);
145 }
146 
147 template <>
mult(const fullMatrix<std::complex<double>> & b,fullMatrix<std::complex<double>> & c) const148 void fullMatrix<std::complex<double> >::mult(
149   const fullMatrix<std::complex<double> > &b,
150   fullMatrix<std::complex<double> > &c) const
151 {
152   int M = c.size1(), N = c.size2(), K = _c;
153   int LDA = _r, LDB = b.size1(), LDC = c.size1();
154   std::complex<double> alpha = 1., beta = 0.;
155   F77NAME(zgemm)
156   ("N", "N", &M, &N, &K, &alpha, _data, &LDA, b._data, &LDB, &beta, c._data,
157    &LDC);
158 }
159 
160 template <>
mult(const fullVector<double> & x,fullVector<double> & y) const161 void fullMatrix<double>::mult(const fullVector<double> &x,
162                               fullVector<double> &y) const
163 {
164   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
165   double alpha = 1., beta = 0.;
166   F77NAME(dgemv)
167   ("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, &beta, y._data, &INCY);
168 }
169 
170 template <>
mult(const fullVector<std::complex<double>> & x,fullVector<std::complex<double>> & y) const171 void fullMatrix<std::complex<double> >::mult(
172   const fullVector<std::complex<double> > &x,
173   fullVector<std::complex<double> > &y) const
174 {
175   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
176   std::complex<double> alpha = 1., beta = 0.;
177   F77NAME(zgemv)
178   ("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, &beta, y._data, &INCY);
179 }
180 
181 template <>
multAddy(const fullVector<double> & x,fullVector<double> & y) const182 void fullMatrix<double>::multAddy(const fullVector<double> &x,
183                                   fullVector<double> &y) const
184 {
185   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
186   double alpha = 1., beta = 1.;
187   F77NAME(dgemv)
188   ("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, &beta, y._data, &INCY);
189 }
190 
191 template <>
multAddy(const fullVector<std::complex<double>> & x,fullVector<std::complex<double>> & y) const192 void fullMatrix<std::complex<double> >::multAddy(
193   const fullVector<std::complex<double> > &x,
194   fullVector<std::complex<double> > &y) const
195 {
196   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
197   std::complex<double> alpha = 1., beta = 1.;
198   F77NAME(zgemv)
199   ("N", &M, &N, &alpha, _data, &LDA, x._data, &INCX, &beta, y._data, &INCY);
200 }
201 
202 template <>
multOnBlock(const fullMatrix<double> & b,const int ncol,const int fcol,const int alpha_,const int beta_,fullVector<double> & c) const203 void fullMatrix<double>::multOnBlock(const fullMatrix<double> &b,
204                                      const int ncol, const int fcol,
205                                      const int alpha_, const int beta_,
206                                      fullVector<double> &c) const
207 {
208   int M = 1, N = ncol, K = b.size1();
209   int LDA = _r, LDB = b.size1(), LDC = 1;
210   double alpha = alpha_, beta = beta_;
211   F77NAME(dgemm)
212   ("N", "N", &M, &N, &K, &alpha, _data, &LDA, &(b._data[fcol * K]), &LDB, &beta,
213    &(c._data[fcol]), &LDC);
214 }
215 
216 template <>
multWithATranspose(const fullVector<double> & x,double alpha,double beta,fullVector<double> & y) const217 void fullMatrix<double>::multWithATranspose(const fullVector<double> &x,
218                                             double alpha, double beta,
219                                             fullVector<double> &y) const
220 {
221   int M = _r, N = _c, LDA = _r, INCX = 1, INCY = 1;
222   F77NAME(dgemv)
223   ("T", &M, &N, &alpha, _data, &LDA, x._data, &INCX, &beta, y._data, &INCY);
224 }
225 
226 template <>
gemm(const fullMatrix<double> & a,const fullMatrix<double> & b,double alpha,double beta,bool transposeA,bool transposeB)227 void fullMatrix<double>::gemm(const fullMatrix<double> &a,
228                               const fullMatrix<double> &b, double alpha,
229                               double beta, bool transposeA, bool transposeB)
230 {
231   int M = size1(), N = size2(), K = transposeA ? a.size1() : a.size2();
232   int LDA = a.size1(), LDB = b.size1(), LDC = size1();
233   F77NAME(dgemm)
234   (transposeA ? "T" : "N", transposeB ? "T" : "N", &M, &N, &K, &alpha, a._data,
235    &LDA, b._data, &LDB, &beta, _data, &LDC);
236 }
237 
238 template <>
gemm(const fullMatrix<std::complex<double>> & a,const fullMatrix<std::complex<double>> & b,std::complex<double> alpha,std::complex<double> beta,bool transposeA,bool transposeB)239 void fullMatrix<std::complex<double> >::gemm(
240   const fullMatrix<std::complex<double> > &a,
241   const fullMatrix<std::complex<double> > &b, std::complex<double> alpha,
242   std::complex<double> beta, bool transposeA, bool transposeB)
243 {
244   int M = size1(), N = size2(), K = transposeA ? a.size1() : a.size2();
245   int LDA = a.size1(), LDB = b.size1(), LDC = size1();
246   F77NAME(zgemm)
247   (transposeA ? "T" : "N", transposeB ? "T" : "N", &M, &N, &K, &alpha, a._data,
248    &LDA, b._data, &LDB, &beta, _data, &LDC);
249 }
250 
251 template <>
axpy(const fullMatrix<double> & x,double alpha)252 void fullMatrix<double>::axpy(const fullMatrix<double> &x, double alpha)
253 {
254   int M = _r * _c, INCX = 1, INCY = 1;
255   F77NAME(daxpy)(&M, &alpha, x._data, &INCX, _data, &INCY);
256 }
257 
258 #endif
259 
260 #if defined(HAVE_LAPACK) && !defined(HAVE_EIGEN)
261 
262 extern "C" {
263 void F77NAME(dgesv)(int *N, int *nrhs, double *A, int *lda, int *ipiv,
264                     double *b, int *ldb, int *info);
265 void F77NAME(dgetrf)(int *M, int *N, double *A, int *lda, int *ipiv, int *info);
266 void F77NAME(dgetrs)(char *trans, int *N, int *nrhs, double *A, int *lda,
267                      int *ipiv, double *b, int *ldb, int *info);
268 void F77NAME(dgetri)(int *M, double *A, int *lda, int *ipiv, double *work,
269                      int *lwork, int *info);
270 void F77NAME(dgesvd)(const char *jobu, const char *jobvt, int *M, int *N,
271                      double *A, int *lda, double *S, double *U, int *ldu,
272                      double *VT, int *ldvt, double *work, int *lwork,
273                      int *info);
274 void F77NAME(dgeev)(const char *jobvl, const char *jobvr, int *n, double *a,
275                     int *lda, double *wr, double *wi, double *vl, int *ldvl,
276                     double *vr, int *ldvr, double *work, int *lwork, int *info);
277 }
278 
279 template <>
luSolve(const fullVector<double> & rhs,fullVector<double> & result)280 bool fullMatrix<double>::luSolve(const fullVector<double> &rhs,
281                                  fullVector<double> &result)
282 {
283   int N = size1(), nrhs = 1, lda = N, ldb = N, info;
284   int *ipiv = new int[N];
285   for(int i = 0; i < N; i++) result(i) = rhs(i);
286   F77NAME(dgesv)(&N, &nrhs, _data, &lda, ipiv, result._data, &ldb, &info);
287   delete[] ipiv;
288   if(info == 0) return true;
289   return false;
290 }
291 
luFactor(fullVector<int> & ipiv)292 template <> bool fullMatrix<double>::luFactor(fullVector<int> &ipiv)
293 {
294   int M = size1(), N = size2(), lda = size1(), info;
295   ipiv.resize(std::min(M, N));
296   F77NAME(dgetrf)(&M, &N, _data, &lda, &ipiv(0), &info);
297   if(info == 0) return true;
298   return false;
299 }
300 
301 template <>
luSubstitute(const fullVector<double> & rhs,fullVector<int> & ipiv,fullVector<double> & result)302 bool fullMatrix<double>::luSubstitute(const fullVector<double> &rhs,
303                                       fullVector<int> &ipiv,
304                                       fullVector<double> &result)
305 {
306   int N = size1(), nrhs = 1, lda = N, ldb = N, info;
307   char trans = 'N';
308   for(int i = 0; i < N; i++) result(i) = rhs(i);
309   F77NAME(dgetrs)
310   (&trans, &N, &nrhs, _data, &lda, &ipiv(0), result._data, &ldb, &info);
311   if(info == 0) return true;
312   return false;
313 }
314 
invert(fullMatrix<double> & result) const315 template <> bool fullMatrix<double>::invert(fullMatrix<double> &result) const
316 {
317   int M = size1(), N = size2(), lda = size1(), info;
318   int *ipiv = new int[std::min(M, N)];
319   if(result.size2() != M || result.size1() != N) {
320     if(result._ownData || !result._data)
321       result.resize(M, N, false);
322     else {
323       Msg::Error("FullMatrix: Bad dimension, I cannot write in proxy");
324       return false;
325     }
326   }
327   result.setAll(*this);
328   F77NAME(dgetrf)(&M, &N, result._data, &lda, ipiv, &info);
329   if(info == 0) {
330     int lwork = M * 4;
331     double *work = new double[lwork];
332     F77NAME(dgetri)(&M, result._data, &lda, ipiv, work, &lwork, &info);
333     delete[] work;
334   }
335   delete[] ipiv;
336   if(info == 0)
337     return true;
338   else if(info > 0)
339     Msg::Warning("U(%d,%d)=0 in matrix inversion", info, info);
340   else
341     Msg::Error("Wrong %d-th argument in matrix inversion", -info);
342   return false;
343 }
344 
invertInPlace()345 template <> bool fullMatrix<double>::invertInPlace()
346 {
347   int N = size1(), nrhs = N, lda = N, ldb = N, info;
348   int *ipiv = new int[N];
349   double *invA = new double[N * N];
350 
351   for(int i = 0; i < N * N; i++) invA[i] = 0.;
352   for(int i = 0; i < N; i++) invA[i * N + i] = 1.;
353 
354   F77NAME(dgesv)(&N, &nrhs, _data, &lda, ipiv, invA, &ldb, &info);
355   memcpy(_data, invA, N * N * sizeof(double));
356 
357   delete[] invA;
358   delete[] ipiv;
359 
360   if(info == 0) return true;
361   if(info > 0)
362     Msg::Warning("U(%d,%d)=0 in matrix in place inversion", info, info);
363   else
364     Msg::Error("Wrong %d-th argument in matrix inversion", -info);
365   return false;
366 }
367 
determinant() const368 template <> double fullMatrix<double>::determinant() const
369 {
370   fullMatrix<double> tmp(*this);
371   int M = size1(), N = size2(), lda = size1(), info;
372   int *ipiv = new int[std::min(M, N)];
373   F77NAME(dgetrf)(&M, &N, tmp._data, &lda, ipiv, &info);
374   double det = 1.;
375   if(info == 0) {
376     for(int i = 0; i < size1(); i++) {
377       det *= tmp(i, i);
378       if(ipiv[i] != i + 1) det = -det;
379     }
380   }
381   else if(info > 0)
382     det = 0.;
383   else
384     Msg::Error("Wrong %d-th argument in matrix factorization", -info);
385   delete[] ipiv;
386   return det;
387 }
388 
389 template <>
eig(fullVector<double> & DR,fullVector<double> & DI,fullMatrix<double> & VL,fullMatrix<double> & VR,bool sortRealPart)390 bool fullMatrix<double>::eig(fullVector<double> &DR, fullVector<double> &DI,
391                              fullMatrix<double> &VL, fullMatrix<double> &VR,
392                              bool sortRealPart)
393 {
394   int N = size1(), info;
395   int lwork = 10 * N;
396   double *work = new double[lwork];
397   F77NAME(dgeev)
398   ("V", "V", &N, _data, &N, DR._data, DI._data, VL._data, &N, VR._data, &N,
399    work, &lwork, &info);
400   delete[] work;
401 
402   if(info > 0)
403     Msg::Error("QR Algorithm failed to compute all the eigenvalues", info,
404                info);
405   else if(info < 0)
406     Msg::Error("Wrong %d-th argument in eig", -info);
407   else if(sortRealPart)
408     eigSort(N, DR._data, DI._data, VL._data, VR._data);
409 
410   return true;
411 }
412 
413 template <>
svd(fullMatrix<double> & V,fullVector<double> & S)414 bool fullMatrix<double>::svd(fullMatrix<double> &V, fullVector<double> &S)
415 {
416   fullMatrix<double> VT(V.size2(), V.size1());
417   int M = size1(), N = size2(), LDA = size1(), LDVT = VT.size1(), info;
418   int lwork = std::max(3 * std::min(M, N) + std::max(M, N), 5 * std::min(M, N));
419   fullVector<double> WORK(lwork);
420   F77NAME(dgesvd)
421   ("O", "A", &M, &N, _data, &LDA, S._data, _data, &LDA, VT._data, &LDVT,
422    WORK._data, &lwork, &info);
423   V = VT.transpose();
424   if(info == 0) return true;
425   if(info > 0)
426     Msg::Error("SVD did not converge");
427   else
428     Msg::Error("Wrong %d-th argument in SVD decomposition", -info);
429   return false;
430 }
431 
432 #endif
433 
434 // Specialisation of norm() and print()
435 
norm() const436 template <> double fullVector<double>::norm() const
437 {
438   double n = 0.;
439   for(int i = 0; i < _r; ++i) n += _data[i] * _data[i];
440   return sqrt(n);
441 }
442 
norm() const443 template <> std::complex<double> fullVector<std::complex<double> >::norm() const
444 {
445   double n = 0.;
446   for(int i = 0; i < _r; ++i)
447     n += _data[i].real() * _data[i].real() + _data[i].imag() * _data[i].imag();
448   return std::complex<double>(sqrt(n), 0.);
449 }
450 
451 template <>
print(const std::string name,const std::string format) const452 void fullVector<double>::print(const std::string name,
453                                const std::string format) const
454 {
455   std::string rformat = (format == "") ? "%12.5E " : format;
456   printf("double %s[%d]=\n", name.c_str(), size());
457   printf("{  ");
458   for(int I = 0; I < size(); I++) { printf(rformat.c_str(), (*this)(I)); }
459   printf("};\n");
460 }
461 
462 template <>
print(const std::string name,const std::string format) const463 void fullVector<int>::print(const std::string name,
464                             const std::string format) const
465 {
466   std::string rformat = (format == "") ? "%12d " : format;
467   printf("double %s[%d]=\n", name.c_str(), size());
468   printf("{  ");
469   for(int I = 0; I < size(); I++) { printf(rformat.c_str(), (*this)(I)); }
470   printf("};\n");
471 }
472 
473 template <>
print(const std::string & name,const std::string & format) const474 void fullMatrix<double>::print(const std::string &name,
475                                const std::string &format) const
476 {
477   std::string rformat = (format == "") ? "%12.5E " : format;
478   int ni = size1();
479   int nj = size2();
480   printf("double %s [ %d ][ %d ]= { \n", name.c_str(), ni, nj);
481   for(int I = 0; I < ni; I++) {
482     printf("{  ");
483     for(int J = 0; J < nj; J++) {
484       printf(rformat.c_str(), (*this)(I, J));
485       if(J != nj - 1) printf(",");
486     }
487     if(I != ni - 1)
488       printf("},\n");
489     else
490       printf("}\n");
491   }
492   printf("};\n");
493 }
494 
495 template <>
print(const std::string & name,const std::string & format) const496 void fullMatrix<int>::print(const std::string &name,
497                             const std::string &format) const
498 {
499   std::string rformat = (format == "") ? "%12d " : format;
500   int ni = size1();
501   int nj = size2();
502   printf("int %s [ %d ][ %d ]= { \n", name.c_str(), ni, nj);
503   for(int I = 0; I < ni; I++) {
504     printf("{  ");
505     for(int J = 0; J < nj; J++) {
506       printf(rformat.c_str(), (*this)(I, J));
507       if(J != nj - 1) printf(",");
508     }
509     if(I != ni - 1)
510       printf("},\n");
511     else
512       printf("}\n");
513   }
514   printf("};\n");
515 }
516