1 // -*- c++ -*- (enables emacs c++ mode)
2 //===========================================================================
3 //
4 // Copyright (C) 2003-2008 Yves Renard
5 //
6 // This file is a part of GETFEM++
7 //
8 // Getfem++  is  free software;  you  can  redistribute  it  and/or modify it
9 // under  the  terms  of the  GNU  Lesser General Public License as published
10 // by  the  Free Software Foundation;  either version 2.1 of the License,  or
11 // (at your option) any later version.
12 // This program  is  distributed  in  the  hope  that it will be useful,  but
13 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 // License for more details.
16 // You  should  have received a copy of the GNU Lesser General Public License
17 // along  with  this program;  if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19 //
20 // As a special exception, you may use this file as part of a free software
21 // library without restriction.  Specifically, if other files instantiate
22 // templates or use macros or inline functions from this file, or you compile
23 // this file and link it with other files to produce an executable, this
24 // file does not by itself cause the resulting executable to be covered by
25 // the GNU General Public License.  This exception does not however
26 // invalidate any other reasons why the executable file might be covered by
27 // the GNU General Public License.
28 //
29 //===========================================================================
30 
31 /**@file gmm_lapack_interface.h
32    @author  Yves Renard <Yves.Renard@insa-lyon.fr>
33    @date October 7, 2003.
34    @brief gmm interface for LAPACK
35 */
36 
37 #if defined(GMM_USES_LAPACK) || defined(GMM_USES_ATLAS)
38 
39 #ifndef GMM_LAPACK_INTERFACE_H
40 #define GMM_LAPACK_INTERFACE_H
41 
42 #include "gmm_blas_interface.h"
43 #include "gmm_dense_lu.h"
44 #include "gmm_dense_qr.h"
45 
46 
47 namespace gmm {
48 
49   /* ********************************************************************* */
50   /* Operations interfaced for T = float, double, std::complex<float>      */
51   /*    or std::complex<double> :                                          */
52   /*                                                                       */
53   /* lu_factor(dense_matrix<T>, std::vector<int>)                          */
54   /* lu_solve(dense_matrix<T>, std::vector<T>, std::vector<T>)             */
55   /* lu_solve(dense_matrix<T>, std::vector<int>, std::vector<T>,           */
56   /*          std::vector<T>)                                              */
57   /* lu_solve_transposed(dense_matrix<T>, std::vector<int>, std::vector<T>,*/
58   /*          std::vector<T>)                                              */
59   /* lu_inverse(dense_matrix<T>)                                           */
60   /* lu_inverse(dense_matrix<T>, std::vector<int>, dense_matrix<T>)        */
61   /*                                                                       */
62   /* qr_factor(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>)          */
63   /*                                                                       */
64   /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>)                */
65   /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>,                */
66   /*                       dense_matrix<T>)                                */
67   /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >) */
68   /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >, */
69   /*                       dense_matrix<T>)                                */
70   /*                                                                       */
71   /* ********************************************************************* */
72 
73   /* ********************************************************************* */
74   /* LAPACK functions used.                                                */
75   /* ********************************************************************* */
76 
77   extern "C" {
78     void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...);
79     void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...);
80     void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...);
81     void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...);
82     void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...);
83     void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...);
84     void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...);
85     void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...);
86   }
87 
88   /* ********************************************************************* */
89   /* LU decomposition.                                                     */
90   /* ********************************************************************* */
91 
92 # define getrf_interface(lapack_name, base_type) inline                    \
93   size_type lu_factor(dense_matrix<base_type > &A, std::vector<int> &ipvt){\
94     GMMLAPACK_TRACE("getrf_interface");                                    \
95     int m(mat_nrows(A)), n(mat_ncols(A)), lda(m), info(0);                 \
96     if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info);       \
97     return size_type(info);                                                \
98   }
99 
100   getrf_interface(sgetrf_, BLAS_S)
101   getrf_interface(dgetrf_, BLAS_D)
102   getrf_interface(cgetrf_, BLAS_C)
103   getrf_interface(zgetrf_, BLAS_Z)
104 
105   /* ********************************************************************* */
106   /* LU solve.                                                             */
107   /* ********************************************************************* */
108 
109 # define getrs_interface(f_name, trans1, lapack_name, base_type) inline    \
110   void f_name(const dense_matrix<base_type > &A,                           \
111 	      const std::vector<int> &ipvt, std::vector<base_type > &x,    \
112 	      const std::vector<base_type > &b) {                          \
113     GMMLAPACK_TRACE("getrs_interface");                                    \
114     int n(mat_nrows(A)), info, nrhs(1);                                    \
115     gmm::copy(b, x); trans1;                                               \
116     if (n)                                                                 \
117       lapack_name(&t, &n, &nrhs, &(A(0,0)),&n,&ipvt[0], &x[0], &n, &info); \
118   }
119 
120 # define getrs_trans_n const char t = 'N'
121 # define getrs_trans_t const char t = 'T'
122 
123   getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
124   getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
125   getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
126   getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
127   getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
128   getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
129   getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
130   getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
131 
132   /* ********************************************************************* */
133   /* LU inverse.                                                           */
134   /* ********************************************************************* */
135 
136 # define getri_interface(lapack_name, base_type) inline                    \
137   void lu_inverse(const dense_matrix<base_type > &LU,                      \
138        std::vector<int> &ipvt, const dense_matrix<base_type > &A_) {       \
139     GMMLAPACK_TRACE("getri_interface");                                    \
140     dense_matrix<base_type >&                                              \
141     A = const_cast<dense_matrix<base_type > &>(A_);                        \
142     int n(mat_nrows(A)), info, lwork(-1); base_type work1;                 \
143     if (n) {                                                               \
144       gmm::copy(LU, A);                                                    \
145       lapack_name(&n, &A(0,0), &n, &ipvt[0], &work1, &lwork, &info);       \
146       lwork = int(gmm::real(work1));                                       \
147       std::vector<base_type > work(lwork);                                 \
148       lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info);     \
149     }                                                                      \
150   }
151 
152   getri_interface(sgetri_, BLAS_S)
153   getri_interface(dgetri_, BLAS_D)
154   getri_interface(cgetri_, BLAS_C)
155   getri_interface(zgetri_, BLAS_Z)
156 
157 
158   /* ********************************************************************* */
159   /* QR factorization.                                                     */
160   /* ********************************************************************* */
161 
162 # define geqrf_interface(lapack_name1, base_type) inline		   \
163   void qr_factor(dense_matrix<base_type > &A){			           \
164     GMMLAPACK_TRACE("geqrf_interface");				           \
165     int m(mat_nrows(A)), n(mat_ncols(A)), info, lwork(-1); base_type work1;\
166     if (m && n) {							   \
167       std::vector<base_type > tau(n);					   \
168       lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1  , &lwork, &info); \
169       lwork = int(gmm::real(work1));					   \
170       std::vector<base_type > work(lwork);				   \
171       lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \
172       GMM_ASSERT1(!info, "QR factorization failed");			   \
173     }									   \
174   }
175 
176   geqrf_interface(sgeqrf_, BLAS_S)
177   geqrf_interface(dgeqrf_, BLAS_D)
178     // For complex values, housholder vectors are not the same as in
179     // gmm::lu_factor. Impossible to interface for the moment.
180     //  geqrf_interface(cgeqrf_, BLAS_C)
181     //  geqrf_interface(zgeqrf_, BLAS_Z)
182 
183 # define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline    \
184   void qr_factor(const dense_matrix<base_type > &A,                        \
185        dense_matrix<base_type > &Q, dense_matrix<base_type > &R) {         \
186     GMMLAPACK_TRACE("geqrf_interface2");                                   \
187     int m(mat_nrows(A)), n(mat_ncols(A)), info, lwork(-1); base_type work1;\
188     if (m && n) {                                                          \
189       gmm::copy(A, Q);                                                     \
190       std::vector<base_type > tau(n);                                      \
191       lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1  , &lwork, &info); \
192       lwork = int(gmm::real(work1));                                       \
193       std::vector<base_type > work(lwork);                                 \
194       lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \
195       GMM_ASSERT1(!info, "QR factorization failed");                       \
196       base_type *p = &R(0,0), *q = &Q(0,0);                                \
197       for (int j = 0; j < n; ++j, q += m-n)                                \
198         for (int i = 0; i < n; ++i, ++p, ++q)                              \
199           *p = (j < i) ? base_type(0) : *q;                                \
200       lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \
201     }                                                                      \
202     else gmm::clear(Q);                                                    \
203   }
204 
205   geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S)
206   geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
207   geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
208   geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
209 
210   /* ********************************************************************* */
211   /* QR algorithm for eigenvalues search.                                  */
212   /* ********************************************************************* */
213 
214 # define gees_interface(lapack_name, base_type)                            \
215   template <typename VECT> inline void implicit_qr_algorithm(              \
216          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
217          dense_matrix<base_type > &Q,                                      \
218          double tol=gmm::default_tol(base_type()), bool compvect = true) { \
219     GMMLAPACK_TRACE("gees_interface");                                     \
220     typedef bool (*L_fp)(...);  L_fp p = 0;                                \
221     int n(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;           \
222     if (!n) return;                                                        \
223     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
224     char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
225     std::vector<double> rwork(n), eigv1(n), eigv2(n);                      \
226     lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
227                 &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
228     lwork = int(gmm::real(work1));                                         \
229     std::vector<base_type > work(lwork);                                   \
230     lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0],       \
231 		&eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\
232     GMM_ASSERT1(!info, "QR algorithm failed");                             \
233     extract_eig(H, const_cast<VECT &>(eigval_), tol);                      \
234   }
235 
236 # define gees_interface2(lapack_name, base_type)                           \
237   template <typename VECT> inline void implicit_qr_algorithm(              \
238          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
239          dense_matrix<base_type > &Q,                                      \
240          double tol=gmm::default_tol(base_type()), bool compvect = true) { \
241     GMMLAPACK_TRACE("gees_interface2");                                    \
242     typedef bool (*L_fp)(...);  L_fp p = 0;                                \
243     int n(mat_nrows(A)), info, lwork(-1), sdim; base_type work1;           \
244     if (!n) return;                                                        \
245     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
246     char jobvs = (compvect ? 'V' : 'N'), sort = 'N';                       \
247     std::vector<double> rwork(n), eigvv(n*2);                              \
248     lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
249                 &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \
250     lwork = int(gmm::real(work1));                                         \
251     std::vector<base_type > work(lwork);                                   \
252     lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0],       \
253                 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\
254     GMM_ASSERT1(!info, "QR algorithm failed");                             \
255     extract_eig(H, const_cast<VECT &>(eigval_), tol);                      \
256   }
257 
258   gees_interface(sgees_, BLAS_S)
259   gees_interface(dgees_, BLAS_D)
260   gees_interface2(cgees_, BLAS_C)
261   gees_interface2(zgees_, BLAS_Z)
262 
263 # define geev_int_right(lapack_name, base_type)			           \
264   template <typename VECT> inline void geev_interface_right(               \
265          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
266          dense_matrix<base_type > &Q) {		                           \
267     GMMLAPACK_TRACE("geev_interface");                                     \
268     int n(mat_nrows(A)), info, lwork(-1); base_type work1;                 \
269     if (!n) return;                                                        \
270     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
271     char jobvl = 'N', jobvr = 'V';                                         \
272     std::vector<base_type > eigvr(n), eigvi(n);                            \
273     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
274 		&Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info);   	   \
275     lwork = int(gmm::real(work1));                                         \
276     std::vector<base_type > work(lwork);                                   \
277     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
278 		&Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info);    	   \
279     GMM_ASSERT1(!info, "QR algorithm failed");                             \
280     gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_)));	   \
281     gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_)));	   \
282   }
283 
284 # define geev_int_rightc(lapack_name, base_type)			   \
285   template <typename VECT> inline void geev_interface_right(               \
286          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
287          dense_matrix<base_type > &Q) {		                           \
288     GMMLAPACK_TRACE("geev_interface");                                     \
289     int n(mat_nrows(A)), info, lwork(-1); base_type work1;                 \
290     if (!n) return;                                                        \
291     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
292     char jobvl = 'N', jobvr = 'V';                                         \
293     std::vector<double> rwork(2*n);                                        \
294     std::vector<base_type> eigv(n);                                        \
295     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
296 		&Q(0,0), &n, &work1, &lwork, &rwork[0], &info);	   	   \
297     lwork = int(gmm::real(work1));                                         \
298     std::vector<base_type > work(lwork);                                   \
299     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
300 		&Q(0,0), &n, &work[0], &lwork,  &rwork[0],  &info);	   \
301     GMM_ASSERT1(!info, "QR algorithm failed");                             \
302     gmm::copy(eigv, const_cast<VECT &>(eigval_));			   \
303   }
304 
305   geev_int_right(sgeev_, BLAS_S)
306   geev_int_right(dgeev_, BLAS_D)
307   geev_int_rightc(cgeev_, BLAS_C)
308   geev_int_rightc(zgeev_, BLAS_Z)
309 
310 # define geev_int_left(lapack_name, base_type)                             \
311   template <typename VECT> inline void geev_interface_left(                \
312          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
313          dense_matrix<base_type > &Q) {	 	                           \
314     GMMLAPACK_TRACE("geev_interface");                                     \
315     int n(mat_nrows(A)), info, lwork(-1); base_type work1;                 \
316     if (!n) return;                                                        \
317     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
318     char jobvl = 'V', jobvr = 'N';                                         \
319     std::vector<base_type > eigvr(n), eigvi(n);                            \
320     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
321 		&Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info);   	   \
322     lwork = int(gmm::real(work1));                                         \
323     std::vector<base_type > work(lwork);                                   \
324     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0],     \
325 		&Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info);    	   \
326     GMM_ASSERT1(!info, "QR algorithm failed");                             \
327     gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_)));	   \
328     gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_)));	   \
329   }
330 
331 # define geev_int_leftc(lapack_name, base_type)	  		           \
332   template <typename VECT> inline void geev_interface_left(                \
333          const dense_matrix<base_type > &A,  const VECT &eigval_,          \
334          dense_matrix<base_type > &Q) {		                           \
335     GMMLAPACK_TRACE("geev_interface");                                     \
336     int n(mat_nrows(A)), info, lwork(-1); base_type work1;                 \
337     if (!n) return;                                                        \
338     dense_matrix<base_type > H(n,n); gmm::copy(A, H);                      \
339     char jobvl = 'V', jobvr = 'N';                                         \
340     std::vector<double> rwork(2*n);                                        \
341     std::vector<base_type> eigv(n);                                        \
342     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
343 		&Q(0,0), &n, &work1, &lwork, &rwork[0], &info);	   	   \
344     lwork = int(gmm::real(work1));                                         \
345     std::vector<base_type > work(lwork);                                   \
346     lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n,    \
347 		&Q(0,0), &n, &work[0], &lwork,  &rwork[0],  &info);	   \
348     GMM_ASSERT1(!info, "QR algorithm failed");                             \
349     gmm::copy(eigv, const_cast<VECT &>(eigval_));			   \
350   }
351 
352   geev_int_left(sgeev_, BLAS_S)
353   geev_int_left(dgeev_, BLAS_D)
354   geev_int_leftc(cgeev_, BLAS_C)
355   geev_int_leftc(zgeev_, BLAS_Z)
356 
357 
358 }
359 
360 #endif // GMM_LAPACK_INTERFACE_H
361 
362 #endif // GMM_USES_LAPACK || GMM_USES_ATLAS
363