1 // -*- c++ -*- (enables emacs c++ mode) 2 //=========================================================================== 3 // 4 // Copyright (C) 2003-2008 Yves Renard 5 // 6 // This file is a part of GETFEM++ 7 // 8 // Getfem++ is free software; you can redistribute it and/or modify it 9 // under the terms of the GNU Lesser General Public License as published 10 // by the Free Software Foundation; either version 2.1 of the License, or 11 // (at your option) any later version. 12 // This program is distributed in the hope that it will be useful, but 13 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15 // License for more details. 16 // You should have received a copy of the GNU Lesser General Public License 17 // along with this program; if not, write to the Free Software Foundation, 18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 // 20 // As a special exception, you may use this file as part of a free software 21 // library without restriction. Specifically, if other files instantiate 22 // templates or use macros or inline functions from this file, or you compile 23 // this file and link it with other files to produce an executable, this 24 // file does not by itself cause the resulting executable to be covered by 25 // the GNU General Public License. This exception does not however 26 // invalidate any other reasons why the executable file might be covered by 27 // the GNU General Public License. 28 // 29 //=========================================================================== 30 31 /**@file gmm_lapack_interface.h 32 @author Yves Renard <Yves.Renard@insa-lyon.fr> 33 @date October 7, 2003. 34 @brief gmm interface for LAPACK 35 */ 36 37 #if defined(GMM_USES_LAPACK) || defined(GMM_USES_ATLAS) 38 39 #ifndef GMM_LAPACK_INTERFACE_H 40 #define GMM_LAPACK_INTERFACE_H 41 42 #include "gmm_blas_interface.h" 43 #include "gmm_dense_lu.h" 44 #include "gmm_dense_qr.h" 45 46 47 namespace gmm { 48 49 /* ********************************************************************* */ 50 /* Operations interfaced for T = float, double, std::complex<float> */ 51 /* or std::complex<double> : */ 52 /* */ 53 /* lu_factor(dense_matrix<T>, std::vector<int>) */ 54 /* lu_solve(dense_matrix<T>, std::vector<T>, std::vector<T>) */ 55 /* lu_solve(dense_matrix<T>, std::vector<int>, std::vector<T>, */ 56 /* std::vector<T>) */ 57 /* lu_solve_transposed(dense_matrix<T>, std::vector<int>, std::vector<T>,*/ 58 /* std::vector<T>) */ 59 /* lu_inverse(dense_matrix<T>) */ 60 /* lu_inverse(dense_matrix<T>, std::vector<int>, dense_matrix<T>) */ 61 /* */ 62 /* qr_factor(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>) */ 63 /* */ 64 /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>) */ 65 /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>, */ 66 /* dense_matrix<T>) */ 67 /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >) */ 68 /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >, */ 69 /* dense_matrix<T>) */ 70 /* */ 71 /* ********************************************************************* */ 72 73 /* ********************************************************************* */ 74 /* LAPACK functions used. */ 75 /* ********************************************************************* */ 76 77 extern "C" { 78 void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...); 79 void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...); 80 void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...); 81 void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...); 82 void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...); 83 void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...); 84 void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...); 85 void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...); 86 } 87 88 /* ********************************************************************* */ 89 /* LU decomposition. */ 90 /* ********************************************************************* */ 91 92 # define getrf_interface(lapack_name, base_type) inline \ 93 size_type lu_factor(dense_matrix<base_type > &A, std::vector<int> &ipvt){\ 94 GMMLAPACK_TRACE("getrf_interface"); \ 95 int m(mat_nrows(A)), n(mat_ncols(A)), lda(m), info(0); \ 96 if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info); \ 97 return size_type(info); \ 98 } 99 100 getrf_interface(sgetrf_, BLAS_S) 101 getrf_interface(dgetrf_, BLAS_D) 102 getrf_interface(cgetrf_, BLAS_C) 103 getrf_interface(zgetrf_, BLAS_Z) 104 105 /* ********************************************************************* */ 106 /* LU solve. */ 107 /* ********************************************************************* */ 108 109 # define getrs_interface(f_name, trans1, lapack_name, base_type) inline \ 110 void f_name(const dense_matrix<base_type > &A, \ 111 const std::vector<int> &ipvt, std::vector<base_type > &x, \ 112 const std::vector<base_type > &b) { \ 113 GMMLAPACK_TRACE("getrs_interface"); \ 114 int n(mat_nrows(A)), info, nrhs(1); \ 115 gmm::copy(b, x); trans1; \ 116 if (n) \ 117 lapack_name(&t, &n, &nrhs, &(A(0,0)),&n,&ipvt[0], &x[0], &n, &info); \ 118 } 119 120 # define getrs_trans_n const char t = 'N' 121 # define getrs_trans_t const char t = 'T' 122 123 getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S) 124 getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D) 125 getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C) 126 getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z) 127 getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S) 128 getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D) 129 getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C) 130 getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z) 131 132 /* ********************************************************************* */ 133 /* LU inverse. */ 134 /* ********************************************************************* */ 135 136 # define getri_interface(lapack_name, base_type) inline \ 137 void lu_inverse(const dense_matrix<base_type > &LU, \ 138 std::vector<int> &ipvt, const dense_matrix<base_type > &A_) { \ 139 GMMLAPACK_TRACE("getri_interface"); \ 140 dense_matrix<base_type >& \ 141 A = const_cast<dense_matrix<base_type > &>(A_); \ 142 int n(mat_nrows(A)), info, lwork(-1); base_type work1; \ 143 if (n) { \ 144 gmm::copy(LU, A); \ 145 lapack_name(&n, &A(0,0), &n, &ipvt[0], &work1, &lwork, &info); \ 146 lwork = int(gmm::real(work1)); \ 147 std::vector<base_type > work(lwork); \ 148 lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info); \ 149 } \ 150 } 151 152 getri_interface(sgetri_, BLAS_S) 153 getri_interface(dgetri_, BLAS_D) 154 getri_interface(cgetri_, BLAS_C) 155 getri_interface(zgetri_, BLAS_Z) 156 157 158 /* ********************************************************************* */ 159 /* QR factorization. */ 160 /* ********************************************************************* */ 161 162 # define geqrf_interface(lapack_name1, base_type) inline \ 163 void qr_factor(dense_matrix<base_type > &A){ \ 164 GMMLAPACK_TRACE("geqrf_interface"); \ 165 int m(mat_nrows(A)), n(mat_ncols(A)), info, lwork(-1); base_type work1;\ 166 if (m && n) { \ 167 std::vector<base_type > tau(n); \ 168 lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1 , &lwork, &info); \ 169 lwork = int(gmm::real(work1)); \ 170 std::vector<base_type > work(lwork); \ 171 lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \ 172 GMM_ASSERT1(!info, "QR factorization failed"); \ 173 } \ 174 } 175 176 geqrf_interface(sgeqrf_, BLAS_S) 177 geqrf_interface(dgeqrf_, BLAS_D) 178 // For complex values, housholder vectors are not the same as in 179 // gmm::lu_factor. Impossible to interface for the moment. 180 // geqrf_interface(cgeqrf_, BLAS_C) 181 // geqrf_interface(zgeqrf_, BLAS_Z) 182 183 # define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline \ 184 void qr_factor(const dense_matrix<base_type > &A, \ 185 dense_matrix<base_type > &Q, dense_matrix<base_type > &R) { \ 186 GMMLAPACK_TRACE("geqrf_interface2"); \ 187 int m(mat_nrows(A)), n(mat_ncols(A)), info, lwork(-1); base_type work1;\ 188 if (m && n) { \ 189 gmm::copy(A, Q); \ 190 std::vector<base_type > tau(n); \ 191 lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1 , &lwork, &info); \ 192 lwork = int(gmm::real(work1)); \ 193 std::vector<base_type > work(lwork); \ 194 lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \ 195 GMM_ASSERT1(!info, "QR factorization failed"); \ 196 base_type *p = &R(0,0), *q = &Q(0,0); \ 197 for (int j = 0; j < n; ++j, q += m-n) \ 198 for (int i = 0; i < n; ++i, ++p, ++q) \ 199 *p = (j < i) ? base_type(0) : *q; \ 200 lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \ 201 } \ 202 else gmm::clear(Q); \ 203 } 204 205 geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S) 206 geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D) 207 geqrf_interface2(cgeqrf_, cungqr_, BLAS_C) 208 geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z) 209 210 /* ********************************************************************* */ 211 /* QR algorithm for eigenvalues search. */ 212 /* ********************************************************************* */ 213 214 # define gees_interface(lapack_name, base_type) \ 215 template <typename VECT> inline void implicit_qr_algorithm( \ 216 const dense_matrix<base_type > &A, const VECT &eigval_, \ 217 dense_matrix<base_type > &Q, \ 218 double tol=gmm::default_tol(base_type()), bool compvect = true) { \ 219 GMMLAPACK_TRACE("gees_interface"); \ 220 typedef bool (*L_fp)(...); L_fp p = 0; \ 221 int n(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \ 222 if (!n) return; \ 223 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 224 char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \ 225 std::vector<double> rwork(n), eigv1(n), eigv2(n); \ 226 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \ 227 &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \ 228 lwork = int(gmm::real(work1)); \ 229 std::vector<base_type > work(lwork); \ 230 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \ 231 &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\ 232 GMM_ASSERT1(!info, "QR algorithm failed"); \ 233 extract_eig(H, const_cast<VECT &>(eigval_), tol); \ 234 } 235 236 # define gees_interface2(lapack_name, base_type) \ 237 template <typename VECT> inline void implicit_qr_algorithm( \ 238 const dense_matrix<base_type > &A, const VECT &eigval_, \ 239 dense_matrix<base_type > &Q, \ 240 double tol=gmm::default_tol(base_type()), bool compvect = true) { \ 241 GMMLAPACK_TRACE("gees_interface2"); \ 242 typedef bool (*L_fp)(...); L_fp p = 0; \ 243 int n(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \ 244 if (!n) return; \ 245 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 246 char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \ 247 std::vector<double> rwork(n), eigvv(n*2); \ 248 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \ 249 &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \ 250 lwork = int(gmm::real(work1)); \ 251 std::vector<base_type > work(lwork); \ 252 lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \ 253 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\ 254 GMM_ASSERT1(!info, "QR algorithm failed"); \ 255 extract_eig(H, const_cast<VECT &>(eigval_), tol); \ 256 } 257 258 gees_interface(sgees_, BLAS_S) 259 gees_interface(dgees_, BLAS_D) 260 gees_interface2(cgees_, BLAS_C) 261 gees_interface2(zgees_, BLAS_Z) 262 263 # define geev_int_right(lapack_name, base_type) \ 264 template <typename VECT> inline void geev_interface_right( \ 265 const dense_matrix<base_type > &A, const VECT &eigval_, \ 266 dense_matrix<base_type > &Q) { \ 267 GMMLAPACK_TRACE("geev_interface"); \ 268 int n(mat_nrows(A)), info, lwork(-1); base_type work1; \ 269 if (!n) return; \ 270 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 271 char jobvl = 'N', jobvr = 'V'; \ 272 std::vector<base_type > eigvr(n), eigvi(n); \ 273 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ 274 &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \ 275 lwork = int(gmm::real(work1)); \ 276 std::vector<base_type > work(lwork); \ 277 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ 278 &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \ 279 GMM_ASSERT1(!info, "QR algorithm failed"); \ 280 gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_))); \ 281 gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_))); \ 282 } 283 284 # define geev_int_rightc(lapack_name, base_type) \ 285 template <typename VECT> inline void geev_interface_right( \ 286 const dense_matrix<base_type > &A, const VECT &eigval_, \ 287 dense_matrix<base_type > &Q) { \ 288 GMMLAPACK_TRACE("geev_interface"); \ 289 int n(mat_nrows(A)), info, lwork(-1); base_type work1; \ 290 if (!n) return; \ 291 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 292 char jobvl = 'N', jobvr = 'V'; \ 293 std::vector<double> rwork(2*n); \ 294 std::vector<base_type> eigv(n); \ 295 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ 296 &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \ 297 lwork = int(gmm::real(work1)); \ 298 std::vector<base_type > work(lwork); \ 299 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ 300 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \ 301 GMM_ASSERT1(!info, "QR algorithm failed"); \ 302 gmm::copy(eigv, const_cast<VECT &>(eigval_)); \ 303 } 304 305 geev_int_right(sgeev_, BLAS_S) 306 geev_int_right(dgeev_, BLAS_D) 307 geev_int_rightc(cgeev_, BLAS_C) 308 geev_int_rightc(zgeev_, BLAS_Z) 309 310 # define geev_int_left(lapack_name, base_type) \ 311 template <typename VECT> inline void geev_interface_left( \ 312 const dense_matrix<base_type > &A, const VECT &eigval_, \ 313 dense_matrix<base_type > &Q) { \ 314 GMMLAPACK_TRACE("geev_interface"); \ 315 int n(mat_nrows(A)), info, lwork(-1); base_type work1; \ 316 if (!n) return; \ 317 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 318 char jobvl = 'V', jobvr = 'N'; \ 319 std::vector<base_type > eigvr(n), eigvi(n); \ 320 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ 321 &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \ 322 lwork = int(gmm::real(work1)); \ 323 std::vector<base_type > work(lwork); \ 324 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \ 325 &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \ 326 GMM_ASSERT1(!info, "QR algorithm failed"); \ 327 gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_))); \ 328 gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_))); \ 329 } 330 331 # define geev_int_leftc(lapack_name, base_type) \ 332 template <typename VECT> inline void geev_interface_left( \ 333 const dense_matrix<base_type > &A, const VECT &eigval_, \ 334 dense_matrix<base_type > &Q) { \ 335 GMMLAPACK_TRACE("geev_interface"); \ 336 int n(mat_nrows(A)), info, lwork(-1); base_type work1; \ 337 if (!n) return; \ 338 dense_matrix<base_type > H(n,n); gmm::copy(A, H); \ 339 char jobvl = 'V', jobvr = 'N'; \ 340 std::vector<double> rwork(2*n); \ 341 std::vector<base_type> eigv(n); \ 342 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ 343 &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \ 344 lwork = int(gmm::real(work1)); \ 345 std::vector<base_type > work(lwork); \ 346 lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \ 347 &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \ 348 GMM_ASSERT1(!info, "QR algorithm failed"); \ 349 gmm::copy(eigv, const_cast<VECT &>(eigval_)); \ 350 } 351 352 geev_int_left(sgeev_, BLAS_S) 353 geev_int_left(dgeev_, BLAS_D) 354 geev_int_leftc(cgeev_, BLAS_C) 355 geev_int_leftc(zgeev_, BLAS_Z) 356 357 358 } 359 360 #endif // GMM_LAPACK_INTERFACE_H 361 362 #endif // GMM_USES_LAPACK || GMM_USES_ATLAS 363