1 #ifndef __PLINK_MATRIX_H__
2 #define __PLINK_MATRIX_H__
3 
4 // This file is part of PLINK 1.90, copyright (C) 2005-2020 Shaun Purcell,
5 // Christopher Chang.
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 
21 // Wrappers for frequent LAPACK calls (sometimes with no-LAPACK fallbacks).
22 // (Update, 11 Oct 2018: Backported PLINK 2.0's MKL support.)
23 
24 #ifdef NOLAPACK
25 
26 #  define MATRIX_INVERT_BUF1_TYPE double
27 #  define MATRIX_INVERT_BUF1_ELEM_ALLOC (2 * sizeof(double))
28 #  define MATRIX_INVERT_BUF1_CHECKED_ALLOC (2 * sizeof(double))
29 #  define __CLPK_integer int
30 
31 #else // !NOLAPACK
32 
33 #  ifdef __APPLE__
34 #    include <Accelerate/Accelerate.h>
35 #    define USE_CBLAS_XGEMM
36 #  endif
37 
38 #  ifndef __APPLE__
39 
40 #    ifdef __cplusplus
41 extern "C" {
42 #    endif
43 
44   typedef double __CLPK_doublereal;
45 #    if defined(__LP64__) || defined(_WIN32)
46   typedef int32_t __CLPK_integer;
47 #    else
48   typedef long int __CLPK_integer;
49 #    endif
50 
51 #    ifdef _WIN32
52   // openblas is easy enough to set up on Windows nowadays.
53   // not worth the trouble of ripping out vector extensions, etc. just so we
54   // can compile with Visual Studio and gain access to MKL.
55   // (todo: upgrade from 0.2.19 to a later version, build setup will probably
56   // need to change a bit)
57 #     define HAVE_LAPACK_CONFIG_H
58 #     define LAPACK_COMPLEX_STRUCTURE
59 #     include "lapacke.h"
60 
61   __CLPK_doublereal ddot_(__CLPK_integer* n, __CLPK_doublereal* dx,
62                           __CLPK_integer* incx, __CLPK_doublereal* dy,
63                           __CLPK_integer* incy);
64 
65   void dger_(int* m, int* n, double* alpha, double* x, int* incx, double* y,
66              int* incy, double* a, int* lda);
67 
68   void dgemm_(char* transa, char* transb, __CLPK_integer* m, __CLPK_integer* n,
69               __CLPK_integer* k, double* alpha, double* a, __CLPK_integer* lda,
70               double* b, __CLPK_integer* ldb, double* beta, double* c,
71               __CLPK_integer* ldc);
72 
73   void dsymv_(char* uplo, int* n, double* alpha, double* a, int* lda,
74               double* x, int* incx, double* beta, double* y, int* incy);
75 
76   void dgetrf_(__CLPK_integer* m, __CLPK_integer* n,
77                __CLPK_doublereal* a, __CLPK_integer* lda,
78                __CLPK_integer* ipiv, __CLPK_integer* info);
79 
80   void sgemm_(char* transa, char* transb, __CLPK_integer* m, __CLPK_integer* n,
81               __CLPK_integer* k, float* alpha, float* a, __CLPK_integer* lda,
82               float* b, __CLPK_integer* ldb, float* beta, float* c,
83               __CLPK_integer* ldc);
84 
85 #    else  // Linux
86 #      ifdef USE_MKL
87 #        ifndef __LP64__
88 #          error "32-bit Linux build does not support Intel MKL."
89 #        endif
90 #        define USE_CBLAS_XGEMM
91   // sizeof(MKL_INT) should be 4.
92 #        define MKL_LP64
93 #        ifdef DYNAMIC_MKL
94 #          include <mkl_service.h>
95 #          include <mkl_cblas.h>
96 #          include <mkl_lapack.h>
97 #        else
98 #          include "mkl_service.h"
99 #          include "mkl_cblas.h"
100 #          include "mkl_lapack.h"
101 #        endif
102 #      else
103 #        ifdef USE_CBLAS_XGEMM
104 #          include <cblas.h>
105 #        else
106   // ARGH
107   // cmake on Ubuntu 14 seems to require use of cblas_f77.h instead of cblas.h.
108   // Conversely, cblas_f77.h does not seem to be available on the Scientific
109   // Linux ATLAS/LAPACK install, and right now that's my only option for
110   // producing 32-bit static builds...
111   // So.  Default include is cblas.h.  To play well with cmake + Ubuntu 14 and
112   // 16 simultaneously, there is a CBLAS_F77_ON_OLD_GCC mode which picks
113   // cblas_f77.h on Ubuntu 14 and cblas.h on 16.
114 #          ifdef FORCE_CBLAS_F77
115 #            include <cblas_f77.h>
116 #          elif !defined(CBLAS_F77_ON_OLD_GCC)
117 #            include <cblas.h>
118 #          else
119 #            if (__GNUC__ <= 4)
120 #              include <cblas_f77.h>
121 #            else
122 #              if __has_include(<cblas.h>)
123 #                include <cblas.h>
124 #              else
125 #                include <cblas_f77.h>
126 #              endif
127 #            endif
128 #          endif
129   __CLPK_doublereal ddot_(__CLPK_integer* n, __CLPK_doublereal* dx,
130                           __CLPK_integer* incx, __CLPK_doublereal* dy,
131                           __CLPK_integer* incy);
132 #        endif
133 
134   int dgetrf_(__CLPK_integer* m, __CLPK_integer* n,
135               __CLPK_doublereal* a, __CLPK_integer* lda,
136               __CLPK_integer* ipiv, __CLPK_integer* info);
137 
138   int dgetri_(__CLPK_integer* n, __CLPK_doublereal* a,
139               __CLPK_integer* lda, __CLPK_integer* ipiv,
140               __CLPK_doublereal* work, __CLPK_integer* lwork,
141               __CLPK_integer* info);
142 
143   double dlange_(char* norm, __CLPK_integer* m, __CLPK_integer* n,
144                  __CLPK_doublereal* a, __CLPK_integer* lda,
145                  __CLPK_doublereal* work);
146 
147   int dgecon_(char* norm, __CLPK_integer* n, __CLPK_doublereal* a,
148               __CLPK_integer* lda, __CLPK_doublereal* anorm,
149               __CLPK_doublereal* rcond, __CLPK_doublereal* work,
150               __CLPK_integer* iwork, __CLPK_integer* info);
151 
152   void dgels_(char* trans, __CLPK_integer* m, __CLPK_integer* n,
153               __CLPK_integer* nrhs, __CLPK_doublereal* a, __CLPK_integer* lda,
154               __CLPK_doublereal* b, __CLPK_integer* ldb,
155               __CLPK_doublereal* work, __CLPK_integer* lwork,
156               __CLPK_integer* info);
157 
158   int dsyevr_(char* jobz, char* range, char* uplo, __CLPK_integer* n,
159               __CLPK_doublereal* a, __CLPK_integer* lda, __CLPK_doublereal* vl,
160               __CLPK_doublereal* vu, __CLPK_integer* il, __CLPK_integer* iu,
161               __CLPK_doublereal* abstol, __CLPK_integer* m,
162               __CLPK_doublereal* w, __CLPK_doublereal* z, __CLPK_integer* ldz,
163               __CLPK_integer* isuppz, __CLPK_doublereal* work,
164               __CLPK_integer* lwork, __CLPK_integer* iwork,
165               __CLPK_integer* liwork, __CLPK_integer* info);
166 
167   void dgesdd_(char* jobs, __CLPK_integer* m, __CLPK_integer* n,
168                __CLPK_doublereal* a, __CLPK_integer* lda, __CLPK_doublereal* s,
169                __CLPK_doublereal* u, __CLPK_integer* ldu,
170                __CLPK_doublereal* vt, __CLPK_integer* ldvt,
171                __CLPK_doublereal* work, __CLPK_integer* lwork,
172                __CLPK_integer* iwork, __CLPK_integer* info);
173 
174 #        ifndef USE_CBLAS_XGEMM
175   void dgemm_(char* transa, char* transb, __CLPK_integer* m, __CLPK_integer* n,
176               __CLPK_integer* k, double* alpha, double* a, __CLPK_integer* lda,
177               double* b, __CLPK_integer* ldb, double* beta, double* c,
178               __CLPK_integer* ldc);
179 
180   void dsymv_(char* uplo, int* n, double* alpha, double* a, int* lda,
181               double* x, int* incx, double* beta, double* y, int* incy);
182 
183   void sgemm_(char* transa, char* transb, __CLPK_integer* m, __CLPK_integer* n,
184               __CLPK_integer* k, float* alpha, float* a, __CLPK_integer* lda,
185               float* b, __CLPK_integer* ldb, float* beta, float* c,
186               __CLPK_integer* ldc);
187 #        endif
188 
189 #      endif  // !USE_MKL
190 #    endif
191 
192 
193   void xerbla_(void);
194 #    ifdef __cplusplus
195 } // extern "C"
196 #    endif
197 
198 #  endif  // !__APPLE__
199 
200 #  define MATRIX_INVERT_BUF1_TYPE __CLPK_integer
201 #  define MATRIX_INVERT_BUF1_ELEM_ALLOC sizeof(__CLPK_integer)
202 // invert_matrix_checked() usually requires a larger buffer
203 #  define MATRIX_INVERT_BUF1_CHECKED_ALLOC (2 * sizeof(__CLPK_integer))
204 
205 #endif  // !NOLAPACK
206 
207 #define MATRIX_SINGULAR_RCOND 1e-14
208 
209 #ifdef NOLAPACK
210 int32_t invert_matrix(int32_t dim, double* matrix, MATRIX_INVERT_BUF1_TYPE* dbl_1d_buf, double* dbl_2d_buf);
211 
212 #  define invert_matrix_checked invert_matrix
213 #else
214 int32_t invert_matrix(__CLPK_integer dim, double* matrix, MATRIX_INVERT_BUF1_TYPE* int_1d_buf, double* dbl_2d_buf);
215 
216 int32_t invert_matrix_checked(__CLPK_integer dim, double* matrix, MATRIX_INVERT_BUF1_TYPE* int_1d_buf, double* dbl_2d_buf);
217 #endif
218 
219 void col_major_matrix_multiply(__CLPK_integer row1_ct, __CLPK_integer col2_ct, __CLPK_integer common_ct, double* inmatrix1, double* inmatrix2, double* outmatrix);
220 
221 void col_major_fmatrix_multiply(__CLPK_integer row1_ct, __CLPK_integer col2_ct, __CLPK_integer common_ct, float* inmatrix1, float* inmatrix2, float* outmatrix);
222 
223 void transpose_copy(uintptr_t old_maj, uintptr_t new_maj, double* old_matrix, double* new_matrix);
224 
225 void transpose_copy_float(uintptr_t old_maj, uintptr_t new_maj, uint32_t new_maj_max, float* old_matrix, float* new_matrix);
226 
227 #endif // __PLINK_MATRIX_H__
228