1 /*************************************************************************
2 * *
3 * Vega FEM Simulation Library Version 3.1 *
4 * *
5 * "matrix" library , Copyright (C) 2007 CMU, 2009 MIT *
6 * All rights reserved. *
7 * *
8 * Code author: Jernej Barbic *
9 * http://www.jernejbarbic.com/code *
10 * Research: Jernej Barbic, Doug L. James, Jovan Popovic *
11 * Funding: NSF, Link Foundation, Singapore-MIT GAMBIT Game Lab *
12 * *
13 * This library is free software; you can redistribute it and/or *
14 * modify it under the terms of the BSD-style license that is *
15 * included with this library in the file LICENSE.txt *
16 * *
17 * This library is distributed in the hope that it will be useful, *
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file *
20 * LICENSE.TXT for more details. *
21 * *
22 *************************************************************************/
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <math.h>
28 #include "lapack-headers.h"
29 #include "matrixMacros.h"
30 #include "matrixLAPACK.h"
31 #include "matrixBLAS.h"
32
33 #ifdef __APPLE__
34 #define DGETRF dgetrf_
35 #define DGETRI dgetri_
36 #define SGETRF sgetrf_
37 #define SGETRI sgetri_
38 #define DGESVD dgesvd_
39 #define SGESVD sgesvd_
40 #define DGELSY dgelsy_
41 #define SGELSY sgelsy_
42 #define DGESV dgesv_
43 #define SGESV sgesv_
44 #define DSYEV dsyev_
45 #define SSYEV ssyev_
46 #define DSYGV dsygv_
47 #define SSYGV ssygv_
48 #define SGEEV sgeev_
49 #define DGEEV dgeev_
50 #define DGPADM dgpadm_
51 #define SGPADM sgpadm_
52 #define INTEGER __CLPK_integer
53 #else
54 #define DGETRF dgetrf
55 #define DGETRI dgetri
56 #define SGETRF sgetrf
57 #define SGETRI sgetri
58 #define DGESVD dgesvd
59 #define SGESVD sgesvd
60 #define DGELSY dgelsy
61 #define SGELSY sgelsy
62 #define DGESV dgesv
63 #define SGESV sgesv
64 #define DSYEV dsyev
65 #define SSYEV ssyev
66 #define DSYGV dsygv
67 #define SSYGV ssygv
68 #define SGEEV sgeev
69 #define DGEEV dgeev
70 #define DGPADM dgpadm_
71 #define SGPADM sgpadm_
72 #define INTEGER int
73 #endif
74
75 template<bool C>
76 class _xinverse {};
77
78 template<>
79 class _xinverse<true> {
80 public:
f(INTEGER m,float * A,INTEGER * IPIV,float * WORK,INTEGER LWORK)81 inline static INTEGER f( INTEGER m, float * A,
82 INTEGER * IPIV, float * WORK,
83 INTEGER LWORK)
84 {
85 INTEGER M = m;
86 INTEGER N = m;
87 INTEGER LDA = m;
88 INTEGER INFO;
89 SGETRF (&M, &N, A, &LDA, IPIV, &INFO);
90
91 if (INFO != 0)
92 {
93 printf("Warning: LAPACK routine SGETRF returned non-zero exit status %d.\n",(int)INFO);
94 }
95
96 //SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO );
97 SGETRI(&M, A, &LDA, IPIV, WORK, &LWORK, &INFO);
98
99 if (INFO != 0)
100 {
101 printf("Warning: LAPACK routine SGETRI returned non-zero exit status %d.\n",(int)INFO);
102 }
103
104 return INFO;
105 }
106 };
107
108 template<>
109 class _xinverse<false> {
110 public:
f(INTEGER m,double * A,INTEGER * IPIV,double * WORK,INTEGER LWORK)111 inline static INTEGER f( INTEGER m, double * A,
112 INTEGER * IPIV, double * WORK,
113 INTEGER LWORK)
114 {
115 INTEGER M = m;
116 INTEGER N = m;
117 INTEGER LDA = m;
118 INTEGER INFO;
119 DGETRF (&M, &N, A, &LDA, IPIV, &INFO);
120
121 if (INFO != 0)
122 {
123 printf("Warning: LAPACK routine DGETRF returned non-zero exit status %d.\n",(int)INFO);
124 }
125
126 //SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO );
127 DGETRI(&M, A, &LDA, IPIV, WORK, &LWORK, &INFO);
128
129 if (INFO != 0)
130 {
131 printf("Warning: LAPACK routine DGETRI returned non-zero exit status %d.\n",(int)INFO);
132 }
133
134 return INFO;
135 }
136 };
137
138 template<class real>
InverseMatrix(int m,real * mtx,real * output)139 real * InverseMatrix(int m, real * mtx, real * output)
140 {
141 real * target = output;
142 if (target == NULL)
143 target = (real*) malloc (sizeof(real) * m * m);
144
145 memcpy(target, mtx, sizeof(real) * m * m);
146
147 real * A = target;
148 INTEGER * IPIV = (INTEGER*) malloc(sizeof(INTEGER) * m);
149
150 INTEGER NB = 32; // optimal block size; could also call ilaenv
151 INTEGER LWORK = m * NB;
152 real * WORK = (real*) malloc (sizeof(real) * LWORK);
153
154 _xinverse<sizeof(float)==sizeof(real)>::f(m, A, IPIV, WORK, LWORK);
155
156 free(WORK);
157 free(IPIV);
158
159 return target;
160 }
161
162
163 template<bool C>
164 class _xgesvd {};
165
166 template<>
167 class _xgesvd<true> {
168 public:
169 //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
170
f(char * jobu,char * jobvt,INTEGER * m,INTEGER * n,float * A,INTEGER * LDA,float * S,float * U,INTEGER * LDU,float * VT,INTEGER * LDVT,float * WORK,INTEGER * LWORK,INTEGER * INFO)171 inline static void f( char * jobu, char * jobvt, INTEGER * m, INTEGER * n,
172 float * A, INTEGER * LDA, float * S, float * U,
173 INTEGER * LDU, float * VT, INTEGER * LDVT,
174 float * WORK, INTEGER * LWORK, INTEGER * INFO)
175 {
176 SGESVD(jobu, jobvt, m, n, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO);
177 }
178 };
179
180 template<>
181 class _xgesvd<false> {
182 public:
f(char * jobu,char * jobvt,INTEGER * m,INTEGER * n,double * A,INTEGER * LDA,double * S,double * U,INTEGER * LDU,double * VT,INTEGER * LDVT,double * WORK,INTEGER * LWORK,INTEGER * INFO)183 inline static void f( char * jobu, char * jobvt, INTEGER * m, INTEGER * n,
184 double * A, INTEGER * LDA, double * S, double * U,
185 INTEGER * LDU, double * VT, INTEGER * LDVT,
186 double * WORK, INTEGER * LWORK, INTEGER * INFO)
187 {
188 DGESVD(jobu, jobvt, m, n, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO);
189 }
190 };
191
192 template<class real>
PseudoInverseMatrix(int m,int n,real * mtx,real singularValueThreshold,int * rank_,real * output)193 real * PseudoInverseMatrix(int m, int n, real * mtx, real singularValueThreshold, int * rank_, real * output)
194 {
195 real * A = (real*) malloc (sizeof(real) * m * n);
196 memcpy(A, mtx, sizeof(real) * m * n);
197
198 bool transpose = (m > n);
199 if (transpose)
200 {
201 // swap m,n
202 InPlaceTransposeMatrix(m,n, A);
203 int buffer = m;
204 m = n;
205 n = buffer;
206 }
207
208 // now, we always have m <= n
209
210 char jobu = 'O';//overwrites A with U (left singular vectors)
211 char jobvt = 'S';//all rows returned in VT
212
213 int ldA = m;
214 int ldU = m;
215
216 INTEGER NB = 32; // optimal block size; could also call ilaenv
217 int lwork = NB*MAX(3*MIN( m, n)+MAX(m,n), 5*MIN(m,n)-4);
218
219 real * work = (real*) malloc (sizeof(real) * lwork);
220 if (!work)
221 {
222 printf("Error: failed to allocate workspace.\n");
223 throw 1;
224 }
225
226 //printf("Workspace size is: %G Mb .\n",1.0 * lwork * sizeof(int) / 1024 / 1024);
227
228 // allocate array for singular vectors
229 real * S = (real *) malloc (sizeof(real) * MIN(m,n));
230 if (!S)
231 {
232 printf("Error: failed to allocate singular vectors.\n");
233 free(work);
234 throw 2;
235 }
236
237 // allocate array for VT
238 int ldVT = MIN(m,n);
239 real * VT = (real *) malloc (sizeof(real) * ldVT * n);
240 if (!VT)
241 {
242 printf("Error: failed to allocate VT.\n");
243 free(S);
244 free(work);
245 throw 3;
246 }
247
248 INTEGER M = m;
249 INTEGER N = n;
250 INTEGER LDA = ldA;
251 INTEGER LDU = ldU;
252 INTEGER LDVT = ldVT;
253 INTEGER LWORK = lwork;
254 INTEGER INFO;
255
256 //printf("Calling LAPACK dgesvd routine...\n");fflush(NULL);
257
258 //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
259
260 _xgesvd<sizeof(real)==sizeof(float)>::f
261 (&jobu, &jobvt, &M, &N, A, &LDA,
262 S, NULL, &LDU, VT,
263 &LDVT, work, &LWORK, &INFO);
264
265 if (INFO != 0)
266 {
267 int code = INFO;
268 printf("Error: SVD solver returned non-zero exit code: %d.\n", code);
269 free(VT);
270 free(S);
271 free(work);
272 throw 4;
273 }
274
275 free(work);
276
277 //printf ("Singular values:\n");fflush(NULL);
278 //for (int i=0; i< MIN(m,n); i++)
279 // printf("%f ",S[i]);
280 //printf ("\n");
281
282 // discard small singular values
283 int rank = MIN(m,n);
284 for(int i=0; i< MIN(m,n); i++)
285 {
286 if (S[i] / S[0] < singularValueThreshold)
287 {
288 rank = i;
289 break;
290 }
291 }
292
293 for(int i=0; i< rank; i++)
294 S[i] = ((real)1.0) / S[i];
295
296 for(int i=rank; i< MIN(m,n); i++)
297 S[i] = 0.0;
298
299 real * U = A;
300 memset(&U[ELT(m,0,rank)], 0, sizeof(real) * m * (n-rank));
301 InPlaceTransposeMatrix(m, m, U);
302 real * UT = U;
303 // UT is now m x m
304
305 InPlaceTransposeMatrix(m,n,VT);
306 real * V = VT;
307 // V is now n x m
308
309 // multiply A^{\dagger} = V * Sigma^{-1} * U^T
310 for(int j=0; j<m; j++) // over all columns
311 for(int i=0; i<n; i++) // over all rows
312 V[ELT(n, i, j)] *= S[j];
313
314 real * target = output;
315 if (target == NULL)
316 target = (real*) malloc (sizeof(real) * n * m);
317
318 // V * UT
319 // (n x m) * (m x m)
320 // output is n x m
321 MultiplyMatrices(n, m, m, V, UT, target);
322
323 if (transpose)
324 InPlaceTransposeMatrix(n, m, target);
325
326 free(A);
327 free(S);
328 free(VT);
329
330 if (rank_ != NULL)
331 *rank_ = rank;
332
333 return target;
334 }
335
336 template<bool C>
337 class _xgelsy {};
338
339 //SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
340 // WORK, LWORK, INFO )
341
342 template<>
343 class _xgelsy<true> {
344 public:
345
f(INTEGER * M,INTEGER * N,INTEGER * NRHS,float * A,INTEGER * LDA,float * B,INTEGER * LDB,INTEGER * JPVT,float * RCOND,INTEGER * RANK,float * WORK,INTEGER * LWORK,INTEGER * INFO)346 inline static void f( INTEGER * M, INTEGER * N, INTEGER * NRHS,
347 float * A, INTEGER * LDA,
348 float * B, INTEGER * LDB,
349 INTEGER * JPVT, float * RCOND, INTEGER * RANK,
350 float * WORK, INTEGER * LWORK, INTEGER * INFO)
351 {
352 SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
353 WORK, LWORK, INFO );
354 }
355 };
356
357 template<>
358 class _xgelsy<false> {
359 public:
360
f(INTEGER * M,INTEGER * N,INTEGER * NRHS,double * A,INTEGER * LDA,double * B,INTEGER * LDB,INTEGER * JPVT,double * RCOND,INTEGER * RANK,double * WORK,INTEGER * LWORK,INTEGER * INFO)361 inline static void f( INTEGER * M, INTEGER * N, INTEGER * NRHS,
362 double * A, INTEGER * LDA,
363 double * B, INTEGER * LDB,
364 INTEGER * JPVT, double * RCOND, INTEGER * RANK,
365 double * WORK, INTEGER * LWORK, INTEGER * INFO)
366 {
367 DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
368 WORK, LWORK, INFO );
369 }
370 };
371
372 template<class real>
MatrixLeastSquareSolve(int m,int n,int nRhs,real * mtx,real * b,real rcond,int * rank_,real * output)373 real * MatrixLeastSquareSolve(int m, int n, int nRhs, real * mtx, real * b, real rcond, int * rank_, real * output)
374 {
375 real * A = (real*) malloc (sizeof(real) * m * n);
376 if (!A)
377 {
378 printf("Error: failed to allocate A.\n");
379 throw 1;
380 }
381 memcpy(A, mtx, sizeof(real) * m * n);
382
383 INTEGER M = m;
384 INTEGER N = n;
385 INTEGER NRHS = nRhs;
386 INTEGER LDA = m;
387
388 INTEGER LDB = MAX(M,N);
389 real * B = (real*) calloc (LDB * nRhs, sizeof(real));
390 if (!B)
391 {
392 printf("Error: failed to allocate B.\n");
393 free(A);
394 throw 2;
395 }
396 for(int i=0; i < nRhs; i++)
397 memcpy(&B[ELT(LDB,0,i)], &b[ELT(m,0,i)], sizeof(real) * m);
398
399 INTEGER * JPVT = (INTEGER*) malloc (sizeof(INTEGER) * N);
400 if (!JPVT)
401 {
402 printf("Error: failed to allocate JPVT.\n");
403 free(B);
404 free(A);
405 throw 3;
406 }
407
408 INTEGER RANK;
409 real RCOND = rcond;
410
411 INTEGER NB = 32; // optimal block size; could also call ilaenv
412 INTEGER LWORK = MAX( M*N+2*N+NB*(N+1), 2*M*N+NB*NRHS );
413
414 real * WORK = (real*) malloc (sizeof(real) * LWORK);
415 if (!WORK)
416 {
417 printf("Error: failed to allocate workspace.\n");
418 free(JPVT);
419 free(B);
420 free(A);
421 throw 4;
422 }
423
424 //printf("Workspace size is: %G Mb .\n",1.0 * LWORK * sizeof(int) / 1024 / 1024);
425
426 INTEGER INFO;
427
428 //printf("M=%d N=%d NRHS=%d LDA=%d LDB=%d\n", (int)M, (int)N, (int)NRHS, (int)LDA, (int)LDB);
429 _xgelsy<sizeof(real)==sizeof(float)>::f
430 (&M, &N, &NRHS, A, &LDA, B, &LDB,
431 JPVT, &RCOND, &RANK,
432 WORK, &LWORK, &INFO );
433
434 if (INFO != 0)
435 {
436 int code = INFO;
437 printf("Error: xGELSY solver returned non-zero exit code: %d.\n", code);
438 free(WORK);
439 free(JPVT);
440 free(B);
441 free(A);
442 throw 5;
443 }
444
445 free(JPVT);
446 free(WORK);
447 free(A);
448
449 real * target = output;
450 if (target == NULL)
451 target = (real*) malloc (sizeof(real) * n * nRhs);
452
453 for(int i=0; i < nRhs; i++)
454 memcpy(&target[ELT(n,0,i)], &B[ELT(LDB,0,i)], sizeof(real) * n);
455
456 free(B);
457
458 if (rank_ != NULL)
459 *rank_ = RANK;
460
461 return target;
462 }
463
464 template<bool C>
465 class _xgesv {};
466
467
468 //DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
469
470 template<>
471 class _xgesv<true> {
472 public:
473
f(INTEGER * N,INTEGER * NRHS,float * A,INTEGER * LDA,INTEGER * IPIV,float * B,INTEGER * LDB,INTEGER * INFO)474 inline static void f( INTEGER * N, INTEGER * NRHS,
475 float * A, INTEGER * LDA,
476 INTEGER * IPIV,
477 float * B, INTEGER * LDB,
478 INTEGER * INFO)
479 {
480 SGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO );
481 }
482 };
483
484 template<>
485 class _xgesv<false> {
486 public:
f(INTEGER * N,INTEGER * NRHS,double * A,INTEGER * LDA,INTEGER * IPIV,double * B,INTEGER * LDB,INTEGER * INFO)487 inline static void f( INTEGER * N, INTEGER * NRHS,
488 double * A, INTEGER * LDA,
489 INTEGER * IPIV,
490 double * B, INTEGER * LDB,
491 INTEGER * INFO)
492 {
493 DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO );
494 }
495 };
496
497 template<class real>
MatrixLUSolve(int n,int nRhs,real * mtx,real * x,real * b)498 void MatrixLUSolve(int n, int nRhs, real * mtx, real * x, real * b)
499 {
500 real * A = (real*) malloc (sizeof(real) * n * n);
501 if (!A)
502 {
503 printf("Error: failed to allocate A.\n");
504 throw 1;
505 }
506 memcpy(A, mtx, sizeof(real) * n * n);
507
508 INTEGER N = n;
509 INTEGER NRHS = nRhs;
510 INTEGER LDA = n;
511
512 memcpy(x, b, sizeof(real) * n * nRhs);
513 real * B = x;
514
515 INTEGER * IPIV = (INTEGER*) malloc (sizeof(INTEGER) * N);
516 if (!IPIV)
517 {
518 printf("Error: failed to allocate IPIV.\n");
519 free(A);
520 throw 3;
521 }
522
523 INTEGER LDB = n;
524 INTEGER INFO;
525
526 _xgesv<sizeof(real)==sizeof(float)>::f
527 (&N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO);
528
529 if (INFO != 0)
530 {
531 int code = INFO;
532 printf("Error: xGESV solver returned non-zero exit code: %d.\n", code);
533 free(IPIV);
534 free(A);
535 throw 4;
536 }
537
538 free(IPIV);
539 free(A);
540 }
541
542 template<bool C>
543 class _xsyev {};
544
545 //SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
546 template<>
547 class _xsyev<true> {
548 public:
549
f(char * JOBZ,char * UPLO,INTEGER * N,float * A,INTEGER * LDA,float * W,float * WORK,INTEGER * LWORK,INTEGER * INFO)550 inline static void f( char * JOBZ, char * UPLO, INTEGER * N,
551 float * A, INTEGER * LDA, float * W,
552 float * WORK, INTEGER * LWORK, INTEGER * INFO)
553 {
554 SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO );
555 }
556 };
557
558
559 template<>
560 class _xsyev<false> {
561 public:
562
f(char * JOBZ,char * UPLO,INTEGER * N,double * A,INTEGER * LDA,double * W,double * WORK,INTEGER * LWORK,INTEGER * INFO)563 inline static void f( char * JOBZ, char * UPLO, INTEGER * N,
564 double * A, INTEGER * LDA, double * W,
565 double * WORK, INTEGER * LWORK, INTEGER * INFO)
566 {
567 DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO );
568 }
569 };
570
571 template<class real>
SymmetricMatrixEigenDecomposition(int m,real * mtx,real * Q,real * Lambda)572 void SymmetricMatrixEigenDecomposition(int m, real * mtx, real * Q, real * Lambda)
573 {
574 char JOBZ = 'V';
575 char UPLO = 'U';
576 INTEGER N = m;
577
578 memcpy(Q, mtx, sizeof(real) * m * m);
579
580 real * A = Q;
581
582 INTEGER LDA = m;
583 real * W = Lambda;
584
585 INTEGER NB = 32;
586 INTEGER LWORK = (NB+2)*N;
587
588 real * WORK = (real*) malloc (sizeof(real) * LWORK);
589
590 INTEGER INFO;
591
592 _xsyev<sizeof(real)==sizeof(float)>::f(&JOBZ, &UPLO, &N, A, &LDA, W, WORK, &LWORK, &INFO);
593
594 if (INFO != 0)
595 {
596 int code = INFO;
597 printf("Error: xSYEV solver returned non-zero exit code: %d.\n", code); fflush(NULL);
598 free(WORK);
599 throw 1;
600 }
601
602 free(WORK);
603 }
604
605 template<bool C>
606 class _xsygv {};
607
608 //SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO )
609
610 template<>
611 class _xsygv<true> {
612 public:
613
f(INTEGER * ITYPE,char * JOBZ,char * UPLO,INTEGER * N,float * A,INTEGER * LDA,float * B,INTEGER * LDB,float * W,float * WORK,INTEGER * LWORK,INTEGER * INFO)614 inline static void f( INTEGER * ITYPE, char * JOBZ, char * UPLO, INTEGER * N,
615 float * A, INTEGER * LDA, float * B, INTEGER * LDB,
616 float * W, float * WORK, INTEGER * LWORK, INTEGER * INFO)
617 {
618 SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO );
619 }
620 };
621
622
623 template<>
624 class _xsygv<false> {
625 public:
626
f(INTEGER * ITYPE,char * JOBZ,char * UPLO,INTEGER * N,double * A,INTEGER * LDA,double * B,INTEGER * LDB,double * W,double * WORK,INTEGER * LWORK,INTEGER * INFO)627 inline static void f( INTEGER * ITYPE, char * JOBZ, char * UPLO, INTEGER * N,
628 double * A, INTEGER * LDA, double * B, INTEGER * LDB,
629 double * W, double * WORK, INTEGER * LWORK, INTEGER * INFO)
630 {
631 DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO );
632 }
633 };
634
635 template<class real>
SymmetricMatrixGeneralEigenDecomposition(int m,real * mtx,real * mtx2,real * Q,real * Lambda)636 void SymmetricMatrixGeneralEigenDecomposition(int m, real * mtx, real * mtx2, real * Q, real * Lambda)
637 {
638 int ITYPE = 1;
639 char JOBZ = 'V';
640 char UPLO = 'U';
641 int LWORK = 2 * (3 * m - 1);
642 real * WORK = (real*) malloc (sizeof(real) * LWORK);
643 int INFO;
644
645 memcpy(Q, mtx, sizeof(real) * m * m);
646 real * A = Q;
647 real * B = mtx2;
648
649 _xsygv<sizeof(real)==sizeof(float)>::f( &ITYPE, &JOBZ, &UPLO, &m, A, &m, B, &m, Lambda, WORK, &LWORK, &INFO );
650
651 if (INFO != 0)
652 {
653 int code = INFO;
654 printf("Error: xSYGV solver returned non-zero exit code: %d.\n", code); fflush(NULL);
655 free(WORK);
656 throw 1;
657 }
658
659 free(WORK);
660 }
661
662 template<bool C>
663 class _xgeev {};
664
665 //SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
666 // LDVR, WORK, LWORK, INFO )
667
668 template<>
669 class _xgeev<true> {
670 public:
671
f(char * JOBVL,char * JOBVR,INTEGER * N,float * A,INTEGER * LDA,float * WR,float * WI,float * VL,INTEGER * LDVL,float * VR,INTEGER * LDVR,float * WORK,INTEGER * LWORK,INTEGER * INFO)672 inline static void f( char * JOBVL, char * JOBVR, INTEGER * N,
673 float * A, INTEGER * LDA, float * WR, float * WI,
674 float * VL, INTEGER * LDVL, float * VR, INTEGER * LDVR,
675 float * WORK, INTEGER * LWORK, INTEGER * INFO)
676 {
677 SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
678 LDVR, WORK, LWORK, INFO );
679 }
680 };
681
682
683 template<>
684 class _xgeev<false> {
685 public:
f(char * JOBVL,char * JOBVR,INTEGER * N,double * A,INTEGER * LDA,double * WR,double * WI,double * VL,INTEGER * LDVL,double * VR,INTEGER * LDVR,double * WORK,INTEGER * LWORK,INTEGER * INFO)686 inline static void f( char * JOBVL, char * JOBVR, INTEGER * N,
687 double * A, INTEGER * LDA, double * WR, double * WI,
688 double * VL, INTEGER * LDVL, double * VR, INTEGER * LDVR,
689 double * WORK, INTEGER * LWORK, INTEGER * INFO)
690 {
691 DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
692 LDVR, WORK, LWORK, INFO );
693 }
694 };
695
696 template<class real>
MatrixEigenDecomposition(int m,real * mtx,real * EigenVectors,real * LambdaRe,real * LambdaIm)697 int MatrixEigenDecomposition(int m, real * mtx, real * EigenVectors, real * LambdaRe, real * LambdaIm)
698 {
699 char JOBVL = 'N';
700 char JOBVR = 'V';
701 INTEGER N = m;
702
703 real * A = (real*) malloc (sizeof(real) * m * m);
704 memcpy(A, mtx, sizeof(real) * m * m);
705 INTEGER LDA = m;
706
707 real * WR = LambdaRe;
708 real * WI = LambdaIm;
709
710 real * VL = NULL;
711 INTEGER LDVL = m;
712
713 real * VR = EigenVectors;
714 INTEGER LDVR = m;
715
716 INTEGER LWORK = 16*N;
717
718 real * WORK = (real*) malloc (sizeof(real) * LWORK);
719
720 INTEGER INFO;
721
722 _xgeev<sizeof(real)==sizeof(float)>::f(&JOBVL, &JOBVR, &N,
723 A, &LDA, WR, WI, VL, &LDVL, VR, &LDVR, WORK, &LWORK, &INFO);
724
725 if (INFO != 0)
726 {
727 int code = INFO;
728 printf("Warning: xGEEV solver returned non-zero exit code: %d.\n", code);
729 }
730
731 free(WORK);
732 return INFO;
733 }
734
735 template<class real>
MatrixSVD(int m,int n,real * mtx,real * U,real * Sigma,real * VT)736 void MatrixSVD(int m, int n, real * mtx, real * U, real * Sigma, real * VT)
737 {
738 real * A = (real*) malloc (sizeof(real) * m * n);
739 memcpy(A, mtx, sizeof(real) * m * n);
740
741 // must deal with transpose since Intel MKL sometimes crashes on thin matrices
742 bool transpose = (m > n);
743 if (transpose)
744 {
745 InPlaceTransposeMatrix(m, n, A);
746
747 // swap arguments
748 int buffer = m;
749 m = n;
750 n = buffer;
751
752 real * bufferr = U;
753 U = VT;
754 VT = bufferr;
755 }
756
757 // now, we always have m <= n
758
759 char jobu = 'S'; //left singular vectors returned in U
760 char jobvt = 'S'; //right singular vectors returned in VT
761
762 INTEGER NB = 32; // optimal block size; could also call ilaenv
763 int lwork = NB*MAX(3*MIN( m, n)+MAX(m,n), 5*MIN(m,n)-4);
764
765 real * work = (real*) malloc (sizeof(real) * lwork);
766 if (!work)
767 {
768 printf("Error: failed to allocate workspace.\n");
769 throw 1;
770 }
771
772 //printf("Workspace size is: %G Mb .\n",1.0 * lwork * sizeof(int) / 1024 / 1024);
773
774 INTEGER M = m;
775 INTEGER N = n;
776 INTEGER LDA = m;
777 INTEGER LDU = m;
778 INTEGER LDVT = MIN(m,n);
779 INTEGER LWORK = lwork;
780 INTEGER INFO;
781
782 //printf("Calling LAPACK dgesvd routine...\n");fflush(NULL);
783
784 //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
785
786 _xgesvd<sizeof(real)==sizeof(float)>::f
787 (&jobu, &jobvt, &M, &N, A, &LDA,
788 Sigma, U, &LDU, VT,
789 &LDVT, work, &LWORK, &INFO);
790
791 if (INFO != 0)
792 {
793 int code = INFO;
794 printf("Error: SVD solver returned non-zero exit code: %d.\n", code);
795 free(work);
796 free(A);
797 throw 2;
798 }
799
800 free(work);
801 free(A);
802
803 if (transpose)
804 {
805 InPlaceTransposeMatrix(m, MIN(m,n), U);
806 InPlaceTransposeMatrix(MIN(m,n), n, VT);
807 }
808 }
809
810 template float * InverseMatrix(int m, float * mtx, float * output);
811 template double * InverseMatrix(int m, double * mtx, double * output);
812 template float * PseudoInverseMatrix(int m, int n, float * mtx, float singularValueThreshold, int * rank, float * output);
813 template double * PseudoInverseMatrix(int m, int n, double * mtx, double singularValueThreshold, int * rank, double * output);
814 template float * MatrixLeastSquareSolve(int m, int n, int nRhs, float * mtx, float * b, float rcond, int * rank, float * output);
815 template double * MatrixLeastSquareSolve(int m, int n, int nRhs, double * mtx, double * b, double rcond, int * rank, double * output);
816 template void SymmetricMatrixEigenDecomposition(int m, float * mtx, float * Q, float * Lambda);
817 template void SymmetricMatrixEigenDecomposition(int m, double * mtx, double * Q, double * Lambda);
818 template void SymmetricMatrixGeneralEigenDecomposition(int m, float * mtx, float * mtx2, float * Q, float * Lambda);
819 template void SymmetricMatrixGeneralEigenDecomposition(int m, double * mtx, double * mtx2, double * Q, double * Lambda);
820 template int MatrixEigenDecomposition(int m, float * mtx, float * EigenVectors, float * LambdaRe, float * LambdaIm);
821 template int MatrixEigenDecomposition(int m, double * mtx, double * EigenVectors, double * LambdaRe, double * LambdaIm);
822 template void MatrixSVD(int m, int n, float * mtx, float * U, float * Sigma, float * VT);
823 template void MatrixSVD(int m, int n, double * mtx, double * U, double * Sigma, double * VT);
824 template void MatrixLUSolve(int n, int nRhs, float * mtx, float * x, float * b);
825 template void MatrixLUSolve(int n, int nRhs, double * mtx, double * x, double * b);
826
827