1 /*************************************************************************
2  *                                                                       *
3  * Vega FEM Simulation Library Version 3.1                               *
4  *                                                                       *
5  * "matrix" library , Copyright (C) 2007 CMU, 2009 MIT                   *
6  * All rights reserved.                                                  *
7  *                                                                       *
8  * Code author: Jernej Barbic                                            *
9  * http://www.jernejbarbic.com/code                                      *
10  * Research: Jernej Barbic, Doug L. James, Jovan Popovic                 *
11  * Funding: NSF, Link Foundation, Singapore-MIT GAMBIT Game Lab          *
12  *                                                                       *
13  * This library is free software; you can redistribute it and/or         *
14  * modify it under the terms of the BSD-style license that is            *
15  * included with this library in the file LICENSE.txt                    *
16  *                                                                       *
17  * This library is distributed in the hope that it will be useful,       *
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file     *
20  * LICENSE.TXT for more details.                                         *
21  *                                                                       *
22  *************************************************************************/
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <math.h>
28 #include "lapack-headers.h"
29 #include "matrixMacros.h"
30 #include "matrixLAPACK.h"
31 #include "matrixBLAS.h"
32 
33 #ifdef __APPLE__
34   #define DGETRF dgetrf_
35   #define DGETRI dgetri_
36   #define SGETRF sgetrf_
37   #define SGETRI sgetri_
38   #define DGESVD dgesvd_
39   #define SGESVD sgesvd_
40   #define DGELSY dgelsy_
41   #define SGELSY sgelsy_
42   #define DGESV dgesv_
43   #define SGESV sgesv_
44   #define DSYEV dsyev_
45   #define SSYEV ssyev_
46   #define DSYGV dsygv_
47   #define SSYGV ssygv_
48   #define SGEEV sgeev_
49   #define DGEEV dgeev_
50   #define DGPADM dgpadm_
51   #define SGPADM sgpadm_
52   #define INTEGER __CLPK_integer
53 #else
54   #define DGETRF dgetrf
55   #define DGETRI dgetri
56   #define SGETRF sgetrf
57   #define SGETRI sgetri
58   #define DGESVD dgesvd
59   #define SGESVD sgesvd
60   #define DGELSY dgelsy
61   #define SGELSY sgelsy
62   #define DGESV dgesv
63   #define SGESV sgesv
64   #define DSYEV dsyev
65   #define SSYEV ssyev
66   #define DSYGV dsygv
67   #define SSYGV ssygv
68   #define SGEEV sgeev
69   #define DGEEV dgeev
70   #define DGPADM dgpadm_
71   #define SGPADM sgpadm_
72   #define INTEGER int
73 #endif
74 
75 template<bool C>
76 class _xinverse {};
77 
78 template<>
79 class _xinverse<true> {
80 public:
f(INTEGER m,float * A,INTEGER * IPIV,float * WORK,INTEGER LWORK)81     inline static INTEGER f( INTEGER m, float * A,
82                           INTEGER * IPIV, float * WORK,
83                           INTEGER LWORK)
84     {
85       INTEGER M = m;
86       INTEGER N = m;
87       INTEGER LDA = m;
88       INTEGER INFO;
89       SGETRF (&M, &N, A, &LDA, IPIV, &INFO);
90 
91       if (INFO != 0)
92       {
93         printf("Warning: LAPACK routine SGETRF returned non-zero exit status %d.\n",(int)INFO);
94       }
95 
96       //SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO );
97       SGETRI(&M, A, &LDA, IPIV, WORK, &LWORK, &INFO);
98 
99       if (INFO != 0)
100       {
101         printf("Warning: LAPACK routine SGETRI returned non-zero exit status %d.\n",(int)INFO);
102       }
103 
104       return INFO;
105     }
106 };
107 
108 template<>
109 class _xinverse<false> {
110 public:
f(INTEGER m,double * A,INTEGER * IPIV,double * WORK,INTEGER LWORK)111     inline static INTEGER f( INTEGER m, double * A,
112                           INTEGER * IPIV, double * WORK,
113                           INTEGER LWORK)
114     {
115       INTEGER M = m;
116       INTEGER N = m;
117       INTEGER LDA = m;
118       INTEGER INFO;
119       DGETRF (&M, &N, A, &LDA, IPIV, &INFO);
120 
121       if (INFO != 0)
122       {
123         printf("Warning: LAPACK routine DGETRF returned non-zero exit status %d.\n",(int)INFO);
124       }
125 
126       //SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO );
127       DGETRI(&M, A, &LDA, IPIV, WORK, &LWORK, &INFO);
128 
129       if (INFO != 0)
130       {
131         printf("Warning: LAPACK routine DGETRI returned non-zero exit status %d.\n",(int)INFO);
132       }
133 
134       return INFO;
135     }
136 };
137 
138 template<class real>
InverseMatrix(int m,real * mtx,real * output)139 real * InverseMatrix(int m, real * mtx, real * output)
140 {
141   real * target = output;
142   if (target == NULL)
143     target = (real*) malloc (sizeof(real) * m * m);
144 
145   memcpy(target, mtx, sizeof(real) * m * m);
146 
147   real * A = target;
148   INTEGER * IPIV = (INTEGER*) malloc(sizeof(INTEGER) * m);
149 
150   INTEGER NB = 32; // optimal block size; could also call ilaenv
151   INTEGER LWORK = m * NB;
152   real * WORK = (real*) malloc (sizeof(real) * LWORK);
153 
154   _xinverse<sizeof(float)==sizeof(real)>::f(m, A, IPIV, WORK, LWORK);
155 
156   free(WORK);
157   free(IPIV);
158 
159   return target;
160 }
161 
162 
163 template<bool C>
164 class _xgesvd {};
165 
166 template<>
167 class _xgesvd<true> {
168 public:
169     //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
170 
f(char * jobu,char * jobvt,INTEGER * m,INTEGER * n,float * A,INTEGER * LDA,float * S,float * U,INTEGER * LDU,float * VT,INTEGER * LDVT,float * WORK,INTEGER * LWORK,INTEGER * INFO)171     inline static void f( char * jobu, char * jobvt, INTEGER * m, INTEGER * n,
172                           float * A, INTEGER * LDA, float * S, float * U,
173                           INTEGER * LDU, float * VT, INTEGER * LDVT,
174                           float * WORK, INTEGER * LWORK, INTEGER * INFO)
175     {
176       SGESVD(jobu, jobvt, m, n, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO);
177     }
178 };
179 
180 template<>
181 class _xgesvd<false> {
182 public:
f(char * jobu,char * jobvt,INTEGER * m,INTEGER * n,double * A,INTEGER * LDA,double * S,double * U,INTEGER * LDU,double * VT,INTEGER * LDVT,double * WORK,INTEGER * LWORK,INTEGER * INFO)183     inline static void f( char * jobu, char * jobvt, INTEGER * m, INTEGER * n,
184                           double * A, INTEGER * LDA, double * S, double * U,
185                           INTEGER * LDU, double * VT, INTEGER * LDVT,
186                           double * WORK, INTEGER * LWORK, INTEGER * INFO)
187     {
188       DGESVD(jobu, jobvt, m, n, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO);
189     }
190 };
191 
192 template<class real>
PseudoInverseMatrix(int m,int n,real * mtx,real singularValueThreshold,int * rank_,real * output)193 real * PseudoInverseMatrix(int m, int n, real * mtx, real singularValueThreshold, int * rank_, real * output)
194 {
195   real * A = (real*) malloc (sizeof(real) * m * n);
196   memcpy(A, mtx, sizeof(real) * m * n);
197 
198   bool transpose = (m > n);
199   if (transpose)
200   {
201     // swap m,n
202     InPlaceTransposeMatrix(m,n, A);
203     int buffer = m;
204     m = n;
205     n = buffer;
206   }
207 
208   // now, we always have m <= n
209 
210   char jobu  = 'O';//overwrites A with U (left singular vectors)
211   char jobvt = 'S';//all rows returned in VT
212 
213   int ldA = m;
214   int ldU = m;
215 
216   INTEGER NB = 32; // optimal block size; could also call ilaenv
217   int lwork = NB*MAX(3*MIN( m, n)+MAX(m,n), 5*MIN(m,n)-4);
218 
219   real * work = (real*) malloc (sizeof(real) * lwork);
220   if (!work)
221   {
222     printf("Error: failed to allocate workspace.\n");
223     throw 1;
224   }
225 
226   //printf("Workspace size is: %G Mb .\n",1.0 * lwork * sizeof(int) / 1024 / 1024);
227 
228   // allocate array for singular vectors
229   real * S = (real *) malloc (sizeof(real) * MIN(m,n));
230   if (!S)
231   {
232     printf("Error: failed to allocate singular vectors.\n");
233     free(work);
234     throw 2;
235   }
236 
237   // allocate array for VT
238   int ldVT = MIN(m,n);
239   real * VT = (real *) malloc (sizeof(real) * ldVT * n);
240   if (!VT)
241   {
242     printf("Error: failed to allocate VT.\n");
243     free(S);
244     free(work);
245     throw 3;
246   }
247 
248   INTEGER M = m;
249   INTEGER N = n;
250   INTEGER LDA = ldA;
251   INTEGER LDU = ldU;
252   INTEGER LDVT = ldVT;
253   INTEGER LWORK = lwork;
254   INTEGER INFO;
255 
256   //printf("Calling LAPACK dgesvd routine...\n");fflush(NULL);
257 
258   //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
259 
260   _xgesvd<sizeof(real)==sizeof(float)>::f
261                  (&jobu, &jobvt, &M, &N, A, &LDA,
262                   S, NULL, &LDU, VT,
263                   &LDVT, work, &LWORK, &INFO);
264 
265   if (INFO != 0)
266   {
267     int code = INFO;
268     printf("Error: SVD solver returned non-zero exit code: %d.\n", code);
269     free(VT);
270     free(S);
271     free(work);
272     throw 4;
273   }
274 
275   free(work);
276 
277   //printf ("Singular values:\n");fflush(NULL);
278   //for (int i=0; i< MIN(m,n); i++)
279    // printf("%f ",S[i]);
280   //printf ("\n");
281 
282   // discard small singular values
283   int rank = MIN(m,n);
284   for(int i=0; i< MIN(m,n); i++)
285   {
286     if (S[i] / S[0] < singularValueThreshold)
287     {
288       rank = i;
289       break;
290     }
291   }
292 
293   for(int i=0; i< rank; i++)
294     S[i] = ((real)1.0) / S[i];
295 
296   for(int i=rank; i< MIN(m,n); i++)
297     S[i] = 0.0;
298 
299   real * U = A;
300   memset(&U[ELT(m,0,rank)], 0, sizeof(real) * m * (n-rank));
301   InPlaceTransposeMatrix(m, m, U);
302   real * UT = U;
303   // UT is now m x m
304 
305   InPlaceTransposeMatrix(m,n,VT);
306   real * V = VT;
307   // V is now n x m
308 
309   // multiply A^{\dagger} = V * Sigma^{-1} * U^T
310   for(int j=0; j<m; j++) // over all columns
311     for(int i=0; i<n; i++) // over all rows
312       V[ELT(n, i, j)] *= S[j];
313 
314   real * target = output;
315   if (target == NULL)
316     target = (real*) malloc (sizeof(real) * n * m);
317 
318   // V * UT
319   // (n x m) * (m x m)
320   // output is n x m
321   MultiplyMatrices(n, m, m, V, UT, target);
322 
323   if (transpose)
324     InPlaceTransposeMatrix(n, m, target);
325 
326   free(A);
327   free(S);
328   free(VT);
329 
330   if (rank_ != NULL)
331     *rank_ = rank;
332 
333   return target;
334 }
335 
336 template<bool C>
337 class _xgelsy {};
338 
339 //SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
340 //                   WORK, LWORK, INFO )
341 
342 template<>
343 class _xgelsy<true> {
344 public:
345 
f(INTEGER * M,INTEGER * N,INTEGER * NRHS,float * A,INTEGER * LDA,float * B,INTEGER * LDB,INTEGER * JPVT,float * RCOND,INTEGER * RANK,float * WORK,INTEGER * LWORK,INTEGER * INFO)346     inline static void f( INTEGER * M, INTEGER * N, INTEGER * NRHS,
347                           float * A, INTEGER * LDA,
348                           float * B, INTEGER * LDB,
349                           INTEGER * JPVT, float * RCOND, INTEGER * RANK,
350                           float * WORK, INTEGER * LWORK, INTEGER * INFO)
351     {
352       SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
353               WORK, LWORK, INFO );
354     }
355 };
356 
357 template<>
358 class _xgelsy<false> {
359 public:
360 
f(INTEGER * M,INTEGER * N,INTEGER * NRHS,double * A,INTEGER * LDA,double * B,INTEGER * LDB,INTEGER * JPVT,double * RCOND,INTEGER * RANK,double * WORK,INTEGER * LWORK,INTEGER * INFO)361     inline static void f( INTEGER * M, INTEGER * N, INTEGER * NRHS,
362                           double * A, INTEGER * LDA,
363                           double * B, INTEGER * LDB,
364                           INTEGER * JPVT, double * RCOND, INTEGER * RANK,
365                           double * WORK, INTEGER * LWORK, INTEGER * INFO)
366     {
367       DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
368               WORK, LWORK, INFO );
369     }
370 };
371 
372 template<class real>
MatrixLeastSquareSolve(int m,int n,int nRhs,real * mtx,real * b,real rcond,int * rank_,real * output)373 real * MatrixLeastSquareSolve(int m, int n, int nRhs, real * mtx, real * b, real rcond, int * rank_, real * output)
374 {
375   real * A = (real*) malloc (sizeof(real) * m * n);
376   if (!A)
377   {
378     printf("Error: failed to allocate A.\n");
379     throw 1;
380   }
381   memcpy(A, mtx, sizeof(real) * m * n);
382 
383   INTEGER M = m;
384   INTEGER N = n;
385   INTEGER NRHS = nRhs;
386   INTEGER LDA = m;
387 
388   INTEGER LDB = MAX(M,N);
389   real * B = (real*) calloc (LDB * nRhs, sizeof(real));
390   if (!B)
391   {
392     printf("Error: failed to allocate B.\n");
393     free(A);
394     throw 2;
395   }
396   for(int i=0; i < nRhs; i++)
397     memcpy(&B[ELT(LDB,0,i)], &b[ELT(m,0,i)], sizeof(real) * m);
398 
399   INTEGER * JPVT = (INTEGER*) malloc (sizeof(INTEGER) * N);
400   if (!JPVT)
401   {
402     printf("Error: failed to allocate JPVT.\n");
403     free(B);
404     free(A);
405     throw 3;
406   }
407 
408   INTEGER RANK;
409   real RCOND = rcond;
410 
411   INTEGER NB = 32; // optimal block size; could also call ilaenv
412   INTEGER LWORK = MAX( M*N+2*N+NB*(N+1), 2*M*N+NB*NRHS );
413 
414   real * WORK = (real*) malloc (sizeof(real) * LWORK);
415   if (!WORK)
416   {
417     printf("Error: failed to allocate workspace.\n");
418     free(JPVT);
419     free(B);
420     free(A);
421     throw 4;
422   }
423 
424   //printf("Workspace size is: %G Mb .\n",1.0 * LWORK * sizeof(int) / 1024 / 1024);
425 
426   INTEGER INFO;
427 
428   //printf("M=%d N=%d NRHS=%d LDA=%d LDB=%d\n", (int)M, (int)N, (int)NRHS, (int)LDA, (int)LDB);
429   _xgelsy<sizeof(real)==sizeof(float)>::f
430                  (&M, &N, &NRHS, A, &LDA, B, &LDB,
431                   JPVT, &RCOND, &RANK,
432                   WORK, &LWORK, &INFO );
433 
434   if (INFO != 0)
435   {
436     int code = INFO;
437     printf("Error: xGELSY solver returned non-zero exit code: %d.\n", code);
438     free(WORK);
439     free(JPVT);
440     free(B);
441     free(A);
442     throw 5;
443   }
444 
445   free(JPVT);
446   free(WORK);
447   free(A);
448 
449   real * target = output;
450   if (target == NULL)
451     target = (real*) malloc (sizeof(real) * n * nRhs);
452 
453   for(int i=0; i < nRhs; i++)
454     memcpy(&target[ELT(n,0,i)], &B[ELT(LDB,0,i)], sizeof(real) * n);
455 
456   free(B);
457 
458   if (rank_ != NULL)
459     *rank_ = RANK;
460 
461   return target;
462 }
463 
464 template<bool C>
465 class _xgesv {};
466 
467 
468 //DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
469 
470 template<>
471 class _xgesv<true> {
472 public:
473 
f(INTEGER * N,INTEGER * NRHS,float * A,INTEGER * LDA,INTEGER * IPIV,float * B,INTEGER * LDB,INTEGER * INFO)474     inline static void f( INTEGER * N, INTEGER * NRHS,
475                           float * A, INTEGER * LDA,
476                           INTEGER * IPIV,
477                           float * B, INTEGER * LDB,
478                           INTEGER * INFO)
479     {
480       SGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO );
481     }
482 };
483 
484 template<>
485 class _xgesv<false> {
486 public:
f(INTEGER * N,INTEGER * NRHS,double * A,INTEGER * LDA,INTEGER * IPIV,double * B,INTEGER * LDB,INTEGER * INFO)487     inline static void f( INTEGER * N, INTEGER * NRHS,
488                           double * A, INTEGER * LDA,
489                           INTEGER * IPIV,
490                           double * B, INTEGER * LDB,
491                           INTEGER * INFO)
492     {
493       DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO );
494     }
495 };
496 
497 template<class real>
MatrixLUSolve(int n,int nRhs,real * mtx,real * x,real * b)498 void MatrixLUSolve(int n, int nRhs, real * mtx, real * x, real * b)
499 {
500   real * A = (real*) malloc (sizeof(real) * n * n);
501   if (!A)
502   {
503     printf("Error: failed to allocate A.\n");
504     throw 1;
505   }
506   memcpy(A, mtx, sizeof(real) * n * n);
507 
508   INTEGER N = n;
509   INTEGER NRHS = nRhs;
510   INTEGER LDA = n;
511 
512   memcpy(x, b, sizeof(real) * n * nRhs);
513   real * B = x;
514 
515   INTEGER * IPIV = (INTEGER*) malloc (sizeof(INTEGER) * N);
516   if (!IPIV)
517   {
518     printf("Error: failed to allocate IPIV.\n");
519     free(A);
520     throw 3;
521   }
522 
523   INTEGER LDB = n;
524   INTEGER INFO;
525 
526   _xgesv<sizeof(real)==sizeof(float)>::f
527                  (&N, &NRHS, A, &LDA, IPIV, B, &LDB, &INFO);
528 
529   if (INFO != 0)
530   {
531     int code = INFO;
532     printf("Error: xGESV solver returned non-zero exit code: %d.\n", code);
533     free(IPIV);
534     free(A);
535     throw 4;
536   }
537 
538   free(IPIV);
539   free(A);
540 }
541 
542 template<bool C>
543 class _xsyev {};
544 
545 //SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
546 template<>
547 class _xsyev<true> {
548 public:
549 
f(char * JOBZ,char * UPLO,INTEGER * N,float * A,INTEGER * LDA,float * W,float * WORK,INTEGER * LWORK,INTEGER * INFO)550     inline static void f( char * JOBZ, char * UPLO, INTEGER * N,
551                           float * A, INTEGER * LDA, float * W,
552                           float * WORK, INTEGER * LWORK, INTEGER * INFO)
553     {
554       SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO );
555     }
556 };
557 
558 
559 template<>
560 class _xsyev<false> {
561 public:
562 
f(char * JOBZ,char * UPLO,INTEGER * N,double * A,INTEGER * LDA,double * W,double * WORK,INTEGER * LWORK,INTEGER * INFO)563     inline static void f( char * JOBZ, char * UPLO, INTEGER * N,
564                           double * A, INTEGER * LDA, double * W,
565                           double * WORK, INTEGER * LWORK, INTEGER * INFO)
566     {
567       DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO );
568     }
569 };
570 
571 template<class real>
SymmetricMatrixEigenDecomposition(int m,real * mtx,real * Q,real * Lambda)572 void SymmetricMatrixEigenDecomposition(int m, real * mtx, real * Q, real * Lambda)
573 {
574   char JOBZ = 'V';
575   char UPLO = 'U';
576   INTEGER N = m;
577 
578   memcpy(Q, mtx, sizeof(real) * m * m);
579 
580   real * A = Q;
581 
582   INTEGER LDA = m;
583   real * W = Lambda;
584 
585   INTEGER NB = 32;
586   INTEGER LWORK = (NB+2)*N;
587 
588   real * WORK = (real*) malloc (sizeof(real) * LWORK);
589 
590   INTEGER INFO;
591 
592   _xsyev<sizeof(real)==sizeof(float)>::f(&JOBZ, &UPLO, &N, A, &LDA, W, WORK, &LWORK, &INFO);
593 
594   if (INFO != 0)
595   {
596     int code = INFO;
597     printf("Error: xSYEV solver returned non-zero exit code: %d.\n", code); fflush(NULL);
598     free(WORK);
599     throw 1;
600   }
601 
602   free(WORK);
603 }
604 
605 template<bool C>
606 class _xsygv {};
607 
608 //SUBROUTINE  DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO )
609 
610 template<>
611 class _xsygv<true> {
612 public:
613 
f(INTEGER * ITYPE,char * JOBZ,char * UPLO,INTEGER * N,float * A,INTEGER * LDA,float * B,INTEGER * LDB,float * W,float * WORK,INTEGER * LWORK,INTEGER * INFO)614     inline static void f( INTEGER * ITYPE, char * JOBZ, char * UPLO, INTEGER * N,
615                           float * A, INTEGER * LDA, float * B, INTEGER * LDB,
616                           float * W, float * WORK, INTEGER * LWORK, INTEGER * INFO)
617     {
618       SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO );
619     }
620 };
621 
622 
623 template<>
624 class _xsygv<false> {
625 public:
626 
f(INTEGER * ITYPE,char * JOBZ,char * UPLO,INTEGER * N,double * A,INTEGER * LDA,double * B,INTEGER * LDB,double * W,double * WORK,INTEGER * LWORK,INTEGER * INFO)627     inline static void f( INTEGER * ITYPE, char * JOBZ, char * UPLO, INTEGER * N,
628                           double * A, INTEGER * LDA, double * B, INTEGER * LDB,
629                           double * W, double * WORK, INTEGER * LWORK, INTEGER * INFO)
630     {
631       DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, LWORK, INFO );
632     }
633 };
634 
635 template<class real>
SymmetricMatrixGeneralEigenDecomposition(int m,real * mtx,real * mtx2,real * Q,real * Lambda)636 void SymmetricMatrixGeneralEigenDecomposition(int m, real * mtx, real * mtx2, real * Q, real * Lambda)
637 {
638   int ITYPE = 1;
639   char JOBZ = 'V';
640   char UPLO = 'U';
641   int LWORK = 2 * (3 * m - 1);
642   real * WORK = (real*) malloc (sizeof(real) * LWORK);
643   int INFO;
644 
645   memcpy(Q, mtx, sizeof(real) * m * m);
646   real * A = Q;
647   real * B = mtx2;
648 
649   _xsygv<sizeof(real)==sizeof(float)>::f( &ITYPE, &JOBZ, &UPLO, &m, A, &m, B, &m, Lambda, WORK, &LWORK, &INFO );
650 
651   if (INFO != 0)
652   {
653     int code = INFO;
654     printf("Error: xSYGV solver returned non-zero exit code: %d.\n", code); fflush(NULL);
655     free(WORK);
656     throw 1;
657   }
658 
659   free(WORK);
660 }
661 
662 template<bool C>
663 class _xgeev {};
664 
665 //SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
666 //                    LDVR, WORK, LWORK, INFO )
667 
668 template<>
669 class _xgeev<true> {
670 public:
671 
f(char * JOBVL,char * JOBVR,INTEGER * N,float * A,INTEGER * LDA,float * WR,float * WI,float * VL,INTEGER * LDVL,float * VR,INTEGER * LDVR,float * WORK,INTEGER * LWORK,INTEGER * INFO)672     inline static void f( char * JOBVL, char * JOBVR, INTEGER * N,
673                           float * A, INTEGER * LDA, float * WR, float * WI,
674                           float * VL, INTEGER * LDVL, float * VR, INTEGER * LDVR,
675                           float * WORK, INTEGER * LWORK, INTEGER * INFO)
676     {
677       SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
678                     LDVR, WORK, LWORK, INFO );
679     }
680 };
681 
682 
683 template<>
684 class _xgeev<false> {
685 public:
f(char * JOBVL,char * JOBVR,INTEGER * N,double * A,INTEGER * LDA,double * WR,double * WI,double * VL,INTEGER * LDVL,double * VR,INTEGER * LDVR,double * WORK,INTEGER * LWORK,INTEGER * INFO)686     inline static void f( char * JOBVL, char * JOBVR, INTEGER * N,
687                           double * A, INTEGER * LDA, double * WR, double * WI,
688                           double * VL, INTEGER * LDVL, double * VR, INTEGER * LDVR,
689                           double * WORK, INTEGER * LWORK, INTEGER * INFO)
690     {
691       DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
692                     LDVR, WORK, LWORK, INFO );
693     }
694 };
695 
696 template<class real>
MatrixEigenDecomposition(int m,real * mtx,real * EigenVectors,real * LambdaRe,real * LambdaIm)697 int MatrixEigenDecomposition(int m, real * mtx, real * EigenVectors, real * LambdaRe, real * LambdaIm)
698 {
699   char JOBVL = 'N';
700   char JOBVR = 'V';
701   INTEGER N = m;
702 
703   real * A = (real*) malloc (sizeof(real) * m * m);
704   memcpy(A, mtx, sizeof(real) * m * m);
705   INTEGER LDA = m;
706 
707   real * WR = LambdaRe;
708   real * WI = LambdaIm;
709 
710   real * VL = NULL;
711   INTEGER LDVL = m;
712 
713   real * VR = EigenVectors;
714   INTEGER LDVR = m;
715 
716   INTEGER LWORK = 16*N;
717 
718   real * WORK = (real*) malloc (sizeof(real) * LWORK);
719 
720   INTEGER INFO;
721 
722   _xgeev<sizeof(real)==sizeof(float)>::f(&JOBVL, &JOBVR, &N,
723      A, &LDA, WR, WI, VL, &LDVL, VR, &LDVR, WORK, &LWORK, &INFO);
724 
725   if (INFO != 0)
726   {
727     int code = INFO;
728     printf("Warning: xGEEV solver returned non-zero exit code: %d.\n", code);
729   }
730 
731   free(WORK);
732   return INFO;
733 }
734 
735 template<class real>
MatrixSVD(int m,int n,real * mtx,real * U,real * Sigma,real * VT)736 void MatrixSVD(int m, int n, real * mtx, real * U, real * Sigma, real * VT)
737 {
738   real * A = (real*) malloc (sizeof(real) * m * n);
739   memcpy(A, mtx, sizeof(real) * m * n);
740 
741   // must deal with transpose since Intel MKL sometimes crashes on thin matrices
742   bool transpose = (m > n);
743   if (transpose)
744   {
745     InPlaceTransposeMatrix(m, n, A);
746 
747     // swap arguments
748     int buffer = m;
749     m = n;
750     n = buffer;
751 
752     real * bufferr = U;
753     U = VT;
754     VT = bufferr;
755   }
756 
757   // now, we always have m <= n
758 
759   char jobu  = 'S'; //left singular vectors returned in U
760   char jobvt = 'S'; //right singular vectors returned in VT
761 
762   INTEGER NB = 32; // optimal block size; could also call ilaenv
763   int lwork = NB*MAX(3*MIN( m, n)+MAX(m,n), 5*MIN(m,n)-4);
764 
765   real * work = (real*) malloc (sizeof(real) * lwork);
766   if (!work)
767   {
768     printf("Error: failed to allocate workspace.\n");
769     throw 1;
770   }
771 
772   //printf("Workspace size is: %G Mb .\n",1.0 * lwork * sizeof(int) / 1024 / 1024);
773 
774   INTEGER M = m;
775   INTEGER N = n;
776   INTEGER LDA = m;
777   INTEGER LDU = m;
778   INTEGER LDVT = MIN(m,n);
779   INTEGER LWORK = lwork;
780   INTEGER INFO;
781 
782   //printf("Calling LAPACK dgesvd routine...\n");fflush(NULL);
783 
784   //SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
785 
786   _xgesvd<sizeof(real)==sizeof(float)>::f
787                  (&jobu, &jobvt, &M, &N, A, &LDA,
788                   Sigma, U, &LDU, VT,
789                   &LDVT, work, &LWORK, &INFO);
790 
791   if (INFO != 0)
792   {
793     int code = INFO;
794     printf("Error: SVD solver returned non-zero exit code: %d.\n", code);
795     free(work);
796     free(A);
797     throw 2;
798   }
799 
800   free(work);
801   free(A);
802 
803   if (transpose)
804   {
805     InPlaceTransposeMatrix(m, MIN(m,n), U);
806     InPlaceTransposeMatrix(MIN(m,n), n, VT);
807   }
808 }
809 
810 template float * InverseMatrix(int m, float * mtx, float * output);
811 template double * InverseMatrix(int m, double * mtx, double * output);
812 template float * PseudoInverseMatrix(int m, int n, float * mtx, float singularValueThreshold, int * rank, float * output);
813 template double * PseudoInverseMatrix(int m, int n, double * mtx, double singularValueThreshold, int * rank, double * output);
814 template float * MatrixLeastSquareSolve(int m, int n, int nRhs, float * mtx, float * b, float rcond, int * rank, float * output);
815 template double * MatrixLeastSquareSolve(int m, int n, int nRhs, double * mtx, double * b, double rcond, int * rank, double * output);
816 template void SymmetricMatrixEigenDecomposition(int m, float * mtx, float * Q, float * Lambda);
817 template void SymmetricMatrixEigenDecomposition(int m, double * mtx, double * Q, double * Lambda);
818 template void SymmetricMatrixGeneralEigenDecomposition(int m, float * mtx, float * mtx2, float * Q, float * Lambda);
819 template void SymmetricMatrixGeneralEigenDecomposition(int m, double * mtx, double * mtx2, double * Q, double * Lambda);
820 template int MatrixEigenDecomposition(int m, float * mtx, float * EigenVectors, float * LambdaRe, float * LambdaIm);
821 template int MatrixEigenDecomposition(int m, double * mtx, double * EigenVectors, double * LambdaRe, double * LambdaIm);
822 template void MatrixSVD(int m, int n, float * mtx, float * U, float * Sigma, float * VT);
823 template void MatrixSVD(int m, int n, double * mtx, double * U, double * Sigma, double * VT);
824 template void MatrixLUSolve(int n, int nRhs, float * mtx, float * x, float * b);
825 template void MatrixLUSolve(int n, int nRhs, double * mtx, double * x, double * b);
826 
827