1 /* Copyright (c) 2015 Gerald Knizia
2 *
3 * This file is part of the IboView program (see: http://www.iboview.org)
4 *
5 * IboView is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3.
8 *
9 * IboView is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
16 *
17 * Please see IboView documentation in README.txt for:
18 * -- A list of included external software and their licenses. The included
19 * external software's copyright is not touched by this agreement.
20 * -- Notes on re-distribution and contributions to/further development of
21 * the IboView software
22 */
23
24 #ifndef CX_ALGEBRA_H
25 #define CX_ALGEBRA_H
26
27 #include <cstddef>
28 #include "CxDefs.h"
29 #include "CxFortranInt.h"
30
31 using std::ptrdiff_t;
32 using std::size_t;
33 typedef unsigned int uint;
34
35 // behold the elegance of FORTRAN77/C interfaces!
36 extern "C"{
37 // declaration of FORTRAN77 blas/lapack routines... (to exist in ACML/MKL/etc.)
38
39 // C := alpha*op( A )*op( B ) + beta*C,
40 // Trans*: 'N' (notranspose), 'T' (transpose) or 'C' (conjugate-transpose)(=T)
41 #define DGEMM FORT_Extern(dgemm,DGEMM)
42 void DGEMM( char const &TransA, char const &TransB, FINTARG M, FINTARG N, FINTARG K,
43 FDBLARG alpha, double const *A, FINTARG lda, double const *B, FINTARG ldb,
44 FDBLARG beta, double *C, FINTARG ldc );
45
46 // C := alpha*op(A)*op(A^T) + beta*C,
47 #define DSYRK FORT_Extern(dsyrk,DSYRK)
48 void DSYRK( char const &UpLo, char const &TransA, FINTARG N, FINTARG K,
49 FDBLARG alpha, double const *A, FINTARG lda,
50 FDBLARG beta, double *C, FINTARG ldc );
51
52 // y += alpha * M * x
53 #define DGEMV FORT_Extern(dgemv,DGEMV)
54 void DGEMV(char const &Trans, FINTARG M, FINTARG N, FDBLARG Alpha, double const *RESTRICT A, FINTARG lda, double const *RESTRICT X, FINTARG incx, FDBLARG Beta, double *RESTRICT y, FINTARG incy);
55
56 // A += alpha * x y^T
57 #define DGER FORT_Extern(dger,DGER)
58 void DGER(FINTARG M, FINTARG N, double const &Alpha, double const *RESTRICT X, FINTARG incx, double const *RESTRICT Y, FINTARG incy, double *RESTRICT A, FINTARG ldA);
59
60 // computes eigenvalues and eigenvectors of a symmetric matrix:
61 // jobz: 'N'/'V': compute eigenvalues only/compute also eigenvectors
62 // uplo: 'U'/'L': upper/lower triangle of A is stored.
63 // N: matrix size. A is N x N matrix.
64 // A: input matrix, output vectors. LDA: row stride A.
65 // W: output, eigenvectors in ascending order. (vector of length N).
66 // Work: work space
67 // lWork: Work space size. "For optimal efficiency, LWORK >= (NB+2)*N,
68 // where NB is the blocksize for DSYTRD returned by ILAENV."
69 #define DSYEV FORT_Extern(dsyev,DSYEV)
70 void DSYEV(char const &jobz, char const &uplo, FINTARG N, double *A, FINTARG LDA, double *W, double *WORK, FINTARG LWORK, FORTINT &INFO);
71 #define DSYEVD FORT_Extern(dsyevd,DSYEVD)
72 void DSYEVD(char const &jobz, char const &uplo, FINTARG N, double *A, FINTARG LDA, double *W, double *WORK, FINTARG LWORK, FORTINT *IWORK, FINTARG LIWORK, FORTINT &INFO);
73
74 #define DSYGV FORT_Extern(dsygv,DSYGV)
75 void DSYGV(FINTARG ITYPE, char const &JOBZ, char const &UPLO, FINTARG N,
76 double *A, FINTARG LDA, double const *B, FINTARG LDB, double *EW,
77 double *WORK, FORTINT &LWORK, FORTINT &INFO );
78
79 // compute m x n matrix LU factorization.
80 // info: =0 success. > 0: matrix is singular, factorization cannot be used
81 // to solve linear systems.
82 #define DGETRF FORT_Extern(dgetrf,DGETRF)
83 void DGETRF(FINTARG M, FINTARG N, double const *pA, FINTARG LDA, FORTINT *ipiv, FORTINT *INFO );
84 #define DTRTRI FORT_Extern(dtrtri,DTRTRI)
85 void DTRTRI(char const &Uplo, char const &Diag, FINTARG N, double *pA, FINTARG LDA, FORTINT *info);
86
87
88 // solves A * X = B for X. n: number of equations (order of A).
89 // needs LU decomposition as input.
90 #define DGESV FORT_Extern(dgesv,DGESV)
91 void DGESV( FINTARG n, FINTARG nrhs, double *A, FINTARG lda, FINTARG ipivot, double *B,
92 FINTARG ldb, FORTINT &info );
93
94 #define DPOTRF FORT_Extern(dpotrf,DPOTRF)
95 void DPOTRF(char const &UpLo, FINTARG n, double *A, FINTARG lda, FORTINT *info);
96 #define DPOTRS FORT_Extern(dpotrs,DPOTRS)
97 void DPOTRS(char const &UpLo, FINTARG n, FINTARG nRhs, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
98 #define DTRTRS FORT_Extern(dtrtrs,DTRTRS)
99 void DTRTRS(char const &UpLo, char const &Trans, char const &Diag, FINTARG N, FINTARG NRHS, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
100 // ^- gna.. dtrtrs is rather useless. It just does some argument checks and
101 // then calls dtrsm with side == 'L' (which probably does the same checks again).
102 #define DTRSM FORT_Extern(dtrsm,DTRSM)
103 void DTRSM(char const &Side, char const &UpLo, char const &Trans, char const &Diag, FINTARG nRowsB, FINTARG nColsB, double const &Alpha, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
104 #define DTRMM FORT_Extern(dtrmm,DTRMM)
105 void DTRMM(char const &Side, char const &UpLo, char const &Trans, char const &Diag, FINTARG nRowsB, FINTARG nColsB, double const &Alpha, double *A, FINTARG lda, double *B, FINTARG ldb);
106
107 // linear least squares using divide & conquer SVD.
108 #define DGELSS FORT_Extern(dgelss,DGELSS)
109 void DGELSS(FINTARG M, FINTARG N, FINTARG NRHS, double *A, FINTARG LDA, double *B, FINTARG LDB, double *S, double const &RCOND, FORTINT *RANK,
110 double *WORK, FINTARG LWORK, FORTINT *INFO );
111
112 // // compute SVD (simple driver for small cases)
113 // #define DGESVD FORT_Extern(dgesvd,DGESVD)
114 // void DGESVD(char const &JobU, char const &JobVT, FINTARG M, FINTARG N, double *A, FINTARG ldA, double *S, double *U, FINTARG ldU, double *Vt, FINTARG ldVt, double *Work, FINTARG lWork, FORTINT *info);
115 // double *WORK, FINTARG LWORK, FORTINT *INFO );
116 // divide & conquer SVD.
117 #define DGESDD FORT_Extern(dgesdd,DGESDD)
118 void DGESDD(char const &JobZ, FINTARG M, FINTARG N, double *A, FINTARG ldA, double *S, double *U, FINTARG ldU, double *Vt, FINTARG ldVt, double *Work, FINTARG lWork, FORTINT *piWork, FORTINT *info);
119 }
120
121
122
123
124
125 namespace ct {
126
127 void Mxm(double *pOut, ptrdiff_t iRowStO, ptrdiff_t iColStO,
128 double const *pA, ptrdiff_t iRowStA, ptrdiff_t iColStA,
129 double const *pB, ptrdiff_t iRowStB, ptrdiff_t iColStB,
130 size_t nRows, size_t nLink, size_t nCols, bool AddToDest = false, double fFactor = 1.0);
131
132 // note: both H and S are overwritten. Eigenvectors go into H.
133 void DiagonalizeGen(double *pEw, double *pH, uint ldH, double *pS, uint ldS, uint N);
134 void Diagonalize(double *pEw, double *pH, uint ldH, uint N);
135 // note: pInAndTmp will be overwritten. Output is U: nRows x nSig, Vt : nCols x nSig, nSig = min(nRows,nCols).
136 void ComputeSvd(double *pU, size_t ldU, double *pSigma, double *pVt, size_t ldVt, double *pInAndTmp, size_t ldIn, size_t nRows, size_t nCols);
137
138 void MxvLame(double *RESTRICT pOut, ptrdiff_t iStO, double const *RESTRICT pMat, ptrdiff_t iRowStM, ptrdiff_t iColStM,
139 double const *RESTRICT pIn, ptrdiff_t iStI, size_t nRows, size_t nLink, bool AddToDest = false, double fFactor = 1.0);
140
141
142 inline void Mxv(double *RESTRICT pOut, ptrdiff_t iStO, double const *RESTRICT pMat, ptrdiff_t iRowStM, ptrdiff_t iColStM,
143 double const *RESTRICT pIn, ptrdiff_t iStI, size_t nRows, size_t nLink, bool AddToDest = false, double fFactor = 1.0)
144 {
145 if ( iRowStM == 1 )
146 DGEMV('N', nRows, nLink, fFactor, pMat, iColStM, pIn,iStI, AddToDest? 1.0 : 0.0, pOut,iStO);
147 else if ( iColStM == 1 )
148 DGEMV('T', nLink, nRows, fFactor, pMat, iRowStM, pIn,iStI, AddToDest? 1.0 : 0.0, pOut,iStO);
149 else
150 MxvLame(pOut, iStO, pMat, iRowStM, iColStM, pIn, iStI, nRows, nLink, AddToDest, fFactor);
151 }
152
153
154 template<class FScalar>
Dot(FScalar const * a,FScalar const * b,size_t n)155 inline FScalar Dot( FScalar const *a, FScalar const *b, size_t n )
156 {
157 FScalar
158 r = 0;
159 for ( size_t i = 0; i < n; ++ i )
160 r += a[i] * b[i];
161 return r;
162 }
163
164 // r += f * x
165 template<class FScalar>
Add(FScalar * RESTRICT r,FScalar const * RESTRICT x,FScalar f,size_t n)166 inline void Add( FScalar * RESTRICT r, FScalar const * RESTRICT x, FScalar f, size_t n )
167 {
168 if ( f != 1.0 ) {
169 for ( size_t i = 0; i < n; ++ i )
170 r[i] += f * x[i];
171 } else {
172 for ( size_t i = 0; i < n; ++ i )
173 r[i] += x[i];
174 }
175 }
176
177 // r *= f
178 template<class FScalar>
Scale(FScalar * r,FScalar f,size_t n)179 inline void Scale( FScalar *r, FScalar f, size_t n )
180 {
181 for ( size_t i = 0; i < n; ++ i )
182 r[i] *= f;
183 }
184
185
186 // r = f * x
187 template<class FScalar>
Move(FScalar * RESTRICT r,FScalar const * RESTRICT x,FScalar f,size_t n)188 inline void Move( FScalar * RESTRICT r, FScalar const * RESTRICT x, FScalar f, size_t n )
189 {
190 for ( size_t i = 0; i < n; ++ i )
191 r[i] = f * x[i];
192 }
193
194
195 } // namespace ct
196
197 #endif // CX_ALGEBRA_H
198
199 // kate: indent-width 4
200