1 /* Copyright (c) 2015  Gerald Knizia
2  *
3  * This file is part of the IboView program (see: http://www.iboview.org)
4  *
5  * IboView is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, version 3.
8  *
9  * IboView is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with bfint (LICENSE). If not, see http://www.gnu.org/licenses/
16  *
17  * Please see IboView documentation in README.txt for:
18  * -- A list of included external software and their licenses. The included
19  *    external software's copyright is not touched by this agreement.
20  * -- Notes on re-distribution and contributions to/further development of
21  *    the IboView software
22  */
23 
24 #ifndef CX_ALGEBRA_H
25 #define CX_ALGEBRA_H
26 
27 #include <cstddef>
28 #include "CxDefs.h"
29 #include "CxFortranInt.h"
30 
31 using std::ptrdiff_t;
32 using std::size_t;
33 typedef unsigned int uint;
34 
35 // behold the elegance of FORTRAN77/C interfaces!
36 extern "C"{
37         // declaration of FORTRAN77 blas/lapack routines... (to exist in ACML/MKL/etc.)
38 
39         //  C := alpha*op( A )*op( B ) + beta*C,
40         // Trans*: 'N' (notranspose), 'T' (transpose) or 'C' (conjugate-transpose)(=T)
41         #define DGEMM FORT_Extern(dgemm,DGEMM)
42         void DGEMM( char const &TransA, char const &TransB, FINTARG M, FINTARG N, FINTARG K,
43             FDBLARG alpha, double const *A, FINTARG lda, double const *B, FINTARG ldb,
44             FDBLARG beta, double *C, FINTARG ldc );
45 
46         //  C := alpha*op(A)*op(A^T) + beta*C,
47         #define DSYRK FORT_Extern(dsyrk,DSYRK)
48         void DSYRK( char const &UpLo, char const &TransA, FINTARG N, FINTARG K,
49             FDBLARG alpha, double const *A, FINTARG lda,
50             FDBLARG beta, double *C, FINTARG ldc );
51 
52         // y += alpha * M * x
53         #define DGEMV FORT_Extern(dgemv,DGEMV)
54         void DGEMV(char const &Trans, FINTARG M, FINTARG N, FDBLARG Alpha, double const *RESTRICT A, FINTARG lda, double const *RESTRICT X, FINTARG incx, FDBLARG Beta, double *RESTRICT y, FINTARG incy);
55 
56         // A += alpha * x y^T
57         #define DGER FORT_Extern(dger,DGER)
58         void DGER(FINTARG M, FINTARG N, double const &Alpha, double const *RESTRICT X, FINTARG incx, double const *RESTRICT Y, FINTARG incy, double *RESTRICT A, FINTARG ldA);
59 
60         // computes eigenvalues and eigenvectors of a symmetric matrix:
61         //  jobz: 'N'/'V': compute eigenvalues only/compute also eigenvectors
62         //  uplo: 'U'/'L': upper/lower triangle of A is stored.
63         //  N: matrix size. A is N x N matrix.
64         //  A: input matrix, output vectors. LDA: row stride A.
65         //  W: output, eigenvectors in ascending order. (vector of length N).
66         //  Work: work space
67         //  lWork: Work space size. "For optimal efficiency, LWORK >= (NB+2)*N,
68         // where NB is the blocksize for DSYTRD returned by ILAENV."
69         #define DSYEV FORT_Extern(dsyev,DSYEV)
70         void DSYEV(char const &jobz, char const &uplo, FINTARG N, double *A, FINTARG LDA, double *W, double *WORK, FINTARG LWORK, FORTINT &INFO);
71         #define DSYEVD FORT_Extern(dsyevd,DSYEVD)
72         void DSYEVD(char const &jobz, char const &uplo, FINTARG N, double *A, FINTARG LDA, double *W, double *WORK, FINTARG LWORK, FORTINT *IWORK, FINTARG LIWORK, FORTINT &INFO);
73 
74         #define DSYGV FORT_Extern(dsygv,DSYGV)
75         void DSYGV(FINTARG ITYPE, char const &JOBZ, char const &UPLO, FINTARG N,
76             double *A, FINTARG LDA, double const *B, FINTARG LDB, double *EW,
77             double *WORK, FORTINT &LWORK, FORTINT &INFO );
78 
79         // compute m x n matrix LU factorization.
80         // info: =0 success. > 0: matrix is singular, factorization cannot be used
81         // to solve linear systems.
82         #define DGETRF FORT_Extern(dgetrf,DGETRF)
83         void DGETRF(FINTARG M, FINTARG N, double const *pA, FINTARG LDA, FORTINT *ipiv, FORTINT *INFO );
84         #define DTRTRI FORT_Extern(dtrtri,DTRTRI)
85         void DTRTRI(char const &Uplo, char const &Diag, FINTARG N, double *pA, FINTARG LDA, FORTINT *info);
86 
87 
88         // solves A * X = B for X. n: number of equations (order of A).
89         // needs LU decomposition as input.
90         #define DGESV FORT_Extern(dgesv,DGESV)
91         void DGESV( FINTARG n, FINTARG nrhs, double *A, FINTARG lda, FINTARG ipivot, double *B,
92             FINTARG ldb, FORTINT &info );
93 
94         #define DPOTRF FORT_Extern(dpotrf,DPOTRF)
95         void DPOTRF(char const &UpLo, FINTARG n, double *A, FINTARG lda, FORTINT *info);
96         #define DPOTRS FORT_Extern(dpotrs,DPOTRS)
97         void DPOTRS(char const &UpLo, FINTARG n, FINTARG nRhs, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
98         #define DTRTRS FORT_Extern(dtrtrs,DTRTRS)
99         void DTRTRS(char const &UpLo, char const &Trans, char const &Diag, FINTARG N, FINTARG NRHS, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
100         // ^- gna.. dtrtrs is rather useless. It just does some argument checks and
101         // then calls dtrsm with side == 'L' (which probably does the same checks again).
102         #define DTRSM FORT_Extern(dtrsm,DTRSM)
103         void DTRSM(char const &Side, char const &UpLo, char const &Trans, char const &Diag, FINTARG nRowsB, FINTARG nColsB, double const &Alpha, double *A, FINTARG lda, double *B, FINTARG ldb, FORTINT *info);
104         #define DTRMM FORT_Extern(dtrmm,DTRMM)
105         void DTRMM(char const &Side, char const &UpLo, char const &Trans, char const &Diag, FINTARG nRowsB, FINTARG nColsB, double const &Alpha, double *A, FINTARG lda, double *B, FINTARG ldb);
106 
107         // linear least squares using divide & conquer SVD.
108         #define DGELSS FORT_Extern(dgelss,DGELSS)
109         void DGELSS(FINTARG M, FINTARG N, FINTARG NRHS, double *A, FINTARG LDA, double *B, FINTARG LDB, double *S, double const &RCOND, FORTINT *RANK,
110             double *WORK, FINTARG LWORK, FORTINT *INFO );
111 
112 //         // compute SVD (simple driver for small cases)
113 //         #define DGESVD FORT_Extern(dgesvd,DGESVD)
114 //         void DGESVD(char const &JobU, char const &JobVT, FINTARG M, FINTARG N, double *A, FINTARG ldA, double *S, double *U, FINTARG ldU, double *Vt, FINTARG ldVt, double *Work, FINTARG lWork, FORTINT *info);
115 //             double *WORK, FINTARG LWORK, FORTINT *INFO );
116         // divide & conquer SVD.
117         #define DGESDD FORT_Extern(dgesdd,DGESDD)
118         void DGESDD(char const &JobZ, FINTARG M, FINTARG N, double *A, FINTARG ldA, double *S, double *U, FINTARG ldU, double *Vt, FINTARG ldVt, double *Work, FINTARG lWork, FORTINT *piWork, FORTINT *info);
119 }
120 
121 
122 
123 
124 
125 namespace ct {
126 
127 void Mxm(double *pOut, ptrdiff_t iRowStO, ptrdiff_t iColStO,
128          double const *pA, ptrdiff_t iRowStA, ptrdiff_t iColStA,
129          double const *pB, ptrdiff_t iRowStB, ptrdiff_t iColStB,
130          size_t nRows, size_t nLink, size_t nCols, bool AddToDest = false, double fFactor = 1.0);
131 
132 // note: both H and S are overwritten. Eigenvectors go into H.
133 void DiagonalizeGen(double *pEw, double *pH, uint ldH, double *pS, uint ldS, uint N);
134 void Diagonalize(double *pEw, double *pH, uint ldH, uint N);
135 // note: pInAndTmp will be overwritten. Output is U: nRows x nSig, Vt : nCols x nSig, nSig = min(nRows,nCols).
136 void ComputeSvd(double *pU, size_t ldU, double *pSigma, double *pVt, size_t ldVt, double *pInAndTmp, size_t ldIn, size_t nRows, size_t nCols);
137 
138 void MxvLame(double *RESTRICT pOut, ptrdiff_t iStO, double const *RESTRICT pMat, ptrdiff_t iRowStM, ptrdiff_t iColStM,
139     double const *RESTRICT pIn, ptrdiff_t iStI, size_t nRows, size_t nLink, bool AddToDest = false, double fFactor = 1.0);
140 
141 
142 inline void Mxv(double *RESTRICT pOut, ptrdiff_t iStO, double const *RESTRICT pMat, ptrdiff_t iRowStM, ptrdiff_t iColStM,
143     double const *RESTRICT pIn, ptrdiff_t iStI, size_t nRows, size_t nLink, bool AddToDest = false, double fFactor = 1.0)
144 {
145     if ( iRowStM == 1 )
146         DGEMV('N', nRows, nLink, fFactor, pMat, iColStM,  pIn,iStI,  AddToDest? 1.0 : 0.0, pOut,iStO);
147     else if ( iColStM == 1 )
148         DGEMV('T', nLink, nRows, fFactor, pMat, iRowStM,  pIn,iStI,  AddToDest? 1.0 : 0.0, pOut,iStO);
149     else
150         MxvLame(pOut, iStO, pMat, iRowStM, iColStM, pIn, iStI, nRows, nLink, AddToDest, fFactor);
151 }
152 
153 
154 template<class FScalar>
Dot(FScalar const * a,FScalar const * b,size_t n)155 inline FScalar Dot( FScalar const *a, FScalar const *b, size_t n )
156 {
157     FScalar
158         r = 0;
159     for ( size_t i = 0; i < n; ++ i )
160         r += a[i] * b[i];
161     return r;
162 }
163 
164 // r += f * x
165 template<class FScalar>
Add(FScalar * RESTRICT r,FScalar const * RESTRICT x,FScalar f,size_t n)166 inline void Add( FScalar * RESTRICT r, FScalar const * RESTRICT x, FScalar f, size_t n )
167 {
168     if ( f != 1.0 ) {
169         for ( size_t i = 0; i < n; ++ i )
170             r[i] += f * x[i];
171     } else {
172         for ( size_t i = 0; i < n; ++ i )
173             r[i] += x[i];
174     }
175 }
176 
177 // r *= f
178 template<class FScalar>
Scale(FScalar * r,FScalar f,size_t n)179 inline void Scale( FScalar *r, FScalar f, size_t n )
180 {
181     for ( size_t i = 0; i < n; ++ i )
182         r[i] *= f;
183 }
184 
185 
186 // r = f * x
187 template<class FScalar>
Move(FScalar * RESTRICT r,FScalar const * RESTRICT x,FScalar f,size_t n)188 inline void Move( FScalar * RESTRICT r, FScalar const * RESTRICT x, FScalar f, size_t n )
189 {
190     for ( size_t i = 0; i < n; ++ i )
191         r[i] = f * x[i];
192 }
193 
194 
195 } // namespace ct
196 
197 #endif // CX_ALGEBRA_H
198 
199 // kate: indent-width 4
200