1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
8 //                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 //                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 //                    Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
11 //                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
12 //
13 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
14 //////////////////////////////////////////////////////////////////////////////////////
15 
16 
17 #ifndef QMCPLUSPLUS_MATRIXOPERATORS_H
18 #define QMCPLUSPLUS_MATRIXOPERATORS_H
19 
20 #include "OhmmsPETE/OhmmsMatrix.h"
21 #include "OhmmsPETE/OhmmsVector.h"
22 #include "OhmmsPETE/TinyVector.h"
23 #include "OhmmsPETE/Tensor.h"
24 #include "CPU/BLAS.hpp"
25 #include "CPU/SIMD/inner_product.hpp"
26 
27 namespace qmcplusplus
28 {
29 template<typename T>
TESTDOT(const T * restrict f,const T * restrict l,const T * restrict b)30 inline T TESTDOT(const T* restrict f, const T* restrict l, const T* restrict b)
31 {
32   T s = 0;
33   while (f != l)
34     s += (*f++) * (*b++);
35   return s;
36 }
37 
38 namespace MatrixOperators
39 {
40 /** static function to perform C=AB for real matrices
41    *
42    * Call dgemm/zgemm/sgemm/cgemm via BLAS::gemm
43    */
44 template<typename T>
product(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)45 inline void product(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
46 {
47   const char transa = 'N';
48   const char transb = 'N';
49   const T one(1.0);
50   const T zero(0.0);
51   BLAS::gemm(transa, transb, B.cols(), A.rows(), B.rows(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
52              C.cols());
53 }
54 
55 template<typename T>
product_ABt(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)56 inline void product_ABt(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
57 {
58   const char transa = 't';
59   const char transb = 'n';
60   const T one(1.0);
61   const T zero(0.0);
62   BLAS::gemm(transa, transb, B.rows(), A.rows(), B.cols(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
63              C.cols());
64 }
65 
66 template<typename T>
product_AtB(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)67 inline void product_AtB(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
68 {
69   const char transa = 'n';
70   const char transb = 't';
71   const T one(1.0);
72   const T zero(0.0);
73   BLAS::gemm(transa, transb, B.cols(), A.cols(), B.rows(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
74              C.cols());
75 }
76 
77 
half_outerProduct(const Matrix<double> & M,const Vector<double> & B,int iat,Matrix<double> & C)78 inline void half_outerProduct(const Matrix<double>& M, const Vector<double>& B, int iat, Matrix<double>& C)
79 {
80   for (int i = 0; i < C.rows(); i++)
81     for (int j = 0; j < C.cols(); j++)
82       C(iat, i) += M(i, j) * B[j];
83   //for (int i=0; i<C.rows(); i++)
84   //  C(iat,i)+=simd::dot(M[i],B.data(),C.cols());
85 }
86 
other_half_outerProduct(const Matrix<double> & M,const Vector<double> & B,int iat,Matrix<double> & C)87 inline void other_half_outerProduct(const Matrix<double>& M, const Vector<double>& B, int iat, Matrix<double>& C)
88 {
89   for (int i = 0; i < C.rows(); i++)
90     for (int j = 0; j < C.cols(); j++)
91       C(i, iat) += M(i, j) * B[j];
92   //for (int i=0; i<C.rows(); i++)
93   //  C(i,iat)+=simd::dot(M[i],B.data(),C.cols());
94 }
95 
innerProduct(const Vector<double> & A,const Vector<double> & B)96 inline double innerProduct(const Vector<double>& A, const Vector<double>& B)
97 {
98   double tot = 0.0;
99   for (int i = 0; i < A.size(); i++)
100     tot += A[i] * B[i];
101   return tot;
102   //return simd::dot(A.data(),B.data(),A.size());
103 }
104 
105 
106 template<typename T>
transpose(Matrix<T> & A)107 inline void transpose(Matrix<T>& A)
108 {
109   for (int i = 0; i < A.extent(0); i++)
110     for (int j = 0; j < i; j++)
111       std::swap(A(i, j), A(j, i));
112 }
113 
114 template<typename T>
transpose(const Matrix<T> & A,Matrix<T> & B)115 inline void transpose(const Matrix<T>& A, Matrix<T>& B)
116 {
117   simd::transpose(A.data(), A.rows(), A.cols(), B.data(), B.rows(), B.cols());
118 }
119 
120 
121 /// C = A*diag(B)
122 template<typename T1, typename T2, typename T3>
diag_product(const Matrix<T1> & A,const Vector<T2> & B,Matrix<T3> & C)123 inline void diag_product(const Matrix<T1>& A, const Vector<T2>& B, Matrix<T3>& C)
124 {
125   for (int i = 0; i < C.rows(); ++i)
126     for (int j = 0; j < C.cols(); ++j)
127       C(i, j) = A(i, j) * B[j];
128   //works?
129   //const int ccols = C.cols();
130   //const int ijmax = C.size();
131   //for (int ij=0; ij<ijmax; ++ij)
132   //  C(ij)=A(ij)*B(ij%ccols);
133 }
134 
135 
136 /// C = diag(A)*B
137 template<typename T1, typename T2, typename T3>
diag_product(const Vector<T1> & A,const Matrix<T2> & B,Matrix<T3> & C)138 inline void diag_product(const Vector<T1>& A, const Matrix<T2>& B, Matrix<T3>& C)
139 {
140   for (int i = 0; i < C.rows(); ++i)
141     for (int j = 0; j < C.cols(); ++j)
142       C(i, j) = A[i] * B(i, j);
143 
144 
145   //const int crows = C.rows();
146   //const int ccols = C.cols();
147   //for (int i=0,ijb=0; i<crows; ++i,ijb+=ccols)
148   //{
149   //  const T1 a = A(i);
150   //  for (int j=0; j<ccols; ++j)
151   //  {
152   //    int ij = ijb+j;
153   //    C(ij)=a*B(ij);
154   //  }
155   //}
156   //
157   //const int crows = C.rows();
158   //const int ijmax = C.size();
159   //for (int ij=0; ij<ijmax; ++ij)
160   //  C(ij)=A(ij%crows)*B(ij);
161 }
162 
163 
164 /** static function to perform C=AB for complex matrices
165    *
166    * Call zgemm
167    */
product(const Matrix<double> & A,const Matrix<std::complex<double>> & B,Matrix<double> & C)168 inline void product(const Matrix<double>& A, const Matrix<std::complex<double>>& B, Matrix<double>& C)
169 {
170   std::cerr << " Undefined C=AB with real A and complex B " << std::endl;
171 }
172 
173 /** static function to perform y=Ax for generic matrix and vector
174    */
175 template<typename T>
product(const Matrix<T> & A,const Vector<T> & x,T * restrict yptr)176 inline void product(const Matrix<T>& A, const Vector<T>& x, T* restrict yptr)
177 {
178   const char transa = 'T';
179   const T one       = 1.0;
180   const T zero      = 0.0;
181   BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
182 }
183 
184 /** static function to perform y = A^t x for generic matrix and vector
185    */
186 template<typename T>
product_Atx(const Matrix<T> & A,const Vector<T> & x,T * restrict yptr)187 inline void product_Atx(const Matrix<T>& A, const Vector<T>& x, T* restrict yptr)
188 {
189   const char transa = 'N';
190   const T one       = 1.0;
191   const T zero      = 0.0;
192   BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
193 }
194 
195 /** static function to perform y=Ax for generic matrix and vector
196    */
197 template<typename T>
product(const Matrix<T> & A,const T * restrict xptr,T * restrict yptr)198 inline void product(const Matrix<T>& A, const T* restrict xptr, T* restrict yptr)
199 {
200   const char transa = 'T';
201   const T one       = 1.0;
202   const T zero      = 0.0;
203   BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), xptr, 1, zero, yptr, 1);
204 }
205 
206 /** static function to perform y = A^t x for generic matrix and vector
207    */
208 template<typename T>
product_Atx(const Matrix<T> & A,const T * restrict xptr,T * restrict yptr)209 inline void product_Atx(const Matrix<T>& A, const T* restrict xptr, T* restrict yptr)
210 {
211   const char transa = 'N';
212   const T one       = 1.0;
213   const T zero      = 0.0;
214   BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), xptr, 1, zero, yptr, 1);
215 }
216 
217 /** static function to perform y=Ax for generic matrix and vector
218    */
219 template<typename T, unsigned D>
product(const Matrix<T> & A,const TinyVector<T,D> * xvPtr,TinyVector<T,D> * restrict yptr)220 inline void product(const Matrix<T>& A, const TinyVector<T, D>* xvPtr, TinyVector<T, D>* restrict yptr)
221 {
222   const T one       = 1.0;
223   const T zero      = 0.0;
224   const char transa = 'N';
225   const char transb = 'N';
226   BLAS::gemm(transa, transb, D, A.rows(), A.cols(), one, xvPtr->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
227 }
228 
229 template<typename T, unsigned D>
product(const Matrix<T> & A,const Tensor<T,D> * xvPtr,Tensor<T,D> * restrict yptr)230 inline void product(const Matrix<T>& A, const Tensor<T, D>* xvPtr, Tensor<T, D>* restrict yptr)
231 {
232   const T one       = 1.0;
233   const T zero      = 0.0;
234   const char transa = 'N';
235   const char transb = 'N';
236   BLAS::gemm(transa, transb, D * D, A.rows(), A.cols(), one, xvPtr->begin(), D * D, A.data(), A.cols(), zero,
237              yptr->begin(), D * D);
238 }
239 
240 /** static function to perform y=Ax for generic matrix and vector
241    */
242 template<unsigned D>
product(const Matrix<double> & A,const Vector<TinyVector<double,D>> & x,TinyVector<double,D> * restrict yptr)243 inline void product(const Matrix<double>& A,
244                     const Vector<TinyVector<double, D>>& x,
245                     TinyVector<double, D>* restrict yptr)
246 {
247   const double one  = 1.0;
248   const double zero = 0.0;
249   const char transa = 'N';
250   const char transb = 'N';
251   dgemm(transa, transb, D, A.rows(), x.size(), one, x.data()->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
252 }
253 
254 /** static function to perform y=Ax for generic matrix and vector
255    */
256 template<unsigned D>
product(const Matrix<std::complex<double>> & A,const Vector<TinyVector<std::complex<double>,D>> & x,TinyVector<std::complex<double>,D> * restrict yptr)257 inline void product(const Matrix<std::complex<double>>& A,
258                     const Vector<TinyVector<std::complex<double>, D>>& x,
259                     TinyVector<std::complex<double>, D>* restrict yptr)
260 {
261   const char transa = 'N';
262   const char transb = 'N';
263   const std::complex<double> zone(1.0, 0.0);
264   const std::complex<double> zero(0.0, 0.0);
265   zgemm(transa, transb, D, A.rows(), x.size(), zone, x.data()->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
266 }
267 
268 
269 /** static function to perform y=Ax for generic matrix and vector
270    */
product(const Matrix<std::complex<double>> & A,const Vector<std::complex<double>> & x,std::complex<double> * restrict yptr)271 inline void product(const Matrix<std::complex<double>>& A,
272                     const Vector<std::complex<double>>& x,
273                     std::complex<double>* restrict yptr)
274 {
275   const char transa = 'T';
276   const std::complex<double> zone(1.0, 0.0);
277   const std::complex<double> zero(0.0, 0.0);
278   zgemv(transa, A.cols(), A.rows(), zone, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
279 }
280 
281 /** static function to perform y=Ax for generic matrix and vector
282    */
product(const Matrix<std::complex<double>> & A,const Vector<double> & x,std::complex<double> * restrict yptr)283 inline void product(const Matrix<std::complex<double>>& A, const Vector<double>& x, std::complex<double>* restrict yptr)
284 {
285   const int n                               = A.rows();
286   const int m                               = A.cols();
287   const std::complex<double>* restrict aptr = A.data();
288   for (int i = 0, ij = 0; i < n; ++i)
289   {
290     std::complex<double> t = 0.0;
291     for (int j = 0; j < m; ++j, ++ij)
292       t += aptr[ij] * x[j];
293     yptr[i] = t;
294   }
295 }
296 
product(const Matrix<std::complex<double>> & A,const std::complex<double> * restrict x,std::complex<double> * restrict yptr)297 inline void product(const Matrix<std::complex<double>>& A,
298                     const std::complex<double>* restrict x,
299                     std::complex<double>* restrict yptr)
300 {
301   const char transa = 'T';
302   const std::complex<double> zone(1.0, 0.0);
303   const std::complex<double> zero(0.0, 0.0);
304   zgemv(transa, A.cols(), A.rows(), zone, A.data(), A.cols(), x, 1, zero, yptr, 1);
305 }
306 
307 /** static function to perform y=Ax for generic matrix and vector
308    */
product(const Matrix<double> & A,const Vector<std::complex<double>> & x,double * restrict yptr)309 inline void product(const Matrix<double>& A, const Vector<std::complex<double>>& x, double* restrict yptr)
310 {
311   std::cerr << " Undefined C=AB with real A and complex x " << std::endl;
312 }
313 
314 template<typename T>
product(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C,std::vector<int> & offset)315 inline void product(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C, std::vector<int>& offset)
316 {
317   int nr = C.rows();
318   int nb = offset.size() - 1;
319   for (int i = 0; i < nr; i++)
320   {
321     for (int b = 0; b < nb; b++)
322     {
323       int firstK               = offset[b];
324       int lastK                = offset[b + 1];
325       const T* restrict firstY = A[i] + firstK;
326       const T* restrict lastY  = A[i] + lastK;
327       for (int k = firstK; k < lastK; k++)
328       {
329         C(i, k) = TESTDOT(firstY, lastY, B[k] + firstK);
330       }
331     }
332   }
333 }
334 
335 //    template<typename T>
336 //      inline statis void product(const Matrix<T>& A, const T* restrict x, T* restrict y)
337 //      {
338 //        GEMV<T,0>::apply(A.data(),x,y,A.rows(),A.cols());
339 //      }
340 } // namespace MatrixOperators
341 
342 /** API to handle gemv */
343 namespace simd
344 {
345 template<typename T>
gemv(const Matrix<T> & a,const T * restrict v,T * restrict b)346 inline void gemv(const Matrix<T>& a, const T* restrict v, T* restrict b)
347 {
348   MatrixOperators::product(a, v, b);
349 }
350 
351 template<typename T, unsigned D>
gemv(const Matrix<T> & a,const TinyVector<T,D> * restrict v,TinyVector<T,D> * restrict b)352 inline void gemv(const Matrix<T>& a, const TinyVector<T, D>* restrict v, TinyVector<T, D>* restrict b)
353 {
354   MatrixOperators::product(a, v, b);
355 }
356 
357 template<typename T, unsigned D>
gemv(const Matrix<T> & a,const Tensor<T,D> * restrict v,Tensor<T,D> * restrict b)358 inline void gemv(const Matrix<T>& a, const Tensor<T, D>* restrict v, Tensor<T, D>* restrict b)
359 {
360   MatrixOperators::product(a, v, b);
361 }
362 
363 } // namespace simd
364 
365 } // namespace qmcplusplus
366 #endif
367