1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
8 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 // Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 // Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
11 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
12 //
13 // File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
14 //////////////////////////////////////////////////////////////////////////////////////
15
16
17 #ifndef QMCPLUSPLUS_MATRIXOPERATORS_H
18 #define QMCPLUSPLUS_MATRIXOPERATORS_H
19
20 #include "OhmmsPETE/OhmmsMatrix.h"
21 #include "OhmmsPETE/OhmmsVector.h"
22 #include "OhmmsPETE/TinyVector.h"
23 #include "OhmmsPETE/Tensor.h"
24 #include "CPU/BLAS.hpp"
25 #include "CPU/SIMD/inner_product.hpp"
26
27 namespace qmcplusplus
28 {
29 template<typename T>
TESTDOT(const T * restrict f,const T * restrict l,const T * restrict b)30 inline T TESTDOT(const T* restrict f, const T* restrict l, const T* restrict b)
31 {
32 T s = 0;
33 while (f != l)
34 s += (*f++) * (*b++);
35 return s;
36 }
37
38 namespace MatrixOperators
39 {
40 /** static function to perform C=AB for real matrices
41 *
42 * Call dgemm/zgemm/sgemm/cgemm via BLAS::gemm
43 */
44 template<typename T>
product(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)45 inline void product(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
46 {
47 const char transa = 'N';
48 const char transb = 'N';
49 const T one(1.0);
50 const T zero(0.0);
51 BLAS::gemm(transa, transb, B.cols(), A.rows(), B.rows(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
52 C.cols());
53 }
54
55 template<typename T>
product_ABt(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)56 inline void product_ABt(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
57 {
58 const char transa = 't';
59 const char transb = 'n';
60 const T one(1.0);
61 const T zero(0.0);
62 BLAS::gemm(transa, transb, B.rows(), A.rows(), B.cols(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
63 C.cols());
64 }
65
66 template<typename T>
product_AtB(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C)67 inline void product_AtB(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C)
68 {
69 const char transa = 'n';
70 const char transb = 't';
71 const T one(1.0);
72 const T zero(0.0);
73 BLAS::gemm(transa, transb, B.cols(), A.cols(), B.rows(), one, B.data(), B.cols(), A.data(), A.cols(), zero, C.data(),
74 C.cols());
75 }
76
77
half_outerProduct(const Matrix<double> & M,const Vector<double> & B,int iat,Matrix<double> & C)78 inline void half_outerProduct(const Matrix<double>& M, const Vector<double>& B, int iat, Matrix<double>& C)
79 {
80 for (int i = 0; i < C.rows(); i++)
81 for (int j = 0; j < C.cols(); j++)
82 C(iat, i) += M(i, j) * B[j];
83 //for (int i=0; i<C.rows(); i++)
84 // C(iat,i)+=simd::dot(M[i],B.data(),C.cols());
85 }
86
other_half_outerProduct(const Matrix<double> & M,const Vector<double> & B,int iat,Matrix<double> & C)87 inline void other_half_outerProduct(const Matrix<double>& M, const Vector<double>& B, int iat, Matrix<double>& C)
88 {
89 for (int i = 0; i < C.rows(); i++)
90 for (int j = 0; j < C.cols(); j++)
91 C(i, iat) += M(i, j) * B[j];
92 //for (int i=0; i<C.rows(); i++)
93 // C(i,iat)+=simd::dot(M[i],B.data(),C.cols());
94 }
95
innerProduct(const Vector<double> & A,const Vector<double> & B)96 inline double innerProduct(const Vector<double>& A, const Vector<double>& B)
97 {
98 double tot = 0.0;
99 for (int i = 0; i < A.size(); i++)
100 tot += A[i] * B[i];
101 return tot;
102 //return simd::dot(A.data(),B.data(),A.size());
103 }
104
105
106 template<typename T>
transpose(Matrix<T> & A)107 inline void transpose(Matrix<T>& A)
108 {
109 for (int i = 0; i < A.extent(0); i++)
110 for (int j = 0; j < i; j++)
111 std::swap(A(i, j), A(j, i));
112 }
113
114 template<typename T>
transpose(const Matrix<T> & A,Matrix<T> & B)115 inline void transpose(const Matrix<T>& A, Matrix<T>& B)
116 {
117 simd::transpose(A.data(), A.rows(), A.cols(), B.data(), B.rows(), B.cols());
118 }
119
120
121 /// C = A*diag(B)
122 template<typename T1, typename T2, typename T3>
diag_product(const Matrix<T1> & A,const Vector<T2> & B,Matrix<T3> & C)123 inline void diag_product(const Matrix<T1>& A, const Vector<T2>& B, Matrix<T3>& C)
124 {
125 for (int i = 0; i < C.rows(); ++i)
126 for (int j = 0; j < C.cols(); ++j)
127 C(i, j) = A(i, j) * B[j];
128 //works?
129 //const int ccols = C.cols();
130 //const int ijmax = C.size();
131 //for (int ij=0; ij<ijmax; ++ij)
132 // C(ij)=A(ij)*B(ij%ccols);
133 }
134
135
136 /// C = diag(A)*B
137 template<typename T1, typename T2, typename T3>
diag_product(const Vector<T1> & A,const Matrix<T2> & B,Matrix<T3> & C)138 inline void diag_product(const Vector<T1>& A, const Matrix<T2>& B, Matrix<T3>& C)
139 {
140 for (int i = 0; i < C.rows(); ++i)
141 for (int j = 0; j < C.cols(); ++j)
142 C(i, j) = A[i] * B(i, j);
143
144
145 //const int crows = C.rows();
146 //const int ccols = C.cols();
147 //for (int i=0,ijb=0; i<crows; ++i,ijb+=ccols)
148 //{
149 // const T1 a = A(i);
150 // for (int j=0; j<ccols; ++j)
151 // {
152 // int ij = ijb+j;
153 // C(ij)=a*B(ij);
154 // }
155 //}
156 //
157 //const int crows = C.rows();
158 //const int ijmax = C.size();
159 //for (int ij=0; ij<ijmax; ++ij)
160 // C(ij)=A(ij%crows)*B(ij);
161 }
162
163
164 /** static function to perform C=AB for complex matrices
165 *
166 * Call zgemm
167 */
product(const Matrix<double> & A,const Matrix<std::complex<double>> & B,Matrix<double> & C)168 inline void product(const Matrix<double>& A, const Matrix<std::complex<double>>& B, Matrix<double>& C)
169 {
170 std::cerr << " Undefined C=AB with real A and complex B " << std::endl;
171 }
172
173 /** static function to perform y=Ax for generic matrix and vector
174 */
175 template<typename T>
product(const Matrix<T> & A,const Vector<T> & x,T * restrict yptr)176 inline void product(const Matrix<T>& A, const Vector<T>& x, T* restrict yptr)
177 {
178 const char transa = 'T';
179 const T one = 1.0;
180 const T zero = 0.0;
181 BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
182 }
183
184 /** static function to perform y = A^t x for generic matrix and vector
185 */
186 template<typename T>
product_Atx(const Matrix<T> & A,const Vector<T> & x,T * restrict yptr)187 inline void product_Atx(const Matrix<T>& A, const Vector<T>& x, T* restrict yptr)
188 {
189 const char transa = 'N';
190 const T one = 1.0;
191 const T zero = 0.0;
192 BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
193 }
194
195 /** static function to perform y=Ax for generic matrix and vector
196 */
197 template<typename T>
product(const Matrix<T> & A,const T * restrict xptr,T * restrict yptr)198 inline void product(const Matrix<T>& A, const T* restrict xptr, T* restrict yptr)
199 {
200 const char transa = 'T';
201 const T one = 1.0;
202 const T zero = 0.0;
203 BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), xptr, 1, zero, yptr, 1);
204 }
205
206 /** static function to perform y = A^t x for generic matrix and vector
207 */
208 template<typename T>
product_Atx(const Matrix<T> & A,const T * restrict xptr,T * restrict yptr)209 inline void product_Atx(const Matrix<T>& A, const T* restrict xptr, T* restrict yptr)
210 {
211 const char transa = 'N';
212 const T one = 1.0;
213 const T zero = 0.0;
214 BLAS::gemv(transa, A.cols(), A.rows(), one, A.data(), A.cols(), xptr, 1, zero, yptr, 1);
215 }
216
217 /** static function to perform y=Ax for generic matrix and vector
218 */
219 template<typename T, unsigned D>
product(const Matrix<T> & A,const TinyVector<T,D> * xvPtr,TinyVector<T,D> * restrict yptr)220 inline void product(const Matrix<T>& A, const TinyVector<T, D>* xvPtr, TinyVector<T, D>* restrict yptr)
221 {
222 const T one = 1.0;
223 const T zero = 0.0;
224 const char transa = 'N';
225 const char transb = 'N';
226 BLAS::gemm(transa, transb, D, A.rows(), A.cols(), one, xvPtr->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
227 }
228
229 template<typename T, unsigned D>
product(const Matrix<T> & A,const Tensor<T,D> * xvPtr,Tensor<T,D> * restrict yptr)230 inline void product(const Matrix<T>& A, const Tensor<T, D>* xvPtr, Tensor<T, D>* restrict yptr)
231 {
232 const T one = 1.0;
233 const T zero = 0.0;
234 const char transa = 'N';
235 const char transb = 'N';
236 BLAS::gemm(transa, transb, D * D, A.rows(), A.cols(), one, xvPtr->begin(), D * D, A.data(), A.cols(), zero,
237 yptr->begin(), D * D);
238 }
239
240 /** static function to perform y=Ax for generic matrix and vector
241 */
242 template<unsigned D>
product(const Matrix<double> & A,const Vector<TinyVector<double,D>> & x,TinyVector<double,D> * restrict yptr)243 inline void product(const Matrix<double>& A,
244 const Vector<TinyVector<double, D>>& x,
245 TinyVector<double, D>* restrict yptr)
246 {
247 const double one = 1.0;
248 const double zero = 0.0;
249 const char transa = 'N';
250 const char transb = 'N';
251 dgemm(transa, transb, D, A.rows(), x.size(), one, x.data()->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
252 }
253
254 /** static function to perform y=Ax for generic matrix and vector
255 */
256 template<unsigned D>
product(const Matrix<std::complex<double>> & A,const Vector<TinyVector<std::complex<double>,D>> & x,TinyVector<std::complex<double>,D> * restrict yptr)257 inline void product(const Matrix<std::complex<double>>& A,
258 const Vector<TinyVector<std::complex<double>, D>>& x,
259 TinyVector<std::complex<double>, D>* restrict yptr)
260 {
261 const char transa = 'N';
262 const char transb = 'N';
263 const std::complex<double> zone(1.0, 0.0);
264 const std::complex<double> zero(0.0, 0.0);
265 zgemm(transa, transb, D, A.rows(), x.size(), zone, x.data()->begin(), D, A.data(), A.cols(), zero, yptr->begin(), D);
266 }
267
268
269 /** static function to perform y=Ax for generic matrix and vector
270 */
product(const Matrix<std::complex<double>> & A,const Vector<std::complex<double>> & x,std::complex<double> * restrict yptr)271 inline void product(const Matrix<std::complex<double>>& A,
272 const Vector<std::complex<double>>& x,
273 std::complex<double>* restrict yptr)
274 {
275 const char transa = 'T';
276 const std::complex<double> zone(1.0, 0.0);
277 const std::complex<double> zero(0.0, 0.0);
278 zgemv(transa, A.cols(), A.rows(), zone, A.data(), A.cols(), x.data(), 1, zero, yptr, 1);
279 }
280
281 /** static function to perform y=Ax for generic matrix and vector
282 */
product(const Matrix<std::complex<double>> & A,const Vector<double> & x,std::complex<double> * restrict yptr)283 inline void product(const Matrix<std::complex<double>>& A, const Vector<double>& x, std::complex<double>* restrict yptr)
284 {
285 const int n = A.rows();
286 const int m = A.cols();
287 const std::complex<double>* restrict aptr = A.data();
288 for (int i = 0, ij = 0; i < n; ++i)
289 {
290 std::complex<double> t = 0.0;
291 for (int j = 0; j < m; ++j, ++ij)
292 t += aptr[ij] * x[j];
293 yptr[i] = t;
294 }
295 }
296
product(const Matrix<std::complex<double>> & A,const std::complex<double> * restrict x,std::complex<double> * restrict yptr)297 inline void product(const Matrix<std::complex<double>>& A,
298 const std::complex<double>* restrict x,
299 std::complex<double>* restrict yptr)
300 {
301 const char transa = 'T';
302 const std::complex<double> zone(1.0, 0.0);
303 const std::complex<double> zero(0.0, 0.0);
304 zgemv(transa, A.cols(), A.rows(), zone, A.data(), A.cols(), x, 1, zero, yptr, 1);
305 }
306
307 /** static function to perform y=Ax for generic matrix and vector
308 */
product(const Matrix<double> & A,const Vector<std::complex<double>> & x,double * restrict yptr)309 inline void product(const Matrix<double>& A, const Vector<std::complex<double>>& x, double* restrict yptr)
310 {
311 std::cerr << " Undefined C=AB with real A and complex x " << std::endl;
312 }
313
314 template<typename T>
product(const Matrix<T> & A,const Matrix<T> & B,Matrix<T> & C,std::vector<int> & offset)315 inline void product(const Matrix<T>& A, const Matrix<T>& B, Matrix<T>& C, std::vector<int>& offset)
316 {
317 int nr = C.rows();
318 int nb = offset.size() - 1;
319 for (int i = 0; i < nr; i++)
320 {
321 for (int b = 0; b < nb; b++)
322 {
323 int firstK = offset[b];
324 int lastK = offset[b + 1];
325 const T* restrict firstY = A[i] + firstK;
326 const T* restrict lastY = A[i] + lastK;
327 for (int k = firstK; k < lastK; k++)
328 {
329 C(i, k) = TESTDOT(firstY, lastY, B[k] + firstK);
330 }
331 }
332 }
333 }
334
335 // template<typename T>
336 // inline statis void product(const Matrix<T>& A, const T* restrict x, T* restrict y)
337 // {
338 // GEMV<T,0>::apply(A.data(),x,y,A.rows(),A.cols());
339 // }
340 } // namespace MatrixOperators
341
342 /** API to handle gemv */
343 namespace simd
344 {
345 template<typename T>
gemv(const Matrix<T> & a,const T * restrict v,T * restrict b)346 inline void gemv(const Matrix<T>& a, const T* restrict v, T* restrict b)
347 {
348 MatrixOperators::product(a, v, b);
349 }
350
351 template<typename T, unsigned D>
gemv(const Matrix<T> & a,const TinyVector<T,D> * restrict v,TinyVector<T,D> * restrict b)352 inline void gemv(const Matrix<T>& a, const TinyVector<T, D>* restrict v, TinyVector<T, D>* restrict b)
353 {
354 MatrixOperators::product(a, v, b);
355 }
356
357 template<typename T, unsigned D>
gemv(const Matrix<T> & a,const Tensor<T,D> * restrict v,Tensor<T,D> * restrict b)358 inline void gemv(const Matrix<T>& a, const Tensor<T, D>* restrict v, Tensor<T, D>* restrict b)
359 {
360 MatrixOperators::product(a, v, b);
361 }
362
363 } // namespace simd
364
365 } // namespace qmcplusplus
366 #endif
367