1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /**@file detemrinant.hpp
13 *
14 * Standalone determinant class for determinant miniapps
15 */
16 #ifndef QMCPLUSPLUS_DETERMINANT_MINIAPPS_H
17 #define QMCPLUSPLUS_DETERMINANT_MINIAPPS_H
18 #include "OhmmsPETE/OhmmsMatrix.h"
19 #include "CPU/SIMD/aligned_allocator.hpp"
20 #include "Numerics/DeterminantOperators.h"
21
22 namespace qmcplusplus
23 {
24 /**@{Determinant utilities */
25 /** Inversion of a double matrix after LU factorization*/
getri(int n,double * restrict a,int lda,int * restrict piv,double * restrict work,int & lwork)26 inline void getri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork)
27 {
28 int status;
29 dgetri(n,a,lda,piv,work,lwork,status);
30 }
31
32 /** Inversion of a float matrix after LU factorization*/
getri(int n,float * restrict a,int lda,int * restrict piv,float * restrict work,int & lwork)33 inline void getri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork)
34 {
35 int status;
36 sgetri(n,a,lda,piv,work,lwork,status);
37 }
38
39 /** Inversion of a std::complex<double> matrix after LU factorization*/
getri(int n,std::complex<double> * restrict a,int lda,int * restrict piv,std::complex<double> * restrict work,int & lwork)40 inline void getri(int n, std::complex<double>* restrict a, int lda, int* restrict piv, std::complex<double>* restrict work, int& lwork)
41 {
42 int status;
43 zgetri(n,a,lda,piv,work,lwork,status);
44 }
45
46 /** Inversion of a complex<float> matrix after LU factorization*/
getri(int n,std::complex<float> * restrict a,int lda,int * restrict piv,std::complex<float> * restrict work,int & lwork)47 inline void getri(int n, std::complex<float>* restrict a, int lda, int* restrict piv, std::complex<float>* restrict work, int& lwork)
48 {
49 int status;
50 cgetri(n,a,lda,piv,work,lwork,status);
51 }
52
53 /** query the size of workspace for Xgetri after LU decompisiton */
54 template<class T>
getGetriWorkspace(T * restrict x,int n,int lda,int * restrict pivot)55 inline int getGetriWorkspace(T* restrict x, int n, int lda, int* restrict pivot)
56 {
57 T work;
58 int lwork=-1;
59 getri(n, x, lda, pivot, &work, lwork);
60 lwork=static_cast<int>(work);
61 return lwork;
62 }
63
64 ///inner product
65 template<typename T1, typename T2, typename T3>
inner_product_n(const T1 * restrict a,const T2 * restrict b,int n,T3 res)66 inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res)
67 {
68 for(int i=0; i<n; ++i) res += a[i]*b[i];
69 return res;
70 }
71
72 /** transpose in to out
73 *
74 * Assume: in[n][lda] and out[n][lda]
75 */
76 template<typename TIN, typename TOUT>
transpose(const TIN * restrict in,TOUT * restrict out,int n,int lda)77 inline void transpose(const TIN* restrict in, TOUT* restrict out, int n, int lda)
78 {
79 for(int i=0; i<n; ++i)
80 for(int j=0; j<n; ++j)
81 out[i*lda+j]=in[i+j*lda];
82 }
83
84 ///used only for debugging or walker move
85 template<class T>
86 void
InvertWithLog(T * restrict x,int n,int lda,T * restrict work,int lwork,int * restrict pivot,std::complex<T> & logdet)87 InvertWithLog(T* restrict x, int n, int lda,
88 T* restrict work, int lwork, int* restrict pivot, std::complex<T>& logdet)
89 {
90 logdet = std::complex<T>(1.0);
91 LUFactorization(n, n, x, lda, pivot);
92 for(int i=0; i<n; i++)
93 {
94 logdet *= (pivot[i] == i+1)?1:-1;
95 logdet *= x[i*lda+i];
96 }
97 getri(n, x, lda, pivot, work, lwork);
98 logdet = std::log(logdet);
99 }
100
101 ///recompute inverse, do not evaluate log|det|
102 template<class T>
103 inline void
InvertOnly(T * restrict x,int n,int lda,T * restrict work,int * restrict pivot,int lwork)104 InvertOnly(T* restrict x, int n, int lda, T* restrict work, int* restrict pivot, int lwork)
105 {
106 LUFactorization(n,n,x,lda,pivot);
107 getri(n, x, lda, pivot, work, lwork);
108 }
109
110 /** update Row as implemented in the full code */
111 template<typename T, typename RT>
112 inline void
inverseRowUpdate(T * restrict pinv,const T * restrict tv,int m,int lda,int rowchanged,RT c_ratio_in)113 inverseRowUpdate(T* restrict pinv, const T* restrict tv, int m, int lda, int rowchanged, RT c_ratio_in)
114 {
115 constexpr T cone(1);
116 constexpr T czero(0);
117 T temp[m], rcopy[m];
118 T c_ratio=cone/c_ratio_in;
119 BLAS::gemv('T', m, m, c_ratio, pinv, m, tv, 1, czero, temp, 1);
120 temp[rowchanged]=cone-c_ratio;
121 copy_n(pinv+m*rowchanged,m,rcopy);
122 BLAS::ger(m,m,-cone,rcopy,1,temp,1,pinv,m);
123 }
124 /**@}*/
125
126 template<typename T, typename INVT=double>
127 struct DiracDet
128 {
129 ///log|det|
130 std::complex<INVT> LogValue;
131 ///current ratio
132 INVT curRatio;
133 ///workspace size
134 int LWork;
135 ///inverse matrix to be update
136 Matrix<T> psiMinv;
137 ///a SPO set for the row update
138 aligned_vector<T> psiV;
139 ///internal storage to perform inversion correctly
140 Matrix<INVT> psiM;//matrix to be inverted
141 ///random number generator for testing
142 RandomGenerator<T> myRandom;
143
144 //temporary workspace for inversion
145 aligned_vector<int> pivot;
146 aligned_vector<INVT> work;
147 Matrix<T> psiMsave;
148
DiracDetqmcplusplus::DiracDet149 explicit DiracDet(int nels)
150 {
151 psiMinv.resize(nels,nels);
152 psiV.resize(nels);
153 psiM.resize(nels,nels);
154
155 pivot.resize(nels);
156 psiMsave.resize(nels,nels);
157 }
158
initializeqmcplusplus::DiracDet159 void initialize(RandomGenerator<T> RNG)
160 {
161 int nels=psiM.rows();
162 //get lwork and resize workspace
163 LWork=getGetriWorkspace(psiM.data(),nels,nels,pivot.data());
164 work.resize(LWork);
165
166 myRandom=RNG;
167 constexpr T shift(0.5);
168 RNG.generate_uniform(psiMsave.data(),nels*nels);
169 psiMsave -= shift;
170
171 transpose(psiMsave.data(),psiM.data(),nels,nels);
172 InvertWithLog(psiM.data(),nels,nels,work.data(),LWork,pivot.data(),LogValue);
173 copy_n(psiM.data(),nels*nels,psiMinv.data());
174
175 if(omp_get_num_threads()==1)
176 {
177 checkIdentity(psiMsave,psiM,"Psi_0 * psiM(double)");
178 checkIdentity(psiMsave,psiMinv,"Psi_0 * psiMinv(T)");
179 checkDiff(psiM,psiMinv,"psiM(double)-psiMinv(T)");
180 }
181 }
182
183 ///recompute the inverse
recomputeqmcplusplus::DiracDet184 inline void recompute()
185 {
186 const int nels=psiV.size();
187 transpose(psiMsave.data(),psiM.data(),nels,nels);
188 InvertOnly(psiM.data(),nels,nels,work.data(),pivot.data(),LWork);
189 copy_n(psiM.data(),nels*nels,psiMinv.data());
190 }
191
192 /** return determinant ratio for the row replacement
193 * @param iel the row (active particle) index
194 */
ratioqmcplusplus::DiracDet195 inline INVT ratio(int iel)
196 {
197 const int nels=psiV.size();
198 constexpr T shift(0.5);
199 constexpr INVT czero(0);
200 for(int j=0; j<nels; ++j) psiV[j]=myRandom()-shift;
201 curRatio=inner_product_n(psiV.data(),psiMinv[iel],nels,czero);
202 return curRatio;
203 }
204
205 /** accept the row and update the inverse */
acceptqmcplusplus::DiracDet206 inline void accept(int iel)
207 {
208 const int nels=psiV.size();
209 inverseRowUpdate(psiMinv.data(),psiV.data(),nels,nels,iel,curRatio);
210 copy_n(psiV.data(),nels,psiMsave[iel]);
211 }
212
debugqmcplusplus::DiracDet213 void debug()
214 {
215 const int nels=psiV.size();
216 constexpr T shift(0.5);
217 constexpr INVT czero(0.0);
218 constexpr INVT eps=10.*numeric_limits<float>::epsilon();
219 double ratio_error=czero;
220 {
221 for(int i=0; i<nels; ++i)
222 {
223 auto r=ratio(i);
224 double ratio_full;
225 if(r>0 && r>myRandom())
226 {
227 accept(i);
228 //update A0
229 copy_n(psiV.data(),nels,psiMsave[i]);
230 transpose(psiMsave.data(),psiM.data(),nels,nels);
231 std::complex<INVT> newlog;
232 InvertWithLog(psiM.data(), nels, nels, work.data(), pivot.data(), newlog);
233
234 ratio_full=std::exp(std::real(newlog-LogValue));
235 LogValue=newlog;
236 double err=r/ratio_full-1;
237 ratio_error += err;
238 #pragma omp master
239 if(std::abs(err)>eps)
240 {
241 cout << i << " accepted: curRatio " << r << " error " << err <<endl;
242 checkDiff(psiMinv,psiM,"update");
243 }
244 }
245 }
246 }
247 #pragma omp master
248 cout << "Cummulative ratio_error " << ratio_error << endl;
249 }
250
251 };
252
253 template<typename MT1, typename MT2>
checkIdentity(const MT1 & a,const MT2 & b,const string & tag)254 void checkIdentity(const MT1& a, const MT2& b, const string& tag)
255 {
256 constexpr double czero(0.0);
257 constexpr double cone(1.0);
258 const int nrows=a.rows();
259 const int ncols=a.cols();
260 double error=czero;
261 for(int i=0; i<nrows; ++i)
262 {
263 for(int j=0; j<nrows; ++j)
264 {
265 double e=inner_product_n(a[i],b[j],ncols,czero);
266 error += (i==j)? std::abs(e-cone):std::abs(e);
267 }
268 }
269 #pragma omp master
270 cout << tag << " Identity Error = " << error/nrows/nrows << endl;
271 }
272
273 template<typename MT1, typename MT2>
checkDiff(const MT1 & a,const MT2 & b,const string & tag)274 void checkDiff(const MT1& a, const MT2& b, const string& tag)
275 {
276 const int nrows=a.rows();
277 const int ncols=a.cols();
278 constexpr double czero(0.0);
279 double error=czero;
280 for(int i=0; i<nrows; ++i)
281 for(int j=0; j<ncols; ++j)
282 error += std::abs(static_cast<double>(a(i,j)-b(i,j)));
283 #pragma omp master
284 cout << tag << " diff Error = " << error/nrows/nrows << endl;
285 }
286
287 }
288 #endif
289