1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 //
9 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
10 //////////////////////////////////////////////////////////////////////////////////////
11 // -*- C++ -*-
12 /**@file detemrinant.hpp
13  *
14  * Standalone determinant class for determinant miniapps
15  */
16 #ifndef QMCPLUSPLUS_DETERMINANT_MINIAPPS_H
17 #define QMCPLUSPLUS_DETERMINANT_MINIAPPS_H
18 #include "OhmmsPETE/OhmmsMatrix.h"
19 #include "CPU/SIMD/aligned_allocator.hpp"
20 #include "Numerics/DeterminantOperators.h"
21 
22 namespace qmcplusplus
23 {
24   /**@{Determinant utilities */
25   /** Inversion of a double matrix after LU factorization*/
getri(int n,double * restrict a,int lda,int * restrict piv,double * restrict work,int & lwork)26   inline void getri(int n, double* restrict a, int lda, int* restrict piv, double* restrict work, int& lwork)
27   {
28     int status;
29     dgetri(n,a,lda,piv,work,lwork,status);
30   }
31 
32   /** Inversion of a float matrix after LU factorization*/
getri(int n,float * restrict a,int lda,int * restrict piv,float * restrict work,int & lwork)33   inline void getri(int n, float* restrict a, int lda, int* restrict piv, float* restrict work, int& lwork)
34   {
35     int status;
36     sgetri(n,a,lda,piv,work,lwork,status);
37   }
38 
39   /** Inversion of a std::complex<double> matrix after LU factorization*/
getri(int n,std::complex<double> * restrict a,int lda,int * restrict piv,std::complex<double> * restrict work,int & lwork)40   inline void getri(int n, std::complex<double>* restrict a, int lda, int* restrict piv, std::complex<double>* restrict work, int& lwork)
41   {
42     int status;
43     zgetri(n,a,lda,piv,work,lwork,status);
44   }
45 
46   /** Inversion of a complex<float> matrix after LU factorization*/
getri(int n,std::complex<float> * restrict a,int lda,int * restrict piv,std::complex<float> * restrict work,int & lwork)47   inline void getri(int n, std::complex<float>* restrict a, int lda, int* restrict piv, std::complex<float>* restrict work, int& lwork)
48   {
49     int status;
50     cgetri(n,a,lda,piv,work,lwork,status);
51   }
52 
53   /** query the size of workspace for Xgetri after LU decompisiton */
54   template<class T>
getGetriWorkspace(T * restrict x,int n,int lda,int * restrict pivot)55     inline int getGetriWorkspace(T* restrict x, int n, int lda, int* restrict pivot)
56     {
57       T work;
58       int lwork=-1;
59       getri(n, x, lda, pivot, &work, lwork);
60       lwork=static_cast<int>(work);
61       return lwork;
62     }
63 
64   ///inner product
65   template<typename T1, typename T2, typename T3>
inner_product_n(const T1 * restrict a,const T2 * restrict b,int n,T3 res)66     inline T3 inner_product_n(const T1* restrict a, const T2* restrict b, int n, T3 res)
67     {
68       for(int i=0; i<n; ++i) res += a[i]*b[i];
69       return res;
70     }
71 
72   /** transpose in to out
73    *
74    * Assume: in[n][lda] and out[n][lda]
75    */
76   template<typename TIN, typename TOUT>
transpose(const TIN * restrict in,TOUT * restrict out,int n,int lda)77     inline void transpose(const TIN* restrict in, TOUT* restrict out, int n, int lda)
78     {
79       for(int i=0; i<n; ++i)
80         for(int j=0; j<n; ++j)
81           out[i*lda+j]=in[i+j*lda];
82     }
83 
84   ///used only for debugging or walker move
85   template<class T>
86     void
InvertWithLog(T * restrict x,int n,int lda,T * restrict work,int lwork,int * restrict pivot,std::complex<T> & logdet)87     InvertWithLog(T* restrict x, int n, int lda,
88         T* restrict work, int lwork, int* restrict pivot, std::complex<T>& logdet)
89     {
90       logdet = std::complex<T>(1.0);
91       LUFactorization(n, n, x, lda, pivot);
92       for(int i=0; i<n; i++)
93       {
94         logdet *= (pivot[i] == i+1)?1:-1;
95         logdet *= x[i*lda+i];
96       }
97       getri(n, x, lda, pivot, work, lwork);
98       logdet = std::log(logdet);
99     }
100 
101   ///recompute inverse, do not evaluate log|det|
102   template<class T>
103     inline void
InvertOnly(T * restrict x,int n,int lda,T * restrict work,int * restrict pivot,int lwork)104     InvertOnly(T* restrict x, int n, int lda, T* restrict work, int* restrict pivot, int lwork)
105     {
106       LUFactorization(n,n,x,lda,pivot);
107       getri(n, x, lda, pivot, work, lwork);
108     }
109 
110   /** update Row as implemented in the full code */
111   template<typename T, typename RT>
112   inline void
inverseRowUpdate(T * restrict pinv,const T * restrict tv,int m,int lda,int rowchanged,RT c_ratio_in)113   inverseRowUpdate(T* restrict pinv,  const T* restrict tv, int m, int lda, int rowchanged, RT c_ratio_in)
114   {
115     constexpr T cone(1);
116     constexpr T czero(0);
117     T temp[m], rcopy[m];
118     T c_ratio=cone/c_ratio_in;
119     BLAS::gemv('T', m, m, c_ratio, pinv, m, tv, 1, czero, temp, 1);
120     temp[rowchanged]=cone-c_ratio;
121     copy_n(pinv+m*rowchanged,m,rcopy);
122     BLAS::ger(m,m,-cone,rcopy,1,temp,1,pinv,m);
123   }
124   /**@}*/
125 
126 template<typename T, typename INVT=double>
127 struct DiracDet
128 {
129   ///log|det|
130   std::complex<INVT> LogValue;
131   ///current ratio
132   INVT curRatio;
133   ///workspace size
134   int LWork;
135   ///inverse matrix to be update
136   Matrix<T> psiMinv;
137   ///a SPO set for the row update
138   aligned_vector<T> psiV;
139   ///internal storage to perform inversion correctly
140   Matrix<INVT> psiM;//matrix to be inverted
141   ///random number generator for testing
142   RandomGenerator<T> myRandom;
143 
144   //temporary workspace for inversion
145   aligned_vector<int> pivot;
146   aligned_vector<INVT> work;
147   Matrix<T> psiMsave;
148 
DiracDetqmcplusplus::DiracDet149   explicit DiracDet(int nels)
150   {
151     psiMinv.resize(nels,nels);
152     psiV.resize(nels);
153     psiM.resize(nels,nels);
154 
155     pivot.resize(nels);
156     psiMsave.resize(nels,nels);
157   }
158 
initializeqmcplusplus::DiracDet159   void initialize(RandomGenerator<T> RNG)
160   {
161     int nels=psiM.rows();
162     //get lwork and resize workspace
163     LWork=getGetriWorkspace(psiM.data(),nels,nels,pivot.data());
164     work.resize(LWork);
165 
166     myRandom=RNG;
167     constexpr T shift(0.5);
168     RNG.generate_uniform(psiMsave.data(),nels*nels);
169     psiMsave -= shift;
170 
171     transpose(psiMsave.data(),psiM.data(),nels,nels);
172     InvertWithLog(psiM.data(),nels,nels,work.data(),LWork,pivot.data(),LogValue);
173     copy_n(psiM.data(),nels*nels,psiMinv.data());
174 
175     if(omp_get_num_threads()==1)
176     {
177       checkIdentity(psiMsave,psiM,"Psi_0 * psiM(double)");
178       checkIdentity(psiMsave,psiMinv,"Psi_0 * psiMinv(T)");
179       checkDiff(psiM,psiMinv,"psiM(double)-psiMinv(T)");
180     }
181   }
182 
183   ///recompute the inverse
recomputeqmcplusplus::DiracDet184   inline void recompute()
185   {
186     const int nels=psiV.size();
187     transpose(psiMsave.data(),psiM.data(),nels,nels);
188     InvertOnly(psiM.data(),nels,nels,work.data(),pivot.data(),LWork);
189     copy_n(psiM.data(),nels*nels,psiMinv.data());
190   }
191 
192   /** return determinant ratio for the row replacement
193    * @param iel the row (active particle) index
194    */
ratioqmcplusplus::DiracDet195   inline INVT ratio(int iel)
196   {
197     const int nels=psiV.size();
198     constexpr T shift(0.5);
199     constexpr INVT czero(0);
200     for(int j=0; j<nels; ++j) psiV[j]=myRandom()-shift;
201     curRatio=inner_product_n(psiV.data(),psiMinv[iel],nels,czero);
202     return curRatio;
203   }
204 
205   /** accept the row and update the inverse */
acceptqmcplusplus::DiracDet206   inline void accept(int iel)
207   {
208     const int nels=psiV.size();
209     inverseRowUpdate(psiMinv.data(),psiV.data(),nels,nels,iel,curRatio);
210     copy_n(psiV.data(),nels,psiMsave[iel]);
211   }
212 
debugqmcplusplus::DiracDet213   void debug()
214   {
215     const int nels=psiV.size();
216     constexpr T shift(0.5);
217     constexpr INVT czero(0.0);
218     constexpr INVT eps=10.*numeric_limits<float>::epsilon();
219     double ratio_error=czero;
220     {
221       for(int i=0; i<nels; ++i)
222       {
223         auto r=ratio(i);
224         double ratio_full;
225         if(r>0 && r>myRandom())
226         {
227           accept(i);
228           //update A0
229           copy_n(psiV.data(),nels,psiMsave[i]);
230           transpose(psiMsave.data(),psiM.data(),nels,nels);
231           std::complex<INVT> newlog;
232           InvertWithLog(psiM.data(), nels, nels, work.data(), pivot.data(), newlog);
233 
234           ratio_full=std::exp(std::real(newlog-LogValue));
235           LogValue=newlog;
236           double err=r/ratio_full-1;
237           ratio_error += err;
238 #pragma omp master
239           if(std::abs(err)>eps)
240           {
241             cout << i << " accepted: curRatio " << r << " error " << err <<endl;
242             checkDiff(psiMinv,psiM,"update");
243           }
244         }
245       }
246     }
247 #pragma omp master
248     cout << "Cummulative ratio_error " << ratio_error << endl;
249   }
250 
251 };
252 
253 template<typename MT1, typename MT2>
checkIdentity(const MT1 & a,const MT2 & b,const string & tag)254 void checkIdentity(const MT1& a, const MT2& b, const string& tag)
255 {
256   constexpr double czero(0.0);
257   constexpr double cone(1.0);
258   const int nrows=a.rows();
259   const int ncols=a.cols();
260   double error=czero;
261   for(int i=0; i<nrows; ++i)
262   {
263     for(int j=0; j<nrows; ++j)
264     {
265       double e=inner_product_n(a[i],b[j],ncols,czero);
266       error += (i==j)? std::abs(e-cone):std::abs(e);
267     }
268   }
269 #pragma omp master
270   cout << tag << " Identity Error = " << error/nrows/nrows << endl;
271 }
272 
273 template<typename MT1, typename MT2>
checkDiff(const MT1 & a,const MT2 & b,const string & tag)274 void checkDiff(const MT1& a, const MT2& b, const string& tag)
275 {
276   const int nrows=a.rows();
277   const int ncols=a.cols();
278   constexpr double czero(0.0);
279   double error=czero;
280   for(int i=0; i<nrows; ++i)
281     for(int j=0; j<ncols; ++j)
282       error += std::abs(static_cast<double>(a(i,j)-b(i,j)));
283 #pragma omp master
284   cout << tag << " diff Error = " << error/nrows/nrows << endl;
285 }
286 
287 }
288 #endif
289