1 #ifndef QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H
2 #define QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H
3 
4 #include <fstream>
5 
6 #include "AFQMC/config.h"
7 #include "Message/MPIObjectBase.h"
8 #include "Numerics/DeterminantOperators.h"
9 #include "Numerics/MatrixOperators.h"
10 #include "Message/Communicate.h"
11 //#include "Message/CommOperators.h"
12 
13 #include "AFQMC/Hamiltonians/HamiltonianBase.h"
14 #include "AFQMC/Walkers/SlaterDetWalker.h"
15 #include "AFQMC/Numerics/DenseMatrixOperations.h"
16 #include "AFQMC/Numerics/SparseMatrixOperations.h"
17 
18 namespace qmcplusplus
19 {
20 //class SlaterDetOperations: public EstimatorBase
21 class SlaterDetOperations : public MPIObjectBase, public AFQMCInfo
22 {
23 public:
24   typedef HamiltonianBase* HamPtr;
25 
SlaterDetOperations(Communicate * c)26   SlaterDetOperations(Communicate* c) : MPIObjectBase(c), ham(NULL) {}
27 
setup(HamPtr h,myTimer * timer_)28   void setup(HamPtr h, myTimer* timer_)
29   {
30     ham   = h;
31     timer = timer_;
32     GF.resize(2 * NMO, NMO);
33     tGF.resize(2 * NMO, NMO);
34     V0.resize(2 * NMO * NMO);
35     Cwork.resize(2 * NMO);
36     pivot.resize(2 * NMO);
37   }
38 
39   void green_function(ComplexType* A, ComplexType* B, ComplexType& ovlp, SPComplexMatrix& G, bool getG = true)
40   {
41     const ComplexType one  = ComplexType(1.0);
42     const ComplexType zero = ComplexType(0.0);
43 
44     // G = transpose( B * ( transpose(conjg(A)) * B )^-1 * transpose(conjg(A)) )
45     ComplexMatrix S0(NAEA, NAEA);
46     ComplexMatrix S1(NAEB, NAEB);
47     ComplexMatrix SS0(2 * NMO, NAEA);
48 
49 
50     if (getG)
51       tGF = ComplexType(0.0);
52     S0 = ComplexType(0.0);
53     S1 = ComplexType(0.0);
54 
55     // S0 = transpose(conjg(A))*B
56     DenseMatrixOperators::product_AhB(NAEA, NAEA, NMO, one, A, NAEA, B, NAEA, zero, S0.data(), NAEA);
57 
58     // S0 = S0^-1
59     ovlp = Invert(S0.data(), NAEA, NAEA, Cwork.data(), pivot.data());
60 
61     // SS0 = B * S0
62     if (getG)
63       DenseMatrixOperators::product(NMO, NAEA, NAEA, one, B, NAEA, S0.data(), NAEA, zero, SS0.data(), NAEA);
64     // G(beta) = SS0*transpose(conjg(A))
65     if (getG)
66       DenseMatrixOperators::product_ABh(NMO, NMO, NAEA, one, SS0.data(), NAEA, A, NAEA, zero, tGF.data(), NMO);
67 
68     // S1 = transpose(conjg(A))*B
69     DenseMatrixOperators::product_AhB(NAEB, NAEB, NMO, one, A + NMO * NAEA, NAEA, B + NAEA * NMO, NAEA, zero, S1.data(),
70                                       NAEB);
71 
72     // S0 = S0^-1
73     ovlp *= Invert(S1.data(), NAEB, NAEB, Cwork.data(), pivot.data());
74 
75     if (!getG)
76       return;
77 
78     if (std::abs(ovlp) < 1e-6)
79     {
80       G = SPComplexType(0.0);
81       return;
82     }
83 
84     // SS0(beta) = B(beta) * S1
85     DenseMatrixOperators::product(NMO, NAEB, NAEB, one, B + NAEA * NMO, NAEA, S1.data(), NAEB, zero,
86                                   SS0.data() + NAEA * NMO, NAEA);
87 
88     // G(beta) = SS0*transpose(conjg(A))
89     DenseMatrixOperators::product_ABh(NMO, NMO, NAEB, one, SS0.data() + NAEA * NMO, NAEA, A + NAEA * NMO, NAEA, zero,
90                                       tGF.data() + NMO * NMO, NMO);
91 
92     for (int i = 0; i < NMO; i++)
93       for (int j = 0; j < i; j++)
94       {
95         std::swap(tGF(i, j), tGF(j, i));
96         std::swap(tGF(i + NMO, j), tGF(j + NMO, i));
97       }
98     std::copy(tGF.begin(), tGF.end(), G.begin());
99   }
100 
101   void green_function(ComplexMatrix& A, ComplexMatrix& B, ComplexType& ovlp, ComplexMatrix& G, bool getG = true)
102   {
103     const ComplexType one  = ComplexType(1.0);
104     const ComplexType zero = ComplexType(0.0);
105 
106     // G = transpose( B * ( transpose(conjg(A)) * B )^-1 * transpose(conjg(A)) )
107     ComplexMatrix S0(NAEA, NAEA);
108     ComplexMatrix S1(NAEB, NAEB);
109     ComplexMatrix SS0(2 * NMO, NAEA);
110 
111     if (getG)
112       G = ComplexType(0.0);
113     S0 = ComplexType(0.0);
114     S1 = ComplexType(0.0);
115 
116     // S0 = transpose(conjg(A))*B
117     DenseMatrixOperators::product_AhB(NAEA, NAEA, NMO, one, A.data(), NAEA, B.data(), NAEA, zero, S0.data(), NAEA);
118 
119     // S0 = S0^-1
120     ovlp = Invert(S0.data(), NAEA, NAEA, Cwork.data(), pivot.data());
121 
122     // SS0 = B * S0
123     if (getG)
124       DenseMatrixOperators::product(NMO, NAEA, NAEA, one, B.data(), NAEA, S0.data(), NAEA, zero, SS0.data(), NAEA);
125     // G(beta) = SS0*transpose(conjg(A))
126     if (getG)
127       DenseMatrixOperators::product_ABh(NMO, NMO, NAEA, one, SS0.data(), NAEA, A.data(), NAEA, zero, G.data(), NMO);
128 
129     // S1 = transpose(conjg(A))*B
130     DenseMatrixOperators::product_AhB(NAEB, NAEB, NMO, one, A.data() + NMO * NAEA, NAEA, B.data() + NAEA * NMO, NAEA,
131                                       zero, S1.data(), NAEB);
132 
133     // S0 = S0^-1
134     ovlp *= Invert(S1.data(), NAEB, NAEB, Cwork.data(), pivot.data());
135 
136     if (!getG)
137       return;
138 
139     if (std::abs(ovlp) < 1e-6)
140     {
141       G = ComplexType(0.0);
142       return;
143     }
144 
145     // SS0(beta) = B(beta) * S1
146     DenseMatrixOperators::product(NMO, NAEB, NAEB, one, B.data() + NAEA * NMO, NAEA, S1.data(), NAEB, zero,
147                                   SS0.data() + NAEA * NMO, NAEA);
148 
149     // G(beta) = SS0*transpose(conjg(A))
150     DenseMatrixOperators::product_ABh(NMO, NMO, NAEB, one, SS0.data() + NAEA * NMO, NAEA, A.data() + NAEA * NMO, NAEA,
151                                       zero, G.data() + NMO * NMO, NMO);
152 
153     for (int i = 0; i < NMO; i++)
154       for (int j = 0; j < i; j++)
155       {
156         std::swap(G(i, j), G(j, i));
157         std::swap(G(i + NMO, j), G(j + NMO, i));
158       }
159   }
160 
matrix_element_and_overlap(ComplexType * A,ComplexType * B,ComplexType & ovlp,ComplexType & hamME)161   void matrix_element_and_overlap(ComplexType* A, ComplexType* B, ComplexType& ovlp, ComplexType& hamME)
162   {
163     green_function(A, B, ovlp, GF);
164 
165     if (std::abs(ovlp) < 1e-6)
166     {
167       ovlp  = ComplexType(0.0);
168       hamME = ComplexType(0.0);
169       return;
170     }
171 
172     SPValueSMSpMat* V;
173     std::vector<s1D<ValueType>>* h;
174     int nr1 = 1, nc1 = 2 * NMO * NMO;
175     SPValueType one  = SPValueType(1.0);
176     SPValueType zero = SPValueType(0.0);
177 
178     ham->getFullHam(h, V);
179 
180     hamME = ComplexType(0.0);
181 
182     SparseMatrixOperators::product_SpMatV(nc1, nc1, one, V->values(), V->column_data(), V->row_index(), GF.data(), zero,
183                                           V0.data());
184 
185     SPComplexMatrix::iterator itG = GF.begin();
186     SPComplexVector::iterator itV = V0.begin();
187     for (int i = 0; i < nc1; i++, ++itG, ++itV)
188       hamME += static_cast<ComplexType>(*itV) * static_cast<ComplexType>(*itG);
189     hamME = 0.5 * hamME + ham->NuclearCoulombEnergy;
190 
191     std::vector<s1D<ValueType>>::iterator end1 = h->end();
192     itG                                        = GF.begin();
193     for (std::vector<s1D<ValueType>>::iterator it = h->begin(); it != end1; it++)
194       hamME += static_cast<ComplexType>(*(itG + std::get<0>(*it))) * std::get<1>(*it);
195 
196     hamME *= ovlp;
197   }
198 
199   void diag(std::vector<SlaterDetWalker>::iterator itbegin,
200             std::vector<SlaterDetWalker>::iterator itend,
201             int nstates,
202             std::vector<RealType>& eigVal,
203             ComplexMatrix& eigVec,
204             ComplexType& exactEnergy,
205             ComplexMatrix* HF,
206             bool getEigV = false)
207   {
208     if (myComm->size() > 1)
209       APP_ABORT(" ERROR: Estimators::SlaterDetOperations::diag(): Only implemented in serial. \n");
210 
211     int N = 0;
212     for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++)
213       if (it1->alive && std::abs(it1->weight) > 1e-3)
214         N++;
215     if (N == 0)
216       return;
217     if (HF != NULL)
218       N++;
219     nstates = std::min(nstates, N);
220     ComplexMatrix H(N), S(N);
221     exactEnergy      = ComplexType(0.0);
222     ComplexType nume = ComplexType(0.0);
223     ComplexType deno = ComplexType(0.0);
224 #ifdef AFQMC_TIMER
225     timer->start("SlaterDetOperations::diag::evaluate_H_S");
226 #endif
227     int i = 0;
228     if (HF != NULL)
229     { // always include HF state in list
230       int j = i;
231       matrix_element_and_overlap(HF->data(), HF->data(), S(i, j), H(i, j));
232       j++;
233       for (std::vector<SlaterDetWalker>::iterator it2 = itbegin; it2 != itend; it2++)
234       {
235         if (!(it2->alive && std::abs(it2->weight) > 1e-3))
236           continue;
237         matrix_element_and_overlap(HF->data(), (it2->SlaterMat).data(), S(i, j), H(i, j));
238         if (i != j)
239         {
240           H(j, i) = ma::conj(H(i, j));
241           S(j, i) = ma::conj(S(i, j));
242         }
243         j++;
244       }
245       i++;
246     }
247     for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++)
248     {
249       if (!(it1->alive && std::abs(it1->weight) > 1e-3))
250         continue;
251       int j = i;
252       for (std::vector<SlaterDetWalker>::iterator it2 = it1; it2 != itend; it2++)
253       {
254         if (!(it2->alive && std::abs(it2->weight) > 1e-3))
255           continue;
256         matrix_element_and_overlap((it1->SlaterMat).data(), (it2->SlaterMat).data(), S(i, j), H(i, j));
257         nume += it1->weight * it2->weight * H(i, j);
258         deno += it1->weight * it2->weight * S(i, j);
259         if (i != j)
260         {
261           H(j, i) = ma::conj(H(i, j));
262           S(j, i) = ma::conj(S(i, j));
263           nume += ma::conj(it1->weight) * it2->weight * H(j, i) /
264               (ma::conj(std::get<0>(it1->overlap_alpha) * std::get<0>(it1->overlap_beta)) *
265                std::get<0>(it2->overlap_alpha) * std::get<0>(it2->overlap_beta));
266           deno += ma::conj(it1->weight) * it2->weight * S(j, i) /
267               (ma::conj(std::get<0>(it1->overlap_alpha) * std::get<0>(it1->overlap_beta)) *
268                std::get<0>(it2->overlap_alpha) * std::get<0>(it2->overlap_beta));
269         }
270         j++;
271       }
272       i++;
273     }
274     exactEnergy = nume / deno;
275 #ifdef AFQMC_TIMER
276     timer->stop("SlaterDetOperations::diag::evaluate_H_S");
277 #endif
278 #ifdef AFQMC_TIMER
279     timer->start("SlaterDetOperations::diag::solve_GEV");
280 #endif
281     if (nstates > 0)
282     {
283       std::vector<int> ifail(N);
284       eigVal.resize(nstates);
285       getEigV = true;
286       if (getEigV)
287         eigVec.resize(nstates, N);
288       /*
289 std::ofstream out1("Hr.dat");
290 std::ofstream out2("Hc.dat");
291 std::ofstream out3("Sr.dat");
292 std::ofstream out4("Sc.dat");
293 for(int i=0; i<N; i++) {
294 for(int j=0; j<N; j++) out1<<H(i,j).real() <<" ";
295 for(int j=0; j<N; j++) out2<<H(i,j).imag() <<" ";
296 out1<<std::endl;
297 out2<<std::endl;
298 }
299 for(int i=0; i<N; i++) {
300 for(int j=0; j<N; j++) out3<<S(i,j).real() <<" ";
301 for(int j=0; j<N; j++) out4<<S(i,j).imag() <<" ";
302 out3<<std::endl;
303 out4<<std::endl;
304 }
305 out1.close();
306 out2.close();
307 out3.close();
308 out4.close();
309 APP_ABORT("Testing. \n");
310 */
311       bool success =
312           DenseMatrixOperators::genHermitianEigenSysSelect(N, H.data(), N, S.data(), N, nstates, eigVal.data(), getEigV,
313                                                            eigVec.data(), eigVec.size2(), ifail.data());
314       if (!success)
315         for (int i = 0; i < nstates; i++)
316           eigVal[i] = 0.0;
317       else
318       {
319         std::ofstream out("diag.dat", std::ios_base::app | std::ios_base::out);
320         std::vector<double> coeff(N);
321         for (int i = 0; i < N; i++)
322           coeff[i] = std::abs(eigVec(1, i));
323         std::sort(coeff.begin(), coeff.end());
324         for (int i = 0; i < N; i++)
325           out << coeff[i] << " ";
326         out << std::endl;
327         out.close();
328       }
329     }
330 #ifdef AFQMC_TIMER
331     timer->stop("SlaterDetOperations::diag::solve_GEV");
332 #endif
333   }
334 
overlap(std::vector<SlaterDetWalker>::iterator itbegin,std::vector<SlaterDetWalker>::iterator itend)335   ComplexType overlap(std::vector<SlaterDetWalker>::iterator itbegin, std::vector<SlaterDetWalker>::iterator itend)
336   {
337     std::vector<ComplexType> ovlp(2, ComplexType(0.0));
338     ComplexType sum_w = ComplexType(0.0);
339     ComplexMatrix A(2 * NMO, NAEA);
340     ComplexMatrix B(2 * NMO, NAEA);
341     ComplexMatrix G(1);
342     std::vector<char> buffer_in;
343     std::vector<char> buffer_out;
344     std::vector<int> to(1);
345     std::vector<int> from(myComm->size());
346     int sz    = itbegin->sizeForDump();
347     int nWtot = 0, nW = 0, nWmax = 0;
348     for (std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1 != itend; it1++)
349       if (it1->alive)
350         nW++;
351 
352     to[0] = nW;
353     myComm->allgather(to, from, 1);
354     for (int i = 0; i < myComm->size(); i++)
355     {
356       nWtot += from[i];
357       if (from[i] > nWmax)
358         nWmax = from[i];
359     }
360 
361     buffer_out.resize(nW * sz);
362 
363     int cnt = 0;
364     for (std::vector<SlaterDetWalker>::iterator it = itbegin; it != itend; it++)
365       if (it->alive)
366       {
367         it->dumpToChar(buffer_out.data() + cnt);
368         cnt += sz;
369       }
370 
371     ovlp[0] = ovlp[1] = ComplexType(0.0);
372     ComplexType w1, w2, o1, o2, e1, e2, ov;
373     // diagonal contribution
374     for (int i = 0; i < nW; i++)
375     {
376       itbegin->unpackFromChar(buffer_out.data() + sz * i, A, w1, e1, o1);
377       ovlp[0] += w1;
378       for (int j = i; j < nW; j++)
379       {
380         itbegin->unpackFromChar(buffer_out.data() + j * sz, B, w2, e2, o2);
381         green_function(A, B, ov, G, false);
382         ovlp[1] +=
383             ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1);
384       }
385     }
386 
387     if (myComm->size() == 1)
388       return ovlp[0] / std::sqrt(std::abs(ovlp[1]));
389     buffer_in.resize(nWmax * sz);
390     int rec  = (myComm->rank() + 1) % (myComm->size());
391     int send = (myComm->rank() - 1) % (myComm->size());
392     for (int i = 0; i < myComm->size() - 1; i++)
393     {
394       //        myComm->isend(send, send*myComm->size()+myComm->rank() ,buffer_out);
395       //        myComm->irecv(rec, myComm->rank()*myComm->size()+rec ,buffer_in);
396 
397       // dump way to avoid double counting, but efficiency depends heavily on load balance
398       if (rec < myComm->rank())
399       {
400         // I only do the top half
401         for (int i = 0; i < from[rec] / 2; i++)
402         {
403           itbegin->unpackFromChar(buffer_in.data() + sz * i, A, w1, e1, o1);
404           for (int j = 0; j < nW; j++)
405           {
406             itbegin->unpackFromChar(buffer_out.data() + j * sz, B, w2, e2, o2);
407             green_function(A, B, ov, G, false);
408             ovlp[1] +=
409                 ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1);
410           }
411         }
412       }
413       else
414       {
415         // I only do the bottom half
416         for (int i = nW / 2; i < nW; i++)
417         {
418           itbegin->unpackFromChar(buffer_out.data() + sz * i, A, w1, e1, o1);
419           for (int j = 0; j < from[rec]; j++)
420           {
421             itbegin->unpackFromChar(buffer_in.data() + j * sz, B, w2, e2, o2);
422             green_function(A, B, ov, G, false);
423             ovlp[1] +=
424                 ma::conj(w1) * w2 * ov / (ma::conj(o1) * o2) + ma::conj(w2) * w1 * ma::conj(ov) / (ma::conj(o2) * o1);
425           }
426         }
427       }
428 
429       rec  = (rec + 1) % (myComm->size());
430       send = (send - 1) % (myComm->size());
431     }
432 
433     std::vector<ComplexType> res(2);
434     //myComm->gsum(ovlp);
435     return res[0] / std::sqrt(std::abs(res[1]));
436   }
437 
438 private:
439   std::vector<ComplexType> Cwork;
440   std::vector<int> pivot;
441 
442   HamPtr ham;
443   ComplexMatrix tGF;
444   SPComplexMatrix GF;
445   SPComplexVector V0;
446   myTimer* timer;
447 };
448 
449 } // namespace qmcplusplus
450 
451 #endif
452