//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//
// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
11 
#ifndef QMCPLUSPLUS_AFQMC_FULLOBSHANDLER_HPP
#define QMCPLUSPLUS_AFQMC_FULLOBSHANDLER_HPP

#include <algorithm>
#include <cctype>
#include <complex>
#include <iostream>
#include <numeric>
#include <string>
#include <tuple>
#include <vector>

#include "hdf/hdf_multi.h"
#include "hdf/hdf_archive.h"
#include "OhmmsData/libxmldefs.h"

#include "AFQMC/Estimators/Observables/Observable.hpp"
#include "AFQMC/config.h"
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Numerics/ma_operations.hpp"
#include "AFQMC/Wavefunctions/Wavefunction.hpp"
#include "AFQMC/Walkers/WalkerSet.hpp"
#include "AFQMC/Memory/buffer_managers.h"
30 
31 namespace qmcplusplus
32 {
33 namespace afqmc
34 {
35 /*
36  * This class manages a list of "full" observables.
37  * Full observables are those that have a walker dependent left-hand side,
38  * which result from back propagation.
39  * This implementation of the class assumes a multi-determinant trial wavefunction,
40  * resulting in the loop over "references" (each determinant in the trial wavefunciton
41  * being back-propagated).
42  * Given a walker set and an array of (back propagated) slater matrices,
43  * this routine will calculate and accumulate all requested observables.
44  * To make the implementation of the BackPropagated class cleaner,
45  * this class also handles all the hdf5 I/O (given a hdf archive).
46  */
47 class FullObsHandler : public AFQMCInfo
48 {
49   // allocators
50   using sharedAllocator = localTG_allocator<ComplexType>;
51 
52   using shared_pointer       = typename sharedAllocator::pointer;
53   using const_shared_pointer = typename sharedAllocator::const_pointer;
54 
55   using devCMatrix_ptr = boost::multi::array_ptr<ComplexType, 2, device_ptr<ComplexType>>;
56 
57   using sharedCVector      = boost::multi::array<ComplexType, 1, sharedAllocator>;
58   using sharedCVector_ref  = boost::multi::array_ref<ComplexType, 1, shared_pointer>;
59   using sharedCMatrix_ref  = boost::multi::array_ref<ComplexType, 2, shared_pointer>;
60   using sharedC4Tensor_ref = boost::multi::array_ref<ComplexType, 4, shared_pointer>;
61 
62   using mpi3C4Tensor = boost::multi::array<ComplexType, 4, shared_allocator<ComplexType>>;
63 
64   using stdCVector     = boost::multi::array<ComplexType, 1>;
65   using stdCMatrix     = boost::multi::array<ComplexType, 2>;
66   using stdCVector_ref = boost::multi::array_ref<ComplexType, 1>;
67 
68   using shm_stack_alloc_type = LocalTGBufferManager::template allocator_t<ComplexType>;
69   using StaticSHMVector      = boost::multi::static_array<ComplexType, 1, shm_stack_alloc_type>;
70   using StaticSHM4Tensor     = boost::multi::static_array<ComplexType, 4, shm_stack_alloc_type>;
71 
72 public:
FullObsHandler(afqmc::TaskGroup_ & tg_,AFQMCInfo & info,std::string name_,xmlNodePtr cur,WALKER_TYPES wlk,Wavefunction & wfn)73   FullObsHandler(afqmc::TaskGroup_& tg_,
74                  AFQMCInfo& info,
75                  std::string name_,
76                  xmlNodePtr cur,
77                  WALKER_TYPES wlk,
78                  Wavefunction& wfn)
79       : AFQMCInfo(info),
80         TG(tg_),
81         walker_type(wlk),
82         wfn0(wfn),
83         writer(false),
84         block_size(1),
85         nave(1),
86         name(name_),
87         nspins((walker_type == COLLINEAR) ? 2 : 1),
88         G4D_host({0, 0, 0, 0}, shared_allocator<ComplexType>{TG.TG_local()})
89   {
90     using std::fill_n;
91 
92     xmlNodePtr curRoot = cur;
93     if (cur != NULL)
94     {
95       ParameterSet m_param;
96       m_param.add(nave, "naverages");
97       m_param.add(block_size, "block_size");
98       m_param.put(cur);
99     }
100 
101     if (nave <= 0)
102       APP_ABORT("naverages <= 0 is not allowed.\n");
103 
104     cur = curRoot->children;
105     while (cur != NULL)
106     {
107       std::string cname((const char*)(cur->name));
108       std::transform(cname.begin(), cname.end(), cname.begin(), (int (*)(int))tolower);
109       if (cname == "onerdm")
110       {
111         properties.emplace_back(Observable(full1rdm(TG, info, cur, walker_type, nave, block_size)));
112       }
113       else if (cname == "gfock" || cname == "genfock" || cname == "ekt")
114       {
115         properties.emplace_back(Observable(
116             generalizedFockMatrix(TG, info, cur, walker_type, wfn0.getHamiltonianOperations(), nave, block_size)));
117       }
118       else if (cname == "diag2rdm")
119       {
120         properties.emplace_back(Observable(diagonal2rdm(TG, info, cur, walker_type, nave, block_size)));
121       }
122       else if (cname == "twordm")
123       {
124         properties.emplace_back(Observable(full2rdm(TG, info, cur, walker_type, nave, block_size)));
125       }
126       else if (cname == "n2r" || cname == "ontop2rdm")
127       {
128 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
129         std::string str("false");
130         ParameterSet m_param;
131         m_param.add(str, "use_host_memory");
132         m_param.put(cur);
133         std::transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower);
134         if (str == "false" || str == "no")
135         {
136           properties.emplace_back(Observable(
137               n2r<device_allocator<ComplexType>>(TG, info, cur, walker_type, false, device_allocator<ComplexType>{},
138                                                  device_allocator<ComplexType>{}, nave, block_size)));
139         }
140         else
141 #endif
142         {
143           properties.emplace_back(Observable(
144               n2r<shared_allocator<ComplexType>>(TG, info, cur, walker_type, true,
145                                                  shared_allocator<ComplexType>{TG.TG_local()},
146                                                  shared_allocator<ComplexType>{TG.Node()}, nave, block_size)));
147         }
148       }
149       else if (cname == "realspace_correlators")
150       {
151         properties.emplace_back(Observable(realspace_correlators(TG, info, cur, walker_type, nave, block_size)));
152       }
153       else if (cname == "correlators")
154       {
155         properties.emplace_back(Observable(atomcentered_correlators(TG, info, cur, walker_type, nave, block_size)));
156       }
157       cur = cur->next;
158     }
159 
160     if (properties.size() == 0)
161       APP_ABORT("empty observables list is not allowed.\n");
162 
163     Gdims = std::make_tuple(NMO, NMO);
164     if (walker_type == NONCOLLINEAR)
165       Gdims = std::make_tuple(2 * NMO, 2 * NMO);
166     dm_size = nspins * std::get<0>(Gdims) * std::get<1>(Gdims);
167 
168     writer = (TG.getGlobalRank() == 0);
169 
170     denominator = stdCVector(iextensions<1u>{nave});
171     fill_n(denominator.begin(), denominator.num_elements(), ComplexType(0.0, 0.0));
172   }
173 
print(int iblock,hdf_archive & dump)174   void print(int iblock, hdf_archive& dump)
175   {
176     using std::fill_n;
177 
178     if (TG.TG_local().root())
179     {
180       ma::scal(ComplexType(1.0 / block_size), denominator);
181       TG.TG_heads().reduce_in_place_n(to_address(denominator.origin()), denominator.num_elements(), std::plus<>(), 0);
182     }
183 
184     for (auto& v : properties)
185       v.print(iblock, dump, denominator);
186     fill_n(denominator.origin(), denominator.num_elements(), ComplexType(0.0, 0.0));
187   }
188 
189   template<class WlkSet, class MatR, class CVec, class MatD>
accumulate(int iav,WlkSet & wset,MatR && Refs,CVec && wgt,MatD && DevdetR,bool impsamp)190   void accumulate(int iav, WlkSet& wset, MatR&& Refs, CVec&& wgt, MatD&& DevdetR, bool impsamp)
191   {
192     if (iav < 0 || iav >= nave)
193       APP_ABORT("Runtime Error: iav out of range in full1rdm::accumulate. \n\n\n");
194 
195     int nw(wset.size());
196     int nrefs(Refs.size(1));
197     double LogOverlapFactor(wset.getLogOverlapFactor());
198     LocalTGBufferManager shm_buffer_manager;
199     StaticSHM4Tensor G4D({nw, nspins, std::get<0>(Gdims), std::get<1>(Gdims)},
200                          shm_buffer_manager.get_generator().template get_allocator<ComplexType>());
201     StaticSHMVector DevOv(iextensions<1u>{2 * nw},
202                           shm_buffer_manager.get_generator().template get_allocator<ComplexType>());
203     sharedCMatrix_ref G2D(G4D.origin(), {nw, dm_size});
204 
205     if (G4D_host.num_elements() != G4D.num_elements())
206     {
207       G4D_host = mpi3C4Tensor(G4D.extensions(), shared_allocator<ComplexType>{TG.TG_local()});
208       TG.TG_local().barrier();
209     }
210 
211     stdCVector Xw(iextensions<1u>{nw});
212     std::fill_n(Xw.origin(), Xw.num_elements(), ComplexType(1.0, 0.0));
213     stdCVector Ov(iextensions<1u>{2 * nw});
214     stdCMatrix detR(DevdetR);
215 
216     using SMType = typename WlkSet::reference::SMType;
217     // MAM: The pointer type of GA/GB needs to be device_ptr, it can not be
218     //      one of the shared_memory types. The dispatching in DensityMatrices is done
219     //      through the pointer type of the result matrix (GA/GB).
220     std::vector<devCMatrix_ptr> GA;
221     std::vector<devCMatrix_ptr> GB;
222     std::vector<SMType> RefsA;
223     std::vector<SMType> RefsB;
224     std::vector<SMType> SMA;
225     std::vector<SMType> SMB;
226     GA.reserve(nw);
227     SMA.reserve(nw);
228     RefsA.reserve(nw);
229     if (walker_type == COLLINEAR)
230       RefsB.reserve(nw);
231     if (walker_type == COLLINEAR)
232       SMB.reserve(nw);
233     if (walker_type == COLLINEAR)
234       GB.reserve(nw);
235 
236     if (impsamp)
237       denominator[iav] += std::accumulate(wgt.begin(), wgt.end(), ComplexType(0.0));
238     else
239     {
240       APP_ABORT(" Finish implementation of free projection. \n\n\n");
241     }
242 
243     for (int iref = 0, is = 0; iref < nrefs; iref++, is += nspins)
244     {
245       // conjugated here!
246       ComplexType CIcoeff(std::conj(wfn0.getReferenceWeight(iref)));
247 
248       //1. Calculate Green functions
249       // Refs({wset.size(),nrefs,ref_size}
250       RefsA.clear();
251       RefsB.clear();
252       SMA.clear();
253       SMB.clear();
254       GA.clear();
255       GB.clear();
256       // using SlaterMatrixAux to store References in device memory
257       if (walker_type == COLLINEAR)
258       {
259         for (int iw = 0; iw < nw; iw++)
260         {
261           SMA.emplace_back(wset[iw].SlaterMatrixN(Alpha));
262           SMB.emplace_back(wset[iw].SlaterMatrixN(Beta));
263           GA.emplace_back(make_device_ptr(G2D[iw].origin()), iextensions<2u>{NMO, NMO});
264           GB.emplace_back(make_device_ptr(G2D[iw].origin()) + NMO * NMO, iextensions<2u>{NMO, NMO});
265           RefsA.emplace_back(wset[iw].SlaterMatrixAux(Alpha));
266           RefsB.emplace_back(wset[iw].SlaterMatrixAux(Beta));
267           copy_n(Refs[iw][iref].origin(), (*RefsA.back()).num_elements(), (*RefsA.back()).origin());
268           copy_n(Refs[iw][iref].origin() + (*RefsA.back()).num_elements(), (*RefsB.back()).num_elements(),
269                  (*RefsB.back()).origin());
270         }
271         wfn0.DensityMatrix(RefsA, SMA, GA, DevOv.sliced(0, nw), LogOverlapFactor, false, false);
272         wfn0.DensityMatrix(RefsB, SMB, GB, DevOv.sliced(nw, 2 * nw), LogOverlapFactor, false, false);
273       }
274       else
275       {
276         for (int iw = 0; iw < nw; iw++)
277         {
278           SMA.emplace_back(wset[iw].SlaterMatrixN(Alpha));
279           GA.emplace_back(make_device_ptr(G2D[iw].origin()), iextensions<2u>{NMO, NMO});
280           RefsA.emplace_back(wset[iw].SlaterMatrixAux(Alpha));
281           copy_n(Refs[iw][iref].origin(), (*RefsA.back()).num_elements(), (*RefsA.back()).origin());
282         }
283         wfn0.DensityMatrix(RefsA, SMA, GA, DevOv.sliced(0, nw), LogOverlapFactor, false, false);
284       }
285 
286       //2. calculate and accumulate appropriate weights
287       copy_n(DevOv.origin(), 2 * nw, Ov.origin());
288       if (nrefs > 1)
289       {
290         if (walker_type == CLOSED)
291         {
292           for (int iw = 0; iw < nw; iw++)
293             Xw[iw] = CIcoeff * Ov[iw] * Ov[iw] * std::conj(detR[iw][iref] * detR[iw][iref]);
294         }
295         else if (walker_type == COLLINEAR)
296         {
297           for (int iw = 0; iw < nw; iw++)
298             Xw[iw] = CIcoeff * Ov[iw] * Ov[iw + nw] * std::conj(detR[iw][2 * iref] * detR[iw][2 * iref + 1]);
299         }
300         else if (walker_type == NONCOLLINEAR)
301         {
302           for (int iw = 0; iw < nw; iw++)
303             Xw[iw] = CIcoeff * Ov[iw] * std::conj(detR[iw][iref]);
304         }
305       }
306       if (nrefs == 1)
307         for (int iw = 0; iw < nw; iw++)
308           Xw[iw] = ComplexType(1.0);
309 
310       // MAM: Since most of the simpler estimators need G4D in host memory,
311       //      I'm providing a copy of the structure there already
312       TG.TG_local().barrier();
313       int i0, iN;
314       std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(G4D_host.num_elements()), TG.TG_local().size());
315       copy_n(make_device_ptr(G4D.origin()) + i0, iN - i0, to_address(G4D_host.origin()) + i0);
316       TG.TG_local().barrier();
317 
318       //3. accumulate references
319       for (auto& v : properties)
320         v.accumulate_reference(iav, iref, G4D, G4D_host, wgt, Xw, Ov, impsamp);
321     }
322     //4. accumulate block (normalize and accumulate sum over references)
323     for (auto& v : properties)
324       v.accumulate_block(iav, wgt, impsamp);
325   }
326 
327 private:
328   TaskGroup_& TG;
329 
330   WALKER_TYPES walker_type;
331 
332   Wavefunction& wfn0;
333 
334   bool writer;
335 
336   int block_size;
337 
338   int nave;
339 
340   std::string name;
341 
342   int nspins;
343   int dm_size;
344   std::tuple<int, int> Gdims;
345 
346   std::vector<Observable> properties;
347 
348   // denominator (nave, ...)
349   stdCVector denominator;
350 
351   // space for G in host space
352   mpi3C4Tensor G4D_host;
353 };
354 
355 } // namespace afqmc
356 
357 } // namespace qmcplusplus
358 
359 #endif
360