//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//
// File created by: Miguel Morales, moralessilva2@llnl.gov, Lawrence Livermore National Laboratory
//////////////////////////////////////////////////////////////////////////////////////

#ifndef QMCPLUSPLUS_AFQMC_FULLOBSHANDLER_HPP
#define QMCPLUSPLUS_AFQMC_FULLOBSHANDLER_HPP

#include <vector>
#include <string>
#include <iostream>

#include "hdf/hdf_multi.h"
#include "hdf/hdf_archive.h"
#include "OhmmsData/libxmldefs.h"

#include "AFQMC/Estimators/Observables/Observable.hpp"
#include "AFQMC/config.h"
#include "AFQMC/Utilities/type_conversion.hpp"
#include "AFQMC/Numerics/ma_operations.hpp"
#include "AFQMC/Wavefunctions/Wavefunction.hpp"
#include "AFQMC/Walkers/WalkerSet.hpp"
#include "AFQMC/Memory/buffer_managers.h"

namespace qmcplusplus
{
namespace afqmc
{
/*
 * This class manages a list of "full" observables.
 * Full observables are those that have a walker-dependent left-hand side,
 * which results from back propagation.
 * This implementation of the class assumes a multi-determinant trial wavefunction,
 * resulting in the loop over "references" (each determinant in the trial wavefunction
 * being back-propagated).
 * Given a walker set and an array of (back-propagated) Slater matrices,
 * this routine will calculate and accumulate all requested observables.
 * To make the implementation of the BackPropagated class cleaner,
 * this class also handles all the hdf5 I/O (given a hdf archive).
46 */ 47 class FullObsHandler : public AFQMCInfo 48 { 49 // allocators 50 using sharedAllocator = localTG_allocator<ComplexType>; 51 52 using shared_pointer = typename sharedAllocator::pointer; 53 using const_shared_pointer = typename sharedAllocator::const_pointer; 54 55 using devCMatrix_ptr = boost::multi::array_ptr<ComplexType, 2, device_ptr<ComplexType>>; 56 57 using sharedCVector = boost::multi::array<ComplexType, 1, sharedAllocator>; 58 using sharedCVector_ref = boost::multi::array_ref<ComplexType, 1, shared_pointer>; 59 using sharedCMatrix_ref = boost::multi::array_ref<ComplexType, 2, shared_pointer>; 60 using sharedC4Tensor_ref = boost::multi::array_ref<ComplexType, 4, shared_pointer>; 61 62 using mpi3C4Tensor = boost::multi::array<ComplexType, 4, shared_allocator<ComplexType>>; 63 64 using stdCVector = boost::multi::array<ComplexType, 1>; 65 using stdCMatrix = boost::multi::array<ComplexType, 2>; 66 using stdCVector_ref = boost::multi::array_ref<ComplexType, 1>; 67 68 using shm_stack_alloc_type = LocalTGBufferManager::template allocator_t<ComplexType>; 69 using StaticSHMVector = boost::multi::static_array<ComplexType, 1, shm_stack_alloc_type>; 70 using StaticSHM4Tensor = boost::multi::static_array<ComplexType, 4, shm_stack_alloc_type>; 71 72 public: FullObsHandler(afqmc::TaskGroup_ & tg_,AFQMCInfo & info,std::string name_,xmlNodePtr cur,WALKER_TYPES wlk,Wavefunction & wfn)73 FullObsHandler(afqmc::TaskGroup_& tg_, 74 AFQMCInfo& info, 75 std::string name_, 76 xmlNodePtr cur, 77 WALKER_TYPES wlk, 78 Wavefunction& wfn) 79 : AFQMCInfo(info), 80 TG(tg_), 81 walker_type(wlk), 82 wfn0(wfn), 83 writer(false), 84 block_size(1), 85 nave(1), 86 name(name_), 87 nspins((walker_type == COLLINEAR) ? 
2 : 1), 88 G4D_host({0, 0, 0, 0}, shared_allocator<ComplexType>{TG.TG_local()}) 89 { 90 using std::fill_n; 91 92 xmlNodePtr curRoot = cur; 93 if (cur != NULL) 94 { 95 ParameterSet m_param; 96 m_param.add(nave, "naverages"); 97 m_param.add(block_size, "block_size"); 98 m_param.put(cur); 99 } 100 101 if (nave <= 0) 102 APP_ABORT("naverages <= 0 is not allowed.\n"); 103 104 cur = curRoot->children; 105 while (cur != NULL) 106 { 107 std::string cname((const char*)(cur->name)); 108 std::transform(cname.begin(), cname.end(), cname.begin(), (int (*)(int))tolower); 109 if (cname == "onerdm") 110 { 111 properties.emplace_back(Observable(full1rdm(TG, info, cur, walker_type, nave, block_size))); 112 } 113 else if (cname == "gfock" || cname == "genfock" || cname == "ekt") 114 { 115 properties.emplace_back(Observable( 116 generalizedFockMatrix(TG, info, cur, walker_type, wfn0.getHamiltonianOperations(), nave, block_size))); 117 } 118 else if (cname == "diag2rdm") 119 { 120 properties.emplace_back(Observable(diagonal2rdm(TG, info, cur, walker_type, nave, block_size))); 121 } 122 else if (cname == "twordm") 123 { 124 properties.emplace_back(Observable(full2rdm(TG, info, cur, walker_type, nave, block_size))); 125 } 126 else if (cname == "n2r" || cname == "ontop2rdm") 127 { 128 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP) 129 std::string str("false"); 130 ParameterSet m_param; 131 m_param.add(str, "use_host_memory"); 132 m_param.put(cur); 133 std::transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); 134 if (str == "false" || str == "no") 135 { 136 properties.emplace_back(Observable( 137 n2r<device_allocator<ComplexType>>(TG, info, cur, walker_type, false, device_allocator<ComplexType>{}, 138 device_allocator<ComplexType>{}, nave, block_size))); 139 } 140 else 141 #endif 142 { 143 properties.emplace_back(Observable( 144 n2r<shared_allocator<ComplexType>>(TG, info, cur, walker_type, true, 145 shared_allocator<ComplexType>{TG.TG_local()}, 146 
shared_allocator<ComplexType>{TG.Node()}, nave, block_size))); 147 } 148 } 149 else if (cname == "realspace_correlators") 150 { 151 properties.emplace_back(Observable(realspace_correlators(TG, info, cur, walker_type, nave, block_size))); 152 } 153 else if (cname == "correlators") 154 { 155 properties.emplace_back(Observable(atomcentered_correlators(TG, info, cur, walker_type, nave, block_size))); 156 } 157 cur = cur->next; 158 } 159 160 if (properties.size() == 0) 161 APP_ABORT("empty observables list is not allowed.\n"); 162 163 Gdims = std::make_tuple(NMO, NMO); 164 if (walker_type == NONCOLLINEAR) 165 Gdims = std::make_tuple(2 * NMO, 2 * NMO); 166 dm_size = nspins * std::get<0>(Gdims) * std::get<1>(Gdims); 167 168 writer = (TG.getGlobalRank() == 0); 169 170 denominator = stdCVector(iextensions<1u>{nave}); 171 fill_n(denominator.begin(), denominator.num_elements(), ComplexType(0.0, 0.0)); 172 } 173 print(int iblock,hdf_archive & dump)174 void print(int iblock, hdf_archive& dump) 175 { 176 using std::fill_n; 177 178 if (TG.TG_local().root()) 179 { 180 ma::scal(ComplexType(1.0 / block_size), denominator); 181 TG.TG_heads().reduce_in_place_n(to_address(denominator.origin()), denominator.num_elements(), std::plus<>(), 0); 182 } 183 184 for (auto& v : properties) 185 v.print(iblock, dump, denominator); 186 fill_n(denominator.origin(), denominator.num_elements(), ComplexType(0.0, 0.0)); 187 } 188 189 template<class WlkSet, class MatR, class CVec, class MatD> accumulate(int iav,WlkSet & wset,MatR && Refs,CVec && wgt,MatD && DevdetR,bool impsamp)190 void accumulate(int iav, WlkSet& wset, MatR&& Refs, CVec&& wgt, MatD&& DevdetR, bool impsamp) 191 { 192 if (iav < 0 || iav >= nave) 193 APP_ABORT("Runtime Error: iav out of range in full1rdm::accumulate. 
\n\n\n"); 194 195 int nw(wset.size()); 196 int nrefs(Refs.size(1)); 197 double LogOverlapFactor(wset.getLogOverlapFactor()); 198 LocalTGBufferManager shm_buffer_manager; 199 StaticSHM4Tensor G4D({nw, nspins, std::get<0>(Gdims), std::get<1>(Gdims)}, 200 shm_buffer_manager.get_generator().template get_allocator<ComplexType>()); 201 StaticSHMVector DevOv(iextensions<1u>{2 * nw}, 202 shm_buffer_manager.get_generator().template get_allocator<ComplexType>()); 203 sharedCMatrix_ref G2D(G4D.origin(), {nw, dm_size}); 204 205 if (G4D_host.num_elements() != G4D.num_elements()) 206 { 207 G4D_host = mpi3C4Tensor(G4D.extensions(), shared_allocator<ComplexType>{TG.TG_local()}); 208 TG.TG_local().barrier(); 209 } 210 211 stdCVector Xw(iextensions<1u>{nw}); 212 std::fill_n(Xw.origin(), Xw.num_elements(), ComplexType(1.0, 0.0)); 213 stdCVector Ov(iextensions<1u>{2 * nw}); 214 stdCMatrix detR(DevdetR); 215 216 using SMType = typename WlkSet::reference::SMType; 217 // MAM: The pointer type of GA/GB needs to be device_ptr, it can not be 218 // one of the shared_memory types. The dispatching in DensityMatrices is done 219 // through the pointer type of the result matrix (GA/GB). 220 std::vector<devCMatrix_ptr> GA; 221 std::vector<devCMatrix_ptr> GB; 222 std::vector<SMType> RefsA; 223 std::vector<SMType> RefsB; 224 std::vector<SMType> SMA; 225 std::vector<SMType> SMB; 226 GA.reserve(nw); 227 SMA.reserve(nw); 228 RefsA.reserve(nw); 229 if (walker_type == COLLINEAR) 230 RefsB.reserve(nw); 231 if (walker_type == COLLINEAR) 232 SMB.reserve(nw); 233 if (walker_type == COLLINEAR) 234 GB.reserve(nw); 235 236 if (impsamp) 237 denominator[iav] += std::accumulate(wgt.begin(), wgt.end(), ComplexType(0.0)); 238 else 239 { 240 APP_ABORT(" Finish implementation of free projection. \n\n\n"); 241 } 242 243 for (int iref = 0, is = 0; iref < nrefs; iref++, is += nspins) 244 { 245 // conjugated here! 246 ComplexType CIcoeff(std::conj(wfn0.getReferenceWeight(iref))); 247 248 //1. 
Calculate Green functions 249 // Refs({wset.size(),nrefs,ref_size} 250 RefsA.clear(); 251 RefsB.clear(); 252 SMA.clear(); 253 SMB.clear(); 254 GA.clear(); 255 GB.clear(); 256 // using SlaterMatrixAux to store References in device memory 257 if (walker_type == COLLINEAR) 258 { 259 for (int iw = 0; iw < nw; iw++) 260 { 261 SMA.emplace_back(wset[iw].SlaterMatrixN(Alpha)); 262 SMB.emplace_back(wset[iw].SlaterMatrixN(Beta)); 263 GA.emplace_back(make_device_ptr(G2D[iw].origin()), iextensions<2u>{NMO, NMO}); 264 GB.emplace_back(make_device_ptr(G2D[iw].origin()) + NMO * NMO, iextensions<2u>{NMO, NMO}); 265 RefsA.emplace_back(wset[iw].SlaterMatrixAux(Alpha)); 266 RefsB.emplace_back(wset[iw].SlaterMatrixAux(Beta)); 267 copy_n(Refs[iw][iref].origin(), (*RefsA.back()).num_elements(), (*RefsA.back()).origin()); 268 copy_n(Refs[iw][iref].origin() + (*RefsA.back()).num_elements(), (*RefsB.back()).num_elements(), 269 (*RefsB.back()).origin()); 270 } 271 wfn0.DensityMatrix(RefsA, SMA, GA, DevOv.sliced(0, nw), LogOverlapFactor, false, false); 272 wfn0.DensityMatrix(RefsB, SMB, GB, DevOv.sliced(nw, 2 * nw), LogOverlapFactor, false, false); 273 } 274 else 275 { 276 for (int iw = 0; iw < nw; iw++) 277 { 278 SMA.emplace_back(wset[iw].SlaterMatrixN(Alpha)); 279 GA.emplace_back(make_device_ptr(G2D[iw].origin()), iextensions<2u>{NMO, NMO}); 280 RefsA.emplace_back(wset[iw].SlaterMatrixAux(Alpha)); 281 copy_n(Refs[iw][iref].origin(), (*RefsA.back()).num_elements(), (*RefsA.back()).origin()); 282 } 283 wfn0.DensityMatrix(RefsA, SMA, GA, DevOv.sliced(0, nw), LogOverlapFactor, false, false); 284 } 285 286 //2. 
calculate and accumulate appropriate weights 287 copy_n(DevOv.origin(), 2 * nw, Ov.origin()); 288 if (nrefs > 1) 289 { 290 if (walker_type == CLOSED) 291 { 292 for (int iw = 0; iw < nw; iw++) 293 Xw[iw] = CIcoeff * Ov[iw] * Ov[iw] * std::conj(detR[iw][iref] * detR[iw][iref]); 294 } 295 else if (walker_type == COLLINEAR) 296 { 297 for (int iw = 0; iw < nw; iw++) 298 Xw[iw] = CIcoeff * Ov[iw] * Ov[iw + nw] * std::conj(detR[iw][2 * iref] * detR[iw][2 * iref + 1]); 299 } 300 else if (walker_type == NONCOLLINEAR) 301 { 302 for (int iw = 0; iw < nw; iw++) 303 Xw[iw] = CIcoeff * Ov[iw] * std::conj(detR[iw][iref]); 304 } 305 } 306 if (nrefs == 1) 307 for (int iw = 0; iw < nw; iw++) 308 Xw[iw] = ComplexType(1.0); 309 310 // MAM: Since most of the simpler estimators need G4D in host memory, 311 // I'm providing a copy of the structure there already 312 TG.TG_local().barrier(); 313 int i0, iN; 314 std::tie(i0, iN) = FairDivideBoundary(TG.TG_local().rank(), int(G4D_host.num_elements()), TG.TG_local().size()); 315 copy_n(make_device_ptr(G4D.origin()) + i0, iN - i0, to_address(G4D_host.origin()) + i0); 316 TG.TG_local().barrier(); 317 318 //3. accumulate references 319 for (auto& v : properties) 320 v.accumulate_reference(iav, iref, G4D, G4D_host, wgt, Xw, Ov, impsamp); 321 } 322 //4. accumulate block (normalize and accumulate sum over references) 323 for (auto& v : properties) 324 v.accumulate_block(iav, wgt, impsamp); 325 } 326 327 private: 328 TaskGroup_& TG; 329 330 WALKER_TYPES walker_type; 331 332 Wavefunction& wfn0; 333 334 bool writer; 335 336 int block_size; 337 338 int nave; 339 340 std::string name; 341 342 int nspins; 343 int dm_size; 344 std::tuple<int, int> Gdims; 345 346 std::vector<Observable> properties; 347 348 // denominator (nave, ...) 349 stdCVector denominator; 350 351 // space for G in host space 352 mpi3C4Tensor G4D_host; 353 }; 354 355 } // namespace afqmc 356 357 } // namespace qmcplusplus 358 359 #endif 360