//////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source
// License.  See LICENSE file in top directory for details.
//
// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
//
// File developed by:
//    Miguel A. Morales, moralessilva2@llnl.gov
//    Lawrence Livermore National Laboratory
//
// File created by:
//    Miguel A. Morales, moralessilva2@llnl.gov
//    Lawrence Livermore National Laboratory
////////////////////////////////////////////////////////////////////////////////

#ifndef QMCPLUSPLUS_AFQMC_NOMSD_H
#define QMCPLUSPLUS_AFQMC_NOMSD_H

#include <vector>
#include <map>
#include <string>
#include <iostream>
#include <tuple>

#include "AFQMC/Utilities/readWfn.h"
#include "AFQMC/config.h"
#include "mpi3/shm/mutex.hpp"
#include "AFQMC/Utilities/taskgroup.h"
#include "AFQMC/Walkers/WalkerConfig.hpp"
#include "AFQMC/Memory/buffer_managers.h"

#include "AFQMC/HamiltonianOperations/HamiltonianOperations.hpp"
#include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations.hpp"


namespace qmcplusplus
{
namespace afqmc
{
/*
 * Class that implements a multi-Slater determinant trial wave-function.
 * Single determinant wfns are also allowed.
 * No relation between different determinants in the expansion is assumed.
 * Designed for non-orthogonal MSD expansions.
 * For particle-hole orthogonal MSD wfns, use FastMSD.
 */
template<class devPsiT>
class NOMSD : public AFQMCInfo
{
  // Note:
  // if number_of_devices > 0, nextra should always be 0,
  // so code doesn't need to be portable in places guarded by if(nextra>0)

  // allocators
  using Allocator        = device_allocator<ComplexType>;
  using Allocator_shared = localTG_allocator<ComplexType>;

  // type defs
  using pointer              = typename Allocator::pointer;
  using const_pointer        = typename Allocator::const_pointer;
  using pointer_shared       = typename Allocator_shared::pointer;
  using const_pointer_shared = typename Allocator_shared::const_pointer;

  using buffer_alloc_type     = DeviceBufferManager::template allocator_t<ComplexType>;
  using shm_buffer_alloc_type = LocalTGBufferManager::template allocator_t<ComplexType>;

  using CVector      = boost::multi::array<ComplexType, 1, Allocator>;
  using CMatrix      = boost::multi::array<ComplexType, 2, Allocator>;
  using CTensor      = boost::multi::array<ComplexType, 3, Allocator>;
  using CVector_ref  = boost::multi::array_ref<ComplexType, 1, pointer>;
  using CMatrix_ref  = boost::multi::array_ref<ComplexType, 2, pointer>;
  using CMatrix_ptr  = boost::multi::array_ptr<ComplexType, 2, pointer>;
  using CMatrix_cref = boost::multi::array_ref<const ComplexType, 2, const_pointer>;
  using CTensor_ref  = boost::multi::array_ref<ComplexType, 3, pointer>;
  using CTensor_cref = boost::multi::array_ref<const ComplexType, 3, const_pointer>;
  using shmCVector   = boost::multi::array<ComplexType, 1, Allocator_shared>;
  using shmCMatrix   = boost::multi::array<ComplexType, 2, Allocator_shared>;
  using shared_mutex = boost::mpi3::shm::mutex;

  using stdCVector  = boost::multi::array<ComplexType, 1>;
  using stdCMatrix  = boost::multi::array<ComplexType, 2>;
  using stdCTensor  = boost::multi::array<ComplexType, 3>;
  using mpi3CVector = boost::multi::array<ComplexType, 1, shared_allocator<ComplexType>>;
  using mpi3CMatrix = boost::multi::array<ComplexType, 2, shared_allocator<ComplexType>>;
  using mpi3CTensor = boost::multi::array<ComplexType, 3, shared_allocator<ComplexType>>;

  using stdCMatrix_ref = boost::multi::array_ref<ComplexType, 2>;

  using StaticVector  = boost::multi::static_array<ComplexType, 1, buffer_alloc_type>;
  using StaticMatrix  = boost::multi::static_array<ComplexType, 2, buffer_alloc_type>;
  using Static3Tensor = boost::multi::static_array<ComplexType, 3, buffer_alloc_type>;

  using StaticSHMVector = boost::multi::static_array<ComplexType, 1, shm_buffer_alloc_type>;
  using StaticSHMMatrix = boost::multi::static_array<ComplexType, 2, shm_buffer_alloc_type>;

public:
  template<class MType>
  NOMSD(AFQMCInfo& info,
        xmlNodePtr cur,
        afqmc::TaskGroup_& tg_,
        SlaterDetOperations&& sdet_,
        HamiltonianOperations&& hop_,
        std::vector<ComplexType>&& ci_,
        std::vector<MType>&& orbs_,
        WALKER_TYPES wlk,
        ValueType nce,
        int targetNW = 1)
      : AFQMCInfo(info),
        TG(tg_),
        alloc_(), // right now device_allocator is default constructible
        alloc_shared_(make_localTG_allocator<ComplexType>(TG)),
        buffer_manager(),
        shm_buffer_manager(),
        SDetOp(std::move(sdet_)),
        HamOp(std::move(hop_)),
        ci(std::move(ci_)),
        OrbMats(move_vector<devPsiT>(std::move(orbs_))),
        RefOrbMats({0, 0}, shared_allocator<ComplexType>{TG.Node()}),
        mutex(std::make_unique<shared_mutex>(TG.TG_local())),
        walker_type(wlk),
        nspins((walker_type == COLLINEAR) ? (2) : (1)),
        number_of_references(-1),
        NuclearCoulombEnergy(nce),
        last_number_extra_tasks(-1),
        last_task_index(-1),
        local_group_comm(),
        shmbuff_for_G(nullptr)
  {
    compact_G_for_vbias     = (ci.size() == 1); // this should be input, since it is determined by HOps
    transposed_G_for_vbias_ = HamOp.transposed_G_for_vbias();
    transposed_G_for_E_     = HamOp.transposed_G_for_E();
    transposed_vHS_         = HamOp.transposed_vHS();

    excitedState = false;
    std::string rediag("");
    std::string excited_file("");
    std::string svd_Gf("no");
    std::string svd_O("no");
    std::string svd_Gm("no");
    int i_ = -1, a_ = -1;
    nbatch    = ((number_of_devices() > 0) ? -1 : 0);
    nbatch_qr = ((number_of_devices() > 0) ? -1 : 0);
    if (NMO > 1024 || NAEA > 512)
      nbatch_qr = 0;

    ParameterSet m_param;
    m_param.add(number_of_references, "number_of_references");
    m_param.add(number_of_references, "nrefs");
    m_param.add(excited_file, "excited");
    // TODO: generalize this to multi-particle excitations; requires reading a list of integers from the input
    m_param.add(i_, "i");
    m_param.add(a_, "a");
    m_param.add(rediag, "rediag");
    m_param.add(svd_Gf, "svd_with_Gfull");
    m_param.add(svd_Gm, "svd_with_Gmix");
    m_param.add(svd_O, "svd_with_Ovlp");
    if (TG.TG_local().size() == 1)
      m_param.add(nbatch, "nbatch");
    if (TG.TG_local().size() == 1)
      m_param.add(nbatch_qr, "nbatch_qr");
    m_param.put(cur);
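    // Illustrative input sketch (not taken from the original source): the options
    // registered above are typically supplied as <parameter> elements inside the
    // AFQMC wavefunction block of the XML input; the surrounding element and
    // attribute names used here ("Wavefunction", type, info) are assumptions.
    //
    //   <Wavefunction type="NOMSD" info="info0">
    //     <parameter name="nrefs">10</parameter>
    //     <parameter name="rediag">yes</parameter>
    //     <parameter name="nbatch">32</parameter>
    //   </Wavefunction>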
\n"; 167 } 168 std::transform(svd_Gf.begin(), svd_Gf.end(), svd_Gf.begin(), (int (*)(int))tolower); 169 if (svd_Gf == "yes" || svd_Gf == "true") 170 useSVD_in_Gfull = true; 171 std::transform(svd_Gm.begin(), svd_Gm.end(), svd_Gm.begin(), (int (*)(int))tolower); 172 if (svd_Gm == "yes" || svd_Gm == "true") 173 useSVD_in_Gmix = true; 174 std::transform(svd_O.begin(), svd_O.end(), svd_O.begin(), (int (*)(int))tolower); 175 if (svd_O == "yes" || svd_O == "true") 176 useSVD_in_Ovlp = true; 177 178 179 if (excited_file != "" && i_ >= 0 && a_ >= 0) 180 { 181 if (i_ < NMO && a_ < NMO) 182 { 183 if (i_ >= NAEA || a_ < NAEA) 184 APP_ABORT(" Errors: Inconsistent excited orbitals for alpha electrons. \n"); 185 excitedState = true; 186 maxOccupExtendedMat = {a_, NAEB}; 187 numExcitations = {1, 0}; 188 excitations.push_back({i_, a_}); 189 } 190 else if (i_ >= NMO && a_ >= NMO) 191 { 192 if (i_ >= NMO + NAEB || a_ < NMO + NAEB) 193 APP_ABORT(" Errors: Inconsistent excited orbitals for beta electrons. \n"); 194 excitedState = true; 195 maxOccupExtendedMat = {NAEA, a_ - NMO}; 196 numExcitations = {0, 1}; 197 excitations.push_back({i_ - NMO, a_ - NMO}); 198 } 199 else 200 { 201 APP_ABORT(" Errors: Inconsistent excited orbitals. \n"); 202 } 203 stdCTensor excitedOrbMat_; 204 readWfn(excited_file, excitedOrbMat_, NMO, maxOccupExtendedMat.first, maxOccupExtendedMat.second); 205 excitedOrbMat = excitedOrbMat_; 206 } 207 208 std::transform(rediag.begin(), rediag.end(), rediag.begin(), (int (*)(int))tolower); 209 if (rediag == "yes" || rediag == "true") 210 recompute_ci(); 211 } 212 ~NOMSD()213 ~NOMSD() {} 214 215 NOMSD(NOMSD const& other) = delete; 216 NOMSD& operator=(NOMSD const& other) = delete; 217 NOMSD(NOMSD&& other) = default; 218 NOMSD& operator=(NOMSD&& other) = delete; 219 local_number_of_cholesky_vectors() const220 int local_number_of_cholesky_vectors() const { return HamOp.local_number_of_cholesky_vectors(); } global_number_of_cholesky_vectors() const221 int global_number_of_cholesky_vectors() const { return HamOp.global_number_of_cholesky_vectors(); } global_origin_cholesky_vector() const222 int global_origin_cholesky_vector() const { return HamOp.global_origin_cholesky_vector(); } distribution_over_cholesky_vectors() const223 bool distribution_over_cholesky_vectors() const { return HamOp.distribution_over_cholesky_vectors(); } 224 size_of_G_for_vbias() const225 int size_of_G_for_vbias() const { return dm_size(!compact_G_for_vbias); } 226 transposed_G_for_vbias() const227 bool transposed_G_for_vbias() const { return transposed_G_for_vbias_; } transposed_G_for_E() const228 bool transposed_G_for_E() const { return transposed_G_for_E_; } transposed_vHS() const229 bool transposed_vHS() const { return transposed_vHS_; } getWalkerType() const230 WALKER_TYPES getWalkerType() const { return walker_type; } 231 232 template<class Vec> 233 void vMF(Vec&& v); 234 getSlaterDetOperations()235 SlaterDetOperations* getSlaterDetOperations() { return std::addressof(SDetOp); } getHamiltonianOperations()236 HamiltonianOperations* getHamiltonianOperations() { return std::addressof(HamOp); } 237 238 /* 239 * local contribution to vbias for the Green functions in G 240 * G: [size_of_G_for_vbias()][nW] 241 * v: [local # Chol. 
  template<class Vec>
  void vMF(Vec&& v);

  SlaterDetOperations* getSlaterDetOperations() { return std::addressof(SDetOp); }
  HamiltonianOperations* getHamiltonianOperations() { return std::addressof(HamOp); }

  /*
   * local contribution to vbias for the Green functions in G
   * G: [size_of_G_for_vbias()][nW]
   * v: [local # Chol. Vectors][nW]
   */
  template<class MatG, class MatA>
  void vbias(const MatG& G, MatA&& v, double a = 1.0)
  {
    if (transposed_G_for_vbias_)
    {
      assert(G.size(0) == v.size(1));
      assert(G.size(1) == size_of_G_for_vbias());
    }
    else
    {
      assert(G.size(0) == size_of_G_for_vbias());
      assert(G.size(1) == v.size(1));
    }
    assert(v.size(0) == HamOp.local_number_of_cholesky_vectors());
    if (ci.size() == 1)
    {
      // HamOp expects a compact Gc with alpha/beta components
      HamOp.vbias(G, std::forward<MatA>(v), a);
    }
    else
    {
      if (walker_type == CLOSED)
        HamOp.vbias(G, std::forward<MatA>(v), a); // factor of 2 now in HamOps
      else if (walker_type == NONCOLLINEAR)
        HamOp.vbias(G, std::forward<MatA>(v), a);
      else
      {
        if (transposed_G_for_vbias_)
          APP_ABORT(" Error in NOMSD::vbias: transposed_G_for_vbias_ should be false. \n");
        // HamOp expects either alpha or beta, so must be called twice
        HamOp.vbias(G.sliced(0, NMO * NMO), std::forward<MatA>(v), a, 0.0);
        HamOp.vbias(G.sliced(NMO * NMO, 2 * NMO * NMO), std::forward<MatA>(v), a, 1.0);
      }
    }
    TG.local_barrier();
  }

  /*
   * local contribution to vHS for the Green functions in G
   * X: [# chol vecs][nW]
   * v: [NMO^2][nW]
   */
  template<class MatX, class MatA>
  void vHS(MatX&& X, MatA&& v, double a = 1.0)
  {
    assert(X.size(0) == HamOp.local_number_of_cholesky_vectors());
    if (transposed_vHS_)
      assert(X.size(1) == v.size(0));
    else
      assert(X.size(1) == v.size(1));
    HamOp.vHS(std::forward<MatX>(X), std::forward<MatA>(v), a);
    TG.local_barrier();
  }
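  // Illustrative allocation sketch (an assumption, not part of the original header;
  // "wfn", "nW", and the buffer names are placeholders). Following the shape
  // conventions documented above, and assuming the non-transposed layouts, a caller
  // might size the buffers used with vbias() and vHS() as
  //
  //   int nCV = wfn.local_number_of_cholesky_vectors();
  //   boost::multi::array<ComplexType, 2> vb({nCV, nW});         // bias potential
  //   boost::multi::array<ComplexType, 2> X({nCV, nW});          // sampled auxiliary fields
  //   boost::multi::array<ComplexType, 2> vhs({NMO * NMO, nW});  // assumes transposed_vHS() == false
  //
  //   wfn.vbias(G, vb);  // G from MixedDensityMatrix_for_vbias (declared below)
  //   wfn.vHS(X, vhs);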
  /*
   * Calculates the local energy and overlaps of all the walkers in the set and stores
   * them in the wset data
   */
  template<class WlkSet>
  void Energy(WlkSet& wset)
  {
    int nw = wset.size();
    StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
    StaticMatrix eloc({nw, 3}, buffer_manager.get_generator().template get_allocator<ComplexType>());
    Energy(wset, eloc, ovlp);
    TG.local_barrier();
    if (TG.getLocalTGRank() == 0)
    {
      wset.setProperty(OVLP, ovlp);
      wset.setProperty(E1_, eloc(eloc.extension(), 0));
      wset.setProperty(EXX_, eloc(eloc.extension(), 1));
      wset.setProperty(EJ_, eloc(eloc.extension(), 2));
    }
    TG.local_barrier();
  }

  /*
   * Calculates the local energy and overlaps of all the walkers in the set and
   * returns them in the appropriate data structures
   */
  template<class WlkSet, class Mat, class TVec>
  void Energy(const WlkSet& wset, Mat&& E, TVec&& Ov)
  {
    if (TG.getNGroupsPerTG() > 1)
      Energy_distributed(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
    else
      Energy_shared(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
  }

  /*
   * Calculates the mixed density matrix for all walkers in the walker set.
   * Options:
   *  - compact: If true (default), returns compact form with Dim: [NEL*NMO],
   *             otherwise returns full form with Dim: [NMO*NMO].
   *  - transpose: If false (default), returns standard form with Dim: [XXX][nW],
   *             otherwise returns the transpose with Dim: [nW][XXX]
   */
  template<class WlkSet, class MatG>
  void MixedDensityMatrix(const WlkSet& wset, MatG&& G, bool compact = true, bool transpose = false)
  {
    int nw = wset.size();
    StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
    MixedDensityMatrix(wset, std::forward<MatG>(G), ovlp, compact, transpose);
  }

  template<class WlkSet, class MatG, class TVec>
  void MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false)
  {
    if (nbatch != 0)
      MixedDensityMatrix_batched(wset, std::forward<MatG>(G), std::forward<TVec>(Ov), compact, transpose);
    else
      MixedDensityMatrix_shared(wset, std::forward<MatG>(G), std::forward<TVec>(Ov), compact, transpose);
  }

  /*
   * Calculates the density matrix with respect to a given Reference
   * for all walkers in the walker set.
   */
  template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix(const WlkSet& wset,
                     MatA&& RefA,
                     MatB&& RefB,
                     MatG&& G,
                     TVec&& Ov,
                     bool herm,
                     bool compact,
                     bool transposed)
  {
    if (nbatch != 0)
      DensityMatrix_batched(wset, std::forward<MatA>(RefA), std::forward<MatB>(RefB), std::forward<MatG>(G),
                            std::forward<TVec>(Ov), herm, compact, transposed);
    else
      DensityMatrix_shared(wset, std::forward<MatA>(RefA), std::forward<MatB>(RefB), std::forward<MatG>(G),
                           std::forward<TVec>(Ov), herm, compact, transposed);
  }

  template<class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix(std::vector<MatA>& Left,
                     std::vector<MatB>& Right,
                     std::vector<MatG>& G,
                     TVec&& Ov,
                     double LogOverlapFactor,
                     bool herm,
                     bool compact)
  {
    if (nbatch != 0)
      DensityMatrix_batched(Left, Right, G, std::forward<TVec>(Ov), LogOverlapFactor, herm, compact);
    else
      DensityMatrix_shared(Left, Right, G, std::forward<TVec>(Ov), LogOverlapFactor, herm, compact);
  }
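  // Note on dispatch (descriptive, derived from the constructor): nbatch defaults to
  // 0 in CPU-only builds and to -1 when devices are present, and may be overridden
  // from the input when the local task group has a single core. The nbatch != 0
  // branches above therefore select the batched (device) implementations, while
  // nbatch == 0 falls back to the shared-memory (CPU) implementations.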
  /*
   * Calculates the walker averaged density matrix.
   * Options:
   *  - free_projection: If false (default), assumes using phaseless approximation,
   *                     otherwise assumes using free projection.
   */
  template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
  void WalkerAveragedDensityMatrix(const WlkSet& wset,
                                   CVec1& wgt,
                                   MatG& G,
                                   CVec2& denom,
                                   Mat1&& Ovlp,
                                   Mat2&& DMsum,
                                   bool free_projection                          = false,
                                   boost::multi::array_ref<ComplexType, 3>* Refs = nullptr,
                                   boost::multi::array<ComplexType, 2>* detR     = nullptr)
  {
    //  if(nbatch != 0)
    //    WalkerAveragedDensityMatrix_batched(wset,wgt,G,denom,free_projection,Refs,detR);
    //  else
    // having problems with underflow with large (back) projection times and multidets,
    // mainly from the normalization coming out of the orthonormalization (detR);
    // writing a specialized version for single det which doesn't have these issues
    if (ci.size() == 1)
      WalkerAveragedDensityMatrix_shared_single_det(wset, wgt, G, denom, Ovlp, DMsum, free_projection, Refs, detR);
    else
      WalkerAveragedDensityMatrix_shared(wset, wgt, G, denom, Ovlp, DMsum, free_projection, Refs, detR);
  }

  /*
   * Calculates the mixed density matrix for all walkers in the walker set
   * with a format consistent with (and expected by) the vbias routine.
   * This is implementation dependent, so this density matrix should ONLY be used
   * in conjunction with vbias.
   */
  template<class WlkSet, class MatG>
  void MixedDensityMatrix_for_vbias(const WlkSet& wset, MatG&& G)
  {
    int nw = wset.size();
    StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
    MixedDensityMatrix(wset, std::forward<MatG>(G), ovlp, compact_G_for_vbias, transposed_G_for_vbias_);
  }

  /*
   * Calculates the overlaps of all walkers in the set. Returns values in arrays.
   */
  template<class WlkSet, class TVec>
  void Overlap(const WlkSet& wset, TVec&& Ov)
  {
    if (nbatch != 0)
      Overlap_batched(wset, std::forward<TVec>(Ov));
    else
      Overlap_shared(wset, std::forward<TVec>(Ov));
  }

  /*
   * Calculates the overlaps of all walkers in the set. Updates values in wset.
   */
  template<class WlkSet>
  void Overlap(WlkSet& wset)
  {
    int nw = wset.size();
    StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
    Overlap(wset, ovlp);
    TG.local_barrier();
    if (TG.getLocalTGRank() == 0)
    {
      wset.setProperty(OVLP, ovlp);
    }
    TG.local_barrier();
  }

  /*
   * Orthogonalizes the Slater matrices of all walkers in the set.
   * Options:
   *  - bool importanceSampling (default=true): use the algorithm appropriate for importance sampling.
   *    This means that the determinant of the R matrix in the QR decomposition is ignored.
   *    If false, add the determinant of R to the weight of the walker.
   */
  template<class WlkSet>
  void Orthogonalize(WlkSet& wset, bool impSamp)
  {
    if (not excitedState && (nbatch_qr != 0))
      Orthogonalize_batched(wset, impSamp);
    else
      Orthogonalize_shared(wset, impSamp);
  }
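  // Illustrative propagation-step sketch (an assumption, not taken from the original
  // source; "wfn", "wset", and the buffer names are placeholders). A typical AFQMC
  // driver combines the routines above roughly as follows:
  //
  //   wfn.MixedDensityMatrix_for_vbias(wset, G);  // G in the layout vbias expects
  //   wfn.vbias(G, vb);                           // local bias potential
  //   // ... sample auxiliary fields X using vb ...
  //   wfn.vHS(X, vhs);                            // one-body propagator contribution
  //   // ... apply the propagator to the walkers in wset ...
  //   wfn.Overlap(wset);                          // refresh overlaps
  //   wfn.Orthogonalize(wset, true);              // periodic re-orthogonalization
  //   wfn.Energy(wset);                           // local energies for estimators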
  /*
   * Orthogonalizes the Slater matrix of a walker in an excited state calculation.
   */
  template<class Mat>
  void OrthogonalizeExcited(Mat&& A, SpinTypes spin, double LogOverlapFactor);


  /*
   * Returns the number of reference Slater Matrices needed for back propagation.
   */
  int number_of_references_for_back_propagation() const
  {
    if (number_of_references > 0)
      return number_of_references;
    else
      return ((walker_type == COLLINEAR) ? OrbMats.size() / 2 : OrbMats.size());
  }

  ComplexType getReferenceWeight(int i) const { return ci[i]; }

  /*
   * Returns the reference Slater Matrices needed for back propagation.
   */
  template<class Mat, class Ptr = ComplexType*>
  void getReferencesForBackPropagation(Mat&& A)
  {
    static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
    int ndet = number_of_references_for_back_propagation();
    assert(A.size(0) == ndet);
    if (RefOrbMats.size(0) == 0)
    {
      TG.Node().barrier(); // for safety
      int nrow(NMO * ((walker_type == NONCOLLINEAR) ? 2 : 1));
      int ncol(NAEA + ((walker_type == CLOSED) ? 0 : NAEB)); // careful here, spins are stored contiguously
      RefOrbMats.reextent({ndet, nrow * ncol});
      TG.Node().barrier(); // for safety
      if (TG.Node().root())
      {
        if (walker_type != COLLINEAR)
        {
          for (int i = 0; i < ndet; ++i)
          {
            boost::multi::array_ref<ComplexType, 2> A_(to_address(RefOrbMats[i].origin()), {nrow, ncol});
            ma::Matrix2MAREF('H', OrbMats[i], A_);
          }
        }
        else
        {
          for (int i = 0; i < ndet; ++i)
          {
            boost::multi::array_ref<ComplexType, 2> A_(to_address(RefOrbMats[i].origin()), {NMO, NAEA});
            ma::Matrix2MAREF('H', OrbMats[2 * i], A_);
            boost::multi::array_ref<ComplexType, 2> B_(A_.origin() + A_.num_elements(), {NMO, NAEB});
            ma::Matrix2MAREF('H', OrbMats[2 * i + 1], B_);
          }
        }
      } // TG.Node().root()
      TG.Node().barrier(); // for safety
    }
    assert(RefOrbMats.size(0) == ndet);
    assert(RefOrbMats.size(1) == A.size(1));
    auto&& RefOrbMats_(boost::multi::static_array_cast<ComplexType, ComplexType*>(RefOrbMats));
    auto&& A_(boost::multi::static_array_cast<ComplexType, Ptr>(A));
    using std::copy_n;
    int n0, n1;
    std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(A.size(1)), TG.getNCoresPerTG());
    for (int i = 0; i < ndet; i++)
      copy_n(RefOrbMats_[i].origin() + n0, n1 - n0, A_[i].origin() + n0);
    TG.TG_local().barrier();
  }
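  // Illustrative sizing sketch (an assumption, not part of the original header; "wfn"
  // and "refs" are placeholders): the caller provides A with one row per reference
  // determinant and nrow*ncol columns, following the rules above. For example, a
  // COLLINEAR calculation with NMO=20, NAEA=NAEB=5 gives nrow=20 and ncol=10:
  //
  //   int ndet = wfn.number_of_references_for_back_propagation();
  //   boost::multi::array<ComplexType, 2> refs({ndet, 20 * 10});
  //   wfn.getReferencesForBackPropagation(refs);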
protected:
  TaskGroup_& TG;

  Allocator alloc_;
  Allocator_shared alloc_shared_;

  DeviceBufferManager buffer_manager;
  LocalTGBufferManager shm_buffer_manager;

  //SlaterDetOperations_shared<ComplexType> SDetOp;
  SlaterDetOperations SDetOp;

  HamiltonianOperations HamOp;

  std::vector<ComplexType> ci;

  // eventually switched from CMatrix to SMHSparseMatrix(node)
  std::vector<devPsiT> OrbMats;
  mpi3CMatrix RefOrbMats;

  std::unique_ptr<shared_mutex> mutex;

  // in both cases below: closed_shell=0, UHF/ROHF=1, GHF=2
  WALKER_TYPES walker_type;
  int nspins;

  int number_of_references;

  int nbatch;
  int nbatch_qr;
  bool useSVD_in_Ovlp  = false;
  bool useSVD_in_Gmix  = false;
  bool useSVD_in_Gfull = false;

  bool compact_G_for_vbias;

  // in the 3 cases, true means [nwalk][...], false means [...][nwalk]
  bool transposed_G_for_vbias_;
  bool transposed_G_for_E_;
  bool transposed_vHS_;

  ValueType NuclearCoulombEnergy;

  // not elegant, but reasonable for now
  int last_number_extra_tasks;
  int last_task_index;

  // shared_communicator for parallel work within TG_local()
  //std::unique_ptr<shared_communicator> local_group_comm;
  shared_communicator local_group_comm;
  std::unique_ptr<mpi3CVector> shmbuff_for_G;

  // excited states
  bool excitedState;
  std::vector<std::pair<int, int>> excitations;
  CTensor excitedOrbMat;
  CMatrix extendedMatAlpha;
  CMatrix extendedMatBeta;
  std::pair<int, int> maxOccupExtendedMat;
  std::pair<int, int> numExcitations;

  void recompute_ci();

  /*
   * Computes the density matrix with respect to a given reference.
   * Intended to be used in combination with the energy evaluation routine.
   * G and Ov are expected to be in shared memory.
   */
  template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix_shared(const WlkSet& wset,
                            MatA&& RefsA,
                            MatB&& RefsB,
                            MatG&& G,
                            TVec&& Ov,
                            bool herm,
                            bool compact,
                            bool transposed);

  template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix_batched(const WlkSet& wset,
                             MatA&& RefsA,
                             MatB&& RefsB,
                             MatG&& G,
                             TVec&& Ov,
                             bool herm,
                             bool compact,
                             bool transposed);

  template<class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix_shared(std::vector<MatA>& Left,
                            std::vector<MatB>& Right,
                            std::vector<MatG>& G,
                            TVec&& Ov,
                            double LogOverlapFactor,
                            bool herm,
                            bool compact);

  template<class MatA, class MatB, class MatG, class TVec>
  void DensityMatrix_batched(std::vector<MatA>& Left,
                             std::vector<MatB>& Right,
                             std::vector<MatG>& G,
                             TVec&& Ov,
                             double LogOverlapFactor,
                             bool herm,
                             bool compact);

  template<class MatSM, class MatG, class TVec>
  void MixedDensityMatrix_for_E_from_SM(const MatSM& SM, MatG&& G, TVec&& Ov, int nd, double LogOverlapFactor);

  /*
   * Calculates the local energy and overlaps of all the walkers in the set and
   * returns them in the appropriate data structures
   */
  template<class WlkSet, class Mat, class TVec>
  void Energy_shared(const WlkSet& wset, Mat&& E, TVec&& Ov);

  template<class WlkSet, class MatG, class TVec>
  void MixedDensityMatrix_shared(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false);

  template<class WlkSet, class MatG, class TVec>
  void MixedDensityMatrix_batched(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false);

  template<class WlkSet, class TVec>
  void Overlap_shared(const WlkSet& wset, TVec&& Ov);

  template<class WlkSet, class TVec>
  void Overlap_batched(const WlkSet& wset, TVec&& Ov);

  template<class WlkSet>
  void Orthogonalize_batched(WlkSet& wset, bool impSamp);

  template<class WlkSet>
  void Orthogonalize_shared(WlkSet& wset, bool impSamp);

  /*
   * Calculates the local energy and overlaps of all the walkers in the set and
   * returns them in the appropriate data structures
   */
  template<class WlkSet, class Mat, class TVec>
  void Energy_distributed(const WlkSet& wset, Mat&& E, TVec&& Ov)
  {
    if (ci.size() == 1)
      Energy_distributed_singleDet(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
    else
      Energy_distributed_multiDet(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
  }

  template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
  void WalkerAveragedDensityMatrix_batched(const WlkSet& wset,
                                           CVec1& wgt,
                                           MatG& G,
                                           CVec2& denom,
                                           Mat1&& Ovlp,
                                           Mat2&& DMsum,
                                           bool free_projection,
                                           boost::multi::array_ref<ComplexType, 3>* Refs,
                                           boost::multi::array<ComplexType, 2>* detR);

  template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
  void WalkerAveragedDensityMatrix_shared(const WlkSet& wset,
                                          CVec1& wgt,
                                          MatG& G,
                                          CVec2& denom,
                                          Mat1&& Ovlp,
                                          Mat2&& DMsum,
                                          bool free_projection,
                                          boost::multi::array_ref<ComplexType, 3>* Refs,
                                          boost::multi::array<ComplexType, 2>* detR);

  template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
  void WalkerAveragedDensityMatrix_shared_single_det(const WlkSet& wset,
                                                     CVec1& wgt,
                                                     MatG& G,
                                                     CVec2& denom,
                                                     Mat1&& Ovlp,
                                                     Mat2&& DMsum,
                                                     bool free_projection,
                                                     boost::multi::array_ref<ComplexType, 3>* Refs,
                                                     boost::multi::array<ComplexType, 2>* detR);

  template<class WlkSet, class Mat, class TVec>
  void Energy_distributed_singleDet(const WlkSet& wset, Mat&& E, TVec&& Ov);

  template<class WlkSet, class Mat, class TVec>
  void Energy_distributed_multiDet(const WlkSet& wset, Mat&& E, TVec&& Ov);

  int dm_size(bool full) const
  {
    switch (walker_type)
    {
    case CLOSED: // closed-shell RHF
      return (full) ? (NMO * NMO) : (NAEA * NMO);
      break;
    case COLLINEAR:
      return (full) ? (2 * NMO * NMO) : ((NAEA + NAEB) * NMO);
      break;
    case NONCOLLINEAR:
      return (full) ? (4 * NMO * NMO) : (NAEA * 2 * NMO);
      break;
    default:
      APP_ABORT(" Error: Unknown walker_type in dm_size. \n");
      return -1;
    }
  }
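  // Worked example (illustrative, derived from the cases above): for NMO=20 and
  // NAEA=NAEB=5,
  //   CLOSED:       compact = 5*20  = 100,  full = 20*20   = 400
  //   COLLINEAR:    compact = 10*20 = 200,  full = 2*20*20 = 800
  //   NONCOLLINEAR: compact = 5*40  = 200,  full = 4*20*20 = 1600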
  // dimensions for each component of the DM.
  std::pair<int, int> dm_dims(bool full, SpinTypes sp = Alpha) const
  {
    using arr = std::pair<int, int>;
    switch (walker_type)
    {
    case CLOSED: // closed-shell RHF
      return (full) ? (arr{NMO, NMO}) : (arr{NAEA, NMO});
      break;
    case COLLINEAR:
      return (full) ? (arr{NMO, NMO}) : ((sp == Alpha) ? (arr{NAEA, NMO}) : (arr{NAEB, NMO}));
      break;
    case NONCOLLINEAR:
      return (full) ? (arr{2 * NMO, 2 * NMO}) : (arr{NAEA, 2 * NMO});
      break;
    default:
      APP_ABORT(" Error: Unknown walker_type in dm_dims. \n");
      return arr{-1, -1};
    }
  }
};

} // namespace afqmc

} // namespace qmcplusplus

#include "AFQMC/Wavefunctions/NOMSD.icc"

#endif