1 //////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source
3 // License.  See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 // Miguel A. Morales, moralessilva2@llnl.gov
9 //    Lawrence Livermore National Laboratory
10 //
11 // File created by:
12 // Miguel A. Morales, moralessilva2@llnl.gov
13 //    Lawrence Livermore National Laboratory
14 ////////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef QMCPLUSPLUS_AFQMC_NOMSD_H
17 #define QMCPLUSPLUS_AFQMC_NOMSD_H
18 
19 #include <vector>
20 #include <map>
21 #include <string>
22 #include <iostream>
23 #include <tuple>
24 
25 #include "AFQMC/Utilities/readWfn.h"
26 #include "AFQMC/config.h"
27 #include "mpi3/shm/mutex.hpp"
28 #include "AFQMC/Utilities/taskgroup.h"
29 #include "AFQMC/Walkers/WalkerConfig.hpp"
30 #include "AFQMC/Memory/buffer_managers.h"
31 
32 #include "AFQMC/HamiltonianOperations/HamiltonianOperations.hpp"
33 #include "AFQMC/SlaterDeterminantOperations/SlaterDetOperations.hpp"
34 
35 
36 namespace qmcplusplus
37 {
38 namespace afqmc
39 {
40 /*
41  * Class that implements a multi-Slater determinant trial wave-function.
42  * Single determinant wfns are also allowed.
43  * No relation between different determinants in the expansion is assumed.
44  * Designed for non-orthogonal MSD expansions.
45  * For particle-hole orthogonal MSD wfns, use FastMSD.
46  */
47 template<class devPsiT>
48 class NOMSD : public AFQMCInfo
49 {
50   // Note:
51   // if number_of_devices > 0, nextra should always be 0,
52   // so code doesn't need to be portable in places guarded by if(nextra>0)
53 
54   // allocators
55   using Allocator        = device_allocator<ComplexType>;
56   using Allocator_shared = localTG_allocator<ComplexType>;
57 
58   // type defs
59   using pointer              = typename Allocator::pointer;
60   using const_pointer        = typename Allocator::const_pointer;
61   using pointer_shared       = typename Allocator_shared::pointer;
62   using const_pointer_shared = typename Allocator_shared::const_pointer;
63 
64   using buffer_alloc_type     = DeviceBufferManager::template allocator_t<ComplexType>;
65   using shm_buffer_alloc_type = LocalTGBufferManager::template allocator_t<ComplexType>;
66 
67   using CVector      = boost::multi::array<ComplexType, 1, Allocator>;
68   using CMatrix      = boost::multi::array<ComplexType, 2, Allocator>;
69   using CTensor      = boost::multi::array<ComplexType, 3, Allocator>;
70   using CVector_ref  = boost::multi::array_ref<ComplexType, 1, pointer>;
71   using CMatrix_ref  = boost::multi::array_ref<ComplexType, 2, pointer>;
72   using CMatrix_ptr  = boost::multi::array_ptr<ComplexType, 2, pointer>;
73   using CMatrix_cref = boost::multi::array_ref<const ComplexType, 2, const_pointer>;
74   using CTensor_ref  = boost::multi::array_ref<ComplexType, 3, pointer>;
75   using CTensor_cref = boost::multi::array_ref<const ComplexType, 3, const_pointer>;
76   using shmCVector   = boost::multi::array<ComplexType, 1, Allocator_shared>;
77   using shmCMatrix   = boost::multi::array<ComplexType, 2, Allocator_shared>;
78   using shared_mutex = boost::mpi3::shm::mutex;
79 
80   using stdCVector  = boost::multi::array<ComplexType, 1>;
81   using stdCMatrix  = boost::multi::array<ComplexType, 2>;
82   using stdCTensor  = boost::multi::array<ComplexType, 3>;
83   using mpi3CVector = boost::multi::array<ComplexType, 1, shared_allocator<ComplexType>>;
84   using mpi3CMatrix = boost::multi::array<ComplexType, 2, shared_allocator<ComplexType>>;
85   using mpi3CTensor = boost::multi::array<ComplexType, 3, shared_allocator<ComplexType>>;
86 
87   using stdCMatrix_ref = boost::multi::array_ref<ComplexType, 2>;
88 
89   using StaticVector  = boost::multi::static_array<ComplexType, 1, buffer_alloc_type>;
90   using StaticMatrix  = boost::multi::static_array<ComplexType, 2, buffer_alloc_type>;
91   using Static3Tensor = boost::multi::static_array<ComplexType, 3, buffer_alloc_type>;
92 
93   using StaticSHMVector = boost::multi::static_array<ComplexType, 1, shm_buffer_alloc_type>;
94   using StaticSHMMatrix = boost::multi::static_array<ComplexType, 2, shm_buffer_alloc_type>;
95 
96 public:
97   template<class MType>
NOMSD(AFQMCInfo & info,xmlNodePtr cur,afqmc::TaskGroup_ & tg_,SlaterDetOperations && sdet_,HamiltonianOperations && hop_,std::vector<ComplexType> && ci_,std::vector<MType> && orbs_,WALKER_TYPES wlk,ValueType nce,int targetNW=1)98   NOMSD(AFQMCInfo& info,
99         xmlNodePtr cur,
100         afqmc::TaskGroup_& tg_,
101         SlaterDetOperations&& sdet_,
102         HamiltonianOperations&& hop_,
103         std::vector<ComplexType>&& ci_,
104         std::vector<MType>&& orbs_,
105         WALKER_TYPES wlk,
106         ValueType nce,
107         int targetNW = 1)
108       : AFQMCInfo(info),
109         TG(tg_),
110         alloc_(), // right now device_allocator is default constructible
111         alloc_shared_(make_localTG_allocator<ComplexType>(TG)),
112         buffer_manager(),
113         shm_buffer_manager(),
114         SDetOp(std::move(sdet_)),
115         HamOp(std::move(hop_)),
116         ci(std::move(ci_)),
117         OrbMats(move_vector<devPsiT>(std::move(orbs_))),
118         RefOrbMats({0, 0}, shared_allocator<ComplexType>{TG.Node()}),
119         mutex(std::make_unique<shared_mutex>(TG.TG_local())),
120         walker_type(wlk),
121         nspins((walker_type == COLLINEAR) ? (2) : (1)),
122         number_of_references(-1),
123         NuclearCoulombEnergy(nce),
124         last_number_extra_tasks(-1),
125         last_task_index(-1),
126         local_group_comm(),
127         shmbuff_for_G(nullptr)
128   {
129     compact_G_for_vbias     = (ci.size() == 1); // this should be input, since it is determined by HOps
130     transposed_G_for_vbias_ = HamOp.transposed_G_for_vbias();
131     transposed_G_for_E_     = HamOp.transposed_G_for_E();
132     transposed_vHS_         = HamOp.transposed_vHS();
133 
134     excitedState = false;
135     std::string rediag("");
136     std::string excited_file("");
137     std::string svd_Gf("no");
138     std::string svd_O("no");
139     std::string svd_Gm("no");
140     int i_ = -1, a_ = -1;
141     nbatch    = ((number_of_devices() > 0) ? -1 : 0);
142     nbatch_qr = ((number_of_devices() > 0) ? -1 : 0);
143     if (NMO > 1024 || NAEA > 512)
144       nbatch_qr = 0;
145 
146     ParameterSet m_param;
147     m_param.add(number_of_references, "number_of_references");
148     m_param.add(number_of_references, "nrefs");
149     m_param.add(excited_file, "excited");
150     // generalize this to multi-particle excitations, how do I read a list of integers???
151     m_param.add(i_, "i");
152     m_param.add(a_, "a");
153     m_param.add(rediag, "rediag");
154     m_param.add(svd_Gf, "svd_with_Gfull");
155     m_param.add(svd_Gm, "svd_with_Gmix");
156     m_param.add(svd_O, "svd_with_Ovlp");
157     if (TG.TG_local().size() == 1)
158       m_param.add(nbatch, "nbatch");
159     if (TG.TG_local().size() == 1)
160       m_param.add(nbatch_qr, "nbatch_qr");
161     m_param.put(cur);
162 
163     if (omp_get_num_threads() > 1 && (nbatch == 0))
164     {
165       app_log() << " WARNING!!!: Found OMP_NUM_THREADS > 1 with nbatch=0.\n"
166                 << "             This will lead to low performance. Set nbatch. \n";
167     }
168     std::transform(svd_Gf.begin(), svd_Gf.end(), svd_Gf.begin(), (int (*)(int))tolower);
169     if (svd_Gf == "yes" || svd_Gf == "true")
170       useSVD_in_Gfull = true;
171     std::transform(svd_Gm.begin(), svd_Gm.end(), svd_Gm.begin(), (int (*)(int))tolower);
172     if (svd_Gm == "yes" || svd_Gm == "true")
173       useSVD_in_Gmix = true;
174     std::transform(svd_O.begin(), svd_O.end(), svd_O.begin(), (int (*)(int))tolower);
175     if (svd_O == "yes" || svd_O == "true")
176       useSVD_in_Ovlp = true;
177 
178 
179     if (excited_file != "" && i_ >= 0 && a_ >= 0)
180     {
181       if (i_ < NMO && a_ < NMO)
182       {
183         if (i_ >= NAEA || a_ < NAEA)
184           APP_ABORT(" Errors: Inconsistent excited orbitals for alpha electrons. \n");
185         excitedState        = true;
186         maxOccupExtendedMat = {a_, NAEB};
187         numExcitations      = {1, 0};
188         excitations.push_back({i_, a_});
189       }
190       else if (i_ >= NMO && a_ >= NMO)
191       {
192         if (i_ >= NMO + NAEB || a_ < NMO + NAEB)
193           APP_ABORT(" Errors: Inconsistent excited orbitals for beta electrons. \n");
194         excitedState        = true;
195         maxOccupExtendedMat = {NAEA, a_ - NMO};
196         numExcitations      = {0, 1};
197         excitations.push_back({i_ - NMO, a_ - NMO});
198       }
199       else
200       {
201         APP_ABORT(" Errors: Inconsistent excited orbitals. \n");
202       }
203       stdCTensor excitedOrbMat_;
204       readWfn(excited_file, excitedOrbMat_, NMO, maxOccupExtendedMat.first, maxOccupExtendedMat.second);
205       excitedOrbMat = excitedOrbMat_;
206     }
207 
208     std::transform(rediag.begin(), rediag.end(), rediag.begin(), (int (*)(int))tolower);
209     if (rediag == "yes" || rediag == "true")
210       recompute_ci();
211   }
212 
~NOMSD()213   ~NOMSD() {}
214 
215   NOMSD(NOMSD const& other) = delete;
216   NOMSD& operator=(NOMSD const& other) = delete;
217   NOMSD(NOMSD&& other)                 = default;
218   NOMSD& operator=(NOMSD&& other) = delete;
219 
local_number_of_cholesky_vectors() const220   int local_number_of_cholesky_vectors() const { return HamOp.local_number_of_cholesky_vectors(); }
global_number_of_cholesky_vectors() const221   int global_number_of_cholesky_vectors() const { return HamOp.global_number_of_cholesky_vectors(); }
global_origin_cholesky_vector() const222   int global_origin_cholesky_vector() const { return HamOp.global_origin_cholesky_vector(); }
distribution_over_cholesky_vectors() const223   bool distribution_over_cholesky_vectors() const { return HamOp.distribution_over_cholesky_vectors(); }
224 
size_of_G_for_vbias() const225   int size_of_G_for_vbias() const { return dm_size(!compact_G_for_vbias); }
226 
transposed_G_for_vbias() const227   bool transposed_G_for_vbias() const { return transposed_G_for_vbias_; }
transposed_G_for_E() const228   bool transposed_G_for_E() const { return transposed_G_for_E_; }
transposed_vHS() const229   bool transposed_vHS() const { return transposed_vHS_; }
getWalkerType() const230   WALKER_TYPES getWalkerType() const { return walker_type; }
231 
232   template<class Vec>
233   void vMF(Vec&& v);
234 
getSlaterDetOperations()235   SlaterDetOperations* getSlaterDetOperations() { return std::addressof(SDetOp); }
getHamiltonianOperations()236   HamiltonianOperations* getHamiltonianOperations() { return std::addressof(HamOp); }
237 
238   /*
239      * local contribution to vbias for the Green functions in G
240      * G: [size_of_G_for_vbias()][nW]
241      * v: [local # Chol. Vectors][nW]
242      */
243   template<class MatG, class MatA>
vbias(const MatG & G,MatA && v,double a=1.0)244   void vbias(const MatG& G, MatA&& v, double a = 1.0)
245   {
246     if (transposed_G_for_vbias_)
247     {
248       assert(G.size(0) == v.size(1));
249       assert(G.size(1) == size_of_G_for_vbias());
250     }
251     else
252     {
253       assert(G.size(0) == size_of_G_for_vbias());
254       assert(G.size(1) == v.size(1));
255     }
256     assert(v.size(0) == HamOp.local_number_of_cholesky_vectors());
257     if (ci.size() == 1)
258     {
259       // HamOp expects a compact Gc with alpha/beta components
260       HamOp.vbias(G, std::forward<MatA>(v), a);
261     }
262     else
263     {
264       if (walker_type == CLOSED)
265         HamOp.vbias(G, std::forward<MatA>(v), a); // factor of 2 now in HamOps
266       else if (walker_type == NONCOLLINEAR)
267         HamOp.vbias(G, std::forward<MatA>(v), a);
268       else
269       {
270         if(transposed_G_for_vbias_)
271           APP_ABORT(" Error in NOMSD::vbias: transposed_G_for_vbias_ should be false. \n");
272         // HamOp expects either alpha or beta, so must be called twice
273         HamOp.vbias(G.sliced(0, NMO * NMO), std::forward<MatA>(v), a, 0.0);
274         HamOp.vbias(G.sliced(NMO * NMO, 2 * NMO * NMO), std::forward<MatA>(v), a, 1.0);
275       }
276     }
277     TG.local_barrier();
278   }
279 
280   /*
281      * local contribution to vHS for the Green functions in G
282      * X: [# chol vecs][nW]
283      * v: [NMO^2][nW]
284      */
285   template<class MatX, class MatA>
vHS(MatX && X,MatA && v,double a=1.0)286   void vHS(MatX&& X, MatA&& v, double a = 1.0)
287   {
288     assert(X.size(0) == HamOp.local_number_of_cholesky_vectors());
289     if (transposed_vHS_)
290       assert(X.size(1) == v.size(0));
291     else
292       assert(X.size(1) == v.size(1));
293     HamOp.vHS(std::forward<MatX>(X), std::forward<MatA>(v), a);
294     TG.local_barrier();
295   }
296 
297   /*
298      * Calculates the local energy and overlaps of all the walkers in the set and stores
299      * them in the wset data
300      */
301   template<class WlkSet>
Energy(WlkSet & wset)302   void Energy(WlkSet& wset)
303   {
304     int nw = wset.size();
305     StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
306     StaticMatrix eloc({nw, 3}, buffer_manager.get_generator().template get_allocator<ComplexType>());
307     Energy(wset, eloc, ovlp);
308     TG.local_barrier();
309     if (TG.getLocalTGRank() == 0)
310     {
311       wset.setProperty(OVLP, ovlp);
312       wset.setProperty(E1_, eloc(eloc.extension(), 0));
313       wset.setProperty(EXX_, eloc(eloc.extension(), 1));
314       wset.setProperty(EJ_, eloc(eloc.extension(), 2));
315     }
316     TG.local_barrier();
317   }
318 
319   /*
320      * Calculates the local energy and overlaps of all the walkers in the set and
321      * returns them in the appropriate data structures
322      */
323   template<class WlkSet, class Mat, class TVec>
Energy(const WlkSet & wset,Mat && E,TVec && Ov)324   void Energy(const WlkSet& wset, Mat&& E, TVec&& Ov)
325   {
326     if (TG.getNGroupsPerTG() > 1)
327       Energy_distributed(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
328     else
329       Energy_shared(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
330   }
331 
332   /*
333      * Calculates the mixed density matrix for all walkers in the walker set.
334      * Options:
335      *  - compact:   If true (default), returns compact form with Dim: [NEL*NMO],
336      *                 otherwise returns full form with Dim: [NMO*NMO].
337      *  - transpose: If false (default), returns standard form with Dim: [XXX][nW]
338      *                 otherwise returns the transpose with Dim: [nW][XXX}
339      */
340   template<class WlkSet, class MatG>
MixedDensityMatrix(const WlkSet & wset,MatG && G,bool compact=true,bool transpose=false)341   void MixedDensityMatrix(const WlkSet& wset, MatG&& G, bool compact = true, bool transpose = false)
342   {
343     int nw = wset.size();
344     StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
345     MixedDensityMatrix(wset, std::forward<MatG>(G), ovlp, compact, transpose);
346   }
347 
348   template<class WlkSet, class MatG, class TVec>
MixedDensityMatrix(const WlkSet & wset,MatG && G,TVec && Ov,bool compact=true,bool transpose=false)349   void MixedDensityMatrix(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false)
350   {
351     if (nbatch != 0)
352       MixedDensityMatrix_batched(wset, std::forward<MatG>(G), std::forward<TVec>(Ov), compact, transpose);
353     else
354       MixedDensityMatrix_shared(wset, std::forward<MatG>(G), std::forward<TVec>(Ov), compact, transpose);
355   }
356 
357   /*
358      * Calculates the density matrix with respect to a given Reference
359      * for all walkers in the walker set.
360      */
361   template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
DensityMatrix(const WlkSet & wset,MatA && RefA,MatB && RefB,MatG && G,TVec && Ov,bool herm,bool compact,bool transposed)362   void DensityMatrix(const WlkSet& wset,
363                      MatA&& RefA,
364                      MatB&& RefB,
365                      MatG&& G,
366                      TVec&& Ov,
367                      bool herm,
368                      bool compact,
369                      bool transposed)
370   {
371     if (nbatch != 0)
372       DensityMatrix_batched(wset, std::forward<MatA>(RefA), std::forward<MatB>(RefB), std::forward<MatG>(G),
373                             std::forward<TVec>(Ov), herm, compact, transposed);
374     else
375       DensityMatrix_shared(wset, std::forward<MatA>(RefA), std::forward<MatB>(RefB), std::forward<MatG>(G),
376                            std::forward<TVec>(Ov), herm, compact, transposed);
377   }
378 
379   template<class MatA, class MatB, class MatG, class TVec>
DensityMatrix(std::vector<MatA> & Left,std::vector<MatB> & Right,std::vector<MatG> & G,TVec && Ov,double LogOverlapFactor,bool herm,bool compact)380   void DensityMatrix(std::vector<MatA>& Left,
381                      std::vector<MatB>& Right,
382                      std::vector<MatG>& G,
383                      TVec&& Ov,
384                      double LogOverlapFactor,
385                      bool herm,
386                      bool compact)
387   {
388     if (nbatch != 0)
389       DensityMatrix_batched(Left, Right, G, std::forward<TVec>(Ov), LogOverlapFactor, herm, compact);
390     else
391       DensityMatrix_shared(Left, Right, G, std::forward<TVec>(Ov), LogOverlapFactor, herm, compact);
392   }
393 
394   /*
395      * Calculates the walker averaged density matrix.
396      * Options:
397      *  - free_projection: If false (default), assumes using phaseless approximation
398      *                       otherwise assumes using free projection.
399      */
400   template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
WalkerAveragedDensityMatrix(const WlkSet & wset,CVec1 & wgt,MatG & G,CVec2 & denom,Mat1 && Ovlp,Mat2 && DMsum,bool free_projection=false,boost::multi::array_ref<ComplexType,3> * Refs=nullptr,boost::multi::array<ComplexType,2> * detR=nullptr)401   void WalkerAveragedDensityMatrix(const WlkSet& wset,
402                                    CVec1& wgt,
403                                    MatG& G,
404                                    CVec2& denom,
405                                    Mat1&& Ovlp,
406                                    Mat2&& DMsum,
407                                    bool free_projection                          = false,
408                                    boost::multi::array_ref<ComplexType, 3>* Refs = nullptr,
409                                    boost::multi::array<ComplexType, 2>* detR     = nullptr)
410   {
411     //      if(nbatch != 0)
412     //        WalkerAveragedDensityMatrix_batched(wset,wgt,G,denom,free_projection,Refs,detR);
413     //      else
414     // having problems with underflow with large (back) projection times and multidets,
415     // mainly from the normalization coming out of the orthonormalization (detR)
416     // writing specialized version for single det which doesn;t have this issues
417     if (ci.size() == 1)
418       WalkerAveragedDensityMatrix_shared_single_det(wset, wgt, G, denom, Ovlp, DMsum, free_projection, Refs, detR);
419     else
420       WalkerAveragedDensityMatrix_shared(wset, wgt, G, denom, Ovlp, DMsum, free_projection, Refs, detR);
421   }
422 
423   /*
424      * Calculates the mixed density matrix for all walkers in the walker set
425      *   with a format consistent with (and expected by) the vbias routine.
426      * This is implementation dependent, so this density matrix should ONLY be used
427      * in conjunction with vbias.
428      */
429   template<class WlkSet, class MatG>
MixedDensityMatrix_for_vbias(const WlkSet & wset,MatG && G)430   void MixedDensityMatrix_for_vbias(const WlkSet& wset, MatG&& G)
431   {
432     int nw = wset.size();
433     StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
434     MixedDensityMatrix(wset, std::forward<MatG>(G), ovlp, compact_G_for_vbias, transposed_G_for_vbias_);
435   }
436 
437   /*
438      * Calculates the overlaps of all walkers in the set. Returns values in arrays.
439      */
440   template<class WlkSet, class TVec>
Overlap(const WlkSet & wset,TVec && Ov)441   void Overlap(const WlkSet& wset, TVec&& Ov)
442   {
443     if (nbatch != 0)
444       Overlap_batched(wset, std::forward<TVec>(Ov));
445     else
446       Overlap_shared(wset, std::forward<TVec>(Ov));
447   }
448 
449   /*
450      * Calculates the overlaps of all walkers in the set. Updates values in wset.
451      */
452   template<class WlkSet>
Overlap(WlkSet & wset)453   void Overlap(WlkSet& wset)
454   {
455     int nw = wset.size();
456     StaticVector ovlp(iextensions<1u>{nw}, buffer_manager.get_generator().template get_allocator<ComplexType>());
457     Overlap(wset, ovlp);
458     TG.local_barrier();
459     if (TG.getLocalTGRank() == 0)
460     {
461       wset.setProperty(OVLP, ovlp);
462     }
463     TG.local_barrier();
464   }
465 
466   /*
467      * Orthogonalizes the Slater matrices of all walkers in the set.
468      * Options:
469      *  - bool importanceSamplingt(default=true): use algorithm appropriate for importance sampling.
470      *         This means that the determinant of the R matrix in the QR decomposition is ignored.
471      *         If false, add the determinant of R to the weight of the walker.
472      */
473   template<class WlkSet>
Orthogonalize(WlkSet & wset,bool impSamp)474   void Orthogonalize(WlkSet& wset, bool impSamp)
475   {
476     if (not excitedState && (nbatch_qr != 0))
477       Orthogonalize_batched(wset, impSamp);
478     else
479       Orthogonalize_shared(wset, impSamp);
480   }
481 
482   /*
483      * Orthogonalizes the Slater matrix of a walker in an excited state calculation.
484      */
485   template<class Mat>
486   void OrthogonalizeExcited(Mat&& A, SpinTypes spin, double LogOverlapFactor);
487 
488 
489   /*
490      * Returns the number of reference Slater Matrices needed for back propagation.
491      */
number_of_references_for_back_propagation() const492   int number_of_references_for_back_propagation() const
493   {
494     if (number_of_references > 0)
495       return number_of_references;
496     else
497       return ((walker_type == COLLINEAR) ? OrbMats.size() / 2 : OrbMats.size());
498   }
499 
getReferenceWeight(int i) const500   ComplexType getReferenceWeight(int i) const { return ci[i]; }
501 
502   /*
503      * Returns the reference Slater Matrices needed for back propagation.
504      */
505   template<class Mat, class Ptr = ComplexType*>
getReferencesForBackPropagation(Mat && A)506   void getReferencesForBackPropagation(Mat&& A)
507   {
508     static_assert(std::decay<Mat>::type::dimensionality == 2, "Wrong dimensionality");
509     int ndet = number_of_references_for_back_propagation();
510     assert(A.size(0) == ndet);
511     if (RefOrbMats.size(0) == 0)
512     {
513       TG.Node().barrier(); // for safety
514       int nrow(NMO * ((walker_type == NONCOLLINEAR) ? 2 : 1));
515       int ncol(NAEA + ((walker_type == CLOSED) ? 0 : NAEB)); //careful here, spins are stored contiguously
516       RefOrbMats.reextent({ndet, nrow * ncol});
517       TG.Node().barrier(); // for safety
518       if (TG.Node().root())
519       {
520         if (walker_type != COLLINEAR)
521         {
522           for (int i = 0; i < ndet; ++i)
523           {
524             boost::multi::array_ref<ComplexType, 2> A_(to_address(RefOrbMats[i].origin()), {nrow, ncol});
525             ma::Matrix2MAREF('H', OrbMats[i], A_);
526           }
527         }
528         else
529         {
530           for (int i = 0; i < ndet; ++i)
531           {
532             boost::multi::array_ref<ComplexType, 2> A_(to_address(RefOrbMats[i].origin()), {NMO, NAEA});
533             ma::Matrix2MAREF('H', OrbMats[2 * i], A_);
534             boost::multi::array_ref<ComplexType, 2> B_(A_.origin() + A_.num_elements(), {NMO, NAEB});
535             ma::Matrix2MAREF('H', OrbMats[2 * i + 1], B_);
536           }
537         }
538       }                    // TG.Node().root()
539       TG.Node().barrier(); // for safety
540     }
541     assert(RefOrbMats.size(0) == ndet);
542     assert(RefOrbMats.size(1) == A.size(1));
543     auto&& RefOrbMats_(boost::multi::static_array_cast<ComplexType, ComplexType*>(RefOrbMats));
544     auto&& A_(boost::multi::static_array_cast<ComplexType, Ptr>(A));
545     using std::copy_n;
546     int n0, n1;
547     std::tie(n0, n1) = FairDivideBoundary(TG.getLocalTGRank(), int(A.size(1)), TG.getNCoresPerTG());
548     for (int i = 0; i < ndet; i++)
549       copy_n(RefOrbMats_[i].origin() + n0, n1 - n0, A_[i].origin() + n0);
550     TG.TG_local().barrier();
551   }
552 
553 protected:
554   TaskGroup_& TG;
555 
556   Allocator alloc_;
557   Allocator_shared alloc_shared_;
558 
559   DeviceBufferManager buffer_manager;
560   LocalTGBufferManager shm_buffer_manager;
561 
562   //SlaterDetOperations_shared<ComplexType> SDetOp;
563   SlaterDetOperations SDetOp;
564 
565   HamiltonianOperations HamOp;
566 
567   std::vector<ComplexType> ci;
568 
569   // eventually switched from CMatrix to SMHSparseMatrix(node)
570   std::vector<devPsiT> OrbMats;
571   mpi3CMatrix RefOrbMats;
572 
573   std::unique_ptr<shared_mutex> mutex;
574 
575   // in both cases below: closed_shell=0, UHF/ROHF=1, GHF=2
576   WALKER_TYPES walker_type;
577   int nspins;
578 
579   int number_of_references;
580 
581   int nbatch;
582   int nbatch_qr;
583   bool useSVD_in_Ovlp  = false;
584   bool useSVD_in_Gmix  = false;
585   bool useSVD_in_Gfull = false;
586 
587   bool compact_G_for_vbias;
588 
589   // in the 3 cases, true means [nwalk][...], false means [...][nwalk]
590   bool transposed_G_for_vbias_;
591   bool transposed_G_for_E_;
592   bool transposed_vHS_;
593 
594   ValueType NuclearCoulombEnergy;
595 
596   // not elegant, but reasonable for now
597   int last_number_extra_tasks;
598   int last_task_index;
599 
600   // shared_communicator for parallel work within TG_local()
601   //std::unique_ptr<shared_communicator> local_group_comm;
602   shared_communicator local_group_comm;
603   std::unique_ptr<mpi3CVector> shmbuff_for_G;
604 
605   // excited states
606   bool excitedState;
607   std::vector<std::pair<int, int>> excitations;
608   CTensor excitedOrbMat;
609   CMatrix extendedMatAlpha;
610   CMatrix extendedMatBeta;
611   std::pair<int, int> maxOccupExtendedMat;
612   std::pair<int, int> numExcitations;
613 
614   void recompute_ci();
615 
616   /*
617      * Computes the density matrix with respect to a given reference.
618      * Intended to be used in combination with the energy evaluation routine.
619      * G and Ov are expected to be in shared memory.
620      */
621   template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
622   void DensityMatrix_shared(const WlkSet& wset,
623                             MatA&& RefsA,
624                             MatB&& RefsB,
625                             MatG&& G,
626                             TVec&& Ov,
627                             bool herm,
628                             bool compact,
629                             bool transposed);
630 
631   template<class WlkSet, class MatA, class MatB, class MatG, class TVec>
632   void DensityMatrix_batched(const WlkSet& wset,
633                              MatA&& RefsA,
634                              MatB&& RefsB,
635                              MatG&& G,
636                              TVec&& Ov,
637                              bool herm,
638                              bool compact,
639                              bool transposed);
640 
641   template<class MatA, class MatB, class MatG, class TVec>
642   void DensityMatrix_shared(std::vector<MatA>& Left,
643                             std::vector<MatB>& Right,
644                             std::vector<MatG>& G,
645                             TVec&& Ov,
646                             double LogOverlapFactor,
647                             bool herm,
648                             bool compact);
649 
650   template<class MatA, class MatB, class MatG, class TVec>
651   void DensityMatrix_batched(std::vector<MatA>& Left,
652                              std::vector<MatB>& Right,
653                              std::vector<MatG>& G,
654                              TVec&& Ov,
655                              double LogOverlapFactor,
656                              bool herm,
657                              bool compact);
658 
659   template<class MatSM, class MatG, class TVec>
660   void MixedDensityMatrix_for_E_from_SM(const MatSM& SM, MatG&& G, TVec&& Ov, int nd, double LogOverlapFactor);
661 
662   /*
663      * Calculates the local energy and overlaps of all the walkers in the set and
664      * returns them in the appropriate data structures
665      */
666   template<class WlkSet, class Mat, class TVec>
667   void Energy_shared(const WlkSet& wset, Mat&& E, TVec&& Ov);
668 
669   template<class WlkSet, class MatG, class TVec>
670   void MixedDensityMatrix_shared(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false);
671 
672   template<class WlkSet, class MatG, class TVec>
673   void MixedDensityMatrix_batched(const WlkSet& wset, MatG&& G, TVec&& Ov, bool compact = true, bool transpose = false);
674 
675   template<class WlkSet, class TVec>
676   void Overlap_shared(const WlkSet& wset, TVec&& Ov);
677 
678   template<class WlkSet, class TVec>
679   void Overlap_batched(const WlkSet& wset, TVec&& Ov);
680 
681   template<class WlkSet>
682   void Orthogonalize_batched(WlkSet& wset, bool impSamp);
683 
684   template<class WlkSet>
685   void Orthogonalize_shared(WlkSet& wset, bool impSamp);
686 
687   /*
688      * Calculates the local energy and overlaps of all the walkers in the set and
689      * returns them in the appropriate data structures
690      */
691   template<class WlkSet, class Mat, class TVec>
Energy_distributed(const WlkSet & wset,Mat && E,TVec && Ov)692   void Energy_distributed(const WlkSet& wset, Mat&& E, TVec&& Ov)
693   {
694     if (ci.size() == 1)
695       Energy_distributed_singleDet(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
696     else
697       Energy_distributed_multiDet(wset, std::forward<Mat>(E), std::forward<TVec>(Ov));
698   }
699 
700   template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
701   void WalkerAveragedDensityMatrix_batched(const WlkSet& wset,
702                                            CVec1& wgt,
703                                            MatG& G,
704                                            CVec2& denom,
705                                            Mat1&& Ovlp,
706                                            Mat2&& DMsum,
707                                            bool free_projection,
708                                            boost::multi::array_ref<ComplexType, 3>* Refs,
709                                            boost::multi::array<ComplexType, 2>* detR);
710 
711   template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
712   void WalkerAveragedDensityMatrix_shared(const WlkSet& wset,
713                                           CVec1& wgt,
714                                           MatG& G,
715                                           CVec2& denom,
716                                           Mat1&& Ovlp,
717                                           Mat2&& DMsum,
718                                           bool free_projection,
719                                           boost::multi::array_ref<ComplexType, 3>* Refs,
720                                           boost::multi::array<ComplexType, 2>* detR);
721 
722   template<class WlkSet, class MatG, class CVec1, class CVec2, class Mat1, class Mat2>
723   void WalkerAveragedDensityMatrix_shared_single_det(const WlkSet& wset,
724                                                      CVec1& wgt,
725                                                      MatG& G,
726                                                      CVec2& denom,
727                                                      Mat1&& Ovlp,
728                                                      Mat2&& DMsum,
729                                                      bool free_projection,
730                                                      boost::multi::array_ref<ComplexType, 3>* Refs,
731                                                      boost::multi::array<ComplexType, 2>* detR);
732 
733   template<class WlkSet, class Mat, class TVec>
734   void Energy_distributed_singleDet(const WlkSet& wset, Mat&& E, TVec&& Ov);
735 
736   template<class WlkSet, class Mat, class TVec>
737   void Energy_distributed_multiDet(const WlkSet& wset, Mat&& E, TVec&& Ov);
738 
dm_size(bool full) const739   int dm_size(bool full) const
740   {
741     switch (walker_type)
742     {
743     case CLOSED: // closed-shell RHF
744       return (full) ? (NMO * NMO) : (NAEA * NMO);
745       break;
746     case COLLINEAR:
747       return (full) ? (2 * NMO * NMO) : ((NAEA + NAEB) * NMO);
748       break;
749     case NONCOLLINEAR:
750       return (full) ? (4 * NMO * NMO) : (NAEA * 2 * NMO);
751       break;
752     default:
753       APP_ABORT(" Error: Unknown walker_type in dm_size. \n");
754       return -1;
755     }
756   }
757   // dimensions for each component of the DM.
dm_dims(bool full,SpinTypes sp=Alpha) const758   std::pair<int, int> dm_dims(bool full, SpinTypes sp = Alpha) const
759   {
760     using arr = std::pair<int, int>;
761     switch (walker_type)
762     {
763     case CLOSED: // closed-shell RHF
764       return (full) ? (arr{NMO, NMO}) : (arr{NAEA, NMO});
765       break;
766     case COLLINEAR:
767       return (full) ? (arr{NMO, NMO}) : ((sp == Alpha) ? (arr{NAEA, NMO}) : (arr{NAEB, NMO}));
768       break;
769     case NONCOLLINEAR:
770       return (full) ? (arr{2 * NMO, 2 * NMO}) : (arr{NAEA, 2 * NMO});
771       break;
772     default:
773       APP_ABORT(" Error: Unknown walker_type in dm_size. \n");
774       return arr{-1, -1};
775     }
776   }
777 };
778 
779 } // namespace afqmc
780 
781 } // namespace qmcplusplus
782 
783 #include "AFQMC/Wavefunctions/NOMSD.icc"
784 
785 #endif
786