1 #include <cstdlib>
2 #include <algorithm>
3 #include <complex>
4 #include <iostream>
5 #include <fstream>
6 #include <iomanip>
7 #include <map>
8 #include <utility>
9 #include <vector>
10 #include <numeric>
11 #include <functional>
12 #if defined(USE_MPI)
13 #include <mpi.h>
14 #endif
15 
16 #include "Configuration.h"
17 #include "type_traits/container_traits_multi.h"
18 #include "hdf/hdf_multi.h"
19 #include "hdf/hdf_archive.h"
20 
21 #include "AFQMC/config.h"
22 #include "AFQMC/Utilities/Utils.hpp"
23 #include "AFQMC/Utilities/kp_utilities.hpp"
24 #include "RealDenseHamiltonian_v2.h"
25 #include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
26 
27 namespace qmcplusplus
28 {
29 namespace afqmc
30 {
31 #ifndef QMC_COMPLEX
32 
getHamiltonianOperations(bool pureSD,bool addCoulomb,WALKER_TYPES type,std::vector<PsiT_Matrix> & PsiT,double cutvn,double cutv2,TaskGroup_ & TGprop,TaskGroup_ & TGwfn,hdf_archive & hdf_restart)33 HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pureSD,
34                                                                         bool addCoulomb,
35                                                                         WALKER_TYPES type,
36                                                                         std::vector<PsiT_Matrix>& PsiT,
37                                                                         double cutvn,
38                                                                         double cutv2,
39                                                                         TaskGroup_& TGprop,
40                                                                         TaskGroup_& TGwfn,
41                                                                         hdf_archive& hdf_restart)
42 {
43   using shmIMatrix    = boost::multi::array<int, 2, shared_allocator<int>>;
44   using shmCVector    = boost::multi::array<ComplexType, 1, shared_allocator<ComplexType>>;
45   using shmCMatrix    = boost::multi::array<ComplexType, 2, shared_allocator<ComplexType>>;
46   using shmRMatrix    = boost::multi::array<RealType, 2, shared_allocator<RealType>>;
47   using shmCTensor    = boost::multi::array<ComplexType, 3, shared_allocator<ComplexType>>;
48   using shmSpMatrix   = boost::multi::array<SPComplexType, 2, shared_allocator<SPComplexType>>;
49   using shmSpRMatrix  = boost::multi::array<SPRealType, 2, shared_allocator<SPRealType>>;
50   using shmSp3Tensor  = boost::multi::array<SPComplexType, 3, shared_allocator<SPComplexType>>;
51   using IVector       = boost::multi::array<int, 1>;
52   using CMatrix       = boost::multi::array<ComplexType, 2>;
53   using SpMatrix      = boost::multi::array<SPComplexType, 2>;
54   using SpVector_ref  = boost::multi::array_ref<SPComplexType, 1>;
55   using SpMatrix_ref  = boost::multi::array_ref<SPComplexType, 2>;
56   using SpRMatrix     = boost::multi::array<SPRealType, 2>;
57   using SpRMatrix_ref = boost::multi::array_ref<SPRealType, 2>;
58   using CMatrix_ref   = boost::multi::array_ref<ComplexType, 2>;
59   using RMatrix_ref   = boost::multi::array_ref<RealType, 2>;
60   using Sp3Tensor_ref = boost::multi::array_ref<SPComplexType, 3>;
61 
62   if (type == COLLINEAR)
63     assert(PsiT.size() % 2 == 0);
64   int nspins = ((type != COLLINEAR) ? 1 : 2);
65   int ndet   = PsiT.size() / nspins;
66   int nup    = PsiT[0].size(0);
67   int ndown  = 0;
68   if (nspins == 2)
69     ndown = PsiT[1].size(0);
70   int NEL = nup + ndown;
71 
72   // distribute work over equivalent nodes in TGprop.TG() across TG.Global()
73   auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
74 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
75   auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
76 #else
77   auto distNode(TG.Node().split(0, TG.Node().rank()));
78 #endif
79   auto Qcomm_roots(Qcomm.split(distNode.rank(), Qcomm.rank()));
80   auto distNode_roots(TG.Global().split(distNode.rank(), TG.Global().rank()));
81 
82   hdf_archive dump(TG.Global());
83   // right now only Node.root() reads
84   if (distNode.root())
85   {
86     if (!dump.open(fileName, H5F_ACC_RDONLY))
87     {
88       app_error() << " Error opening integral file in THCHamiltonian. \n";
89       APP_ABORT("");
90     }
91     if (!dump.push("Hamiltonian", false))
92     {
93       app_error() << " Error in THCHamiltonian::getHamiltonianOperations():"
94                   << " Group not Hamiltonian found. \n";
95       APP_ABORT("");
96     }
97   }
98 
99   std::vector<int> Idata(8);
100   if (TG.Global().root())
101   {
102     if (!dump.readEntry(Idata, "dims"))
103     {
104       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
105                   << " Problems reading dims. \n";
106       APP_ABORT("");
107     }
108   }
109   TG.Global().broadcast_n(Idata.begin(), 8, 0);
110 
111   ValueType E0;
112   if (TG.Global().root())
113   {
114     std::vector<RealType> E_(2);
115     if (!dump.readEntry(E_, "Energies"))
116     {
117       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
118                   << " Problems reading Energies. \n";
119       APP_ABORT("");
120     }
121     E0 = E_[0] + E_[1];
122   }
123   TG.Global().broadcast_n(&E0, 1, 0);
124 
125   int global_ncvecs = Idata[7];
126   int nc0, ncN;
127   int node_number    = TGprop.getLocalGroupNumber();
128   int nnodes_per_TG  = TGprop.getNGroupsPerTG();
129   std::tie(nc0, ncN) = FairDivideBoundary(node_number, global_ncvecs, nnodes_per_TG);
130   int local_ncv      = ncN - nc0;
131 
132   shmRMatrix H1({NMO, NMO}, shared_allocator<RealType>{TG.Node()});
133   shmSpRMatrix Likn({NMO * NMO, local_ncv}, shared_allocator<SPRealType>{distNode});
134 
135   if (TG.Node().root())
136   {
137     // now read H1, use ref to avoid issues with shared pointers!
138     std::vector<int> shape;
139     if (dump.getShape<boost::multi::array<RealType, 2>>("hcore", shape))
140     {
141       if (shape.size() == 3)
142       {
143         app_error() << " Found complex one-body integrals in RealDenseHamiltonian_v2::getHamiltonianOperations().\n";
144         app_error() << " Please generate real integrals.\n";
145         APP_ABORT("");
146       }
147     }
148     RMatrix_ref h1_(to_address(H1.origin()), H1.extensions());
149     if (!dump.readEntry(h1_, std::string("hcore")))
150     {
151       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
152                   << " Problems reading /Hamiltonian/hcore. \n";
153       APP_ABORT("");
154     }
155   }
156   if (distNode.root())
157   {
158     // read L
159     if (!dump.push("DenseFactorized", false))
160     {
161       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
162                   << " Group DenseFactorized not found. \n";
163       APP_ABORT("");
164     }
165     SpRMatrix_ref L(to_address(Likn.origin()), Likn.extensions());
166     hyperslab_proxy<SpRMatrix_ref, 2> hslab(L,
167                                             std::array<size_t, 2>{static_cast<size_t>(NMO * NMO),
168                                                                   static_cast<size_t>(global_ncvecs)},
169                                             std::array<size_t, 2>{static_cast<size_t>(NMO * NMO),
170                                                                   static_cast<size_t>(local_ncv)},
171                                             std::array<size_t, 2>{0, static_cast<size_t>(nc0)});
172     std::vector<int> shape;
173     if (dump.getShape<boost::multi::array<RealType, 2>>("L", shape))
174     {
175       if (shape.size() == 3)
176       {
177         app_log()
178             << " Error: Found complex cholesky integrals in RealDenseHamiltonian_v2::getHamiltonianOperations().\n";
179         APP_ABORT(" Please generate real integrals.\n");
180       }
181     }
182     if (!dump.readEntry(hslab, std::string("L")))
183     {
184       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
185                   << " Problems reading /Hamiltonian/DenseFactorized/L. \n";
186       APP_ABORT("");
187     }
188     if (Likn.size(0) != NMO * NMO || Likn.size(1) != local_ncv)
189     {
190       app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
191                   << " Problems reading /Hamiltonian/DenseFactorized/L. \n"
192                   << " Unexpected dimensins: " << Likn.size(0) << " " << Likn.size(1) << std::endl;
193       APP_ABORT("");
194     }
195     dump.pop();
196   }
197   TG.Node().barrier();
198 
199   shmCMatrix vn0({NMO, NMO}, shared_allocator<ComplexType>{distNode});
200   shmCMatrix haj({ndet, NEL * NMO}, shared_allocator<ComplexType>{TG.Node()});
201   std::vector<shmSp3Tensor> Lnak;
202   Lnak.reserve(PsiT.size());
203   for (int nd = 0; nd < PsiT.size(); nd++)
204     Lnak.emplace_back(shmSp3Tensor({local_ncv, PsiT[nd].size(0), NMO}, shared_allocator<SPComplexType>{distNode}));
205   int nrow = NEL;
206   if (ndet > 1)
207     nrow = 0; // not used if ndet>1
208   TG.Node().barrier();
209 
210   // for simplicity
211   CMatrix H1C({NMO, NMO});
212   copy_n_cast(to_address(H1.origin()), NMO * NMO, H1C.origin());
213 
214   int nt = 0;
215   for (int nd = 0; nd < ndet; nd++)
216   {
217     // haj and add half-transformed right-handed rotation for Q=0
218     if (nd % TG.Node().size() != TG.Node().rank())
219       continue;
220     if (type == COLLINEAR)
221     {
222       CMatrix_ref haj_r(to_address(haj[nd].origin()), {nup, NMO});
223       ma::product(PsiT[2 * nd], H1C, haj_r);
224       CMatrix_ref hbj_r(to_address(haj[nd].origin()) + (nup * NMO), {ndown, NMO});
225       if (ndown > 0)
226         ma::product(PsiT[2 * nd + 1], H1C, hbj_r);
227     }
228     else
229     {
230       CMatrix_ref haj_r(to_address(haj[nd].origin()), {nup, NMO});
231       ma::product(ComplexType(2.0), PsiT[nd], H1C, ComplexType(0.0), haj_r);
232     }
233   }
234   // Generate Lnak
235   {
236     CMatrix lik({NMO, NMO});
237     CMatrix lak({nup, NMO});
238     for (int nd = 0; nd < ndet; nd++)
239     {
240       // all nodes across Qcomm share same segment {nc0,ncN}
241       for (int nc = 0; nc < local_ncv; nc++)
242       {
243         if (nc % Qcomm.size() != Qcomm.rank())
244           continue;
245         for (int i = 0, ik = 0; i < NMO; i++)
246           for (int k = 0; k < NMO; k++, ik++)
247             lik[i][k] = ComplexType(static_cast<RealType>(Likn[ik][nc]), 0.0);
248         ma::product(PsiT[nspins * nd], lik, lak);
249         copy_n_cast(lak.origin(), nup * NMO, to_address(Lnak[nspins * nd][nc].origin()));
250         if (type == COLLINEAR)
251         {
252           ma::product(PsiT[2 * nd + 1], lik, lak.sliced(0, ndown));
253           copy_n_cast(lak.origin(), ndown * NMO, to_address(Lnak[2 * nd + 1][nc].origin()));
254         }
255       }
256     }
257   }
258   TG.Global().barrier();
259   if (distNode.root())
260   {
261     for (auto& v : Lnak)
262       Qcomm_roots.all_reduce_in_place_n(to_address(v.origin()), v.num_elements(), std::plus<>());
263     std::fill_n(to_address(vn0.origin()), vn0.num_elements(), ComplexType(0.0));
264   }
265   TG.Node().barrier();
266 
267   // calculate vn0(i,l) = -0.5 sum_j sum_n L[i][j][n] L[j][l][n] = -0.5 sum_j sum_n L[i][j][n] L[l][j][n]
268   {
269     int i0, iN;
270     std::tie(i0, iN) = FairDivideBoundary(Qcomm.rank(), NMO, Qcomm.size());
271     if (iN > i0)
272     {
273       SpRMatrix v_({iN - i0, NMO});
274       SpRMatrix_ref Lijn(to_address(Likn.origin()), {NMO, NMO * local_ncv});
275       ma::product(-0.5, Lijn.sliced(i0, iN), ma::T(Lijn), 0.0, v_);
276       copy_n_cast(v_.origin(), v_.num_elements(), to_address(vn0[i0].origin()));
277     }
278   }
279   TG.Node().barrier();
280 
281   if (distNode.root())
282   {
283     distNode_roots.all_reduce_in_place_n(to_address(vn0.origin()), vn0.num_elements(), std::plus<>());
284     dump.pop();
285     dump.close();
286   }
287 
288   //  if(TG.TG_local().size() > 1 || not (batched=="yes" || batched == "true" ))
289   //    return HamiltonianOperations(Real3IndexFactorization(TGwfn,type,std::move(H1),std::move(haj),
290   //            std::move(Likn),std::move(Lakn),std::move(Lank),std::move(vn0),E0,nc0,global_ncvecs));
291   //  else
292   return HamiltonianOperations(Real3IndexFactorization_batched_v2(type, TGprop, std::move(H1), std::move(haj),
293                                                                   std::move(Likn), std::move(Lnak), std::move(vn0), E0,
294                                                                   device_allocator<ComplexType>{}, nc0, global_ncvecs,
295                                                                   max_memory_MB));
296 }
297 
298 #else
299 
300 HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pureSD,
301                                                                         bool addCoulomb,
302                                                                         WALKER_TYPES type,
303                                                                         std::vector<PsiT_Matrix>& PsiT,
304                                                                         double cutvn,
305                                                                         double cutv2,
306                                                                         TaskGroup_& TGprop,
307                                                                         TaskGroup_& TGwfn,
308                                                                         hdf_archive& hdf_restart)
309 {
310   APP_ABORT(" Error: RealDenseHamiltonian_v2 requires complex build (QMC_COMPLEX=1). \n");
311   return HamiltonianOperations();
312 }
313 
314 
315 #endif
316 
317 } // namespace afqmc
318 } // namespace qmcplusplus
319