1 #include <cstdlib>
2 #include <algorithm>
3 #include <complex>
4 #include <iostream>
5 #include <fstream>
6 #include <iomanip>
7 #include <map>
8 #include <utility>
9 #include <vector>
10 #include <numeric>
11 #include <functional>
12 #if defined(USE_MPI)
13 #include <mpi.h>
14 #endif
15
16 #include "Configuration.h"
17 #include "type_traits/container_traits_multi.h"
18 #include "hdf/hdf_multi.h"
19 #include "hdf/hdf_archive.h"
20
21 #include "AFQMC/config.h"
22 #include "AFQMC/Utilities/Utils.hpp"
23 #include "AFQMC/Utilities/kp_utilities.hpp"
24 #include "RealDenseHamiltonian_v2.h"
25 #include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
26
27 namespace qmcplusplus
28 {
29 namespace afqmc
30 {
31 #ifndef QMC_COMPLEX
32
getHamiltonianOperations(bool pureSD,bool addCoulomb,WALKER_TYPES type,std::vector<PsiT_Matrix> & PsiT,double cutvn,double cutv2,TaskGroup_ & TGprop,TaskGroup_ & TGwfn,hdf_archive & hdf_restart)33 HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pureSD,
34 bool addCoulomb,
35 WALKER_TYPES type,
36 std::vector<PsiT_Matrix>& PsiT,
37 double cutvn,
38 double cutv2,
39 TaskGroup_& TGprop,
40 TaskGroup_& TGwfn,
41 hdf_archive& hdf_restart)
42 {
43 using shmIMatrix = boost::multi::array<int, 2, shared_allocator<int>>;
44 using shmCVector = boost::multi::array<ComplexType, 1, shared_allocator<ComplexType>>;
45 using shmCMatrix = boost::multi::array<ComplexType, 2, shared_allocator<ComplexType>>;
46 using shmRMatrix = boost::multi::array<RealType, 2, shared_allocator<RealType>>;
47 using shmCTensor = boost::multi::array<ComplexType, 3, shared_allocator<ComplexType>>;
48 using shmSpMatrix = boost::multi::array<SPComplexType, 2, shared_allocator<SPComplexType>>;
49 using shmSpRMatrix = boost::multi::array<SPRealType, 2, shared_allocator<SPRealType>>;
50 using shmSp3Tensor = boost::multi::array<SPComplexType, 3, shared_allocator<SPComplexType>>;
51 using IVector = boost::multi::array<int, 1>;
52 using CMatrix = boost::multi::array<ComplexType, 2>;
53 using SpMatrix = boost::multi::array<SPComplexType, 2>;
54 using SpVector_ref = boost::multi::array_ref<SPComplexType, 1>;
55 using SpMatrix_ref = boost::multi::array_ref<SPComplexType, 2>;
56 using SpRMatrix = boost::multi::array<SPRealType, 2>;
57 using SpRMatrix_ref = boost::multi::array_ref<SPRealType, 2>;
58 using CMatrix_ref = boost::multi::array_ref<ComplexType, 2>;
59 using RMatrix_ref = boost::multi::array_ref<RealType, 2>;
60 using Sp3Tensor_ref = boost::multi::array_ref<SPComplexType, 3>;
61
62 if (type == COLLINEAR)
63 assert(PsiT.size() % 2 == 0);
64 int nspins = ((type != COLLINEAR) ? 1 : 2);
65 int ndet = PsiT.size() / nspins;
66 int nup = PsiT[0].size(0);
67 int ndown = 0;
68 if (nspins == 2)
69 ndown = PsiT[1].size(0);
70 int NEL = nup + ndown;
71
72 // distribute work over equivalent nodes in TGprop.TG() across TG.Global()
73 auto Qcomm(TG.Global().split(TGprop.getLocalGroupNumber(), TG.Global().rank()));
74 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
75 auto distNode(TG.Node().split(TGprop.getLocalGroupNumber(), TG.Node().rank()));
76 #else
77 auto distNode(TG.Node().split(0, TG.Node().rank()));
78 #endif
79 auto Qcomm_roots(Qcomm.split(distNode.rank(), Qcomm.rank()));
80 auto distNode_roots(TG.Global().split(distNode.rank(), TG.Global().rank()));
81
82 hdf_archive dump(TG.Global());
83 // right now only Node.root() reads
84 if (distNode.root())
85 {
86 if (!dump.open(fileName, H5F_ACC_RDONLY))
87 {
88 app_error() << " Error opening integral file in THCHamiltonian. \n";
89 APP_ABORT("");
90 }
91 if (!dump.push("Hamiltonian", false))
92 {
93 app_error() << " Error in THCHamiltonian::getHamiltonianOperations():"
94 << " Group not Hamiltonian found. \n";
95 APP_ABORT("");
96 }
97 }
98
99 std::vector<int> Idata(8);
100 if (TG.Global().root())
101 {
102 if (!dump.readEntry(Idata, "dims"))
103 {
104 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
105 << " Problems reading dims. \n";
106 APP_ABORT("");
107 }
108 }
109 TG.Global().broadcast_n(Idata.begin(), 8, 0);
110
111 ValueType E0;
112 if (TG.Global().root())
113 {
114 std::vector<RealType> E_(2);
115 if (!dump.readEntry(E_, "Energies"))
116 {
117 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
118 << " Problems reading Energies. \n";
119 APP_ABORT("");
120 }
121 E0 = E_[0] + E_[1];
122 }
123 TG.Global().broadcast_n(&E0, 1, 0);
124
125 int global_ncvecs = Idata[7];
126 int nc0, ncN;
127 int node_number = TGprop.getLocalGroupNumber();
128 int nnodes_per_TG = TGprop.getNGroupsPerTG();
129 std::tie(nc0, ncN) = FairDivideBoundary(node_number, global_ncvecs, nnodes_per_TG);
130 int local_ncv = ncN - nc0;
131
132 shmRMatrix H1({NMO, NMO}, shared_allocator<RealType>{TG.Node()});
133 shmSpRMatrix Likn({NMO * NMO, local_ncv}, shared_allocator<SPRealType>{distNode});
134
135 if (TG.Node().root())
136 {
137 // now read H1, use ref to avoid issues with shared pointers!
138 std::vector<int> shape;
139 if (dump.getShape<boost::multi::array<RealType, 2>>("hcore", shape))
140 {
141 if (shape.size() == 3)
142 {
143 app_error() << " Found complex one-body integrals in RealDenseHamiltonian_v2::getHamiltonianOperations().\n";
144 app_error() << " Please generate real integrals.\n";
145 APP_ABORT("");
146 }
147 }
148 RMatrix_ref h1_(to_address(H1.origin()), H1.extensions());
149 if (!dump.readEntry(h1_, std::string("hcore")))
150 {
151 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
152 << " Problems reading /Hamiltonian/hcore. \n";
153 APP_ABORT("");
154 }
155 }
156 if (distNode.root())
157 {
158 // read L
159 if (!dump.push("DenseFactorized", false))
160 {
161 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
162 << " Group DenseFactorized not found. \n";
163 APP_ABORT("");
164 }
165 SpRMatrix_ref L(to_address(Likn.origin()), Likn.extensions());
166 hyperslab_proxy<SpRMatrix_ref, 2> hslab(L,
167 std::array<size_t, 2>{static_cast<size_t>(NMO * NMO),
168 static_cast<size_t>(global_ncvecs)},
169 std::array<size_t, 2>{static_cast<size_t>(NMO * NMO),
170 static_cast<size_t>(local_ncv)},
171 std::array<size_t, 2>{0, static_cast<size_t>(nc0)});
172 std::vector<int> shape;
173 if (dump.getShape<boost::multi::array<RealType, 2>>("L", shape))
174 {
175 if (shape.size() == 3)
176 {
177 app_log()
178 << " Error: Found complex cholesky integrals in RealDenseHamiltonian_v2::getHamiltonianOperations().\n";
179 APP_ABORT(" Please generate real integrals.\n");
180 }
181 }
182 if (!dump.readEntry(hslab, std::string("L")))
183 {
184 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
185 << " Problems reading /Hamiltonian/DenseFactorized/L. \n";
186 APP_ABORT("");
187 }
188 if (Likn.size(0) != NMO * NMO || Likn.size(1) != local_ncv)
189 {
190 app_error() << " Error in RealDenseHamiltonian_v2::getHamiltonianOperations():"
191 << " Problems reading /Hamiltonian/DenseFactorized/L. \n"
192 << " Unexpected dimensins: " << Likn.size(0) << " " << Likn.size(1) << std::endl;
193 APP_ABORT("");
194 }
195 dump.pop();
196 }
197 TG.Node().barrier();
198
199 shmCMatrix vn0({NMO, NMO}, shared_allocator<ComplexType>{distNode});
200 shmCMatrix haj({ndet, NEL * NMO}, shared_allocator<ComplexType>{TG.Node()});
201 std::vector<shmSp3Tensor> Lnak;
202 Lnak.reserve(PsiT.size());
203 for (int nd = 0; nd < PsiT.size(); nd++)
204 Lnak.emplace_back(shmSp3Tensor({local_ncv, PsiT[nd].size(0), NMO}, shared_allocator<SPComplexType>{distNode}));
205 int nrow = NEL;
206 if (ndet > 1)
207 nrow = 0; // not used if ndet>1
208 TG.Node().barrier();
209
210 // for simplicity
211 CMatrix H1C({NMO, NMO});
212 copy_n_cast(to_address(H1.origin()), NMO * NMO, H1C.origin());
213
214 int nt = 0;
215 for (int nd = 0; nd < ndet; nd++)
216 {
217 // haj and add half-transformed right-handed rotation for Q=0
218 if (nd % TG.Node().size() != TG.Node().rank())
219 continue;
220 if (type == COLLINEAR)
221 {
222 CMatrix_ref haj_r(to_address(haj[nd].origin()), {nup, NMO});
223 ma::product(PsiT[2 * nd], H1C, haj_r);
224 CMatrix_ref hbj_r(to_address(haj[nd].origin()) + (nup * NMO), {ndown, NMO});
225 if (ndown > 0)
226 ma::product(PsiT[2 * nd + 1], H1C, hbj_r);
227 }
228 else
229 {
230 CMatrix_ref haj_r(to_address(haj[nd].origin()), {nup, NMO});
231 ma::product(ComplexType(2.0), PsiT[nd], H1C, ComplexType(0.0), haj_r);
232 }
233 }
234 // Generate Lnak
235 {
236 CMatrix lik({NMO, NMO});
237 CMatrix lak({nup, NMO});
238 for (int nd = 0; nd < ndet; nd++)
239 {
240 // all nodes across Qcomm share same segment {nc0,ncN}
241 for (int nc = 0; nc < local_ncv; nc++)
242 {
243 if (nc % Qcomm.size() != Qcomm.rank())
244 continue;
245 for (int i = 0, ik = 0; i < NMO; i++)
246 for (int k = 0; k < NMO; k++, ik++)
247 lik[i][k] = ComplexType(static_cast<RealType>(Likn[ik][nc]), 0.0);
248 ma::product(PsiT[nspins * nd], lik, lak);
249 copy_n_cast(lak.origin(), nup * NMO, to_address(Lnak[nspins * nd][nc].origin()));
250 if (type == COLLINEAR)
251 {
252 ma::product(PsiT[2 * nd + 1], lik, lak.sliced(0, ndown));
253 copy_n_cast(lak.origin(), ndown * NMO, to_address(Lnak[2 * nd + 1][nc].origin()));
254 }
255 }
256 }
257 }
258 TG.Global().barrier();
259 if (distNode.root())
260 {
261 for (auto& v : Lnak)
262 Qcomm_roots.all_reduce_in_place_n(to_address(v.origin()), v.num_elements(), std::plus<>());
263 std::fill_n(to_address(vn0.origin()), vn0.num_elements(), ComplexType(0.0));
264 }
265 TG.Node().barrier();
266
267 // calculate vn0(i,l) = -0.5 sum_j sum_n L[i][j][n] L[j][l][n] = -0.5 sum_j sum_n L[i][j][n] L[l][j][n]
268 {
269 int i0, iN;
270 std::tie(i0, iN) = FairDivideBoundary(Qcomm.rank(), NMO, Qcomm.size());
271 if (iN > i0)
272 {
273 SpRMatrix v_({iN - i0, NMO});
274 SpRMatrix_ref Lijn(to_address(Likn.origin()), {NMO, NMO * local_ncv});
275 ma::product(-0.5, Lijn.sliced(i0, iN), ma::T(Lijn), 0.0, v_);
276 copy_n_cast(v_.origin(), v_.num_elements(), to_address(vn0[i0].origin()));
277 }
278 }
279 TG.Node().barrier();
280
281 if (distNode.root())
282 {
283 distNode_roots.all_reduce_in_place_n(to_address(vn0.origin()), vn0.num_elements(), std::plus<>());
284 dump.pop();
285 dump.close();
286 }
287
288 // if(TG.TG_local().size() > 1 || not (batched=="yes" || batched == "true" ))
289 // return HamiltonianOperations(Real3IndexFactorization(TGwfn,type,std::move(H1),std::move(haj),
290 // std::move(Likn),std::move(Lakn),std::move(Lank),std::move(vn0),E0,nc0,global_ncvecs));
291 // else
292 return HamiltonianOperations(Real3IndexFactorization_batched_v2(type, TGprop, std::move(H1), std::move(haj),
293 std::move(Likn), std::move(Lnak), std::move(vn0), E0,
294 device_allocator<ComplexType>{}, nc0, global_ncvecs,
295 max_memory_MB));
296 }
297
298 #else
299
300 HamiltonianOperations RealDenseHamiltonian_v2::getHamiltonianOperations(bool pureSD,
301 bool addCoulomb,
302 WALKER_TYPES type,
303 std::vector<PsiT_Matrix>& PsiT,
304 double cutvn,
305 double cutv2,
306 TaskGroup_& TGprop,
307 TaskGroup_& TGwfn,
308 hdf_archive& hdf_restart)
309 {
310 APP_ABORT(" Error: RealDenseHamiltonian_v2 requires complex build (QMC_COMPLEX=1). \n");
311 return HamiltonianOperations();
312 }
313
314
315 #endif
316
317 } // namespace afqmc
318 } // namespace qmcplusplus
319