#include <cstdlib>
#include <algorithm>
#include <complex>
#include <iostream>
#include <fstream>
#include <map>
#include <utility>
#include <vector>
#include <numeric>
#include <functional>
#if defined(USE_MPI)
#include <mpi.h>
#endif

#include "Configuration.h"

#include "AFQMC/config.h"
#include "AFQMC/Utilities/Utils.hpp"
#include "FactorizedSparseHamiltonian.h"
#include "AFQMC/Hamiltonians/HSPotential_Helpers.h"
#include "AFQMC/Hamiltonians/generateHijkl.hpp"
#include "AFQMC/Matrix/array_partition.hpp"
#include "AFQMC/Hamiltonians/rotateHamiltonian.hpp"
#include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
#include "AFQMC/HamiltonianOperations/SparseTensorIO.hpp"

namespace qmcplusplus
{
namespace afqmc
{
boost::multi::array<ComplexType, 1> FactorizedSparseHamiltonian::halfRotatedHij(WALKER_TYPES type,
                                                                                PsiT_Matrix* Alpha,
                                                                                PsiT_Matrix* Beta)
{
  check_wavefunction_consistency(type, Alpha, Beta, NMO, NAEA, NAEB);
#if defined(QMC_COMPLEX)
  return rotateHij(type, Alpha, Beta, OneBodyHamiltonian::H1);
#else
  boost::multi::array<ComplexType, 2> H1_(OneBodyHamiltonian::H1);
  return rotateHij(type, Alpha, Beta, H1_);
#endif
}

SpVType_shm_csr_matrix FactorizedSparseHamiltonian::calculateHSPotentials(double cut,
                                                                          TaskGroup_& TGprop,
                                                                          boost::multi::array<ComplexType, 2>& vn0)
{
  using Alloc = shared_allocator<SPValueType>;
  if (TG.getNumberOfTGs() > 1)
    APP_ABORT("Error: HSPotential not implemented with distributed Hamiltonian. \n");

  vn0.reextent({NMO, NMO});
  std::fill_n(vn0.data_elements(), NMO * NMO, ComplexType(0));
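  // one-body correction from the HS transformation: vn0(i,l) = -0.5 * sum_j H(i,j,j,l),
  // with the (i,l) pairs distributed cyclically over global ranks and all-reduced below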
  for (int i = 0, cnt = 0; i < NMO; i++)
    for (int l = i; l < NMO; l++, cnt++)
    {
      if (cnt % TG.Global().size() != TG.Global().rank())
        continue;
      ValueType vl = ValueType(0);
      for (int j = 0; j < NMO; j++)
        vl += H(i, j, j, l);
      vn0[i][l] -= 0.5 * vl;
      if (i != l)
        vn0[l][i] -= 0.5 * ma::conj(vl);
    }
  TG.Global().all_reduce_in_place_n(vn0.origin(), vn0.num_elements(), std::plus<>());

  if (TG.getNumberOfTGs() > 1)
  {
    APP_ABORT(" Finish HS. \n");
    return SpVType_shm_csr_matrix(tp_ul_ul{0, 0}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
  }
  else
  {
    // you always need to count since some vectors might be empty
    auto nnz_per_cv = HamHelper::count_nnz_per_cholvec(cut, TG, V2_fact, NMO);

    // count vectors and make mapping from 2*n/2*n+1 to actual value
    std::vector<int> map_;
    map_.reserve(nnz_per_cv.size());
    int cnt = 0;
    for (auto& n : nnz_per_cv)
      map_.emplace_back((n > 0) ? (cnt++) : (-1));

    // partition the Cholesky vectors over the groups of the TG
    std::size_t cv0, cvN;
    if (TGprop.getNGroupsPerTG() == 1)
    { // Spvn is not distributed
      cv0 = 0;
      cvN = nnz_per_cv.size();
      if (TG.Global().root())
      {
        app_log() << std::endl << "Partition of cholesky vector over nodes in TG: ";
        app_log() << std::count_if(nnz_per_cv.begin(), nnz_per_cv.begin() + cvN, [](std::size_t i) { return i > 0; });
        app_log() << std::endl;
        app_log() << "Number of terms in Cholesky Matrix per node in TG: ";
        app_log() << std::accumulate(nnz_per_cv.begin(), nnz_per_cv.begin() + cvN, std::size_t(0));
        app_log() << std::endl;
      }
    }
    else
    {
      std::vector<std::size_t> cv_boundaries(TGprop.getNGroupsPerTG() + 1);
      simple_matrix_partition<TaskGroup_, std::size_t, double> split(V2_fact.size(0), nnz_per_cv.size(), cut);
      // no need for all cores to do this
      if (TG.Global().root())
        split.partition(TGprop, false, nnz_per_cv, cv_boundaries);
      TG.Global().broadcast_n(cv_boundaries.begin(), cv_boundaries.size());
      cv0 = cv_boundaries[TGprop.getLocalGroupNumber()];
      cvN = cv_boundaries[TGprop.getLocalGroupNumber() + 1];
      // no need for all cores to do this
      if (TG.Global().root())
      {
        app_log() << std::endl << "Partition of cholesky vector over nodes in TG: ";
        for (int i = 0; i < TGprop.getNGroupsPerTG(); i++)
          app_log() << std::count_if(nnz_per_cv.begin() + cv_boundaries[i], nnz_per_cv.begin() + cv_boundaries[i + 1],
                                     [](std::size_t i) { return i > 0; })
                    << " ";
        app_log() << std::endl;
        app_log() << "Number of terms in Cholesky Matrix per node in TG: ";
        for (int i = 0; i < TGprop.getNGroupsPerTG(); i++)
          app_log() << std::accumulate(nnz_per_cv.begin() + cv_boundaries[i], nnz_per_cv.begin() + cv_boundaries[i + 1],
                                       std::size_t(0))
                    << " ";
        app_log() << std::endl << std::endl;
      }
    }

    auto nnz_per_ik = HamHelper::count_nnz_per_ik(cut, TG, V2_fact, NMO, cv0, cvN);

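    // nvec: number of non-empty Cholesky vectors kept locally in [cv0, cvN);
    // cv_origin: global column offset of the first local vector once empty ones are skipped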
    std::size_t nvec =
        std::count_if(nnz_per_cv.begin() + cv0, nnz_per_cv.begin() + cvN, [](std::size_t const& i) { return i > 0; });

    std::size_t cv_origin =
        std::count_if(nnz_per_cv.begin(), nnz_per_cv.begin() + cv0, [](std::size_t const& i) { return i > 0; });

    // can build csr directly since cores work on non-overlapping rows
    // and can use emplace_back
    SpVType_shm_csr_matrix csr(tp_ul_ul{NMO * NMO, nvec}, tp_ul_ul{0, cv_origin}, nnz_per_ik, Alloc(TG.Node()));

    HamHelper::generateHSPotential(csr, map_, cut, TG, V2_fact, NMO, cv0, cvN);
    TG.node_barrier();

    return csr;
  }
}

SpCType_shm_csr_matrix FactorizedSparseHamiltonian::halfRotatedHijkl(WALKER_TYPES type,
                                                                     bool addCoulomb,
                                                                     TaskGroup_& TGHam,
                                                                     PsiT_Matrix_t<SPComplexType>* Alpha,
                                                                     PsiT_Matrix_t<SPComplexType>* Beta,
                                                                     RealType const cut)
{
  check_wavefunction_consistency(type, Alpha, Beta, NMO, NAEA, NAEB);
  using Alloc = shared_allocator<SPComplexType>;
  assert(Alpha != nullptr);
  if (type == COLLINEAR)
    assert(Beta != nullptr);
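  // rows of the half-rotated matrix: one per (occupied orbital, basis orbital) pair,
  // with the alpha and beta blocks stacked in the COLLINEAR case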
  std::size_t nr = Alpha->size(0) * Alpha->size(1);
  if (type == COLLINEAR)
    nr = (Alpha->size(0) + Beta->size(0)) * Alpha->size(1);
  if (TGHam.getNGroupsPerTG() > 1)
  {
    using tvec = std::vector<std::tuple<int, int, SPComplexType>>;
    tvec tmat;
    tmat.reserve(100000); // reserve some space
    rotateHijkl<tvec>(factorizedHalfRotationType, type, addCoulomb, TG, tmat, Alpha, Beta, V2_fact, cut,
                      maximum_buffer_size, false, false);
    TG.node_barrier();
    return csr::shm::construct_distributed_csr_matrix_from_distributed_containers<SpCType_shm_csr_matrix>(tmat, nr, nr,
                                                                                                          TGHam);
  }
  else
  {
    using ucsr_mat = SpCType_shm_csr_matrix::base;
    ucsr_mat ucsr(tp_ul_ul{nr, nr}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
    if (TG.getTotalNodes() > 1)
      rotateHijkl<ucsr_mat>(factorizedHalfRotationType, type, addCoulomb, TG, ucsr, Alpha, Beta, V2_fact, cut,
                            maximum_buffer_size, true, true);
    else
      rotateHijkl_single_node<ucsr_mat>(factorizedHalfRotationType, type, addCoulomb, TG, ucsr, Alpha, Beta, V2_fact,
                                        cut, maximum_buffer_size, true);
    TG.node_barrier();
    return csr::shm::construct_csr_matrix_from_distributed_ucsr<SpCType_shm_csr_matrix, TaskGroup_>(std::move(ucsr),
                                                                                                    TG);
  }
}

SpVType_shm_csr_matrix FactorizedSparseHamiltonian::generateHijkl(
    WALKER_TYPES type,
    bool addCoulomb,
    TaskGroup_& TGwfn,
    std::map<IndexType, std::pair<bool, IndexType>>& occ_a,
    std::map<IndexType, std::pair<bool, IndexType>>& occ_b,
    RealType const cut)
{
  using Alloc = shared_allocator<SPValueType>;
  int nel = 0;
  for (auto& i : occ_a)
    if (i.second.first)
      nel++;
  if (type != CLOSED)
    for (auto& i : occ_b)
      if (i.second.first)
        nel++;
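  // matrix dimension: one row per (occupied, basis orbital) pair; doubled for NONCOLLINEAR spinors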
  std::size_t nr = nel * NMO;
  if (type == NONCOLLINEAR)
    nr *= 2;
  if (TGwfn.getNGroupsPerTG() > 1)
  {
    APP_ABORT("Finish \n");
    return SpVType_shm_csr_matrix(tp_ul_ul{0, 0}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
  }
  else
  {
    using ucsr_mat = SpVType_shm_csr_matrix::base;
    using wrapper = csr::matrix_emplace_wrapper<ucsr_mat>;
    using namespace std::placeholders;
    ucsr_mat ucsr(tp_ul_ul{nr, nr}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
    wrapper ucsr_wrapper(ucsr, TG.Node());
    HamHelper::generateHijkl(type, addCoulomb, TG, ucsr_wrapper, NMO, occ_a, occ_b, *this, true, true, cut);
    ucsr_wrapper.push_buffer();
    TG.node_barrier();
    return csr::shm::construct_csr_matrix_from_distributed_ucsr<SpVType_shm_csr_matrix, TaskGroup_>(std::move(ucsr),
                                                                                                    TG);
  }
}

HamiltonianOperations FactorizedSparseHamiltonian::getHamiltonianOperations(bool pureSD,
                                                                            bool addCoulomb,
                                                                            WALKER_TYPES type,
                                                                            std::vector<PsiT_Matrix>& PsiT,
                                                                            double cutvn,
                                                                            double cutv2,
                                                                            TaskGroup_& TGprop,
                                                                            TaskGroup_& TGwfn,
                                                                            hdf_archive& dump)
{
  if (type == COLLINEAR)
    assert(PsiT.size() % 2 == 0);

#if defined(MIXED_PRECISION)
  auto PsiTsp(csr::shm::CSRvector_to_single_precision<PsiT_Matrix_t<SPComplexType>>(PsiT));
#else
  auto& PsiTsp(PsiT);
#endif

  // hack until parallel hdf is in place
  bool write_hdf = false;
  if (TGwfn.Global().root())
    write_hdf = !dump.closed();
  // if(TGwfn.Global().root()) write_hdf = (dump.file_id != hdf_archive::is_closed);
  TGwfn.Global().broadcast_value(write_hdf);

  boost::multi::array<ComplexType, 2> vn0({NMO, NMO});
  auto Spvn(calculateHSPotentials(cutvn, TGprop, vn0));
  auto Spvnview(csr::shm::local_balanced_partition(Spvn, TGprop));

  // trick: the last node always knows what the total # of chol vecs is
  int global_ncvecs = 0;
  if (TG.getNodeID() == TG.getTotalNodes() - 1 && TG.getCoreID() == 0)
    global_ncvecs = Spvn.global_origin()[1] + Spvn.size(1);
  global_ncvecs = (TG.Global() += global_ncvecs);
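  // (operator+= on the global communicator is an all-reduce sum, so every rank
  //  ends up with the total Cholesky-vector count)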

  ValueType E0 = OneBodyHamiltonian::NuclearCoulombEnergy + OneBodyHamiltonian::FrozenCoreEnergy;

  // dense one body hamiltonian
  auto H1 = getH1();

  // several possibilities
  int ndet = ((type != COLLINEAR) ? (PsiT.size()) : (PsiT.size() / 2));
  // SparseTensor<Integrals_Type, SpvnT_Type>
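  // three cases below: single determinant (SpvnT is the half-rotated Cholesky matrix),
  // multi-determinant with Coulomb (SpvnT is transpose(Spvn)), and PHMSD-style
  // multideterminant (one half-rotated SpvnT per determinant)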
  if (ndet == 1)
  {
    std::vector<boost::multi::array<ComplexType, 1>> hij;
    hij.reserve(ndet);
    hij.emplace_back(halfRotatedHij(type, &PsiT[0], ((type == COLLINEAR) ? (&PsiT[1]) : (&PsiT[0]))));
    std::vector<SpCType_shm_csr_matrix> SpvnT;
    using matrix_view = typename SpCType_shm_csr_matrix::template matrix_view<int>;
    std::vector<matrix_view> SpvnTview;
    SpvnT.emplace_back(
        sparse_rotate::halfRotateCholeskyMatrixForBias<SPComplexType>(type, TGprop, &PsiTsp[0],
                                                                      ((type == COLLINEAR) ? (&PsiTsp[1])
                                                                                           : (&PsiTsp[0])),
                                                                      Spvn, cutv2));
    SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[0], TGprop));

    // in single determinant, SpvnT is the half rotated cholesky matrix
    if (pureSD)
    {
      // in pureSD: V2 is ValueType
      using sparse_ham = SparseTensor<ValueType, ComplexType>;

      if (type != CLOSED)
        APP_ABORT(" Error: pureSD only implemented for closed shell system right now. \n");

      std::map<IndexType, std::pair<bool, IndexType>> occ_a;
      for (int i = 0; i < NMO; i++)
        occ_a[i] = {false, 0};
      auto nel = PsiT[0].size(0);
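      // check that PsiT is a pure Slater determinant in the standard basis:
      // each row must contain exactly one nonzero coefficient, equal to 1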
      for (std::size_t i = 0; i < nel; i++)
      {
        if (PsiT[0].num_non_zero_elements(i) != 1)
          APP_ABORT(" Error: PsiT is not of pureSD form: Too many nnz. \n");
        auto val = PsiT[0].non_zero_values_data(i)[0];
        if (std::abs(val - ValueType(1.0)) > 1e-8)
          APP_ABORT(" Error: PsiT is not of pureSD form: Coeff != 1.0. \n");
        auto orb = PsiT[0].non_zero_indices2_data(i)[0];
        occ_a[orb] = {true, IndexType(i)};
      }

      std::vector<SpVType_shm_csr_matrix> V2;
      V2.reserve(ndet);
      V2.emplace_back(generateHijkl(type, addCoulomb, TGwfn, occ_a, occ_a, cutv2));
      std::vector<SpVType_shm_csr_matrix::template matrix_view<int>> V2view;
      V2view.reserve(ndet);
      V2view.emplace_back(csr::shm::local_balanced_partition(V2[0], TGwfn));

      if (write_hdf)
        writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 12);

      return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
                                              std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
                                              std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
    }
    else
    {
      // in this case: V2 is ComplexType since it is half rotated
      using sparse_ham = SparseTensor<ComplexType, ComplexType>;

      std::vector<SpCType_shm_csr_matrix> V2;
      V2.reserve(ndet);
      V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[0],
                                       ((type == COLLINEAR) ? (&PsiTsp[1]) : (&PsiTsp[0])), cutv2));
      std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
      V2view.reserve(ndet);
      V2view.emplace_back(csr::shm::local_balanced_partition(V2[0], TGwfn));

      if (write_hdf)
        writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 22);

      return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
                                              std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
                                              std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
    }
  }
  else if (addCoulomb)
  {
    // in multi determinant with addCoulomb, SpvnT is transposed(Spvn)

    std::vector<boost::multi::array<ComplexType, 1>> hij;
    hij.reserve(ndet);
    int skp = ((type == COLLINEAR) ? 1 : 0);
    for (int n = 0, nd = 0; n < ndet; ++n, nd += (skp + 1))
      hij.emplace_back(halfRotatedHij(type, &PsiT[nd], &PsiT[nd + skp]));
    std::vector<SpVType_shm_csr_matrix> SpvnT;
    using matrix_view = typename SpVType_shm_csr_matrix::template matrix_view<int>;
    std::vector<matrix_view> SpvnTview;
    SpvnT.emplace_back(csr::shm::transpose<SpVType_shm_csr_matrix>(Spvn));
    SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[0], TGprop));

    if (pureSD)
    {
      // in pureSD: V2 is ValueType
      using sparse_ham = SparseTensor<ValueType, ValueType>;

      return HamiltonianOperations{};
      //return HamiltonianOperations(sparse_ham(std::move(H1),std::move(hij),std::move(V2),
      //        std::move(V2view),std::move(Spvn),std::move(Spvnview),
      //        std::move(vn0),std::move(SpvnT),std::move(SpvnTview),E0,global_ncvecs));
    }
    else
    {
      // in this case: V2 is ComplexType since it is half rotated
      using sparse_ham = SparseTensor<ComplexType, ValueType>;

      std::vector<SpCType_shm_csr_matrix> V2;
      V2.reserve(ndet);
      for (int n = 0, nd = 0; n < ndet; ++n, nd += (skp + 1))
        V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[nd], &PsiTsp[nd + skp], cutv2));
      std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
      V2view.reserve(ndet);
      for (auto& v : V2)
        V2view.emplace_back(csr::shm::local_balanced_partition(v, TGwfn));

      if (write_hdf)
        writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 21);

      return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
                                              std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
                                              std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
    }
  }
  else
  { // multideterminant for PHMSD

    assert(type == CLOSED);
    std::vector<boost::multi::array<ComplexType, 1>> hij;
    hij.reserve(ndet);
    for (int n = 0, nd = 0; n < ndet; ++n, nd++)
      hij.emplace_back(halfRotatedHij(type, &PsiT[nd], &PsiT[nd]));
    std::vector<SpCType_shm_csr_matrix> SpvnT;
    using matrix_view = typename SpCType_shm_csr_matrix::template matrix_view<int>;
    std::vector<matrix_view> SpvnTview;
    SpvnT.reserve(ndet);
    for (int n = 0; n < ndet; ++n)
    {
      SpvnT.emplace_back(sparse_rotate::halfRotateCholeskyMatrixForBias<SPComplexType>(type, TGprop, &PsiTsp[n],
                                                                                       &PsiTsp[n], Spvn, cutv2));
      SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[n], TGprop));
    }

    if (pureSD)
    {
      // in pureSD: V2 is ValueType
      using sparse_ham = SparseTensor<ValueType, ValueType>;

      return HamiltonianOperations{};
      //return HamiltonianOperations(sparse_ham(std::move(H1),std::move(hij),std::move(V2),
      //        std::move(V2view),std::move(Spvn),std::move(Spvnview),
      //        std::move(vn0),std::move(SpvnT),std::move(SpvnTview),E0,global_ncvecs));
    }
    else
    {
      // in this case: V2 is ComplexType since it is half rotated
      using sparse_ham = SparseTensor<ComplexType, ComplexType>;

      std::vector<SpCType_shm_csr_matrix> V2;
      V2.reserve(ndet);
      for (int n = 0, nd = 0; n < ndet; ++n, nd++)
        V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[nd], &PsiTsp[nd], cutv2));
      std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
      V2view.reserve(ndet);
      for (auto& v : V2)
        V2view.emplace_back(csr::shm::local_balanced_partition(v, TGwfn));

      if (write_hdf)
        writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 21);

      return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
                                              std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
                                              std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
    }
  }
}

} // namespace afqmc
} // namespace qmcplusplus