1 #include <cstdlib>
2 #include <algorithm>
3 #include <complex>
4 #include <iostream>
5 #include <fstream>
6 #include <map>
7 #include <utility>
8 #include <vector>
9 #include <numeric>
10 #include <functional>
11 #if defined(USE_MPI)
12 #include <mpi.h>
13 #endif
14 
15 #include "Configuration.h"
16 
17 #include "AFQMC/config.h"
18 #include "AFQMC/Utilities/Utils.hpp"
19 #include "FactorizedSparseHamiltonian.h"
20 #include "AFQMC/Hamiltonians/HSPotential_Helpers.h"
21 #include "AFQMC/Hamiltonians/generateHijkl.hpp"
22 #include "AFQMC/Matrix/array_partition.hpp"
23 #include "AFQMC/Hamiltonians/rotateHamiltonian.hpp"
24 #include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
25 #include "AFQMC/HamiltonianOperations/SparseTensorIO.hpp"
26 
27 namespace qmcplusplus
28 {
29 namespace afqmc
30 {
halfRotatedHij(WALKER_TYPES type,PsiT_Matrix * Alpha,PsiT_Matrix * Beta)31 boost::multi::array<ComplexType, 1> FactorizedSparseHamiltonian::halfRotatedHij(WALKER_TYPES type,
32                                                                                 PsiT_Matrix* Alpha,
33                                                                                 PsiT_Matrix* Beta)
34 {
35   check_wavefunction_consistency(type, Alpha, Beta, NMO, NAEA, NAEB);
36 #if defined(QMC_COMPLEX)
37   return rotateHij(type, Alpha, Beta, OneBodyHamiltonian::H1);
38 #else
39   boost::multi::array<ComplexType, 2> H1_(OneBodyHamiltonian::H1);
40   return rotateHij(type, Alpha, Beta, H1_);
41 #endif
42 }
43 
calculateHSPotentials(double cut,TaskGroup_ & TGprop,boost::multi::array<ComplexType,2> & vn0)44 SpVType_shm_csr_matrix FactorizedSparseHamiltonian::calculateHSPotentials(double cut,
45                                                                           TaskGroup_& TGprop,
46                                                                           boost::multi::array<ComplexType, 2>& vn0)
47 {
48   using Alloc = shared_allocator<SPValueType>;
49   if (TG.getNumberOfTGs() > 1)
50     APP_ABORT("Error: HSPotential not implemented with distributed Hamiltonian. \n");
51 
52   vn0.reextent({NMO, NMO});
53   std::fill_n(vn0.data_elements(), NMO * NMO, ComplexType(0));
54   for (int i = 0, cnt = 0; i < NMO; i++)
55     for (int l = i; l < NMO; l++, cnt++)
56     {
57       if (cnt % TG.Global().size() != TG.Global().rank())
58         continue;
59       ValueType vl = ValueType(0);
60       for (int j = 0; j < NMO; j++)
61         vl += H(i, j, j, l);
62       vn0[i][l] -= 0.5 * vl;
63       if (i != l)
64         vn0[l][i] -= 0.5 * ma::conj(vl);
65     }
66   TG.Global().all_reduce_in_place_n(vn0.origin(), vn0.num_elements(), std::plus<>());
67 
68   if (TG.getNumberOfTGs() > 1)
69   {
70     APP_ABORT(" Finish HS. \n");
71     return SpVType_shm_csr_matrix(tp_ul_ul{0, 0}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
72   }
73   else
74   {
75     // you always need to count since some vectors might be empty
76     auto nnz_per_cv = HamHelper::count_nnz_per_cholvec(cut, TG, V2_fact, NMO);
77 
78     // count vectors and make mapping from 2*n/2*n+1 to actual value
79     std::vector<int> map_;
80     map_.reserve(nnz_per_cv.size());
81     int cnt = 0;
82     for (auto& n : nnz_per_cv)
83       map_.emplace_back((n > 0) ? (cnt++) : (-1));
84 
85     // partition and
86     std::size_t cv0, cvN;
87     if (TGprop.getNGroupsPerTG() == 1)
88     { // Spvn is not distributed
89       cv0 = 0;
90       cvN = nnz_per_cv.size();
91       if (TG.Global().root())
92       {
93         app_log() << std::endl << "Partition of cholesky vector over nodes in TG: ";
94         app_log() << std::count_if(nnz_per_cv.begin(), nnz_per_cv.begin() + cvN, [](std::size_t i) { return i > 0; });
95         app_log() << std::endl;
96         app_log() << "Number of terms in Cholesky Matrix per node in TG: ";
97         app_log() << std::accumulate(nnz_per_cv.begin(), nnz_per_cv.begin() + cvN, std::size_t(0));
98         app_log() << std::endl;
99       }
100     }
101     else
102     {
103       std::vector<std::size_t> cv_boundaries(TGprop.getNGroupsPerTG() + 1);
104       simple_matrix_partition<TaskGroup_, std::size_t, double> split(V2_fact.size(0), nnz_per_cv.size(), cut);
105       // no need for all cores to do this
106       if (TG.Global().root())
107         split.partition(TGprop, false, nnz_per_cv, cv_boundaries);
108       TG.Global().broadcast_n(cv_boundaries.begin(), cv_boundaries.size());
109       cv0 = cv_boundaries[TGprop.getLocalGroupNumber()];
110       cvN = cv_boundaries[TGprop.getLocalGroupNumber() + 1];
111       // no need for all cores to do this
112       if (TG.Global().root())
113       {
114         app_log() << std::endl << "Partition of cholesky vector over nodes in TG: ";
115         for (int i = 0; i < TGprop.getNGroupsPerTG(); i++)
116           app_log() << std::count_if(nnz_per_cv.begin() + cv_boundaries[i], nnz_per_cv.begin() + cv_boundaries[i + 1],
117                                      [](std::size_t i) { return i > 0; })
118                     << " ";
119         app_log() << std::endl;
120         app_log() << "Number of terms in Cholesky Matrix per node in TG: ";
121         for (int i = 0; i < TGprop.getNGroupsPerTG(); i++)
122           app_log() << std::accumulate(nnz_per_cv.begin() + cv_boundaries[i], nnz_per_cv.begin() + cv_boundaries[i + 1],
123                                        std::size_t(0))
124                     << " ";
125         app_log() << std::endl << std::endl;
126       }
127     }
128 
129     auto nnz_per_ik = HamHelper::count_nnz_per_ik(cut, TG, V2_fact, NMO, cv0, cvN);
130 
131     std::size_t nvec =
132         std::count_if(nnz_per_cv.begin() + cv0, nnz_per_cv.begin() + cvN, [](std::size_t const& i) { return i > 0; });
133 
134     std::size_t cv_origin =
135         std::count_if(nnz_per_cv.begin(), nnz_per_cv.begin() + cv0, [](std::size_t const& i) { return i > 0; });
136 
137     // can build csr directly since cores work on non-overlapping rows
138     // and can use emplace_back
139     SpVType_shm_csr_matrix csr(tp_ul_ul{NMO * NMO, nvec}, tp_ul_ul{0, cv_origin}, nnz_per_ik, Alloc(TG.Node()));
140 
141     HamHelper::generateHSPotential(csr, map_, cut, TG, V2_fact, NMO, cv0, cvN);
142     TG.node_barrier();
143 
144     return csr;
145   }
146 }
147 
halfRotatedHijkl(WALKER_TYPES type,bool addCoulomb,TaskGroup_ & TGHam,PsiT_Matrix_t<SPComplexType> * Alpha,PsiT_Matrix_t<SPComplexType> * Beta,RealType const cut)148 SpCType_shm_csr_matrix FactorizedSparseHamiltonian::halfRotatedHijkl(WALKER_TYPES type,
149                                                                      bool addCoulomb,
150                                                                      TaskGroup_& TGHam,
151                                                                      PsiT_Matrix_t<SPComplexType>* Alpha,
152                                                                      PsiT_Matrix_t<SPComplexType>* Beta,
153                                                                      RealType const cut)
154 {
155   check_wavefunction_consistency(type, Alpha, Beta, NMO, NAEA, NAEB);
156   using Alloc = shared_allocator<SPComplexType>;
157   assert(Alpha != nullptr);
158   if (type == COLLINEAR)
159     assert(Beta != nullptr);
160   std::size_t nr = Alpha->size(0) * Alpha->size(1);
161   if (type == COLLINEAR)
162     nr = (Alpha->size(0) + Beta->size(0)) * Alpha->size(1);
163   if (TGHam.getNGroupsPerTG() > 1)
164   {
165     using tvec = std::vector<std::tuple<int, int, SPComplexType>>;
166     tvec tmat;
167     tmat.reserve(100000); // reserve some space
168     rotateHijkl<tvec>(factorizedHalfRotationType, type, addCoulomb, TG, tmat, Alpha, Beta, V2_fact, cut,
169                       maximum_buffer_size, false, false);
170     TG.node_barrier();
171     return csr::shm::construct_distributed_csr_matrix_from_distributed_containers<SpCType_shm_csr_matrix>(tmat, nr, nr,
172                                                                                                           TGHam);
173   }
174   else
175   {
176     using ucsr_mat = SpCType_shm_csr_matrix::base;
177     ucsr_mat ucsr(tp_ul_ul{nr, nr}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
178     if (TG.getTotalNodes() > 1)
179       rotateHijkl<ucsr_mat>(factorizedHalfRotationType, type, addCoulomb, TG, ucsr, Alpha, Beta, V2_fact, cut,
180                             maximum_buffer_size, true, true);
181     else
182       rotateHijkl_single_node<ucsr_mat>(factorizedHalfRotationType, type, addCoulomb, TG, ucsr, Alpha, Beta, V2_fact,
183                                         cut, maximum_buffer_size, true);
184     TG.node_barrier();
185     return csr::shm::construct_csr_matrix_from_distributed_ucsr<SpCType_shm_csr_matrix, TaskGroup_>(std::move(ucsr),
186                                                                                                     TG);
187   }
188 }
189 
generateHijkl(WALKER_TYPES type,bool addCoulomb,TaskGroup_ & TGwfn,std::map<IndexType,std::pair<bool,IndexType>> & occ_a,std::map<IndexType,std::pair<bool,IndexType>> & occ_b,RealType const cut)190 SpVType_shm_csr_matrix FactorizedSparseHamiltonian::generateHijkl(
191     WALKER_TYPES type,
192     bool addCoulomb,
193     TaskGroup_& TGwfn,
194     std::map<IndexType, std::pair<bool, IndexType>>& occ_a,
195     std::map<IndexType, std::pair<bool, IndexType>>& occ_b,
196     RealType const cut)
197 {
198   using Alloc = shared_allocator<SPValueType>;
199   int nel     = 0;
200   for (auto& i : occ_a)
201     if (i.second.first)
202       nel++;
203   if (type != CLOSED)
204     for (auto& i : occ_b)
205       if (i.second.first)
206         nel++;
207   std::size_t nr = nel * NMO;
208   if (type == NONCOLLINEAR)
209     nr *= 2;
210   if (TGwfn.getNGroupsPerTG() > 1)
211   {
212     APP_ABORT("Finish \n");
213     return SpVType_shm_csr_matrix(tp_ul_ul{0, 0}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
214   }
215   else
216   {
217     using ucsr_mat = SpVType_shm_csr_matrix::base;
218     using wrapper  = csr::matrix_emplace_wrapper<ucsr_mat>;
219     using namespace std::placeholders;
220     ucsr_mat ucsr(tp_ul_ul{nr, nr}, tp_ul_ul{0, 0}, 0, Alloc(TG.Node()));
221     wrapper ucsr_wrapper(ucsr, TG.Node());
222     HamHelper::generateHijkl(type, addCoulomb, TG, ucsr_wrapper, NMO, occ_a, occ_b, *this, true, true, cut);
223     ucsr_wrapper.push_buffer();
224     TG.node_barrier();
225     return csr::shm::construct_csr_matrix_from_distributed_ucsr<SpVType_shm_csr_matrix, TaskGroup_>(std::move(ucsr),
226                                                                                                     TG);
227   }
228 }
229 
getHamiltonianOperations(bool pureSD,bool addCoulomb,WALKER_TYPES type,std::vector<PsiT_Matrix> & PsiT,double cutvn,double cutv2,TaskGroup_ & TGprop,TaskGroup_ & TGwfn,hdf_archive & dump)230 HamiltonianOperations FactorizedSparseHamiltonian::getHamiltonianOperations(bool pureSD,
231                                                                             bool addCoulomb,
232                                                                             WALKER_TYPES type,
233                                                                             std::vector<PsiT_Matrix>& PsiT,
234                                                                             double cutvn,
235                                                                             double cutv2,
236                                                                             TaskGroup_& TGprop,
237                                                                             TaskGroup_& TGwfn,
238                                                                             hdf_archive& dump)
239 {
240   if (type == COLLINEAR)
241     assert(PsiT.size() % 2 == 0);
242 
243 #if defined(MIXED_PRECISION)
244   auto PsiTsp(csr::shm::CSRvector_to_single_precision<PsiT_Matrix_t<SPComplexType>>(PsiT));
245 #else
246   auto& PsiTsp(PsiT);
247 #endif
248 
249   // hack until parallel hdf is in place
250   bool write_hdf = false;
251   if (TGwfn.Global().root())
252     write_hdf = !dump.closed();
253   //    if(TGwfn.Global().root()) write_hdf = (dump.file_id != hdf_archive::is_closed);
254   TGwfn.Global().broadcast_value(write_hdf);
255 
256   boost::multi::array<ComplexType, 2> vn0({NMO, NMO});
257   auto Spvn(calculateHSPotentials(cutvn, TGprop, vn0));
258   auto Spvnview(csr::shm::local_balanced_partition(Spvn, TGprop));
259 
260   // trick: the last node always knows what the total # of chol vecs is
261   int global_ncvecs = 0;
262   if (TG.getNodeID() == TG.getTotalNodes() - 1 && TG.getCoreID() == 0)
263     global_ncvecs = Spvn.global_origin()[1] + Spvn.size(1);
264   global_ncvecs = (TG.Global() += global_ncvecs);
265 
266   ValueType E0 = OneBodyHamiltonian::NuclearCoulombEnergy + OneBodyHamiltonian::FrozenCoreEnergy;
267 
268   // dense one body hamiltonian
269   auto H1 = getH1();
270 
271   // several posibilities
272   int ndet = ((type != COLLINEAR) ? (PsiT.size()) : (PsiT.size() / 2));
273   // SparseTensor<Integrals_Type, SpvnT_Type>
274   if (ndet == 1)
275   {
276     std::vector<boost::multi::array<ComplexType, 1>> hij;
277     hij.reserve(ndet);
278     hij.emplace_back(halfRotatedHij(type, &PsiT[0], ((type == COLLINEAR) ? (&PsiT[1]) : (&PsiT[0]))));
279     std::vector<SpCType_shm_csr_matrix> SpvnT;
280     using matrix_view = typename SpCType_shm_csr_matrix::template matrix_view<int>;
281     std::vector<matrix_view> SpvnTview;
282     SpvnT.emplace_back(
283         sparse_rotate::halfRotateCholeskyMatrixForBias<SPComplexType>(type, TGprop, &PsiTsp[0],
284                                                                       ((type == COLLINEAR) ? (&PsiTsp[1])
285                                                                                            : (&PsiTsp[0])),
286                                                                       Spvn, cutv2));
287     SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[0], TGprop));
288 
289     // in single determinant, SpvnT is the half rotated cholesky matrix
290     if (pureSD)
291     {
292       // in pureSD: V2 is ValueType
293       using sparse_ham = SparseTensor<ValueType, ComplexType>;
294 
295       if (type != CLOSED)
296         APP_ABORT(" Error: pureSD only implemented for closed shell system right now. \n");
297 
298       std::map<IndexType, std::pair<bool, IndexType>> occ_a;
299       for (int i = 0; i < NMO; i++)
300         occ_a[i] = {false, 0};
301       auto nel = PsiT[0].size(0);
302       for (std::size_t i = 0; i < nel; i++)
303       {
304         if (PsiT[0].num_non_zero_elements(i) != 1)
305           APP_ABORT(" Error: PsiT is not of pureSD form: Too many nnz. \n");
306         auto val = PsiT[0].non_zero_values_data(i)[0];
307         if (std::abs(val - ValueType(1.0)) > 1e-8)
308           APP_ABORT(" Error: PsiT is not of pureSD form: Coeff != 1.0. \n");
309         auto orb   = PsiT[0].non_zero_indices2_data(i)[0];
310         occ_a[orb] = {true, IndexType(i)};
311       }
312 
313       std::vector<SpVType_shm_csr_matrix> V2;
314       V2.reserve(ndet);
315       V2.emplace_back(generateHijkl(type, addCoulomb, TGwfn, occ_a, occ_a, cutv2));
316       std::vector<SpVType_shm_csr_matrix::template matrix_view<int>> V2view;
317       V2view.reserve(ndet);
318       V2view.emplace_back(csr::shm::local_balanced_partition(V2[0], TGwfn));
319 
320       if (write_hdf)
321         writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 12);
322 
323       return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
324                                               std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
325                                               std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
326     }
327     else
328     {
329       // in this case: V2 is ComplexType since it is half rotated
330       using sparse_ham = SparseTensor<ComplexType, ComplexType>;
331 
332       std::vector<SpCType_shm_csr_matrix> V2;
333       V2.reserve(ndet);
334       V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[0],
335                                        ((type == COLLINEAR) ? (&PsiTsp[1]) : (&PsiTsp[0])), cutv2));
336       std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
337       V2view.reserve(ndet);
338       V2view.emplace_back(csr::shm::local_balanced_partition(V2[0], TGwfn));
339 
340       if (write_hdf)
341         writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 22);
342 
343       return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
344                                               std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
345                                               std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
346     }
347   }
348   else if (addCoulomb)
349   {
350     // in multi determinant with addCoulomb, SpvnT is transposed(Spvn)
351 
352     std::vector<boost::multi::array<ComplexType, 1>> hij;
353     hij.reserve(ndet);
354     int skp = ((type == COLLINEAR) ? 1 : 0);
355     for (int n = 0, nd = 0; n < ndet; ++n, nd += (skp + 1))
356       hij.emplace_back(halfRotatedHij(type, &PsiT[nd], &PsiT[nd + skp]));
357     std::vector<SpVType_shm_csr_matrix> SpvnT;
358     using matrix_view = typename SpVType_shm_csr_matrix::template matrix_view<int>;
359     std::vector<matrix_view> SpvnTview;
360     SpvnT.emplace_back(csr::shm::transpose<SpVType_shm_csr_matrix>(Spvn));
361     SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[0], TGprop));
362 
363     if (pureSD)
364     {
365       // in pureSD: V2 is ValueType
366       using sparse_ham = SparseTensor<ValueType, ValueType>;
367 
368       return HamiltonianOperations{};
369       //return HamiltonianOperations(sparse_ham(std::move(H1),std::move(hij),std::move(V2),
370       //    std::move(V2view),std::move(Spvn),std::move(Spvnview),
371       //    std::move(vn0),std::move(SpvnT),std::move(SpvnTview),E0,global_ncvecs));
372     }
373     else
374     {
375       // in this case: V2 is ComplexType since it is half rotated
376       using sparse_ham = SparseTensor<ComplexType, ValueType>;
377 
378       std::vector<SpCType_shm_csr_matrix> V2;
379       V2.reserve(ndet);
380       for (int n = 0, nd = 0; n < ndet; ++n, nd += (skp + 1))
381         V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[nd], &PsiTsp[nd + skp], cutv2));
382       std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
383       V2view.reserve(ndet);
384       for (auto& v : V2)
385         V2view.emplace_back(csr::shm::local_balanced_partition(v, TGwfn));
386 
387       if (write_hdf)
388         writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 21);
389 
390       return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
391                                               std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
392                                               std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
393     }
394   }
395   else
396   { // multideterminant for PHMSD
397 
398     assert(type == CLOSED);
399     std::vector<boost::multi::array<ComplexType, 1>> hij;
400     hij.reserve(ndet);
401     for (int n = 0, nd = 0; n < ndet; ++n, nd++)
402       hij.emplace_back(halfRotatedHij(type, &PsiT[nd], &PsiT[nd]));
403     std::vector<SpCType_shm_csr_matrix> SpvnT;
404     using matrix_view = typename SpCType_shm_csr_matrix::template matrix_view<int>;
405     std::vector<matrix_view> SpvnTview;
406     SpvnT.reserve(ndet);
407     for (int n = 0; n < ndet; ++n)
408     {
409       SpvnT.emplace_back(sparse_rotate::halfRotateCholeskyMatrixForBias<SPComplexType>(type, TGprop, &PsiTsp[n],
410                                                                                        &PsiTsp[n], Spvn, cutv2));
411       SpvnTview.emplace_back(csr::shm::local_balanced_partition(SpvnT[n], TGprop));
412     }
413 
414     if (pureSD)
415     {
416       // in pureSD: V2 is ValueType
417       using sparse_ham = SparseTensor<ValueType, ValueType>;
418 
419       return HamiltonianOperations{};
420       //return HamiltonianOperations(sparse_ham(std::move(H1),std::move(hij),std::move(V2),
421       //    std::move(V2view),std::move(Spvn),std::move(Spvnview),
422       //    std::move(vn0),std::move(SpvnT),std::move(SpvnTview),E0,global_ncvecs));
423     }
424     else
425     {
426       // in this case: V2 is ComplexType since it is half rotated
427       using sparse_ham = SparseTensor<ComplexType, ComplexType>;
428 
429       std::vector<SpCType_shm_csr_matrix> V2;
430       V2.reserve(ndet);
431       for (int n = 0, nd = 0; n < ndet; ++n, nd++)
432         V2.emplace_back(halfRotatedHijkl(type, addCoulomb, TGwfn, &PsiTsp[nd], &PsiTsp[nd], cutv2));
433       std::vector<SpCType_shm_csr_matrix::template matrix_view<int>> V2view;
434       V2view.reserve(ndet);
435       for (auto& v : V2)
436         V2view.emplace_back(csr::shm::local_balanced_partition(v, TGwfn));
437 
438       if (write_hdf)
439         writeSparseTensor(dump, type, NMO, NAEA, NAEB, TGprop, TGwfn, H1, V2, Spvn, vn0, E0, global_ncvecs, 21);
440 
441       return HamiltonianOperations(sparse_ham(TGwfn.TG_local(), type, std::move(H1), std::move(hij), std::move(V2),
442                                               std::move(V2view), std::move(Spvn), std::move(Spvnview), std::move(vn0),
443                                               std::move(SpvnT), std::move(SpvnTview), E0, global_ncvecs));
444     }
445   }
446 }
447 
448 } // namespace afqmc
449 } // namespace qmcplusplus
450