1 #include <cstdlib>
2 #include <memory>
3 #include <algorithm>
4 #include <complex>
5 #include <iostream>
6 #include <fstream>
7 #include <map>
8 #include <utility>
9 #include <vector>
10 #include <numeric>
11 #if defined(USE_MPI)
12 #include <mpi.h>
13 #endif
14 
15 #include "AFQMC/config.h"
16 #include "AFQMC/Hamiltonians/HamiltonianFactory.h"
17 
18 #include "AFQMC/Hamiltonians/FactorizedSparseHamiltonian.h"
19 #include "AFQMC/Utilities/readHeader.h"
20 #include "AFQMC/Utilities/Utils.hpp"
21 #include "AFQMC/Matrix/hdf5_readers_new.hpp"
22 #include "AFQMC/Matrix/array_partition.hpp"
23 #include "AFQMC/Matrix/csr_hdf5_readers.hpp"
24 #include "AFQMC/Matrix/matrix_emplace_wrapper.hpp"
25 #include "AFQMC/Matrix/csr_matrix.hpp"
26 #include "AFQMC/Matrix/coo_matrix.hpp"
27 
28 namespace qmcplusplus
29 {
30 namespace afqmc
31 {
32 // it is possible to subdivide the rows within equivalent nodes and communicate at the end
read_V2fact(hdf_archive & dump,TaskGroup_ & TG,int nread,int NMO,int nvecs,double cutoff1bar,int int_blocks)33 FactorizedSparseHamiltonian::shm_csr_matrix read_V2fact(hdf_archive& dump,
34                                                         TaskGroup_& TG,
35                                                         int nread,
36                                                         int NMO,
37                                                         int nvecs,
38                                                         double cutoff1bar,
39                                                         int int_blocks)
40 {
41   using counter = qmcplusplus::afqmc::sparse_matrix_element_counter;
42   using Alloc   = shared_allocator<SPValueType>;
43   using ucsr_matrix =
44       ma::sparse::ucsr_matrix<SPValueType, int, std::size_t, shared_allocator<SPValueType>, ma::sparse::is_root>;
45 
46   int min_i = 0;
47   int max_i = nvecs;
48 
49   int nrows           = NMO * NMO;
50   bool distribute_Ham = (TG.getNGroupsPerTG() < TG.getTotalNodes());
51   std::vector<IndexType> row_counts(nrows);
52 
53   app_log() << " Reading sparse factorized two-electron integrals." << std::endl;
54   // calculate column range that belong to this node
55   if (distribute_Ham)
56   {
57     // count number of non-zero elements along each column (Chol Vec)
58     std::vector<IndexType> col_counts(nvecs);
59     csr_hdf5::multiple_reader_global_count(dump, counter(false, nrows, nvecs, 0, nrows, 0, nvecs, cutoff1bar),
60                                            col_counts, TG, nread);
61 
62     std::vector<IndexType> sets(TG.getNumberOfTGs() + 1);
63     simple_matrix_partition<TaskGroup_, IndexType, RealType> split(nrows, nvecs, cutoff1bar);
64     if (TG.getCoreID() < nread)
65       split.partition_over_TGs(TG, false, col_counts, sets);
66 
67     if (TG.getGlobalRank() == 0)
68     {
69       app_log() << " Partitioning of (factorized) Hamiltonian Vectors: ";
70       for (int i = 0; i <= TG.getNumberOfTGs(); i++)
71         app_log() << sets[i] << " ";
72       app_log() << std::endl;
73       app_log() << " Number of terms in each partitioning: ";
74       for (int i = 0; i < TG.getNumberOfTGs(); i++)
75         app_log() << accumulate(col_counts.begin() + sets[i], col_counts.begin() + sets[i + 1], 0) << " ";
76       app_log() << std::endl;
77     }
78 
79     TG.Node().broadcast(sets.begin(), sets.end());
80     min_i = sets[TG.getTGNumber()];
81     max_i = sets[TG.getTGNumber() + 1];
82 
83     csr_hdf5::multiple_reader_local_count(dump, counter(true, nrows, nvecs, 0, nrows, min_i, max_i, cutoff1bar),
84                                           row_counts, TG, nread);
85   }
86   else
87   {
88     // should be faster if ham is not distributed
89     csr_hdf5::multiple_reader_global_count(dump, counter(true, nrows, nvecs, 0, nrows, 0, nvecs, cutoff1bar),
90                                            row_counts, TG, nread);
91   }
92 
93   ucsr_matrix ucsr(tp_ul_ul{nrows, max_i - min_i}, tp_ul_ul{0, min_i}, row_counts, Alloc(TG.Node()));
94   csr::matrix_emplace_wrapper<ucsr_matrix> csr_wrapper(ucsr, TG.Node());
95 
96   using mat_map = qmcplusplus::afqmc::matrix_map;
97   csr_hdf5::multiple_reader_hdf5_csr<SPValueType, int>(csr_wrapper,
98                                                        mat_map(false, true, nrows, nvecs, 0, nrows, min_i, max_i,
99                                                                cutoff1bar),
100                                                        dump, TG, nread);
101   csr_wrapper.push_buffer();
102   TG.node_barrier();
103 
104   return FactorizedSparseHamiltonian::shm_csr_matrix(std::move(ucsr));
105 }
106 
107 } // namespace afqmc
108 
109 } // namespace qmcplusplus
110