1 #include <cstdlib>
2 #include <memory>
3 #include <algorithm>
4 #include <complex>
5 #include <iostream>
6 #include <fstream>
7 #include <map>
8 #include <utility>
9 #include <vector>
10 #include <numeric>
11 #if defined(USE_MPI)
12 #include <mpi.h>
13 #endif
14
15 #include "AFQMC/config.h"
16 #include "AFQMC/Hamiltonians/HamiltonianFactory.h"
17
18 #include "AFQMC/Hamiltonians/FactorizedSparseHamiltonian.h"
19 #include "AFQMC/Utilities/readHeader.h"
20 #include "AFQMC/Utilities/Utils.hpp"
21 #include "AFQMC/Matrix/hdf5_readers_new.hpp"
22 #include "AFQMC/Matrix/array_partition.hpp"
23 #include "AFQMC/Matrix/csr_hdf5_readers.hpp"
24 #include "AFQMC/Matrix/matrix_emplace_wrapper.hpp"
25 #include "AFQMC/Matrix/csr_matrix.hpp"
26 #include "AFQMC/Matrix/coo_matrix.hpp"
27
28 namespace qmcplusplus
29 {
30 namespace afqmc
31 {
32 // it is possible to subdivide the rows within equivalent nodes and communicate at the end
read_V2fact(hdf_archive & dump,TaskGroup_ & TG,int nread,int NMO,int nvecs,double cutoff1bar,int int_blocks)33 FactorizedSparseHamiltonian::shm_csr_matrix read_V2fact(hdf_archive& dump,
34 TaskGroup_& TG,
35 int nread,
36 int NMO,
37 int nvecs,
38 double cutoff1bar,
39 int int_blocks)
40 {
41 using counter = qmcplusplus::afqmc::sparse_matrix_element_counter;
42 using Alloc = shared_allocator<SPValueType>;
43 using ucsr_matrix =
44 ma::sparse::ucsr_matrix<SPValueType, int, std::size_t, shared_allocator<SPValueType>, ma::sparse::is_root>;
45
46 int min_i = 0;
47 int max_i = nvecs;
48
49 int nrows = NMO * NMO;
50 bool distribute_Ham = (TG.getNGroupsPerTG() < TG.getTotalNodes());
51 std::vector<IndexType> row_counts(nrows);
52
53 app_log() << " Reading sparse factorized two-electron integrals." << std::endl;
54 // calculate column range that belong to this node
55 if (distribute_Ham)
56 {
57 // count number of non-zero elements along each column (Chol Vec)
58 std::vector<IndexType> col_counts(nvecs);
59 csr_hdf5::multiple_reader_global_count(dump, counter(false, nrows, nvecs, 0, nrows, 0, nvecs, cutoff1bar),
60 col_counts, TG, nread);
61
62 std::vector<IndexType> sets(TG.getNumberOfTGs() + 1);
63 simple_matrix_partition<TaskGroup_, IndexType, RealType> split(nrows, nvecs, cutoff1bar);
64 if (TG.getCoreID() < nread)
65 split.partition_over_TGs(TG, false, col_counts, sets);
66
67 if (TG.getGlobalRank() == 0)
68 {
69 app_log() << " Partitioning of (factorized) Hamiltonian Vectors: ";
70 for (int i = 0; i <= TG.getNumberOfTGs(); i++)
71 app_log() << sets[i] << " ";
72 app_log() << std::endl;
73 app_log() << " Number of terms in each partitioning: ";
74 for (int i = 0; i < TG.getNumberOfTGs(); i++)
75 app_log() << accumulate(col_counts.begin() + sets[i], col_counts.begin() + sets[i + 1], 0) << " ";
76 app_log() << std::endl;
77 }
78
79 TG.Node().broadcast(sets.begin(), sets.end());
80 min_i = sets[TG.getTGNumber()];
81 max_i = sets[TG.getTGNumber() + 1];
82
83 csr_hdf5::multiple_reader_local_count(dump, counter(true, nrows, nvecs, 0, nrows, min_i, max_i, cutoff1bar),
84 row_counts, TG, nread);
85 }
86 else
87 {
88 // should be faster if ham is not distributed
89 csr_hdf5::multiple_reader_global_count(dump, counter(true, nrows, nvecs, 0, nrows, 0, nvecs, cutoff1bar),
90 row_counts, TG, nread);
91 }
92
93 ucsr_matrix ucsr(tp_ul_ul{nrows, max_i - min_i}, tp_ul_ul{0, min_i}, row_counts, Alloc(TG.Node()));
94 csr::matrix_emplace_wrapper<ucsr_matrix> csr_wrapper(ucsr, TG.Node());
95
96 using mat_map = qmcplusplus::afqmc::matrix_map;
97 csr_hdf5::multiple_reader_hdf5_csr<SPValueType, int>(csr_wrapper,
98 mat_map(false, true, nrows, nvecs, 0, nrows, min_i, max_i,
99 cutoff1bar),
100 dump, TG, nread);
101 csr_wrapper.push_buffer();
102 TG.node_barrier();
103
104 return FactorizedSparseHamiltonian::shm_csr_matrix(std::move(ucsr));
105 }
106
107 } // namespace afqmc
108
109 } // namespace qmcplusplus
110