1 #ifndef AFQMC_CONFIG_H
2 #define AFQMC_CONFIG_H
3
4 #include <string>
5 #include <algorithm>
6 #include <cstdlib>
7 #include <ctype.h>
8 #include <vector>
9 #include <map>
10 #include <complex>
11 #include <tuple>
12 #include <fstream>
13 #include "Configuration.h"
14 #include "OhmmsData/AttributeSet.h"
15 #include "OhmmsData/ParameterSet.h"
16
17 #include "AFQMC/config.0.h"
18
19 #include "AFQMC/Memory/custom_pointers.hpp"
20
21 #include "AFQMC/Matrix/csr_matrix.hpp"
22 #include "AFQMC/Matrix/coo_matrix.hpp"
23
24 //#include "mpi3/shared_window.hpp"
25 #include "AFQMC/Memory/SharedMemory/shm_ptr_with_raw_ptr_dispatch.hpp"
26 #include "multi/array.hpp"
27 #include "multi/array_ref.hpp"
28 #include "multi/memory/fallback.hpp"
29
30 #include "Utilities/TimerManager.h"
31
32 namespace qmcplusplus
33 {
// Timer list used by the AFQMC code. Only declared here; the definition lives in another translation unit.
extern TimerList_t AFQMCTimers;
// Identifiers of the individual AFQMC timers.
// NOTE(review): presumably used to index AFQMCTimers and as keys of AFQMCTimerNames -- confirm at the definition site.
enum AFQMCTimerIDs
{
  block_timer,
  pseudo_energy_timer,
  energy_timer,
  vHS_timer,
  assemble_X_timer,
  vbias_timer,
  G_for_vbias_timer,
  propagate_timer,
  back_propagate_timer,
  E_comm_overhead_timer,
  vHS_comm_overhead_timer,
  popcont_timer,
  ortho_timer,
  setup_timer,
  extra_timer,
  // NOTE(review): T1_t..T8_t look like generic scratch timer slots -- confirm intended use.
  T1_t,
  T2_t,
  T3_t,
  T4_t,
  T5_t,
  T6_t,
  T7_t,
  T8_t
};
// Timer ID/name list keyed by AFQMCTimerIDs. Only declared here; defined elsewhere.
extern TimerNameList_t<AFQMCTimerIDs> AFQMCTimerNames;
62
63 namespace afqmc
64 {
// until we switch to C++17, to reduce extra lines:
// shorthand for a tuple holding two std::size_t values
using tp_ul_ul = std::tuple<std::size_t, std::size_t>;
67
// Kinds of walker. Enumerators take the implicit values 0..3 in declaration order,
// which match the integer codes accepted by initWALKER_TYPES.
// NOTE(review): CLOSED/COLLINEAR/NONCOLLINEAR presumably refer to closed-shell,
// collinear-spin and non-collinear-spin walkers -- confirm.
enum WALKER_TYPES
{
  UNDEFINED_WALKER_TYPE,
  CLOSED,
  COLLINEAR,
  NONCOLLINEAR
};
// Tags naming the available allocator flavours.
// when ENABLE_CUDA is not set, DEVICE and TG_LOCAL are the same
// NOTE(review): no enumerator below is literally named DEVICE or TG_LOCAL; the remark above
// probably refers to the *_DEVICE / SHARED_LOCAL_DEVICE entries or to an older naming -- confirm.
enum ALLOCATOR_TYPES
{
  STD,
  NODE,
  STD_DEVICE,
  SHARED_LOCAL_DEVICE,
  SHARED_DEVICE
};
84
initWALKER_TYPES(int i)85 inline WALKER_TYPES initWALKER_TYPES(int i)
86 {
87 if (i == 0)
88 return UNDEFINED_WALKER_TYPE;
89 else if (i == 1)
90 return CLOSED;
91 else if (i == 2)
92 return COLLINEAR;
93 else if (i == 3)
94 return NONCOLLINEAR;
95 return UNDEFINED_WALKER_TYPE;
96 }
97
// Tuples made of one to four IndexType indices followed by a value of type T.
// NOTE(review): presumably the element type of sparse 1D..4D containers (indices + value) -- confirm at use sites.
template<typename T>
using s1D = std::tuple<IndexType, T>;
template<typename T>
using s2D = std::tuple<IndexType, IndexType, T>;
template<typename T>
using s3D = std::tuple<IndexType, IndexType, IndexType, T>;
template<typename T>
using s4D = std::tuple<IndexType, IndexType, IndexType, IndexType, T>;
106
// Spin channel labels; by declaration order Alpha == 0 and Beta == 1.
enum SpinTypes
{
  Alpha,
  Beta
};
112
// allocators
// shared_allocator<T>: the allocator provided by the shm namespace (shm_ptr_with_raw_ptr_dispatch).
template<class T>
using shared_allocator = shm::allocator_shm_ptr_with_raw_ptr_dispatch<T>;
// shm_pointer<T>: the pointer type exposed by shared_allocator<T>.
template<class T>
using shm_pointer = typename shared_allocator<T>::pointer;
118
119 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
// GPU build (ENABLE_CUDA or ENABLE_HIP): the device, local-task-group and node
// allocators all resolve to device::device_allocator, and device_ptr is device::device_pointer.
template<class T>
using device_allocator = device::device_allocator<T>;
template<class T>
using device_ptr = device::device_pointer<T>;
template<class T>
using localTG_allocator = device_allocator<T>;
template<class T>
using node_allocator = device_allocator<T>;
128 template<class T, class TG>
make_localTG_allocator(TG &)129 localTG_allocator<T> make_localTG_allocator(TG&)
130 {
131 return localTG_allocator<T>{};
132 }
133 template<class T, class TG>
make_node_allocator(TG &)134 node_allocator<T> make_node_allocator(TG&)
135 {
136 return node_allocator<T>{};
137 }
/* Temporary fix for the conflict problem between cpu and gpu pointers. Find proper fix */
// GPU build: a device pointer is returned unchanged.
template<class T>
device_ptr<T> make_device_ptr(device_ptr<T> p)
{
  return p;
}
// GPU build: a raw host pointer is never converted; this overload always throws
// std::runtime_error after invoking print_stacktrace (defined outside this file).
// NOTE(review): the message wording ("device_pointer<T> to T*") reads reversed relative to the
// conversion attempted here (T* to device pointer) -- confirm before changing the text.
template<class T>
device_ptr<T> make_device_ptr(T* p)
{
  print_stacktrace;
  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
}
//template<class T>
//device_ptr<T> make_device_ptr(boost::mpi3::intranode::array_ptr<T> p)
//{
//  print_stacktrace;
//  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
//}
// GPU build: a shared-memory pointer is never converted either; always throws
// std::runtime_error after invoking print_stacktrace.
template<class T>
device_ptr<T> make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)
{
  print_stacktrace;
  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
}
162
// GPU build: both the "device" and the "shm" memory resources and constructors
// are mapped onto the device:: implementations.
using device_memory_resource = device::memory_resource;
using shm_memory_resource    = device::memory_resource;
template<class T>
using device_constructor = device::constructor<T>;
template<class T>
using shm_constructor = device::constructor<T>;
169
170 #else
// Non-GPU build: "device" memory is ordinary host memory (std::allocator / raw pointer),
// while the local-task-group and node allocators are the shared-memory allocator.
template<class T>
using device_allocator = std::allocator<T>;
template<class T>
using device_ptr = T*;
template<class T>
using localTG_allocator = shared_allocator<T>;
template<class T>
using node_allocator = shared_allocator<T>;
179 template<class T, class TG>
make_localTG_allocator(TG & t_)180 localTG_allocator<T> make_localTG_allocator(TG& t_)
181 {
182 return localTG_allocator<T>{t_.TG_local()};
183 }
184 template<class T, class TG>
make_node_allocator(TG & t_)185 node_allocator<T> make_node_allocator(TG& t_)
186 {
187 return node_allocator<T>{t_.Node()};
188 }
/* Temporary fix for the conflict problem between cpu and gpu pointers. Find proper fix */
// Non-GPU build: device_ptr<T> is T*, so a raw pointer is returned unchanged.
template<class T>
device_ptr<T> make_device_ptr(T* p)
{
  return p;
}
//template<class T>
//device_ptr<T> make_device_ptr(boost::mpi3::intranode::array_ptr<T> p) = delete;
//{ //return device_ptr<T>{to_address(p)}; }*/
//  print_stacktrace;*/
//  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");*/
//}
// Non-GPU build: a shared-memory pointer is converted through to_address(p).
template<class T>
device_ptr<T> make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)
{
  return device_ptr<T>{to_address(p)};
}
206
// Non-GPU build: the device memory resource is the default boost::multi resource, the shm
// resource is the shm-specific one, and the "constructors" are simply the matching allocators.
using device_memory_resource = boost::multi::memory::resource<>;
using shm_memory_resource    = shm::memory_resource_shm_ptr_with_raw_ptr_dispatch;
template<class T>
using device_constructor = device_allocator<T>;
template<class T>
using shm_constructor = shared_allocator<T>;
213
214 #endif
215
// Host-side counterparts; identical in GPU and non-GPU builds.
template<class T>
using host_constructor     = std::allocator<T>;
using host_memory_resource = boost::multi::memory::resource<>;
219
// new types
// CSR matrices allocated with shared_allocator, using int for the first index type and
// std::size_t for the second, tagged with ma::sparse::is_root. One alias per value type.
// NOTE(review): which csr_matrix template parameter is the column index and which the row
// pointer type is defined in csr_matrix.hpp -- confirm there.
using SpCType_shm_csr_matrix =
    ma::sparse::csr_matrix<SPComplexType, int, std::size_t, shared_allocator<SPComplexType>, ma::sparse::is_root>;
using SpVType_shm_csr_matrix =
    ma::sparse::csr_matrix<SPValueType, int, std::size_t, shared_allocator<SPValueType>, ma::sparse::is_root>;
using CType_shm_csr_matrix =
    ma::sparse::csr_matrix<ComplexType, int, std::size_t, shared_allocator<ComplexType>, ma::sparse::is_root>;
using VType_shm_csr_matrix =
    ma::sparse::csr_matrix<ValueType, int, std::size_t, shared_allocator<ValueType>, ma::sparse::is_root>;
229
// PsiT_Matrix_t<T> is always a shared-memory CSR matrix with int/int index types;
// the PsiT_IN_SHM switch that once selected it is commented out.
//#ifdef PsiT_IN_SHM
template<typename T>
using PsiT_Matrix_t = ma::sparse::csr_matrix<T, int, int, shared_allocator<T>, ma::sparse::is_root>;
using PsiT_Matrix   = PsiT_Matrix_t<ComplexType>;
// devcsr_Matrix: device-allocated CSR matrix in GPU builds, otherwise the same
// shared-memory layout as PsiT_Matrix.
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
using devcsr_Matrix = ma::sparse::csr_matrix<ComplexType, int, int, device_allocator<ComplexType>>;
#else
using devcsr_Matrix = ma::sparse::csr_matrix<ComplexType, int, int, shared_allocator<ComplexType>, ma::sparse::is_root>;
#endif
//#else
//  using PsiT_Matrix = ma::sparse::csr_matrix<ComplexType,int,int>;
//  using devPsiT_Matrix = ma::sparse::csr_matrix<ComplexType,int,int>;
//#endif
243
244
// P1Type: complex CSR matrix allocated with localTG_allocator. The ma::sparse::is_root tag is
// only supplied in non-GPU builds, where localTG_allocator is the shared-memory allocator.
// NOTE(review): presumably the type of the one-body propagator -- confirm at use sites.
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
using P1Type = ma::sparse::csr_matrix<ComplexType, int, int, localTG_allocator<ComplexType>>;
#else
using P1Type = ma::sparse::csr_matrix<ComplexType, int, int, localTG_allocator<ComplexType>, ma::sparse::is_root>;
#endif
250
// Tags for the supported Hamiltonian representations; UNKNOWN is the last enumerator.
// NOTE(review): THC/KP prefixes presumably stand for tensor hyper-contraction and k-point
// variants -- confirm against the Hamiltonian factory.
enum HamiltonianTypes
{
  Factorized,
  THC,
  KPTHC,
  KPFactorized,
  RealDenseFactorized,
  UNKNOWN
};
260
// Shorthand for boost::multi::iextensions<D>, where D is the number of dimensions.
template<std::ptrdiff_t D>
using iextensions = typename boost::multi::iextensions<D>;
//using extensions = typename boost::multi::layout_t<D>::extensions_type;
264
// general matrix definitions
// Rank-1 containers: boost::multi::array aliases are parameterised on an allocator
// (std::allocator by default); the *_ref aliases are boost::multi::array_ref parameterised
// on a pointer type (raw pointer by default).
template<class Alloc = std::allocator<int>>
using IntegerVector = boost::multi::array<int, 1, Alloc>;
template<class Alloc = std::allocator<ValueType>>
using ValueVector = boost::multi::array<ValueType, 1, Alloc>;
template<class Alloc = std::allocator<ComplexType>>
using ComplexVector = boost::multi::array<ComplexType, 1, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplexVector = boost::multi::array<SPComplexType, 1, Alloc>;
template<class Ptr = ComplexType*>
using ComplexVector_ref = boost::multi::array_ref<ComplexType, 1, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplexVector_ref = boost::multi::array_ref<SPComplexType, 1, Ptr>;
278
// Rank-2 containers: allocator-parameterised boost::multi::array aliases and
// pointer-parameterised boost::multi::array_ref aliases.
template<class Alloc = std::allocator<int>>
using IntegerMatrix = boost::multi::array<int, 2, Alloc>;
template<class Alloc = std::allocator<ValueType>>
using ValueMatrix = boost::multi::array<ValueType, 2, Alloc>;
template<class Alloc = std::allocator<ComplexType>>
using ComplexMatrix = boost::multi::array<ComplexType, 2, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplexMatrix = boost::multi::array<SPComplexType, 2, Alloc>;
template<class Ptr = ComplexType*>
using ComplexMatrix_ref = boost::multi::array_ref<ComplexType, 2, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplexMatrix_ref = boost::multi::array_ref<SPComplexType, 2, Ptr>;
291
// Rank-3 containers: allocator-parameterised boost::multi::array aliases and
// pointer-parameterised boost::multi::array_ref aliases.
template<class Alloc = std::allocator<ComplexType>>
using Complex3Tensor = boost::multi::array<ComplexType, 3, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplex3Tensor = boost::multi::array<SPComplexType, 3, Alloc>;
template<class Ptr = ComplexType*>
using Complex3Tensor_ref = boost::multi::array_ref<ComplexType, 3, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplex3Tensor_ref = boost::multi::array_ref<SPComplexType, 3, Ptr>;
300
// Arbitrary-rank containers: the rank D is a template parameter; otherwise these follow
// the same array / array_ref pattern as the fixed-rank aliases.
template<std::ptrdiff_t D, class Alloc = std::allocator<ComplexType>>
using ComplexArray = boost::multi::array<ComplexType, D, Alloc>;
template<std::ptrdiff_t D, class Alloc = std::allocator<SPComplexType>>
using SPComplexArray = boost::multi::array<SPComplexType, D, Alloc>;
template<std::ptrdiff_t D, class Ptr = ComplexType*>
using ComplexArray_ref = boost::multi::array_ref<ComplexType, D, Ptr>;
template<std::ptrdiff_t D, class Ptr = SPComplexType*>
using SPComplexArray_ref = boost::multi::array_ref<SPComplexType, D, Ptr>;
309
310
311 struct AFQMCInfo
312 {
313 public:
314 // default constructor
AFQMCInfoAFQMCInfo315 AFQMCInfo()
316 : name(""),
317 NMO(-1),
318 NMO_FULL(-1),
319 NAEA(-1),
320 NAEB(-1),
321 NCA(0),
322 NCB(0),
323 NETOT(-1),
324 MS2(-99),
325 ISYM(-1),
326 spinRestricted(true)
327 {}
328
AFQMCInfoAFQMCInfo329 AFQMCInfo(std::string nm, int nmo_, int naea_, int naeb_)
330 : name(nm),
331 NMO(nmo_),
332 NMO_FULL(nmo_),
333 NAEA(naea_),
334 NAEB(naeb_),
335 NCA(0),
336 NCB(0),
337 NETOT(-1),
338 MS2(-99),
339 ISYM(-1),
340 spinRestricted(true)
341 {}
342
343 AFQMCInfo(const AFQMCInfo& other) = default;
344 AFQMCInfo& operator=(const AFQMCInfo& other) = default;
345
346 // destructor
~AFQMCInfoAFQMCInfo347 ~AFQMCInfo() {}
348
349 // identifier
350 std::string name;
351
352 // number of active orbitals
353 int NMO;
354
355 // number of orbitals
356 int NMO_FULL;
357
358 // number of active electrons alpha/beta
359 int NAEA, NAEB;
360
361 // number of core electrons alpha/beta
362 int NCA, NCB;
363
364 // total number of electrons
365 int NETOT;
366
367 // ms2
368 int MS2;
369
370 // isym
371 int ISYM;
372
373 // if true then RHF calculation, otherwise it is UHF
374 bool spinRestricted;
375
376 // copies values from object
copyInfoAFQMCInfo377 void copyInfo(const AFQMCInfo& a)
378 {
379 name = a.name;
380 NMO_FULL = a.NMO_FULL;
381 NMO = a.NMO;
382 NAEA = a.NAEA;
383 NAEB = a.NAEB;
384 NCA = a.NCA;
385 NCB = a.NCB;
386 NETOT = a.NETOT;
387 MS2 = a.MS2;
388 ISYM = a.ISYM;
389 spinRestricted = a.spinRestricted;
390 }
391
392 // no fully spin polarized yet, not sure what it will break
checkAFQMCInfoStateAFQMCInfo393 bool checkAFQMCInfoState()
394 {
395 if (NMO_FULL < 1 || NAEA < 1 || NAEB < 1 || NCA < 0 || NCB < 0) //|| NETOT!= NCA+NCB+NAEA+NAEB ) //|| MS2<0 )
396 return false;
397 return true;
398 }
399
printAFQMCInfoStateAFQMCInfo400 void printAFQMCInfoState(std::ostream& out)
401 {
402 out << "AFQMC info: \n"
403 << "name: " << name << "\n"
404 << "NMO_FULL: " << NMO_FULL << "\n"
405 << "NAEA: " << NAEA << "\n"
406 << "NAEB: " << NAEB << "\n"
407 << "NCA: " << NCA << "\n"
408 << "NCB: " << NCB << "\n"
409 << "NETOT: " << NETOT << "\n"
410 << "MS2: " << MS2 << "\n"
411 << "spinRestricted: " << spinRestricted << std::endl;
412 }
413
parseAFQMCInfo414 bool parse(xmlNodePtr cur)
415 {
416 if (cur == NULL)
417 return false;
418
419 OhmmsAttributeSet oAttrib;
420 oAttrib.add(name, "name");
421 oAttrib.put(cur);
422
423 std::string sR("yes");
424 ParameterSet m_param;
425 m_param.add(NMO_FULL, "NMO_FULL");
426 m_param.add(NMO_FULL, "NMO");
427 m_param.add(NAEA, "NAEA");
428 m_param.add(NAEB, "NAEB");
429 m_param.add(NCA, "NCA");
430 m_param.add(NCB, "NCB");
431 m_param.add(NETOT, "NETOT");
432 m_param.add(MS2, "MS2");
433 m_param.add(sR, "spinRestricted");
434 m_param.put(cur);
435
436 spinRestricted = false;
437 std::string sR0(sR);
438 std::transform(sR0.begin(), sR0.end(), sR.begin(), (int (*)(int))tolower);
439 if (sR == "yes" || sR == "true")
440 spinRestricted = true;
441
442 NMO = NMO_FULL - NCA;
443 if (NETOT == -1)
444 NETOT = NCA + NCB + NAEA + NAEB;
445
446 return true;
447 }
448 };
449
450 } // namespace afqmc
451 } // namespace qmcplusplus
452
453 #endif
454