1 #ifndef AFQMC_CONFIG_H
2 #define AFQMC_CONFIG_H
3 
4 #include <string>
5 #include <algorithm>
6 #include <cstdlib>
7 #include <ctype.h>
8 #include <vector>
9 #include <map>
10 #include <complex>
11 #include <tuple>
12 #include <fstream>
13 #include "Configuration.h"
14 #include "OhmmsData/AttributeSet.h"
15 #include "OhmmsData/ParameterSet.h"
16 
17 #include "AFQMC/config.0.h"
18 
19 #include "AFQMC/Memory/custom_pointers.hpp"
20 
21 #include "AFQMC/Matrix/csr_matrix.hpp"
22 #include "AFQMC/Matrix/coo_matrix.hpp"
23 
24 //#include "mpi3/shared_window.hpp"
25 #include "AFQMC/Memory/SharedMemory/shm_ptr_with_raw_ptr_dispatch.hpp"
26 #include "multi/array.hpp"
27 #include "multi/array_ref.hpp"
28 #include "multi/memory/fallback.hpp"
29 
30 #include "Utilities/TimerManager.h"
31 
32 namespace qmcplusplus
33 {
34 extern TimerList_t AFQMCTimers;
// Identifiers for the AFQMC timers. This enum is the ID type of the
// AFQMCTimerNames list declared right after it.
// NOTE(review): presumably the enumerator values also index AFQMCTimers --
// confirm where that list is populated.
enum AFQMCTimerIDs
{
  block_timer,
  pseudo_energy_timer,
  energy_timer,
  vHS_timer,
  assemble_X_timer,
  vbias_timer,
  G_for_vbias_timer,
  propagate_timer,
  back_propagate_timer,
  E_comm_overhead_timer,
  vHS_comm_overhead_timer,
  popcont_timer,
  ortho_timer,
  setup_timer,
  extra_timer,
  // NOTE(review): T1_t..T8_t look like generic, unnamed timer slots -- confirm intended use.
  T1_t,
  T2_t,
  T3_t,
  T4_t,
  T5_t,
  T6_t,
  T7_t,
  T8_t
};
61 extern TimerNameList_t<AFQMCTimerIDs> AFQMCTimerNames;
62 
63 namespace afqmc
64 {
// until we switch to c++17, to reduce extra lines
66 using tp_ul_ul = std::tuple<std::size_t, std::size_t>;
67 
// Kinds of walker. The enumerators take the implicit values 0..3 in
// declaration order, which is the integer coding that initWALKER_TYPES()
// translates back into this enum.
enum WALKER_TYPES
{
  UNDEFINED_WALKER_TYPE,
  CLOSED,
  COLLINEAR,
  NONCOLLINEAR
};
// Allocator categories.
// when ENABLE_CUDA is not set, DEVICE and TG_LOCAL are the same
// NOTE(review): no enumerator below is literally named DEVICE or TG_LOCAL; the
// remark above presumably refers to the *_DEVICE entries -- confirm.
enum ALLOCATOR_TYPES
{
  STD,
  NODE,
  STD_DEVICE,
  SHARED_LOCAL_DEVICE,
  SHARED_DEVICE
};
84 
initWALKER_TYPES(int i)85 inline WALKER_TYPES initWALKER_TYPES(int i)
86 {
87   if (i == 0)
88     return UNDEFINED_WALKER_TYPE;
89   else if (i == 1)
90     return CLOSED;
91   else if (i == 2)
92     return COLLINEAR;
93   else if (i == 3)
94     return NONCOLLINEAR;
95   return UNDEFINED_WALKER_TYPE;
96 }
97 
// Tuple aliases holding N indices of type IndexType followed by one value of
// type T (N = 1..4).
// NOTE(review): presumably used as (indices..., value) entries of sparse
// tensors -- confirm against callers.
template<typename T>
using s1D = std::tuple<IndexType, T>;
template<typename T>
using s2D = std::tuple<IndexType, IndexType, T>;
template<typename T>
using s3D = std::tuple<IndexType, IndexType, IndexType, T>;
template<typename T>
using s4D = std::tuple<IndexType, IndexType, IndexType, IndexType, T>;
106 
// Spin labels; implicit enumerator values are Alpha = 0 and Beta = 1.
enum SpinTypes
{
  Alpha,
  Beta
};
112 
// allocators
// shared_allocator<T>: the shm allocator with raw-pointer dispatch.
template<class T>
using shared_allocator = shm::allocator_shm_ptr_with_raw_ptr_dispatch<T>;
// shm_pointer<T>: the pointer type exposed by shared_allocator<T>.
template<class T>
using shm_pointer = typename shared_allocator<T>::pointer;
118 
119 #if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
// GPU build (ENABLE_CUDA or ENABLE_HIP): the device, task-group-local and node
// allocators are all the device allocator, and device_ptr is the device pointer type.
template<class T>
using device_allocator = device::device_allocator<T>;
template<class T>
using device_ptr = device::device_pointer<T>;
template<class T>
using localTG_allocator = device_allocator<T>;
template<class T>
using node_allocator = device_allocator<T>;
128 template<class T, class TG>
make_localTG_allocator(TG &)129 localTG_allocator<T> make_localTG_allocator(TG&)
130 {
131   return localTG_allocator<T>{};
132 }
133 template<class T, class TG>
make_node_allocator(TG &)134 node_allocator<T> make_node_allocator(TG&)
135 {
136   return node_allocator<T>{};
137 }
138 /*   Temporary fix for the conflict problem between cpu and gpu pointers. Find proper fix */
139 template<class T>
make_device_ptr(device_ptr<T> p)140 device_ptr<T> make_device_ptr(device_ptr<T> p)
141 {
142   return p;
143 }
144 template<class T>
make_device_ptr(T * p)145 device_ptr<T> make_device_ptr(T* p)
146 {
147   print_stacktrace;
148   throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
149 }
150 //template<class T>
151 //device_ptr<T> make_device_ptr(boost::mpi3::intranode::array_ptr<T> p)
152 //{
153 //  print_stacktrace;
154 //  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
155 //}
156 template<class T>
make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)157 device_ptr<T> make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)
158 {
159   print_stacktrace;
160   throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");
161 }
162 
// GPU build: both the device and the shm memory resource are device::memory_resource,
// and both constructor aliases are device::constructor<T>.
using device_memory_resource = device::memory_resource;
using shm_memory_resource    = device::memory_resource;
template<class T>
using device_constructor = device::constructor<T>;
template<class T>
using shm_constructor = device::constructor<T>;
169 
170 #else
// CPU build (neither ENABLE_CUDA nor ENABLE_HIP): the "device" allocator is
// std::allocator, device_ptr is a raw pointer, and the task-group-local and
// node allocators are the shared-memory allocator.
template<class T>
using device_allocator = std::allocator<T>;
template<class T>
using device_ptr = T*;
template<class T>
using localTG_allocator = shared_allocator<T>;
template<class T>
using node_allocator = shared_allocator<T>;
179 template<class T, class TG>
make_localTG_allocator(TG & t_)180 localTG_allocator<T> make_localTG_allocator(TG& t_)
181 {
182   return localTG_allocator<T>{t_.TG_local()};
183 }
184 template<class T, class TG>
make_node_allocator(TG & t_)185 node_allocator<T> make_node_allocator(TG& t_)
186 {
187   return node_allocator<T>{t_.Node()};
188 }
189 /*   Temporary fix for the conflict problem between cpu and gpu pointers. Find proper fix */
190 template<class T>
make_device_ptr(T * p)191 device_ptr<T> make_device_ptr(T* p)
192 {
193   return p;
194 }
195 //template<class T>
196 //device_ptr<T> make_device_ptr(boost::mpi3::intranode::array_ptr<T> p) = delete;
197 //{ //return device_ptr<T>{to_address(p)}; }*/
198 //  print_stacktrace;*/
199 //  throw std::runtime_error(" Invalid pointer conversion: device_pointer<T> to T*.");*/
200 //}
201 template<class T>
make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)202 device_ptr<T> make_device_ptr(shm::shm_ptr_with_raw_ptr_dispatch<T> p)
203 {
204   return device_ptr<T>{to_address(p)};
205 }
206 
// CPU build: the device memory resource is the default boost::multi resource,
// the shm memory resource is the shm raw-pointer-dispatch resource, and the
// constructor aliases are simply the matching allocators.
using device_memory_resource = boost::multi::memory::resource<>;
using shm_memory_resource    = shm::memory_resource_shm_ptr_with_raw_ptr_dispatch;
template<class T>
using device_constructor = device_allocator<T>;
template<class T>
using shm_constructor = shared_allocator<T>;
213 
214 #endif
215 
// Host-side counterparts, identical in GPU and CPU builds: std::allocator as
// constructor and the default boost::multi memory resource.
template<class T>
using host_constructor     = std::allocator<T>;
using host_memory_resource = boost::multi::memory::resource<>;
219 
// new types
// CSR sparse-matrix aliases backed by shared_allocator. They differ only in the
// value type (SPComplexType, SPValueType, ComplexType, ValueType); the other
// template arguments are int, std::size_t and ma::sparse::is_root in all four.
using SpCType_shm_csr_matrix =
    ma::sparse::csr_matrix<SPComplexType, int, std::size_t, shared_allocator<SPComplexType>, ma::sparse::is_root>;
using SpVType_shm_csr_matrix =
    ma::sparse::csr_matrix<SPValueType, int, std::size_t, shared_allocator<SPValueType>, ma::sparse::is_root>;
using CType_shm_csr_matrix =
    ma::sparse::csr_matrix<ComplexType, int, std::size_t, shared_allocator<ComplexType>, ma::sparse::is_root>;
using VType_shm_csr_matrix =
    ma::sparse::csr_matrix<ValueType, int, std::size_t, shared_allocator<ValueType>, ma::sparse::is_root>;
229 
//#ifdef PsiT_IN_SHM
// PsiT_Matrix_t<T>: CSR matrix of T with int/int template arguments, backed by
// shared_allocator; PsiT_Matrix is the ComplexType instance.
template<typename T>
using PsiT_Matrix_t = ma::sparse::csr_matrix<T, int, int, shared_allocator<T>, ma::sparse::is_root>;
using PsiT_Matrix   = PsiT_Matrix_t<ComplexType>;
// devcsr_Matrix: ComplexType CSR matrix using device_allocator in the GPU build,
// and shared_allocator plus ma::sparse::is_root otherwise.
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
using devcsr_Matrix = ma::sparse::csr_matrix<ComplexType, int, int, device_allocator<ComplexType>>;
#else
using devcsr_Matrix = ma::sparse::csr_matrix<ComplexType, int, int, shared_allocator<ComplexType>, ma::sparse::is_root>;
#endif
//#else
//  using PsiT_Matrix = ma::sparse::csr_matrix<ComplexType,int,int>;
//  using devPsiT_Matrix = ma::sparse::csr_matrix<ComplexType,int,int>;
//#endif
243 
244 
// P1Type: ComplexType CSR matrix allocated with localTG_allocator. The CPU
// build additionally passes ma::sparse::is_root as the last template argument.
#if defined(ENABLE_CUDA) || defined(ENABLE_HIP)
using P1Type = ma::sparse::csr_matrix<ComplexType, int, int, localTG_allocator<ComplexType>>;
#else
using P1Type        = ma::sparse::csr_matrix<ComplexType, int, int, localTG_allocator<ComplexType>, ma::sparse::is_root>;
#endif
250 
// Tags for the available Hamiltonian representations; UNKNOWN is the last
// enumerator.
enum HamiltonianTypes
{
  Factorized,
  THC,
  KPTHC,
  KPFactorized,
  RealDenseFactorized,
  UNKNOWN
};
260 
// Shorthand for boost::multi::iextensions<D>, where D is the number of dimensions.
template<std::ptrdiff_t D>
using iextensions = typename boost::multi::iextensions<D>;
//using extensions = typename boost::multi::layout_t<D>::extensions_type;
264 
// general matrix definitions
// 1-D containers: owning boost::multi::array aliases parameterized on the
// allocator (std::allocator by default), and non-owning array_ref aliases
// parameterized on the pointer type (raw pointer by default).
template<class Alloc = std::allocator<int>>
using IntegerVector = boost::multi::array<int, 1, Alloc>;
template<class Alloc = std::allocator<ValueType>>
using ValueVector = boost::multi::array<ValueType, 1, Alloc>;
template<class Alloc = std::allocator<ComplexType>>
using ComplexVector = boost::multi::array<ComplexType, 1, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplexVector = boost::multi::array<SPComplexType, 1, Alloc>;
template<class Ptr = ComplexType*>
using ComplexVector_ref = boost::multi::array_ref<ComplexType, 1, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplexVector_ref = boost::multi::array_ref<SPComplexType, 1, Ptr>;
278 
// 2-D containers: owning boost::multi::array aliases (allocator parameter) and
// non-owning array_ref aliases (pointer parameter).
template<class Alloc = std::allocator<int>>
using IntegerMatrix = boost::multi::array<int, 2, Alloc>;
template<class Alloc = std::allocator<ValueType>>
using ValueMatrix = boost::multi::array<ValueType, 2, Alloc>;
template<class Alloc = std::allocator<ComplexType>>
using ComplexMatrix = boost::multi::array<ComplexType, 2, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplexMatrix = boost::multi::array<SPComplexType, 2, Alloc>;
template<class Ptr = ComplexType*>
using ComplexMatrix_ref = boost::multi::array_ref<ComplexType, 2, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplexMatrix_ref = boost::multi::array_ref<SPComplexType, 2, Ptr>;
291 
// 3-D containers: owning boost::multi::array aliases (allocator parameter) and
// non-owning array_ref aliases (pointer parameter).
template<class Alloc = std::allocator<ComplexType>>
using Complex3Tensor = boost::multi::array<ComplexType, 3, Alloc>;
template<class Alloc = std::allocator<SPComplexType>>
using SPComplex3Tensor = boost::multi::array<SPComplexType, 3, Alloc>;
template<class Ptr = ComplexType*>
using Complex3Tensor_ref = boost::multi::array_ref<ComplexType, 3, Ptr>;
template<class Ptr = SPComplexType*>
using SPComplex3Tensor_ref = boost::multi::array_ref<SPComplexType, 3, Ptr>;
300 
// D-dimensional containers: same owning / non-owning pairing as above, with the
// number of dimensions D as the leading template parameter.
template<std::ptrdiff_t D, class Alloc = std::allocator<ComplexType>>
using ComplexArray = boost::multi::array<ComplexType, D, Alloc>;
template<std::ptrdiff_t D, class Alloc = std::allocator<SPComplexType>>
using SPComplexArray = boost::multi::array<SPComplexType, D, Alloc>;
template<std::ptrdiff_t D, class Ptr = ComplexType*>
using ComplexArray_ref = boost::multi::array_ref<ComplexType, D, Ptr>;
template<std::ptrdiff_t D, class Ptr = SPComplexType*>
using SPComplexArray_ref = boost::multi::array_ref<SPComplexType, D, Ptr>;
309 
310 
311 struct AFQMCInfo
312 {
313 public:
314   // default constructor
AFQMCInfoAFQMCInfo315   AFQMCInfo()
316       : name(""),
317         NMO(-1),
318         NMO_FULL(-1),
319         NAEA(-1),
320         NAEB(-1),
321         NCA(0),
322         NCB(0),
323         NETOT(-1),
324         MS2(-99),
325         ISYM(-1),
326         spinRestricted(true)
327   {}
328 
AFQMCInfoAFQMCInfo329   AFQMCInfo(std::string nm, int nmo_, int naea_, int naeb_)
330       : name(nm),
331         NMO(nmo_),
332         NMO_FULL(nmo_),
333         NAEA(naea_),
334         NAEB(naeb_),
335         NCA(0),
336         NCB(0),
337         NETOT(-1),
338         MS2(-99),
339         ISYM(-1),
340         spinRestricted(true)
341   {}
342 
343   AFQMCInfo(const AFQMCInfo& other) = default;
344   AFQMCInfo& operator=(const AFQMCInfo& other) = default;
345 
346   // destructor
~AFQMCInfoAFQMCInfo347   ~AFQMCInfo() {}
348 
349   // identifier
350   std::string name;
351 
352   // number of active orbitals
353   int NMO;
354 
355   // number of orbitals
356   int NMO_FULL;
357 
358   // number of active electrons alpha/beta
359   int NAEA, NAEB;
360 
361   // number of core electrons alpha/beta
362   int NCA, NCB;
363 
364   // total number of electrons
365   int NETOT;
366 
367   // ms2
368   int MS2;
369 
370   // isym
371   int ISYM;
372 
373   // if true then RHF calculation, otherwise it is UHF
374   bool spinRestricted;
375 
376   // copies values from object
copyInfoAFQMCInfo377   void copyInfo(const AFQMCInfo& a)
378   {
379     name           = a.name;
380     NMO_FULL       = a.NMO_FULL;
381     NMO            = a.NMO;
382     NAEA           = a.NAEA;
383     NAEB           = a.NAEB;
384     NCA            = a.NCA;
385     NCB            = a.NCB;
386     NETOT          = a.NETOT;
387     MS2            = a.MS2;
388     ISYM           = a.ISYM;
389     spinRestricted = a.spinRestricted;
390   }
391 
392   // no fully spin polarized yet, not sure what it will break
checkAFQMCInfoStateAFQMCInfo393   bool checkAFQMCInfoState()
394   {
395     if (NMO_FULL < 1 || NAEA < 1 || NAEB < 1 || NCA < 0 || NCB < 0) //|| NETOT!= NCA+NCB+NAEA+NAEB ) //|| MS2<0 )
396       return false;
397     return true;
398   }
399 
printAFQMCInfoStateAFQMCInfo400   void printAFQMCInfoState(std::ostream& out)
401   {
402     out << "AFQMC info: \n"
403         << "name: " << name << "\n"
404         << "NMO_FULL: " << NMO_FULL << "\n"
405         << "NAEA: " << NAEA << "\n"
406         << "NAEB: " << NAEB << "\n"
407         << "NCA: " << NCA << "\n"
408         << "NCB: " << NCB << "\n"
409         << "NETOT: " << NETOT << "\n"
410         << "MS2: " << MS2 << "\n"
411         << "spinRestricted: " << spinRestricted << std::endl;
412   }
413 
parseAFQMCInfo414   bool parse(xmlNodePtr cur)
415   {
416     if (cur == NULL)
417       return false;
418 
419     OhmmsAttributeSet oAttrib;
420     oAttrib.add(name, "name");
421     oAttrib.put(cur);
422 
423     std::string sR("yes");
424     ParameterSet m_param;
425     m_param.add(NMO_FULL, "NMO_FULL");
426     m_param.add(NMO_FULL, "NMO");
427     m_param.add(NAEA, "NAEA");
428     m_param.add(NAEB, "NAEB");
429     m_param.add(NCA, "NCA");
430     m_param.add(NCB, "NCB");
431     m_param.add(NETOT, "NETOT");
432     m_param.add(MS2, "MS2");
433     m_param.add(sR, "spinRestricted");
434     m_param.put(cur);
435 
436     spinRestricted = false;
437     std::string sR0(sR);
438     std::transform(sR0.begin(), sR0.end(), sR.begin(), (int (*)(int))tolower);
439     if (sR == "yes" || sR == "true")
440       spinRestricted = true;
441 
442     NMO = NMO_FULL - NCA;
443     if (NETOT == -1)
444       NETOT = NCA + NCB + NAEA + NAEB;
445 
446     return true;
447   }
448 };
449 
450 } // namespace afqmc
451 } // namespace qmcplusplus
452 
453 #endif
454