1 #include <cstdlib>
2 #include <algorithm>
3 #include <complex>
4 #include <iostream>
5 #include <vector>
6 #include <numeric>
7 #if defined(USE_MPI)
8 #include <mpi.h>
9 #endif
10 
11 #include "Configuration.h"
12 
13 #include "AFQMC/config.h"
14 #include "HSPotential_Helpers.h"
15 #include "Utilities/FairDivide.h"
16 #include "AFQMC/Utilities/taskgroup.h"
17 #include "AFQMC/Matrix/csr_matrix.hpp"
18 #include "AFQMC/Numerics/ma_operations.hpp"
19 #include "AFQMC/Numerics/csr_blas.hpp"
20 #include "AFQMC/Numerics/detail/utilities.hpp"
21 
22 /********************************************************************
23 *  You get 2 potentials per Cholesky std::vector
24 *
25 *    vn(+-)_{i,k} = 0.5*( L^n_{i,k} +- ma::conj(L^n_{k,i}) )
26 ********************************************************************/
27 
28 namespace qmcplusplus
29 {
30 namespace afqmc
31 {
32 namespace HamHelper
33 {
34 namespace
35 {
36 //inline double const& ma::conj(double const& d){return d;}
37 //inline float const& ma::conj(float const& f){return f;}
38 
count_over_cholvec(double cut,std::vector<std::size_t> & count,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)39 void count_over_cholvec(double cut,
40                         std::vector<std::size_t>& count,
41                         int c0,
42                         int c1,
43                         SpVType_shm_csr_matrix::reference const& Lik,
44                         SpVType_shm_csr_matrix::reference const& Lki)
45 {
46   assert(c1 >= c0);
47   if (c0 == c1)
48     return;
49   auto cik     = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
50                               to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0);
51   auto cik_end = std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c1);
52   auto vik     = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
53 
54   auto cki     = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
55                               to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0);
56   auto cki_end = std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c1);
57   auto vki     = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
58 
59   // ignoring factor of 0.5 to keep it consistent with the old code for now
60   using ma::conj;
61   using std::abs;
62   using std::size_t;
63   while (cik != cik_end && cki != cki_end)
64   {
65     if (*cik == *cki)
66     { // both Lik and Lki have components on Chol Vec *cik==*cki
67       if (abs(*vik + ma::conj(*vki)) > cut)
68         count[2 * (*cik)] += size_t(2); // Lik + Lki* and Lki + Lik*
69       if (abs(*vik - ma::conj(*vki)) > cut)
70         count[2 * (*cik) + 1] += size_t(2); // Lik - Lki* and Lki - Lik*
71       ++cik;
72       ++vik;
73       ++cki;
74       ++vki;
75     }
76     else if (*cik < *cki)
77     { // not on the same chol vector, only operate on the smallest
78       if (abs(*vik) > cut)
79       {
80         count[2 * (*cik)] += size_t(2); // Lik + 0
81 #if defined(QMC_COMPLEX)
82         count[2 * (*cik) + 1] += size_t(2); // Lik - 0
83 #endif
84       }
85       ++cik;
86       ++vik;
87     }
88     else
89     {
90       if (abs(*vki) > cut)
91       {
92         count[2 * (*cki)] += size_t(2); // Lki + 0
93 #if defined(QMC_COMPLEX)
94         count[2 * (*cki) + 1] += size_t(2); // Lki - 0
95 #endif
96       }
97       ++cki;
98       ++vki;
99     }
100   }
101   // either cik or cki are at end, check if any terms are missing
102   while (cik != cik_end)
103   {
104     if (abs(*vik) > cut)
105     {
106       count[2 * (*cik)] += size_t(2); // Lik + 0
107 #if defined(QMC_COMPLEX)
108       count[2 * (*cik) + 1] += size_t(2); // Lik - 0
109 #endif
110     }
111     ++cik;
112     ++vik;
113   }
114   while (cki != cki_end)
115   {
116     if (abs(*vki) > cut)
117     {
118       count[2 * (*cki)] += size_t(2); // Lki + 0
119 #if defined(QMC_COMPLEX)
120       count[2 * (*cki) + 1] += size_t(2); // Lki - 0
121 #endif
122     }
123     ++cki;
124     ++vki;
125   }
126 }
127 
count_over_cholvec(double cut,std::vector<std::size_t> & count,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)128 void count_over_cholvec(double cut,
129                         std::vector<std::size_t>& count,
130                         int c0,
131                         int c1,
132                         SpVType_shm_csr_matrix::reference const& Lii)
133 {
134   assert(c1 >= c0);
135   if (c0 == c1)
136     return;
137   auto ci     = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
138                              to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0);
139   auto ci_end = std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c1);
140   auto vi     = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
141 
142   // ignoring factor of 0.5 to keep it consistent with the old code for now
143   using ma::conj;
144   using std::abs;
145   using std::size_t;
146   while (ci != ci_end)
147   {
148     if (abs(*vi + ma::conj(*vi)) > cut)
149       ++count[2 * (*ci)]; // Lii + Lii*
150 #if defined(QMC_COMPLEX)
151     if (abs(*vi - ma::conj(*vi)) > cut)
152       ++count[2 * (*ci) + 1]; // Lii - Lii*
153 #endif
154     ++ci;
155     ++vi;
156   }
157 }
158 
159 // In this case, c0/c1 refer to the ranges in the expanded list of CVs, e.g. 2*n/2*n+1
count_nnz(double cut,std::size_t & nik,std::size_t & nki,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)160 void count_nnz(double cut,
161                std::size_t& nik,
162                std::size_t& nki,
163                int c0,
164                int c1,
165                SpVType_shm_csr_matrix::reference const& Lik,
166                SpVType_shm_csr_matrix::reference const& Lki)
167 {
168   using ma::conj;
169   using std::abs;
170   using std::size_t;
171   assert(c1 >= c0);
172   nik = nki = size_t(0);
173   if (c0 == c1)
174     return;
175   auto cik = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
176                               to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0 / 2);
177   auto cik_end =
178       std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), (c1 + 1) / 2);
179   auto vik = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
180 
181   auto cki = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
182                               to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0 / 2);
183   auto cki_end =
184       std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), (c1 + 1) / 2);
185   auto vki = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
186 
187   // ignoring factor of 0.5 to keep it consistent with the old code for now
188   while (cik != cik_end && cki != cki_end)
189   {
190     if (*cik == *cki)
191     {                                         // both Lik and Lki have components on Chol Vec *cik==*cki
192       if (abs(*vik + ma::conj(*vki)) > cut && // Lik + Lki* and Lki + Lik*
193           2 * (*cik) >= c0 && 2 * (*cik) < c1)
194       {
195         nik++;
196         nki++;
197       }
198 #if defined(QMC_COMPLEX)
199       if (abs(*vik - ma::conj(*vki)) > cut && // Lik - Lki* and Lki - Lik*
200           2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
201       {
202         nik++;
203         nki++;
204       }
205 #endif
206       ++cik;
207       ++vik;
208       ++cki;
209       ++vki;
210     }
211     else if (*cik < *cki)
212     { // not on the same chol vector, only operate on the smallest
213 #if defined(QMC_COMPLEX)
214       if (abs(*vik) > cut)
215       {
216         if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
217         {
218           ++nik;
219           ++nki;
220         }
221         if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
222         {
223           ++nik;
224           ++nki;
225         }
226       }
227 #else
228       if (abs(*vik) > cut && 2 * (*cik) >= c0 && 2 * (*cik) < c1)
229       {
230         ++nik;
231         ++nki;
232       }
233 #endif
234       ++cik;
235       ++vik;
236     }
237     else
238     {
239 #if defined(QMC_COMPLEX)
240       if (abs(*vki) > cut)
241       {
242         if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
243         {
244           ++nik;
245           ++nki;
246         }
247         if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
248         {
249           ++nik;
250           ++nki;
251         }
252       }
253 #else
254       if (abs(*vki) > cut && 2 * (*cki) >= c0 && 2 * (*cki) < c1)
255       {
256         ++nik;
257         ++nki;
258       }
259 #endif
260       ++cki;
261       ++vki;
262     }
263   }
264   while (cik != cik_end)
265   {
266 #if defined(QMC_COMPLEX)
267     if (abs(*vik) > cut)
268     {
269       if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
270       {
271         ++nik;
272         ++nki;
273       }
274       if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
275       {
276         ++nik;
277         ++nki;
278       }
279     }
280 #else
281     if (abs(*vik) > cut && 2 * (*cik) >= c0 && 2 * (*cik) < c1)
282     {
283       ++nik;
284       ++nki;
285     }
286 #endif
287     ++cik;
288     ++vik;
289   }
290   while (cki != cki_end)
291   {
292 #if defined(QMC_COMPLEX)
293     if (abs(*vki) > cut)
294     {
295       if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
296       {
297         ++nik;
298         ++nki;
299       }
300       if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
301       {
302         ++nik;
303         ++nki;
304       }
305     }
306 #else
307     if (abs(*vki) > cut && 2 * (*cki) >= c0 && 2 * (*cki) < c1)
308     {
309       ++nik;
310       ++nki;
311     }
312 #endif
313     ++cki;
314     ++vki;
315   }
316 }
317 
count_nnz(double cut,size_t & ni,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)318 void count_nnz(double cut, size_t& ni, int c0, int c1, SpVType_shm_csr_matrix::reference const& Lii)
319 {
320   using ma::conj;
321   using std::abs;
322   using std::size_t;
323   assert(c1 >= c0);
324   ni = size_t(0);
325   if (c0 == c1)
326     return;
327   auto ci = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
328                              to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0 / 2);
329   auto ci_end =
330       std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), (c1 + 1) / 2);
331   auto vi = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
332 
333   // ignoring factor of 0.5 to keep it consistent with the old code for now
334   while (ci != ci_end)
335   {
336     if (abs(*vi + ma::conj(*vi)) > cut && 2 * (*ci) >= c0 && 2 * (*ci) < c1)
337       ++ni; // Lii + Lii*
338 #if defined(QMC_COMPLEX)
339     if (abs(*vi - ma::conj(*vi)) > cut && 2 * (*ci) + 1 >= c0 && 2 * (*ci) + 1 < c1)
340       ++ni; // Lii - Lii*
341 #endif
342     ++ci;
343     ++vi;
344   }
345 }
346 
add_to_vn(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,int ik,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)347 void add_to_vn(SpVType_shm_csr_matrix& vn,
348                std::vector<int> const& map_,
349                double cut,
350                int ik,
351                int c0,
352                int c1,
353                SpVType_shm_csr_matrix::reference const& Lii)
354 {
355   using ma::conj;
356   using std::abs;
357   using std::size_t;
358   assert(c1 >= c0);
359   if (c0 == c1)
360     return;
361   auto ci = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
362                              to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0 / 2);
363   auto ci_end =
364       std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), (c1 + 1) / 2);
365   auto vi = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
366 
367   int c_origin = vn.global_origin()[1];
368   SPValueType half(0.5);
369   SPComplexType im(0.0, 1.0);
370   while (ci != ci_end)
371   {
372     if (abs(*vi + ma::conj(*vi)) > cut && 2 * (*ci) >= c0 && 2 * (*ci) < c1)
373     {
374       assert(map_[2 * (*ci)] >= 0);
375       assert(map_[2 * (*ci)] - c_origin < vn.size(1));
376       vn.emplace_back({ik, (map_[2 * (*ci)] - c_origin)},
377                       static_cast<SPValueType>(half * (*vi + ma::conj(*vi)))); // Lii + Lii*
378     }
379 #if defined(QMC_COMPLEX)
380     if (abs(*vi - ma::conj(*vi)) > cut && 2 * (*ci) + 1 >= c0 && 2 * (*ci) + 1 < c1)
381     {
382       assert(map_[2 * (*ci) + 1] >= 0);
383       assert(map_[2 * (*ci) + 1] - c_origin < vn.size(1));
384       vn.emplace_back({ik, (map_[2 * (*ci) + 1] - c_origin)},
385                       static_cast<SPValueType>(half * im * (*vi - ma::conj(*vi)))); // Lii - Lii*
386     }
387 #endif
388     ++ci;
389     ++vi;
390   }
391 }
392 
add_to_vn(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,int ik,int ki,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)393 void add_to_vn(SpVType_shm_csr_matrix& vn,
394                std::vector<int> const& map_,
395                double cut,
396                int ik,
397                int ki,
398                int c0,
399                int c1,
400                SpVType_shm_csr_matrix::reference const& Lik,
401                SpVType_shm_csr_matrix::reference const& Lki)
402 {
403   using ma::conj;
404   using std::abs;
405   using std::size_t;
406   assert(c1 >= c0);
407   if (c0 == c1)
408     return;
409   auto cik = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
410                               to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0 / 2);
411   auto cik_end =
412       std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), (c1 + 1) / 2);
413   auto vik = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
414 
415   auto cki = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
416                               to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0 / 2);
417   auto cki_end =
418       std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), (c1 + 1) / 2);
419   auto vki = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
420 
421   SPComplexType im(0.0, 1.0);
422   SPValueType half(0.5);
423   int c_origin = vn.global_origin()[1];
424   while (cik != cik_end && cki != cki_end)
425   {
426     if (*cik == *cki)
427     { // both Lik and Lki have components on Chol Vec *cik==*cki
428       if (abs(*vik + ma::conj(*vki)) > cut)
429       { // Lik + Lki* and Lki + Lik*
430         if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
431         {
432           assert(map_[2 * (*cik)] >= 0);
433           assert(map_[2 * (*cik)] - c_origin < vn.size(1));
434           vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
435                           static_cast<SPValueType>(half * (*vik + ma::conj(*vki)))); // Lik + Lki*
436           vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
437                           static_cast<SPValueType>(half * (*vki + ma::conj(*vik)))); // Lki + Lik*
438         }
439       }
440 #if defined(QMC_COMPLEX)
441       if (abs(*vik - ma::conj(*vki)) > cut)
442       { // Lik - Lki* and Lki - Lik*
443         if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
444         {
445           assert(map_[2 * (*cik) + 1] >= 0);
446           assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
447           vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
448                           static_cast<SPValueType>(half * im * (*vik - ma::conj(*vki)))); // Lik - Lki*
449           vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
450                           static_cast<SPValueType>(half * im * (*vki - ma::conj(*vik)))); // Lki - Lik*
451         }
452       }
453 #endif
454       ++cik;
455       ++vik;
456       ++cki;
457       ++vki;
458     }
459     else if (*cik < *cki)
460     { // not on the same chol vector, only operate on the smallest
461       if (abs(*vik) > cut)
462       {
463         if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
464         {
465           assert(map_[2 * (*cik)] >= 0);
466           assert(map_[2 * (*cik)] - c_origin < vn.size(1));
467           vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
468                           static_cast<SPValueType>(half * (*vik))); // Lik + 0
469           vn.emplace_back({ki, (map_[2 * (*cik)] - c_origin)},
470                           static_cast<SPValueType>(half * ma::conj(*vik))); // Lik + 0
471         }
472 #if defined(QMC_COMPLEX)
473         if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
474         {
475           assert(map_[2 * (*cik) + 1] >= 0);
476           assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
477           vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
478                           static_cast<SPValueType>(half * im * (*vik))); // Lik - 0
479           vn.emplace_back({ki, (map_[2 * (*cik) + 1] - c_origin)},
480                           static_cast<SPValueType>(-half * im * ma::conj(*vik))); // Lik - 0
481         }
482 #endif
483       }
484       ++cik;
485       ++vik;
486     }
487     else
488     {
489       if (abs(*vki) > cut)
490       {
491         if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
492         {
493           assert(map_[2 * (*cki)] >= 0);
494           assert(map_[2 * (*cki)] - c_origin < vn.size(1));
495           vn.emplace_back({ik, (map_[2 * (*cki)] - c_origin)},
496                           static_cast<SPValueType>(half * ma::conj(*vki))); // Lki + 0
497           vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
498                           static_cast<SPValueType>(half * (*vki))); // Lki + 0
499         }
500 #if defined(QMC_COMPLEX)
501         if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
502         {
503           assert(map_[2 * (*cki) + 1] >= 0);
504           assert(map_[2 * (*cki) + 1] - c_origin < vn.size(1));
505           vn.emplace_back({ik, (map_[2 * (*cki) + 1] - c_origin)},
506                           static_cast<SPValueType>(-half * im * ma::conj(*vki))); // Lki - 0
507           vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
508                           static_cast<SPValueType>(half * im * (*vki))); // Lki - 0
509         }
510 #endif
511       }
512       ++cki;
513       ++vki;
514     }
515   }
516   while (cik != cik_end)
517   {
518     if (abs(*vik) > cut)
519     {
520       if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
521       {
522         assert(map_[2 * (*cik)] >= 0);
523         assert(map_[2 * (*cik)] - c_origin < vn.size(1));
524         vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
525                         static_cast<SPValueType>(half * (*vik))); // Lik + 0
526         vn.emplace_back({ki, (map_[2 * (*cik)] - c_origin)},
527                         static_cast<SPValueType>(half * ma::conj(*vik))); // Lik + 0
528       }
529 #if defined(QMC_COMPLEX)
530       if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
531       {
532         assert(map_[2 * (*cik) + 1] >= 0);
533         assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
534         vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
535                         static_cast<SPValueType>(half * im * (*vik))); // Lik - 0
536         vn.emplace_back({ki, (map_[2 * (*cik) + 1] - c_origin)},
537                         static_cast<SPValueType>(-half * im * ma::conj(*vik))); // Lik - 0
538       }
539 #endif
540     }
541     ++cik;
542     ++vik;
543   }
544   while (cki != cki_end)
545   {
546     if (abs(*vki) > cut)
547     {
548       if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
549       {
550         assert(map_[2 * (*cki)] >= 0);
551         assert(map_[2 * (*cki)] - c_origin < vn.size(1));
552         vn.emplace_back({ik, (map_[2 * (*cki)] - c_origin)},
553                         static_cast<SPValueType>(half * ma::conj(*vki))); // Lki + 0
554         vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
555                         static_cast<SPValueType>(half * (*vki))); // Lki + 0
556       }
557 #if defined(QMC_COMPLEX)
558       if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
559       {
560         assert(map_[2 * (*cki) + 1] >= 0);
561         assert(map_[2 * (*cki) + 1] - c_origin < vn.size(1));
562         vn.emplace_back({ik, (map_[2 * (*cki) + 1] - c_origin)},
563                         static_cast<SPValueType>(-half * im * ma::conj(*vki))); // Lki - 0
564         vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
565                         static_cast<SPValueType>(half * im * (*vki))); // Lki - 0
566       }
567 #endif
568     }
569     ++cki;
570     ++vki;
571   }
572 }
573 
574 } // namespace
575 
count_nnz_per_cholvec(double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO)576 std::vector<std::size_t> count_nnz_per_cholvec(double cut, TaskGroup_& TG, SpVType_shm_csr_matrix& V2, int NMO)
577 {
578   if (TG.getNumberOfTGs() > 1)
579     APP_ABORT("Error: count_nnz_per_cholvec is not designed for distributed CholMat. \n");
580 
581   if (V2.size(0) != NMO * NMO)
582     APP_ABORT(" Error in count_nnz_per_cholvec: V2.size(0) ! NMO*NMO \n");
583   int ik0, ikN;
584   // only upper triangular part since both ik/ki are processed simultaneously
585   std::tie(ik0, ikN) = FairDivideBoundary(TG.getGlobalRank(), int(NMO * (NMO + 1) / 2), TG.getGlobalSize());
586 
587   if (cut < 1e-12)
588     cut = 1e-12;
589   std::vector<std::size_t> counts(2 * V2.size(1));
590   int nvecs = V2.size(1);
591 
592   for (int i = 0, cnt = 0; i < NMO; i++)
593     for (int k = i; k < NMO; k++, cnt++)
594     {
595       if (cnt < ik0)
596         continue;
597       if (cnt >= ikN)
598         break;
599       if (i == k)
600         count_over_cholvec(cut, counts, 0, nvecs, V2[i * NMO + k]);
601       else
602         count_over_cholvec(cut, counts, 0, nvecs, V2[i * NMO + k], V2[k * NMO + i]);
603     }
604   TG.Global().all_reduce_in_place_n(counts.begin(), counts.size(), std::plus<>());
605 
606   return counts;
607 }
608 
count_nnz_per_ik(double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO,int cv0,int cvN)609 std::vector<std::size_t> count_nnz_per_ik(double cut,
610                                           TaskGroup_& TG,
611                                           SpVType_shm_csr_matrix& V2,
612                                           int NMO,
613                                           int cv0,
614                                           int cvN)
615 {
616   assert(cv0 >= 0 && cvN <= 2 * V2.size(1));
617   if (TG.getNumberOfTGs() > 1)
618     APP_ABORT("Error: count_nnz_per_ik is not designed for distributed CholMat. \n");
619 
620   int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
621 
622   if (V2.size(0) != NMO * NMO)
623     APP_ABORT(" Error in count_nnz_per_cholvec: V2.size(0) ! NMO*NMO \n");
624   int ik0, ikN;
625   // only upper triangular part since both ik/ki are processed simultaneously
626   // it is possible to ssubdivide this over equivalent nodes and then reduce over them
627   std::tie(ik0, ikN) = FairDivideBoundary(coreid, int(NMO * (NMO + 1) / 2), ncores);
628 
629   if (cut < 1e-12)
630     cut = 1e-12;
631   std::vector<std::size_t> counts(V2.size(0));
632 
633   for (int i = 0, cnt = 0; i < NMO; i++)
634     for (int k = i; k < NMO; k++, cnt++)
635     {
636       if (cnt < ik0)
637         continue;
638       if (cnt >= ikN)
639         break;
640       if (i == k)
641         count_nnz(cut, counts[i * NMO + k], cv0, cvN, V2[i * NMO + k]);
642       else
643         count_nnz(cut, counts[i * NMO + k], counts[k * NMO + i], cv0, cvN, V2[i * NMO + k], V2[k * NMO + i]);
644     }
645   TG.Node().all_reduce_in_place_n(counts.begin(), counts.size(), std::plus<>());
646   // if divided over equivalent nodes, reduce over the Core_Equivalent communicator
647   //TG.EqvCore().all_reduce_in_place_n(counts.begin(),counts.size(),std::plus<>());
648 
649   return counts;
650 }
651 
generateHSPotential(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO,int cv0,int cvN)652 void generateHSPotential(SpVType_shm_csr_matrix& vn,
653                          std::vector<int> const& map_,
654                          double cut,
655                          TaskGroup_& TG,
656                          SpVType_shm_csr_matrix& V2,
657                          int NMO,
658                          int cv0,
659                          int cvN)
660 {
661   assert(cv0 >= 0 && cvN <= 2 * V2.size(1));
662   if (TG.getNumberOfTGs() > 1)
663     APP_ABORT("Error: generateHSPotential is not designed for distributed CholMat. \n");
664 
665   int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
666 
667   if (V2.size(0) != NMO * NMO)
668     APP_ABORT(" Error in generateHSPotential: V2.size(0) ! NMO*NMO \n");
669   int ik0, ikN;
670   // only upper triangular part since both ik/ki are processed simultaneously
671   // it is possible to ssubdivide this over equivalent nodes and then reduce over them
672   std::tie(ik0, ikN) = FairDivideBoundary(coreid, int(NMO * (NMO + 1) / 2), ncores);
673 
674   if (cut < 1e-12)
675     cut = 1e-12;
676 
677   for (int i = 0, cnt = 0; i < NMO; i++)
678     for (int k = i; k < NMO; k++, cnt++)
679     {
680       if (cnt < ik0)
681         continue;
682       if (cnt >= ikN)
683         break;
684       if (i == k)
685         add_to_vn(vn, map_, cut, i * NMO + k, cv0, cvN, V2[i * NMO + k]);
686       else
687         add_to_vn(vn, map_, cut, i * NMO + k, k * NMO + i, cv0, cvN, V2[i * NMO + k], V2[k * NMO + i]);
688     }
689   // if distributed over equivalent nodes, perform communication step here
690 
691   TG.node_barrier();
692 }
693 
694 } // namespace HamHelper
695 
696 } // namespace afqmc
697 
698 } // namespace qmcplusplus
699