1 #include <cstdlib>
2 #include <algorithm>
3 #include <complex>
4 #include <iostream>
5 #include <vector>
6 #include <numeric>
7 #if defined(USE_MPI)
8 #include <mpi.h>
9 #endif
10
11 #include "Configuration.h"
12
13 #include "AFQMC/config.h"
14 #include "HSPotential_Helpers.h"
15 #include "Utilities/FairDivide.h"
16 #include "AFQMC/Utilities/taskgroup.h"
17 #include "AFQMC/Matrix/csr_matrix.hpp"
18 #include "AFQMC/Numerics/ma_operations.hpp"
19 #include "AFQMC/Numerics/csr_blas.hpp"
20 #include "AFQMC/Numerics/detail/utilities.hpp"
21
22 /********************************************************************
23 * You get 2 potentials per Cholesky std::vector
24 *
25 * vn(+-)_{i,k} = 0.5*( L^n_{i,k} +- ma::conj(L^n_{k,i}) )
26 ********************************************************************/
27
28 namespace qmcplusplus
29 {
30 namespace afqmc
31 {
32 namespace HamHelper
33 {
34 namespace
35 {
36 //inline double const& ma::conj(double const& d){return d;}
37 //inline float const& ma::conj(float const& f){return f;}
38
count_over_cholvec(double cut,std::vector<std::size_t> & count,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)39 void count_over_cholvec(double cut,
40 std::vector<std::size_t>& count,
41 int c0,
42 int c1,
43 SpVType_shm_csr_matrix::reference const& Lik,
44 SpVType_shm_csr_matrix::reference const& Lki)
45 {
46 assert(c1 >= c0);
47 if (c0 == c1)
48 return;
49 auto cik = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
50 to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0);
51 auto cik_end = std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c1);
52 auto vik = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
53
54 auto cki = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
55 to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0);
56 auto cki_end = std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c1);
57 auto vki = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
58
59 // ignoring factor of 0.5 to keep it consistent with the old code for now
60 using ma::conj;
61 using std::abs;
62 using std::size_t;
63 while (cik != cik_end && cki != cki_end)
64 {
65 if (*cik == *cki)
66 { // both Lik and Lki have components on Chol Vec *cik==*cki
67 if (abs(*vik + ma::conj(*vki)) > cut)
68 count[2 * (*cik)] += size_t(2); // Lik + Lki* and Lki + Lik*
69 if (abs(*vik - ma::conj(*vki)) > cut)
70 count[2 * (*cik) + 1] += size_t(2); // Lik - Lki* and Lki - Lik*
71 ++cik;
72 ++vik;
73 ++cki;
74 ++vki;
75 }
76 else if (*cik < *cki)
77 { // not on the same chol vector, only operate on the smallest
78 if (abs(*vik) > cut)
79 {
80 count[2 * (*cik)] += size_t(2); // Lik + 0
81 #if defined(QMC_COMPLEX)
82 count[2 * (*cik) + 1] += size_t(2); // Lik - 0
83 #endif
84 }
85 ++cik;
86 ++vik;
87 }
88 else
89 {
90 if (abs(*vki) > cut)
91 {
92 count[2 * (*cki)] += size_t(2); // Lki + 0
93 #if defined(QMC_COMPLEX)
94 count[2 * (*cki) + 1] += size_t(2); // Lki - 0
95 #endif
96 }
97 ++cki;
98 ++vki;
99 }
100 }
101 // either cik or cki are at end, check if any terms are missing
102 while (cik != cik_end)
103 {
104 if (abs(*vik) > cut)
105 {
106 count[2 * (*cik)] += size_t(2); // Lik + 0
107 #if defined(QMC_COMPLEX)
108 count[2 * (*cik) + 1] += size_t(2); // Lik - 0
109 #endif
110 }
111 ++cik;
112 ++vik;
113 }
114 while (cki != cki_end)
115 {
116 if (abs(*vki) > cut)
117 {
118 count[2 * (*cki)] += size_t(2); // Lki + 0
119 #if defined(QMC_COMPLEX)
120 count[2 * (*cki) + 1] += size_t(2); // Lki - 0
121 #endif
122 }
123 ++cki;
124 ++vki;
125 }
126 }
127
count_over_cholvec(double cut,std::vector<std::size_t> & count,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)128 void count_over_cholvec(double cut,
129 std::vector<std::size_t>& count,
130 int c0,
131 int c1,
132 SpVType_shm_csr_matrix::reference const& Lii)
133 {
134 assert(c1 >= c0);
135 if (c0 == c1)
136 return;
137 auto ci = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
138 to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0);
139 auto ci_end = std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c1);
140 auto vi = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
141
142 // ignoring factor of 0.5 to keep it consistent with the old code for now
143 using ma::conj;
144 using std::abs;
145 using std::size_t;
146 while (ci != ci_end)
147 {
148 if (abs(*vi + ma::conj(*vi)) > cut)
149 ++count[2 * (*ci)]; // Lii + Lii*
150 #if defined(QMC_COMPLEX)
151 if (abs(*vi - ma::conj(*vi)) > cut)
152 ++count[2 * (*ci) + 1]; // Lii - Lii*
153 #endif
154 ++ci;
155 ++vi;
156 }
157 }
158
159 // In this case, c0/c1 refer to the ranges in the expanded list of CVs, e.g. 2*n/2*n+1
count_nnz(double cut,std::size_t & nik,std::size_t & nki,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)160 void count_nnz(double cut,
161 std::size_t& nik,
162 std::size_t& nki,
163 int c0,
164 int c1,
165 SpVType_shm_csr_matrix::reference const& Lik,
166 SpVType_shm_csr_matrix::reference const& Lki)
167 {
168 using ma::conj;
169 using std::abs;
170 using std::size_t;
171 assert(c1 >= c0);
172 nik = nki = size_t(0);
173 if (c0 == c1)
174 return;
175 auto cik = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
176 to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0 / 2);
177 auto cik_end =
178 std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), (c1 + 1) / 2);
179 auto vik = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
180
181 auto cki = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
182 to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0 / 2);
183 auto cki_end =
184 std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), (c1 + 1) / 2);
185 auto vki = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
186
187 // ignoring factor of 0.5 to keep it consistent with the old code for now
188 while (cik != cik_end && cki != cki_end)
189 {
190 if (*cik == *cki)
191 { // both Lik and Lki have components on Chol Vec *cik==*cki
192 if (abs(*vik + ma::conj(*vki)) > cut && // Lik + Lki* and Lki + Lik*
193 2 * (*cik) >= c0 && 2 * (*cik) < c1)
194 {
195 nik++;
196 nki++;
197 }
198 #if defined(QMC_COMPLEX)
199 if (abs(*vik - ma::conj(*vki)) > cut && // Lik - Lki* and Lki - Lik*
200 2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
201 {
202 nik++;
203 nki++;
204 }
205 #endif
206 ++cik;
207 ++vik;
208 ++cki;
209 ++vki;
210 }
211 else if (*cik < *cki)
212 { // not on the same chol vector, only operate on the smallest
213 #if defined(QMC_COMPLEX)
214 if (abs(*vik) > cut)
215 {
216 if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
217 {
218 ++nik;
219 ++nki;
220 }
221 if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
222 {
223 ++nik;
224 ++nki;
225 }
226 }
227 #else
228 if (abs(*vik) > cut && 2 * (*cik) >= c0 && 2 * (*cik) < c1)
229 {
230 ++nik;
231 ++nki;
232 }
233 #endif
234 ++cik;
235 ++vik;
236 }
237 else
238 {
239 #if defined(QMC_COMPLEX)
240 if (abs(*vki) > cut)
241 {
242 if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
243 {
244 ++nik;
245 ++nki;
246 }
247 if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
248 {
249 ++nik;
250 ++nki;
251 }
252 }
253 #else
254 if (abs(*vki) > cut && 2 * (*cki) >= c0 && 2 * (*cki) < c1)
255 {
256 ++nik;
257 ++nki;
258 }
259 #endif
260 ++cki;
261 ++vki;
262 }
263 }
264 while (cik != cik_end)
265 {
266 #if defined(QMC_COMPLEX)
267 if (abs(*vik) > cut)
268 {
269 if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
270 {
271 ++nik;
272 ++nki;
273 }
274 if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
275 {
276 ++nik;
277 ++nki;
278 }
279 }
280 #else
281 if (abs(*vik) > cut && 2 * (*cik) >= c0 && 2 * (*cik) < c1)
282 {
283 ++nik;
284 ++nki;
285 }
286 #endif
287 ++cik;
288 ++vik;
289 }
290 while (cki != cki_end)
291 {
292 #if defined(QMC_COMPLEX)
293 if (abs(*vki) > cut)
294 {
295 if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
296 {
297 ++nik;
298 ++nki;
299 }
300 if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
301 {
302 ++nik;
303 ++nki;
304 }
305 }
306 #else
307 if (abs(*vki) > cut && 2 * (*cki) >= c0 && 2 * (*cki) < c1)
308 {
309 ++nik;
310 ++nki;
311 }
312 #endif
313 ++cki;
314 ++vki;
315 }
316 }
317
count_nnz(double cut,size_t & ni,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)318 void count_nnz(double cut, size_t& ni, int c0, int c1, SpVType_shm_csr_matrix::reference const& Lii)
319 {
320 using ma::conj;
321 using std::abs;
322 using std::size_t;
323 assert(c1 >= c0);
324 ni = size_t(0);
325 if (c0 == c1)
326 return;
327 auto ci = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
328 to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0 / 2);
329 auto ci_end =
330 std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), (c1 + 1) / 2);
331 auto vi = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
332
333 // ignoring factor of 0.5 to keep it consistent with the old code for now
334 while (ci != ci_end)
335 {
336 if (abs(*vi + ma::conj(*vi)) > cut && 2 * (*ci) >= c0 && 2 * (*ci) < c1)
337 ++ni; // Lii + Lii*
338 #if defined(QMC_COMPLEX)
339 if (abs(*vi - ma::conj(*vi)) > cut && 2 * (*ci) + 1 >= c0 && 2 * (*ci) + 1 < c1)
340 ++ni; // Lii - Lii*
341 #endif
342 ++ci;
343 ++vi;
344 }
345 }
346
add_to_vn(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,int ik,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lii)347 void add_to_vn(SpVType_shm_csr_matrix& vn,
348 std::vector<int> const& map_,
349 double cut,
350 int ik,
351 int c0,
352 int c1,
353 SpVType_shm_csr_matrix::reference const& Lii)
354 {
355 using ma::conj;
356 using std::abs;
357 using std::size_t;
358 assert(c1 >= c0);
359 if (c0 == c1)
360 return;
361 auto ci = std::lower_bound(to_address(Lii.non_zero_indices2_data()),
362 to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), c0 / 2);
363 auto ci_end =
364 std::lower_bound(ci, to_address((Lii.non_zero_indices2_data() + Lii.num_non_zero_elements())), (c1 + 1) / 2);
365 auto vi = to_address(Lii.non_zero_values_data()) + std::distance(to_address(Lii.non_zero_indices2_data()), ci);
366
367 int c_origin = vn.global_origin()[1];
368 SPValueType half(0.5);
369 SPComplexType im(0.0, 1.0);
370 while (ci != ci_end)
371 {
372 if (abs(*vi + ma::conj(*vi)) > cut && 2 * (*ci) >= c0 && 2 * (*ci) < c1)
373 {
374 assert(map_[2 * (*ci)] >= 0);
375 assert(map_[2 * (*ci)] - c_origin < vn.size(1));
376 vn.emplace_back({ik, (map_[2 * (*ci)] - c_origin)},
377 static_cast<SPValueType>(half * (*vi + ma::conj(*vi)))); // Lii + Lii*
378 }
379 #if defined(QMC_COMPLEX)
380 if (abs(*vi - ma::conj(*vi)) > cut && 2 * (*ci) + 1 >= c0 && 2 * (*ci) + 1 < c1)
381 {
382 assert(map_[2 * (*ci) + 1] >= 0);
383 assert(map_[2 * (*ci) + 1] - c_origin < vn.size(1));
384 vn.emplace_back({ik, (map_[2 * (*ci) + 1] - c_origin)},
385 static_cast<SPValueType>(half * im * (*vi - ma::conj(*vi)))); // Lii - Lii*
386 }
387 #endif
388 ++ci;
389 ++vi;
390 }
391 }
392
add_to_vn(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,int ik,int ki,int c0,int c1,SpVType_shm_csr_matrix::reference const & Lik,SpVType_shm_csr_matrix::reference const & Lki)393 void add_to_vn(SpVType_shm_csr_matrix& vn,
394 std::vector<int> const& map_,
395 double cut,
396 int ik,
397 int ki,
398 int c0,
399 int c1,
400 SpVType_shm_csr_matrix::reference const& Lik,
401 SpVType_shm_csr_matrix::reference const& Lki)
402 {
403 using ma::conj;
404 using std::abs;
405 using std::size_t;
406 assert(c1 >= c0);
407 if (c0 == c1)
408 return;
409 auto cik = std::lower_bound(to_address(Lik.non_zero_indices2_data()),
410 to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), c0 / 2);
411 auto cik_end =
412 std::lower_bound(cik, to_address((Lik.non_zero_indices2_data() + Lik.num_non_zero_elements())), (c1 + 1) / 2);
413 auto vik = to_address(Lik.non_zero_values_data()) + std::distance(to_address(Lik.non_zero_indices2_data()), cik);
414
415 auto cki = std::lower_bound(to_address(Lki.non_zero_indices2_data()),
416 to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), c0 / 2);
417 auto cki_end =
418 std::lower_bound(cki, to_address((Lki.non_zero_indices2_data() + Lki.num_non_zero_elements())), (c1 + 1) / 2);
419 auto vki = to_address(Lki.non_zero_values_data()) + std::distance(to_address(Lki.non_zero_indices2_data()), cki);
420
421 SPComplexType im(0.0, 1.0);
422 SPValueType half(0.5);
423 int c_origin = vn.global_origin()[1];
424 while (cik != cik_end && cki != cki_end)
425 {
426 if (*cik == *cki)
427 { // both Lik and Lki have components on Chol Vec *cik==*cki
428 if (abs(*vik + ma::conj(*vki)) > cut)
429 { // Lik + Lki* and Lki + Lik*
430 if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
431 {
432 assert(map_[2 * (*cik)] >= 0);
433 assert(map_[2 * (*cik)] - c_origin < vn.size(1));
434 vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
435 static_cast<SPValueType>(half * (*vik + ma::conj(*vki)))); // Lik + Lki*
436 vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
437 static_cast<SPValueType>(half * (*vki + ma::conj(*vik)))); // Lki + Lik*
438 }
439 }
440 #if defined(QMC_COMPLEX)
441 if (abs(*vik - ma::conj(*vki)) > cut)
442 { // Lik - Lki* and Lki - Lik*
443 if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
444 {
445 assert(map_[2 * (*cik) + 1] >= 0);
446 assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
447 vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
448 static_cast<SPValueType>(half * im * (*vik - ma::conj(*vki)))); // Lik - Lki*
449 vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
450 static_cast<SPValueType>(half * im * (*vki - ma::conj(*vik)))); // Lki - Lik*
451 }
452 }
453 #endif
454 ++cik;
455 ++vik;
456 ++cki;
457 ++vki;
458 }
459 else if (*cik < *cki)
460 { // not on the same chol vector, only operate on the smallest
461 if (abs(*vik) > cut)
462 {
463 if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
464 {
465 assert(map_[2 * (*cik)] >= 0);
466 assert(map_[2 * (*cik)] - c_origin < vn.size(1));
467 vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
468 static_cast<SPValueType>(half * (*vik))); // Lik + 0
469 vn.emplace_back({ki, (map_[2 * (*cik)] - c_origin)},
470 static_cast<SPValueType>(half * ma::conj(*vik))); // Lik + 0
471 }
472 #if defined(QMC_COMPLEX)
473 if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
474 {
475 assert(map_[2 * (*cik) + 1] >= 0);
476 assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
477 vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
478 static_cast<SPValueType>(half * im * (*vik))); // Lik - 0
479 vn.emplace_back({ki, (map_[2 * (*cik) + 1] - c_origin)},
480 static_cast<SPValueType>(-half * im * ma::conj(*vik))); // Lik - 0
481 }
482 #endif
483 }
484 ++cik;
485 ++vik;
486 }
487 else
488 {
489 if (abs(*vki) > cut)
490 {
491 if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
492 {
493 assert(map_[2 * (*cki)] >= 0);
494 assert(map_[2 * (*cki)] - c_origin < vn.size(1));
495 vn.emplace_back({ik, (map_[2 * (*cki)] - c_origin)},
496 static_cast<SPValueType>(half * ma::conj(*vki))); // Lki + 0
497 vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
498 static_cast<SPValueType>(half * (*vki))); // Lki + 0
499 }
500 #if defined(QMC_COMPLEX)
501 if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
502 {
503 assert(map_[2 * (*cki) + 1] >= 0);
504 assert(map_[2 * (*cki) + 1] - c_origin < vn.size(1));
505 vn.emplace_back({ik, (map_[2 * (*cki) + 1] - c_origin)},
506 static_cast<SPValueType>(-half * im * ma::conj(*vki))); // Lki - 0
507 vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
508 static_cast<SPValueType>(half * im * (*vki))); // Lki - 0
509 }
510 #endif
511 }
512 ++cki;
513 ++vki;
514 }
515 }
516 while (cik != cik_end)
517 {
518 if (abs(*vik) > cut)
519 {
520 if (2 * (*cik) >= c0 && 2 * (*cik) < c1)
521 {
522 assert(map_[2 * (*cik)] >= 0);
523 assert(map_[2 * (*cik)] - c_origin < vn.size(1));
524 vn.emplace_back({ik, (map_[2 * (*cik)] - c_origin)},
525 static_cast<SPValueType>(half * (*vik))); // Lik + 0
526 vn.emplace_back({ki, (map_[2 * (*cik)] - c_origin)},
527 static_cast<SPValueType>(half * ma::conj(*vik))); // Lik + 0
528 }
529 #if defined(QMC_COMPLEX)
530 if (2 * (*cik) + 1 >= c0 && 2 * (*cik) + 1 < c1)
531 {
532 assert(map_[2 * (*cik) + 1] >= 0);
533 assert(map_[2 * (*cik) + 1] - c_origin < vn.size(1));
534 vn.emplace_back({ik, (map_[2 * (*cik) + 1] - c_origin)},
535 static_cast<SPValueType>(half * im * (*vik))); // Lik - 0
536 vn.emplace_back({ki, (map_[2 * (*cik) + 1] - c_origin)},
537 static_cast<SPValueType>(-half * im * ma::conj(*vik))); // Lik - 0
538 }
539 #endif
540 }
541 ++cik;
542 ++vik;
543 }
544 while (cki != cki_end)
545 {
546 if (abs(*vki) > cut)
547 {
548 if (2 * (*cki) >= c0 && 2 * (*cki) < c1)
549 {
550 assert(map_[2 * (*cki)] >= 0);
551 assert(map_[2 * (*cki)] - c_origin < vn.size(1));
552 vn.emplace_back({ik, (map_[2 * (*cki)] - c_origin)},
553 static_cast<SPValueType>(half * ma::conj(*vki))); // Lki + 0
554 vn.emplace_back({ki, (map_[2 * (*cki)] - c_origin)},
555 static_cast<SPValueType>(half * (*vki))); // Lki + 0
556 }
557 #if defined(QMC_COMPLEX)
558 if (2 * (*cki) + 1 >= c0 && 2 * (*cki) + 1 < c1)
559 {
560 assert(map_[2 * (*cki) + 1] >= 0);
561 assert(map_[2 * (*cki) + 1] - c_origin < vn.size(1));
562 vn.emplace_back({ik, (map_[2 * (*cki) + 1] - c_origin)},
563 static_cast<SPValueType>(-half * im * ma::conj(*vki))); // Lki - 0
564 vn.emplace_back({ki, (map_[2 * (*cki) + 1] - c_origin)},
565 static_cast<SPValueType>(half * im * (*vki))); // Lki - 0
566 }
567 #endif
568 }
569 ++cki;
570 ++vki;
571 }
572 }
573
574 } // namespace
575
count_nnz_per_cholvec(double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO)576 std::vector<std::size_t> count_nnz_per_cholvec(double cut, TaskGroup_& TG, SpVType_shm_csr_matrix& V2, int NMO)
577 {
578 if (TG.getNumberOfTGs() > 1)
579 APP_ABORT("Error: count_nnz_per_cholvec is not designed for distributed CholMat. \n");
580
581 if (V2.size(0) != NMO * NMO)
582 APP_ABORT(" Error in count_nnz_per_cholvec: V2.size(0) ! NMO*NMO \n");
583 int ik0, ikN;
584 // only upper triangular part since both ik/ki are processed simultaneously
585 std::tie(ik0, ikN) = FairDivideBoundary(TG.getGlobalRank(), int(NMO * (NMO + 1) / 2), TG.getGlobalSize());
586
587 if (cut < 1e-12)
588 cut = 1e-12;
589 std::vector<std::size_t> counts(2 * V2.size(1));
590 int nvecs = V2.size(1);
591
592 for (int i = 0, cnt = 0; i < NMO; i++)
593 for (int k = i; k < NMO; k++, cnt++)
594 {
595 if (cnt < ik0)
596 continue;
597 if (cnt >= ikN)
598 break;
599 if (i == k)
600 count_over_cholvec(cut, counts, 0, nvecs, V2[i * NMO + k]);
601 else
602 count_over_cholvec(cut, counts, 0, nvecs, V2[i * NMO + k], V2[k * NMO + i]);
603 }
604 TG.Global().all_reduce_in_place_n(counts.begin(), counts.size(), std::plus<>());
605
606 return counts;
607 }
608
count_nnz_per_ik(double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO,int cv0,int cvN)609 std::vector<std::size_t> count_nnz_per_ik(double cut,
610 TaskGroup_& TG,
611 SpVType_shm_csr_matrix& V2,
612 int NMO,
613 int cv0,
614 int cvN)
615 {
616 assert(cv0 >= 0 && cvN <= 2 * V2.size(1));
617 if (TG.getNumberOfTGs() > 1)
618 APP_ABORT("Error: count_nnz_per_ik is not designed for distributed CholMat. \n");
619
620 int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
621
622 if (V2.size(0) != NMO * NMO)
623 APP_ABORT(" Error in count_nnz_per_cholvec: V2.size(0) ! NMO*NMO \n");
624 int ik0, ikN;
625 // only upper triangular part since both ik/ki are processed simultaneously
626 // it is possible to ssubdivide this over equivalent nodes and then reduce over them
627 std::tie(ik0, ikN) = FairDivideBoundary(coreid, int(NMO * (NMO + 1) / 2), ncores);
628
629 if (cut < 1e-12)
630 cut = 1e-12;
631 std::vector<std::size_t> counts(V2.size(0));
632
633 for (int i = 0, cnt = 0; i < NMO; i++)
634 for (int k = i; k < NMO; k++, cnt++)
635 {
636 if (cnt < ik0)
637 continue;
638 if (cnt >= ikN)
639 break;
640 if (i == k)
641 count_nnz(cut, counts[i * NMO + k], cv0, cvN, V2[i * NMO + k]);
642 else
643 count_nnz(cut, counts[i * NMO + k], counts[k * NMO + i], cv0, cvN, V2[i * NMO + k], V2[k * NMO + i]);
644 }
645 TG.Node().all_reduce_in_place_n(counts.begin(), counts.size(), std::plus<>());
646 // if divided over equivalent nodes, reduce over the Core_Equivalent communicator
647 //TG.EqvCore().all_reduce_in_place_n(counts.begin(),counts.size(),std::plus<>());
648
649 return counts;
650 }
651
generateHSPotential(SpVType_shm_csr_matrix & vn,std::vector<int> const & map_,double cut,TaskGroup_ & TG,SpVType_shm_csr_matrix & V2,int NMO,int cv0,int cvN)652 void generateHSPotential(SpVType_shm_csr_matrix& vn,
653 std::vector<int> const& map_,
654 double cut,
655 TaskGroup_& TG,
656 SpVType_shm_csr_matrix& V2,
657 int NMO,
658 int cv0,
659 int cvN)
660 {
661 assert(cv0 >= 0 && cvN <= 2 * V2.size(1));
662 if (TG.getNumberOfTGs() > 1)
663 APP_ABORT("Error: generateHSPotential is not designed for distributed CholMat. \n");
664
665 int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
666
667 if (V2.size(0) != NMO * NMO)
668 APP_ABORT(" Error in generateHSPotential: V2.size(0) ! NMO*NMO \n");
669 int ik0, ikN;
670 // only upper triangular part since both ik/ki are processed simultaneously
671 // it is possible to ssubdivide this over equivalent nodes and then reduce over them
672 std::tie(ik0, ikN) = FairDivideBoundary(coreid, int(NMO * (NMO + 1) / 2), ncores);
673
674 if (cut < 1e-12)
675 cut = 1e-12;
676
677 for (int i = 0, cnt = 0; i < NMO; i++)
678 for (int k = i; k < NMO; k++, cnt++)
679 {
680 if (cnt < ik0)
681 continue;
682 if (cnt >= ikN)
683 break;
684 if (i == k)
685 add_to_vn(vn, map_, cut, i * NMO + k, cv0, cvN, V2[i * NMO + k]);
686 else
687 add_to_vn(vn, map_, cut, i * NMO + k, k * NMO + i, cv0, cvN, V2[i * NMO + k], V2[k * NMO + i]);
688 }
689 // if distributed over equivalent nodes, perform communication step here
690
691 TG.node_barrier();
692 }
693
694 } // namespace HamHelper
695
696 } // namespace afqmc
697
698 } // namespace qmcplusplus
699