1 //////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source
3 // License. See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 // Miguel A. Morales, moralessilva2@llnl.gov
9 // Lawrence Livermore National Laboratory
10 //
11 // File created by:
12 // Miguel A. Morales, moralessilva2@llnl.gov
13 // Lawrence Livermore National Laboratory
14 ////////////////////////////////////////////////////////////////////////////////
15
16 #ifndef QMCPLUSPLUS_AFQMC_ROTATEHAMILTONIAN_HELPER2_H
17 #define QMCPLUSPLUS_AFQMC_ROTATEHAMILTONIAN_HELPER2_H
18
19 #include <vector>
20 #include <tuple>
21 #include <mpi.h>
22
23 #include "Utilities/FairDivide.h"
24 #include "AFQMC/config.h"
25 #include "AFQMC/Numerics/ma_operations.hpp"
26 #include "AFQMC/Utilities/taskgroup.h"
27 #include "AFQMC/Matrix/csr_matrix.hpp"
28 #include "AFQMC/Utilities/afqmc_TTI.hpp"
29
30 namespace qmcplusplus
31 {
32 namespace afqmc
33 {
34 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
35 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
36 template<class MatQk, class MatRl, class MatTa>
count_Qk_x_Rl(WALKER_TYPES walker_type,SPComplexType EJX,TaskGroup_ & TG,std::vector<std::size_t> & sz,int k0,int kN,int l0,int lN,int NMO,int NAEA,int NAEB,MatQk const & Qk,MatRl const & Rl,MatTa && Ta,const SPRealType cut)37 inline void count_Qk_x_Rl(WALKER_TYPES walker_type,
38 SPComplexType EJX,
39 TaskGroup_& TG,
40 std::vector<std::size_t>& sz,
41 int k0,
42 int kN,
43 int l0,
44 int lN,
45 int NMO,
46 int NAEA,
47 int NAEB,
48 MatQk const& Qk,
49 MatRl const& Rl,
50 MatTa&& Ta,
51 const SPRealType cut)
52 {
53 using Type = typename std::decay<MatTa>::type::element;
54 assert(Qk.size(0) == Ta.size(0));
55 assert(Qk.size(1) == Rl.size(0));
56 assert(Rl.size(1) == Rl.size(1));
57 int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
58
59 bool amIAlpha = true;
60 if (l0 >= NMO && lN >= NMO)
61 amIAlpha = false;
62
63 int bl0 = -1, blN = -1;
64 int nwork = std::min(int(Rl.size(1)), ncores);
65 if (coreid < nwork)
66 std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
67 int ka0 = -1, kaN = -1;
68 nwork = std::min(int(Qk.size(0)), ncores);
69 if (coreid < nwork)
70 std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
71
72 Type four(4.0);
73 Type two(2.0);
74 if (walker_type == CLOSED)
75 {
76 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
77 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
78 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
79 TG.node_barrier();
80 for (int ka = ka0; ka < kaN; ka++)
81 { // ka = local range index
82 int k = ka / NAEA + k0; // global index
83 int a = ka % NAEA; // global index
84 for (int b = a; b < NAEA; b++)
85 {
86 for (int l = l0; l < lN; l++)
87 {
88 int la = (l - l0) * NAEA + a;
89 int lb = (l - l0) * NAEA + b;
90 int kb = (k - k0) * NAEA + b;
91 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
92 Type qlakb = Ta[kb][la]; // Ta(kb,la)
93 if (std::abs(EJX * four * qkalb - two * qlakb) > cut)
94 if (a != b || k <= l)
95 ++sz[a * NMO + k];
96 }
97 }
98 }
99 }
100 else if (walker_type == COLLINEAR)
101 {
102 if (k0 < NMO && (kN - 1) < NMO)
103 {
104 // k is alpha
105
106 if (amIAlpha)
107 {
108 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
109 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
110 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
111 TG.node_barrier();
112 for (int k = k0, ka = 0; k < kN; k++)
113 {
114 for (int a = 0; a < NAEA; a++, ka++)
115 {
116 for (int lb = bl0; lb < blN; lb++)
117 { // lb = (l-l0)*NAEA+b
118 int b = lb % NAEA;
119 if (b < a)
120 continue;
121 int l = lb / NAEA + l0;
122 int la = (l - l0) * NAEA + a;
123 int kb = (k - k0) * NAEA + b;
124 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
125 Type qlakb = Ta[kb][la]; // Ta(kb,la)
126 if (std::abs(EJX * qkalb - qlakb) > cut)
127 if (a != b || k <= l)
128 ++sz[a * NMO + k];
129 }
130 }
131 }
132 }
133 else if (std::abs(EJX) > 1e-8)
134 {
135 // <a,b | k,l> = Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
136 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
137 TG.node_barrier();
138 for (int k = k0, ka = 0; k < kN; k++)
139 {
140 for (int a = 0; a < NAEA; a++, ka++)
141 {
142 for (int lb = bl0; lb < blN; lb++)
143 { // lb = (l-l0)*NAEB+b
144 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
145 if (std::abs(EJX * qkalb) > cut)
146 ++sz[a * NMO + k];
147 }
148 }
149 }
150 }
151 }
152 else if (k0 >= NMO && kN >= NMO)
153 {
154 // k is beta
155 if (!amIAlpha)
156 {
157 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
158 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
159 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
160 TG.node_barrier();
161 for (int k = k0, ka = 0; k < kN; k++)
162 {
163 for (int a = 0; a < NAEB; a++, ka++)
164 {
165 for (int lb = bl0; lb < blN; lb++)
166 { // lb = (l-l0)*NAEB+b
167 int b = lb % NAEB;
168 if (b < a)
169 continue;
170 int l = lb / NAEB + l0;
171 int la = (l - l0) * NAEB + a;
172 int kb = (k - k0) * NAEB + b;
173 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
174 Type qlakb = Ta[kb][la]; // Ta(kb,la)
175 if (std::abs(EJX * qkalb - qlakb) > cut)
176 if (a != b || k <= l)
177 ++sz[NAEA * NMO + a * NMO + k - NMO];
178 }
179 }
180 }
181 }
182 }
183 else
184 {
185 APP_ABORT(" Error: This should not happen. \n\n\n");
186 }
187 }
188 else if (walker_type == NONCOLLINEAR)
189 {
190 APP_ABORT(" Error in count_Qk_x_Rl: GHF not implemented. \n\n\n");
191 }
192 }
193
194 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
195 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
196 template<class MatQk, class MatRl, class MatTa, class Container>
Qk_x_Rl(WALKER_TYPES walker_type,SPComplexType EJX,TaskGroup_ & TG,int k0,int kN,int l0,int lN,int NMO,int NAEA,int NAEB,MatQk const & Qk,MatRl const & Rl,MatTa && Ta,Container & Vijkl,const SPRealType cut)197 inline void Qk_x_Rl(WALKER_TYPES walker_type,
198 SPComplexType EJX,
199 TaskGroup_& TG,
200 int k0,
201 int kN,
202 int l0,
203 int lN,
204 int NMO,
205 int NAEA,
206 int NAEB,
207 MatQk const& Qk,
208 MatRl const& Rl,
209 MatTa&& Ta,
210 Container& Vijkl,
211 const SPRealType cut)
212 {
213 using Type = typename std::decay<MatTa>::type::element;
214 assert(Qk.size(0) == Ta.size(0));
215 assert(Qk.size(1) == Rl.size(0));
216 assert(Rl.size(1) == Rl.size(1));
217 int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
218
219 bool amIAlpha = true;
220 if (l0 >= NMO && lN >= NMO)
221 amIAlpha = false;
222
223 int bl0 = -1, blN = -1;
224 int ka0 = -1, kaN = -1;
225 int nwork = std::min(int(Rl.size(1)), ncores);
226 if (coreid < nwork)
227 std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
228 nwork = std::min(int(Qk.size(0)), ncores);
229 if (coreid < nwork)
230 std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
231
232 Type four(4.0);
233 Type two(2.0);
234 if (walker_type == CLOSED)
235 {
236 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
237 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
238 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
239 TG.node_barrier();
240 for (int ka = ka0; ka < kaN; ka++)
241 { // ka = local range index
242 int k = ka / NAEA + k0; // global index
243 int a = ka % NAEA; // global index
244 for (int b = a; b < NAEA; b++)
245 {
246 for (int l = l0; l < lN; l++)
247 {
248 int la = (l - l0) * NAEA + a;
249 int lb = (l - l0) * NAEA + b;
250 int kb = (k - k0) * NAEA + b;
251 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
252 Type qlakb = Ta[kb][la]; // Ta(kb,la)
253 if (std::abs(EJX * four * qkalb - two * qlakb) > cut)
254 {
255 if (a != b || k < l)
256 {
257 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, two * (EJX * four * qkalb - two * qlakb)));
258 }
259 else if (k == l)
260 {
261 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, (EJX * four * qkalb - two * qlakb)));
262 }
263 }
264 }
265 }
266 }
267 }
268 else if (walker_type == COLLINEAR)
269 {
270 if (k0 < NMO && (kN - 1) < NMO)
271 {
272 // k is alpha
273
274 if (amIAlpha)
275 {
276 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
277 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
278 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
279 TG.node_barrier();
280 for (int ka = ka0; ka < kaN; ka++)
281 { // ka = local range index
282 int k = ka / NAEA + k0; // global index
283 int a = ka % NAEA; // global index
284 for (int b = a; b < NAEA; b++)
285 {
286 for (int l = l0; l < lN; l++)
287 {
288 int la = (l - l0) * NAEA + a;
289 int lb = (l - l0) * NAEA + b;
290 int kb = (k - k0) * NAEA + b;
291 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
292 Type qlakb = Ta[kb][la]; // Ta(kb,la)
293 if (std::abs(EJX * qkalb - qlakb) > cut)
294 {
295 if (a != b || k < l)
296 {
297 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, two * (EJX * qkalb - qlakb)));
298 }
299 else if (k == l)
300 {
301 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, EJX * qkalb - qlakb));
302 }
303 }
304 }
305 }
306 }
307 }
308 else if (std::abs(EJX) > 1e-8)
309 {
310 // <a,b | k,l> = Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
311 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
312 TG.node_barrier();
313 for (int ka = ka0; ka < kaN; ka++)
314 { // ka = local range index
315 int k = ka / NAEA + k0; // global index
316 int a = ka % NAEA; // global index
317 for (int b = 0; b < NAEB; b++)
318 {
319 for (int l = l0; l < lN; l++)
320 {
321 int lb = (l - l0) * NAEB + b;
322 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
323 if (std::abs(EJX * qkalb) > cut)
324 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, NMO * NAEA + b * NMO + l - NMO, two * (EJX * qkalb)));
325 }
326 }
327 }
328 }
329 }
330 else if (k0 >= NMO && kN >= NMO)
331 {
332 // k is beta
333 if (!amIAlpha)
334 {
335 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
336 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
337 ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
338 TG.node_barrier();
339 for (int ka = ka0; ka < kaN; ka++)
340 { // ka = local range index
341 int k = ka / NAEB + k0; // global index
342 int a = ka % NAEB; // global index
343 for (int b = a; b < NAEB; b++)
344 {
345 for (int l = l0; l < lN; l++)
346 {
347 int la = (l - l0) * NAEB + a;
348 int lb = (l - l0) * NAEB + b;
349 int kb = (k - k0) * NAEB + b;
350 Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
351 Type qlakb = Ta[kb][la]; // Ta(kb,la)
352 if (std::abs(EJX * qkalb - qlakb) > cut)
353 {
354 if (a != b || k < l)
355 {
356 emplace(Vijkl,
357 std::forward_as_tuple(NMO * NAEA + a * NMO + k - NMO, NMO * NAEA + b * NMO + l - NMO,
358 two * (EJX * qkalb - qlakb)));
359 }
360 else if (k == l)
361 {
362 emplace(Vijkl,
363 std::forward_as_tuple(NMO * NAEA + a * NMO + k - NMO, NMO * NAEA + b * NMO + l - NMO,
364 EJX * qkalb - qlakb));
365 }
366 }
367 }
368 }
369 }
370 }
371 }
372 else
373 {
374 APP_ABORT(" Error: This should not happen. \n\n\n");
375 }
376 }
377 else if (walker_type == NONCOLLINEAR)
378 {
379 APP_ABORT(" Error in createHamiltonianForGeneralDeterminant: GHF not implemented. \n\n\n");
380 }
381 }
382
383 } // namespace afqmc
384
385 } // namespace qmcplusplus
386
387 #endif
388