1 //////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source
3 // License.  See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 // Miguel A. Morales, moralessilva2@llnl.gov
9 //    Lawrence Livermore National Laboratory
10 //
11 // File created by:
12 // Miguel A. Morales, moralessilva2@llnl.gov
13 //    Lawrence Livermore National Laboratory
14 ////////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef QMCPLUSPLUS_AFQMC_ROTATEHAMILTONIAN_HELPER2_H
17 #define QMCPLUSPLUS_AFQMC_ROTATEHAMILTONIAN_HELPER2_H
18 
19 #include <vector>
20 #include <tuple>
21 #include <mpi.h>
22 
23 #include "Utilities/FairDivide.h"
24 #include "AFQMC/config.h"
25 #include "AFQMC/Numerics/ma_operations.hpp"
26 #include "AFQMC/Utilities/taskgroup.h"
27 #include "AFQMC/Matrix/csr_matrix.hpp"
28 #include "AFQMC/Utilities/afqmc_TTI.hpp"
29 
30 namespace qmcplusplus
31 {
32 namespace afqmc
33 {
34 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
35 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
36 template<class MatQk, class MatRl, class MatTa>
count_Qk_x_Rl(WALKER_TYPES walker_type,SPComplexType EJX,TaskGroup_ & TG,std::vector<std::size_t> & sz,int k0,int kN,int l0,int lN,int NMO,int NAEA,int NAEB,MatQk const & Qk,MatRl const & Rl,MatTa && Ta,const SPRealType cut)37 inline void count_Qk_x_Rl(WALKER_TYPES walker_type,
38                           SPComplexType EJX,
39                           TaskGroup_& TG,
40                           std::vector<std::size_t>& sz,
41                           int k0,
42                           int kN,
43                           int l0,
44                           int lN,
45                           int NMO,
46                           int NAEA,
47                           int NAEB,
48                           MatQk const& Qk,
49                           MatRl const& Rl,
50                           MatTa&& Ta,
51                           const SPRealType cut)
52 {
53   using Type = typename std::decay<MatTa>::type::element;
54   assert(Qk.size(0) == Ta.size(0));
55   assert(Qk.size(1) == Rl.size(0));
56   assert(Rl.size(1) == Rl.size(1));
57   int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
58 
59   bool amIAlpha = true;
60   if (l0 >= NMO && lN >= NMO)
61     amIAlpha = false;
62 
63   int bl0 = -1, blN = -1;
64   int nwork = std::min(int(Rl.size(1)), ncores);
65   if (coreid < nwork)
66     std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
67   int ka0 = -1, kaN = -1;
68   nwork = std::min(int(Qk.size(0)), ncores);
69   if (coreid < nwork)
70     std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
71 
72   Type four(4.0);
73   Type two(2.0);
74   if (walker_type == CLOSED)
75   {
76     // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
77     // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
78     ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
79     TG.node_barrier();
80     for (int ka = ka0; ka < kaN; ka++)
81     {                         // ka = local range index
82       int k = ka / NAEA + k0; // global index
83       int a = ka % NAEA;      // global index
84       for (int b = a; b < NAEA; b++)
85       {
86         for (int l = l0; l < lN; l++)
87         {
88           int la     = (l - l0) * NAEA + a;
89           int lb     = (l - l0) * NAEA + b;
90           int kb     = (k - k0) * NAEA + b;
91           Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
92           Type qlakb = Ta[kb][la]; // Ta(kb,la)
93           if (std::abs(EJX * four * qkalb - two * qlakb) > cut)
94             if (a != b || k <= l)
95               ++sz[a * NMO + k];
96         }
97       }
98     }
99   }
100   else if (walker_type == COLLINEAR)
101   {
102     if (k0 < NMO && (kN - 1) < NMO)
103     {
104       // k is alpha
105 
106       if (amIAlpha)
107       {
108         // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
109         // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
110         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
111         TG.node_barrier();
112         for (int k = k0, ka = 0; k < kN; k++)
113         {
114           for (int a = 0; a < NAEA; a++, ka++)
115           {
116             for (int lb = bl0; lb < blN; lb++)
117             { // lb = (l-l0)*NAEA+b
118               int b = lb % NAEA;
119               if (b < a)
120                 continue;
121               int l      = lb / NAEA + l0;
122               int la     = (l - l0) * NAEA + a;
123               int kb     = (k - k0) * NAEA + b;
124               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
125               Type qlakb = Ta[kb][la]; // Ta(kb,la)
126               if (std::abs(EJX * qkalb - qlakb) > cut)
127                 if (a != b || k <= l)
128                   ++sz[a * NMO + k];
129             }
130           }
131         }
132       }
133       else if (std::abs(EJX) > 1e-8)
134       {
135         // <a,b | k,l> = Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
136         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
137         TG.node_barrier();
138         for (int k = k0, ka = 0; k < kN; k++)
139         {
140           for (int a = 0; a < NAEA; a++, ka++)
141           {
142             for (int lb = bl0; lb < blN; lb++)
143             {                          // lb = (l-l0)*NAEB+b
144               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
145               if (std::abs(EJX * qkalb) > cut)
146                 ++sz[a * NMO + k];
147             }
148           }
149         }
150       }
151     }
152     else if (k0 >= NMO && kN >= NMO)
153     {
154       // k is beta
155       if (!amIAlpha)
156       {
157         // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
158         // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
159         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
160         TG.node_barrier();
161         for (int k = k0, ka = 0; k < kN; k++)
162         {
163           for (int a = 0; a < NAEB; a++, ka++)
164           {
165             for (int lb = bl0; lb < blN; lb++)
166             { // lb = (l-l0)*NAEB+b
167               int b = lb % NAEB;
168               if (b < a)
169                 continue;
170               int l      = lb / NAEB + l0;
171               int la     = (l - l0) * NAEB + a;
172               int kb     = (k - k0) * NAEB + b;
173               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
174               Type qlakb = Ta[kb][la]; // Ta(kb,la)
175               if (std::abs(EJX * qkalb - qlakb) > cut)
176                 if (a != b || k <= l)
177                   ++sz[NAEA * NMO + a * NMO + k - NMO];
178             }
179           }
180         }
181       }
182     }
183     else
184     {
185       APP_ABORT(" Error: This should not happen. \n\n\n");
186     }
187   }
188   else if (walker_type == NONCOLLINEAR)
189   {
190     APP_ABORT(" Error in count_Qk_x_Rl: GHF not implemented. \n\n\n");
191   }
192 }
193 
194 // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
195 // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
196 template<class MatQk, class MatRl, class MatTa, class Container>
Qk_x_Rl(WALKER_TYPES walker_type,SPComplexType EJX,TaskGroup_ & TG,int k0,int kN,int l0,int lN,int NMO,int NAEA,int NAEB,MatQk const & Qk,MatRl const & Rl,MatTa && Ta,Container & Vijkl,const SPRealType cut)197 inline void Qk_x_Rl(WALKER_TYPES walker_type,
198                     SPComplexType EJX,
199                     TaskGroup_& TG,
200                     int k0,
201                     int kN,
202                     int l0,
203                     int lN,
204                     int NMO,
205                     int NAEA,
206                     int NAEB,
207                     MatQk const& Qk,
208                     MatRl const& Rl,
209                     MatTa&& Ta,
210                     Container& Vijkl,
211                     const SPRealType cut)
212 {
213   using Type = typename std::decay<MatTa>::type::element;
214   assert(Qk.size(0) == Ta.size(0));
215   assert(Qk.size(1) == Rl.size(0));
216   assert(Rl.size(1) == Rl.size(1));
217   int ncores = TG.getTotalCores(), coreid = TG.getCoreID();
218 
219   bool amIAlpha = true;
220   if (l0 >= NMO && lN >= NMO)
221     amIAlpha = false;
222 
223   int bl0 = -1, blN = -1;
224   int ka0 = -1, kaN = -1;
225   int nwork = std::min(int(Rl.size(1)), ncores);
226   if (coreid < nwork)
227     std::tie(bl0, blN) = FairDivideBoundary(coreid, int(Rl.size(1)), nwork);
228   nwork = std::min(int(Qk.size(0)), ncores);
229   if (coreid < nwork)
230     std::tie(ka0, kaN) = FairDivideBoundary(coreid, int(Qk.size(0)), nwork);
231 
232   Type four(4.0);
233   Type two(2.0);
234   if (walker_type == CLOSED)
235   {
236     // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
237     // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
238     ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
239     TG.node_barrier();
240     for (int ka = ka0; ka < kaN; ka++)
241     {                         // ka = local range index
242       int k = ka / NAEA + k0; // global index
243       int a = ka % NAEA;      // global index
244       for (int b = a; b < NAEA; b++)
245       {
246         for (int l = l0; l < lN; l++)
247         {
248           int la     = (l - l0) * NAEA + a;
249           int lb     = (l - l0) * NAEA + b;
250           int kb     = (k - k0) * NAEA + b;
251           Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
252           Type qlakb = Ta[kb][la]; // Ta(kb,la)
253           if (std::abs(EJX * four * qkalb - two * qlakb) > cut)
254           {
255             if (a != b || k < l)
256             {
257               emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, two * (EJX * four * qkalb - two * qlakb)));
258             }
259             else if (k == l)
260             {
261               emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, (EJX * four * qkalb - two * qlakb)));
262             }
263           }
264         }
265       }
266     }
267   }
268   else if (walker_type == COLLINEAR)
269   {
270     if (k0 < NMO && (kN - 1) < NMO)
271     {
272       // k is alpha
273 
274       if (amIAlpha)
275       {
276         // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
277         // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
278         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
279         TG.node_barrier();
280         for (int ka = ka0; ka < kaN; ka++)
281         {                         // ka = local range index
282           int k = ka / NAEA + k0; // global index
283           int a = ka % NAEA;      // global index
284           for (int b = a; b < NAEA; b++)
285           {
286             for (int l = l0; l < lN; l++)
287             {
288               int la     = (l - l0) * NAEA + a;
289               int lb     = (l - l0) * NAEA + b;
290               int kb     = (k - k0) * NAEA + b;
291               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
292               Type qlakb = Ta[kb][la]; // Ta(kb,la)
293               if (std::abs(EJX * qkalb - qlakb) > cut)
294               {
295                 if (a != b || k < l)
296                 {
297                   emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, two * (EJX * qkalb - qlakb)));
298                 }
299                 else if (k == l)
300                 {
301                   emplace(Vijkl, std::forward_as_tuple(a * NMO + k, b * NMO + l, EJX * qkalb - qlakb));
302                 }
303               }
304             }
305           }
306         }
307       }
308       else if (std::abs(EJX) > 1e-8)
309       {
310         // <a,b | k,l> = Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
311         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
312         TG.node_barrier();
313         for (int ka = ka0; ka < kaN; ka++)
314         {                         // ka = local range index
315           int k = ka / NAEA + k0; // global index
316           int a = ka % NAEA;      // global index
317           for (int b = 0; b < NAEB; b++)
318           {
319             for (int l = l0; l < lN; l++)
320             {
321               int lb     = (l - l0) * NAEB + b;
322               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
323               if (std::abs(EJX * qkalb) > cut)
324                 emplace(Vijkl, std::forward_as_tuple(a * NMO + k, NMO * NAEA + b * NMO + l - NMO, two * (EJX * qkalb)));
325             }
326           }
327         }
328       }
329     }
330     else if (k0 >= NMO && kN >= NMO)
331     {
332       // k is beta
333       if (!amIAlpha)
334       {
335         // <a,b | k,l> = Ta(ka,lb) - Tb(la,kb)
336         // Ta(ka,lb) = Q(k,a,:)*R(l,b,:)
337         ma::product(Qk, Rl(Rl.extension(0), {bl0, blN}), Ta(Ta.extension(0), {bl0, blN}));
338         TG.node_barrier();
339         for (int ka = ka0; ka < kaN; ka++)
340         {                         // ka = local range index
341           int k = ka / NAEB + k0; // global index
342           int a = ka % NAEB;      // global index
343           for (int b = a; b < NAEB; b++)
344           {
345             for (int l = l0; l < lN; l++)
346             {
347               int la     = (l - l0) * NAEB + a;
348               int lb     = (l - l0) * NAEB + b;
349               int kb     = (k - k0) * NAEB + b;
350               Type qkalb = Ta[ka][lb]; // Ta(ka,lb)
351               Type qlakb = Ta[kb][la]; // Ta(kb,la)
352               if (std::abs(EJX * qkalb - qlakb) > cut)
353               {
354                 if (a != b || k < l)
355                 {
356                   emplace(Vijkl,
357                           std::forward_as_tuple(NMO * NAEA + a * NMO + k - NMO, NMO * NAEA + b * NMO + l - NMO,
358                                                 two * (EJX * qkalb - qlakb)));
359                 }
360                 else if (k == l)
361                 {
362                   emplace(Vijkl,
363                           std::forward_as_tuple(NMO * NAEA + a * NMO + k - NMO, NMO * NAEA + b * NMO + l - NMO,
364                                                 EJX * qkalb - qlakb));
365                 }
366               }
367             }
368           }
369         }
370       }
371     }
372     else
373     {
374       APP_ABORT(" Error: This should not happen. \n\n\n");
375     }
376   }
377   else if (walker_type == NONCOLLINEAR)
378   {
379     APP_ABORT(" Error in createHamiltonianForGeneralDeterminant: GHF not implemented. \n\n\n");
380   }
381 }
382 
383 } // namespace afqmc
384 
385 } // namespace qmcplusplus
386 
387 #endif
388