1 //////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source
3 // License.  See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by:
8 // Miguel A. Morales, moralessilva2@llnl.gov
9 //    Lawrence Livermore National Laboratory
10 //
11 // File created by:
12 // Miguel A. Morales, moralessilva2@llnl.gov
13 //    Lawrence Livermore National Laboratory
14 ////////////////////////////////////////////////////////////////////////////////
15 
16 #ifndef QMCPLUSPLUS_AFQMC_KP3INDEXIO_HPP
17 #define QMCPLUSPLUS_AFQMC_KP3INDEXIO_HPP
18 
19 #include <fstream>
20 
21 #include "hdf/hdf_archive.h"
22 
23 #include "AFQMC/config.h"
24 #include "AFQMC/Utilities/kp_utilities.hpp"
25 #include "AFQMC/SlaterDeterminantOperations/rotate.hpp"
26 
27 #include "AFQMC/HamiltonianOperations/KP3IndexFactorization.hpp"
28 
29 namespace qmcplusplus
30 {
31 namespace afqmc
32 {
loadKP3IndexFactorization(hdf_archive & dump,WALKER_TYPES type,int NMO,int NAEA,int NAEB,std::vector<PsiT_Matrix> & PsiT,TaskGroup_ & TGprop,TaskGroup_ & TGwfn,RealType cutvn,RealType cutv2)33 inline HamiltonianOperations loadKP3IndexFactorization(hdf_archive& dump,
34                                                        WALKER_TYPES type,
35                                                        int NMO,
36                                                        int NAEA,
37                                                        int NAEB,
38                                                        std::vector<PsiT_Matrix>& PsiT,
39                                                        TaskGroup_& TGprop,
40                                                        TaskGroup_& TGwfn,
41                                                        RealType cutvn,
42                                                        RealType cutv2)
43 {
44   using shmIMatrix    = boost::multi::array<int, 2, shared_allocator<int>>;
45   using shmCVector    = boost::multi::array<ComplexType, 1, shared_allocator<ComplexType>>;
46   using shmCMatrix    = boost::multi::array<ComplexType, 2, shared_allocator<ComplexType>>;
47   using shmCTensor    = boost::multi::array<ComplexType, 3, shared_allocator<ComplexType>>;
48   using shmSpMatrix   = boost::multi::array<SPComplexType, 2, shared_allocator<SPComplexType>>;
49   using shmSpTensor   = boost::multi::array<SPComplexType, 3, shared_allocator<SPComplexType>>;
50   using SpMatrix      = boost::multi::array<SPComplexType, 2>;
51   using SpMatrix_ref  = boost::multi::array_ref<SPComplexType, 2>;
52   using Sp3Tensor_ref = boost::multi::array_ref<SPComplexType, 3>;
53 
54   if (TGprop.getNGroupsPerTG() > 1)
55     APP_ABORT(" Error: Distributed KP3INdex not yet implemented.\n");
56   if (TGwfn.getNGroupsPerTG() > 1)
57     APP_ABORT(" Error: Distributed KP3INdex not yet implemented.\n");
58 
59   std::vector<int> dims(7);
60   int ndet   = (type == COLLINEAR ? PsiT.size() / 2 : PsiT.size());
61   int nspins = ((type != COLLINEAR) ? 1 : 2);
62   if (type == COLLINEAR)
63     assert(PsiT.size() % 2 == 0);
64   ValueType E0;
65 
66   // single reader for now
67   if (TGwfn.Global().root())
68   {
69     if (!dump.push("HamiltonianOperations", false))
70     {
71       app_error() << " Error in loadKP3IndexFactorization: Group HamiltonianOperations not found. \n";
72       APP_ABORT("");
73     }
74     if (!dump.push("KP3IndexFactorization", false))
75     {
76       app_error() << " Error in loadKP3IndexFactorization: Group KP3IndexFactorization not found. \n";
77       APP_ABORT("");
78     }
79     if (!dump.read(dims, "dims"))
80     {
81       app_error() << " Error in loadKP3IndexFactorization: Problems reading dataset. \n";
82       APP_ABORT("");
83     }
84     if (dims[0] != NMO)
85     {
86       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: NMO. \n";
87       APP_ABORT("");
88     }
89     if (dims[1] != NAEA)
90     {
91       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: NAEA. \n";
92       APP_ABORT("");
93     }
94     if (dims[2] != NAEB)
95     {
96       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: NAEB. \n";
97       APP_ABORT("");
98     }
99     if (type == CLOSED && dims[4] != 1)
100     {
101       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: walker_type. \n";
102       APP_ABORT("");
103     }
104     if (type == COLLINEAR && dims[4] != 2)
105     {
106       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: walker_type. \n";
107       APP_ABORT("");
108     }
109     if (type == NONCOLLINEAR && dims[4] != 3)
110     {
111       app_error() << " Error in loadKP3IndexFactorization: Inconsistent data in file: walker_type. \n";
112       APP_ABORT("");
113     }
114     std::vector<ValueType> et;
115     if (!dump.read(et, "E0"))
116     {
117       app_error() << " Error in loadKP3IndexFactorization: Problems reading dataset. \n";
118       APP_ABORT("");
119     }
120     E0 = et[0];
121   }
122   TGwfn.Global().broadcast_n(dims.data(), 7);
123   TGwfn.Global().broadcast_value(E0);
124   int nkpts    = dims[3];
125   int nsampleQ = dims[5];
126 
127   std::vector<int> nmo_per_kp(nkpts);
128   std::vector<int> nchol_per_kp(nkpts);
129   std::vector<int> kminus(nkpts);
130   std::vector<RealType> gQ;
131   shmIMatrix QKtok2({nkpts, nkpts}, shared_allocator<int>{TGwfn.Node()});
132 
133   if (TGwfn.Global().root())
134   {
135     if (!dump.read(nmo_per_kp, "NMOPerKP"))
136     {
137       app_error() << " Error in loadKP3IndexFactorization: Problems reading NMOPerKP dataset. \n";
138       APP_ABORT("");
139     }
140     if (!dump.read(nchol_per_kp, "NCholPerKP"))
141     {
142       app_error() << " Error in loadKP3IndexFactorization: Problems reading NCholPerKP dataset. \n";
143       APP_ABORT("");
144     }
145     if (!dump.read(kminus, "MinusK"))
146     {
147       app_error() << " Error in loadKP3IndexFactorization: Problems reading MinusK dataset. \n";
148       APP_ABORT("");
149     }
150     if (!dump.read(QKtok2, "QKTok2"))
151     {
152       app_error() << " Error in loadKP3IndexFactorization: Problems reading QKTok2 dataset. \n";
153       APP_ABORT("");
154     }
155     if (!dump.read(gQ, "gQ"))
156     {
157       app_error() << " Error in loadKP3IndexFactorization: Problems reading gQ dataset. \n";
158       APP_ABORT("");
159     }
160   }
161   TGwfn.Global().broadcast_n(nmo_per_kp.data(), nmo_per_kp.size(), 0);
162   TGwfn.Global().broadcast_n(nchol_per_kp.data(), nchol_per_kp.size(), 0);
163   TGwfn.Global().broadcast_n(kminus.data(), kminus.size(), 0);
164   TGwfn.Global().broadcast_n(gQ.data(), gQ.size(), 0);
165   if (TGwfn.Node().root())
166     TGwfn.Cores().broadcast_n(std::addressof(*QKtok2.origin()), QKtok2.num_elements(), 0);
167 
168   int Q0 = -1; // stores the index of the Q=(0,0,0) Q-point
169                // this must always exist
170   for (int Q = 0; Q < nkpts; Q++)
171   {
172     if (kminus[Q] == Q)
173     {
174       bool found = true;
175       for (int KI = 0; KI < nkpts; KI++)
176         if (KI != QKtok2[Q][KI])
177         {
178           found = false;
179           break;
180         }
181       if (found)
182       {
183         if (Q0 > 0)
184           APP_ABORT(" Error: @ Q-points satisfy Q=0 condition.\n");
185         Q0 = Q;
186       }
187       else
188       {
189         if (nkpts % 2 != 0)
190           APP_ABORT(" Error: Unexpected situation: Q==(-Q)!=Q0 and odd Nk. \n");
191       }
192     }
193   }
194   if (Q0 < 0)
195     APP_ABORT(" Error: Can not find Q=0 Q-point.\n");
196 
197   int nmo_max   = *std::max_element(nmo_per_kp.begin(), nmo_per_kp.end());
198   int nchol_max = *std::max_element(nchol_per_kp.begin(), nchol_per_kp.end());
199   shmCTensor H1({nkpts, nmo_max, nmo_max}, shared_allocator<ComplexType>{TGwfn.Node()});
200   shmCTensor vn0({nkpts, nmo_max, nmo_max}, shared_allocator<ComplexType>{TGwfn.Node()});
201   std::vector<shmSpMatrix> LQKikn;
202   LQKikn.reserve(nkpts);
203   for (int Q = 0; Q < nkpts; Q++)
204     if (Q == Q0)
205       LQKikn.emplace_back(
206           shmSpMatrix({nkpts, nmo_max * nmo_max * nchol_per_kp[Q]}, shared_allocator<SPComplexType>{TGwfn.Node()}));
207     else if (kminus[Q] == Q) // only storing half of K points and using symmetry
208       LQKikn.emplace_back(
209           shmSpMatrix({nkpts / 2, nmo_max * nmo_max * nchol_per_kp[Q]}, shared_allocator<SPComplexType>{TGwfn.Node()}));
210     else if (Q < kminus[Q])
211       LQKikn.emplace_back(
212           shmSpMatrix({nkpts, nmo_max * nmo_max * nchol_per_kp[Q]}, shared_allocator<SPComplexType>{TGwfn.Node()}));
213     else // Q > kminus[Q]
214       LQKikn.emplace_back(shmSpMatrix({1, 1}, shared_allocator<SPComplexType>{TGwfn.Node()}));
215 
216   if (TGwfn.Global().root())
217   {
218     {
219       boost::multi::array_ref<ComplexType, 2> H1_(std::addressof(*H1.origin()),
220                                                   {H1.shape()[0], H1.shape()[1] * H1.shape()[2]});
221       if (!dump.read(H1_, "KPH1"))
222       {
223         app_error() << " Error in loadKP3IndexFactorization: Problems reading dataset. \n";
224         APP_ABORT("");
225       }
226     }
227     {
228       boost::multi::array_ref<ComplexType, 2> vn0_(std::addressof(*vn0.origin()),
229                                                    {vn0.shape()[0], vn0.shape()[1] * vn0.shape()[2]});
230       if (!dump.read(vn0_, "KPv0"))
231       {
232         app_error() << " Error in loadKP3IndexFactorization: Problems reading dataset. \n";
233         APP_ABORT("");
234       }
235     }
236     for (int Q = 0; Q < nkpts; Q++)
237     {
238       if (Q > kminus[Q])
239         continue;
240       // to avoid reallocation, will abort if sizes don't match
241       boost::multi::array_ref<SPComplexType, 2> LQ(std::addressof(*LQKikn[Q].origin()),
242                                                    {LQKikn[Q].shape()[0], LQKikn[Q].shape()[1]});
243       if (!dump.read(LQ, std::string("L") + std::to_string(Q)))
244       {
245         app_error() << " Error in loadKP3IndexFactorization: Problems reading dataset. \n";
246         APP_ABORT("");
247       }
248     }
249   }
250   if (TGwfn.Node().root())
251   {
252     TGwfn.Cores().broadcast_n(std::addressof(*H1.origin()), H1.num_elements(), 0);
253     TGwfn.Cores().broadcast_n(std::addressof(*vn0.origin()), vn0.num_elements(), 0);
254     for (int Q = 0; Q < nkpts; Q++)
255     {
256       if (Q > kminus[Q])
257         continue;
258       TGwfn.Cores().broadcast_n(std::addressof(*LQKikn[Q].origin()), LQKikn[Q].num_elements(), 0);
259     }
260   }
261 
262 
263   shmIMatrix nocc_per_kp({ndet, nspins * nkpts}, shared_allocator<int>{TGwfn.Node()});
264   if (TGwfn.Node().root())
265   {
266     if (type == COLLINEAR)
267     {
268       for (int i = 0; i < ndet; i++)
269       {
270         if (not get_nocc_per_kp(nmo_per_kp, PsiT[2 * i], nocc_per_kp[i]({0, nkpts})))
271         {
272           app_error() << " Error in KPFactorizedHamiltonian::getHamiltonianOperations():"
273                       << " Only wavefunctions in block-diagonal form are accepted. \n";
274           APP_ABORT("");
275         }
276         if (not get_nocc_per_kp(nmo_per_kp, PsiT[2 * i + 1], nocc_per_kp[i]({nkpts, 2 * nkpts})))
277         {
278           app_error() << " Error in KPFactorizedHamiltonian::getHamiltonianOperations():"
279                       << " Only wavefunctions in block-diagonal form are accepted. \n";
280           APP_ABORT("");
281         }
282       }
283     }
284     else
285     {
286       for (int i = 0; i < ndet; i++)
287         if (not get_nocc_per_kp(nmo_per_kp, PsiT[i], nocc_per_kp[i]))
288         {
289           app_error() << " Error in KPFactorizedHamiltonian::getHamiltonianOperations():"
290                       << " Only wavefunctions in block-diagonal form are accepted. \n";
291           APP_ABORT("");
292         }
293     }
294   }
295   TGwfn.Node().barrier();
296   int nocc_max = *std::max_element(std::addressof(*nocc_per_kp.origin()),
297                                    std::addressof(*nocc_per_kp.origin()) + nocc_per_kp.num_elements());
298 
299 
300   std::vector<shmSpMatrix> LQKank;
301   LQKank.reserve(ndet * nspins * (nkpts + 1)); // storing 2 components for Q=0, since it is not assumed symmetric
302   shmCMatrix haj({ndet * nkpts, (type == COLLINEAR ? 2 : 1) * nocc_max * nmo_max},
303                  shared_allocator<ComplexType>{TGwfn.Node()});
304   if (TGwfn.Node().root())
305     std::fill_n(haj.origin(), haj.num_elements(), ComplexType(0.0));
306   int ank_max = nocc_max * nchol_max * nmo_max;
307   for (int nd = 0; nd < ndet; nd++)
308   {
309     for (int Q = 0; Q < (nkpts + 1); Q++)
310     {
311       LQKank.emplace_back(shmSpMatrix({nkpts, ank_max}, shared_allocator<ComplexType>{TGwfn.Node()}));
312     }
313     if (type == COLLINEAR)
314     {
315       for (int Q = 0; Q < (nkpts + 1); Q++)
316       {
317         LQKank.emplace_back(shmSpMatrix({nkpts, ank_max}, shared_allocator<ComplexType>{TGwfn.Node()}));
318       }
319     }
320   }
321   for (int nd = 0, nt = 0, nq0 = 0; nd < ndet; nd++, nq0 += (nkpts + 1) * nspins)
322   {
323     for (int Q = 0; Q < (nkpts + 1); Q++)
324     {
325       for (int K = 0; K < nkpts; K++, nt++)
326       {
327         if (nt % TGwfn.Node().size() == TGwfn.Node().rank())
328         {
329           std::fill_n(std::addressof(*LQKank[nq0 + Q][K].origin()), LQKank[nq0 + Q][K].num_elements(),
330                       SPComplexType(0.0));
331           if (type == COLLINEAR)
332           {
333             std::fill_n(std::addressof(*LQKank[nq0 + nkpts + 1 + Q][K].origin()),
334                         LQKank[nq0 + nkpts + 1 + Q][K].num_elements(), SPComplexType(0.0));
335           }
336         }
337       }
338     }
339   }
340   TGwfn.Node().barrier();
341   boost::multi::array<SPComplexType, 2> buff({nmo_max, nchol_max});
342   for (int nd = 0, nt = 0, nq0 = 0; nd < ndet; nd++, nq0 += (nkpts + 1) * nspins)
343   {
344     for (int Q = 0; Q < nkpts; Q++)
345     {
346       for (int K = 0; K < nkpts; K++, nt++)
347       {
348         if (nt % TGwfn.Global().size() == TGwfn.Global().rank())
349         {
350           // haj and add half-transformed right-handed rotation for Q=0
351           int Qm    = kminus[Q];
352           int QK    = QKtok2[Q][K];
353           int na    = nocc_per_kp[nd][K];
354           int nb    = nocc_per_kp[nd][nkpts + QK];
355           int ni    = nmo_per_kp[K];
356           int nk    = nmo_per_kp[QK];
357           int nchol = nchol_per_kp[Q];
358           if (Q == 0)
359           {
360             if (type == COLLINEAR)
361             {
362               { // Alpha
363                 auto Psi = get_PsiK<boost::multi::array<ComplexType, 2>>(nmo_per_kp, PsiT[2 * nd], K);
364                 assert(Psi.shape()[0] == na);
365                 boost::multi::array_ref<ComplexType, 2> haj_r(std::addressof(*haj[nd * nkpts + K].origin()), {na, ni});
366                 ma::product(Psi, H1[K]({0, ni}, {0, ni}), haj_r);
367               }
368               { // Beta
369                 auto Psi = get_PsiK<boost::multi::array<ComplexType, 2>>(nmo_per_kp, PsiT[2 * nd + 1], K);
370                 assert(Psi.shape()[0] == nb);
371                 boost::multi::array_ref<ComplexType, 2> haj_r(std::addressof(*haj[nd * nkpts + K].origin()) + na * ni,
372                                                               {nb, ni});
373                 ma::product(Psi, H1[K]({0, ni}, {0, ni}), haj_r);
374               }
375             }
376             else
377             {
378               auto Psi = get_PsiK<boost::multi::array<ComplexType, 2>>(nmo_per_kp, PsiT[nd], K);
379               assert(Psi.shape()[0] == na);
380               boost::multi::array_ref<ComplexType, 2> haj_r(std::addressof(*haj[nd * nkpts + K].origin()), {na, ni});
381               ma::product(ComplexType(2.0), Psi, H1[K]({0, ni}, {0, ni}), ComplexType(0.0), haj_r);
382             }
383           }
384           if (type == COLLINEAR)
385           {
386             { // Alpha
387               // change get_PsiK to cast to the value_type of the result
388               auto Psi = get_PsiK<boost::multi::array<SPComplexType, 2>>(nmo_per_kp, PsiT[2 * nd], K);
389               assert(Psi.shape()[0] == nocc_per_kp[nd][K]);
390               if (Q < Qm || Q == Q0 || ((Q == Qm) && (K < QK)))
391               {
392                 int kpos = K;
393                 if (Q == Qm && Q != Q0)
394                 { //find position in symmetric list
395                   kpos = 0;
396                   for (int K_ = 0; K_ < K; K_++)
397                     if (K_ < QKtok2[Q][K_])
398                       kpos++;
399                 }
400                 Sp3Tensor_ref Likn(std::addressof(*LQKikn[Q][kpos].origin()), {ni, nk, nchol});
401                 Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + Q][K].origin()), {na, nchol, nk});
402                 ma_rotate::getLank(Psi, Likn, Lank, buff);
403                 if (Q == Q0)
404                 {
405                   assert(K == QK);
406                   Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + nkpts][K].origin()), {na, nchol, nk});
407                   ma_rotate::getLank_from_Lkin(Psi, Likn, Lank, buff);
408                 }
409               }
410               else
411               {
412                 int kpos = QK;
413                 if (Q == Qm)
414                 { //find position in symmetric list
415                   kpos = 0;
416                   for (int K_ = 0; K_ < QK; K_++)
417                     if (K_ < QKtok2[Q][K_])
418                       kpos++;
419                 }
420                 Sp3Tensor_ref Lkin(std::addressof(*LQKikn[Qm][QK].origin()), {nk, ni, nchol});
421                 Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + Q][K].origin()), {na, nchol, nk});
422                 ma_rotate::getLank_from_Lkin(Psi, Lkin, Lank, buff);
423               }
424             }
425             { // Beta
426               // change get_PsiK to cast to the value_type of the result
427               auto Psi = get_PsiK<boost::multi::array<SPComplexType, 2>>(nmo_per_kp, PsiT[2 * nd + 1], K);
428               assert(Psi.shape()[0] == nb);
429               if (Q < Qm || Q == Q0 || ((Q == Qm) && (K < QK)))
430               {
431                 int kpos = K;
432                 if (Q == Qm && Q != Q0)
433                 { //find position in symmetric list
434                   kpos = 0;
435                   for (int K_ = 0; K_ < K; K_++)
436                     if (K_ < QKtok2[Q][K_])
437                       kpos++;
438                 }
439                 Sp3Tensor_ref Likn(std::addressof(*LQKikn[Q][kpos].origin()), {ni, nk, nchol});
440                 Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + nkpts + 1 + Q][K].origin()), {nb, nchol, nk});
441                 ma_rotate::getLank(Psi, Likn, Lank, buff);
442                 if (Q == Q0)
443                 {
444                   assert(K == QK);
445                   Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + nkpts + 1 + nkpts][K].origin()), {nb, nchol, nk});
446                   ma_rotate::getLank_from_Lkin(Psi, Likn, Lank, buff);
447                 }
448               }
449               else
450               {
451                 int kpos = QK;
452                 if (Q == Qm)
453                 { //find position in symmetric list
454                   kpos = 0;
455                   for (int K_ = 0; K_ < QK; K_++)
456                     if (K_ < QKtok2[Q][K_])
457                       kpos++;
458                 }
459                 Sp3Tensor_ref Lkin(std::addressof(*LQKikn[Qm][QK].origin()), {nk, ni, nchol});
460                 Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + nkpts + 1 + Q][K].origin()), {nb, nchol, nk});
461                 ma_rotate::getLank_from_Lkin(Psi, Lkin, Lank, buff);
462               }
463             }
464           }
465           else
466           {
467             // change get_PsiK to cast to the value_type of the result
468             auto Psi = get_PsiK<SpMatrix>(nmo_per_kp, PsiT[nd], K);
469             assert(Psi.shape()[0] == na);
470             if (Q < Qm || Q == Q0 || ((Q == Qm) && (K < QK)))
471             {
472               int kpos = K;
473               if (Q == Qm && Q != Q0)
474               { //find position in symmetric list
475                 kpos = 0;
476                 for (int K_ = 0; K_ < K; K_++)
477                   if (K_ < QKtok2[Q][K_])
478                     kpos++;
479               }
480               Sp3Tensor_ref Likn(std::addressof(*LQKikn[Q][kpos].origin()), {ni, nk, nchol});
481               Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + Q][K].origin()), {na, nchol, nk});
482               ma_rotate::getLank(Psi, Likn, Lank, buff);
483               if (Q == Q0)
484               {
485                 assert(K == QK);
486                 Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + nkpts][K].origin()), {na, nchol, nk});
487                 ma_rotate::getLank_from_Lkin(Psi, Likn, Lank, buff);
488               }
489             }
490             else
491             {
492               int kpos = QK;
493               if (Q == Qm)
494               { //find position in symmetric list
495                 kpos = 0;
496                 for (int K_ = 0; K_ < QK; K_++)
497                   if (K_ < QKtok2[Q][K_])
498                     kpos++;
499               }
500               Sp3Tensor_ref Lkin(std::addressof(*LQKikn[Qm][kpos].origin()), {nk, ni, nchol});
501               Sp3Tensor_ref Lank(std::addressof(*LQKank[nq0 + Q][K].origin()), {na, nchol, nk});
502               ma_rotate::getLank_from_Lkin(Psi, Lkin, Lank, buff);
503             }
504           }
505         }
506       }
507     }
508   }
509   TGwfn.Global().barrier();
510   if (TGwfn.Node().root())
511   {
512     TGwfn.Cores().all_reduce_in_place_n(std::addressof(*haj.origin()), haj.num_elements(), std::plus<>());
513     for (int Q = 0; Q < LQKank.size(); Q++)
514       TGwfn.Cores().all_reduce_in_place_n(std::addressof(*LQKank[Q].origin()), LQKank[Q].num_elements(), std::plus<>());
515   }
516   TGwfn.Node().barrier();
517 
518   int global_ncvecs = std::accumulate(nchol_per_kp.begin(), nchol_per_kp.end(), 0);
519 
520   if (TGwfn.Global().root())
521   {
522     dump.pop();
523     dump.pop();
524   }
525 
526   return HamiltonianOperations(
527       KP3IndexFactorization(TGwfn.TG_local(), type, std::move(nmo_per_kp), std::move(nchol_per_kp), std::move(kminus),
528                             std::move(nocc_per_kp), std::move(QKtok2), std::move(H1), std::move(haj), std::move(LQKikn),
529                             std::move(LQKank), std::move(vn0), std::move(gQ), nsampleQ, E0, global_ncvecs));
530 }
531 
532 // single writer right now
533 template<class shmIMatrix, class shmC3Tensor, class shmSpMatrix>
writeKP3IndexFactorization(hdf_archive & dump,WALKER_TYPES type,int NMO,int NAEA,int NAEB,TaskGroup_ & TGprop,TaskGroup_ & TGwfn,std::vector<int> & nopk,std::vector<int> & ncholpQ,std::vector<int> & kminus,shmIMatrix & QKToK2,shmC3Tensor & H1,std::vector<shmSpMatrix> & Lik,shmC3Tensor & vn0,int nsampleQ,std::vector<RealType> & gQ,ValueType E0,int gncv)534 inline void writeKP3IndexFactorization(hdf_archive& dump,
535                                        WALKER_TYPES type,
536                                        int NMO,
537                                        int NAEA,
538                                        int NAEB,
539                                        TaskGroup_& TGprop,
540                                        TaskGroup_& TGwfn,
541                                        std::vector<int>& nopk,
542                                        std::vector<int>& ncholpQ,
543                                        std::vector<int>& kminus,
544                                        shmIMatrix& QKToK2,
545                                        shmC3Tensor& H1,
546                                        std::vector<shmSpMatrix>& Lik,
547                                        shmC3Tensor& vn0,
548                                        int nsampleQ,
549                                        std::vector<RealType>& gQ,
550                                        ValueType E0,
551                                        int gncv)
552 {
553   if (TGprop.getNGroupsPerTG() > 1)
554     APP_ABORT(" Error: Distributed KP3INdex not yet implemented.\n");
555   if (TGwfn.getNGroupsPerTG() > 1)
556     APP_ABORT(" Error: Distributed KP3INdex not yet implemented.\n");
557 
558   if (TGwfn.Global().root())
559   {
560     if (dump.push("HamiltonianOperations") == hdf_archive::is_closed)
561       APP_ABORT(" Error: hdf_archive::push returned false. \n");
562     if (dump.push("KP3IndexFactorization") == hdf_archive::is_closed)
563       APP_ABORT(" Error: hdf_archive::push returned false. \n");
564     std::vector<int> dims{NMO, NAEA, NAEB, int(nopk.size()), type, nsampleQ, gncv};
565     dump.write(dims, "dims");
566     std::vector<ValueType> et{E0};
567     dump.write(nopk, "NMOPerKP");
568     dump.write(ncholpQ, "NCholPerKP");
569     dump.write(kminus, "MinusK");
570     dump.write(QKToK2, "QKTok2");
571     dump.write(gQ, "gQ");
572     {
573       boost::multi::array_ref<ComplexType, 2> H1_(std::addressof(*H1.origin()),
574                                                   {H1.shape()[0], H1.shape()[1] * H1.shape()[2]});
575       dump.write(H1_, "KPH1");
576     }
577     {
578       boost::multi::array_ref<ComplexType, 2> vn0_(std::addressof(*vn0.origin()),
579                                                    {vn0.shape()[0], vn0.shape()[1] * vn0.shape()[2]});
580       dump.write(vn0_, "KPv0");
581     }
582     int nkpts = Lik.size();
583     // use hyperslabs on the roots of TGnumber=0 in distributed case
584     for (int Q = 0; Q < nkpts; Q++)
585       dump.write(Lik[Q], std::string("L") + std::to_string(Q));
586     dump.write(et, "E0");
587   }
588 
589   if (TGwfn.Global().root())
590   {
591     dump.pop();
592     dump.pop();
593   }
594   TGwfn.Global().barrier();
595 }
596 
597 } // namespace afqmc
598 } // namespace qmcplusplus
599 
600 #endif
601