1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
8 //                    Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
9 //                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //
11 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
12 //////////////////////////////////////////////////////////////////////////////////////
13 // -*- C++ -*-
14 #ifndef QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
15 #define QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
16 
17 #include <map>
18 #include <numeric>
19 #include "Configuration.h"
20 #if !defined(QMC_BUILD_SANDBOX_ONLY)
21 #include "QMCWaveFunctions/WaveFunctionComponent.h"
22 #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h"
23 #endif
24 #include "Particle/DistanceTableData.h"
25 #include "LongRange/StructFact.h"
26 #include "CPU/SIMD/aligned_allocator.hpp"
27 #include "CPU/SIMD/algorithm.hpp"
28 
29 namespace qmcplusplus
30 {
31 // helper class to activate KEcorr during optimizing Jastrow
32 template<typename RT, class FT>
33 class J2KECorrection
34 {
35   size_t num_groups_;
36   std::vector<size_t> num_elec_in_groups_;
37   RT num_elecs_;
38   RT vol;
39   RT G0mag;
40   const std::vector<FT*>& F_;
41   bool SK_enabled;
42 
43 public:
J2KECorrection(const ParticleSet & targetPtcl,const std::vector<FT * > & F)44   J2KECorrection(const ParticleSet& targetPtcl, const std::vector<FT*>& F)
45       : num_groups_(targetPtcl.groups()),
46         num_elecs_(targetPtcl.getTotalNum()),
47         vol(targetPtcl.Lattice.Volume),
48         F_(F),
49         SK_enabled(targetPtcl.SK != nullptr)
50   {
51     // compute num_elec_in_groups_
52     num_elec_in_groups_.reserve(3);
53     for (int i = 0; i < num_groups_; i++)
54       num_elec_in_groups_.push_back(targetPtcl.last(i) - targetPtcl.first(i));
55 
56     if (SK_enabled)
57       G0mag = std::sqrt(targetPtcl.SK->KLists.ksq[0]);
58   }
59 
computeKEcorr()60   RT computeKEcorr()
61   {
62     if (!SK_enabled)
63       return 0;
64 
65     const int numPoints = 1000;
66     RT uk               = 0.0;
67     RT a                = 1.0;
68 
69     for (int i = 0; i < num_groups_; i++)
70     {
71       int Ni = num_elec_in_groups_[i];
72       for (int j = 0; j < num_groups_; j++)
73       {
74         int Nj = num_elec_in_groups_[j];
75         if (F_[i * num_groups_ + j])
76         {
77           FT& ufunc = *(F_[i * num_groups_ + j]);
78           RT radius = ufunc.cutoff_radius;
79           RT k      = G0mag;
80           RT dr     = radius / (RT)(numPoints - 1);
81           for (int ir = 0; ir < numPoints; ir++)
82           {
83             RT r = dr * (RT)ir;
84             RT u = ufunc.evaluate(r);
85             uk += 0.5 * 4.0 * M_PI * r * std::sin(k * r) / k * u * dr * (RT)Nj / (RT)(Ni + Nj);
86           }
87         }
88       }
89     }
90     for (int iter = 0; iter < 20; iter++)
91       a = uk / (4.0 * M_PI * (1.0 / (G0mag * G0mag) - 1.0 / (G0mag * G0mag + 1.0 / a)));
92     return 4.0 * M_PI * a / (4.0 * vol) * num_elecs_;
93   }
94 };
95 
96 /** @ingroup WaveFunctionComponent
97  *  @brief Specialization for two-body Jastrow function using multiple functors
98  *
99  * Each pair-type can have distinct function \f$u(r_{ij})\f$.
100  * For electrons, distinct pair correlation functions are used
101  * for spins up-up/down-down and up-down/down-up.
102  *
103  * Based on J2OrbitalSoA.h with these considerations
104  * - DistanceTableData using SoA containers
105  * - support mixed precision: FT::real_type != OHMMS_PRECISION
106  * - loops over the groups: elminated PairID
107  * - support simd function
108  * - double the loop counts
109  * - Memory use is O(N).
110  */
111 template<class FT>
112 class J2OrbitalSoA : public WaveFunctionComponent
113 {
114 public:
115   ///alias FuncType
116   using FuncType = FT;
117   ///type of each component U, dU, d2U;
118   using valT = typename FT::real_type;
119   ///element position type
120   using posT = TinyVector<valT, OHMMS_DIM>;
121   ///use the same container
122   using DistRow         = DistanceTableData::DistRow;
123   using DisplRow        = DistanceTableData::DisplRow;
124   using gContainer_type = VectorSoaContainer<valT, OHMMS_DIM>;
125 
126   // Ye: leaving this public is bad but currently used by unit tests.
127   ///Container for \f$F[ig*NumGroups+jg]\f$.
128   std::vector<FT*> F;
129 
130 protected:
131   ///number of particles
132   size_t N;
133   ///number of particles + padded
134   size_t N_padded;
135   ///number of groups of the target particleset
136   size_t NumGroups;
137   ///diff value
138   RealType DiffVal;
139   ///Correction
140   RealType KEcorr;
141   ///\f$Uat[i] = sum_(j) u_{i,j}\f$
142   Vector<valT> Uat;
143   ///\f$dUat[i] = sum_(j) du_{i,j}\f$
144   gContainer_type dUat;
145   ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$
146   Vector<valT> d2Uat;
147   valT cur_Uat;
148   aligned_vector<valT> cur_u, cur_du, cur_d2u;
149   aligned_vector<valT> old_u, old_du, old_d2u;
150   aligned_vector<valT> DistCompressed;
151   aligned_vector<int> DistIndice;
152   ///Uniquue J2 set for cleanup
153   std::map<std::string, FT*> J2Unique;
154   /// e-e table ID
155   const int my_table_ID_;
156   // helper for compute J2 Chiesa KE correction
157   J2KECorrection<RealType, FT> j2_ke_corr_helper;
158 
159 public:
160   J2OrbitalSoA(const std::string& obj_name, ParticleSet& p, int tid);
161   J2OrbitalSoA(const J2OrbitalSoA& rhs) = delete;
162   ~J2OrbitalSoA();
163 
164   /* initialize storage */
165   void init(ParticleSet& p);
166 
167   /** add functor for (ia,ib) pair */
168   void addFunc(int ia, int ib, FT* j);
169 
170   /** check in an optimizable parameter
171    * @param o a super set of optimizable variables
172    */
checkInVariables(opt_variables_type & active)173   void checkInVariables(opt_variables_type& active)
174   {
175     myVars.clear();
176     typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
177     while (it != it_end)
178     {
179       (*it).second->checkInVariables(active);
180       (*it).second->checkInVariables(myVars);
181       ++it;
182     }
183   }
184 
185   /** check out optimizable variables
186    */
checkOutVariables(const opt_variables_type & active)187   void checkOutVariables(const opt_variables_type& active)
188   {
189     myVars.getIndex(active);
190     Optimizable = myVars.is_optimizable();
191     typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
192     while (it != it_end)
193     {
194       (*it).second->checkOutVariables(active);
195       ++it;
196     }
197     if (dPsi)
198       dPsi->checkOutVariables(active);
199   }
200 
201   ///reset the value of all the unique Two-Body Jastrow functions
resetParameters(const opt_variables_type & active)202   void resetParameters(const opt_variables_type& active)
203   {
204     if (!Optimizable)
205       return;
206     typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
207     while (it != it_end)
208     {
209       (*it).second->resetParameters(active);
210       ++it;
211     }
212     if (dPsi)
213       dPsi->resetParameters(active);
214     for (int i = 0; i < myVars.size(); ++i)
215     {
216       int ii = myVars.Index[i];
217       if (ii >= 0)
218         myVars[i] = active[ii];
219     }
220   }
221 
222 
finalizeOptimization()223   void finalizeOptimization() { KEcorr = j2_ke_corr_helper.computeKEcorr(); }
224 
225   /** print the state, e.g., optimizables */
reportStatus(std::ostream & os)226   void reportStatus(std::ostream& os)
227   {
228     typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
229     while (it != it_end)
230     {
231       (*it).second->myVars.print(os);
232       ++it;
233     }
234   }
235 
236   WaveFunctionComponentPtr makeClone(ParticleSet& tqp) const;
237 
238   LogValueType evaluateLog(const ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L);
239 
240   void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi);
241 
242   /** recompute internal data assuming distance table is fully ready */
243   void recompute(const ParticleSet& P);
244 
245   PsiValueType ratio(ParticleSet& P, int iat);
evaluateRatios(const VirtualParticleSet & VP,std::vector<ValueType> & ratios)246   void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
247   {
248     for (int k = 0; k < ratios.size(); ++k)
249       ratios[k] =
250           std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k)));
251   }
252   void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios);
253 
254   GradType evalGrad(ParticleSet& P, int iat);
255 
256   PsiValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat);
257 
258   void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false);
restore(int iat)259   inline void restore(int iat) {}
260 
261   /** compute G and L after the sweep
262    */
263   LogValueType evaluateGL(const ParticleSet& P,
264                           ParticleSet::ParticleGradient_t& G,
265                           ParticleSet::ParticleLaplacian_t& L,
266                           bool fromscratch = false);
267 
registerData(ParticleSet & P,WFBufferType & buf)268   inline void registerData(ParticleSet& P, WFBufferType& buf)
269   {
270     if (Bytes_in_WFBuffer == 0)
271     {
272       Bytes_in_WFBuffer = buf.current();
273       buf.add(Uat.begin(), Uat.end());
274       buf.add(dUat.data(), dUat.end());
275       buf.add(d2Uat.begin(), d2Uat.end());
276       Bytes_in_WFBuffer = buf.current() - Bytes_in_WFBuffer;
277       // free local space
278       Uat.free();
279       dUat.free();
280       d2Uat.free();
281     }
282     else
283     {
284       buf.forward(Bytes_in_WFBuffer);
285     }
286   }
287 
copyFromBuffer(ParticleSet & P,WFBufferType & buf)288   inline void copyFromBuffer(ParticleSet& P, WFBufferType& buf)
289   {
290     Uat.attachReference(buf.lendReference<valT>(N), N);
291     dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * OHMMS_DIM));
292     d2Uat.attachReference(buf.lendReference<valT>(N), N);
293   }
294 
295   LogValueType updateBuffer(ParticleSet& P, WFBufferType& buf, bool fromscratch = false)
296   {
297     evaluateGL(P, P.G, P.L, false);
298     buf.forward(Bytes_in_WFBuffer);
299     return LogValue;
300   }
301 
302   /*@{ internal compute engines*/
computeU(const ParticleSet & P,int iat,const DistRow & dist)303   inline valT computeU(const ParticleSet& P, int iat, const DistRow& dist)
304   {
305     valT curUat(0);
306     const int igt = P.GroupID[iat] * NumGroups;
307     for (int jg = 0; jg < NumGroups; ++jg)
308     {
309       const FuncType& f2(*F[igt + jg]);
310       int iStart = P.first(jg);
311       int iEnd   = P.last(jg);
312       curUat += f2.evaluateV(iat, iStart, iEnd, dist.data(), DistCompressed.data());
313     }
314     return curUat;
315   }
316 
317   inline void computeU3(const ParticleSet& P,
318                         int iat,
319                         const DistRow& dist,
320                         RealType* restrict u,
321                         RealType* restrict du,
322                         RealType* restrict d2u,
323                         bool triangle = false);
324 
325   /** compute gradient
326    */
accumulateG(const valT * restrict du,const DisplRow & displ)327   inline posT accumulateG(const valT* restrict du, const DisplRow& displ) const
328   {
329     posT grad;
330     for (int idim = 0; idim < OHMMS_DIM; ++idim)
331     {
332       const valT* restrict dX = displ.data(idim);
333       valT s                  = valT();
334 
335 #pragma omp simd reduction(+ : s) aligned(du, dX: QMC_SIMD_ALIGNMENT)
336       for (int jat = 0; jat < N; ++jat)
337         s += du[jat] * dX[jat];
338       grad[idim] = s;
339     }
340     return grad;
341   }
342   /**@} */
343 
ChiesaKEcorrection()344   RealType ChiesaKEcorrection() { return KEcorr = j2_ke_corr_helper.computeKEcorr(); }
345 
KECorrection()346   RealType KECorrection() { return KEcorr; }
347 };
348 
349 template<typename FT>
J2OrbitalSoA(const std::string & obj_name,ParticleSet & p,int tid)350 J2OrbitalSoA<FT>::J2OrbitalSoA(const std::string& obj_name, ParticleSet& p, int tid)
351     : WaveFunctionComponent("J2OrbitalSoA", obj_name), my_table_ID_(p.addTable(p)), j2_ke_corr_helper(p, F)
352 {
353   if (myName.empty())
354     throw std::runtime_error("J2OrbitalSoA object name cannot be empty!");
355   init(p);
356   KEcorr = 0.0;
357 }
358 
359 template<typename FT>
~J2OrbitalSoA()360 J2OrbitalSoA<FT>::~J2OrbitalSoA()
361 {
362   auto it = J2Unique.begin();
363   while (it != J2Unique.end())
364   {
365     delete ((*it).second);
366     ++it;
367   }
368 } //need to clean up J2Unique
369 
370 template<typename FT>
init(ParticleSet & p)371 void J2OrbitalSoA<FT>::init(ParticleSet& p)
372 {
373   N         = p.getTotalNum();
374   N_padded  = getAlignedSize<valT>(N);
375   NumGroups = p.groups();
376 
377   Uat.resize(N);
378   dUat.resize(N);
379   d2Uat.resize(N);
380   cur_u.resize(N);
381   cur_du.resize(N);
382   cur_d2u.resize(N);
383   old_u.resize(N);
384   old_du.resize(N);
385   old_d2u.resize(N);
386   F.resize(NumGroups * NumGroups, nullptr);
387   DistCompressed.resize(N);
388   DistIndice.resize(N);
389 }
390 
391 template<typename FT>
addFunc(int ia,int ib,FT * j)392 void J2OrbitalSoA<FT>::addFunc(int ia, int ib, FT* j)
393 {
394   if (ia == ib)
395   {
396     if (ia == 0) //first time, assign everything
397     {
398       int ij = 0;
399       for (int ig = 0; ig < NumGroups; ++ig)
400         for (int jg = 0; jg < NumGroups; ++jg, ++ij)
401           if (F[ij] == nullptr)
402             F[ij] = j;
403     }
404     else
405       F[ia * NumGroups + ib] = j;
406   }
407   else
408   {
409     if (N == 2)
410     {
411       // a very special case, 1 up + 1 down
412       // uu/dd was prevented by the builder
413       for (int ig = 0; ig < NumGroups; ++ig)
414         for (int jg = 0; jg < NumGroups; ++jg)
415           F[ig * NumGroups + jg] = j;
416     }
417     else
418     {
419       // generic case
420       F[ia * NumGroups + ib] = j;
421       F[ib * NumGroups + ia] = j;
422     }
423   }
424   std::stringstream aname;
425   aname << ia << ib;
426   J2Unique[aname.str()] = j;
427 }
428 
429 template<typename FT>
makeClone(ParticleSet & tqp)430 WaveFunctionComponentPtr J2OrbitalSoA<FT>::makeClone(ParticleSet& tqp) const
431 {
432   J2OrbitalSoA<FT>* j2copy = new J2OrbitalSoA<FT>(myName, tqp, -1);
433   if (dPsi)
434     j2copy->dPsi = dPsi->makeClone(tqp);
435   std::map<const FT*, FT*> fcmap;
436   for (int ig = 0; ig < NumGroups; ++ig)
437     for (int jg = ig; jg < NumGroups; ++jg)
438     {
439       int ij = ig * NumGroups + jg;
440       if (F[ij] == 0)
441         continue;
442       typename std::map<const FT*, FT*>::iterator fit = fcmap.find(F[ij]);
443       if (fit == fcmap.end())
444       {
445         FT* fc = new FT(*F[ij]);
446         j2copy->addFunc(ig, jg, fc);
447         //if (dPsi) (j2copy->dPsi)->addFunc(aname.str(),ig,jg,fc);
448         fcmap[F[ij]] = fc;
449       }
450     }
451   j2copy->KEcorr      = KEcorr;
452   j2copy->Optimizable = Optimizable;
453   return j2copy;
454 }
455 
456 /** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$
457  * @param P particleset
458  * @param iat particle index
459  * @param dist starting distance
460  * @param u starting value
461  * @param du starting first deriv
462  * @param d2u starting second deriv
463  */
464 template<typename FT>
computeU3(const ParticleSet & P,int iat,const DistRow & dist,RealType * restrict u,RealType * restrict du,RealType * restrict d2u,bool triangle)465 inline void J2OrbitalSoA<FT>::computeU3(const ParticleSet& P,
466                                         int iat,
467                                         const DistRow& dist,
468                                         RealType* restrict u,
469                                         RealType* restrict du,
470                                         RealType* restrict d2u,
471                                         bool triangle)
472 {
473   const int jelmax = triangle ? iat : N;
474   constexpr valT czero(0);
475   std::fill_n(u, jelmax, czero);
476   std::fill_n(du, jelmax, czero);
477   std::fill_n(d2u, jelmax, czero);
478 
479   const int igt = P.GroupID[iat] * NumGroups;
480   for (int jg = 0; jg < NumGroups; ++jg)
481   {
482     const FuncType& f2(*F[igt + jg]);
483     int iStart = P.first(jg);
484     int iEnd   = std::min(jelmax, P.last(jg));
485     f2.evaluateVGL(iat, iStart, iEnd, dist.data(), u, du, d2u, DistCompressed.data(), DistIndice.data());
486   }
487   //u[iat]=czero;
488   //du[iat]=czero;
489   //d2u[iat]=czero;
490 }
491 
492 template<typename FT>
ratio(ParticleSet & P,int iat)493 typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratio(ParticleSet& P, int iat)
494 {
495   //only ratio, ready to compute it again
496   UpdateMode = ORB_PBYP_RATIO;
497   cur_Uat    = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists());
498   return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
499 }
500 
501 template<typename FT>
evaluateRatiosAlltoOne(ParticleSet & P,std::vector<ValueType> & ratios)502 inline void J2OrbitalSoA<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
503 {
504   const auto& d_table = P.getDistTable(my_table_ID_);
505   const auto& dist    = d_table.getTempDists();
506 
507   for (int ig = 0; ig < NumGroups; ++ig)
508   {
509     const int igt = ig * NumGroups;
510     valT sumU(0);
511     for (int jg = 0; jg < NumGroups; ++jg)
512     {
513       const FuncType& f2(*F[igt + jg]);
514       int iStart = P.first(jg);
515       int iEnd   = P.last(jg);
516       sumU += f2.evaluateV(-1, iStart, iEnd, dist.data(), DistCompressed.data());
517     }
518 
519     for (int i = P.first(ig); i < P.last(ig); ++i)
520     {
521       // remove self-interaction
522       const valT Uself = F[igt + ig]->evaluate(dist[i]);
523       ratios[i]        = std::exp(Uat[i] + Uself - sumU);
524     }
525   }
526 }
527 
528 template<typename FT>
evalGrad(ParticleSet & P,int iat)529 typename J2OrbitalSoA<FT>::GradType J2OrbitalSoA<FT>::evalGrad(ParticleSet& P, int iat)
530 {
531   return GradType(dUat[iat]);
532 }
533 
534 template<typename FT>
ratioGrad(ParticleSet & P,int iat,GradType & grad_iat)535 typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
536 {
537   UpdateMode = ORB_PBYP_PARTIAL;
538 
539   computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
540   cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
541   DiffVal = Uat[iat] - cur_Uat;
542   grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls());
543   return std::exp(static_cast<PsiValueType>(DiffVal));
544 }
545 
546 template<typename FT>
acceptMove(ParticleSet & P,int iat,bool safe_to_delay)547 void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
548 {
549   // get the old u, du, d2u
550   const auto& d_table = P.getDistTable(my_table_ID_);
551   computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
552   if (UpdateMode == ORB_PBYP_RATIO)
553   { //ratio-only during the move; need to compute derivatives
554     const auto& dist = d_table.getTempDists();
555     computeU3(P, iat, dist, cur_u.data(), cur_du.data(), cur_d2u.data());
556   }
557 
558   valT cur_d2Uat(0);
559   const auto& new_dr    = d_table.getTempDispls();
560   const auto& old_dr    = d_table.getOldDispls();
561   constexpr valT lapfac = OHMMS_DIM - RealType(1);
562 #pragma omp simd reduction(+ : cur_d2Uat)
563   for (int jat = 0; jat < N; jat++)
564   {
565     const valT du   = cur_u[jat] - old_u[jat];
566     const valT newl = cur_d2u[jat] + lapfac * cur_du[jat];
567     const valT dl   = old_d2u[jat] + lapfac * old_du[jat] - newl;
568     Uat[jat] += du;
569     d2Uat[jat] += dl;
570     cur_d2Uat -= newl;
571   }
572   posT cur_dUat;
573   for (int idim = 0; idim < OHMMS_DIM; ++idim)
574   {
575     const valT* restrict new_dX    = new_dr.data(idim);
576     const valT* restrict old_dX    = old_dr.data(idim);
577     const valT* restrict cur_du_pt = cur_du.data();
578     const valT* restrict old_du_pt = old_du.data();
579     valT* restrict save_g          = dUat.data(idim);
580     valT cur_g                     = cur_dUat[idim];
581 #pragma omp simd reduction(+ : cur_g) aligned(old_dX, new_dX, save_g, cur_du_pt, old_du_pt: QMC_SIMD_ALIGNMENT)
582     for (int jat = 0; jat < N; jat++)
583     {
584       const valT newg = cur_du_pt[jat] * new_dX[jat];
585       const valT dg   = newg - old_du_pt[jat] * old_dX[jat];
586       save_g[jat] -= dg;
587       cur_g += newg;
588     }
589     cur_dUat[idim] = cur_g;
590   }
591   LogValue += Uat[iat] - cur_Uat;
592   Uat[iat]   = cur_Uat;
593   dUat(iat)  = cur_dUat;
594   d2Uat[iat] = cur_d2Uat;
595 }
596 
597 template<typename FT>
recompute(const ParticleSet & P)598 void J2OrbitalSoA<FT>::recompute(const ParticleSet& P)
599 {
600   const auto& d_table = P.getDistTable(my_table_ID_);
601   for (int ig = 0; ig < NumGroups; ++ig)
602   {
603     for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
604     {
605       computeU3(P, iat, d_table.getDistRow(iat), cur_u.data(), cur_du.data(), cur_d2u.data(), true);
606       Uat[iat] = simd::accumulate_n(cur_u.data(), iat, valT());
607       posT grad;
608       valT lap(0);
609       const valT* restrict u   = cur_u.data();
610       const valT* restrict du  = cur_du.data();
611       const valT* restrict d2u = cur_d2u.data();
612       const auto& displ        = d_table.getDisplRow(iat);
613       constexpr valT lapfac    = OHMMS_DIM - RealType(1);
614 #pragma omp simd reduction(+ : lap) aligned(du, d2u: QMC_SIMD_ALIGNMENT)
615       for (int jat = 0; jat < iat; ++jat)
616         lap += d2u[jat] + lapfac * du[jat];
617       for (int idim = 0; idim < OHMMS_DIM; ++idim)
618       {
619         const valT* restrict dX = displ.data(idim);
620         valT s                  = valT();
621 #pragma omp simd reduction(+ : s) aligned(du, dX: QMC_SIMD_ALIGNMENT)
622         for (int jat = 0; jat < iat; ++jat)
623           s += du[jat] * dX[jat];
624         grad[idim] = s;
625       }
626       dUat(iat)  = grad;
627       d2Uat[iat] = -lap;
628 // add the contribution from the upper triangle
629 #pragma omp simd aligned(u, du, d2u: QMC_SIMD_ALIGNMENT)
630       for (int jat = 0; jat < iat; jat++)
631       {
632         Uat[jat] += u[jat];
633         d2Uat[jat] -= d2u[jat] + lapfac * du[jat];
634       }
635       for (int idim = 0; idim < OHMMS_DIM; ++idim)
636       {
637         valT* restrict save_g   = dUat.data(idim);
638         const valT* restrict dX = displ.data(idim);
639 #pragma omp simd aligned(save_g, du, dX: QMC_SIMD_ALIGNMENT)
640         for (int jat = 0; jat < iat; jat++)
641           save_g[jat] -= du[jat] * dX[jat];
642       }
643     }
644   }
645 }
646 
647 template<typename FT>
evaluateLog(const ParticleSet & P,ParticleSet::ParticleGradient_t & G,ParticleSet::ParticleLaplacian_t & L)648 typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::evaluateLog(const ParticleSet& P,
649                                                                       ParticleSet::ParticleGradient_t& G,
650                                                                       ParticleSet::ParticleLaplacian_t& L)
651 {
652   return evaluateGL(P, G, L, true);
653 }
654 
655 template<typename FT>
evaluateGL(const ParticleSet & P,ParticleSet::ParticleGradient_t & G,ParticleSet::ParticleLaplacian_t & L,bool fromscratch)656 WaveFunctionComponent::LogValueType J2OrbitalSoA<FT>::evaluateGL(const ParticleSet& P,
657                                                                  ParticleSet::ParticleGradient_t& G,
658                                                                  ParticleSet::ParticleLaplacian_t& L,
659                                                                  bool fromscratch)
660 {
661   if (fromscratch)
662     recompute(P);
663   LogValue = valT(0);
664   for (int iat = 0; iat < N; ++iat)
665   {
666     LogValue += Uat[iat];
667     G[iat] += dUat[iat];
668     L[iat] += d2Uat[iat];
669   }
670 
671   return LogValue = -LogValue * 0.5;
672 }
673 
674 template<typename FT>
evaluateHessian(ParticleSet & P,HessVector_t & grad_grad_psi)675 void J2OrbitalSoA<FT>::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi)
676 {
677   LogValue = 0.0;
678   const DistanceTableData& d_ee(P.getDistTable(my_table_ID_));
679   valT dudr, d2udr2;
680 
681   Tensor<valT, DIM> ident;
682   grad_grad_psi = 0.0;
683   ident.diagonal(1.0);
684 
685   for (int i = 1; i < N; ++i)
686   {
687     const auto& dist  = d_ee.getDistRow(i);
688     const auto& displ = d_ee.getDisplRow(i);
689     auto ig           = P.GroupID[i];
690     const int igt     = ig * NumGroups;
691     for (int j = 0; j < i; ++j)
692     {
693       auto r    = dist[j];
694       auto rinv = 1.0 / r;
695       auto dr   = displ[j];
696       auto jg   = P.GroupID[j];
697       auto uij  = F[igt + jg]->evaluate(r, dudr, d2udr2);
698       LogValue -= uij;
699       auto hess = rinv * rinv * outerProduct(dr, dr) * (d2udr2 - dudr * rinv) + ident * dudr * rinv;
700       grad_grad_psi[i] -= hess;
701       grad_grad_psi[j] -= hess;
702     }
703   }
704 }
705 
706 } // namespace qmcplusplus
707 #endif
708