1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
8 // Amrita Mathuriya, amrita.mathuriya@intel.com, Intel Corp.
9 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
10 //
11 // File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp.
12 //////////////////////////////////////////////////////////////////////////////////////
13 // -*- C++ -*-
14 #ifndef QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
15 #define QMCPLUSPLUS_TWOBODYJASTROW_OPTIMIZED_SOA_H
16
17 #include <map>
18 #include <numeric>
19 #include "Configuration.h"
20 #if !defined(QMC_BUILD_SANDBOX_ONLY)
21 #include "QMCWaveFunctions/WaveFunctionComponent.h"
22 #include "QMCWaveFunctions/Jastrow/DiffTwoBodyJastrowOrbital.h"
23 #endif
24 #include "Particle/DistanceTableData.h"
25 #include "LongRange/StructFact.h"
26 #include "CPU/SIMD/aligned_allocator.hpp"
27 #include "CPU/SIMD/algorithm.hpp"
28
29 namespace qmcplusplus
30 {
31 // helper class to activate KEcorr during optimizing Jastrow
32 template<typename RT, class FT>
33 class J2KECorrection
34 {
35 size_t num_groups_;
36 std::vector<size_t> num_elec_in_groups_;
37 RT num_elecs_;
38 RT vol;
39 RT G0mag;
40 const std::vector<FT*>& F_;
41 bool SK_enabled;
42
43 public:
J2KECorrection(const ParticleSet & targetPtcl,const std::vector<FT * > & F)44 J2KECorrection(const ParticleSet& targetPtcl, const std::vector<FT*>& F)
45 : num_groups_(targetPtcl.groups()),
46 num_elecs_(targetPtcl.getTotalNum()),
47 vol(targetPtcl.Lattice.Volume),
48 F_(F),
49 SK_enabled(targetPtcl.SK != nullptr)
50 {
51 // compute num_elec_in_groups_
52 num_elec_in_groups_.reserve(3);
53 for (int i = 0; i < num_groups_; i++)
54 num_elec_in_groups_.push_back(targetPtcl.last(i) - targetPtcl.first(i));
55
56 if (SK_enabled)
57 G0mag = std::sqrt(targetPtcl.SK->KLists.ksq[0]);
58 }
59
computeKEcorr()60 RT computeKEcorr()
61 {
62 if (!SK_enabled)
63 return 0;
64
65 const int numPoints = 1000;
66 RT uk = 0.0;
67 RT a = 1.0;
68
69 for (int i = 0; i < num_groups_; i++)
70 {
71 int Ni = num_elec_in_groups_[i];
72 for (int j = 0; j < num_groups_; j++)
73 {
74 int Nj = num_elec_in_groups_[j];
75 if (F_[i * num_groups_ + j])
76 {
77 FT& ufunc = *(F_[i * num_groups_ + j]);
78 RT radius = ufunc.cutoff_radius;
79 RT k = G0mag;
80 RT dr = radius / (RT)(numPoints - 1);
81 for (int ir = 0; ir < numPoints; ir++)
82 {
83 RT r = dr * (RT)ir;
84 RT u = ufunc.evaluate(r);
85 uk += 0.5 * 4.0 * M_PI * r * std::sin(k * r) / k * u * dr * (RT)Nj / (RT)(Ni + Nj);
86 }
87 }
88 }
89 }
90 for (int iter = 0; iter < 20; iter++)
91 a = uk / (4.0 * M_PI * (1.0 / (G0mag * G0mag) - 1.0 / (G0mag * G0mag + 1.0 / a)));
92 return 4.0 * M_PI * a / (4.0 * vol) * num_elecs_;
93 }
94 };
95
96 /** @ingroup WaveFunctionComponent
97 * @brief Specialization for two-body Jastrow function using multiple functors
98 *
99 * Each pair-type can have distinct function \f$u(r_{ij})\f$.
100 * For electrons, distinct pair correlation functions are used
101 * for spins up-up/down-down and up-down/down-up.
102 *
103 * Based on J2OrbitalSoA.h with these considerations
104 * - DistanceTableData using SoA containers
105 * - support mixed precision: FT::real_type != OHMMS_PRECISION
106 * - loops over the groups: elminated PairID
107 * - support simd function
108 * - double the loop counts
109 * - Memory use is O(N).
110 */
111 template<class FT>
112 class J2OrbitalSoA : public WaveFunctionComponent
113 {
114 public:
115 ///alias FuncType
116 using FuncType = FT;
117 ///type of each component U, dU, d2U;
118 using valT = typename FT::real_type;
119 ///element position type
120 using posT = TinyVector<valT, OHMMS_DIM>;
121 ///use the same container
122 using DistRow = DistanceTableData::DistRow;
123 using DisplRow = DistanceTableData::DisplRow;
124 using gContainer_type = VectorSoaContainer<valT, OHMMS_DIM>;
125
126 // Ye: leaving this public is bad but currently used by unit tests.
127 ///Container for \f$F[ig*NumGroups+jg]\f$.
128 std::vector<FT*> F;
129
130 protected:
131 ///number of particles
132 size_t N;
133 ///number of particles + padded
134 size_t N_padded;
135 ///number of groups of the target particleset
136 size_t NumGroups;
137 ///diff value
138 RealType DiffVal;
139 ///Correction
140 RealType KEcorr;
141 ///\f$Uat[i] = sum_(j) u_{i,j}\f$
142 Vector<valT> Uat;
143 ///\f$dUat[i] = sum_(j) du_{i,j}\f$
144 gContainer_type dUat;
145 ///\f$d2Uat[i] = sum_(j) d2u_{i,j}\f$
146 Vector<valT> d2Uat;
147 valT cur_Uat;
148 aligned_vector<valT> cur_u, cur_du, cur_d2u;
149 aligned_vector<valT> old_u, old_du, old_d2u;
150 aligned_vector<valT> DistCompressed;
151 aligned_vector<int> DistIndice;
152 ///Uniquue J2 set for cleanup
153 std::map<std::string, FT*> J2Unique;
154 /// e-e table ID
155 const int my_table_ID_;
156 // helper for compute J2 Chiesa KE correction
157 J2KECorrection<RealType, FT> j2_ke_corr_helper;
158
159 public:
160 J2OrbitalSoA(const std::string& obj_name, ParticleSet& p, int tid);
161 J2OrbitalSoA(const J2OrbitalSoA& rhs) = delete;
162 ~J2OrbitalSoA();
163
164 /* initialize storage */
165 void init(ParticleSet& p);
166
167 /** add functor for (ia,ib) pair */
168 void addFunc(int ia, int ib, FT* j);
169
170 /** check in an optimizable parameter
171 * @param o a super set of optimizable variables
172 */
checkInVariables(opt_variables_type & active)173 void checkInVariables(opt_variables_type& active)
174 {
175 myVars.clear();
176 typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
177 while (it != it_end)
178 {
179 (*it).second->checkInVariables(active);
180 (*it).second->checkInVariables(myVars);
181 ++it;
182 }
183 }
184
185 /** check out optimizable variables
186 */
checkOutVariables(const opt_variables_type & active)187 void checkOutVariables(const opt_variables_type& active)
188 {
189 myVars.getIndex(active);
190 Optimizable = myVars.is_optimizable();
191 typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
192 while (it != it_end)
193 {
194 (*it).second->checkOutVariables(active);
195 ++it;
196 }
197 if (dPsi)
198 dPsi->checkOutVariables(active);
199 }
200
201 ///reset the value of all the unique Two-Body Jastrow functions
resetParameters(const opt_variables_type & active)202 void resetParameters(const opt_variables_type& active)
203 {
204 if (!Optimizable)
205 return;
206 typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
207 while (it != it_end)
208 {
209 (*it).second->resetParameters(active);
210 ++it;
211 }
212 if (dPsi)
213 dPsi->resetParameters(active);
214 for (int i = 0; i < myVars.size(); ++i)
215 {
216 int ii = myVars.Index[i];
217 if (ii >= 0)
218 myVars[i] = active[ii];
219 }
220 }
221
222
finalizeOptimization()223 void finalizeOptimization() { KEcorr = j2_ke_corr_helper.computeKEcorr(); }
224
225 /** print the state, e.g., optimizables */
reportStatus(std::ostream & os)226 void reportStatus(std::ostream& os)
227 {
228 typename std::map<std::string, FT*>::iterator it(J2Unique.begin()), it_end(J2Unique.end());
229 while (it != it_end)
230 {
231 (*it).second->myVars.print(os);
232 ++it;
233 }
234 }
235
236 WaveFunctionComponentPtr makeClone(ParticleSet& tqp) const;
237
238 LogValueType evaluateLog(const ParticleSet& P, ParticleSet::ParticleGradient_t& G, ParticleSet::ParticleLaplacian_t& L);
239
240 void evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi);
241
242 /** recompute internal data assuming distance table is fully ready */
243 void recompute(const ParticleSet& P);
244
245 PsiValueType ratio(ParticleSet& P, int iat);
evaluateRatios(const VirtualParticleSet & VP,std::vector<ValueType> & ratios)246 void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
247 {
248 for (int k = 0; k < ratios.size(); ++k)
249 ratios[k] =
250 std::exp(Uat[VP.refPtcl] - computeU(VP.refPS, VP.refPtcl, VP.getDistTable(my_table_ID_).getDistRow(k)));
251 }
252 void evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios);
253
254 GradType evalGrad(ParticleSet& P, int iat);
255
256 PsiValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat);
257
258 void acceptMove(ParticleSet& P, int iat, bool safe_to_delay = false);
restore(int iat)259 inline void restore(int iat) {}
260
261 /** compute G and L after the sweep
262 */
263 LogValueType evaluateGL(const ParticleSet& P,
264 ParticleSet::ParticleGradient_t& G,
265 ParticleSet::ParticleLaplacian_t& L,
266 bool fromscratch = false);
267
registerData(ParticleSet & P,WFBufferType & buf)268 inline void registerData(ParticleSet& P, WFBufferType& buf)
269 {
270 if (Bytes_in_WFBuffer == 0)
271 {
272 Bytes_in_WFBuffer = buf.current();
273 buf.add(Uat.begin(), Uat.end());
274 buf.add(dUat.data(), dUat.end());
275 buf.add(d2Uat.begin(), d2Uat.end());
276 Bytes_in_WFBuffer = buf.current() - Bytes_in_WFBuffer;
277 // free local space
278 Uat.free();
279 dUat.free();
280 d2Uat.free();
281 }
282 else
283 {
284 buf.forward(Bytes_in_WFBuffer);
285 }
286 }
287
copyFromBuffer(ParticleSet & P,WFBufferType & buf)288 inline void copyFromBuffer(ParticleSet& P, WFBufferType& buf)
289 {
290 Uat.attachReference(buf.lendReference<valT>(N), N);
291 dUat.attachReference(N, N_padded, buf.lendReference<valT>(N_padded * OHMMS_DIM));
292 d2Uat.attachReference(buf.lendReference<valT>(N), N);
293 }
294
295 LogValueType updateBuffer(ParticleSet& P, WFBufferType& buf, bool fromscratch = false)
296 {
297 evaluateGL(P, P.G, P.L, false);
298 buf.forward(Bytes_in_WFBuffer);
299 return LogValue;
300 }
301
302 /*@{ internal compute engines*/
computeU(const ParticleSet & P,int iat,const DistRow & dist)303 inline valT computeU(const ParticleSet& P, int iat, const DistRow& dist)
304 {
305 valT curUat(0);
306 const int igt = P.GroupID[iat] * NumGroups;
307 for (int jg = 0; jg < NumGroups; ++jg)
308 {
309 const FuncType& f2(*F[igt + jg]);
310 int iStart = P.first(jg);
311 int iEnd = P.last(jg);
312 curUat += f2.evaluateV(iat, iStart, iEnd, dist.data(), DistCompressed.data());
313 }
314 return curUat;
315 }
316
317 inline void computeU3(const ParticleSet& P,
318 int iat,
319 const DistRow& dist,
320 RealType* restrict u,
321 RealType* restrict du,
322 RealType* restrict d2u,
323 bool triangle = false);
324
325 /** compute gradient
326 */
accumulateG(const valT * restrict du,const DisplRow & displ)327 inline posT accumulateG(const valT* restrict du, const DisplRow& displ) const
328 {
329 posT grad;
330 for (int idim = 0; idim < OHMMS_DIM; ++idim)
331 {
332 const valT* restrict dX = displ.data(idim);
333 valT s = valT();
334
335 #pragma omp simd reduction(+ : s) aligned(du, dX: QMC_SIMD_ALIGNMENT)
336 for (int jat = 0; jat < N; ++jat)
337 s += du[jat] * dX[jat];
338 grad[idim] = s;
339 }
340 return grad;
341 }
342 /**@} */
343
ChiesaKEcorrection()344 RealType ChiesaKEcorrection() { return KEcorr = j2_ke_corr_helper.computeKEcorr(); }
345
KECorrection()346 RealType KECorrection() { return KEcorr; }
347 };
348
349 template<typename FT>
J2OrbitalSoA(const std::string & obj_name,ParticleSet & p,int tid)350 J2OrbitalSoA<FT>::J2OrbitalSoA(const std::string& obj_name, ParticleSet& p, int tid)
351 : WaveFunctionComponent("J2OrbitalSoA", obj_name), my_table_ID_(p.addTable(p)), j2_ke_corr_helper(p, F)
352 {
353 if (myName.empty())
354 throw std::runtime_error("J2OrbitalSoA object name cannot be empty!");
355 init(p);
356 KEcorr = 0.0;
357 }
358
359 template<typename FT>
~J2OrbitalSoA()360 J2OrbitalSoA<FT>::~J2OrbitalSoA()
361 {
362 auto it = J2Unique.begin();
363 while (it != J2Unique.end())
364 {
365 delete ((*it).second);
366 ++it;
367 }
368 } //need to clean up J2Unique
369
370 template<typename FT>
init(ParticleSet & p)371 void J2OrbitalSoA<FT>::init(ParticleSet& p)
372 {
373 N = p.getTotalNum();
374 N_padded = getAlignedSize<valT>(N);
375 NumGroups = p.groups();
376
377 Uat.resize(N);
378 dUat.resize(N);
379 d2Uat.resize(N);
380 cur_u.resize(N);
381 cur_du.resize(N);
382 cur_d2u.resize(N);
383 old_u.resize(N);
384 old_du.resize(N);
385 old_d2u.resize(N);
386 F.resize(NumGroups * NumGroups, nullptr);
387 DistCompressed.resize(N);
388 DistIndice.resize(N);
389 }
390
391 template<typename FT>
addFunc(int ia,int ib,FT * j)392 void J2OrbitalSoA<FT>::addFunc(int ia, int ib, FT* j)
393 {
394 if (ia == ib)
395 {
396 if (ia == 0) //first time, assign everything
397 {
398 int ij = 0;
399 for (int ig = 0; ig < NumGroups; ++ig)
400 for (int jg = 0; jg < NumGroups; ++jg, ++ij)
401 if (F[ij] == nullptr)
402 F[ij] = j;
403 }
404 else
405 F[ia * NumGroups + ib] = j;
406 }
407 else
408 {
409 if (N == 2)
410 {
411 // a very special case, 1 up + 1 down
412 // uu/dd was prevented by the builder
413 for (int ig = 0; ig < NumGroups; ++ig)
414 for (int jg = 0; jg < NumGroups; ++jg)
415 F[ig * NumGroups + jg] = j;
416 }
417 else
418 {
419 // generic case
420 F[ia * NumGroups + ib] = j;
421 F[ib * NumGroups + ia] = j;
422 }
423 }
424 std::stringstream aname;
425 aname << ia << ib;
426 J2Unique[aname.str()] = j;
427 }
428
429 template<typename FT>
makeClone(ParticleSet & tqp)430 WaveFunctionComponentPtr J2OrbitalSoA<FT>::makeClone(ParticleSet& tqp) const
431 {
432 J2OrbitalSoA<FT>* j2copy = new J2OrbitalSoA<FT>(myName, tqp, -1);
433 if (dPsi)
434 j2copy->dPsi = dPsi->makeClone(tqp);
435 std::map<const FT*, FT*> fcmap;
436 for (int ig = 0; ig < NumGroups; ++ig)
437 for (int jg = ig; jg < NumGroups; ++jg)
438 {
439 int ij = ig * NumGroups + jg;
440 if (F[ij] == 0)
441 continue;
442 typename std::map<const FT*, FT*>::iterator fit = fcmap.find(F[ij]);
443 if (fit == fcmap.end())
444 {
445 FT* fc = new FT(*F[ij]);
446 j2copy->addFunc(ig, jg, fc);
447 //if (dPsi) (j2copy->dPsi)->addFunc(aname.str(),ig,jg,fc);
448 fcmap[F[ij]] = fc;
449 }
450 }
451 j2copy->KEcorr = KEcorr;
452 j2copy->Optimizable = Optimizable;
453 return j2copy;
454 }
455
456 /** intenal function to compute \f$\sum_j u(r_j), du/dr, d2u/dr2\f$
457 * @param P particleset
458 * @param iat particle index
459 * @param dist starting distance
460 * @param u starting value
461 * @param du starting first deriv
462 * @param d2u starting second deriv
463 */
464 template<typename FT>
computeU3(const ParticleSet & P,int iat,const DistRow & dist,RealType * restrict u,RealType * restrict du,RealType * restrict d2u,bool triangle)465 inline void J2OrbitalSoA<FT>::computeU3(const ParticleSet& P,
466 int iat,
467 const DistRow& dist,
468 RealType* restrict u,
469 RealType* restrict du,
470 RealType* restrict d2u,
471 bool triangle)
472 {
473 const int jelmax = triangle ? iat : N;
474 constexpr valT czero(0);
475 std::fill_n(u, jelmax, czero);
476 std::fill_n(du, jelmax, czero);
477 std::fill_n(d2u, jelmax, czero);
478
479 const int igt = P.GroupID[iat] * NumGroups;
480 for (int jg = 0; jg < NumGroups; ++jg)
481 {
482 const FuncType& f2(*F[igt + jg]);
483 int iStart = P.first(jg);
484 int iEnd = std::min(jelmax, P.last(jg));
485 f2.evaluateVGL(iat, iStart, iEnd, dist.data(), u, du, d2u, DistCompressed.data(), DistIndice.data());
486 }
487 //u[iat]=czero;
488 //du[iat]=czero;
489 //d2u[iat]=czero;
490 }
491
492 template<typename FT>
ratio(ParticleSet & P,int iat)493 typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratio(ParticleSet& P, int iat)
494 {
495 //only ratio, ready to compute it again
496 UpdateMode = ORB_PBYP_RATIO;
497 cur_Uat = computeU(P, iat, P.getDistTable(my_table_ID_).getTempDists());
498 return std::exp(static_cast<PsiValueType>(Uat[iat] - cur_Uat));
499 }
500
501 template<typename FT>
evaluateRatiosAlltoOne(ParticleSet & P,std::vector<ValueType> & ratios)502 inline void J2OrbitalSoA<FT>::evaluateRatiosAlltoOne(ParticleSet& P, std::vector<ValueType>& ratios)
503 {
504 const auto& d_table = P.getDistTable(my_table_ID_);
505 const auto& dist = d_table.getTempDists();
506
507 for (int ig = 0; ig < NumGroups; ++ig)
508 {
509 const int igt = ig * NumGroups;
510 valT sumU(0);
511 for (int jg = 0; jg < NumGroups; ++jg)
512 {
513 const FuncType& f2(*F[igt + jg]);
514 int iStart = P.first(jg);
515 int iEnd = P.last(jg);
516 sumU += f2.evaluateV(-1, iStart, iEnd, dist.data(), DistCompressed.data());
517 }
518
519 for (int i = P.first(ig); i < P.last(ig); ++i)
520 {
521 // remove self-interaction
522 const valT Uself = F[igt + ig]->evaluate(dist[i]);
523 ratios[i] = std::exp(Uat[i] + Uself - sumU);
524 }
525 }
526 }
527
528 template<typename FT>
evalGrad(ParticleSet & P,int iat)529 typename J2OrbitalSoA<FT>::GradType J2OrbitalSoA<FT>::evalGrad(ParticleSet& P, int iat)
530 {
531 return GradType(dUat[iat]);
532 }
533
534 template<typename FT>
ratioGrad(ParticleSet & P,int iat,GradType & grad_iat)535 typename J2OrbitalSoA<FT>::PsiValueType J2OrbitalSoA<FT>::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
536 {
537 UpdateMode = ORB_PBYP_PARTIAL;
538
539 computeU3(P, iat, P.getDistTable(my_table_ID_).getTempDists(), cur_u.data(), cur_du.data(), cur_d2u.data());
540 cur_Uat = simd::accumulate_n(cur_u.data(), N, valT());
541 DiffVal = Uat[iat] - cur_Uat;
542 grad_iat += accumulateG(cur_du.data(), P.getDistTable(my_table_ID_).getTempDispls());
543 return std::exp(static_cast<PsiValueType>(DiffVal));
544 }
545
546 template<typename FT>
acceptMove(ParticleSet & P,int iat,bool safe_to_delay)547 void J2OrbitalSoA<FT>::acceptMove(ParticleSet& P, int iat, bool safe_to_delay)
548 {
549 // get the old u, du, d2u
550 const auto& d_table = P.getDistTable(my_table_ID_);
551 computeU3(P, iat, d_table.getOldDists(), old_u.data(), old_du.data(), old_d2u.data());
552 if (UpdateMode == ORB_PBYP_RATIO)
553 { //ratio-only during the move; need to compute derivatives
554 const auto& dist = d_table.getTempDists();
555 computeU3(P, iat, dist, cur_u.data(), cur_du.data(), cur_d2u.data());
556 }
557
558 valT cur_d2Uat(0);
559 const auto& new_dr = d_table.getTempDispls();
560 const auto& old_dr = d_table.getOldDispls();
561 constexpr valT lapfac = OHMMS_DIM - RealType(1);
562 #pragma omp simd reduction(+ : cur_d2Uat)
563 for (int jat = 0; jat < N; jat++)
564 {
565 const valT du = cur_u[jat] - old_u[jat];
566 const valT newl = cur_d2u[jat] + lapfac * cur_du[jat];
567 const valT dl = old_d2u[jat] + lapfac * old_du[jat] - newl;
568 Uat[jat] += du;
569 d2Uat[jat] += dl;
570 cur_d2Uat -= newl;
571 }
572 posT cur_dUat;
573 for (int idim = 0; idim < OHMMS_DIM; ++idim)
574 {
575 const valT* restrict new_dX = new_dr.data(idim);
576 const valT* restrict old_dX = old_dr.data(idim);
577 const valT* restrict cur_du_pt = cur_du.data();
578 const valT* restrict old_du_pt = old_du.data();
579 valT* restrict save_g = dUat.data(idim);
580 valT cur_g = cur_dUat[idim];
581 #pragma omp simd reduction(+ : cur_g) aligned(old_dX, new_dX, save_g, cur_du_pt, old_du_pt: QMC_SIMD_ALIGNMENT)
582 for (int jat = 0; jat < N; jat++)
583 {
584 const valT newg = cur_du_pt[jat] * new_dX[jat];
585 const valT dg = newg - old_du_pt[jat] * old_dX[jat];
586 save_g[jat] -= dg;
587 cur_g += newg;
588 }
589 cur_dUat[idim] = cur_g;
590 }
591 LogValue += Uat[iat] - cur_Uat;
592 Uat[iat] = cur_Uat;
593 dUat(iat) = cur_dUat;
594 d2Uat[iat] = cur_d2Uat;
595 }
596
597 template<typename FT>
recompute(const ParticleSet & P)598 void J2OrbitalSoA<FT>::recompute(const ParticleSet& P)
599 {
600 const auto& d_table = P.getDistTable(my_table_ID_);
601 for (int ig = 0; ig < NumGroups; ++ig)
602 {
603 for (int iat = P.first(ig), last = P.last(ig); iat < last; ++iat)
604 {
605 computeU3(P, iat, d_table.getDistRow(iat), cur_u.data(), cur_du.data(), cur_d2u.data(), true);
606 Uat[iat] = simd::accumulate_n(cur_u.data(), iat, valT());
607 posT grad;
608 valT lap(0);
609 const valT* restrict u = cur_u.data();
610 const valT* restrict du = cur_du.data();
611 const valT* restrict d2u = cur_d2u.data();
612 const auto& displ = d_table.getDisplRow(iat);
613 constexpr valT lapfac = OHMMS_DIM - RealType(1);
614 #pragma omp simd reduction(+ : lap) aligned(du, d2u: QMC_SIMD_ALIGNMENT)
615 for (int jat = 0; jat < iat; ++jat)
616 lap += d2u[jat] + lapfac * du[jat];
617 for (int idim = 0; idim < OHMMS_DIM; ++idim)
618 {
619 const valT* restrict dX = displ.data(idim);
620 valT s = valT();
621 #pragma omp simd reduction(+ : s) aligned(du, dX: QMC_SIMD_ALIGNMENT)
622 for (int jat = 0; jat < iat; ++jat)
623 s += du[jat] * dX[jat];
624 grad[idim] = s;
625 }
626 dUat(iat) = grad;
627 d2Uat[iat] = -lap;
628 // add the contribution from the upper triangle
629 #pragma omp simd aligned(u, du, d2u: QMC_SIMD_ALIGNMENT)
630 for (int jat = 0; jat < iat; jat++)
631 {
632 Uat[jat] += u[jat];
633 d2Uat[jat] -= d2u[jat] + lapfac * du[jat];
634 }
635 for (int idim = 0; idim < OHMMS_DIM; ++idim)
636 {
637 valT* restrict save_g = dUat.data(idim);
638 const valT* restrict dX = displ.data(idim);
639 #pragma omp simd aligned(save_g, du, dX: QMC_SIMD_ALIGNMENT)
640 for (int jat = 0; jat < iat; jat++)
641 save_g[jat] -= du[jat] * dX[jat];
642 }
643 }
644 }
645 }
646
647 template<typename FT>
evaluateLog(const ParticleSet & P,ParticleSet::ParticleGradient_t & G,ParticleSet::ParticleLaplacian_t & L)648 typename J2OrbitalSoA<FT>::LogValueType J2OrbitalSoA<FT>::evaluateLog(const ParticleSet& P,
649 ParticleSet::ParticleGradient_t& G,
650 ParticleSet::ParticleLaplacian_t& L)
651 {
652 return evaluateGL(P, G, L, true);
653 }
654
655 template<typename FT>
evaluateGL(const ParticleSet & P,ParticleSet::ParticleGradient_t & G,ParticleSet::ParticleLaplacian_t & L,bool fromscratch)656 WaveFunctionComponent::LogValueType J2OrbitalSoA<FT>::evaluateGL(const ParticleSet& P,
657 ParticleSet::ParticleGradient_t& G,
658 ParticleSet::ParticleLaplacian_t& L,
659 bool fromscratch)
660 {
661 if (fromscratch)
662 recompute(P);
663 LogValue = valT(0);
664 for (int iat = 0; iat < N; ++iat)
665 {
666 LogValue += Uat[iat];
667 G[iat] += dUat[iat];
668 L[iat] += d2Uat[iat];
669 }
670
671 return LogValue = -LogValue * 0.5;
672 }
673
674 template<typename FT>
evaluateHessian(ParticleSet & P,HessVector_t & grad_grad_psi)675 void J2OrbitalSoA<FT>::evaluateHessian(ParticleSet& P, HessVector_t& grad_grad_psi)
676 {
677 LogValue = 0.0;
678 const DistanceTableData& d_ee(P.getDistTable(my_table_ID_));
679 valT dudr, d2udr2;
680
681 Tensor<valT, DIM> ident;
682 grad_grad_psi = 0.0;
683 ident.diagonal(1.0);
684
685 for (int i = 1; i < N; ++i)
686 {
687 const auto& dist = d_ee.getDistRow(i);
688 const auto& displ = d_ee.getDisplRow(i);
689 auto ig = P.GroupID[i];
690 const int igt = ig * NumGroups;
691 for (int j = 0; j < i; ++j)
692 {
693 auto r = dist[j];
694 auto rinv = 1.0 / r;
695 auto dr = displ[j];
696 auto jg = P.GroupID[j];
697 auto uij = F[igt + jg]->evaluate(r, dudr, d2udr2);
698 LogValue -= uij;
699 auto hess = rinv * rinv * outerProduct(dr, dr) * (d2udr2 - dudr * rinv) + ident * dudr * rinv;
700 grad_grad_psi[i] -= hess;
701 grad_grad_psi[j] -= hess;
702 }
703 }
704 }
705
706 } // namespace qmcplusplus
707 #endif
708