1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
8 //                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 //                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 //                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
11 //
12 // File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
13 //////////////////////////////////////////////////////////////////////////////////////
14 
15 
16 #ifndef QMCPLUSPLUS_NONLOCALECPOTENTIAL_CUDA_H
17 #define QMCPLUSPLUS_NONLOCALECPOTENTIAL_CUDA_H
18 
19 #include "QMCHamiltonians/NonLocalECPotential.h"
20 #include "QMCWaveFunctions/WaveFunctionComponent.h"
21 
22 namespace qmcplusplus
23 {
24 class NonLocalECPotential_CUDA : public NonLocalECPotential
25 {
26 protected:
27   //////////////////////////////////
28   // Vectorized evaluation on GPU //
29   //////////////////////////////////
30   bool UsePBC;
31   int NumIonGroups;
32   std::vector<int> IonFirst, IonLast;
33   gpu::device_vector<CUDA_PRECISION> Ions_GPU, L, Linv;
34   gpu::device_vector<int> Elecs_GPU;
35   gpu::host_vector<int> Elecs_host;
36   gpu::device_vector<CUDA_PRECISION> Dist_GPU;
37   gpu::host_vector<CUDA_PRECISION> Dist_host;
38   gpu::device_vector<int*> Eleclist_GPU;
39   gpu::device_vector<CUDA_PRECISION*> Distlist_GPU;
40   gpu::device_vector<int> NumPairs_GPU;
41   gpu::host_vector<int> NumPairs_host;
42   gpu::host_vector<int*> Eleclist_host;
43   gpu::host_vector<CUDA_PRECISION*> Distlist_host;
44   gpu::host_vector<CUDA_PRECISION*> RatioPoslist_host;
45   gpu::host_vector<CUDA_PRECISION*> Ratiolist_host;
46   gpu::host_vector<CUDA_PRECISION*> CosThetalist_host;
47 
48   int NumElecs;
49   // The maximum number of quadrature points over all the ions species
50   int MaxKnots, MaxPairs, RatiosPerWalker;
51   // These are the positions at which we have to evalate the WF ratios
52   // It has size OHMMS_DIM * MaxPairs * MaxKnots * NumWalkers
53   gpu::device_vector<CUDA_PRECISION> RatioPos_GPU, CosTheta_GPU;
54   gpu::host_vector<CUDA_PRECISION> RatioPos_host, CosTheta_host;
55   gpu::device_vector<CUDA_PRECISION*> RatioPoslist_GPU, CosThetalist_GPU;
56 
57   // Quadrature points
58   std::vector<gpu::device_vector<CUDA_PRECISION>> QuadPoints_GPU;
59   std::vector<std::vector<CUDA_PRECISION>> QuadPoints_host;
60   int CurrentNumWalkers;
61 
62   // These are used in calling Psi->NLratios
63   std::vector<NLjob> JobList;
64   std::vector<PosType> QuadPosList;
65   std::vector<ValueType> RatioList;
66 
67 
68   std::vector<PosType> SortedIons;
69 
70   void setupCUDA(ParticleSet& elecs);
71   void resizeCUDA(int nw);
72 
73 public:
74   NonLocalECPotential_CUDA(ParticleSet& ions,
75                            ParticleSet& els,
76                            TrialWaveFunction& psi,
77                            bool usePBC,
78                            bool doForces   = false,
79                            bool enable_DLA = false);
80 
81   OperatorBase* makeClone(ParticleSet& qp, TrialWaveFunction& psi);
82 
83   void addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy);
84   void addEnergy(MCWalkerConfiguration& W,
85                  std::vector<RealType>& LocalEnergy,
86                  std::vector<std::vector<NonLocalData>>& Txy);
87 };
88 
89 
90 } // namespace qmcplusplus
91 
92 #endif
93