1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
8 //                    Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 //                    Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 //                    Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
11 //                    Ye Luo, yeluo@anl.gov, Argonne National Laboratory
12 //                    Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
13 //
14 // File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
15 //////////////////////////////////////////////////////////////////////////////////////
16 
17 
18 #include "CoulombPBCAB_CUDA.h"
19 #include "Particle/MCWalkerConfiguration.h"
20 #include "QMCDrivers/WalkerProperties.h"
21 #include "config/stdlib/math.hpp"
22 
23 namespace qmcplusplus
24 {
25 using WP = WalkerProperties::Indexes;
26 
CoulombPBCAB_CUDA(ParticleSet & ions,ParticleSet & elns,bool cloning)27 CoulombPBCAB_CUDA::CoulombPBCAB_CUDA(ParticleSet& ions, ParticleSet& elns, bool cloning)
28     : CoulombPBCAB(ions, elns, cloning),
29       ElecRef(elns),
30       IonRef(ions),
31       SumGPU("CoulombPBCABTemp::SumGPU"),
32       IGPU("CoulombPBCABTemp::IGPU"),
33       L("CoulombPBCABTemp::L"),
34       Linv("CoulombPBCABTemp::Linv"),
35       kpointsGPU("CoulombPBCABTemp::kpointsGPU"),
36       kshellGPU("CoulombPBCABTemp::kshellGPU"),
37       FkGPU("CoulombPBCABTemp::FkGPU"),
38       RhoklistGPU("CoulombPBCABTemp::RhoklistGPU"),
39       RhokElecGPU("CoulombPBCABTemp::RhokElecGPU")
40 {
41   MaxGridPoints    = 8191;
42   SpeciesSet& sSet = ions.getSpeciesSet();
43   NumIonSpecies    = sSet.getTotalNum();
44   NumIons          = ions.getTotalNum();
45   NumElecs         = elns.getTotalNum();
46 #ifdef QMC_CUDA
47   gpu::host_vector<CUDA_PRECISION_FULL> LHost(9), LinvHost(9);
48   for (int i = 0; i < 3; i++)
49     for (int j = 0; j < 3; j++)
50     {
51       LHost[3 * i + j]    = elns.Lattice.a(j)[i];
52       LinvHost[3 * i + j] = elns.Lattice.b(i)[j];
53     }
54   L    = LHost;
55   Linv = LinvHost;
56   // Copy center positions to GPU, sorting by GroupID
57   gpu::host_vector<CUDA_PRECISION> I_host(OHMMS_DIM * NumIons);
58   int index = 0;
59   for (int cgroup = 0; cgroup < NumIonSpecies; cgroup++)
60   {
61     IonFirst.push_back(index);
62     for (int i = 0; i < NumIons; i++)
63     {
64       if (ions.GroupID[i] == cgroup)
65       {
66         for (int dim = 0; dim < OHMMS_DIM; dim++)
67           I_host[OHMMS_DIM * index + dim] = ions.R[i][dim];
68         SortedIons.push_back(ions.R[i]);
69         index++;
70       }
71     }
72     IonLast.push_back(index - 1);
73   }
74   IGPU = I_host;
75   initBreakup(elns);
76   setupLongRangeGPU();
77 #endif
78 }
79 
initBreakup(ParticleSet & P)80 void CoulombPBCAB_CUDA::initBreakup(ParticleSet& P)
81 {
82   CoulombPBCAB::initBreakup(P);
83 #ifdef QMC_CUDA
84   V0Spline = new TextureSpline;
85   V0Spline->set(V0->data(), V0->size(), V0->grid().rmin(), V0->grid().rmax());
86   SRSplines.resize(NumIonSpecies, V0Spline);
87 #endif
88 }
89 
add(int groupID,std::unique_ptr<RadFunctorType> && ppot)90 void CoulombPBCAB_CUDA::add(int groupID, std::unique_ptr<RadFunctorType>&& ppot)
91 {
92   RadFunctorType* savefunc = Vspec[groupID];
93   CoulombPBCAB::add(groupID, std::move(ppot));
94   RadFunctorType* rfunc = Vspec[groupID];
95   if (rfunc != savefunc)
96   {
97     // Setup CUDA spline
98     SRSplines[groupID] = new TextureSpline();
99     SRSplines[groupID]->set(rfunc->data(), rfunc->size(), rfunc->grid().rmin(), rfunc->grid().rmax());
100   }
101 }
102 
setupLongRangeGPU()103 void CoulombPBCAB_CUDA::setupLongRangeGPU()
104 {
105   StructFact& SK = *(ElecRef.SK);
106   Numk           = SK.KLists.numk;
107   gpu::host_vector<CUDA_PRECISION_FULL> kpointsHost(OHMMS_DIM * Numk);
108   for (int ik = 0; ik < Numk; ik++)
109     for (int dim = 0; dim < OHMMS_DIM; dim++)
110       kpointsHost[ik * OHMMS_DIM + dim] = SK.KLists.kpts_cart[ik][dim];
111   kpointsGPU = kpointsHost;
112   gpu::host_vector<CUDA_PRECISION_FULL> FkHost(Numk);
113   for (int ik = 0; ik < Numk; ik++)
114     FkHost[ik] = AB->Fk[ik];
115   FkGPU = FkHost;
116   // Now compute Rhok for the ions
117   RhokIonsGPU.resize(NumIonSpecies);
118   gpu::host_vector<CUDA_PRECISION_FULL> RhokIons_host(2 * Numk);
119   for (int sp = 0; sp < NumIonSpecies; sp++)
120   {
121     for (int ik = 0; ik < Numk; ik++)
122     {
123       PosType k                 = SK.KLists.kpts_cart[ik];
124       RhokIons_host[2 * ik + 0] = 0.0;
125       RhokIons_host[2 * ik + 1] = 0.0;
126       for (int ion = IonFirst[sp]; ion <= IonLast[sp]; ion++)
127       {
128         PosType ipos   = SortedIons[ion];
129         RealType phase = dot(k, ipos);
130         double s, c;
131         qmcplusplus::sincos(phase, &s, &c);
132         RhokIons_host[2 * ik + 0] += c;
133         RhokIons_host[2 * ik + 1] += s;
134       }
135     }
136     RhokIonsGPU[sp].set_name("CoulombPBCABTemp::RhokIonsGPU");
137     RhokIonsGPU[sp] = RhokIons_host;
138   }
139 }
140 
141 
addEnergy(MCWalkerConfiguration & W,std::vector<RealType> & LocalEnergy)142 void CoulombPBCAB_CUDA::addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy)
143 {
144   std::vector<Walker_t*>& walkers = W.WalkerList;
145   // Short-circuit for constant contribution (e.g. fixed ions)
146   // if (!is_active) {
147   //   for (int iw=0; iw<walkers.size(); iw++) {
148   // 	walkers[iw]->getPropertyBase()[WP::NUMPROPERTIES+myIndex] = Value;
149   // 	LocalEnergy[iw] += Value;
150   //   }
151   //   return;
152   // }
153   int nw = walkers.size();
154   int N  = NumElecs;
155   if (SumGPU.size() < nw)
156   {
157     SumGPU.resize(nw);
158     SumHost.resize(nw);
159     RhokElecGPU.resize(2 * nw * Numk);
160     RhokIonsGPU.resize(NumIonSpecies);
161     for (int sp = 0; sp < NumIonSpecies; sp++)
162       RhokIonsGPU[sp].resize(2 * Numk);
163     SumHost.resize(nw);
164     RhoklistGPU.resize(nw);
165     RhoklistHost.resize(nw);
166   }
167   for (int iw = 0; iw < nw; iw++)
168   {
169     RhoklistHost[iw] = &(RhokElecGPU.data()[2 * Numk * iw]);
170     SumHost[iw]      = 0.0;
171   }
172   RhoklistGPU = RhoklistHost;
173   SumGPU      = SumHost;
174   // First, do short-range part
175   std::vector<double> esum(nw, 0.0);
176   for (int sp = 0; sp < NumIonSpecies; sp++)
177   {
178     if (SRSplines[sp])
179     {
180       CoulombAB_SR_Sum(W.RList_GPU.data(), N, IGPU.data(), IonFirst[sp], IonLast[sp], SRSplines[sp]->rMax,
181                        SRSplines[sp]->NumPoints, SRSplines[sp]->MyTexture, L.data(), Linv.data(), SumGPU.data(), nw);
182       SumHost = SumGPU;
183       for (int iw = 0; iw < nw; iw++)
184         esum[iw] += Zspec[sp] * Qspec[0] * SumHost[iw];
185     }
186   }
187   // Now, do long-range part:
188   int first = 0;
189   int last  = N - 1;
190   eval_rhok_cuda(W.RList_GPU.data(), first, last, kpointsGPU.data(), Numk, RhoklistGPU.data(), nw);
191   for (int sp = 0; sp < NumIonSpecies; sp++)
192   {
193     for (int iw = 0; iw < nw; iw++)
194       SumHost[iw] = 0.0;
195     SumGPU = SumHost;
196     eval_vk_sum_cuda(RhoklistGPU.data(), RhokIonsGPU[sp].data(), FkGPU.data(), Numk, SumGPU.data(), nw);
197     SumHost = SumGPU;
198     for (int iw = 0; iw < nw; iw++)
199       esum[iw] += Zspec[sp] * Qspec[0] * SumHost[iw];
200   }
201   // #ifdef DEBUG_CUDA_RHOK
202   //     gpu::host_vector<CUDA_PRECISION_FULL> RhokHost;
203   //     RhokHost = RhokGPU;
204   //     for (int ik=0; ik<Numk; ik++) {
205   //       std::complex<double> rhok(0.0, 0.0);
206   //       PosType k = PtclRef.SK->KLists.kpts_cart[ik];
207   //       for (int ir=0; ir<N; ir++) {
208   //     	PosType r = walkers[0]->R[ir];
209   //     	double s, c;
210   //     	double phase = dot(k,r);
211   //     	qmcplusplus::sincos(phase, &s, &c);
212   //     	rhok += std::complex<double>(c,s);
213   //       }
214   //       fprintf (stderr, "GPU:   %d   %14.6f  %14.6f\n",
215   //     	       ik, RhokHost[2*ik+0], RhokHost[2*ik+1]);
216   //       fprintf (stderr, "CPU:   %d   %14.6f  %14.6f\n",
217   //     	       ik, rhok.real(), rhok.imag());
218   //     }
219   // #endif
220   //     for (int sp1=0; sp1<NumSpecies; sp1++)
221   //       for (int sp2=sp1; sp2<NumSpecies; sp2++)
222   //     	eval_vk_sum_cuda(RhoklistsGPU[sp1].data(), RhoklistsGPU[sp2].data(),
223   //     			 FkGPU.data(), Numk, SumGPU.data(), nw);
224   //    SumHost = SumGPU;
225   for (int iw = 0; iw < walkers.size(); iw++)
226   {
227     // fprintf (stderr, "Energy = %18.6f\n", SumHost[iw]);
228     walkers[iw]->getPropertyBase()[WP::NUMPROPERTIES + myIndex] = esum[iw] + myConst;
229     LocalEnergy[iw] += esum[iw] + myConst;
230   }
231 }
232 
233 
makeClone(ParticleSet & qp,TrialWaveFunction & psi)234 OperatorBase* CoulombPBCAB_CUDA::makeClone(ParticleSet& qp, TrialWaveFunction& psi)
235 {
236   CoulombPBCAB_CUDA* myclone = new CoulombPBCAB_CUDA(PtclA, qp, true);
237   if (myGrid)
238     myclone->myGrid = new GridType(*myGrid);
239   for (int ig = 0; ig < Vspec.size(); ++ig)
240   {
241     if (Vspec[ig])
242     {
243       RadFunctorType* apot = Vspec[ig]->makeClone();
244       myclone->Vspec[ig]   = apot;
245       for (int iat = 0; iat < PtclA.getTotalNum(); ++iat)
246       {
247         if (PtclA.GroupID[iat] == ig)
248           myclone->Vat[iat] = apot;
249       }
250     }
251     myclone->V0Spline      = V0Spline;
252     myclone->SRSplines[ig] = SRSplines[ig];
253   }
254   return myclone;
255 }
256 
257 } // namespace qmcplusplus
258