1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
8 // Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign
9 // Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign
10 // Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory
11 // Ye Luo, yeluo@anl.gov, Argonne National Laboratory
12 // Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
13 //
14 // File created by: Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
15 //////////////////////////////////////////////////////////////////////////////////////
16
17
18 #include "CoulombPBCAB_CUDA.h"
19 #include "Particle/MCWalkerConfiguration.h"
20 #include "QMCDrivers/WalkerProperties.h"
21 #include "config/stdlib/math.hpp"
22
23 namespace qmcplusplus
24 {
25 using WP = WalkerProperties::Indexes;
26
CoulombPBCAB_CUDA(ParticleSet & ions,ParticleSet & elns,bool cloning)27 CoulombPBCAB_CUDA::CoulombPBCAB_CUDA(ParticleSet& ions, ParticleSet& elns, bool cloning)
28 : CoulombPBCAB(ions, elns, cloning),
29 ElecRef(elns),
30 IonRef(ions),
31 SumGPU("CoulombPBCABTemp::SumGPU"),
32 IGPU("CoulombPBCABTemp::IGPU"),
33 L("CoulombPBCABTemp::L"),
34 Linv("CoulombPBCABTemp::Linv"),
35 kpointsGPU("CoulombPBCABTemp::kpointsGPU"),
36 kshellGPU("CoulombPBCABTemp::kshellGPU"),
37 FkGPU("CoulombPBCABTemp::FkGPU"),
38 RhoklistGPU("CoulombPBCABTemp::RhoklistGPU"),
39 RhokElecGPU("CoulombPBCABTemp::RhokElecGPU")
40 {
41 MaxGridPoints = 8191;
42 SpeciesSet& sSet = ions.getSpeciesSet();
43 NumIonSpecies = sSet.getTotalNum();
44 NumIons = ions.getTotalNum();
45 NumElecs = elns.getTotalNum();
46 #ifdef QMC_CUDA
47 gpu::host_vector<CUDA_PRECISION_FULL> LHost(9), LinvHost(9);
48 for (int i = 0; i < 3; i++)
49 for (int j = 0; j < 3; j++)
50 {
51 LHost[3 * i + j] = elns.Lattice.a(j)[i];
52 LinvHost[3 * i + j] = elns.Lattice.b(i)[j];
53 }
54 L = LHost;
55 Linv = LinvHost;
56 // Copy center positions to GPU, sorting by GroupID
57 gpu::host_vector<CUDA_PRECISION> I_host(OHMMS_DIM * NumIons);
58 int index = 0;
59 for (int cgroup = 0; cgroup < NumIonSpecies; cgroup++)
60 {
61 IonFirst.push_back(index);
62 for (int i = 0; i < NumIons; i++)
63 {
64 if (ions.GroupID[i] == cgroup)
65 {
66 for (int dim = 0; dim < OHMMS_DIM; dim++)
67 I_host[OHMMS_DIM * index + dim] = ions.R[i][dim];
68 SortedIons.push_back(ions.R[i]);
69 index++;
70 }
71 }
72 IonLast.push_back(index - 1);
73 }
74 IGPU = I_host;
75 initBreakup(elns);
76 setupLongRangeGPU();
77 #endif
78 }
79
initBreakup(ParticleSet & P)80 void CoulombPBCAB_CUDA::initBreakup(ParticleSet& P)
81 {
82 CoulombPBCAB::initBreakup(P);
83 #ifdef QMC_CUDA
84 V0Spline = new TextureSpline;
85 V0Spline->set(V0->data(), V0->size(), V0->grid().rmin(), V0->grid().rmax());
86 SRSplines.resize(NumIonSpecies, V0Spline);
87 #endif
88 }
89
add(int groupID,std::unique_ptr<RadFunctorType> && ppot)90 void CoulombPBCAB_CUDA::add(int groupID, std::unique_ptr<RadFunctorType>&& ppot)
91 {
92 RadFunctorType* savefunc = Vspec[groupID];
93 CoulombPBCAB::add(groupID, std::move(ppot));
94 RadFunctorType* rfunc = Vspec[groupID];
95 if (rfunc != savefunc)
96 {
97 // Setup CUDA spline
98 SRSplines[groupID] = new TextureSpline();
99 SRSplines[groupID]->set(rfunc->data(), rfunc->size(), rfunc->grid().rmin(), rfunc->grid().rmax());
100 }
101 }
102
setupLongRangeGPU()103 void CoulombPBCAB_CUDA::setupLongRangeGPU()
104 {
105 StructFact& SK = *(ElecRef.SK);
106 Numk = SK.KLists.numk;
107 gpu::host_vector<CUDA_PRECISION_FULL> kpointsHost(OHMMS_DIM * Numk);
108 for (int ik = 0; ik < Numk; ik++)
109 for (int dim = 0; dim < OHMMS_DIM; dim++)
110 kpointsHost[ik * OHMMS_DIM + dim] = SK.KLists.kpts_cart[ik][dim];
111 kpointsGPU = kpointsHost;
112 gpu::host_vector<CUDA_PRECISION_FULL> FkHost(Numk);
113 for (int ik = 0; ik < Numk; ik++)
114 FkHost[ik] = AB->Fk[ik];
115 FkGPU = FkHost;
116 // Now compute Rhok for the ions
117 RhokIonsGPU.resize(NumIonSpecies);
118 gpu::host_vector<CUDA_PRECISION_FULL> RhokIons_host(2 * Numk);
119 for (int sp = 0; sp < NumIonSpecies; sp++)
120 {
121 for (int ik = 0; ik < Numk; ik++)
122 {
123 PosType k = SK.KLists.kpts_cart[ik];
124 RhokIons_host[2 * ik + 0] = 0.0;
125 RhokIons_host[2 * ik + 1] = 0.0;
126 for (int ion = IonFirst[sp]; ion <= IonLast[sp]; ion++)
127 {
128 PosType ipos = SortedIons[ion];
129 RealType phase = dot(k, ipos);
130 double s, c;
131 qmcplusplus::sincos(phase, &s, &c);
132 RhokIons_host[2 * ik + 0] += c;
133 RhokIons_host[2 * ik + 1] += s;
134 }
135 }
136 RhokIonsGPU[sp].set_name("CoulombPBCABTemp::RhokIonsGPU");
137 RhokIonsGPU[sp] = RhokIons_host;
138 }
139 }
140
141
addEnergy(MCWalkerConfiguration & W,std::vector<RealType> & LocalEnergy)142 void CoulombPBCAB_CUDA::addEnergy(MCWalkerConfiguration& W, std::vector<RealType>& LocalEnergy)
143 {
144 std::vector<Walker_t*>& walkers = W.WalkerList;
145 // Short-circuit for constant contribution (e.g. fixed ions)
146 // if (!is_active) {
147 // for (int iw=0; iw<walkers.size(); iw++) {
148 // walkers[iw]->getPropertyBase()[WP::NUMPROPERTIES+myIndex] = Value;
149 // LocalEnergy[iw] += Value;
150 // }
151 // return;
152 // }
153 int nw = walkers.size();
154 int N = NumElecs;
155 if (SumGPU.size() < nw)
156 {
157 SumGPU.resize(nw);
158 SumHost.resize(nw);
159 RhokElecGPU.resize(2 * nw * Numk);
160 RhokIonsGPU.resize(NumIonSpecies);
161 for (int sp = 0; sp < NumIonSpecies; sp++)
162 RhokIonsGPU[sp].resize(2 * Numk);
163 SumHost.resize(nw);
164 RhoklistGPU.resize(nw);
165 RhoklistHost.resize(nw);
166 }
167 for (int iw = 0; iw < nw; iw++)
168 {
169 RhoklistHost[iw] = &(RhokElecGPU.data()[2 * Numk * iw]);
170 SumHost[iw] = 0.0;
171 }
172 RhoklistGPU = RhoklistHost;
173 SumGPU = SumHost;
174 // First, do short-range part
175 std::vector<double> esum(nw, 0.0);
176 for (int sp = 0; sp < NumIonSpecies; sp++)
177 {
178 if (SRSplines[sp])
179 {
180 CoulombAB_SR_Sum(W.RList_GPU.data(), N, IGPU.data(), IonFirst[sp], IonLast[sp], SRSplines[sp]->rMax,
181 SRSplines[sp]->NumPoints, SRSplines[sp]->MyTexture, L.data(), Linv.data(), SumGPU.data(), nw);
182 SumHost = SumGPU;
183 for (int iw = 0; iw < nw; iw++)
184 esum[iw] += Zspec[sp] * Qspec[0] * SumHost[iw];
185 }
186 }
187 // Now, do long-range part:
188 int first = 0;
189 int last = N - 1;
190 eval_rhok_cuda(W.RList_GPU.data(), first, last, kpointsGPU.data(), Numk, RhoklistGPU.data(), nw);
191 for (int sp = 0; sp < NumIonSpecies; sp++)
192 {
193 for (int iw = 0; iw < nw; iw++)
194 SumHost[iw] = 0.0;
195 SumGPU = SumHost;
196 eval_vk_sum_cuda(RhoklistGPU.data(), RhokIonsGPU[sp].data(), FkGPU.data(), Numk, SumGPU.data(), nw);
197 SumHost = SumGPU;
198 for (int iw = 0; iw < nw; iw++)
199 esum[iw] += Zspec[sp] * Qspec[0] * SumHost[iw];
200 }
201 // #ifdef DEBUG_CUDA_RHOK
202 // gpu::host_vector<CUDA_PRECISION_FULL> RhokHost;
203 // RhokHost = RhokGPU;
204 // for (int ik=0; ik<Numk; ik++) {
205 // std::complex<double> rhok(0.0, 0.0);
206 // PosType k = PtclRef.SK->KLists.kpts_cart[ik];
207 // for (int ir=0; ir<N; ir++) {
208 // PosType r = walkers[0]->R[ir];
209 // double s, c;
210 // double phase = dot(k,r);
211 // qmcplusplus::sincos(phase, &s, &c);
212 // rhok += std::complex<double>(c,s);
213 // }
214 // fprintf (stderr, "GPU: %d %14.6f %14.6f\n",
215 // ik, RhokHost[2*ik+0], RhokHost[2*ik+1]);
216 // fprintf (stderr, "CPU: %d %14.6f %14.6f\n",
217 // ik, rhok.real(), rhok.imag());
218 // }
219 // #endif
220 // for (int sp1=0; sp1<NumSpecies; sp1++)
221 // for (int sp2=sp1; sp2<NumSpecies; sp2++)
222 // eval_vk_sum_cuda(RhoklistsGPU[sp1].data(), RhoklistsGPU[sp2].data(),
223 // FkGPU.data(), Numk, SumGPU.data(), nw);
224 // SumHost = SumGPU;
225 for (int iw = 0; iw < walkers.size(); iw++)
226 {
227 // fprintf (stderr, "Energy = %18.6f\n", SumHost[iw]);
228 walkers[iw]->getPropertyBase()[WP::NUMPROPERTIES + myIndex] = esum[iw] + myConst;
229 LocalEnergy[iw] += esum[iw] + myConst;
230 }
231 }
232
233
makeClone(ParticleSet & qp,TrialWaveFunction & psi)234 OperatorBase* CoulombPBCAB_CUDA::makeClone(ParticleSet& qp, TrialWaveFunction& psi)
235 {
236 CoulombPBCAB_CUDA* myclone = new CoulombPBCAB_CUDA(PtclA, qp, true);
237 if (myGrid)
238 myclone->myGrid = new GridType(*myGrid);
239 for (int ig = 0; ig < Vspec.size(); ++ig)
240 {
241 if (Vspec[ig])
242 {
243 RadFunctorType* apot = Vspec[ig]->makeClone();
244 myclone->Vspec[ig] = apot;
245 for (int iat = 0; iat < PtclA.getTotalNum(); ++iat)
246 {
247 if (PtclA.GroupID[iat] == ig)
248 myclone->Vat[iat] = apot;
249 }
250 }
251 myclone->V0Spline = V0Spline;
252 myclone->SRSplines[ig] = SRSplines[ig];
253 }
254 return myclone;
255 }
256
257 } // namespace qmcplusplus
258