1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Stan Moore (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_coul_dsf_kokkos.h"
20 #include <cmath>
21 #include "kokkos.h"
22 #include "atom_kokkos.h"
23 #include "force.h"
24 #include "neighbor.h"
25 #include "neigh_list_kokkos.h"
26 #include "neigh_request.h"
27 #include "memory_kokkos.h"
28 #include "math_const.h"
29 #include "error.h"
30 #include "atom_masks.h"
31 
32 using namespace LAMMPS_NS;
33 using namespace MathConst;
34 
35 #define EWALD_F   1.12837917
36 #define EWALD_P   0.3275911
37 #define A1        0.254829592
38 #define A2       -0.284496736
39 #define A3        1.421413741
40 #define A4       -1.453152027
41 #define A5        1.061405429
42 
43 /* ---------------------------------------------------------------------- */
44 
45 template<class DeviceType>
PairCoulDSFKokkos(LAMMPS * lmp)46 PairCoulDSFKokkos<DeviceType>::PairCoulDSFKokkos(LAMMPS *lmp) : PairCoulDSF(lmp)
47 {
48   respa_enable = 0;
49 
50   kokkosable = 1;
51   atomKK = (AtomKokkos *) atom;
52   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
53   datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
54   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
55 }
56 
57 /* ---------------------------------------------------------------------- */
58 
59 template<class DeviceType>
~PairCoulDSFKokkos()60 PairCoulDSFKokkos<DeviceType>::~PairCoulDSFKokkos()
61 {
62   if (!copymode) {
63     memoryKK->destroy_kokkos(k_eatom,eatom);
64     memoryKK->destroy_kokkos(k_vatom,vatom);
65   }
66 }
67 
68 /* ---------------------------------------------------------------------- */
69 
70 template<class DeviceType>
compute(int eflag_in,int vflag_in)71 void PairCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
72 {
73   eflag = eflag_in;
74   vflag = vflag_in;
75 
76   if (neighflag == FULL) no_virial_fdotr_compute = 1;
77 
78   ev_init(eflag,vflag,0);
79 
80   // reallocate per-atom arrays if necessary
81 
82   if (eflag_atom) {
83     memoryKK->destroy_kokkos(k_eatom,eatom);
84     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
85     d_eatom = k_eatom.view<DeviceType>();
86   }
87   if (vflag_atom) {
88     memoryKK->destroy_kokkos(k_vatom,vatom);
89     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
90     d_vatom = k_vatom.view<DeviceType>();
91   }
92 
93   atomKK->sync(execution_space,datamask_read);
94   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
95   else atomKK->modified(execution_space,F_MASK);
96 
97   x = atomKK->k_x.view<DeviceType>();
98   f = atomKK->k_f.view<DeviceType>();
99   q = atomKK->k_q.view<DeviceType>();
100   nlocal = atom->nlocal;
101   nall = atom->nlocal + atom->nghost;
102   newton_pair = force->newton_pair;
103 
104   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
105   d_numneigh = k_list->d_numneigh;
106   d_neighbors = k_list->d_neighbors;
107   d_ilist = k_list->d_ilist;
108 
109   special_coul[0] = force->special_coul[0];
110   special_coul[1] = force->special_coul[1];
111   special_coul[2] = force->special_coul[2];
112   special_coul[3] = force->special_coul[3];
113   qqrd2e = force->qqrd2e;
114 
115   int inum = list->inum;
116 
117   copymode = 1;
118 
119   // loop over neighbors of my atoms
120 
121   EV_FLOAT ev;
122 
123   // compute kernel A
124 
125   if (evflag) {
126     if (neighflag == HALF) {
127       if (newton_pair) {
128         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALF,1,1> >(0,inum),*this,ev);
129       } else {
130         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALF,0,1> >(0,inum),*this,ev);
131       }
132     } else if (neighflag == HALFTHREAD) {
133       if (newton_pair) {
134         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALFTHREAD,1,1> >(0,inum),*this,ev);
135       } else {
136         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALFTHREAD,0,1> >(0,inum),*this,ev);
137       }
138     } else if (neighflag == FULL) {
139       if (newton_pair) {
140         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<FULL,1,1> >(0,inum),*this,ev);
141       } else {
142         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<FULL,0,1> >(0,inum),*this,ev);
143       }
144     }
145   } else {
146     if (neighflag == HALF) {
147       if (newton_pair) {
148         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALF,1,0> >(0,inum),*this);
149       } else {
150         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALF,0,0> >(0,inum),*this);
151       }
152     } else if (neighflag == HALFTHREAD) {
153       if (newton_pair) {
154         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALFTHREAD,1,0> >(0,inum),*this);
155       } else {
156         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<HALFTHREAD,0,0> >(0,inum),*this);
157       }
158     } else if (neighflag == FULL) {
159       if (newton_pair) {
160         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<FULL,1,0> >(0,inum),*this);
161       } else {
162         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulDSFKernelA<FULL,0,0> >(0,inum),*this);
163       }
164     }
165   }
166 
167   if (eflag_global) eng_coul += ev.ecoul;
168   if (vflag_global) {
169     virial[0] += ev.v[0];
170     virial[1] += ev.v[1];
171     virial[2] += ev.v[2];
172     virial[3] += ev.v[3];
173     virial[4] += ev.v[4];
174     virial[5] += ev.v[5];
175   }
176 
177   if (eflag_atom) {
178     k_eatom.template modify<DeviceType>();
179     k_eatom.template sync<LMPHostType>();
180   }
181 
182   if (vflag_atom) {
183     k_vatom.template modify<DeviceType>();
184     k_vatom.template sync<LMPHostType>();
185   }
186 
187   if (vflag_fdotr) pair_virial_fdotr_compute(this);
188 
189   copymode = 0;
190 }
191 
192 /* ----------------------------------------------------------------------
193    init specific to this pair style
194 ------------------------------------------------------------------------- */
195 
196 template<class DeviceType>
init_style()197 void PairCoulDSFKokkos<DeviceType>::init_style()
198 {
199   PairCoulDSF::init_style();
200 
201   // irequest = neigh request made by parent class
202 
203   neighflag = lmp->kokkos->neighflag;
204   int irequest = neighbor->nrequest - 1;
205 
206   neighbor->requests[irequest]->
207     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
208     !std::is_same<DeviceType,LMPDeviceType>::value;
209   neighbor->requests[irequest]->
210     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
211 
212   if (neighflag == FULL) {
213     neighbor->requests[irequest]->full = 1;
214     neighbor->requests[irequest]->half = 0;
215   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
216     neighbor->requests[irequest]->full = 0;
217     neighbor->requests[irequest]->half = 1;
218   } else {
219     error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
220   }
221 }
222 
223 /* ---------------------------------------------------------------------- */
224 
225 ////Specialisation for Neighborlist types Half, HalfThread, Full
226 template<class DeviceType>
227 template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
228 KOKKOS_INLINE_FUNCTION
operator ()(TagPairCoulDSFKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>,const int & ii,EV_FLOAT & ev) const229 void PairCoulDSFKokkos<DeviceType>::operator()(TagPairCoulDSFKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
230 
231   // The f array is atomic for Half/Thread neighbor style
232   Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
233   Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
234 
235   const int i = d_ilist[ii];
236   const X_FLOAT xtmp = x(i,0);
237   const X_FLOAT ytmp = x(i,1);
238   const X_FLOAT ztmp = x(i,2);
239   const F_FLOAT qtmp = q[i];
240 
241   if (eflag) {
242     const F_FLOAT e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
243     if (eflag_global)
244       ev.ecoul += e_self;
245     if (eflag_atom)
246       v_eatom[i] += e_self;
247   }
248 
249   //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
250   const int jnum = d_numneigh[i];
251 
252   F_FLOAT fxtmp = 0.0;
253   F_FLOAT fytmp = 0.0;
254   F_FLOAT fztmp = 0.0;
255 
256   for (int jj = 0; jj < jnum; jj++) {
257     //int j = d_neighbors_i(jj);
258     int j = d_neighbors(i,jj);
259     const F_FLOAT factor_coul = special_coul[sbmask(j)];
260     j &= NEIGHMASK;
261     const X_FLOAT delx = xtmp - x(j,0);
262     const X_FLOAT dely = ytmp - x(j,1);
263     const X_FLOAT delz = ztmp - x(j,2);
264     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
265 
266     if (rsq < cut_coulsq) {
267 
268 
269       const F_FLOAT r2inv = 1.0/rsq;
270       const F_FLOAT r = sqrt(rsq);
271       const F_FLOAT prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
272       const F_FLOAT erfcd = exp(-alpha*alpha*rsq);
273       const F_FLOAT t = 1.0 / (1.0 + EWALD_P*alpha*r);
274       const F_FLOAT erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
275       const F_FLOAT forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
276                                  r*f_shift) * r;
277       const F_FLOAT fpair = forcecoul * r2inv;
278 
279       fxtmp += delx*fpair;
280       fytmp += dely*fpair;
281       fztmp += delz*fpair;
282 
283       if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
284         a_f(j,0) -= delx*fpair;
285         a_f(j,1) -= dely*fpair;
286         a_f(j,2) -= delz*fpair;
287       }
288 
289       if (EVFLAG) {
290         F_FLOAT ecoul = 0.0;
291         if (eflag) {
292           ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
293           ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*ecoul;
294         }
295 
296         if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,ecoul,fpair,delx,dely,delz);
297       }
298 
299     }
300   }
301 
302   a_f(i,0) += fxtmp;
303   a_f(i,1) += fytmp;
304   a_f(i,2) += fztmp;
305 }
306 
307 template<class DeviceType>
308 template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
309 KOKKOS_INLINE_FUNCTION
operator ()(TagPairCoulDSFKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>,const int & ii) const310 void PairCoulDSFKokkos<DeviceType>::operator()(TagPairCoulDSFKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
311   EV_FLOAT ev;
312   this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairCoulDSFKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev);
313 }
314 
315 /* ---------------------------------------------------------------------- */
316 
317 template<class DeviceType>
318 template<int NEIGHFLAG, int NEWTON_PAIR>
319 KOKKOS_INLINE_FUNCTION
ev_tally(EV_FLOAT & ev,const int & i,const int & j,const F_FLOAT & epair,const F_FLOAT & fpair,const F_FLOAT & delx,const F_FLOAT & dely,const F_FLOAT & delz) const320 void PairCoulDSFKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
321       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
322                 const F_FLOAT &dely, const F_FLOAT &delz) const
323 {
324   const int EFLAG = eflag;
325   const int VFLAG = vflag_either;
326 
327   // The eatom and vatom arrays are atomic for Half/Thread neighbor style
328   Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
329   Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
330 
331   if (EFLAG) {
332     if (eflag_atom) {
333       const E_FLOAT epairhalf = 0.5 * epair;
334       if (NEIGHFLAG!=FULL) {
335         if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
336         if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
337       } else {
338         v_eatom[i] += epairhalf;
339       }
340     }
341   }
342 
343   if (VFLAG) {
344     const E_FLOAT v0 = delx*delx*fpair;
345     const E_FLOAT v1 = dely*dely*fpair;
346     const E_FLOAT v2 = delz*delz*fpair;
347     const E_FLOAT v3 = delx*dely*fpair;
348     const E_FLOAT v4 = delx*delz*fpair;
349     const E_FLOAT v5 = dely*delz*fpair;
350 
351     if (vflag_global) {
352       if (NEIGHFLAG!=FULL) {
353         if (NEWTON_PAIR || i < nlocal) {
354           ev.v[0] += 0.5*v0;
355           ev.v[1] += 0.5*v1;
356           ev.v[2] += 0.5*v2;
357           ev.v[3] += 0.5*v3;
358           ev.v[4] += 0.5*v4;
359           ev.v[5] += 0.5*v5;
360         }
361         if (NEWTON_PAIR || j < nlocal) {
362         ev.v[0] += 0.5*v0;
363         ev.v[1] += 0.5*v1;
364         ev.v[2] += 0.5*v2;
365         ev.v[3] += 0.5*v3;
366         ev.v[4] += 0.5*v4;
367         ev.v[5] += 0.5*v5;
368         }
369       } else {
370         ev.v[0] += 0.5*v0;
371         ev.v[1] += 0.5*v1;
372         ev.v[2] += 0.5*v2;
373         ev.v[3] += 0.5*v3;
374         ev.v[4] += 0.5*v4;
375         ev.v[5] += 0.5*v5;
376       }
377     }
378 
379     if (vflag_atom) {
380       if (NEIGHFLAG!=FULL) {
381         if (NEWTON_PAIR || i < nlocal) {
382           v_vatom(i,0) += 0.5*v0;
383           v_vatom(i,1) += 0.5*v1;
384           v_vatom(i,2) += 0.5*v2;
385           v_vatom(i,3) += 0.5*v3;
386           v_vatom(i,4) += 0.5*v4;
387           v_vatom(i,5) += 0.5*v5;
388         }
389         if (NEWTON_PAIR || j < nlocal) {
390         v_vatom(j,0) += 0.5*v0;
391         v_vatom(j,1) += 0.5*v1;
392         v_vatom(j,2) += 0.5*v2;
393         v_vatom(j,3) += 0.5*v3;
394         v_vatom(j,4) += 0.5*v4;
395         v_vatom(j,5) += 0.5*v5;
396         }
397       } else {
398         v_vatom(i,0) += 0.5*v0;
399         v_vatom(i,1) += 0.5*v1;
400         v_vatom(i,2) += 0.5*v2;
401         v_vatom(i,3) += 0.5*v3;
402         v_vatom(i,4) += 0.5*v4;
403         v_vatom(i,5) += 0.5*v5;
404       }
405     }
406   }
407 }
408 
409 /* ---------------------------------------------------------------------- */
410 
411 template<class DeviceType>
412 KOKKOS_INLINE_FUNCTION
sbmask(const int & j) const413 int PairCoulDSFKokkos<DeviceType>::sbmask(const int& j) const {
414   return j >> SBBITS & 3;
415 }
416 
417 namespace LAMMPS_NS {
418 template class PairCoulDSFKokkos<LMPDeviceType>;
419 #ifdef LMP_KOKKOS_GPU
420 template class PairCoulDSFKokkos<LMPHostType>;
421 #endif
422 }
423 
424