1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Ray Shan (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_lj_gromacs_coul_gromacs_kokkos.h"
20 #include <cmath>
21 #include <cstring>
22 #include "kokkos.h"
23 #include "atom_kokkos.h"
24 #include "force.h"
25 #include "neighbor.h"
26 #include "neigh_list.h"
27 #include "neigh_request.h"
28 #include "update.h"
29 #include "respa.h"
30 #include "memory_kokkos.h"
31 #include "error.h"
32 #include "atom_masks.h"
33 
34 using namespace LAMMPS_NS;
35 
36 #define KOKKOS_CUDA_MAX_THREADS 256
37 #define KOKKOS_CUDA_MIN_BLOCKS 8
38 
39 /* ---------------------------------------------------------------------- */
40 
41 template<class DeviceType>
PairLJGromacsCoulGromacsKokkos(LAMMPS * lmp)42 PairLJGromacsCoulGromacsKokkos<DeviceType>::PairLJGromacsCoulGromacsKokkos(LAMMPS *lmp):PairLJGromacsCoulGromacs(lmp)
43 {
44   respa_enable = 0;
45 
46   kokkosable = 1;
47   atomKK = (AtomKokkos *) atom;
48   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
49   datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
50   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
51 }
52 
53 /* ---------------------------------------------------------------------- */
54 
55 template<class DeviceType>
~PairLJGromacsCoulGromacsKokkos()56 PairLJGromacsCoulGromacsKokkos<DeviceType>::~PairLJGromacsCoulGromacsKokkos()
57 {
58   if (copymode) return;
59 
60   if (allocated) {
61     memoryKK->destroy_kokkos(k_eatom,eatom);
62     memoryKK->destroy_kokkos(k_vatom,vatom);
63     memoryKK->destroy_kokkos(k_cutsq,cutsq);
64   }
65 }
66 
67 /* ---------------------------------------------------------------------- */
68 
69 template<class DeviceType>
compute(int eflag_in,int vflag_in)70 void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
71 {
72   eflag = eflag_in;
73   vflag = vflag_in;
74 
75   if (neighflag == FULL) no_virial_fdotr_compute = 1;
76 
77   ev_init(eflag,vflag,0);
78 
79   // reallocate per-atom arrays if necessary
80 
81   if (eflag_atom) {
82     memoryKK->destroy_kokkos(k_eatom,eatom);
83     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
84     d_eatom = k_eatom.view<DeviceType>();
85   }
86   if (vflag_atom) {
87     memoryKK->destroy_kokkos(k_vatom,vatom);
88     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
89     d_vatom = k_vatom.view<DeviceType>();
90   }
91 
92   atomKK->sync(execution_space,datamask_read);
93   k_cutsq.template sync<DeviceType>();
94   k_params.template sync<DeviceType>();
95   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
96   else atomKK->modified(execution_space,F_MASK);
97 
98   x = atomKK->k_x.view<DeviceType>();
99   c_x = atomKK->k_x.view<DeviceType>();
100   f = atomKK->k_f.view<DeviceType>();
101   q = atomKK->k_q.view<DeviceType>();
102   type = atomKK->k_type.view<DeviceType>();
103   nlocal = atom->nlocal;
104   nall = atom->nlocal + atom->nghost;
105   special_lj[0] = force->special_lj[0];
106   special_lj[1] = force->special_lj[1];
107   special_lj[2] = force->special_lj[2];
108   special_lj[3] = force->special_lj[3];
109   special_coul[0] = force->special_coul[0];
110   special_coul[1] = force->special_coul[1];
111   special_coul[2] = force->special_coul[2];
112   special_coul[3] = force->special_coul[3];
113   qqrd2e = force->qqrd2e;
114   newton_pair = force->newton_pair;
115 
116   // loop over neighbors of my atoms
117 
118   copymode = 1;
119 
120   EV_FLOAT ev;
121   if (ncoultablebits)
122     ev = pair_compute<PairLJGromacsCoulGromacsKokkos<DeviceType>,CoulLongTable<1> >
123       (this,(NeighListKokkos<DeviceType>*)list);
124   else
125     ev = pair_compute<PairLJGromacsCoulGromacsKokkos<DeviceType>,CoulLongTable<0> >
126       (this,(NeighListKokkos<DeviceType>*)list);
127 
128 
129   if (eflag) {
130     eng_vdwl += ev.evdwl;
131     eng_coul += ev.ecoul;
132   }
133   if (vflag_global) {
134     virial[0] += ev.v[0];
135     virial[1] += ev.v[1];
136     virial[2] += ev.v[2];
137     virial[3] += ev.v[3];
138     virial[4] += ev.v[4];
139     virial[5] += ev.v[5];
140   }
141 
142   if (eflag_atom) {
143     k_eatom.template modify<DeviceType>();
144     k_eatom.template sync<LMPHostType>();
145   }
146 
147   if (vflag_atom) {
148     k_vatom.template modify<DeviceType>();
149     k_vatom.template sync<LMPHostType>();
150   }
151 
152   if (vflag_fdotr) pair_virial_fdotr_compute(this);
153 
154   copymode = 0;
155 }
156 
157 /* ----------------------------------------------------------------------
158    compute LJ GROMACS pair force between atoms i and j
159    ---------------------------------------------------------------------- */
160 template<class DeviceType>
161 template<bool STACKPARAMS, class Specialisation>
162 KOKKOS_INLINE_FUNCTION
163 F_FLOAT PairLJGromacsCoulGromacsKokkos<DeviceType>::
compute_fpair(const F_FLOAT & rsq,const int &,const int &,const int & itype,const int & jtype) const164 compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
165               const int& itype, const int& jtype) const {
166 
167   const F_FLOAT r2inv = 1.0/rsq;
168   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
169   F_FLOAT forcelj = r6inv *
170     ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
171      (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
172 
173   if (rsq > cut_lj_innersq) {
174     const F_FLOAT r = sqrt(rsq);
175     const F_FLOAT tlj = r - cut_lj_inner;
176     const F_FLOAT fswitch = r*tlj*tlj*
177             ((STACKPARAMS?m_params[itype][jtype].ljsw1:params(itype,jtype).ljsw1) +
178              (STACKPARAMS?m_params[itype][jtype].ljsw2:params(itype,jtype).ljsw2)*tlj);
179     forcelj += fswitch;
180   }
181   return forcelj*r2inv;
182 }
183 
184 /* ----------------------------------------------------------------------
185    compute LJ GROMACS pair potential energy between atoms i and j
186    ---------------------------------------------------------------------- */
187 template<class DeviceType>
188 template<bool STACKPARAMS, class Specialisation>
189 KOKKOS_INLINE_FUNCTION
190 F_FLOAT PairLJGromacsCoulGromacsKokkos<DeviceType>::
compute_evdwl(const F_FLOAT & rsq,const int &,const int &,const int & itype,const int & jtype) const191 compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
192               const int& itype, const int& jtype) const {
193 
194   const F_FLOAT r2inv = 1.0/rsq;
195   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
196   F_FLOAT englj = r6inv *
197     ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
198      (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4));
199   englj += (STACKPARAMS?m_params[itype][jtype].ljsw5:params(itype,jtype).ljsw5);
200 
201   if (rsq > cut_lj_innersq) {
202     const F_FLOAT r = sqrt(rsq);
203     const F_FLOAT tlj = r - cut_lj_inner;
204     const F_FLOAT eswitch = tlj*tlj*tlj *
205             ((STACKPARAMS?m_params[itype][jtype].ljsw3:params(itype,jtype).ljsw3) +
206              (STACKPARAMS?m_params[itype][jtype].ljsw4:params(itype,jtype).ljsw4)*tlj);
207     englj += eswitch;
208   }
209   return englj;
210 }
211 
212 /* ----------------------------------------------------------------------
213    compute coulomb pair force between atoms i and j
214    ---------------------------------------------------------------------- */
215 template<class DeviceType>
216 template<bool STACKPARAMS,  class Specialisation>
217 KOKKOS_INLINE_FUNCTION
218 F_FLOAT PairLJGromacsCoulGromacsKokkos<DeviceType>::
compute_fcoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const219 compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
220               const int& /*itype*/, const int& /*jtype*/,
221               const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
222 
223   const F_FLOAT r2inv = 1.0/rsq;
224   const F_FLOAT rinv = sqrt(r2inv);
225   F_FLOAT forcecoul = qqrd2e*qtmp*q(j) *rinv;
226 
227   if (rsq > cut_coul_innersq) {
228     const F_FLOAT r = 1.0/rinv;
229     const F_FLOAT tc = r - cut_coul_inner;
230     const F_FLOAT fcoulswitch = qqrd2e * qtmp*q(j)*r*tc*tc*(coulsw1 + coulsw2*tc);
231     forcecoul += fcoulswitch;
232   }
233   return forcecoul * r2inv * factor_coul;
234 }
235 
236 /* ----------------------------------------------------------------------
237    compute coulomb pair potential energy between atoms i and j
238    ---------------------------------------------------------------------- */
239 template<class DeviceType>
240 template<bool STACKPARAMS, class Specialisation>
241 KOKKOS_INLINE_FUNCTION
242 F_FLOAT PairLJGromacsCoulGromacsKokkos<DeviceType>::
compute_ecoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const243 compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
244               const int& /*itype*/, const int& /*jtype*/,
245               const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
246 
247   const F_FLOAT r2inv = 1.0/rsq;
248   const F_FLOAT rinv = sqrt(r2inv);
249   F_FLOAT ecoul = qqrd2e * qtmp * q(j) * (rinv-coulsw5);
250 
251   if (rsq > cut_coul_innersq) {
252     const F_FLOAT r = 1.0/rinv;
253     const F_FLOAT tc = r - cut_coul_inner;
254     const F_FLOAT ecoulswitch = tc*tc*tc * (coulsw3 + coulsw4*tc);
255     ecoul += qqrd2e*qtmp*q(j)*ecoulswitch;
256   }
257   return ecoul * factor_coul;
258 }
259 
260 /* ----------------------------------------------------------------------
261    allocate all arrays
262 ------------------------------------------------------------------------- */
263 
264 template<class DeviceType>
allocate()265 void PairLJGromacsCoulGromacsKokkos<DeviceType>::allocate()
266 {
267   PairLJGromacsCoulGromacs::allocate();
268 
269   int n = atom->ntypes;
270 
271   memory->destroy(cutsq);
272   memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
273   d_cutsq = k_cutsq.template view<DeviceType>();
274 
275   d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1);
276 
277   d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1);
278 
279   k_params = Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1);
280   params = k_params.template view<DeviceType>();
281 }
282 
283 template<class DeviceType>
init_tables(double cut_coul,double * cut_respa)284 void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
285 {
286   Pair::init_tables(cut_coul,cut_respa);
287 
288   typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
289   typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
290 
291   int ntable = 1;
292   for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
293 
294 
295   // Copy rtable and drtable
296   {
297   host_table_type h_table("HostTable",ntable);
298   table_type d_table("DeviceTable",ntable);
299   for (int i = 0; i < ntable; i++) {
300     h_table(i) = rtable[i];
301   }
302   Kokkos::deep_copy(d_table,h_table);
303   d_rtable = d_table;
304   }
305 
306   {
307   host_table_type h_table("HostTable",ntable);
308   table_type d_table("DeviceTable",ntable);
309   for (int i = 0; i < ntable; i++) {
310     h_table(i) = drtable[i];
311   }
312   Kokkos::deep_copy(d_table,h_table);
313   d_drtable = d_table;
314   }
315 
316   {
317   host_table_type h_table("HostTable",ntable);
318   table_type d_table("DeviceTable",ntable);
319 
320   // Copy ftable and dftable
321   for (int i = 0; i < ntable; i++) {
322     h_table(i) = ftable[i];
323   }
324   Kokkos::deep_copy(d_table,h_table);
325   d_ftable = d_table;
326   }
327 
328   {
329   host_table_type h_table("HostTable",ntable);
330   table_type d_table("DeviceTable",ntable);
331 
332   for (int i = 0; i < ntable; i++) {
333     h_table(i) = dftable[i];
334   }
335   Kokkos::deep_copy(d_table,h_table);
336   d_dftable = d_table;
337   }
338 
339   {
340   host_table_type h_table("HostTable",ntable);
341   table_type d_table("DeviceTable",ntable);
342 
343   // Copy ctable and dctable
344   for (int i = 0; i < ntable; i++) {
345     h_table(i) = ctable[i];
346   }
347   Kokkos::deep_copy(d_table,h_table);
348   d_ctable = d_table;
349   }
350 
351   {
352   host_table_type h_table("HostTable",ntable);
353   table_type d_table("DeviceTable",ntable);
354 
355   for (int i = 0; i < ntable; i++) {
356     h_table(i) = dctable[i];
357   }
358   Kokkos::deep_copy(d_table,h_table);
359   d_dctable = d_table;
360   }
361 
362   {
363   host_table_type h_table("HostTable",ntable);
364   table_type d_table("DeviceTable",ntable);
365 
366   // Copy etable and detable
367   for (int i = 0; i < ntable; i++) {
368     h_table(i) = etable[i];
369   }
370   Kokkos::deep_copy(d_table,h_table);
371   d_etable = d_table;
372   }
373 
374   {
375   host_table_type h_table("HostTable",ntable);
376   table_type d_table("DeviceTable",ntable);
377 
378   for (int i = 0; i < ntable; i++) {
379     h_table(i) = detable[i];
380   }
381   Kokkos::deep_copy(d_table,h_table);
382   d_detable = d_table;
383   }
384 }
385 
386 
387 /* ----------------------------------------------------------------------
388    global settings
389 ------------------------------------------------------------------------- */
390 
391 template<class DeviceType>
settings(int narg,char ** arg)392 void PairLJGromacsCoulGromacsKokkos<DeviceType>::settings(int narg, char **arg)
393 {
394   if (narg > 2) error->all(FLERR,"Illegal pair_style command");
395 
396   PairLJGromacsCoulGromacs::settings(narg,arg);
397 }
398 
399 /* ----------------------------------------------------------------------
400    init specific to this pair style
401 ------------------------------------------------------------------------- */
402 
403 template<class DeviceType>
init_style()404 void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
405 {
406   PairLJGromacsCoulGromacs::init_style();
407 
408   Kokkos::deep_copy(d_cut_ljsq,cut_ljsq);
409   Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
410 
411   // error if rRESPA with inner levels
412 
413   if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
414     int respa = 0;
415     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
416     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
417     if (respa)
418       error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
419   }
420 
421   // irequest = neigh request made by parent class
422 
423   neighflag = lmp->kokkos->neighflag;
424   int irequest = neighbor->nrequest - 1;
425 
426   neighbor->requests[irequest]->
427     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
428     !std::is_same<DeviceType,LMPDeviceType>::value;
429   neighbor->requests[irequest]->
430     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
431 
432   if (neighflag == FULL) {
433     neighbor->requests[irequest]->full = 1;
434     neighbor->requests[irequest]->half = 0;
435   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
436     neighbor->requests[irequest]->full = 0;
437     neighbor->requests[irequest]->half = 1;
438   } else {
439     error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
440   }
441 }
442 
443 /* ----------------------------------------------------------------------
444    init for one type pair i,j and corresponding j,i
445 ------------------------------------------------------------------------- */
446 
447 template<class DeviceType>
init_one(int i,int j)448 double PairLJGromacsCoulGromacsKokkos<DeviceType>::init_one(int i, int j)
449 {
450   double cutone = PairLJGromacsCoulGromacs::init_one(i,j);
451   double cut_ljsqm = cut_ljsq;
452   double cut_coulsqm = cut_coulsq;
453 
454   k_params.h_view(i,j).lj1 = lj1[i][j];
455   k_params.h_view(i,j).lj2 = lj2[i][j];
456   k_params.h_view(i,j).lj3 = lj3[i][j];
457   k_params.h_view(i,j).lj4 = lj4[i][j];
458   k_params.h_view(i,j).ljsw1 = ljsw1[i][j];
459   k_params.h_view(i,j).ljsw2 = ljsw2[i][j];
460   k_params.h_view(i,j).ljsw3 = ljsw3[i][j];
461   k_params.h_view(i,j).ljsw4 = ljsw4[i][j];
462   k_params.h_view(i,j).ljsw5 = ljsw5[i][j];
463   k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
464   k_params.h_view(i,j).cut_coulsq = cut_coulsqm;
465 
466   k_params.h_view(j,i) = k_params.h_view(i,j);
467   if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
468     m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
469     m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
470     m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
471     m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsqm;
472   }
473 
474   k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
475   k_cutsq.template modify<LMPHostType>();
476   k_params.template modify<LMPHostType>();
477 
478   return cutone;
479 }
480 
481 
482 
483 namespace LAMMPS_NS {
484 template class PairLJGromacsCoulGromacsKokkos<LMPDeviceType>;
485 #ifdef LMP_KOKKOS_GPU
486 template class PairLJGromacsCoulGromacsKokkos<LMPHostType>;
487 #endif
488 }
489 
490