1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "pair_lj_class2_coul_long_kokkos.h"
16 #include <cmath>
17 #include <cstring>
18 #include "kokkos.h"
19 #include "atom_kokkos.h"
20 #include "force.h"
21 #include "neighbor.h"
22 #include "neigh_list.h"
23 #include "neigh_request.h"
24 #include "update.h"
25 #include "respa.h"
26 #include "memory_kokkos.h"
27 #include "error.h"
28 #include "atom_masks.h"
29 
30 using namespace LAMMPS_NS;
31 
32 #define KOKKOS_CUDA_MAX_THREADS 256
33 #define KOKKOS_CUDA_MIN_BLOCKS 8
34 
35 
36 #define EWALD_F   1.12837917
37 #define EWALD_P   0.3275911
38 #define A1        0.254829592
39 #define A2       -0.284496736
40 #define A3        1.421413741
41 #define A4       -1.453152027
42 #define A5        1.061405429
43 
44 /* ---------------------------------------------------------------------- */
45 
46 template<class DeviceType>
PairLJClass2CoulLongKokkos(LAMMPS * lmp)47 PairLJClass2CoulLongKokkos<DeviceType>::PairLJClass2CoulLongKokkos(LAMMPS *lmp):PairLJClass2CoulLong(lmp)
48 {
49   respa_enable = 0;
50 
51   kokkosable = 1;
52   atomKK = (AtomKokkos *) atom;
53   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
54   datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
55   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
56 }
57 
58 /* ---------------------------------------------------------------------- */
59 
60 template<class DeviceType>
~PairLJClass2CoulLongKokkos()61 PairLJClass2CoulLongKokkos<DeviceType>::~PairLJClass2CoulLongKokkos()
62 {
63   if (copymode) return;
64 
65   if (allocated) {
66     memoryKK->destroy_kokkos(k_eatom,eatom);
67     memoryKK->destroy_kokkos(k_vatom,vatom);
68     memoryKK->destroy_kokkos(k_cutsq,cutsq);
69     memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq);
70   }
71 }
72 
73 /* ---------------------------------------------------------------------- */
74 
75 template<class DeviceType>
compute(int eflag_in,int vflag_in)76 void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
77 {
78   eflag = eflag_in;
79   vflag = vflag_in;
80 
81   if (neighflag == FULL) no_virial_fdotr_compute = 1;
82 
83   ev_init(eflag,vflag,0);
84 
85   // reallocate per-atom arrays if necessary
86 
87   if (eflag_atom) {
88     memoryKK->destroy_kokkos(k_eatom,eatom);
89     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
90     d_eatom = k_eatom.view<DeviceType>();
91   }
92   if (vflag_atom) {
93     memoryKK->destroy_kokkos(k_vatom,vatom);
94     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
95     d_vatom = k_vatom.view<DeviceType>();
96   }
97 
98   atomKK->sync(execution_space,datamask_read);
99   k_cutsq.template sync<DeviceType>();
100   k_cut_ljsq.template sync<DeviceType>();
101   k_params.template sync<DeviceType>();
102   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
103   else atomKK->modified(execution_space,F_MASK);
104 
105   x = atomKK->k_x.view<DeviceType>();
106   c_x = atomKK->k_x.view<DeviceType>();
107   f = atomKK->k_f.view<DeviceType>();
108   q = atomKK->k_q.view<DeviceType>();
109   type = atomKK->k_type.view<DeviceType>();
110   nlocal = atom->nlocal;
111   nall = atom->nlocal + atom->nghost;
112   special_lj[0] = force->special_lj[0];
113   special_lj[1] = force->special_lj[1];
114   special_lj[2] = force->special_lj[2];
115   special_lj[3] = force->special_lj[3];
116   special_coul[0] = force->special_coul[0];
117   special_coul[1] = force->special_coul[1];
118   special_coul[2] = force->special_coul[2];
119   special_coul[3] = force->special_coul[3];
120   qqrd2e = force->qqrd2e;
121   newton_pair = force->newton_pair;
122 
123   // loop over neighbors of my atoms
124 
125   copymode = 1;
126 
127   EV_FLOAT ev;
128   if (ncoultablebits)
129     ev = pair_compute<PairLJClass2CoulLongKokkos<DeviceType>,CoulLongTable<1> >
130       (this,(NeighListKokkos<DeviceType>*)list);
131   else
132     ev = pair_compute<PairLJClass2CoulLongKokkos<DeviceType>,CoulLongTable<0> >
133       (this,(NeighListKokkos<DeviceType>*)list);
134 
135 
136   if (eflag) {
137     eng_vdwl += ev.evdwl;
138     eng_coul += ev.ecoul;
139   }
140   if (vflag_global) {
141     virial[0] += ev.v[0];
142     virial[1] += ev.v[1];
143     virial[2] += ev.v[2];
144     virial[3] += ev.v[3];
145     virial[4] += ev.v[4];
146     virial[5] += ev.v[5];
147   }
148 
149   if (eflag_atom) {
150     k_eatom.template modify<DeviceType>();
151     k_eatom.template sync<LMPHostType>();
152   }
153 
154   if (vflag_atom) {
155     k_vatom.template modify<DeviceType>();
156     k_vatom.template sync<LMPHostType>();
157   }
158 
159   if (vflag_fdotr) pair_virial_fdotr_compute(this);
160 
161   copymode = 0;
162 }
163 
164 /* ----------------------------------------------------------------------
165    compute LJ 12-6 pair force between atoms i and j
166    ---------------------------------------------------------------------- */
167 template<class DeviceType>
168 template<bool STACKPARAMS, class Specialisation>
169 KOKKOS_INLINE_FUNCTION
170 F_FLOAT PairLJClass2CoulLongKokkos<DeviceType>::
compute_fpair(const F_FLOAT & rsq,const int & i,const int & j,const int & itype,const int & jtype) const171 compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
172               const int& itype, const int& jtype) const {
173   (void) i;
174   (void) j;
175   const F_FLOAT r2inv = 1.0/rsq;
176   const F_FLOAT rinv = sqrt(r2inv);
177   const F_FLOAT r3inv = r2inv*rinv;
178   const F_FLOAT r6inv = r3inv*r3inv;
179 
180   const F_FLOAT forcelj = r6inv *
181     ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r3inv -
182      (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
183 
184   return forcelj*r2inv;
185 }
186 
187 /* ----------------------------------------------------------------------
188    compute coulomb pair force between atoms i and j
189    ---------------------------------------------------------------------- */
190 template<class DeviceType>
191 template<bool STACKPARAMS,  class Specialisation>
192 KOKKOS_INLINE_FUNCTION
193 F_FLOAT PairLJClass2CoulLongKokkos<DeviceType>::
compute_fcoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const194 compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
195               const int& /*itype*/, const int& /*jtype*/,
196               const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
197   if (Specialisation::DoTable && rsq > tabinnersq) {
198     union_int_float_t rsq_lookup;
199     rsq_lookup.f = rsq;
200     const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
201     const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
202     const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
203     F_FLOAT forcecoul = qtmp*q[j] * table;
204     if (factor_coul < 1.0) {
205       const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
206       const F_FLOAT prefactor = qtmp*q[j] * table;
207       forcecoul -= (1.0-factor_coul)*prefactor;
208     }
209     return forcecoul/rsq;
210   } else {
211     const F_FLOAT r = sqrt(rsq);
212     const F_FLOAT grij = g_ewald * r;
213     const F_FLOAT expm2 = exp(-grij*grij);
214     const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
215     const F_FLOAT rinv = 1.0/r;
216     const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
217     const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
218     F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
219     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
220 
221     return forcecoul*rinv*rinv;
222   }
223 }
224 
225 /* ----------------------------------------------------------------------
226    compute LJ 12-6 pair potential energy between atoms i and j
227    ---------------------------------------------------------------------- */
228 template<class DeviceType>
229 template<bool STACKPARAMS, class Specialisation>
230 KOKKOS_INLINE_FUNCTION
231 F_FLOAT PairLJClass2CoulLongKokkos<DeviceType>::
compute_evdwl(const F_FLOAT & rsq,const int & i,const int & j,const int & itype,const int & jtype) const232 compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
233               const int& itype, const int& jtype) const {
234   (void) i;
235   (void) j;
236   const F_FLOAT r2inv = 1.0/rsq;
237   const F_FLOAT rinv = sqrt(r2inv);
238   const F_FLOAT r3inv = r2inv*rinv;
239   const F_FLOAT r6inv = r3inv*r3inv;
240 
241   return r6inv*((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r3inv -
242                 (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)) -
243                 (STACKPARAMS?m_params[itype][jtype].offset:params(itype,jtype).offset);
244 }
245 
246 /* ----------------------------------------------------------------------
247    compute coulomb pair potential energy between atoms i and j
248    ---------------------------------------------------------------------- */
249 template<class DeviceType>
250 template<bool STACKPARAMS, class Specialisation>
251 KOKKOS_INLINE_FUNCTION
252 F_FLOAT PairLJClass2CoulLongKokkos<DeviceType>::
compute_ecoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const253 compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
254               const int& /*itype*/, const int& /*jtype*/,
255               const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
256   if (Specialisation::DoTable && rsq > tabinnersq) {
257     union_int_float_t rsq_lookup;
258     rsq_lookup.f = rsq;
259     const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
260     const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
261     const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
262     F_FLOAT ecoul = qtmp*q[j] * table;
263     if (factor_coul < 1.0) {
264       const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
265       const F_FLOAT prefactor = qtmp*q[j] * table;
266       ecoul -= (1.0-factor_coul)*prefactor;
267     }
268     return ecoul;
269   } else {
270     const F_FLOAT r = sqrt(rsq);
271     const F_FLOAT grij = g_ewald * r;
272     const F_FLOAT expm2 = exp(-grij*grij);
273     const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
274     const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
275     const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
276     F_FLOAT ecoul = prefactor * erfc;
277     if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
278     return ecoul;
279   }
280 }
281 
282 /* ----------------------------------------------------------------------
283    allocate all arrays
284 ------------------------------------------------------------------------- */
285 
286 template<class DeviceType>
allocate()287 void PairLJClass2CoulLongKokkos<DeviceType>::allocate()
288 {
289   PairLJClass2CoulLong::allocate();
290 
291   int n = atom->ntypes;
292 
293   memory->destroy(cutsq);
294   memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
295   d_cutsq = k_cutsq.template view<DeviceType>();
296 
297   memory->destroy(cut_ljsq);
298   memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq");
299   d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
300 
301   d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1);
302 
303   k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJClass2CoulLong::params",n+1,n+1);
304   params = k_params.template view<DeviceType>();
305 }
306 
307 template<class DeviceType>
init_tables(double cut_coul,double * cut_respa)308 void PairLJClass2CoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
309 {
310   Pair::init_tables(cut_coul,cut_respa);
311 
312   typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
313   typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
314 
315   int ntable = 1;
316   for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
317 
318 
319   // Copy rtable and drtable
320   {
321   host_table_type h_table("HostTable",ntable);
322   table_type d_table("DeviceTable",ntable);
323   for (int i = 0; i < ntable; i++) {
324     h_table(i) = rtable[i];
325   }
326   Kokkos::deep_copy(d_table,h_table);
327   d_rtable = d_table;
328   }
329 
330   {
331   host_table_type h_table("HostTable",ntable);
332   table_type d_table("DeviceTable",ntable);
333   for (int i = 0; i < ntable; i++) {
334     h_table(i) = drtable[i];
335   }
336   Kokkos::deep_copy(d_table,h_table);
337   d_drtable = d_table;
338   }
339 
340   {
341   host_table_type h_table("HostTable",ntable);
342   table_type d_table("DeviceTable",ntable);
343 
344   // Copy ftable and dftable
345   for (int i = 0; i < ntable; i++) {
346     h_table(i) = ftable[i];
347   }
348   Kokkos::deep_copy(d_table,h_table);
349   d_ftable = d_table;
350   }
351 
352   {
353   host_table_type h_table("HostTable",ntable);
354   table_type d_table("DeviceTable",ntable);
355 
356   for (int i = 0; i < ntable; i++) {
357     h_table(i) = dftable[i];
358   }
359   Kokkos::deep_copy(d_table,h_table);
360   d_dftable = d_table;
361   }
362 
363   {
364   host_table_type h_table("HostTable",ntable);
365   table_type d_table("DeviceTable",ntable);
366 
367   // Copy ctable and dctable
368   for (int i = 0; i < ntable; i++) {
369     h_table(i) = ctable[i];
370   }
371   Kokkos::deep_copy(d_table,h_table);
372   d_ctable = d_table;
373   }
374 
375   {
376   host_table_type h_table("HostTable",ntable);
377   table_type d_table("DeviceTable",ntable);
378 
379   for (int i = 0; i < ntable; i++) {
380     h_table(i) = dctable[i];
381   }
382   Kokkos::deep_copy(d_table,h_table);
383   d_dctable = d_table;
384   }
385 
386   {
387   host_table_type h_table("HostTable",ntable);
388   table_type d_table("DeviceTable",ntable);
389 
390   // Copy etable and detable
391   for (int i = 0; i < ntable; i++) {
392     h_table(i) = etable[i];
393   }
394   Kokkos::deep_copy(d_table,h_table);
395   d_etable = d_table;
396   }
397 
398   {
399   host_table_type h_table("HostTable",ntable);
400   table_type d_table("DeviceTable",ntable);
401 
402   for (int i = 0; i < ntable; i++) {
403     h_table(i) = detable[i];
404   }
405   Kokkos::deep_copy(d_table,h_table);
406   d_detable = d_table;
407   }
408 }
409 
410 
411 /* ----------------------------------------------------------------------
412    global settings
413 ------------------------------------------------------------------------- */
414 
415 template<class DeviceType>
settings(int narg,char ** arg)416 void PairLJClass2CoulLongKokkos<DeviceType>::settings(int narg, char **arg)
417 {
418   if (narg > 2) error->all(FLERR,"Illegal pair_style command");
419 
420   PairLJClass2CoulLong::settings(narg,arg);
421 }
422 
423 /* ----------------------------------------------------------------------
424    init specific to this pair style
425 ------------------------------------------------------------------------- */
426 
427 template<class DeviceType>
init_style()428 void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
429 {
430   PairLJClass2CoulLong::init_style();
431 
432   Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
433 
434   // error if rRESPA with inner levels
435 
436   if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
437     int respa = 0;
438     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
439     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
440     if (respa)
441       error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
442   }
443 
444   // irequest = neigh request made by parent class
445 
446   neighflag = lmp->kokkos->neighflag;
447   int irequest = neighbor->nrequest - 1;
448 
449   neighbor->requests[irequest]->
450     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
451     !std::is_same<DeviceType,LMPDeviceType>::value;
452   neighbor->requests[irequest]->
453     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
454 
455   if (neighflag == FULL) {
456     neighbor->requests[irequest]->full = 1;
457     neighbor->requests[irequest]->half = 0;
458   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
459     neighbor->requests[irequest]->full = 0;
460     neighbor->requests[irequest]->half = 1;
461   } else {
462     error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
463   }
464 }
465 
466 /* ----------------------------------------------------------------------
467    init for one type pair i,j and corresponding j,i
468 ------------------------------------------------------------------------- */
469 
470 template<class DeviceType>
init_one(int i,int j)471 double PairLJClass2CoulLongKokkos<DeviceType>::init_one(int i, int j)
472 {
473   double cutone = PairLJClass2CoulLong::init_one(i,j);
474   double cut_ljsqm = cut_ljsq[i][j];
475 
476   k_params.h_view(i,j).lj1 = lj1[i][j];
477   k_params.h_view(i,j).lj2 = lj2[i][j];
478   k_params.h_view(i,j).lj3 = lj3[i][j];
479   k_params.h_view(i,j).lj4 = lj4[i][j];
480   k_params.h_view(i,j).offset = offset[i][j];
481   k_params.h_view(i,j).cut_ljsq = cut_ljsqm;
482   k_params.h_view(i,j).cut_coulsq = cut_coulsq;
483 
484   k_params.h_view(j,i) = k_params.h_view(i,j);
485   if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
486     m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
487     m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
488     m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsqm;
489     m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsq;
490   }
491 
492   k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
493   k_cutsq.template modify<LMPHostType>();
494   k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i) = cut_ljsqm;
495   k_cut_ljsq.template modify<LMPHostType>();
496   k_params.template modify<LMPHostType>();
497 
498   return cutone;
499 }
500 
501 namespace LAMMPS_NS {
502 template class PairLJClass2CoulLongKokkos<LMPDeviceType>;
503 #ifdef LMP_KOKKOS_GPU
504 template class PairLJClass2CoulLongKokkos<LMPHostType>;
505 #endif
506 }
507 
508