1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Ray Shan (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_lj_charmm_coul_long_kokkos.h"
20 #include <cmath>
21 #include <cstring>
22 #include "kokkos.h"
23 #include "atom_kokkos.h"
24 #include "force.h"
25 #include "neighbor.h"
26 #include "neigh_list.h"
27 #include "neigh_request.h"
28 #include "update.h"
29 #include "respa.h"
30 #include "memory_kokkos.h"
31 #include "error.h"
32 #include "atom_masks.h"
33 
34 using namespace LAMMPS_NS;
35 
36 #define KOKKOS_CUDA_MAX_THREADS 256
37 #define KOKKOS_CUDA_MIN_BLOCKS 8
38 
39 
40 #define EWALD_F   1.12837917
41 #define EWALD_P   0.3275911
42 #define A1        0.254829592
43 #define A2       -0.284496736
44 #define A3        1.421413741
45 #define A4       -1.453152027
46 #define A5        1.061405429
47 
48 /* ---------------------------------------------------------------------- */
49 
50 template<class DeviceType>
PairLJCharmmCoulLongKokkos(LAMMPS * lmp)51 PairLJCharmmCoulLongKokkos<DeviceType>::PairLJCharmmCoulLongKokkos(LAMMPS *lmp):PairLJCharmmCoulLong(lmp)
52 {
53   respa_enable = 0;
54 
55   kokkosable = 1;
56   atomKK = (AtomKokkos *) atom;
57   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
58   datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
59   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
60 }
61 
62 /* ---------------------------------------------------------------------- */
63 
64 template<class DeviceType>
~PairLJCharmmCoulLongKokkos()65 PairLJCharmmCoulLongKokkos<DeviceType>::~PairLJCharmmCoulLongKokkos()
66 {
67   if (copymode) return;
68 
69   if (allocated) {
70     memoryKK->destroy_kokkos(k_eatom,eatom);
71     memoryKK->destroy_kokkos(k_vatom,vatom);
72     memoryKK->destroy_kokkos(k_cutsq,cutsq);
73   }
74 }
75 
76 /* ---------------------------------------------------------------------- */
77 
78 template<class DeviceType>
compute(int eflag_in,int vflag_in)79 void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
80 {
81   eflag = eflag_in;
82   vflag = vflag_in;
83 
84   if (neighflag == FULL) no_virial_fdotr_compute = 1;
85 
86   ev_init(eflag,vflag,0);
87 
88   // reallocate per-atom arrays if necessary
89 
90   if (eflag_atom) {
91     memoryKK->destroy_kokkos(k_eatom,eatom);
92     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
93     d_eatom = k_eatom.view<DeviceType>();
94   }
95   if (vflag_atom) {
96     memoryKK->destroy_kokkos(k_vatom,vatom);
97     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
98     d_vatom = k_vatom.view<DeviceType>();
99   }
100 
101   atomKK->sync(execution_space,datamask_read);
102   k_cutsq.template sync<DeviceType>();
103   k_params.template sync<DeviceType>();
104   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
105   else atomKK->modified(execution_space,F_MASK);
106 
107   x = atomKK->k_x.view<DeviceType>();
108   c_x = atomKK->k_x.view<DeviceType>();
109   f = atomKK->k_f.view<DeviceType>();
110   q = atomKK->k_q.view<DeviceType>();
111   type = atomKK->k_type.view<DeviceType>();
112   nlocal = atom->nlocal;
113   nall = atom->nlocal + atom->nghost;
114   special_lj[0] = force->special_lj[0];
115   special_lj[1] = force->special_lj[1];
116   special_lj[2] = force->special_lj[2];
117   special_lj[3] = force->special_lj[3];
118   special_coul[0] = force->special_coul[0];
119   special_coul[1] = force->special_coul[1];
120   special_coul[2] = force->special_coul[2];
121   special_coul[3] = force->special_coul[3];
122   qqrd2e = force->qqrd2e;
123   newton_pair = force->newton_pair;
124 
125   // loop over neighbors of my atoms
126 
127   copymode = 1;
128 
129   EV_FLOAT ev;
130   if (ncoultablebits)
131     ev = pair_compute<PairLJCharmmCoulLongKokkos<DeviceType>,CoulLongTable<1> >
132       (this,(NeighListKokkos<DeviceType>*)list);
133   else
134     ev = pair_compute<PairLJCharmmCoulLongKokkos<DeviceType>,CoulLongTable<0> >
135       (this,(NeighListKokkos<DeviceType>*)list);
136 
137 
138   if (eflag) {
139     eng_vdwl += ev.evdwl;
140     eng_coul += ev.ecoul;
141   }
142   if (vflag_global) {
143     virial[0] += ev.v[0];
144     virial[1] += ev.v[1];
145     virial[2] += ev.v[2];
146     virial[3] += ev.v[3];
147     virial[4] += ev.v[4];
148     virial[5] += ev.v[5];
149   }
150 
151   if (eflag_atom) {
152     k_eatom.template modify<DeviceType>();
153     k_eatom.template sync<LMPHostType>();
154   }
155 
156   if (vflag_atom) {
157     k_vatom.template modify<DeviceType>();
158     k_vatom.template sync<LMPHostType>();
159   }
160 
161   if (vflag_fdotr) pair_virial_fdotr_compute(this);
162 
163   copymode = 0;
164 }
165 
166 /* ----------------------------------------------------------------------
167    compute LJ CHARMM pair force between atoms i and j
168    ---------------------------------------------------------------------- */
169 template<class DeviceType>
170 template<bool STACKPARAMS, class Specialisation>
171 KOKKOS_INLINE_FUNCTION
172 F_FLOAT PairLJCharmmCoulLongKokkos<DeviceType>::
compute_fpair(const F_FLOAT & rsq,const int &,const int &,const int & itype,const int & jtype) const173 compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
174               const int& itype, const int& jtype) const {
175   const F_FLOAT r2inv = 1.0/rsq;
176   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
177   F_FLOAT forcelj, switch1, switch2, englj;
178 
179   forcelj = r6inv *
180     ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
181      (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
182 
183   if (rsq > cut_lj_innersq) {
184     switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
185               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
186     switch2 = 12.0*rsq * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
187     englj = r6inv *
188             ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
189              (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4));
190     forcelj = forcelj*switch1 + englj*switch2;
191   }
192 
193   return forcelj*r2inv;
194 }
195 
196 /* ----------------------------------------------------------------------
197    compute LJ CHARMM pair potential energy between atoms i and j
198    ---------------------------------------------------------------------- */
199 template<class DeviceType>
200 template<bool STACKPARAMS, class Specialisation>
201 KOKKOS_INLINE_FUNCTION
202 F_FLOAT PairLJCharmmCoulLongKokkos<DeviceType>::
compute_evdwl(const F_FLOAT & rsq,const int &,const int &,const int & itype,const int & jtype) const203 compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
204               const int& itype, const int& jtype) const {
205   const F_FLOAT r2inv = 1.0/rsq;
206   const F_FLOAT r6inv = r2inv*r2inv*r2inv;
207   F_FLOAT englj, switch1;
208 
209   englj = r6inv *
210     ((STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
211      (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4));
212 
213   if (rsq > cut_lj_innersq) {
214     switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
215       (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
216     englj *= switch1;
217   }
218 
219   return englj;
220 
221 }
222 
223 /* ----------------------------------------------------------------------
224    compute coulomb pair force between atoms i and j
225    ---------------------------------------------------------------------- */
226 template<class DeviceType>
227 template<bool STACKPARAMS,  class Specialisation>
228 KOKKOS_INLINE_FUNCTION
229 F_FLOAT PairLJCharmmCoulLongKokkos<DeviceType>::
compute_fcoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const230 compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
231               const int& /*itype*/, const int& /*jtype*/,
232               const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
233   if (Specialisation::DoTable && rsq > tabinnersq) {
234     union_int_float_t rsq_lookup;
235     rsq_lookup.f = rsq;
236     const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
237     const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
238     const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
239     F_FLOAT forcecoul = qtmp*q[j] * table;
240     if (factor_coul < 1.0) {
241       const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
242       const F_FLOAT prefactor = qtmp*q[j] * table;
243       forcecoul -= (1.0-factor_coul)*prefactor;
244     }
245     return forcecoul/rsq;
246   } else {
247     const F_FLOAT r = sqrt(rsq);
248     const F_FLOAT grij = g_ewald * r;
249     const F_FLOAT expm2 = exp(-grij*grij);
250     const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
251     const F_FLOAT rinv = 1.0/r;
252     const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
253     const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
254     F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
255     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
256 
257     return forcecoul*rinv*rinv;
258   }
259 }
260 
261 /* ----------------------------------------------------------------------
262    compute coulomb pair potential energy between atoms i and j
263    ---------------------------------------------------------------------- */
264 template<class DeviceType>
265 template<bool STACKPARAMS, class Specialisation>
266 KOKKOS_INLINE_FUNCTION
267 F_FLOAT PairLJCharmmCoulLongKokkos<DeviceType>::
compute_ecoul(const F_FLOAT & rsq,const int &,const int & j,const int &,const int &,const F_FLOAT & factor_coul,const F_FLOAT & qtmp) const268 compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
269               const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
270   if (Specialisation::DoTable && rsq > tabinnersq) {
271     union_int_float_t rsq_lookup;
272     rsq_lookup.f = rsq;
273     const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
274     const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
275     const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
276     F_FLOAT ecoul = qtmp*q[j] * table;
277     if (factor_coul < 1.0) {
278       const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
279       const F_FLOAT prefactor = qtmp*q[j] * table;
280       ecoul -= (1.0-factor_coul)*prefactor;
281     }
282     return ecoul;
283   } else {
284     const F_FLOAT r = sqrt(rsq);
285     const F_FLOAT grij = g_ewald * r;
286     const F_FLOAT expm2 = exp(-grij*grij);
287     const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
288     const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
289     const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
290     F_FLOAT ecoul = prefactor * erfc;
291     if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
292     return ecoul;
293   }
294 }
295 
296 /* ----------------------------------------------------------------------
297    allocate all arrays
298 ------------------------------------------------------------------------- */
299 
300 template<class DeviceType>
allocate()301 void PairLJCharmmCoulLongKokkos<DeviceType>::allocate()
302 {
303   PairLJCharmmCoulLong::allocate();
304 
305   int n = atom->ntypes;
306 
307   memory->destroy(cutsq);
308   memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
309   d_cutsq = k_cutsq.template view<DeviceType>();
310 
311   d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1);
312 
313   d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1);
314 
315   k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCharmmCoulLong::params",n+1,n+1);
316   params = k_params.template view<DeviceType>();
317 }
318 
319 template<class DeviceType>
init_tables(double cut_coul,double * cut_respa)320 void PairLJCharmmCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
321 {
322   Pair::init_tables(cut_coul,cut_respa);
323 
324   typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
325   typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
326 
327   int ntable = 1;
328   for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
329 
330 
331   // Copy rtable and drtable
332   {
333   host_table_type h_table("HostTable",ntable);
334   table_type d_table("DeviceTable",ntable);
335   for (int i = 0; i < ntable; i++) {
336     h_table(i) = rtable[i];
337   }
338   Kokkos::deep_copy(d_table,h_table);
339   d_rtable = d_table;
340   }
341 
342   {
343   host_table_type h_table("HostTable",ntable);
344   table_type d_table("DeviceTable",ntable);
345   for (int i = 0; i < ntable; i++) {
346     h_table(i) = drtable[i];
347   }
348   Kokkos::deep_copy(d_table,h_table);
349   d_drtable = d_table;
350   }
351 
352   {
353   host_table_type h_table("HostTable",ntable);
354   table_type d_table("DeviceTable",ntable);
355 
356   // Copy ftable and dftable
357   for (int i = 0; i < ntable; i++) {
358     h_table(i) = ftable[i];
359   }
360   Kokkos::deep_copy(d_table,h_table);
361   d_ftable = d_table;
362   }
363 
364   {
365   host_table_type h_table("HostTable",ntable);
366   table_type d_table("DeviceTable",ntable);
367 
368   for (int i = 0; i < ntable; i++) {
369     h_table(i) = dftable[i];
370   }
371   Kokkos::deep_copy(d_table,h_table);
372   d_dftable = d_table;
373   }
374 
375   {
376   host_table_type h_table("HostTable",ntable);
377   table_type d_table("DeviceTable",ntable);
378 
379   // Copy ctable and dctable
380   for (int i = 0; i < ntable; i++) {
381     h_table(i) = ctable[i];
382   }
383   Kokkos::deep_copy(d_table,h_table);
384   d_ctable = d_table;
385   }
386 
387   {
388   host_table_type h_table("HostTable",ntable);
389   table_type d_table("DeviceTable",ntable);
390 
391   for (int i = 0; i < ntable; i++) {
392     h_table(i) = dctable[i];
393   }
394   Kokkos::deep_copy(d_table,h_table);
395   d_dctable = d_table;
396   }
397 
398   {
399   host_table_type h_table("HostTable",ntable);
400   table_type d_table("DeviceTable",ntable);
401 
402   // Copy etable and detable
403   for (int i = 0; i < ntable; i++) {
404     h_table(i) = etable[i];
405   }
406   Kokkos::deep_copy(d_table,h_table);
407   d_etable = d_table;
408   }
409 
410   {
411   host_table_type h_table("HostTable",ntable);
412   table_type d_table("DeviceTable",ntable);
413 
414   for (int i = 0; i < ntable; i++) {
415     h_table(i) = detable[i];
416   }
417   Kokkos::deep_copy(d_table,h_table);
418   d_detable = d_table;
419   }
420 }
421 
422 /* ----------------------------------------------------------------------
423    init specific to this pair style
424 ------------------------------------------------------------------------- */
425 
426 template<class DeviceType>
init_style()427 void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
428 {
429   PairLJCharmmCoulLong::init_style();
430 
431   Kokkos::deep_copy(d_cut_ljsq,cut_ljsq);
432   Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
433 
434   // error if rRESPA with inner levels
435 
436   if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
437     int respa = 0;
438     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
439     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
440     if (respa)
441       error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
442   }
443 
444   // irequest = neigh request made by parent class
445 
446   neighflag = lmp->kokkos->neighflag;
447   int irequest = neighbor->nrequest - 1;
448 
449   neighbor->requests[irequest]->
450     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
451     !std::is_same<DeviceType,LMPDeviceType>::value;
452   neighbor->requests[irequest]->
453     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
454 
455   if (neighflag == FULL) {
456     neighbor->requests[irequest]->full = 1;
457     neighbor->requests[irequest]->half = 0;
458   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
459     neighbor->requests[irequest]->full = 0;
460     neighbor->requests[irequest]->half = 1;
461   } else {
462     error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
463   }
464 }
465 
466 /* ----------------------------------------------------------------------
467    init for one type pair i,j and corresponding j,i
468 ------------------------------------------------------------------------- */
469 
470 template<class DeviceType>
init_one(int i,int j)471 double PairLJCharmmCoulLongKokkos<DeviceType>::init_one(int i, int j)
472 {
473   double cutone = PairLJCharmmCoulLong::init_one(i,j);
474 
475   k_params.h_view(i,j).lj1 = lj1[i][j];
476   k_params.h_view(i,j).lj2 = lj2[i][j];
477   k_params.h_view(i,j).lj3 = lj3[i][j];
478   k_params.h_view(i,j).lj4 = lj4[i][j];
479   //k_params.h_view(i,j).offset = offset[i][j];
480   k_params.h_view(i,j).cut_ljsq = cut_ljsq;
481   k_params.h_view(i,j).cut_coulsq = cut_coulsq;
482 
483   k_params.h_view(j,i) = k_params.h_view(i,j);
484   if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
485     m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
486     m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
487     m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsq;
488     m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsq;
489   }
490 
491   k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
492   k_cutsq.template modify<LMPHostType>();
493   k_params.template modify<LMPHostType>();
494 
495   return cutone;
496 }
497 
498 namespace LAMMPS_NS {
499 template class PairLJCharmmCoulLongKokkos<LMPDeviceType>;
500 #ifdef LMP_KOKKOS_GPU
501 template class PairLJCharmmCoulLongKokkos<LMPHostType>;
502 #endif
503 }
504 
505