1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    This software is distributed under the GNU General Public License.
8 
9    See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11 
12 /* ----------------------------------------------------------------------
13    Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15 
16 #include "pair_lj_cut_coul_long_omp.h"
17 
18 #include "atom.h"
19 #include "comm.h"
20 #include "force.h"
21 #include "neigh_list.h"
22 #include "suffix.h"
23 
24 #include <cmath>
25 
26 #include "omp_compat.h"
27 using namespace LAMMPS_NS;
28 
29 #define EWALD_F   1.12837917
30 #define EWALD_P   0.3275911
31 #define A1        0.254829592
32 #define A2       -0.284496736
33 #define A3        1.421413741
34 #define A4       -1.453152027
35 #define A5        1.061405429
36 
37 /* ---------------------------------------------------------------------- */
38 
PairLJCutCoulLongOMP(LAMMPS * lmp)39 PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) :
40   PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
41 {
42   suffix_flag |= Suffix::OMP;
43   respa_enable = 0;
44   cut_respa = nullptr;
45 }
46 
47 /* ---------------------------------------------------------------------- */
48 
compute(int eflag,int vflag)49 void PairLJCutCoulLongOMP::compute(int eflag, int vflag)
50 {
51   ev_init(eflag,vflag);
52 
53   const int nall = atom->nlocal + atom->nghost;
54   const int nthreads = comm->nthreads;
55   const int inum = list->inum;
56 
57 #if defined(_OPENMP)
58 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
59 #endif
60   {
61     int ifrom, ito, tid;
62 
63     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
64     ThrData *thr = fix->get_thr(tid);
65     thr->timer(Timer::START);
66     ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
67 
68     if (evflag) {
69       if (eflag) {
70         if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
71         else eval<1,1,0>(ifrom, ito, thr);
72       } else {
73         if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
74         else eval<1,0,0>(ifrom, ito, thr);
75       }
76     } else {
77       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
78       else eval<0,0,0>(ifrom, ito, thr);
79     }
80 
81     thr->timer(Timer::PAIR);
82     reduce_thr(this, eflag, vflag, thr);
83   } // end of omp parallel region
84 }
85 
86 /* ---------------------------------------------------------------------- */
87 
88 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
eval(int iifrom,int iito,ThrData * const thr)89 void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
90 {
91   int i,j,ii,jj,jnum,itype,jtype,itable;
92   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
93   double fraction,table;
94   double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
95   double grij,expm2,prefactor,t,erfc;
96   int *ilist,*jlist,*numneigh,**firstneigh;
97 
98   evdwl = ecoul = 0.0;
99 
100   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
101   dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
102   const double * _noalias const q = atom->q;
103   const int * _noalias const type = atom->type;
104   const int nlocal = atom->nlocal;
105   const double * _noalias const special_coul = force->special_coul;
106   const double * _noalias const special_lj = force->special_lj;
107   const double qqrd2e = force->qqrd2e;
108   double fxtmp,fytmp,fztmp;
109 
110   ilist = list->ilist;
111   numneigh = list->numneigh;
112   firstneigh = list->firstneigh;
113 
114   // loop over neighbors of my atoms
115 
116   for (ii = iifrom; ii < iito; ++ii) {
117 
118     i = ilist[ii];
119     qtmp = q[i];
120     xtmp = x[i].x;
121     ytmp = x[i].y;
122     ztmp = x[i].z;
123     itype = type[i];
124     jlist = firstneigh[i];
125     jnum = numneigh[i];
126     fxtmp=fytmp=fztmp=0.0;
127 
128     for (jj = 0; jj < jnum; jj++) {
129       j = jlist[jj];
130       factor_lj = special_lj[sbmask(j)];
131       factor_coul = special_coul[sbmask(j)];
132       j &= NEIGHMASK;
133 
134       delx = xtmp - x[j].x;
135       dely = ytmp - x[j].y;
136       delz = ztmp - x[j].z;
137       rsq = delx*delx + dely*dely + delz*delz;
138       jtype = type[j];
139 
140       if (rsq < cutsq[itype][jtype]) {
141         r2inv = 1.0/rsq;
142 
143         if (rsq < cut_coulsq) {
144           if (!ncoultablebits || rsq <= tabinnersq) {
145             r = sqrt(rsq);
146             grij = g_ewald * r;
147             expm2 = exp(-grij*grij);
148             t = 1.0 / (1.0 + EWALD_P*grij);
149             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
150             prefactor = qqrd2e * qtmp*q[j]/r;
151             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
152             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
153           } else {
154             union_int_float_t rsq_lookup;
155             rsq_lookup.f = rsq;
156             itable = rsq_lookup.i & ncoulmask;
157             itable >>= ncoulshiftbits;
158             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
159             table = ftable[itable] + fraction*dftable[itable];
160             forcecoul = qtmp*q[j] * table;
161             if (factor_coul < 1.0) {
162               table = ctable[itable] + fraction*dctable[itable];
163               prefactor = qtmp*q[j] * table;
164               forcecoul -= (1.0-factor_coul)*prefactor;
165             }
166           }
167         } else forcecoul = 0.0;
168 
169         if (rsq < cut_ljsq[itype][jtype]) {
170           r6inv = r2inv*r2inv*r2inv;
171           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
172           forcelj *= factor_lj;
173         } else forcelj = 0.0;
174 
175         fpair = (forcecoul + forcelj) * r2inv;
176 
177         fxtmp += delx*fpair;
178         fytmp += dely*fpair;
179         fztmp += delz*fpair;
180         if (NEWTON_PAIR || j < nlocal) {
181           f[j].x -= delx*fpair;
182           f[j].y -= dely*fpair;
183           f[j].z -= delz*fpair;
184         }
185 
186         if (EFLAG) {
187           if (rsq < cut_coulsq) {
188             if (!ncoultablebits || rsq <= tabinnersq)
189               ecoul = prefactor*erfc;
190             else {
191               table = etable[itable] + fraction*detable[itable];
192               ecoul = qtmp*q[j] * table;
193             }
194             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
195           } else ecoul = 0.0;
196 
197           if (rsq < cut_ljsq[itype][jtype]) {
198             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
199               offset[itype][jtype];
200             evdwl *= factor_lj;
201           } else evdwl = 0.0;
202         }
203 
204         if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
205                                  evdwl,ecoul,fpair,delx,dely,delz,thr);
206       }
207     }
208     f[i].x += fxtmp;
209     f[i].y += fytmp;
210     f[i].z += fztmp;
211   }
212 }
213 
214 /* ---------------------------------------------------------------------- */
215 
memory_usage()216 double PairLJCutCoulLongOMP::memory_usage()
217 {
218   double bytes = memory_usage_thr();
219   bytes += PairLJCutCoulLong::memory_usage();
220 
221   return bytes;
222 }
223