1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    This software is distributed under the GNU General Public License.
8 
9    See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11 
12 /* ----------------------------------------------------------------------
13    Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15 
16 #include "pair_lj_sf_dipole_sf_omp.h"
17 
18 #include "atom.h"
19 #include "comm.h"
20 #include "force.h"
21 #include "neigh_list.h"
22 #include "suffix.h"
23 
24 #include <cmath>
25 
26 #include "omp_compat.h"
27 using namespace LAMMPS_NS;
28 
29 /* ---------------------------------------------------------------------- */
30 
PairLJSFDipoleSFOMP(LAMMPS * lmp)31 PairLJSFDipoleSFOMP::PairLJSFDipoleSFOMP(LAMMPS *lmp) :
32   PairLJSFDipoleSF(lmp), ThrOMP(lmp, THR_PAIR)
33 {
34   suffix_flag |= Suffix::OMP;
35   respa_enable = 0;
36 }
37 
38 /* ---------------------------------------------------------------------- */
39 
compute(int eflag,int vflag)40 void PairLJSFDipoleSFOMP::compute(int eflag, int vflag)
41 {
42   ev_init(eflag,vflag);
43 
44   const int nall = atom->nlocal + atom->nghost;
45   const int nthreads = comm->nthreads;
46   const int inum = list->inum;
47 
48 #if defined(_OPENMP)
49 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
50 #endif
51   {
52     int ifrom, ito, tid;
53 
54     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
55     ThrData *thr = fix->get_thr(tid);
56     thr->timer(Timer::START);
57     ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
58 
59     if (evflag) {
60       if (eflag) {
61         if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
62         else eval<1,1,0>(ifrom, ito, thr);
63       } else {
64         if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
65         else eval<1,0,0>(ifrom, ito, thr);
66       }
67     } else {
68       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
69       else eval<0,0,0>(ifrom, ito, thr);
70     }
71 
72     thr->timer(Timer::PAIR);
73     reduce_thr(this, eflag, vflag, thr);
74   } // end of omp parallel region
75 }
76 
77 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
eval(int iifrom,int iito,ThrData * const thr)78 void PairLJSFDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr)
79 {
80   int i,j,ii,jj,jnum,itype,jtype;
81   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
82   double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz;
83   double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
84   double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
85   double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
86   double forcelj,factor_coul,factor_lj;
87   double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
88   double aforcecoulx,aforcecouly,aforcecoulz;
89   double bforcecoulx,bforcecouly,bforcecoulz;
90   double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
91   int *ilist,*jlist,*numneigh,**firstneigh;
92 
93   evdwl = ecoul = 0.0;
94 
95   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
96   dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
97   double * const * const torque = thr->get_torque();
98   const double * _noalias const q = atom->q;
99   const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0];
100   const int * _noalias const type = atom->type;
101   const int nlocal = atom->nlocal;
102   const double * _noalias const special_coul = force->special_coul;
103   const double * _noalias const special_lj = force->special_lj;
104   const double qqrd2e = force->qqrd2e;
105   double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
106 
107   ilist = list->ilist;
108   numneigh = list->numneigh;
109   firstneigh = list->firstneigh;
110 
111   // loop over neighbors of my atoms
112 
113   for (ii = iifrom; ii < iito; ++ii) {
114 
115     i = ilist[ii];
116     xtmp = x[i].x;
117     ytmp = x[i].y;
118     ztmp = x[i].z;
119     qtmp = q[i];
120     itype = type[i];
121     jlist = firstneigh[i];
122     jnum = numneigh[i];
123     fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
124 
125     for (jj = 0; jj < jnum; jj++) {
126       j = jlist[jj];
127       factor_lj = special_lj[sbmask(j)];
128       factor_coul = special_coul[sbmask(j)];
129       j &= NEIGHMASK;
130 
131       delx = xtmp - x[j].x;
132       dely = ytmp - x[j].y;
133       delz = ztmp - x[j].z;
134       rsq = delx*delx + dely*dely + delz*delz;
135       jtype = type[j];
136 
137       if (rsq < cutsq[itype][jtype]) {
138         r2inv = 1.0/rsq;
139         rinv = sqrt(r2inv);
140 
141         // atom can have both a charge and dipole
142         // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
143 
144         forcecoulx = forcecouly = forcecoulz = 0.0;
145         tixcoul = tiycoul = tizcoul = 0.0;
146         tjxcoul = tjycoul = tjzcoul = 0.0;
147 
148         if (rsq < cut_coulsq[itype][jtype]) {
149 
150           rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
151           if (qtmp != 0.0 && q[j] != 0.0) {
152             pre1 = qtmp*q[j]*rinv*(r2inv-rcutcoul2inv);
153 
154             forcecoulx += pre1*delx;
155             forcecouly += pre1*dely;
156             forcecoulz += pre1*delz;
157           }
158 
159           if (mu[i].w > 0.0 && mu[j].w > 0.0) {
160             r3inv = r2inv*rinv;
161             r5inv = r3inv*r2inv;
162 
163             pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z;
164             pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz;
165             pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz;
166 
167             afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
168             pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
169             aforcecoulx = pre1*delx;
170             aforcecouly = pre1*dely;
171             aforcecoulz = pre1*delz;
172 
173             bfac = 1.0 - 4.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv +
174               3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
175             presf = 2.0 * r2inv * pidotr * pjdotr;
176             bforcecoulx = bfac * (pjdotr*mu[i].x+pidotr*mu[j].x-presf*delx);
177             bforcecouly = bfac * (pjdotr*mu[i].y+pidotr*mu[j].y-presf*dely);
178             bforcecoulz = bfac * (pjdotr*mu[i].z+pidotr*mu[j].z-presf*delz);
179 
180             forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
181             forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
182             forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
183 
184             pre2 = 3.0 * bfac * r5inv * pjdotr;
185             pre3 = 3.0 * bfac * r5inv * pidotr;
186             pre4 = -bfac * r3inv;
187 
188             crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y);
189             crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z);
190             crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x);
191 
192             tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely);
193             tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz);
194             tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx);
195             tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely);
196             tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz);
197             tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx);
198           }
199 
200           if (mu[i].w > 0.0 && q[j] != 0.0) {
201             r3inv = r2inv*rinv;
202             r5inv = r3inv*r2inv;
203             pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz;
204             pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
205             pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
206               2.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv;
207             pre2 = q[j] * r3inv * pqfac;
208 
209             forcecoulx += pre2*mu[i].x - pre1*delx;
210             forcecouly += pre2*mu[i].y - pre1*dely;
211             forcecoulz += pre2*mu[i].z - pre1*delz;
212             tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely);
213             tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz);
214             tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx);
215           }
216 
217           if (mu[j].w > 0.0 && qtmp != 0.0) {
218             r3inv = r2inv*rinv;
219             r5inv = r3inv*r2inv;
220             pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz;
221             pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
222             qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
223               2.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv;
224             pre2 = qtmp * r3inv * qpfac;
225 
226             forcecoulx += pre1*delx - pre2*mu[j].x;
227             forcecouly += pre1*dely - pre2*mu[j].y;
228             forcecoulz += pre1*delz - pre2*mu[j].z;
229             tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely);
230             tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz);
231             tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx);
232           }
233         }
234 
235         // LJ interaction
236 
237         if (rsq < cut_ljsq[itype][jtype]) {
238           r6inv = r2inv*r2inv*r2inv;
239           forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
240 
241           rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
242           rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
243           forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
244             rcutlj6inv * rcutlj2inv;
245 
246           forcelj = factor_lj * (forceljcut - forceljsf);
247         } else forcelj = 0.0;
248 
249         // total force
250 
251         fq = factor_coul*qqrd2e*scale[itype][jtype];
252         fx = fq*forcecoulx + delx*forcelj;
253         fy = fq*forcecouly + dely*forcelj;
254         fz = fq*forcecoulz + delz*forcelj;
255 
256         // force & torque accumulation
257 
258         fxtmp += fx;
259         fytmp += fy;
260         fztmp += fz;
261         t1tmp += fq*tixcoul;
262         t2tmp += fq*tiycoul;
263         t3tmp += fq*tizcoul;
264 
265         if (NEWTON_PAIR || j < nlocal) {
266           f[j].x -= fx;
267           f[j].y -= fy;
268           f[j].z -= fz;
269           torque[j][0] += fq*tjxcoul;
270           torque[j][1] += fq*tjycoul;
271           torque[j][2] += fq*tjzcoul;
272         }
273 
274         if (EFLAG) {
275           if (rsq < cut_coulsq[itype][jtype]) {
276             ecoul = (1.0-sqrt(rsq/cut_coulsq[itype][jtype]));
277             ecoul *= ecoul;
278             ecoul *= qtmp * q[j] * rinv;
279             if (mu[i].w > 0.0 && mu[j].w > 0.0)
280               ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
281             if (mu[i].w > 0.0 && q[j] != 0.0)
282               ecoul += -q[j] * r3inv * pqfac * pidotr;
283             if (mu[j].w > 0.0 && qtmp != 0.0)
284               ecoul += qtmp * r3inv * qpfac * pjdotr;
285             ecoul *= factor_coul*qqrd2e*scale[itype][jtype];
286           } else ecoul = 0.0;
287 
288           if (rsq < cut_ljsq[itype][jtype]) {
289             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+
290               rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
291               rsq*rcutlj2inv+
292               rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
293             evdwl *= factor_lj;
294           } else evdwl = 0.0;
295         }
296 
297         if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
298                                      evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
299       }
300     }
301     f[i].x += fxtmp;
302     f[i].y += fytmp;
303     f[i].z += fztmp;
304     torque[i][0] += t1tmp;
305     torque[i][1] += t2tmp;
306     torque[i][2] += t3tmp;
307   }
308 }
309 
310 /* ---------------------------------------------------------------------- */
311 
memory_usage()312 double PairLJSFDipoleSFOMP::memory_usage()
313 {
314   double bytes = memory_usage_thr();
315   bytes += PairLJSFDipoleSF::memory_usage();
316 
317   return bytes;
318 }
319