1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 This software is distributed under the GNU General Public License.
8
9 See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11
12 /* ----------------------------------------------------------------------
13 Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15
16 #include "pair_lj_sf_dipole_sf_omp.h"
17
18 #include "atom.h"
19 #include "comm.h"
20 #include "force.h"
21 #include "neigh_list.h"
22 #include "suffix.h"
23
24 #include <cmath>
25
26 #include "omp_compat.h"
27 using namespace LAMMPS_NS;
28
29 /* ---------------------------------------------------------------------- */
30
PairLJSFDipoleSFOMP(LAMMPS * lmp)31 PairLJSFDipoleSFOMP::PairLJSFDipoleSFOMP(LAMMPS *lmp) :
32 PairLJSFDipoleSF(lmp), ThrOMP(lmp, THR_PAIR)
33 {
34 suffix_flag |= Suffix::OMP;
35 respa_enable = 0;
36 }
37
38 /* ---------------------------------------------------------------------- */
39
compute(int eflag,int vflag)40 void PairLJSFDipoleSFOMP::compute(int eflag, int vflag)
41 {
42 ev_init(eflag,vflag);
43
44 const int nall = atom->nlocal + atom->nghost;
45 const int nthreads = comm->nthreads;
46 const int inum = list->inum;
47
48 #if defined(_OPENMP)
49 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
50 #endif
51 {
52 int ifrom, ito, tid;
53
54 loop_setup_thr(ifrom, ito, tid, inum, nthreads);
55 ThrData *thr = fix->get_thr(tid);
56 thr->timer(Timer::START);
57 ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
58
59 if (evflag) {
60 if (eflag) {
61 if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
62 else eval<1,1,0>(ifrom, ito, thr);
63 } else {
64 if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
65 else eval<1,0,0>(ifrom, ito, thr);
66 }
67 } else {
68 if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
69 else eval<0,0,0>(ifrom, ito, thr);
70 }
71
72 thr->timer(Timer::PAIR);
73 reduce_thr(this, eflag, vflag, thr);
74 } // end of omp parallel region
75 }
76
77 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
eval(int iifrom,int iito,ThrData * const thr)78 void PairLJSFDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr)
79 {
80 int i,j,ii,jj,jnum,itype,jtype;
81 double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
82 double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz;
83 double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
84 double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
85 double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
86 double forcelj,factor_coul,factor_lj;
87 double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
88 double aforcecoulx,aforcecouly,aforcecoulz;
89 double bforcecoulx,bforcecouly,bforcecoulz;
90 double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
91 int *ilist,*jlist,*numneigh,**firstneigh;
92
93 evdwl = ecoul = 0.0;
94
95 const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
96 dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
97 double * const * const torque = thr->get_torque();
98 const double * _noalias const q = atom->q;
99 const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0];
100 const int * _noalias const type = atom->type;
101 const int nlocal = atom->nlocal;
102 const double * _noalias const special_coul = force->special_coul;
103 const double * _noalias const special_lj = force->special_lj;
104 const double qqrd2e = force->qqrd2e;
105 double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
106
107 ilist = list->ilist;
108 numneigh = list->numneigh;
109 firstneigh = list->firstneigh;
110
111 // loop over neighbors of my atoms
112
113 for (ii = iifrom; ii < iito; ++ii) {
114
115 i = ilist[ii];
116 xtmp = x[i].x;
117 ytmp = x[i].y;
118 ztmp = x[i].z;
119 qtmp = q[i];
120 itype = type[i];
121 jlist = firstneigh[i];
122 jnum = numneigh[i];
123 fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
124
125 for (jj = 0; jj < jnum; jj++) {
126 j = jlist[jj];
127 factor_lj = special_lj[sbmask(j)];
128 factor_coul = special_coul[sbmask(j)];
129 j &= NEIGHMASK;
130
131 delx = xtmp - x[j].x;
132 dely = ytmp - x[j].y;
133 delz = ztmp - x[j].z;
134 rsq = delx*delx + dely*dely + delz*delz;
135 jtype = type[j];
136
137 if (rsq < cutsq[itype][jtype]) {
138 r2inv = 1.0/rsq;
139 rinv = sqrt(r2inv);
140
141 // atom can have both a charge and dipole
142 // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
143
144 forcecoulx = forcecouly = forcecoulz = 0.0;
145 tixcoul = tiycoul = tizcoul = 0.0;
146 tjxcoul = tjycoul = tjzcoul = 0.0;
147
148 if (rsq < cut_coulsq[itype][jtype]) {
149
150 rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
151 if (qtmp != 0.0 && q[j] != 0.0) {
152 pre1 = qtmp*q[j]*rinv*(r2inv-rcutcoul2inv);
153
154 forcecoulx += pre1*delx;
155 forcecouly += pre1*dely;
156 forcecoulz += pre1*delz;
157 }
158
159 if (mu[i].w > 0.0 && mu[j].w > 0.0) {
160 r3inv = r2inv*rinv;
161 r5inv = r3inv*r2inv;
162
163 pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z;
164 pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz;
165 pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz;
166
167 afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
168 pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
169 aforcecoulx = pre1*delx;
170 aforcecouly = pre1*dely;
171 aforcecoulz = pre1*delz;
172
173 bfac = 1.0 - 4.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv +
174 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
175 presf = 2.0 * r2inv * pidotr * pjdotr;
176 bforcecoulx = bfac * (pjdotr*mu[i].x+pidotr*mu[j].x-presf*delx);
177 bforcecouly = bfac * (pjdotr*mu[i].y+pidotr*mu[j].y-presf*dely);
178 bforcecoulz = bfac * (pjdotr*mu[i].z+pidotr*mu[j].z-presf*delz);
179
180 forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
181 forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
182 forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
183
184 pre2 = 3.0 * bfac * r5inv * pjdotr;
185 pre3 = 3.0 * bfac * r5inv * pidotr;
186 pre4 = -bfac * r3inv;
187
188 crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y);
189 crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z);
190 crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x);
191
192 tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely);
193 tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz);
194 tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx);
195 tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely);
196 tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz);
197 tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx);
198 }
199
200 if (mu[i].w > 0.0 && q[j] != 0.0) {
201 r3inv = r2inv*rinv;
202 r5inv = r3inv*r2inv;
203 pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz;
204 pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
205 pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
206 2.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv;
207 pre2 = q[j] * r3inv * pqfac;
208
209 forcecoulx += pre2*mu[i].x - pre1*delx;
210 forcecouly += pre2*mu[i].y - pre1*dely;
211 forcecoulz += pre2*mu[i].z - pre1*delz;
212 tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely);
213 tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz);
214 tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx);
215 }
216
217 if (mu[j].w > 0.0 && qtmp != 0.0) {
218 r3inv = r2inv*rinv;
219 r5inv = r3inv*r2inv;
220 pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz;
221 pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
222 qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
223 2.0*rsq*sqrt(rsq*rcutcoul2inv)*rcutcoul2inv;
224 pre2 = qtmp * r3inv * qpfac;
225
226 forcecoulx += pre1*delx - pre2*mu[j].x;
227 forcecouly += pre1*dely - pre2*mu[j].y;
228 forcecoulz += pre1*delz - pre2*mu[j].z;
229 tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely);
230 tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz);
231 tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx);
232 }
233 }
234
235 // LJ interaction
236
237 if (rsq < cut_ljsq[itype][jtype]) {
238 r6inv = r2inv*r2inv*r2inv;
239 forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
240
241 rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
242 rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
243 forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
244 rcutlj6inv * rcutlj2inv;
245
246 forcelj = factor_lj * (forceljcut - forceljsf);
247 } else forcelj = 0.0;
248
249 // total force
250
251 fq = factor_coul*qqrd2e*scale[itype][jtype];
252 fx = fq*forcecoulx + delx*forcelj;
253 fy = fq*forcecouly + dely*forcelj;
254 fz = fq*forcecoulz + delz*forcelj;
255
256 // force & torque accumulation
257
258 fxtmp += fx;
259 fytmp += fy;
260 fztmp += fz;
261 t1tmp += fq*tixcoul;
262 t2tmp += fq*tiycoul;
263 t3tmp += fq*tizcoul;
264
265 if (NEWTON_PAIR || j < nlocal) {
266 f[j].x -= fx;
267 f[j].y -= fy;
268 f[j].z -= fz;
269 torque[j][0] += fq*tjxcoul;
270 torque[j][1] += fq*tjycoul;
271 torque[j][2] += fq*tjzcoul;
272 }
273
274 if (EFLAG) {
275 if (rsq < cut_coulsq[itype][jtype]) {
276 ecoul = (1.0-sqrt(rsq/cut_coulsq[itype][jtype]));
277 ecoul *= ecoul;
278 ecoul *= qtmp * q[j] * rinv;
279 if (mu[i].w > 0.0 && mu[j].w > 0.0)
280 ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
281 if (mu[i].w > 0.0 && q[j] != 0.0)
282 ecoul += -q[j] * r3inv * pqfac * pidotr;
283 if (mu[j].w > 0.0 && qtmp != 0.0)
284 ecoul += qtmp * r3inv * qpfac * pjdotr;
285 ecoul *= factor_coul*qqrd2e*scale[itype][jtype];
286 } else ecoul = 0.0;
287
288 if (rsq < cut_ljsq[itype][jtype]) {
289 evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+
290 rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
291 rsq*rcutlj2inv+
292 rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
293 evdwl *= factor_lj;
294 } else evdwl = 0.0;
295 }
296
297 if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
298 evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr);
299 }
300 }
301 f[i].x += fxtmp;
302 f[i].y += fytmp;
303 f[i].z += fztmp;
304 torque[i][0] += t1tmp;
305 torque[i][1] += t2tmp;
306 torque[i][2] += t3tmp;
307 }
308 }
309
310 /* ---------------------------------------------------------------------- */
311
memory_usage()312 double PairLJSFDipoleSFOMP::memory_usage()
313 {
314 double bytes = memory_usage_thr();
315 bytes += PairLJSFDipoleSF::memory_usage();
316
317 return bytes;
318 }
319