1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 This software is distributed under the GNU General Public License.
8
9 See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11
12 /* ----------------------------------------------------------------------
13 Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15
16 #include "pair_eim_omp.h"
17
18 #include "atom.h"
19 #include "comm.h"
20 #include "force.h"
21 #include "memory.h"
22 #include "neigh_list.h"
23 #include "suffix.h"
24
25 #include <cmath>
26
27 #include "omp_compat.h"
28 using namespace LAMMPS_NS;
29
30 /* ---------------------------------------------------------------------- */
31
PairEIMOMP(LAMMPS * lmp)32 PairEIMOMP::PairEIMOMP(LAMMPS *lmp) :
33 PairEIM(lmp), ThrOMP(lmp, THR_PAIR)
34 {
35 suffix_flag |= Suffix::OMP;
36 respa_enable = 0;
37 }
38
39 /* ---------------------------------------------------------------------- */
40
compute(int eflag,int vflag)41 void PairEIMOMP::compute(int eflag, int vflag)
42 {
43 ev_init(eflag,vflag);
44
45 const int nall = atom->nlocal + atom->nghost;
46 const int nthreads = comm->nthreads;
47 const int inum = list->inum;
48
49 // grow energy and fp arrays if necessary
50 // need to be atom->nmax in length
51
52 if (atom->nmax > nmax) {
53 memory->destroy(rho);
54 memory->destroy(fp);
55 nmax = atom->nmax;
56 memory->create(rho,nthreads*nmax,"pair:rho");
57 memory->create(fp,nthreads*nmax,"pair:fp");
58 }
59
60 #if defined(_OPENMP)
61 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
62 #endif
63 {
64 int ifrom, ito, tid;
65
66 loop_setup_thr(ifrom, ito, tid, inum, nthreads);
67 ThrData *thr = fix->get_thr(tid);
68 thr->timer(Timer::START);
69 ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
70
71 if (force->newton_pair)
72 thr->init_eim(nall, rho, fp);
73 else
74 thr->init_eim(atom->nlocal, rho, fp);
75
76 if (evflag) {
77 if (eflag) {
78 if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
79 else eval<1,1,0>(ifrom, ito, thr);
80 } else {
81 if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
82 else eval<1,0,0>(ifrom, ito, thr);
83 }
84 } else {
85 if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
86 else eval<0,0,0>(ifrom, ito, thr);
87 }
88
89 thr->timer(Timer::PAIR);
90 reduce_thr(this, eflag, vflag, thr);
91 } // end of omp parallel region
92 }
93
94 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
eval(int iifrom,int iito,ThrData * const thr)95 void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
96 {
97 int i,j,ii,jj,m,jnum,itype,jtype;
98 double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
99 double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip;
100 double *coeff;
101 int *ilist,*jlist,*numneigh,**firstneigh;
102
103 evdwl = 0.0;
104
105
106 const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
107 dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
108 double * const rho_t = thr->get_rho();
109 double * const fp_t = thr->get_fp();
110 const int tid = thr->get_tid();
111 const int nthreads = comm->nthreads;
112
113 const int * _noalias const type = atom->type;
114 const int nlocal = atom->nlocal;
115 const int nall = nlocal + atom->nghost;
116
117 double fxtmp,fytmp,fztmp;
118
119 ilist = list->ilist;
120 numneigh = list->numneigh;
121 firstneigh = list->firstneigh;
122
123 // rho = density at each atom
124 // loop over neighbors of my atoms
125
126 for (ii = iifrom; ii < iito; ii++) {
127 i = ilist[ii];
128 xtmp = x[i].x;
129 ytmp = x[i].y;
130 ztmp = x[i].z;
131 itype = type[i];
132 jlist = firstneigh[i];
133 jnum = numneigh[i];
134
135 for (jj = 0; jj < jnum; jj++) {
136 j = jlist[jj];
137 j &= NEIGHMASK;
138 jtype = type[j];
139 delx = xtmp - x[j].x;
140 dely = ytmp - x[j].y;
141 delz = ztmp - x[j].z;
142 rsq = delx*delx + dely*dely + delz*delz;
143
144 if (rsq < cutforcesq[itype][jtype]) {
145 p = sqrt(rsq)*rdr + 1.0;
146 m = static_cast<int> (p);
147 m = MIN(m,nr-1);
148 p -= m;
149 p = MIN(p,1.0);
150 coeff = Fij_spline[type2Fij[itype][jtype]][m];
151 rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
152 if (NEWTON_PAIR || j < nlocal) {
153 coeff = Fij_spline[type2Fij[jtype][itype]][m];
154 rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
155 }
156 }
157 }
158 }
159
160 // wait until all threads are done with computation
161 sync_threads();
162
163 // communicate and sum densities
164 if (NEWTON_PAIR) {
165 // reduce per thread density
166 thr->timer(Timer::PAIR);
167 data_reduce_thr(rho, nall, nthreads, 1, tid);
168
169 // wait until reduction is complete
170 sync_threads();
171
172 #if defined(_OPENMP)
173 #pragma omp master
174 #endif
175 {
176 rhofp = 1;
177 comm->reverse_comm_pair(this);
178 }
179
180 } else {
181 thr->timer(Timer::PAIR);
182 data_reduce_thr(rho, nlocal, nthreads, 1, tid);
183
184 // wait until reduction is complete
185 sync_threads();
186 }
187
188 #if defined(_OPENMP)
189 #pragma omp master
190 #endif
191 {
192 rhofp = 1;
193 comm->forward_comm_pair(this);
194 }
195
196 // wait until master is finished communicating
197 sync_threads();
198
199 for (ii = iifrom; ii < iito; ii++) {
200 i = ilist[ii];
201 xtmp = x[i].x;
202 ytmp = x[i].y;
203 ztmp = x[i].z;
204 itype = type[i];
205 jlist = firstneigh[i];
206 jnum = numneigh[i];
207
208 for (jj = 0; jj < jnum; jj++) {
209 j = jlist[jj];
210 j &= NEIGHMASK;
211 jtype = type[j];
212
213 delx = xtmp - x[j].x;
214 dely = ytmp - x[j].y;
215 delz = ztmp - x[j].z;
216 rsq = delx*delx + dely*dely + delz*delz;
217
218 if (rsq < cutforcesq[itype][jtype]) {
219 p = sqrt(rsq)*rdr + 1.0;
220 m = static_cast<int> (p);
221 m = MIN(m,nr-1);
222 p -= m;
223 p = MIN(p,1.0);
224 coeff = Gij_spline[type2Gij[itype][jtype]][m];
225 fp_t[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]);
226 if (NEWTON_PAIR || j < nlocal) {
227 fp_t[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p +
228 coeff[6]);
229 }
230 }
231 }
232 }
233
234 // wait until all threads are done with computation
235 sync_threads();
236
237 // communicate and sum modified densities
238 if (NEWTON_PAIR) {
239 // reduce per thread density
240 thr->timer(Timer::PAIR);
241 data_reduce_thr(fp, nall, nthreads, 1, tid);
242
243 // wait until reduction is complete
244 sync_threads();
245
246 #if defined(_OPENMP)
247 #pragma omp master
248 #endif
249 {
250 rhofp = 2;
251 comm->reverse_comm_pair(this);
252 }
253
254 } else {
255 thr->timer(Timer::PAIR);
256 data_reduce_thr(fp, nlocal, nthreads, 1, tid);
257
258 // wait until reduction is complete
259 sync_threads();
260 }
261
262 #if defined(_OPENMP)
263 #pragma omp master
264 #endif
265 {
266 rhofp = 2;
267 comm->forward_comm_pair(this);
268 }
269
270 // wait until master is finished communicating
271 sync_threads();
272
273 for (ii = iifrom; ii < iito; ii++) {
274 i = ilist[ii];
275 itype = type[i];
276 if (EFLAG) {
277 phi = 0.5*rho[i]*fp[i];
278 e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
279 }
280 }
281
282 // compute forces on each atom
283 // loop over neighbors of my atoms
284
285 for (ii = iifrom; ii < iito; ii++) {
286 i = ilist[ii];
287 xtmp = x[i].x;
288 ytmp = x[i].y;
289 ztmp = x[i].z;
290 itype = type[i];
291 fxtmp = fytmp = fztmp = 0.0;
292
293 jlist = firstneigh[i];
294 jnum = numneigh[i];
295
296 for (jj = 0; jj < jnum; jj++) {
297 j = jlist[jj];
298 j &= NEIGHMASK;
299 jtype = type[j];
300 delx = xtmp - x[j].x;
301 dely = ytmp - x[j].y;
302 delz = ztmp - x[j].z;
303 rsq = delx*delx + dely*dely + delz*delz;
304
305 if (rsq < cutforcesq[itype][jtype]) {
306 r = sqrt(rsq);
307 p = r*rdr + 1.0;
308 m = static_cast<int> (p);
309 m = MIN(m,nr-1);
310 p -= m;
311 p = MIN(p,1.0);
312
313 // rhoip = derivative of (density at atom j due to atom i)
314 // rhojp = derivative of (density at atom i due to atom j)
315 // phi = pair potential energy
316 // phip = phi'
317
318 coeff = Fij_spline[type2Fij[jtype][itype]][m];
319 rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
320 coeff = Fij_spline[type2Fij[itype][jtype]][m];
321 rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
322 coeff = phiij_spline[type2phiij[itype][jtype]][m];
323 phip = (coeff[0]*p + coeff[1])*p + coeff[2];
324 phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
325 coeff = Gij_spline[type2Gij[itype][jtype]][m];
326 coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
327 coulp = (coeff[0]*p + coeff[1])*p + coeff[2];
328 psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp +
329 fp[i]*rhojp + fp[j]*rhoip;
330 recip = 1.0/r;
331 fpair = -psip*recip;
332 fxtmp += delx*fpair;
333 fytmp += dely*fpair;
334 fztmp += delz*fpair;
335 if (NEWTON_PAIR || j < nlocal) {
336 f[j].x -= delx*fpair;
337 f[j].y -= dely*fpair;
338 f[j].z -= delz*fpair;
339 }
340
341 if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul;
342 if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
343 evdwl,0.0,fpair,delx,dely,delz,thr);
344 }
345 }
346 f[i].x += fxtmp;
347 f[i].y += fytmp;
348 f[i].z += fztmp;
349 }
350 }
351
352 /* ---------------------------------------------------------------------- */
353
memory_usage()354 double PairEIMOMP::memory_usage()
355 {
356 double bytes = memory_usage_thr();
357 bytes += PairEIM::memory_usage();
358
359 return bytes;
360 }
361