1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    This software is distributed under the GNU General Public License.
8 
9    See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11 
12 /* ----------------------------------------------------------------------
13    Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15 
16 #include "pair_eim_omp.h"
17 
18 #include "atom.h"
19 #include "comm.h"
20 #include "force.h"
21 #include "memory.h"
22 #include "neigh_list.h"
23 #include "suffix.h"
24 
25 #include <cmath>
26 
27 #include "omp_compat.h"
28 using namespace LAMMPS_NS;
29 
30 /* ---------------------------------------------------------------------- */
31 
PairEIMOMP(LAMMPS * lmp)32 PairEIMOMP::PairEIMOMP(LAMMPS *lmp) :
33   PairEIM(lmp), ThrOMP(lmp, THR_PAIR)
34 {
35   suffix_flag |= Suffix::OMP;
36   respa_enable = 0;
37 }
38 
39 /* ---------------------------------------------------------------------- */
40 
compute(int eflag,int vflag)41 void PairEIMOMP::compute(int eflag, int vflag)
42 {
43   ev_init(eflag,vflag);
44 
45   const int nall = atom->nlocal + atom->nghost;
46   const int nthreads = comm->nthreads;
47   const int inum = list->inum;
48 
49   // grow energy and fp arrays if necessary
50   // need to be atom->nmax in length
51 
52   if (atom->nmax > nmax) {
53     memory->destroy(rho);
54     memory->destroy(fp);
55     nmax = atom->nmax;
56     memory->create(rho,nthreads*nmax,"pair:rho");
57     memory->create(fp,nthreads*nmax,"pair:fp");
58   }
59 
60 #if defined(_OPENMP)
61 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
62 #endif
63   {
64     int ifrom, ito, tid;
65 
66     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
67     ThrData *thr = fix->get_thr(tid);
68     thr->timer(Timer::START);
69     ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
70 
71     if (force->newton_pair)
72       thr->init_eim(nall, rho, fp);
73     else
74       thr->init_eim(atom->nlocal, rho, fp);
75 
76     if (evflag) {
77       if (eflag) {
78         if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
79         else eval<1,1,0>(ifrom, ito, thr);
80       } else {
81         if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
82         else eval<1,0,0>(ifrom, ito, thr);
83       }
84     } else {
85       if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
86       else eval<0,0,0>(ifrom, ito, thr);
87     }
88 
89     thr->timer(Timer::PAIR);
90     reduce_thr(this, eflag, vflag, thr);
91   } // end of omp parallel region
92 }
93 
94 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
eval(int iifrom,int iito,ThrData * const thr)95 void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr)
96 {
97   int i,j,ii,jj,m,jnum,itype,jtype;
98   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
99   double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip;
100   double *coeff;
101   int *ilist,*jlist,*numneigh,**firstneigh;
102 
103   evdwl = 0.0;
104 
105 
106   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
107   dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
108   double * const rho_t = thr->get_rho();
109   double * const fp_t = thr->get_fp();
110   const int tid = thr->get_tid();
111   const int nthreads = comm->nthreads;
112 
113   const int * _noalias const type = atom->type;
114   const int nlocal = atom->nlocal;
115   const int nall = nlocal + atom->nghost;
116 
117   double fxtmp,fytmp,fztmp;
118 
119   ilist = list->ilist;
120   numneigh = list->numneigh;
121   firstneigh = list->firstneigh;
122 
123   // rho = density at each atom
124   // loop over neighbors of my atoms
125 
126   for (ii = iifrom; ii < iito; ii++) {
127     i = ilist[ii];
128     xtmp = x[i].x;
129     ytmp = x[i].y;
130     ztmp = x[i].z;
131     itype = type[i];
132     jlist = firstneigh[i];
133     jnum = numneigh[i];
134 
135     for (jj = 0; jj < jnum; jj++) {
136       j = jlist[jj];
137       j &= NEIGHMASK;
138       jtype = type[j];
139       delx = xtmp - x[j].x;
140       dely = ytmp - x[j].y;
141       delz = ztmp - x[j].z;
142       rsq = delx*delx + dely*dely + delz*delz;
143 
144       if (rsq < cutforcesq[itype][jtype]) {
145         p = sqrt(rsq)*rdr + 1.0;
146         m = static_cast<int> (p);
147         m = MIN(m,nr-1);
148         p -= m;
149         p = MIN(p,1.0);
150         coeff = Fij_spline[type2Fij[itype][jtype]][m];
151         rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
152         if (NEWTON_PAIR || j < nlocal) {
153           coeff = Fij_spline[type2Fij[jtype][itype]][m];
154           rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
155         }
156       }
157     }
158   }
159 
160   // wait until all threads are done with computation
161   sync_threads();
162 
163   // communicate and sum densities
164   if (NEWTON_PAIR) {
165     // reduce per thread density
166     thr->timer(Timer::PAIR);
167     data_reduce_thr(rho, nall, nthreads, 1, tid);
168 
169     // wait until reduction is complete
170     sync_threads();
171 
172 #if defined(_OPENMP)
173 #pragma omp master
174 #endif
175     {
176       rhofp = 1;
177       comm->reverse_comm_pair(this);
178     }
179 
180   } else {
181     thr->timer(Timer::PAIR);
182     data_reduce_thr(rho, nlocal, nthreads, 1, tid);
183 
184     // wait until reduction is complete
185     sync_threads();
186   }
187 
188 #if defined(_OPENMP)
189 #pragma omp master
190 #endif
191   {
192     rhofp = 1;
193     comm->forward_comm_pair(this);
194   }
195 
196   // wait until master is finished communicating
197   sync_threads();
198 
199   for (ii = iifrom; ii < iito; ii++) {
200     i = ilist[ii];
201     xtmp = x[i].x;
202     ytmp = x[i].y;
203     ztmp = x[i].z;
204     itype = type[i];
205     jlist = firstneigh[i];
206     jnum = numneigh[i];
207 
208     for (jj = 0; jj < jnum; jj++) {
209       j = jlist[jj];
210       j &= NEIGHMASK;
211       jtype = type[j];
212 
213       delx = xtmp - x[j].x;
214       dely = ytmp - x[j].y;
215       delz = ztmp - x[j].z;
216       rsq = delx*delx + dely*dely + delz*delz;
217 
218       if (rsq < cutforcesq[itype][jtype]) {
219         p = sqrt(rsq)*rdr + 1.0;
220         m = static_cast<int> (p);
221         m = MIN(m,nr-1);
222         p -= m;
223         p = MIN(p,1.0);
224         coeff = Gij_spline[type2Gij[itype][jtype]][m];
225         fp_t[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]);
226         if (NEWTON_PAIR || j < nlocal) {
227           fp_t[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p +
228                            coeff[6]);
229         }
230       }
231     }
232   }
233 
234   // wait until all threads are done with computation
235   sync_threads();
236 
237   // communicate and sum modified densities
238   if (NEWTON_PAIR) {
239     // reduce per thread density
240     thr->timer(Timer::PAIR);
241     data_reduce_thr(fp, nall, nthreads, 1, tid);
242 
243     // wait until reduction is complete
244     sync_threads();
245 
246 #if defined(_OPENMP)
247 #pragma omp master
248 #endif
249     {
250       rhofp = 2;
251       comm->reverse_comm_pair(this);
252     }
253 
254   } else {
255     thr->timer(Timer::PAIR);
256     data_reduce_thr(fp, nlocal, nthreads, 1, tid);
257 
258     // wait until reduction is complete
259     sync_threads();
260   }
261 
262 #if defined(_OPENMP)
263 #pragma omp master
264 #endif
265   {
266     rhofp = 2;
267     comm->forward_comm_pair(this);
268   }
269 
270   // wait until master is finished communicating
271   sync_threads();
272 
273   for (ii = iifrom; ii < iito; ii++) {
274     i = ilist[ii];
275     itype = type[i];
276     if (EFLAG) {
277       phi = 0.5*rho[i]*fp[i];
278       e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr);
279     }
280   }
281 
282   // compute forces on each atom
283   // loop over neighbors of my atoms
284 
285   for (ii = iifrom; ii < iito; ii++) {
286     i = ilist[ii];
287     xtmp = x[i].x;
288     ytmp = x[i].y;
289     ztmp = x[i].z;
290     itype = type[i];
291     fxtmp = fytmp = fztmp = 0.0;
292 
293     jlist = firstneigh[i];
294     jnum = numneigh[i];
295 
296     for (jj = 0; jj < jnum; jj++) {
297       j = jlist[jj];
298       j &= NEIGHMASK;
299       jtype = type[j];
300       delx = xtmp - x[j].x;
301       dely = ytmp - x[j].y;
302       delz = ztmp - x[j].z;
303       rsq = delx*delx + dely*dely + delz*delz;
304 
305       if (rsq < cutforcesq[itype][jtype]) {
306         r = sqrt(rsq);
307         p = r*rdr + 1.0;
308         m = static_cast<int> (p);
309         m = MIN(m,nr-1);
310         p -= m;
311         p = MIN(p,1.0);
312 
313         // rhoip = derivative of (density at atom j due to atom i)
314         // rhojp = derivative of (density at atom i due to atom j)
315         // phi = pair potential energy
316         // phip = phi'
317 
318         coeff = Fij_spline[type2Fij[jtype][itype]][m];
319         rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
320         coeff = Fij_spline[type2Fij[itype][jtype]][m];
321         rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
322         coeff = phiij_spline[type2phiij[itype][jtype]][m];
323         phip = (coeff[0]*p + coeff[1])*p + coeff[2];
324         phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
325         coeff = Gij_spline[type2Gij[itype][jtype]][m];
326         coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
327         coulp = (coeff[0]*p + coeff[1])*p + coeff[2];
328         psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp +
329                fp[i]*rhojp + fp[j]*rhoip;
330         recip = 1.0/r;
331         fpair = -psip*recip;
332         fxtmp += delx*fpair;
333         fytmp += dely*fpair;
334         fztmp += delz*fpair;
335         if (NEWTON_PAIR || j < nlocal) {
336           f[j].x -= delx*fpair;
337           f[j].y -= dely*fpair;
338           f[j].z -= delz*fpair;
339         }
340 
341         if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul;
342         if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
343                                  evdwl,0.0,fpair,delx,dely,delz,thr);
344       }
345     }
346     f[i].x += fxtmp;
347     f[i].y += fytmp;
348     f[i].z += fztmp;
349   }
350 }
351 
352 /* ---------------------------------------------------------------------- */
353 
memory_usage()354 double PairEIMOMP::memory_usage()
355 {
356   double bytes = memory_usage_thr();
357   bytes += PairEIM::memory_usage();
358 
359   return bytes;
360 }
361