1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    This software is distributed under the GNU General Public License.
8 
9    See the README file in the top-level LAMMPS directory.
10 ------------------------------------------------------------------------- */
11 
12 /* ----------------------------------------------------------------------
13    Contributing author: Axel Kohlmeyer (Temple U)
14 ------------------------------------------------------------------------- */
15 
16 #include "omp_compat.h"
17 #include <cmath>
18 #include "pair_lj_cut_tip4p_long_soft_omp.h"
19 #include "atom.h"
20 #include "domain.h"
21 #include "comm.h"
22 #include "force.h"
23 #include "neighbor.h"
24 #include "error.h"
25 #include "memory.h"
26 #include "neigh_list.h"
27 
28 #include "suffix.h"
29 using namespace LAMMPS_NS;
30 
31 #define EWALD_F   1.12837917
32 #define EWALD_P   0.3275911
33 #define A1        0.254829592
34 #define A2       -0.284496736
35 #define A3        1.421413741
36 #define A4       -1.453152027
37 #define A5        1.061405429
38 
39 /* ---------------------------------------------------------------------- */
40 
PairLJCutTIP4PLongSoftOMP(LAMMPS * lmp)41 PairLJCutTIP4PLongSoftOMP::PairLJCutTIP4PLongSoftOMP(LAMMPS *lmp) :
42   PairLJCutTIP4PLongSoft(lmp), ThrOMP(lmp, THR_PAIR)
43 {
44   suffix_flag |= Suffix::OMP;
45   respa_enable = 0;
46   newsite_thr = nullptr;
47   hneigh_thr = nullptr;
48 
49   // TIP4P cannot compute virial as F dot r
50   // due to finding bonded H atoms which are not near O atom
51 
52   no_virial_fdotr_compute = 1;
53 }
54 
55 /* ---------------------------------------------------------------------- */
56 
~PairLJCutTIP4PLongSoftOMP()57 PairLJCutTIP4PLongSoftOMP::~PairLJCutTIP4PLongSoftOMP()
58 {
59   memory->destroy(hneigh_thr);
60   memory->destroy(newsite_thr);
61 }
62 
63 /* ---------------------------------------------------------------------- */
64 
compute(int eflag,int vflag)65 void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag)
66 {
67   ev_init(eflag,vflag);
68 
69   const int nlocal = atom->nlocal;
70   const int nall = nlocal + atom->nghost;
71 
72   // reallocate hneigh_thr & newsite_thr if necessary
73   // initialize hneigh_thr[0] to -1 on steps when reneighboring occurred
74   // initialize hneigh_thr[2] to 0 every step
75 
76   if (atom->nmax > nmax) {
77     nmax = atom->nmax;
78     memory->destroy(hneigh_thr);
79     memory->create(hneigh_thr,nmax,"pair:hneigh_thr");
80     memory->destroy(newsite_thr);
81     memory->create(newsite_thr,nmax,"pair:newsite_thr");
82   }
83 
84   int i;
85   // tag entire list as completely invalid after a neighbor
86   // list update, since that can change the order of atoms.
87   if (neighbor->ago == 0)
88     for (i = 0; i < nall; i++) hneigh_thr[i].a = -1;
89 
90   // indicate that the coordinates for the M point need to
91   // be updated. this needs to be done in every step.
92   for (i = 0; i < nall; i++) hneigh_thr[i].t = 0;
93 
94   const int nthreads = comm->nthreads;
95   const int inum = list->inum;
96 
97 #if defined(_OPENMP)
98 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
99 #endif
100   {
101     int ifrom, ito, tid;
102 
103     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
104     ThrData *thr = fix->get_thr(tid);
105     thr->timer(Timer::START);
106     ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
107 
108     if (evflag) {
109       if (eflag) {
110         if (vflag) eval<1,1,1>(ifrom, ito, thr);
111         else eval<1,1,0>(ifrom, ito, thr);
112       } else {
113         if (vflag) eval<1,0,1>(ifrom, ito, thr);
114         else eval<1,0,0>(ifrom, ito, thr);
115       }
116     } else eval<0,0,0>(ifrom, ito, thr);
117 
118     thr->timer(Timer::PAIR);
119     reduce_thr(this, eflag, vflag, thr);
120   } // end of omp parallel region
121 }
122 
123 /* ---------------------------------------------------------------------- */
124 
125 template <int EVFLAG, int EFLAG, int VFLAG>
eval(int iifrom,int iito,ThrData * const thr)126 void PairLJCutTIP4PLongSoftOMP::eval(int iifrom, int iito, ThrData * const thr)
127 {
128   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
129   double r,rsq,forcecoul,forcelj,cforce;
130   double factor_coul,factor_lj;
131   double grij,expm2,prefactor,t,erfc;
132   double denc, denlj, r4sig6;
133   double v[6];
134   double fdx,fdy,fdz,fOx,fOy,fOz,fHx,fHy,fHz;
135   dbl3_t x1,x2,xH1,xH2;
136 
137   int *ilist,*jlist,*numneigh,**firstneigh;
138   int i,j,ii,jj,jnum,itype,jtype,key;
139   int n,vlist[6];
140   int iH1,iH2,jH1,jH2;
141 
142   evdwl = ecoul = 0.0;
143 
144   const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
145   dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
146   const double * _noalias const q = atom->q;
147   const int * _noalias const type = atom->type;
148   const int nlocal = atom->nlocal;
149   const double * _noalias const special_coul = force->special_coul;
150   const double * _noalias const special_lj = force->special_lj;
151   const double qqrd2e = force->qqrd2e;
152   const double cut_coulsqplus = (cut_coul+2.0*qdist) * (cut_coul+2.0*qdist);
153 
154   double fxtmp,fytmp,fztmp;
155 
156   ilist = list->ilist;
157   numneigh = list->numneigh;
158   firstneigh = list->firstneigh;
159 
160   // loop over neighbors of my atoms
161 
162   for (ii = iifrom; ii < iito; ++ii) {
163     i = ilist[ii];
164     qtmp = q[i];
165     xtmp = x[i].x;
166     ytmp = x[i].y;
167     ztmp = x[i].z;
168     itype = type[i];
169 
170     // if atom I = water O, set x1 = offset charge site
171     // else x1 = x of atom I
172     // NOTE: to make this part thread safe, we need to
173     // make sure that the hneigh_thr[][] entries only get
174     // updated, when all data is in place. worst case,
175     // some calculation is repeated, but since the results
176     // will be the same, there is no race condition.
177     if (itype == typeO) {
178       if (hneigh_thr[i].a < 0) {
179         iH1 = atom->map(atom->tag[i] + 1);
180         iH2 = atom->map(atom->tag[i] + 2);
181         if (iH1 == -1 || iH2 == -1)
182           error->one(FLERR,"TIP4P hydrogen is missing");
183         if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
184           error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
185         // set iH1,iH2 to index of closest image to O
186         iH1 = domain->closest_image(i,iH1);
187         iH2 = domain->closest_image(i,iH2);
188         compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]);
189         hneigh_thr[i].t = 1;
190         hneigh_thr[i].b = iH2;
191         hneigh_thr[i].a = iH1;
192       } else {
193         iH1 = hneigh_thr[i].a;
194         iH2 = hneigh_thr[i].b;
195         if (hneigh_thr[i].t == 0) {
196           compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]);
197           hneigh_thr[i].t = 1;
198         }
199       }
200       x1 = newsite_thr[i];
201     } else x1 = x[i];
202 
203     jlist = firstneigh[i];
204     jnum = numneigh[i];
205     fxtmp=fytmp=fztmp=0.0;
206 
207     for (jj = 0; jj < jnum; jj++) {
208       j = jlist[jj];
209       factor_lj = special_lj[sbmask(j)];
210       factor_coul = special_coul[sbmask(j)];
211       j &= NEIGHMASK;
212 
213       delx = xtmp - x[j].x;
214       dely = ytmp - x[j].y;
215       delz = ztmp - x[j].z;
216       rsq = delx*delx + dely*dely + delz*delz;
217       jtype = type[j];
218 
219       // LJ interaction based on true rsq
220 
221       if (rsq < cut_ljsq[itype][jtype]) {
222 
223         r4sig6 = rsq*rsq / lj2[itype][jtype];
224         denlj = lj3[itype][jtype] + rsq*r4sig6;
225         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] *
226           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
227 
228         forcelj *= factor_lj;
229 
230         fxtmp += delx*forcelj;
231         fytmp += dely*forcelj;
232         fztmp += delz*forcelj;
233         f[j].x -= delx*forcelj;
234         f[j].y -= dely*forcelj;
235         f[j].z -= delz*forcelj;
236 
237         if (EFLAG) {
238           evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] *
239             (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
240           evdwl *= factor_lj;
241         } else evdwl = 0.0;
242 
243         if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
244                                  evdwl,0.0,forcelj,delx,dely,delz,thr);
245       }
246 
247       // adjust rsq and delxyz for off-site O charge(s) if necessary
248       // but only if they are within reach
249       // NOTE: to make this part thread safe, we need to
250       // make sure that the hneigh_thr[][] entries only get
251       // updated, when all data is in place. worst case,
252       // some calculation is repeated, but since the results
253       // will be the same, there is no race condition.
254       if (rsq < cut_coulsqplus) {
255         if (itype == typeO || jtype == typeO) {
256 
257           // if atom J = water O, set x2 = offset charge site
258           // else x2 = x of atom J
259 
260           if (jtype == typeO) {
261             if (hneigh_thr[j].a < 0) {
262               jH1 = atom->map(atom->tag[j] + 1);
263               jH2 = atom->map(atom->tag[j] + 2);
264               if (jH1 == -1 || jH2 == -1)
265                 error->one(FLERR,"TIP4P hydrogen is missing");
266               if (atom->type[jH1] != typeH || atom->type[jH2] != typeH)
267                 error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
268               // set jH1,jH2 to closest image to O
269               jH1 = domain->closest_image(j,jH1);
270               jH2 = domain->closest_image(j,jH2);
271               compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]);
272               hneigh_thr[j].t = 1;
273               hneigh_thr[j].b = jH2;
274               hneigh_thr[j].a = jH1;
275             } else {
276               jH1 = hneigh_thr[j].a;
277               jH2 = hneigh_thr[j].b;
278               if (hneigh_thr[j].t == 0) {
279                 compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]);
280                 hneigh_thr[j].t = 1;
281               }
282             }
283             x2 = newsite_thr[j];
284           } else x2 = x[j];
285 
286           delx = x1.x - x2.x;
287           dely = x1.y - x2.y;
288           delz = x1.z - x2.z;
289           rsq = delx*delx + dely*dely + delz*delz;
290         }
291 
292         // Coulombic interaction based on modified rsq
293 
294         if (rsq < cut_coulsq) {
295           r = sqrt(rsq);
296           grij = g_ewald * r;
297           expm2 = exp(-grij*grij);
298           t = 1.0 / (1.0 + EWALD_P*grij);
299           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
300 
301           denc = sqrt(lj4[itype][jtype] + rsq);
302           prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
303 
304           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
305           if (factor_coul < 1.0) {
306             forcecoul -= (1.0-factor_coul)*prefactor;
307           }
308 
309           cforce = forcecoul;
310 
311           // if i,j are not O atoms, force is applied directly
312           // if i or j are O atoms, force is on fictitious atom & partitioned
313           // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
314           // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
315           // preserves total force and torque on water molecule
316           // virial = sum(r x F) where each water's atoms are near xi and xj
317           // vlist stores 2,4,6 atoms whose forces contribute to virial
318 
319           if (EVFLAG) {
320             n = 0;
321             key = 0;
322           }
323 
324           if (itype != typeO) {
325             fxtmp += delx * cforce;
326             fytmp += dely * cforce;
327             fztmp += delz * cforce;
328 
329             if (VFLAG) {
330               v[0] = x[i].x * delx * cforce;
331               v[1] = x[i].y * dely * cforce;
332               v[2] = x[i].z * delz * cforce;
333               v[3] = x[i].x * dely * cforce;
334               v[4] = x[i].x * delz * cforce;
335               v[5] = x[i].y * delz * cforce;
336             }
337             if (EVFLAG) vlist[n++] = i;
338 
339           } else {
340             if (EVFLAG) key++;
341 
342             fdx = delx*cforce;
343             fdy = dely*cforce;
344             fdz = delz*cforce;
345 
346             fOx = fdx*(1 - alpha);
347             fOy = fdy*(1 - alpha);
348             fOz = fdz*(1 - alpha);
349 
350             fHx = 0.5*alpha * fdx;
351             fHy = 0.5*alpha * fdy;
352             fHz = 0.5*alpha * fdz;
353 
354             fxtmp += fOx;
355             fytmp += fOy;
356             fztmp += fOz;
357 
358             f[iH1].x += fHx;
359             f[iH1].y += fHy;
360             f[iH1].z += fHz;
361 
362             f[iH2].x += fHx;
363             f[iH2].y += fHy;
364             f[iH2].z += fHz;
365 
366             if (VFLAG) {
367               xH1 = x[iH1];
368               xH2 = x[iH2];
369               v[0] = x[i].x*fOx + xH1.x*fHx + xH2.x*fHx;
370               v[1] = x[i].y*fOy + xH1.y*fHy + xH2.y*fHy;
371               v[2] = x[i].z*fOz + xH1.z*fHz + xH2.z*fHz;
372               v[3] = x[i].x*fOy + xH1.x*fHy + xH2.x*fHy;
373               v[4] = x[i].x*fOz + xH1.x*fHz + xH2.x*fHz;
374               v[5] = x[i].y*fOz + xH1.y*fHz + xH2.y*fHz;
375             }
376             if (EVFLAG) {
377               vlist[n++] = i;
378               vlist[n++] = iH1;
379               vlist[n++] = iH2;
380             }
381           }
382 
383           if (jtype != typeO) {
384             f[j].x -= delx * cforce;
385             f[j].y -= dely * cforce;
386             f[j].z -= delz * cforce;
387 
388             if (VFLAG) {
389               v[0] -= x[j].x * delx * cforce;
390               v[1] -= x[j].y * dely * cforce;
391               v[2] -= x[j].z * delz * cforce;
392               v[3] -= x[j].x * dely * cforce;
393               v[4] -= x[j].x * delz * cforce;
394               v[5] -= x[j].y * delz * cforce;
395             }
396             if (EVFLAG) vlist[n++] = j;
397 
398           } else {
399             if (EVFLAG) key += 2;
400 
401             fdx = -delx*cforce;
402             fdy = -dely*cforce;
403             fdz = -delz*cforce;
404 
405             fOx = fdx*(1 - alpha);
406             fOy = fdy*(1 - alpha);
407             fOz = fdz*(1 - alpha);
408 
409             fHx = 0.5*alpha * fdx;
410             fHy = 0.5*alpha * fdy;
411             fHz = 0.5*alpha * fdz;
412 
413             f[j].x += fOx;
414             f[j].y += fOy;
415             f[j].z += fOz;
416 
417             f[jH1].x += fHx;
418             f[jH1].y += fHy;
419             f[jH1].z += fHz;
420 
421             f[jH2].x += fHx;
422             f[jH2].y += fHy;
423             f[jH2].z += fHz;
424 
425             if (VFLAG) {
426               xH1 = x[jH1];
427               xH2 = x[jH2];
428               v[0] += x[j].x*fOx + xH1.x*fHx + xH2.x*fHx;
429               v[1] += x[j].y*fOy + xH1.y*fHy + xH2.y*fHy;
430               v[2] += x[j].z*fOz + xH1.z*fHz + xH2.z*fHz;
431               v[3] += x[j].x*fOy + xH1.x*fHy + xH2.x*fHy;
432               v[4] += x[j].x*fOz + xH1.x*fHz + xH2.x*fHz;
433               v[5] += x[j].y*fOz + xH1.y*fHz + xH2.y*fHz;
434             }
435             if (EVFLAG) {
436               vlist[n++] = j;
437               vlist[n++] = jH1;
438               vlist[n++] = jH2;
439             }
440           }
441 
442           if (EFLAG) {
443             prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
444             ecoul = prefactor*erfc;
445             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
446           } else ecoul = 0.0;
447 
448           if (EVFLAG) ev_tally_list_thr(this,key,vlist,v,ecoul,alpha,thr);
449         }
450       }
451     }
452     f[i].x += fxtmp;
453     f[i].y += fytmp;
454     f[i].z += fztmp;
455   }
456 }
457 
458 /* ----------------------------------------------------------------------
459   compute position xM of fictitious charge site for O atom and 2 H atoms
460   return it as xM
461 ------------------------------------------------------------------------- */
462 
compute_newsite_thr(const dbl3_t & xO,const dbl3_t & xH1,const dbl3_t & xH2,dbl3_t & xM) const463 void PairLJCutTIP4PLongSoftOMP::compute_newsite_thr(const dbl3_t &xO,
464                                                 const dbl3_t &xH1,
465                                                 const dbl3_t &xH2,
466                                                 dbl3_t &xM) const
467 {
468   double delx1 = xH1.x - xO.x;
469   double dely1 = xH1.y - xO.y;
470   double delz1 = xH1.z - xO.z;
471 
472   double delx2 = xH2.x - xO.x;
473   double dely2 = xH2.y - xO.y;
474   double delz2 = xH2.z - xO.z;
475 
476   const double prefac = alpha * 0.5;
477   xM.x = xO.x + prefac * (delx1 + delx2);
478   xM.y = xO.y + prefac * (dely1 + dely2);
479   xM.z = xO.z + prefac * (delz1 + delz2);
480 }
481 
482 /* ---------------------------------------------------------------------- */
483 
memory_usage()484 double PairLJCutTIP4PLongSoftOMP::memory_usage()
485 {
486   double bytes = memory_usage_thr();
487   bytes += PairLJCutTIP4PLongSoft::memory_usage();
488   return bytes;
489 }
490