1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Stan Moore (Sandia)
17 ------------------------------------------------------------------------- */
18 
19 
20 #include "bond_fene_intel.h"
21 
22 #include "atom.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "math_const.h"
27 #include "memory.h"
28 #include "modify.h"
29 #include "neighbor.h"
30 #include "suffix.h"
31 #include "update.h"
32 
33 #include <cmath>
34 #include <cstring>
35 
36 #include "omp_compat.h"
37 
38 using namespace LAMMPS_NS;
39 using MathConst::MY_CUBEROOT2;
40 
41 typedef struct { int a,b,t;  } int3_t;
42 
43 /* ---------------------------------------------------------------------- */
44 
BondFENEIntel(LAMMPS * lmp)45 BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp)
46 {
47   suffix_flag |= Suffix::INTEL;
48 }
49 
50 /* ---------------------------------------------------------------------- */
51 
~BondFENEIntel()52 BondFENEIntel::~BondFENEIntel()
53 {
54 }
55 
56 /* ---------------------------------------------------------------------- */
57 
compute(int eflag,int vflag)58 void BondFENEIntel::compute(int eflag, int vflag)
59 {
60   #ifdef _LMP_INTEL_OFFLOAD
61   if (_use_base) {
62     BondFENE::compute(eflag, vflag);
63     return;
64   }
65   #endif
66 
67   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
68     compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
69                           force_const_single);
70   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
71     compute<double,double>(eflag, vflag, fix->get_double_buffers(),
72                            force_const_double);
73   else
74     compute<float,float>(eflag, vflag, fix->get_single_buffers(),
75                          force_const_single);
76 }
77 
78 /* ---------------------------------------------------------------------- */
79 
80 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)81 void BondFENEIntel::compute(int eflag, int vflag,
82                                 IntelBuffers<flt_t,acc_t> *buffers,
83                                 const ForceConst<flt_t> &fc)
84 {
85   ev_init(eflag,vflag);
86   if (vflag_atom)
87     error->all(FLERR,"INTEL package does not support per-atom stress");
88 
89   if (evflag) {
90     if (vflag && !eflag) {
91       if (force->newton_bond)
92         eval<0,1,1>(vflag, buffers, fc);
93       else
94         eval<0,1,0>(vflag, buffers, fc);
95     } else {
96       if (force->newton_bond)
97         eval<1,1,1>(vflag, buffers, fc);
98       else
99         eval<1,1,0>(vflag, buffers, fc);
100     }
101   } else {
102     if (force->newton_bond)
103       eval<0,0,1>(vflag, buffers, fc);
104     else
105       eval<0,0,0>(vflag, buffers, fc);
106   }
107 }
108 
109 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)110 void BondFENEIntel::eval(const int vflag,
111                          IntelBuffers<flt_t,acc_t> *buffers,
112                          const ForceConst<flt_t> &fc)
113 {
114   const int inum = neighbor->nbondlist;
115   if (inum == 0) return;
116 
117   ATOM_T * _noalias const x = buffers->get_x(0);
118   const int nlocal = atom->nlocal;
119   const int nall = nlocal + atom->nghost;
120 
121   int f_stride;
122   if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
123   else f_stride = buffers->get_stride(nlocal);
124 
125   int tc;
126   FORCE_T * _noalias f_start;
127   acc_t * _noalias ev_global;
128   IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
129   const int nthreads = tc;
130 
131   acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5;
132   if (EFLAG) oebond = (acc_t)0.0;
133   if (VFLAG && vflag) {
134     ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
135   }
136 
137   #if defined(_OPENMP)
138   #pragma omp parallel LMP_DEFAULT_NONE \
139     shared(f_start,f_stride,fc)           \
140     reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5)
141   #endif
142   {
143     int nfrom, npl, nto, tid;
144     #ifdef LMP_INTEL_USE_SIMDOFF
145     IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
146     #else
147     IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
148     #endif
149 
150     FORCE_T * _noalias const f = f_start + (tid * f_stride);
151     if (fix->need_zero(tid))
152       memset(f, 0, f_stride * sizeof(FORCE_T));
153 
154     const int3_t * _noalias const bondlist =
155       (int3_t *) neighbor->bondlist[0];
156 
157     #ifdef LMP_INTEL_USE_SIMDOFF
158     acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5;
159     if (EFLAG) sebond = (acc_t)0.0;
160     if (VFLAG && vflag) {
161       sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
162     }
163 #if defined(USE_OMP_SIMD)
164     #pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
165 #else
166     #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
167 #endif
168     for (int n = nfrom; n < nto; n ++) {
169     #else
170     for (int n = nfrom; n < nto; n += npl) {
171     #endif
172       const int i1 = bondlist[n].a;
173       const int i2 = bondlist[n].b;
174       const int type = bondlist[n].t;
175 
176       const flt_t ir0sq = fc.fc[type].ir0sq;
177       const flt_t k = fc.fc[type].k;
178       const flt_t sigma = fc.fc[type].sigma;
179       const flt_t sigmasq = sigma*sigma;
180       const flt_t epsilon = fc.fc[type].epsilon;
181 
182       const flt_t delx = x[i1].x - x[i2].x;
183       const flt_t dely = x[i1].y - x[i2].y;
184       const flt_t delz = x[i1].z - x[i2].z;
185 
186       const flt_t rsq = delx*delx + dely*dely + delz*delz;
187       flt_t rlogarg = (flt_t)1.0 - rsq * ir0sq;
188       flt_t irsq = (flt_t)1.0 / rsq;
189 
190       // if r -> r0, then rlogarg < 0.0 which is an error
191       // issue a warning and reset rlogarg = epsilon
192       // if r > 2*r0 something serious is wrong, abort
193 
194       if (rlogarg < (flt_t)0.1) {
195         error->warning(FLERR,"FENE bond too long: {} {} {} {:.8}",
196                        update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
197         if (rlogarg <= (flt_t)-3.0) error->one(FLERR,"Bad FENE bond");
198         rlogarg = (flt_t)0.1;
199       }
200 
201       flt_t fbond = -k/rlogarg;
202 
203       // force from LJ term
204 
205       flt_t sr2,sr6;
206       if (rsq < (flt_t)MY_CUBEROOT2*sigmasq) {
207         sr2 = sigmasq * irsq;
208         sr6 = sr2 * sr2 * sr2;
209         fbond += (flt_t)48.0 * epsilon * sr6 * (sr6 - (flt_t)0.5) * irsq;
210       }
211 
212       // energy
213 
214       flt_t ebond;
215       if (EFLAG) {
216         ebond = (flt_t)-0.5 * k / ir0sq * log(rlogarg);
217         if (rsq < (flt_t)MY_CUBEROOT2 * sigmasq)
218           ebond += (flt_t)4.0 * epsilon * sr6 * (sr6 - (flt_t)1.0) + epsilon;
219       }
220 
221       // apply force to each of 2 atoms
222 
223       #ifdef LMP_INTEL_USE_SIMDOFF
224 #if defined(USE_OMP_SIMD)
225       #pragma omp ordered simd
226 #else
227       #pragma simdoff
228 #endif
229       #endif
230       {
231         if (NEWTON_BOND || i1 < nlocal) {
232           f[i1].x += delx*fbond;
233           f[i1].y += dely*fbond;
234           f[i1].z += delz*fbond;
235         }
236 
237         if (NEWTON_BOND || i2 < nlocal) {
238           f[i2].x -= delx*fbond;
239           f[i2].y -= dely*fbond;
240           f[i2].z -= delz*fbond;
241         }
242       }
243 
244       if (EFLAG || VFLAG) {
245         #ifdef LMP_INTEL_USE_SIMDOFF
246         IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
247                              delx, dely, delz, sebond, f, NEWTON_BOND,
248                              nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
249         #else
250         IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
251                              delx, dely, delz, oebond, f, NEWTON_BOND,
252                              nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
253         #endif
254       }
255     } // for n
256     #ifdef LMP_INTEL_USE_SIMDOFF
257     if (EFLAG) oebond += sebond;
258     if (VFLAG && vflag) {
259        ov0 += sv0; ov1 += sv1; ov2 += sv2;
260        ov3 += sv3; ov4 += sv4; ov5 += sv5;
261     }
262     #endif
263   } // omp parallel
264 
265   if (EFLAG) energy += oebond;
266   if (VFLAG && vflag) {
267     virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
268     virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
269   }
270 
271   fix->set_reduce_flag();
272 }
273 
274 /* ---------------------------------------------------------------------- */
275 
276 void BondFENEIntel::init_style()
277 {
278   BondFENE::init_style();
279 
280   int ifix = modify->find_fix("package_intel");
281   if (ifix < 0)
282     error->all(FLERR,
283                "The 'package intel' command is required for /intel styles");
284   fix = static_cast<FixIntel *>(modify->fix[ifix]);
285 
286   #ifdef _LMP_INTEL_OFFLOAD
287   _use_base = 0;
288   if (fix->offload_balance() != 0.0) {
289     _use_base = 1;
290     return;
291   }
292   #endif
293 
294   fix->bond_init_check();
295 
296   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
297     pack_force_const(force_const_single, fix->get_mixed_buffers());
298   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
299     pack_force_const(force_const_double, fix->get_double_buffers());
300   else
301     pack_force_const(force_const_single, fix->get_single_buffers());
302 }
303 
304 /* ---------------------------------------------------------------------- */
305 
306 template <class flt_t, class acc_t>
307 void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc,
308                                      IntelBuffers<flt_t,acc_t> * /*buffers*/)
309 {
310   const int bp1 = atom->nbondtypes + 1;
311   fc.set_ntypes(bp1,memory);
312 
313   for (int i = 1; i < bp1; i++) {
314     fc.fc[i].k = k[i];
315     fc.fc[i].ir0sq = 1.0 / (r0[i] * r0[i]);
316     fc.fc[i].sigma = sigma[i];
317     fc.fc[i].epsilon = epsilon[i];
318   }
319 }
320 
321 /* ---------------------------------------------------------------------- */
322 
323 template <class flt_t>
324 void BondFENEIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
325                                                       Memory *memory) {
326   if (nbondtypes != _nbondtypes) {
327     if (_nbondtypes > 0)
328       _memory->destroy(fc);
329 
330     if (nbondtypes > 0)
331       _memory->create(fc,nbondtypes,"bondfeneintel.fc");
332   }
333   _nbondtypes = nbondtypes;
334   _memory = memory;
335 }
336