1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Stan Moore (Sandia)
17 ------------------------------------------------------------------------- */
18
19
20 #include "bond_fene_intel.h"
21
22 #include "atom.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "math_const.h"
27 #include "memory.h"
28 #include "modify.h"
29 #include "neighbor.h"
30 #include "suffix.h"
31 #include "update.h"
32
33 #include <cmath>
34 #include <cstring>
35
36 #include "omp_compat.h"
37
38 using namespace LAMMPS_NS;
39 using MathConst::MY_CUBEROOT2;
40
41 typedef struct { int a,b,t; } int3_t;
42
43 /* ---------------------------------------------------------------------- */
44
BondFENEIntel(LAMMPS * lmp)45 BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp)
46 {
47 suffix_flag |= Suffix::INTEL;
48 }
49
50 /* ---------------------------------------------------------------------- */
51
~BondFENEIntel()52 BondFENEIntel::~BondFENEIntel()
53 {
54 }
55
56 /* ---------------------------------------------------------------------- */
57
compute(int eflag,int vflag)58 void BondFENEIntel::compute(int eflag, int vflag)
59 {
60 #ifdef _LMP_INTEL_OFFLOAD
61 if (_use_base) {
62 BondFENE::compute(eflag, vflag);
63 return;
64 }
65 #endif
66
67 if (fix->precision() == FixIntel::PREC_MODE_MIXED)
68 compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
69 force_const_single);
70 else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
71 compute<double,double>(eflag, vflag, fix->get_double_buffers(),
72 force_const_double);
73 else
74 compute<float,float>(eflag, vflag, fix->get_single_buffers(),
75 force_const_single);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)81 void BondFENEIntel::compute(int eflag, int vflag,
82 IntelBuffers<flt_t,acc_t> *buffers,
83 const ForceConst<flt_t> &fc)
84 {
85 ev_init(eflag,vflag);
86 if (vflag_atom)
87 error->all(FLERR,"INTEL package does not support per-atom stress");
88
89 if (evflag) {
90 if (vflag && !eflag) {
91 if (force->newton_bond)
92 eval<0,1,1>(vflag, buffers, fc);
93 else
94 eval<0,1,0>(vflag, buffers, fc);
95 } else {
96 if (force->newton_bond)
97 eval<1,1,1>(vflag, buffers, fc);
98 else
99 eval<1,1,0>(vflag, buffers, fc);
100 }
101 } else {
102 if (force->newton_bond)
103 eval<0,0,1>(vflag, buffers, fc);
104 else
105 eval<0,0,0>(vflag, buffers, fc);
106 }
107 }
108
109 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)110 void BondFENEIntel::eval(const int vflag,
111 IntelBuffers<flt_t,acc_t> *buffers,
112 const ForceConst<flt_t> &fc)
113 {
114 const int inum = neighbor->nbondlist;
115 if (inum == 0) return;
116
117 ATOM_T * _noalias const x = buffers->get_x(0);
118 const int nlocal = atom->nlocal;
119 const int nall = nlocal + atom->nghost;
120
121 int f_stride;
122 if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
123 else f_stride = buffers->get_stride(nlocal);
124
125 int tc;
126 FORCE_T * _noalias f_start;
127 acc_t * _noalias ev_global;
128 IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
129 const int nthreads = tc;
130
131 acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5;
132 if (EFLAG) oebond = (acc_t)0.0;
133 if (VFLAG && vflag) {
134 ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
135 }
136
137 #if defined(_OPENMP)
138 #pragma omp parallel LMP_DEFAULT_NONE \
139 shared(f_start,f_stride,fc) \
140 reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5)
141 #endif
142 {
143 int nfrom, npl, nto, tid;
144 #ifdef LMP_INTEL_USE_SIMDOFF
145 IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
146 #else
147 IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
148 #endif
149
150 FORCE_T * _noalias const f = f_start + (tid * f_stride);
151 if (fix->need_zero(tid))
152 memset(f, 0, f_stride * sizeof(FORCE_T));
153
154 const int3_t * _noalias const bondlist =
155 (int3_t *) neighbor->bondlist[0];
156
157 #ifdef LMP_INTEL_USE_SIMDOFF
158 acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5;
159 if (EFLAG) sebond = (acc_t)0.0;
160 if (VFLAG && vflag) {
161 sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
162 }
163 #if defined(USE_OMP_SIMD)
164 #pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
165 #else
166 #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
167 #endif
168 for (int n = nfrom; n < nto; n ++) {
169 #else
170 for (int n = nfrom; n < nto; n += npl) {
171 #endif
172 const int i1 = bondlist[n].a;
173 const int i2 = bondlist[n].b;
174 const int type = bondlist[n].t;
175
176 const flt_t ir0sq = fc.fc[type].ir0sq;
177 const flt_t k = fc.fc[type].k;
178 const flt_t sigma = fc.fc[type].sigma;
179 const flt_t sigmasq = sigma*sigma;
180 const flt_t epsilon = fc.fc[type].epsilon;
181
182 const flt_t delx = x[i1].x - x[i2].x;
183 const flt_t dely = x[i1].y - x[i2].y;
184 const flt_t delz = x[i1].z - x[i2].z;
185
186 const flt_t rsq = delx*delx + dely*dely + delz*delz;
187 flt_t rlogarg = (flt_t)1.0 - rsq * ir0sq;
188 flt_t irsq = (flt_t)1.0 / rsq;
189
190 // if r -> r0, then rlogarg < 0.0 which is an error
191 // issue a warning and reset rlogarg = epsilon
192 // if r > 2*r0 something serious is wrong, abort
193
194 if (rlogarg < (flt_t)0.1) {
195 error->warning(FLERR,"FENE bond too long: {} {} {} {:.8}",
196 update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
197 if (rlogarg <= (flt_t)-3.0) error->one(FLERR,"Bad FENE bond");
198 rlogarg = (flt_t)0.1;
199 }
200
201 flt_t fbond = -k/rlogarg;
202
203 // force from LJ term
204
205 flt_t sr2,sr6;
206 if (rsq < (flt_t)MY_CUBEROOT2*sigmasq) {
207 sr2 = sigmasq * irsq;
208 sr6 = sr2 * sr2 * sr2;
209 fbond += (flt_t)48.0 * epsilon * sr6 * (sr6 - (flt_t)0.5) * irsq;
210 }
211
212 // energy
213
214 flt_t ebond;
215 if (EFLAG) {
216 ebond = (flt_t)-0.5 * k / ir0sq * log(rlogarg);
217 if (rsq < (flt_t)MY_CUBEROOT2 * sigmasq)
218 ebond += (flt_t)4.0 * epsilon * sr6 * (sr6 - (flt_t)1.0) + epsilon;
219 }
220
221 // apply force to each of 2 atoms
222
223 #ifdef LMP_INTEL_USE_SIMDOFF
224 #if defined(USE_OMP_SIMD)
225 #pragma omp ordered simd
226 #else
227 #pragma simdoff
228 #endif
229 #endif
230 {
231 if (NEWTON_BOND || i1 < nlocal) {
232 f[i1].x += delx*fbond;
233 f[i1].y += dely*fbond;
234 f[i1].z += delz*fbond;
235 }
236
237 if (NEWTON_BOND || i2 < nlocal) {
238 f[i2].x -= delx*fbond;
239 f[i2].y -= dely*fbond;
240 f[i2].z -= delz*fbond;
241 }
242 }
243
244 if (EFLAG || VFLAG) {
245 #ifdef LMP_INTEL_USE_SIMDOFF
246 IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
247 delx, dely, delz, sebond, f, NEWTON_BOND,
248 nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
249 #else
250 IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond,
251 delx, dely, delz, oebond, f, NEWTON_BOND,
252 nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
253 #endif
254 }
255 } // for n
256 #ifdef LMP_INTEL_USE_SIMDOFF
257 if (EFLAG) oebond += sebond;
258 if (VFLAG && vflag) {
259 ov0 += sv0; ov1 += sv1; ov2 += sv2;
260 ov3 += sv3; ov4 += sv4; ov5 += sv5;
261 }
262 #endif
263 } // omp parallel
264
265 if (EFLAG) energy += oebond;
266 if (VFLAG && vflag) {
267 virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
268 virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
269 }
270
271 fix->set_reduce_flag();
272 }
273
274 /* ---------------------------------------------------------------------- */
275
276 void BondFENEIntel::init_style()
277 {
278 BondFENE::init_style();
279
280 int ifix = modify->find_fix("package_intel");
281 if (ifix < 0)
282 error->all(FLERR,
283 "The 'package intel' command is required for /intel styles");
284 fix = static_cast<FixIntel *>(modify->fix[ifix]);
285
286 #ifdef _LMP_INTEL_OFFLOAD
287 _use_base = 0;
288 if (fix->offload_balance() != 0.0) {
289 _use_base = 1;
290 return;
291 }
292 #endif
293
294 fix->bond_init_check();
295
296 if (fix->precision() == FixIntel::PREC_MODE_MIXED)
297 pack_force_const(force_const_single, fix->get_mixed_buffers());
298 else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
299 pack_force_const(force_const_double, fix->get_double_buffers());
300 else
301 pack_force_const(force_const_single, fix->get_single_buffers());
302 }
303
304 /* ---------------------------------------------------------------------- */
305
306 template <class flt_t, class acc_t>
307 void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc,
308 IntelBuffers<flt_t,acc_t> * /*buffers*/)
309 {
310 const int bp1 = atom->nbondtypes + 1;
311 fc.set_ntypes(bp1,memory);
312
313 for (int i = 1; i < bp1; i++) {
314 fc.fc[i].k = k[i];
315 fc.fc[i].ir0sq = 1.0 / (r0[i] * r0[i]);
316 fc.fc[i].sigma = sigma[i];
317 fc.fc[i].epsilon = epsilon[i];
318 }
319 }
320
321 /* ---------------------------------------------------------------------- */
322
323 template <class flt_t>
324 void BondFENEIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
325 Memory *memory) {
326 if (nbondtypes != _nbondtypes) {
327 if (_nbondtypes > 0)
328 _memory->destroy(fc);
329
330 if (nbondtypes > 0)
331 _memory->create(fc,nbondtypes,"bondfeneintel.fc");
332 }
333 _nbondtypes = nbondtypes;
334 _memory = memory;
335 }
336