1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: W. Michael Brown (Intel)
17 ------------------------------------------------------------------------- */
18
19 #include "angle_harmonic_intel.h"
20
21 #include "atom.h"
22 #include "comm.h"
23 #include "error.h"
24 #include "force.h"
25 #include "math_const.h"
26 #include "memory.h"
27 #include "modify.h"
28 #include "neighbor.h"
29 #include "suffix.h"
30
31 #include <cmath>
32 #include <cstring>
33
34 #include "omp_compat.h"
35
36 using namespace LAMMPS_NS;
37 using namespace MathConst;
38
39 #define SMALL2 (flt_t)0.000001
40 #define INVSMALL (flt_t)1000.0
41 typedef struct { int a,b,c,t; } int4_t;
42
43 /* ---------------------------------------------------------------------- */
44
AngleHarmonicIntel(LAMMPS * lmp)45 AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp)
46 {
47 suffix_flag |= Suffix::INTEL;
48 }
49
50 /* ---------------------------------------------------------------------- */
51
~AngleHarmonicIntel()52 AngleHarmonicIntel::~AngleHarmonicIntel()
53 {
54 }
55
56 /* ---------------------------------------------------------------------- */
57
compute(int eflag,int vflag)58 void AngleHarmonicIntel::compute(int eflag, int vflag)
59 {
60 #ifdef _LMP_INTEL_OFFLOAD
61 if (_use_base) {
62 AngleHarmonic::compute(eflag, vflag);
63 return;
64 }
65 #endif
66
67 if (fix->precision() == FixIntel::PREC_MODE_MIXED)
68 compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
69 force_const_single);
70 else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
71 compute<double,double>(eflag, vflag, fix->get_double_buffers(),
72 force_const_double);
73 else
74 compute<float,float>(eflag, vflag, fix->get_single_buffers(),
75 force_const_single);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)81 void AngleHarmonicIntel::compute(int eflag, int vflag,
82 IntelBuffers<flt_t,acc_t> *buffers,
83 const ForceConst<flt_t> &fc)
84 {
85 ev_init(eflag,vflag);
86 if (vflag_atom)
87 error->all(FLERR,"INTEL package does not support per-atom stress");
88
89 if (evflag) {
90 if (vflag && !eflag) {
91 if (force->newton_bond)
92 eval<0,1,1>(vflag, buffers, fc);
93 else
94 eval<0,1,0>(vflag, buffers, fc);
95 } else {
96 if (force->newton_bond)
97 eval<1,1,1>(vflag, buffers, fc);
98 else
99 eval<1,1,0>(vflag, buffers, fc);
100 }
101 } else {
102 if (force->newton_bond)
103 eval<0,0,1>(vflag, buffers, fc);
104 else
105 eval<0,0,0>(vflag, buffers, fc);
106 }
107 }
108
109 /* ---------------------------------------------------------------------- */
110
111 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)112 void AngleHarmonicIntel::eval(const int vflag,
113 IntelBuffers<flt_t,acc_t> *buffers,
114 const ForceConst<flt_t> &fc)
115
116 {
117 const int inum = neighbor->nanglelist;
118 if (inum == 0) return;
119
120 ATOM_T * _noalias const x = buffers->get_x(0);
121 const int nlocal = atom->nlocal;
122 const int nall = nlocal + atom->nghost;
123
124 int f_stride;
125 if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
126 else f_stride = buffers->get_stride(nlocal);
127
128 int tc;
129 FORCE_T * _noalias f_start;
130 acc_t * _noalias ev_global;
131 IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
132 const int nthreads = tc;
133
134 acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
135 if (EFLAG) oeangle = (acc_t)0.0;
136 if (VFLAG && vflag) {
137 ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
138 }
139
140 #if defined(_OPENMP)
141 #pragma omp parallel LMP_DEFAULT_NONE \
142 shared(f_start,f_stride,fc) \
143 reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
144 #endif
145 {
146 int nfrom, npl, nto, tid;
147 #ifdef LMP_INTEL_USE_SIMDOFF
148 IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
149 #else
150 IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
151 #endif
152
153 FORCE_T * _noalias const f = f_start + (tid * f_stride);
154 if (fix->need_zero(tid))
155 memset(f, 0, f_stride * sizeof(FORCE_T));
156
157 const int4_t * _noalias const anglelist =
158 (int4_t *) neighbor->anglelist[0];
159
160 #ifdef LMP_INTEL_USE_SIMDOFF
161 acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5;
162 if (EFLAG) seangle = (acc_t)0.0;
163 if (VFLAG && vflag) {
164 sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
165 }
166 #if defined(USE_OMP_SIMD)
167 #pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
168 #else
169 #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
170 #endif
171 for (int n = nfrom; n < nto; n ++) {
172 #else
173 for (int n = nfrom; n < nto; n += npl) {
174 #endif
175 const int i1 = anglelist[n].a;
176 const int i2 = anglelist[n].b;
177 const int i3 = anglelist[n].c;
178 const int type = anglelist[n].t;
179
180 // 1st bond
181
182 const flt_t delx1 = x[i1].x - x[i2].x;
183 const flt_t dely1 = x[i1].y - x[i2].y;
184 const flt_t delz1 = x[i1].z - x[i2].z;
185
186 const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
187 const flt_t r1 = (flt_t)1.0/sqrt(rsq1);
188
189 // 2nd bond
190
191 const flt_t delx2 = x[i3].x - x[i2].x;
192 const flt_t dely2 = x[i3].y - x[i2].y;
193 const flt_t delz2 = x[i3].z - x[i2].z;
194
195 const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
196 const flt_t r2 = (flt_t)1.0/sqrt(rsq2);
197
198 // angle (cos and sin)
199
200 flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
201 const flt_t r1r2 = r1 * r2;
202 c *= r1r2;
203
204 if (c > (flt_t)1.0) c = (flt_t)1.0;
205 if (c < (flt_t)-1.0) c = (flt_t)-1.0;
206
207 const flt_t sd = (flt_t)1.0 - c * c;
208 flt_t s = (flt_t)1.0/sqrt(sd);
209 if (sd < SMALL2) s = INVSMALL;
210
211 // harmonic force & energy
212
213 const flt_t dtheta = acos(c) - fc.fc[type].theta0;
214 const flt_t tk = fc.fc[type].k * dtheta;
215
216 flt_t eangle;
217 if (EFLAG) eangle = tk*dtheta;
218
219 const flt_t a = (flt_t)-2.0 * tk * s;
220 const flt_t ac = a*c;
221 const flt_t a11 = ac / rsq1;
222 const flt_t a12 = -a * (r1r2);
223 const flt_t a22 = ac / rsq2;
224
225 const flt_t f1x = a11*delx1 + a12*delx2;
226 const flt_t f1y = a11*dely1 + a12*dely2;
227 const flt_t f1z = a11*delz1 + a12*delz2;
228
229 const flt_t f3x = a22*delx2 + a12*delx1;
230 const flt_t f3y = a22*dely2 + a12*dely1;
231 const flt_t f3z = a22*delz2 + a12*delz1;
232
233 // apply force to each of 3 atoms
234
235 #ifdef LMP_INTEL_USE_SIMDOFF
236 #if defined(USE_OMP_SIMD)
237 #pragma omp ordered simd
238 #else
239 #pragma simdoff
240 #endif
241 #endif
242 {
243 if (NEWTON_BOND || i1 < nlocal) {
244 f[i1].x += f1x;
245 f[i1].y += f1y;
246 f[i1].z += f1z;
247 }
248
249 if (NEWTON_BOND || i2 < nlocal) {
250 f[i2].x -= f1x + f3x;
251 f[i2].y -= f1y + f3y;
252 f[i2].z -= f1z + f3z;
253 }
254
255 if (NEWTON_BOND || i3 < nlocal) {
256 f[i3].x += f3x;
257 f[i3].y += f3y;
258 f[i3].z += f3z;
259 }
260 }
261
262 if (EFLAG || VFLAG) {
263 #ifdef LMP_INTEL_USE_SIMDOFF
264 IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
265 f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
266 delz1, delx2, dely2, delz2, seangle, f,
267 NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, sv4,
268 sv5);
269 #else
270 IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
271 f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
272 delz1, delx2, dely2, delz2, oeangle, f,
273 NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4,
274 ov5);
275 #endif
276 }
277 } // for n
278 #ifdef LMP_INTEL_USE_SIMDOFF
279 if (EFLAG) oeangle += seangle;
280 if (VFLAG && vflag) {
281 ov0 += sv0; ov1 += sv1; ov2 += sv2;
282 ov3 += sv3; ov4 += sv4; ov5 += sv5;
283 }
284 #endif
285 } // omp parallel
286
287 if (EFLAG) energy += oeangle;
288 if (VFLAG && vflag) {
289 virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
290 virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
291 }
292
293 fix->set_reduce_flag();
294 }
295
296 /* ---------------------------------------------------------------------- */
297
298 void AngleHarmonicIntel::init_style()
299 {
300 AngleHarmonic::init_style();
301
302 int ifix = modify->find_fix("package_intel");
303 if (ifix < 0)
304 error->all(FLERR,
305 "The 'package intel' command is required for /intel styles");
306 fix = static_cast<FixIntel *>(modify->fix[ifix]);
307
308 #ifdef _LMP_INTEL_OFFLOAD
309 _use_base = 0;
310 if (fix->offload_balance() != 0.0) {
311 _use_base = 1;
312 return;
313 }
314 #endif
315
316 fix->bond_init_check();
317
318 if (fix->precision() == FixIntel::PREC_MODE_MIXED)
319 pack_force_const(force_const_single, fix->get_mixed_buffers());
320 else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
321 pack_force_const(force_const_double, fix->get_double_buffers());
322 else
323 pack_force_const(force_const_single, fix->get_single_buffers());
324 }
325
326 /* ---------------------------------------------------------------------- */
327
328 template <class flt_t, class acc_t>
329 void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
330 IntelBuffers<flt_t,acc_t> * /*buffers*/)
331 {
332 const int bp1 = atom->nangletypes + 1;
333 fc.set_ntypes(bp1,memory);
334
335 for (int i = 1; i < bp1; i++) {
336 fc.fc[i].k = k[i];
337 fc.fc[i].theta0 = theta0[i];
338 }
339 }
340
341 /* ---------------------------------------------------------------------- */
342
343 template <class flt_t>
344 void AngleHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes,
345 Memory *memory) {
346 if (nangletypes != _nangletypes) {
347 if (_nangletypes > 0)
348 _memory->destroy(fc);
349
350 if (nangletypes > 0)
351 _memory->create(fc,nangletypes,"anglecharmmintel.fc");
352 }
353 _nangletypes = nangletypes;
354 _memory = memory;
355 }
356