1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: W. Michael Brown (Intel)
17 ------------------------------------------------------------------------- */
18 
19 #include "angle_harmonic_intel.h"
20 
21 #include "atom.h"
22 #include "comm.h"
23 #include "error.h"
24 #include "force.h"
25 #include "math_const.h"
26 #include "memory.h"
27 #include "modify.h"
28 #include "neighbor.h"
29 #include "suffix.h"
30 
31 #include <cmath>
32 #include <cstring>
33 
34 #include "omp_compat.h"
35 
36 using namespace LAMMPS_NS;
37 using namespace MathConst;
38 
39 #define SMALL2 (flt_t)0.000001
40 #define INVSMALL (flt_t)1000.0
41 typedef struct { int a,b,c,t;  } int4_t;
42 
43 /* ---------------------------------------------------------------------- */
44 
AngleHarmonicIntel(LAMMPS * lmp)45 AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp)
46 {
47   suffix_flag |= Suffix::INTEL;
48 }
49 
50 /* ---------------------------------------------------------------------- */
51 
~AngleHarmonicIntel()52 AngleHarmonicIntel::~AngleHarmonicIntel()
53 {
54 }
55 
56 /* ---------------------------------------------------------------------- */
57 
compute(int eflag,int vflag)58 void AngleHarmonicIntel::compute(int eflag, int vflag)
59 {
60   #ifdef _LMP_INTEL_OFFLOAD
61   if (_use_base) {
62     AngleHarmonic::compute(eflag, vflag);
63     return;
64   }
65   #endif
66 
67   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
68     compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
69                           force_const_single);
70   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
71     compute<double,double>(eflag, vflag, fix->get_double_buffers(),
72                            force_const_double);
73   else
74     compute<float,float>(eflag, vflag, fix->get_single_buffers(),
75                          force_const_single);
76 }
77 
78 /* ---------------------------------------------------------------------- */
79 
80 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)81 void AngleHarmonicIntel::compute(int eflag, int vflag,
82                                IntelBuffers<flt_t,acc_t> *buffers,
83                                const ForceConst<flt_t> &fc)
84 {
85   ev_init(eflag,vflag);
86   if (vflag_atom)
87     error->all(FLERR,"INTEL package does not support per-atom stress");
88 
89   if (evflag) {
90     if (vflag && !eflag) {
91       if (force->newton_bond)
92         eval<0,1,1>(vflag, buffers, fc);
93       else
94         eval<0,1,0>(vflag, buffers, fc);
95     } else {
96       if (force->newton_bond)
97         eval<1,1,1>(vflag, buffers, fc);
98       else
99         eval<1,1,0>(vflag, buffers, fc);
100     }
101   } else {
102     if (force->newton_bond)
103       eval<0,0,1>(vflag, buffers, fc);
104     else
105       eval<0,0,0>(vflag, buffers, fc);
106   }
107 }
108 
109 /* ---------------------------------------------------------------------- */
110 
111 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)112 void AngleHarmonicIntel::eval(const int vflag,
113                             IntelBuffers<flt_t,acc_t> *buffers,
114                             const ForceConst<flt_t> &fc)
115 
116 {
117   const int inum = neighbor->nanglelist;
118   if (inum == 0) return;
119 
120   ATOM_T * _noalias const x = buffers->get_x(0);
121   const int nlocal = atom->nlocal;
122   const int nall = nlocal + atom->nghost;
123 
124   int f_stride;
125   if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
126   else f_stride = buffers->get_stride(nlocal);
127 
128   int tc;
129   FORCE_T * _noalias f_start;
130   acc_t * _noalias ev_global;
131   IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
132   const int nthreads = tc;
133 
134   acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
135   if (EFLAG) oeangle = (acc_t)0.0;
136   if (VFLAG && vflag) {
137     ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
138   }
139 
140   #if defined(_OPENMP)
141   #pragma omp parallel LMP_DEFAULT_NONE \
142     shared(f_start,f_stride,fc) \
143     reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
144   #endif
145   {
146     int nfrom, npl, nto, tid;
147     #ifdef LMP_INTEL_USE_SIMDOFF
148     IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
149     #else
150     IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
151     #endif
152 
153     FORCE_T * _noalias const f = f_start + (tid * f_stride);
154     if (fix->need_zero(tid))
155       memset(f, 0, f_stride * sizeof(FORCE_T));
156 
157     const int4_t * _noalias const anglelist =
158       (int4_t *) neighbor->anglelist[0];
159 
160     #ifdef LMP_INTEL_USE_SIMDOFF
161     acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5;
162     if (EFLAG) seangle = (acc_t)0.0;
163     if (VFLAG && vflag) {
164       sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
165     }
166 #if defined(USE_OMP_SIMD)
167     #pragma omp simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
168 #else
169     #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5)
170 #endif
171     for (int n = nfrom; n < nto; n ++) {
172     #else
173     for (int n = nfrom; n < nto; n += npl) {
174     #endif
175       const int i1 = anglelist[n].a;
176       const int i2 = anglelist[n].b;
177       const int i3 = anglelist[n].c;
178       const int type = anglelist[n].t;
179 
180       // 1st bond
181 
182       const flt_t delx1 = x[i1].x - x[i2].x;
183       const flt_t dely1 = x[i1].y - x[i2].y;
184       const flt_t delz1 = x[i1].z - x[i2].z;
185 
186       const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
187       const flt_t r1 = (flt_t)1.0/sqrt(rsq1);
188 
189       // 2nd bond
190 
191       const flt_t delx2 = x[i3].x - x[i2].x;
192       const flt_t dely2 = x[i3].y - x[i2].y;
193       const flt_t delz2 = x[i3].z - x[i2].z;
194 
195       const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
196       const flt_t r2 = (flt_t)1.0/sqrt(rsq2);
197 
198       // angle (cos and sin)
199 
200       flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
201       const flt_t r1r2 = r1 * r2;
202       c *= r1r2;
203 
204       if (c > (flt_t)1.0) c = (flt_t)1.0;
205       if (c < (flt_t)-1.0) c = (flt_t)-1.0;
206 
207       const flt_t sd = (flt_t)1.0 - c * c;
208       flt_t s = (flt_t)1.0/sqrt(sd);
209       if (sd < SMALL2) s = INVSMALL;
210 
211       // harmonic force & energy
212 
213       const flt_t dtheta = acos(c) - fc.fc[type].theta0;
214       const flt_t tk = fc.fc[type].k * dtheta;
215 
216       flt_t eangle;
217       if (EFLAG) eangle = tk*dtheta;
218 
219       const flt_t a = (flt_t)-2.0 * tk * s;
220       const flt_t ac = a*c;
221       const flt_t a11 = ac / rsq1;
222       const flt_t a12 = -a * (r1r2);
223       const flt_t a22 = ac / rsq2;
224 
225       const flt_t f1x = a11*delx1 + a12*delx2;
226       const flt_t f1y = a11*dely1 + a12*dely2;
227       const flt_t f1z = a11*delz1 + a12*delz2;
228 
229       const flt_t f3x = a22*delx2 + a12*delx1;
230       const flt_t f3y = a22*dely2 + a12*dely1;
231       const flt_t f3z = a22*delz2 + a12*delz1;
232 
233       // apply force to each of 3 atoms
234 
235       #ifdef LMP_INTEL_USE_SIMDOFF
236 #if defined(USE_OMP_SIMD)
237       #pragma omp ordered simd
238 #else
239       #pragma simdoff
240 #endif
241       #endif
242       {
243         if (NEWTON_BOND || i1 < nlocal) {
244           f[i1].x += f1x;
245           f[i1].y += f1y;
246           f[i1].z += f1z;
247         }
248 
249         if (NEWTON_BOND || i2 < nlocal) {
250           f[i2].x -= f1x + f3x;
251           f[i2].y -= f1y + f3y;
252           f[i2].z -= f1z + f3z;
253         }
254 
255         if (NEWTON_BOND || i3 < nlocal) {
256           f[i3].x += f3x;
257           f[i3].y += f3y;
258           f[i3].z += f3z;
259         }
260       }
261 
262       if (EFLAG || VFLAG) {
263         #ifdef LMP_INTEL_USE_SIMDOFF
264         IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
265                               f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
266                               delz1, delx2, dely2, delz2, seangle, f,
267                               NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, sv4,
268                               sv5);
269         #else
270         IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3,
271                               f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1,
272                               delz1, delx2, dely2, delz2, oeangle, f,
273                               NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4,
274                               ov5);
275         #endif
276       }
277     } // for n
278     #ifdef LMP_INTEL_USE_SIMDOFF
279     if (EFLAG) oeangle += seangle;
280     if (VFLAG && vflag) {
281         ov0 += sv0; ov1 += sv1; ov2 += sv2;
282         ov3 += sv3; ov4 += sv4; ov5 += sv5;
283     }
284     #endif
285   } // omp parallel
286 
287   if (EFLAG) energy += oeangle;
288   if (VFLAG && vflag) {
289     virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
290     virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
291   }
292 
293   fix->set_reduce_flag();
294 }
295 
296 /* ---------------------------------------------------------------------- */
297 
298 void AngleHarmonicIntel::init_style()
299 {
300   AngleHarmonic::init_style();
301 
302   int ifix = modify->find_fix("package_intel");
303   if (ifix < 0)
304     error->all(FLERR,
305                "The 'package intel' command is required for /intel styles");
306   fix = static_cast<FixIntel *>(modify->fix[ifix]);
307 
308   #ifdef _LMP_INTEL_OFFLOAD
309   _use_base = 0;
310   if (fix->offload_balance() != 0.0) {
311     _use_base = 1;
312     return;
313   }
314   #endif
315 
316   fix->bond_init_check();
317 
318   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
319     pack_force_const(force_const_single, fix->get_mixed_buffers());
320   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
321     pack_force_const(force_const_double, fix->get_double_buffers());
322   else
323     pack_force_const(force_const_single, fix->get_single_buffers());
324 }
325 
326 /* ---------------------------------------------------------------------- */
327 
328 template <class flt_t, class acc_t>
329 void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
330                                           IntelBuffers<flt_t,acc_t> * /*buffers*/)
331 {
332   const int bp1 = atom->nangletypes + 1;
333   fc.set_ntypes(bp1,memory);
334 
335   for (int i = 1; i < bp1; i++) {
336     fc.fc[i].k = k[i];
337     fc.fc[i].theta0 = theta0[i];
338   }
339 }
340 
341 /* ---------------------------------------------------------------------- */
342 
343 template <class flt_t>
344 void AngleHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes,
345                                                      Memory *memory) {
346   if (nangletypes != _nangletypes) {
347     if (_nangletypes > 0)
348       _memory->destroy(fc);
349 
350     if (nangletypes > 0)
351       _memory->create(fc,nangletypes,"anglecharmmintel.fc");
352   }
353   _nangletypes = nangletypes;
354   _memory = memory;
355 }
356