1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: W. Michael Brown (Intel)
17 ------------------------------------------------------------------------- */
18 
19 
20 #include "improper_harmonic_intel.h"
21 
22 #include "atom.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "math_const.h"
27 #include "memory.h"
28 #include "modify.h"
29 #include "neighbor.h"
30 #include "suffix.h"
31 
32 #include <cmath>
33 #include <cstring>
34 
35 #include "omp_compat.h"
36 
37 using namespace LAMMPS_NS;
38 using namespace MathConst;
39 
40 #define PTOLERANCE (flt_t)1.05
41 #define MTOLERANCE (flt_t)-1.05
42 #define SMALL     (flt_t)0.001
43 #define SMALL2     (flt_t)0.000001
44 #define INVSMALL   (flt_t)1000.0
45 typedef struct { int a,b,c,d,t;  } int5_t;
46 
47 /* ---------------------------------------------------------------------- */
48 
ImproperHarmonicIntel(LAMMPS * lmp)49 ImproperHarmonicIntel::ImproperHarmonicIntel(LAMMPS *lmp) :
50   ImproperHarmonic(lmp)
51 {
52   suffix_flag |= Suffix::INTEL;
53 }
54 
55 /* ---------------------------------------------------------------------- */
56 
~ImproperHarmonicIntel()57 ImproperHarmonicIntel::~ImproperHarmonicIntel()
58 {
59 }
60 
61 /* ---------------------------------------------------------------------- */
62 
compute(int eflag,int vflag)63 void ImproperHarmonicIntel::compute(int eflag, int vflag)
64 {
65   #ifdef _LMP_INTEL_OFFLOAD
66   if (_use_base) {
67     ImproperHarmonic::compute(eflag, vflag);
68     return;
69   }
70   #endif
71 
72   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
73     compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
74                           force_const_single);
75   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
76     compute<double,double>(eflag, vflag, fix->get_double_buffers(),
77                            force_const_double);
78   else
79     compute<float,float>(eflag, vflag, fix->get_single_buffers(),
80                          force_const_single);
81 }
82 
83 /* ---------------------------------------------------------------------- */
84 
85 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)86 void ImproperHarmonicIntel::compute(int eflag, int vflag,
87                                     IntelBuffers<flt_t,acc_t> *buffers,
88                                     const ForceConst<flt_t> &fc)
89 {
90   ev_init(eflag,vflag);
91   if (vflag_atom)
92     error->all(FLERR,"INTEL package does not support per-atom stress");
93 
94   if (evflag) {
95     if (vflag && !eflag) {
96       if (force->newton_bond)
97         eval<0,1,1>(vflag, buffers, fc);
98       else
99         eval<0,1,0>(vflag, buffers, fc);
100     } else {
101       if (force->newton_bond)
102         eval<1,1,1>(vflag, buffers, fc);
103       else
104         eval<1,1,0>(vflag, buffers, fc);
105     }
106   } else {
107     if (force->newton_bond)
108       eval<0,0,1>(vflag, buffers, fc);
109     else
110       eval<0,0,0>(vflag, buffers, fc);
111   }
112 }
113 
114 /* ---------------------------------------------------------------------- */
115 
116 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)117 void ImproperHarmonicIntel::eval(const int vflag,
118                                  IntelBuffers<flt_t,acc_t> *buffers,
119                                  const ForceConst<flt_t> &fc)
120 {
121   const int inum = neighbor->nimproperlist;
122   if (inum == 0) return;
123 
124   ATOM_T * _noalias const x = buffers->get_x(0);
125   const int nlocal = atom->nlocal;
126   const int nall = nlocal + atom->nghost;
127 
128   int f_stride;
129   if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
130   else f_stride = buffers->get_stride(nlocal);
131 
132   int tc;
133   FORCE_T * _noalias f_start;
134   acc_t * _noalias ev_global;
135   IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
136   const int nthreads = tc;
137 
138   acc_t oeimproper, ov0, ov1, ov2, ov3, ov4, ov5;
139   if (EFLAG) oeimproper = (acc_t)0.0;
140   if (VFLAG && vflag) {
141     ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
142   }
143 
144   #if defined(_OPENMP)
145   #pragma omp parallel LMP_DEFAULT_NONE \
146     shared(f_start,f_stride,fc) \
147     reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5)
148   #endif
149   {
150     int nfrom, npl, nto, tid;
151     #ifdef LMP_INTEL_USE_SIMDOFF
152     IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
153     #else
154     IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
155     #endif
156 
157     FORCE_T * _noalias const f = f_start + (tid * f_stride);
158     if (fix->need_zero(tid))
159       memset(f, 0, f_stride * sizeof(FORCE_T));
160 
161     const int5_t * _noalias const improperlist =
162       (int5_t *) neighbor->improperlist[0];
163 
164     #ifdef LMP_INTEL_USE_SIMDOFF
165     acc_t seimproper, sv0, sv1, sv2, sv3, sv4, sv5;
166     if (EFLAG) seimproper = (acc_t)0.0;
167     if (VFLAG && vflag) {
168       sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
169     }
170 #if defined(USE_OMP_SIMD)
171     #pragma omp simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
172 #else
173     #pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5)
174 #endif
175     for (int n = nfrom; n < nto; n++) {
176     #else
177     for (int n = nfrom; n < nto; n += npl) {
178     #endif
179       const int i1 = improperlist[n].a;
180       const int i2 = improperlist[n].b;
181       const int i3 = improperlist[n].c;
182       const int i4 = improperlist[n].d;
183       const int type = improperlist[n].t;
184 
185       // geometry of 4-body
186 
187       const flt_t vb1x = x[i1].x - x[i2].x;
188       const flt_t vb1y = x[i1].y - x[i2].y;
189       const flt_t vb1z = x[i1].z - x[i2].z;
190 
191       const flt_t vb2x = x[i3].x - x[i2].x;
192       const flt_t vb2y = x[i3].y - x[i2].y;
193       const flt_t vb2z = x[i3].z - x[i2].z;
194 
195       const flt_t vb3x = x[i4].x - x[i3].x;
196       const flt_t vb3y = x[i4].y - x[i3].y;
197       const flt_t vb3z = x[i4].z - x[i3].z;
198 
199       flt_t ss1 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
200       flt_t ss2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
201       flt_t ss3 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
202 
203       const flt_t r1 = (flt_t)1.0 / sqrt(ss1);
204       const flt_t r2 = (flt_t)1.0 / sqrt(ss2);
205       const flt_t r3 = (flt_t)1.0 / sqrt(ss3);
206 
207       ss1 = (flt_t)1.0 / ss1;
208       ss2 = (flt_t)1.0 / ss2;
209       ss3 = (flt_t)1.0 / ss3;
210 
211       // sin and cos of angle
212 
213       const flt_t c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * r1 * r3;
214       const flt_t c1 = (vb1x * vb2x + vb1y * vb2y + vb1z * vb2z) * r1 * r2;
215       const flt_t c2 = -(vb3x * vb2x + vb3y * vb2y + vb3z * vb2z) * r3 * r2;
216 
217       flt_t s1 = 1.0 - c1*c1;
218       if (s1 < SMALL) s1 = SMALL;
219 
220       flt_t s2 = (flt_t)1.0 - c2*c2;
221       if (s2 < SMALL) s2 = SMALL;
222 
223       flt_t s12 = (flt_t)1.0 / sqrt(s1*s2);
224       s1 = (flt_t)1.0 / s1;
225       s2 = (flt_t)1.0 / s2;
226       flt_t c = (c1*c2 + c0) * s12;
227 
228       // error check
229       #ifndef LMP_INTEL_USE_SIMDOFF
230       if (c > PTOLERANCE || c < MTOLERANCE)
231         problem(FLERR, i1, i2, i3, i4);
232       #endif
233 
234       if (c > (flt_t)1.0) c = (flt_t)1.0;
235       if (c < (flt_t)-1.0) c = (flt_t)-1.0;
236 
237       const flt_t sd = (flt_t)1.0 - c * c;
238       flt_t s = (flt_t)1.0 / sqrt(sd);
239       if (sd < SMALL2) s = INVSMALL;
240 
241       // force & energy
242 
243       const flt_t domega = acos(c) - fc.fc[type].chi;
244       flt_t a;
245       a = fc.fc[type].k * domega;
246 
247       flt_t eimproper;
248       if (EFLAG) eimproper = a*domega;
249 
250       a = -a * (flt_t)2.0 * s;
251       c = c * a;
252       s12 = s12 * a;
253       const flt_t a11 = c*ss1*s1;
254       const flt_t a22 = -ss2 * ((flt_t)2.0*c0*s12 - c*(s1+s2));
255       const flt_t a33 = c*ss3*s2;
256       const flt_t a12 = -r1*r2*(c1*c*s1 + c2*s12);
257       const flt_t a13 = -r1*r3*s12;
258       const flt_t a23 = r2*r3*(c2*c*s2 + c1*s12);
259 
260       const flt_t sx2  = a22*vb2x + a23*vb3x + a12*vb1x;
261       const flt_t sy2  = a22*vb2y + a23*vb3y + a12*vb1y;
262       const flt_t sz2  = a22*vb2z + a23*vb3z + a12*vb1z;
263 
264       const flt_t f1x = a12*vb2x + a13*vb3x + a11*vb1x;
265       const flt_t f1y = a12*vb2y + a13*vb3y + a11*vb1y;
266       const flt_t f1z = a12*vb2z + a13*vb3z + a11*vb1z;
267 
268       const flt_t f2x = -sx2 - f1x;
269       const flt_t f2y = -sy2 - f1y;
270       const flt_t f2z = -sz2 - f1z;
271 
272       const flt_t f4x = a23*vb2x + a33*vb3x + a13*vb1x;
273       const flt_t f4y = a23*vb2y + a33*vb3y + a13*vb1y;
274       const flt_t f4z = a23*vb2z + a33*vb3z + a13*vb1z;
275 
276       const flt_t f3x = sx2 - f4x;
277       const flt_t f3y = sy2 - f4y;
278       const flt_t f3z = sz2 - f4z;
279 
280       // apply force to each of 4 atoms
281 
282       #ifdef LMP_INTEL_USE_SIMDOFF
283 #if defined(USE_OMP_SIMD)
284       #pragma omp ordered simd
285 #else
286       #pragma simdoff
287 #endif
288       #endif
289       {
290         if (NEWTON_BOND || i1 < nlocal) {
291           f[i1].x += f1x;
292           f[i1].y += f1y;
293           f[i1].z += f1z;
294         }
295 
296         if (NEWTON_BOND || i2 < nlocal) {
297           f[i2].x += f2x;
298           f[i2].y += f2y;
299           f[i2].z += f2z;
300         }
301 
302         if (NEWTON_BOND || i3 < nlocal) {
303           f[i3].x += f3x;
304           f[i3].y += f3y;
305           f[i3].z += f3z;
306         }
307 
308         if (NEWTON_BOND || i4 < nlocal) {
309           f[i4].x += f4x;
310           f[i4].y += f4y;
311           f[i4].z += f4z;
312         }
313       }
314 
315       if (EFLAG || VFLAG) {
316         #ifdef LMP_INTEL_USE_SIMDOFF
317         IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2,
318                               i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x,
319                               f4y, f4z, vb1x, vb1y, vb1z, vb2x, vb2y, vb2z,
320                               vb3x, vb3y, vb3z, seimproper, f, NEWTON_BOND,
321                               nlocal, sv0, sv1, sv2, sv3, sv4, sv5);
322         #else
323         IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2,
324                               i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x,
325                               f4y, f4z, vb1x, vb1y, vb1z, vb2x, vb2y, vb2z,
326                               vb3x, vb3y, vb3z, oeimproper, f, NEWTON_BOND,
327                               nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
328         #endif
329       }
330     } // for n
331     #ifdef LMP_INTEL_USE_SIMDOFF
332     if (EFLAG) oeimproper += seimproper;
333     if (VFLAG && vflag) {
334       ov0 += sv0; ov1 += sv1; ov2 += sv2;
335       ov3 += sv3; ov4 += sv4; ov5 += sv5;
336     }
337     #endif
338   } // omp parallel
339   if (EFLAG) energy += oeimproper;
340   if (VFLAG && vflag) {
341     virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
342     virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
343   }
344 
345   fix->set_reduce_flag();
346 }
347 
348 /* ---------------------------------------------------------------------- */
349 
350 void ImproperHarmonicIntel::init_style()
351 {
352   int ifix = modify->find_fix("package_intel");
353   if (ifix < 0)
354     error->all(FLERR,
355                "The 'package intel' command is required for /intel styles");
356   fix = static_cast<FixIntel *>(modify->fix[ifix]);
357 
358   #ifdef _LMP_INTEL_OFFLOAD
359   _use_base = 0;
360   if (fix->offload_balance() != 0.0) {
361     _use_base = 1;
362     return;
363   }
364   #endif
365 
366   fix->bond_init_check();
367 
368   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
369     pack_force_const(force_const_single, fix->get_mixed_buffers());
370   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
371     pack_force_const(force_const_double, fix->get_double_buffers());
372   else
373     pack_force_const(force_const_single, fix->get_single_buffers());
374 }
375 
376 /* ---------------------------------------------------------------------- */
377 
378 template <class flt_t, class acc_t>
379 void ImproperHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
380                                              IntelBuffers<flt_t,acc_t> * /*buffers*/)
381 {
382   const int bp1 = atom->nimpropertypes + 1;
383   fc.set_ntypes(bp1,memory);
384 
385   for (int i = 1; i < bp1; i++) {
386     fc.fc[i].k = k[i];
387     fc.fc[i].chi = chi[i];
388   }
389 }
390 
391 /* ---------------------------------------------------------------------- */
392 
393 template <class flt_t>
394 void ImproperHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nimproper,
395                                                           Memory *memory) {
396   if (nimproper != _nimpropertypes) {
397     if (_nimpropertypes > 0)
398       _memory->destroy(fc);
399 
400     if (nimproper > 0)
401       _memory->create(fc,nimproper,"improperharmonicintel.fc");
402   }
403   _nimpropertypes = nimproper;
404   _memory = memory;
405 }
406