1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: W. Michael Brown (Intel)
17 ------------------------------------------------------------------------- */
18 
19 #include "bond_harmonic_intel.h"
20 
21 #include "atom.h"
22 #include "comm.h"
23 #include "error.h"
24 #include "force.h"
25 #include "memory.h"
26 #include "modify.h"
27 #include "neighbor.h"
28 #include "suffix.h"
29 
30 #include <cmath>
31 #include <cstring>
32 
33 #include "omp_compat.h"
34 
35 using namespace LAMMPS_NS;
36 
37 typedef struct { int a,b,t;  } int3_t;
38 
39 /* ---------------------------------------------------------------------- */
40 
BondHarmonicIntel(LAMMPS * lmp)41 BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp)
42 {
43   suffix_flag |= Suffix::INTEL;
44 }
45 
46 /* ---------------------------------------------------------------------- */
47 
~BondHarmonicIntel()48 BondHarmonicIntel::~BondHarmonicIntel()
49 {
50 }
51 
52 /* ---------------------------------------------------------------------- */
53 
compute(int eflag,int vflag)54 void BondHarmonicIntel::compute(int eflag, int vflag)
55 {
56   #ifdef _LMP_INTEL_OFFLOAD
57   if (_use_base) {
58     BondHarmonic::compute(eflag, vflag);
59     return;
60   }
61   #endif
62 
63   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
64     compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
65                           force_const_single);
66   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
67     compute<double,double>(eflag, vflag, fix->get_double_buffers(),
68                            force_const_double);
69   else
70     compute<float,float>(eflag, vflag, fix->get_single_buffers(),
71                          force_const_single);
72 }
73 
74 /* ---------------------------------------------------------------------- */
75 
76 template <class flt_t, class acc_t>
compute(int eflag,int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)77 void BondHarmonicIntel::compute(int eflag, int vflag,
78                                 IntelBuffers<flt_t,acc_t> *buffers,
79                                 const ForceConst<flt_t> &fc)
80 {
81   ev_init(eflag,vflag);
82   if (vflag_atom)
83     error->all(FLERR,"INTEL package does not support per-atom stress");
84 
85   if (evflag) {
86     if (vflag && !eflag) {
87       if (force->newton_bond)
88         eval<0,1,1>(vflag, buffers, fc);
89       else
90         eval<0,1,0>(vflag, buffers, fc);
91     } else {
92       if (force->newton_bond)
93         eval<1,1,1>(vflag, buffers, fc);
94       else
95         eval<1,1,0>(vflag, buffers, fc);
96     }
97   } else {
98     if (force->newton_bond)
99       eval<0,0,1>(vflag, buffers, fc);
100     else
101       eval<0,0,0>(vflag, buffers, fc);
102   }
103 }
104 
105 template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t>
eval(const int vflag,IntelBuffers<flt_t,acc_t> * buffers,const ForceConst<flt_t> & fc)106 void BondHarmonicIntel::eval(const int vflag,
107                              IntelBuffers<flt_t,acc_t> *buffers,
108                              const ForceConst<flt_t> &fc)
109 {
110   const int inum = neighbor->nbondlist;
111   if (inum == 0) return;
112 
113   ATOM_T * _noalias const x = buffers->get_x(0);
114   const int nlocal = atom->nlocal;
115   const int nall = nlocal + atom->nghost;
116 
117   int f_stride;
118   if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
119   else f_stride = buffers->get_stride(nlocal);
120 
121   int tc;
122   FORCE_T * _noalias f_start;
123   acc_t * _noalias ev_global;
124   IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
125   const int nthreads = tc;
126 
127   acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5;
128   if (EFLAG) oebond = (acc_t)0.0;
129   if (VFLAG && vflag) {
130     ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
131   }
132 
133   #if defined(_OPENMP)
134   #pragma omp parallel LMP_DEFAULT_NONE \
135     shared(f_start,f_stride,fc)           \
136     reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5)
137   #endif
138   {
139     int nfrom, npl, nto, tid;
140     #ifdef LMP_INTEL_USE_SIMDOFF
141     IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
142     #else
143     IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads);
144     #endif
145 
146     FORCE_T * _noalias const f = f_start + (tid * f_stride);
147     if (fix->need_zero(tid))
148       memset(f, 0, f_stride * sizeof(FORCE_T));
149 
150     const int3_t * _noalias const bondlist =
151       (int3_t *) neighbor->bondlist[0];
152 
153     #ifdef LMP_INTEL_USE_SIMDOFF
154     acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5;
155     if (EFLAG) sebond = (acc_t)0.0;
156     if (VFLAG && vflag) {
157       sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
158     }
159 #if defined(USE_OMP_SIMD)
160     #pragma omp simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
161 #else
162     #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5)
163 #endif
164     for (int n = nfrom; n < nto; n ++) {
165     #else
166     for (int n = nfrom; n < nto; n += npl) {
167     #endif
168       const int i1 = bondlist[n].a;
169       const int i2 = bondlist[n].b;
170       const int type = bondlist[n].t;
171 
172       const flt_t delx = x[i1].x - x[i2].x;
173       const flt_t dely = x[i1].y - x[i2].y;
174       const flt_t delz = x[i1].z - x[i2].z;
175 
176       const flt_t rsq = delx*delx + dely*dely + delz*delz;
177       const flt_t r = sqrt(rsq);
178       const flt_t dr = r - fc.fc[type].r0;
179       const flt_t rk = fc.fc[type].k * dr;
180 
181       // force & energy
182 
183       flt_t fbond;
184       if (r > (flt_t)0.0) fbond = (flt_t)-2.0*rk/r;
185       else fbond = (flt_t)0.0;
186 
187       flt_t ebond;
188       if (EFLAG) ebond = rk*dr;
189 
190       // apply force to each of 2 atoms
191       #ifdef LMP_INTEL_USE_SIMDOFF
192 #if defined(USE_OMP_SIMD)
193       #pragma omp ordered simd
194 #else
195       #pragma simdoff
196 #endif
197       #endif
198       {
199         if (NEWTON_BOND || i1 < nlocal) {
200           f[i1].x += delx*fbond;
201           f[i1].y += dely*fbond;
202           f[i1].z += delz*fbond;
203         }
204 
205         if (NEWTON_BOND || i2 < nlocal) {
206           f[i2].x -= delx*fbond;
207           f[i2].y -= dely*fbond;
208           f[i2].z -= delz*fbond;
209         }
210       }
211 
212       if (EFLAG || VFLAG) {
213         #ifdef LMP_INTEL_USE_SIMDOFF
214         IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2,
215                              fbond, delx, dely, delz, sebond, f,
216                              NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3,
217                              sv4, sv5);
218         #else
219         IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2,
220                              fbond, delx, dely, delz, oebond, f,
221                              NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3,
222                              ov4, ov5);
223         #endif
224       }
225     } // for n
226     #ifdef LMP_INTEL_USE_SIMDOFF
227     if (EFLAG) oebond += sebond;
228     if (VFLAG && vflag) {
229        ov0 += sv0; ov1 += sv1; ov2 += sv2;
230        ov3 += sv3; ov4 += sv4; ov5 += sv5;
231     }
232     #endif
233   } // omp parallel
234 
235   if (EFLAG) energy += oebond;
236   if (VFLAG && vflag) {
237     virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
238     virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
239   }
240 
241   fix->set_reduce_flag();
242 }
243 
244 /* ---------------------------------------------------------------------- */
245 
246 void BondHarmonicIntel::init_style()
247 {
248   BondHarmonic::init_style();
249 
250   int ifix = modify->find_fix("package_intel");
251   if (ifix < 0)
252     error->all(FLERR,
253                "The 'package intel' command is required for /intel styles");
254   fix = static_cast<FixIntel *>(modify->fix[ifix]);
255 
256   #ifdef _LMP_INTEL_OFFLOAD
257   _use_base = 0;
258   if (fix->offload_balance() != 0.0) {
259     _use_base = 1;
260     return;
261   }
262   #endif
263 
264   fix->bond_init_check();
265 
266   if (fix->precision() == FixIntel::PREC_MODE_MIXED)
267     pack_force_const(force_const_single, fix->get_mixed_buffers());
268   else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
269     pack_force_const(force_const_double, fix->get_double_buffers());
270   else
271     pack_force_const(force_const_single, fix->get_single_buffers());
272 }
273 
274 /* ---------------------------------------------------------------------- */
275 
276 template <class flt_t, class acc_t>
277 void BondHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
278                                          IntelBuffers<flt_t,acc_t> * /*buffers*/)
279 {
280   const int bp1 = atom->nbondtypes + 1;
281   fc.set_ntypes(bp1,memory);
282 
283   for (int i = 1; i < bp1; i++) {
284     fc.fc[i].k = k[i];
285     fc.fc[i].r0 = r0[i];
286   }
287 }
288 
289 /* ---------------------------------------------------------------------- */
290 
291 template <class flt_t>
292 void BondHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
293                                                       Memory *memory) {
294   if (nbondtypes != _nbondtypes) {
295     if (_nbondtypes > 0)
296       _memory->destroy(fc);
297 
298     if (nbondtypes > 0)
299       _memory->create(fc,nbondtypes,"bondharmonicintel.fc");
300   }
301   _nbondtypes = nbondtypes;
302   _memory = memory;
303 }
304