1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Ray Shan (SNL) and Christian Trott (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_tersoff_kokkos.h"
20 #include <cmath>
21 #include "kokkos.h"
22 #include "atom_kokkos.h"
23 #include "comm.h"
24 #include "force.h"
25 #include "neighbor.h"
26 #include "neigh_request.h"
27 #include "neigh_list_kokkos.h"
28 #include "math_const.h"
29 #include "memory_kokkos.h"
30 #include "error.h"
31 #include "atom_masks.h"
32 #include "suffix.h"
33 
34 using namespace LAMMPS_NS;
35 using namespace MathConst;
36 
37 #define KOKKOS_CUDA_MAX_THREADS 256
38 #define KOKKOS_CUDA_MIN_BLOCKS 8
39 
40 /* ---------------------------------------------------------------------- */
41 
42 template<class DeviceType>
PairTersoffKokkos(LAMMPS * lmp)43 PairTersoffKokkos<DeviceType>::PairTersoffKokkos(LAMMPS *lmp) : PairTersoff(lmp)
44 {
45   respa_enable = 0;
46   suffix_flag |= Suffix::KOKKOS;
47 
48   kokkosable = 1;
49   atomKK = (AtomKokkos *) atom;
50   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
51   datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
52   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
53 }
54 
55 /* ---------------------------------------------------------------------- */
56 
57 template<class DeviceType>
~PairTersoffKokkos()58 PairTersoffKokkos<DeviceType>::~PairTersoffKokkos()
59 {
60   if (!copymode) {
61     memoryKK->destroy_kokkos(k_eatom,eatom);
62     memoryKK->destroy_kokkos(k_vatom,vatom);
63   }
64 }
65 
66 /* ---------------------------------------------------------------------- */
67 
68 template<class DeviceType>
allocate()69 void PairTersoffKokkos<DeviceType>::allocate()
70 {
71   PairTersoff::allocate();
72 
73   int n = atom->ntypes;
74 
75   k_params = Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType>
76           ("PairTersoff::paramskk",n+1,n+1,n+1);
77   paramskk = k_params.template view<DeviceType>();
78 }
79 
80 /* ----------------------------------------------------------------------
81    init specific to this pair style
82 ------------------------------------------------------------------------- */
83 
84 template<class DeviceType>
init_style()85 void PairTersoffKokkos<DeviceType>::init_style()
86 {
87   PairTersoff::init_style();
88 
89   // irequest = neigh request made by parent class
90 
91   neighflag = lmp->kokkos->neighflag;
92   int irequest = neighbor->nrequest - 1;
93 
94   neighbor->requests[irequest]->
95     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
96     !std::is_same<DeviceType,LMPDeviceType>::value;
97   neighbor->requests[irequest]->
98     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
99 
100   if (neighflag == FULL)
101     error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/kk");
102 
103   if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
104   //if (neighflag == FULL || neighflag == HALFTHREAD) {
105     neighbor->requests[irequest]->full = 1;
106     neighbor->requests[irequest]->half = 0;
107     if (neighflag == FULL)
108       neighbor->requests[irequest]->ghost = 1;
109     else
110       neighbor->requests[irequest]->ghost = 0;
111   } else {
112     error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/kk");
113   }
114 }
115 
116 /* ---------------------------------------------------------------------- */
117 
118 template<class DeviceType>
setup_params()119 void PairTersoffKokkos<DeviceType>::setup_params()
120 {
121   PairTersoff::setup_params();
122 
123   int i,j,k,m;
124   int n = atom->ntypes;
125 
126   for (i = 1; i <= n; i++)
127     for (j = 1; j <= n; j++)
128       for (k = 1; k <= n; k++) {
129         m = elem3param[map[i]][map[j]][map[k]];
130         k_params.h_view(i,j,k).powerm = params[m].powerm;
131         k_params.h_view(i,j,k).gamma = params[m].gamma;
132         k_params.h_view(i,j,k).lam3 = params[m].lam3;
133         k_params.h_view(i,j,k).c = params[m].c;
134         k_params.h_view(i,j,k).d = params[m].d;
135         k_params.h_view(i,j,k).h = params[m].h;
136         k_params.h_view(i,j,k).powern = params[m].powern;
137         k_params.h_view(i,j,k).beta = params[m].beta;
138         k_params.h_view(i,j,k).lam2 = params[m].lam2;
139         k_params.h_view(i,j,k).bigb = params[m].bigb;
140         k_params.h_view(i,j,k).bigr = params[m].bigr;
141         k_params.h_view(i,j,k).bigd = params[m].bigd;
142         k_params.h_view(i,j,k).lam1 = params[m].lam1;
143         k_params.h_view(i,j,k).biga = params[m].biga;
144         k_params.h_view(i,j,k).cutsq = params[m].cutsq;
145         k_params.h_view(i,j,k).c1 = params[m].c1;
146         k_params.h_view(i,j,k).c2 = params[m].c2;
147         k_params.h_view(i,j,k).c3 = params[m].c3;
148         k_params.h_view(i,j,k).c4 = params[m].c4;
149       }
150 
151   k_params.template modify<LMPHostType>();
152 
153 }
154 
155 /* ---------------------------------------------------------------------- */
156 
157 template<class DeviceType>
compute(int eflag_in,int vflag_in)158 void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
159 {
160   eflag = eflag_in;
161   vflag = vflag_in;
162 
163   if (neighflag == FULL) no_virial_fdotr_compute = 1;
164 
165   ev_init(eflag,vflag,0);
166 
167   // reallocate per-atom arrays if necessary
168 
169   if (eflag_atom) {
170     memoryKK->destroy_kokkos(k_eatom,eatom);
171     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
172     d_eatom = k_eatom.view<DeviceType>();
173   }
174   if (vflag_atom) {
175     memoryKK->destroy_kokkos(k_vatom,vatom);
176     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
177     d_vatom = k_vatom.view<DeviceType>();
178   }
179 
180   atomKK->sync(execution_space,datamask_read);
181   k_params.template sync<DeviceType>();
182   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
183   else atomKK->modified(execution_space,F_MASK);
184 
185   x = atomKK->k_x.view<DeviceType>();
186   f = atomKK->k_f.view<DeviceType>();
187   type = atomKK->k_type.view<DeviceType>();
188   tag = atomKK->k_tag.view<DeviceType>();
189   nlocal = atom->nlocal;
190   nall = atom->nlocal + atom->nghost;
191   newton_pair = force->newton_pair;
192 
193   inum = list->inum;
194   const int ignum = inum + list->gnum;
195   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
196   d_numneigh = k_list->d_numneigh;
197   d_neighbors = k_list->d_neighbors;
198   d_ilist = k_list->d_ilist;
199 
200   need_dup = lmp->kokkos->need_dup<DeviceType>();
201   if (need_dup) {
202     dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
203     dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
204     dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
205   } else {
206     ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
207     ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
208     ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
209   }
210 
211   copymode = 1;
212 
213   EV_FLOAT ev;
214   EV_FLOAT ev_all;
215 
216   // build short neighbor list
217 
218   int max_neighs = d_neighbors.extent(1);
219 
220   if (((int)d_neighbors_short.extent(1) != max_neighs) ||
221      ((int)d_neighbors_short.extent(0) != ignum)) {
222     d_neighbors_short = Kokkos::View<int**,DeviceType>("Tersoff::neighbors_short",ignum,max_neighs);
223   }
224   if ((int)d_numneigh_short.extent(0)!=ignum)
225     d_numneigh_short = Kokkos::View<int*,DeviceType>("Tersoff::numneighs_short",ignum);
226   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairTersoffComputeShortNeigh>(0,neighflag==FULL?ignum:inum), *this);
227 
228   if (neighflag == HALF) {
229     if (evflag)
230       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev);
231     else
232       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this);
233     ev_all += ev;
234   } else if (neighflag == HALFTHREAD) {
235     if (evflag)
236       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
237     else
238       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this);
239     ev_all += ev;
240   } else if (neighflag == FULL) {
241     if (evflag)
242       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev);
243     else
244       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this);
245     ev_all += ev;
246 
247     if (evflag)
248       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev);
249     else
250       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this);
251     ev_all += ev;
252   }
253 
254   if (need_dup)
255     Kokkos::Experimental::contribute(f, dup_f);
256 
257   if (eflag_global) eng_vdwl += ev_all.evdwl;
258   if (vflag_global) {
259     virial[0] += ev_all.v[0];
260     virial[1] += ev_all.v[1];
261     virial[2] += ev_all.v[2];
262     virial[3] += ev_all.v[3];
263     virial[4] += ev_all.v[4];
264     virial[5] += ev_all.v[5];
265   }
266 
267   if (eflag_atom) {
268     if (need_dup)
269       Kokkos::Experimental::contribute(d_eatom, dup_eatom);
270     k_eatom.template modify<DeviceType>();
271     k_eatom.template sync<LMPHostType>();
272   }
273 
274   if (vflag_atom) {
275     if (need_dup)
276       Kokkos::Experimental::contribute(d_vatom, dup_vatom);
277     k_vatom.template modify<DeviceType>();
278     k_vatom.template sync<LMPHostType>();
279   }
280 
281   if (vflag_fdotr) pair_virial_fdotr_compute(this);
282 
283   copymode = 0;
284 
285   // free duplicated memory
286   if (need_dup) {
287     dup_f     = decltype(dup_f)();
288     dup_eatom = decltype(dup_eatom)();
289     dup_vatom = decltype(dup_vatom)();
290   }
291 }
292 
293 /* ---------------------------------------------------------------------- */
294 
295 template<class DeviceType>
296 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeShortNeigh,const int & ii) const297 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeShortNeigh, const int& ii) const {
298     const int i = d_ilist[ii];
299     const X_FLOAT xtmp = x(i,0);
300     const X_FLOAT ytmp = x(i,1);
301     const X_FLOAT ztmp = x(i,2);
302 
303     const int jnum = d_numneigh[i];
304     int inside = 0;
305     for (int jj = 0; jj < jnum; jj++) {
306       int j = d_neighbors(i,jj);
307       j &= NEIGHMASK;
308 
309       const X_FLOAT delx = xtmp - x(j,0);
310       const X_FLOAT dely = ytmp - x(j,1);
311       const X_FLOAT delz = ztmp - x(j,2);
312       const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
313 
314       if (rsq < cutmax*cutmax) {
315         d_neighbors_short(i,inside) = j;
316         inside++;
317       }
318     }
319     d_numneigh_short(i) = inside;
320 }
321 
322 /* ---------------------------------------------------------------------- */
323 
324 template<class DeviceType>
325 template<int NEIGHFLAG, int EVFLAG>
326 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const327 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
328 
329   // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
330 
331   auto v_f = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
332   auto a_f = v_f.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
333 
334   const int i = d_ilist[ii];
335   if (i >= nlocal) return;
336   const X_FLOAT xtmp = x(i,0);
337   const X_FLOAT ytmp = x(i,1);
338   const X_FLOAT ztmp = x(i,2);
339   const int itype = type(i);
340   const tagint itag = tag(i);
341 
342   F_FLOAT fi[3], fj[3], fk[3];
343 
344   //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
345   const int jnum = d_numneigh_short[i];
346 
347   // repulsive
348 
349   F_FLOAT f_x = 0.0;
350   F_FLOAT f_y = 0.0;
351   F_FLOAT f_z = 0.0;
352 
353   for (int jj = 0; jj < jnum; jj++) {
354     int j = d_neighbors_short(i,jj);
355     j &= NEIGHMASK;
356     const int jtype = type(j);
357     const tagint jtag = tag(j);
358 
359     if (itag > jtag) {
360       if ((itag+jtag) % 2 == 0) continue;
361     } else if (itag < jtag) {
362       if ((itag+jtag) % 2 == 1) continue;
363     } else {
364       if (x(j,2)  < ztmp) continue;
365       if (x(j,2) == ztmp && x(j,1)  < ytmp) continue;
366       if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
367     }
368 
369     const X_FLOAT delx = xtmp - x(j,0);
370     const X_FLOAT dely = ytmp - x(j,1);
371     const X_FLOAT delz = ztmp - x(j,2);
372     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
373     const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
374 
375     if (rsq > cutsq) continue;
376 
377     const F_FLOAT r = sqrt(rsq);
378     const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
379     const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
380     const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
381     const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp *
382                           (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
383     const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
384 
385     f_x += delx*frep;
386     f_y += dely*frep;
387     f_z += delz*frep;
388     a_f(j,0) -= delx*frep;
389     a_f(j,1) -= dely*frep;
390     a_f(j,2) -= delz*frep;
391 
392     if (EVFLAG) {
393       if (eflag) ev.evdwl += eng;
394       if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
395     }
396   }
397 
398   // attractive: bond order
399 
400   for (int jj = 0; jj < jnum; jj++) {
401     int j = d_neighbors_short(i,jj);
402     j &= NEIGHMASK;
403     const int jtype = type(j);
404 
405     const F_FLOAT delx1 = xtmp - x(j,0);
406     const F_FLOAT dely1 = ytmp - x(j,1);
407     const F_FLOAT delz1 = ztmp - x(j,2);
408     const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
409     const F_FLOAT cutsq1 = paramskk(itype,jtype,jtype).cutsq;
410 
411     F_FLOAT bo_ij = 0.0;
412     if (rsq1 > cutsq1) continue;
413     const F_FLOAT rij = sqrt(rsq1);
414 
415     for (int kk = 0; kk < jnum; kk++) {
416       if (jj == kk) continue;
417       int k = d_neighbors_short(i,kk);
418       k &= NEIGHMASK;
419       const int ktype = type(k);
420 
421       const F_FLOAT delx2 = xtmp - x(k,0);
422       const F_FLOAT dely2 = ytmp - x(k,1);
423       const F_FLOAT delz2 = ztmp - x(k,2);
424       const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
425       const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
426 
427       if (rsq2 > cutsq2) continue;
428       const F_FLOAT rik = sqrt(rsq2);
429       bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
430     }
431 
432     // attractive: pairwise potential and force
433 
434     const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
435     const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
436     const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
437     const F_FLOAT fatt = -0.5*bij * dfa / rij;
438     const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
439 
440     f_x += delx1*fatt;
441     f_y += dely1*fatt;
442     f_z += delz1*fatt;
443     F_FLOAT fj_x = -delx1*fatt;
444     F_FLOAT fj_y = -dely1*fatt;
445     F_FLOAT fj_z = -delz1*fatt;
446 
447     if (EVFLAG) {
448       const F_FLOAT eng = 0.5*bij * fa;
449       if (eflag) ev.evdwl += eng;
450       if (vflag_either || eflag_atom)
451         this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
452     }
453 
454     // attractive: three-body force
455 
456     for (int kk = 0; kk < jnum; kk++) {
457       if (jj == kk) continue;
458       int k = d_neighbors_short(i,kk);
459       k &= NEIGHMASK;
460       const int ktype = type(k);
461 
462       const F_FLOAT delx2 = xtmp - x(k,0);
463       const F_FLOAT dely2 = ytmp - x(k,1);
464       const F_FLOAT delz2 = ztmp - x(k,2);
465       const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
466       const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
467 
468       if (rsq2 > cutsq2) continue;
469       const F_FLOAT rik = sqrt(rsq2);
470       ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
471                 rik,delx2,dely2,delz2,fi,fj,fk);
472 
473       f_x += fi[0];
474       f_y += fi[1];
475       f_z += fi[2];
476       fj_x += fj[0];
477       fj_y += fj[1];
478       fj_z += fj[2];
479       a_f(k,0) += fk[0];
480       a_f(k,1) += fk[1];
481       a_f(k,2) += fk[2];
482 
483       if (vflag_atom) {
484         F_FLOAT delrij[3], delrik[3];
485         delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
486         delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
487         if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
488       }
489     }
490     a_f(j,0) += fj_x;
491     a_f(j,1) += fj_y;
492     a_f(j,2) += fj_z;
493   }
494   a_f(i,0) += f_x;
495   a_f(i,1) += f_y;
496   a_f(i,2) += f_z;
497 }
498 
499 template<class DeviceType>
500 template<int NEIGHFLAG, int EVFLAG>
501 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii) const502 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
503   EV_FLOAT ev;
504   this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>(), ii, ev);
505 }
506 
507 /* ---------------------------------------------------------------------- */
508 
509 template<class DeviceType>
510 template<int NEIGHFLAG, int EVFLAG>
511 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const512 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
513 
514   const int i = d_ilist[ii];
515   const X_FLOAT xtmp = x(i,0);
516   const X_FLOAT ytmp = x(i,1);
517   const X_FLOAT ztmp = x(i,2);
518   const int itype = type(i);
519 
520   int j,k,jj,kk,jtype,ktype;
521   F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
522   F_FLOAT fi[3], fj[3], fk[3];
523   X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
524 
525   //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
526   const int jnum = d_numneigh_short[i];
527 
528   // repulsive
529 
530   F_FLOAT f_x = 0.0;
531   F_FLOAT f_y = 0.0;
532   F_FLOAT f_z = 0.0;
533   for (jj = 0; jj < jnum; jj++) {
534     j = d_neighbors_short(i,jj);
535     j &= NEIGHMASK;
536     const int jtype = type(j);
537 
538     const X_FLOAT delx = xtmp - x(j,0);
539     const X_FLOAT dely = ytmp - x(j,1);
540     const X_FLOAT delz = ztmp - x(j,2);
541     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
542     const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
543 
544     if (rsq > cutsq) continue;
545 
546     const F_FLOAT r = sqrt(rsq);
547     const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
548     const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
549     const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
550     const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp *
551                           (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
552     const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
553 
554     f_x += delx*frep;
555     f_y += dely*frep;
556     f_z += delz*frep;
557 
558     if (EVFLAG) {
559       if (eflag)
560         ev.evdwl += 0.5*eng;
561       if (vflag_either || eflag_atom)
562         this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
563     }
564   }
565 
566   // attractive: bond order
567 
568   for (jj = 0; jj < jnum; jj++) {
569     j = d_neighbors_short(i,jj);
570     j &= NEIGHMASK;
571     jtype = type(j);
572 
573     delx1 = xtmp - x(j,0);
574     dely1 = ytmp - x(j,1);
575     delz1 = ztmp - x(j,2);
576     rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
577     cutsq1 = paramskk(itype,jtype,jtype).cutsq;
578 
579     bo_ij = 0.0;
580     if (rsq1 > cutsq1) continue;
581     rij = sqrt(rsq1);
582 
583     for (kk = 0; kk < jnum; kk++) {
584       if (jj == kk) continue;
585       k = d_neighbors_short(i,kk);
586       k &= NEIGHMASK;
587       ktype = type(k);
588 
589       delx2 = xtmp - x(k,0);
590       dely2 = ytmp - x(k,1);
591       delz2 = ztmp - x(k,2);
592       rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
593       cutsq2 = paramskk(itype,jtype,ktype).cutsq;
594 
595       if (rsq2 > cutsq2) continue;
596       rik = sqrt(rsq2);
597       bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
598     }
599 
600     // attractive: pairwise potential and force
601 
602     const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
603     const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
604     const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
605     const F_FLOAT fatt = -0.5*bij * dfa / rij;
606     const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
607     const F_FLOAT eng = 0.5*bij * fa;
608 
609     f_x += delx1*fatt;
610     f_y += dely1*fatt;
611     f_z += delz1*fatt;
612 
613     if (EVFLAG) {
614       if (eflag) ev.evdwl += 0.5*eng;
615       if (vflag_either || eflag_atom)
616         this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
617     }
618 
619     // attractive: three-body force
620 
621     for (kk = 0; kk < jnum; kk++) {
622       if (jj == kk) continue;
623       k = d_neighbors_short(i,kk);
624       k &= NEIGHMASK;
625       ktype = type(k);
626 
627       delx2 = xtmp - x(k,0);
628       dely2 = ytmp - x(k,1);
629       delz2 = ztmp - x(k,2);
630       rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
631       cutsq2 = paramskk(itype,jtype,ktype).cutsq;
632 
633       if (rsq2 > cutsq2) continue;
634       rik = sqrt(rsq2);
635       ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
636                 rik,delx2,dely2,delz2,fi,fj,fk);
637 
638       f_x += fi[0];
639       f_y += fi[1];
640       f_z += fi[2];
641 
642       if (vflag_atom) {
643         F_FLOAT delrij[3], delrik[3];
644         delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
645         delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
646         if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
647       }
648     }
649   }
650   f(i,0) += f_x;
651   f(i,1) += f_y;
652   f(i,2) += f_z;
653 }
654 
655 template<class DeviceType>
656 template<int NEIGHFLAG, int EVFLAG>
657 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii) const658 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
659   EV_FLOAT ev;
660   this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>(), ii, ev);
661 }
662 
663 /* ---------------------------------------------------------------------- */
664 
665 template<class DeviceType>
666 template<int NEIGHFLAG, int EVFLAG>
667 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const668 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
669 
670   const int i = d_ilist[ii];
671   const X_FLOAT xtmp = x(i,0);
672   const X_FLOAT ytmp = x(i,1);
673   const X_FLOAT ztmp = x(i,2);
674   const int itype = type(i);
675 
676   int j,k,jj,kk,jtype,ktype,j_jnum;
677   F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
678   F_FLOAT fj[3], fk[3];
679   X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
680 
681   const int jnum = d_numneigh_short[i];
682 
683   F_FLOAT f_x = 0.0;
684   F_FLOAT f_y = 0.0;
685   F_FLOAT f_z = 0.0;
686 
687   // attractive: bond order
688 
689   for (jj = 0; jj < jnum; jj++) {
690     j = d_neighbors_short(i,jj);
691     j &= NEIGHMASK;
692     if (j >= nlocal) continue;
693     jtype = type(j);
694 
695     delx1 = x(j,0) - xtmp;
696     dely1 = x(j,1) - ytmp;
697     delz1 = x(j,2) - ztmp;
698     rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
699     cutsq1 = paramskk(jtype,itype,itype).cutsq;
700 
701     bo_ij = 0.0;
702     if (rsq1 > cutsq1) continue;
703     rij = sqrt(rsq1);
704 
705     j_jnum = d_numneigh_short[j];
706 
707     for (kk = 0; kk < j_jnum; kk++) {
708       k = d_neighbors_short(j,kk);
709       if (k == i) continue;
710       k &= NEIGHMASK;
711       ktype = type(k);
712 
713       delx2 = x(j,0) - x(k,0);
714       dely2 = x(j,1) - x(k,1);
715       delz2 = x(j,2) - x(k,2);
716       rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
717       cutsq2 = paramskk(jtype,itype,ktype).cutsq;
718 
719       if (rsq2 > cutsq2) continue;
720       rik = sqrt(rsq2);
721       bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
722 
723     }
724 
725     // attractive: pairwise potential and force
726 
727     const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
728     const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
729     const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
730     const F_FLOAT fatt = -0.5*bij * dfa / rij;
731     const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
732     const F_FLOAT eng = 0.5*bij * fa;
733 
734     f_x -= delx1*fatt;
735     f_y -= dely1*fatt;
736     f_z -= delz1*fatt;
737 
738     if (EVFLAG) {
739       if (eflag)
740         ev.evdwl += 0.5 * eng;
741       if (vflag_either || eflag_atom)
742         this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
743     }
744 
745     // attractive: three-body force
746 
747     for (kk = 0; kk < j_jnum; kk++) {
748       k = d_neighbors_short(j,kk);
749       if (k == i) continue;
750       k &= NEIGHMASK;
751       ktype = type(k);
752 
753       delx2 = x(j,0) - x(k,0);
754       dely2 = x(j,1) - x(k,1);
755       delz2 = x(j,2) - x(k,2);
756       rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
757       cutsq2 = paramskk(jtype,itype,ktype).cutsq;
758 
759       if (rsq2 > cutsq2) continue;
760       rik = sqrt(rsq2);
761       ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
762                 rik,delx2,dely2,delz2,fj,fk);
763       f_x += fj[0];
764       f_y += fj[1];
765       f_z += fj[2];
766 
767       if (vflag_atom) {
768         F_FLOAT delrji[3], delrjk[3];
769         delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
770         delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
771         if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
772       }
773 
774       const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
775       const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
776       ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
777                 rij,delx1,dely1,delz1,fk);
778       f_x += fk[0];
779       f_y += fk[1];
780       f_z += fk[2];
781     }
782   }
783   f(i,0) += f_x;
784   f(i,1) += f_y;
785   f(i,2) += f_z;
786 }
787 
788 template<class DeviceType>
789 template<int NEIGHFLAG, int EVFLAG>
790 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii) const791 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
792   EV_FLOAT ev;
793   this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>(), ii, ev);
794 }
795 
796 /* ---------------------------------------------------------------------- */
797 
798 template<class DeviceType>
799 KOKKOS_INLINE_FUNCTION
ters_fc_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const800 double PairTersoffKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
801                 const int &k, const F_FLOAT &r) const
802 {
803   const F_FLOAT ters_R = paramskk(i,j,k).bigr;
804   const F_FLOAT ters_D = paramskk(i,j,k).bigd;
805 
806   if (r < ters_R-ters_D) return 1.0;
807   if (r > ters_R+ters_D) return 0.0;
808   return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D));
809 }
810 
811 /* ---------------------------------------------------------------------- */
812 
813 template<class DeviceType>
814 KOKKOS_INLINE_FUNCTION
ters_dfc(const int & i,const int & j,const int & k,const F_FLOAT & r) const815 double PairTersoffKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
816                 const int &k, const F_FLOAT &r) const
817 {
818   const F_FLOAT ters_R = paramskk(i,j,k).bigr;
819   const F_FLOAT ters_D = paramskk(i,j,k).bigd;
820 
821   if (r < ters_R-ters_D) return 0.0;
822   if (r > ters_R+ters_D) return 0.0;
823   return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D);
824 }
825 
826 /* ---------------------------------------------------------------------- */
827 
828 template<class DeviceType>
829 KOKKOS_INLINE_FUNCTION
bondorder(const int & i,const int & j,const int & k,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2) const830 double PairTersoffKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
831         const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
832         const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const
833 {
834   F_FLOAT arg, ex_delr;
835 
836   const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
837 
838   const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
839   if (int(paramskk(i,j,k).powerm) == 3) arg = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
840   else arg = param;
841 
842   if (arg > 69.0776) ex_delr = 1.e30;
843   else if (arg < -69.0776) ex_delr = 0.0;
844   else ex_delr = exp(arg);
845 
846   return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr;
847 }
848 
849 /* ---------------------------------------------------------------------- */
850 
851 template<class DeviceType>
852 KOKKOS_INLINE_FUNCTION
853 double PairTersoffKokkos<DeviceType>::
ters_gijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const854         ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
855 {
856   const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
857   const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
858   const F_FLOAT hcth = paramskk(i,j,k).h - cos;
859 
860   return paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth));
861 }
862 
863 /* ---------------------------------------------------------------------- */
864 
865 template<class DeviceType>
866 KOKKOS_INLINE_FUNCTION
867 double PairTersoffKokkos<DeviceType>::
ters_dgijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const868         ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
869 {
870   const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
871   const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
872   const F_FLOAT hcth = paramskk(i,j,k).h - cos;
873   const F_FLOAT numerator = -2.0 * ters_c * hcth;
874   const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth);
875   return paramskk(i,j,k).gamma * numerator * denominator * denominator;
876 }
877 
878 /* ---------------------------------------------------------------------- */
879 
880 template<class DeviceType>
881 KOKKOS_INLINE_FUNCTION
ters_fa_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const882 double PairTersoffKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
883                 const int &k, const F_FLOAT &r) const
884 {
885   if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
886   return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r)
887           * ters_fc_k(i,j,k,r);
888 }
889 
890 /* ---------------------------------------------------------------------- */
891 
892 template<class DeviceType>
893 KOKKOS_INLINE_FUNCTION
ters_dfa(const int & i,const int & j,const int & k,const F_FLOAT & r) const894 double PairTersoffKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
895                 const int &k, const F_FLOAT &r) const
896 {
897   if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
898   return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) *
899     (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r));
900 }
901 
902 /* ---------------------------------------------------------------------- */
903 
904 template<class DeviceType>
905 KOKKOS_INLINE_FUNCTION
ters_bij_k(const int & i,const int & j,const int & k,const F_FLOAT & bo) const906 double PairTersoffKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
907                 const int &k, const F_FLOAT &bo) const
908 {
909   const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
910   if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp);
911   if (tmp > paramskk(i,j,k).c2)
912     return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp);
913   if (tmp < paramskk(i,j,k).c4) return 1.0;
914   if (tmp < paramskk(i,j,k).c3)
915     return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern);
916   return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern));
917 }
918 
919 /* ---------------------------------------------------------------------- */
920 
921 template<class DeviceType>
922 KOKKOS_INLINE_FUNCTION
ters_dbij(const int & i,const int & j,const int & k,const F_FLOAT & bo) const923 double PairTersoffKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
924                 const int &k, const F_FLOAT &bo) const
925 {
926   const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
927   if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5/sqrt(tmp*tmp);//*pow(tmp,-1.5);
928   if (tmp > paramskk(i,j,k).c2)
929     return paramskk(i,j,k).beta * (-0.5/sqrt(tmp*tmp) * //*pow(tmp,-1.5) *
930            (1.0 - 0.5*(1.0 +  1.0/(2.0*paramskk(i,j,k).powern)) *
931            pow(tmp,-paramskk(i,j,k).powern)));
932   if (tmp < paramskk(i,j,k).c4) return 0.0;
933   if (tmp < paramskk(i,j,k).c3)
934     return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0);
935 
936   const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern);
937   return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo;
938 }
939 
940 /* ---------------------------------------------------------------------- */
941 
942 template<class DeviceType>
943 KOKKOS_INLINE_FUNCTION
ters_dthb(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fi,F_FLOAT * fj,F_FLOAT * fk) const944 void PairTersoffKokkos<DeviceType>::ters_dthb(
945         const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
946         const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
947         const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
948         F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
949 {
950   // from PairTersoff::attractive
951   F_FLOAT rij_hat[3],rik_hat[3];
952   F_FLOAT rijinv,rikinv;
953   F_FLOAT delrij[3], delrik[3];
954 
955   delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
956   delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
957 
958   //rij = sqrt(rsq1);
959   rijinv = 1.0/rij;
960   vec3_scale(rijinv,delrij,rij_hat);
961 
962   //rik = sqrt(rsq2);
963   rikinv = 1.0/rik;
964   vec3_scale(rikinv,delrik,rik_hat);
965 
966   // from PairTersoff::ters_zetaterm_d
967   F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
968   F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
969 
970   fc = ters_fc_k(i,j,k,rik);
971   dfc = ters_dfc(i,j,k,rik);
972   const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
973   if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
974   else tmp = param;
975 
976   if (tmp > 69.0776) ex_delr = 1.e30;
977   else if (tmp < -69.0776) ex_delr = 0.0;
978   else ex_delr = exp(tmp);
979 
980   if (int(paramskk(i,j,k).powerm) == 3)
981     dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr;
982   else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
983 
984   cos = vec3_dot(rij_hat,rik_hat);
985   gijk = ters_gijk(i,j,k,cos);
986   dgijk = ters_dgijk(i,j,k,cos);
987 
988   // from PairTersoff::costheta_d
989   vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
990   vec3_scale(rijinv,dcosfj,dcosfj);
991   vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
992   vec3_scale(rikinv,dcosfk,dcosfk);
993   vec3_add(dcosfj,dcosfk,dcosfi);
994   vec3_scale(-1.0,dcosfi,dcosfi);
995 
996   vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi);
997   vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi);
998   vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi);
999   vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi);
1000   vec3_scale(prefactor,fi,fi);
1001 
1002   vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1003   vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1004   vec3_scale(prefactor,fj,fj);
1005 
1006   vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1007   vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1008   vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1009   vec3_scale(prefactor,fk,fk);
1010 
1011 }
1012 
1013 /* ---------------------------------------------------------------------- */
1014 
1015 template<class DeviceType>
1016 KOKKOS_INLINE_FUNCTION
ters_dthbj(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fj,F_FLOAT * fk) const1017 void PairTersoffKokkos<DeviceType>::ters_dthbj(
1018         const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1019         const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1020         const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1021         F_FLOAT *fj, F_FLOAT *fk) const
1022 {
1023   F_FLOAT rij_hat[3],rik_hat[3];
1024   F_FLOAT rijinv,rikinv;
1025   F_FLOAT delrij[3], delrik[3];
1026 
1027   delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1028   delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1029 
1030   rijinv = 1.0/rij;
1031   vec3_scale(rijinv,delrij,rij_hat);
1032 
1033   rikinv = 1.0/rik;
1034   vec3_scale(rikinv,delrik,rik_hat);
1035 
1036   F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1037   F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1038 
1039   fc = ters_fc_k(i,j,k,rik);
1040   dfc = ters_dfc(i,j,k,rik);
1041   const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
1042   if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1043   else tmp = param;//paramskk(i,j,k).lam3 * (rij-rik);
1044 
1045   if (tmp > 69.0776) ex_delr = 1.e30;
1046   else if (tmp < -69.0776) ex_delr = 0.0;
1047   else ex_delr = exp(tmp);
1048 
1049   if (int(paramskk(i,j,k).powerm) == 3)
1050     dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1051   else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1052 
1053   cos = vec3_dot(rij_hat,rik_hat);
1054   gijk = ters_gijk(i,j,k,cos);
1055   dgijk = ters_dgijk(i,j,k,cos);
1056 
1057   vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1058   vec3_scale(rijinv,dcosfj,dcosfj);
1059   vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1060   vec3_scale(rikinv,dcosfk,dcosfk);
1061   vec3_add(dcosfj,dcosfk,dcosfi);
1062   vec3_scale(-1.0,dcosfi,dcosfi);
1063 
1064   vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1065   vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1066   vec3_scale(prefactor,fj,fj);
1067 
1068   vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1069   vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1070   vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1071   vec3_scale(prefactor,fk,fk);
1072 
1073 }
1074 
1075 /* ---------------------------------------------------------------------- */
1076 
1077 template<class DeviceType>
1078 KOKKOS_INLINE_FUNCTION
ters_dthbk(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fk) const1079 void PairTersoffKokkos<DeviceType>::ters_dthbk(
1080         const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1081         const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1082         const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1083         F_FLOAT *fk) const
1084 {
1085   F_FLOAT rij_hat[3],rik_hat[3];
1086   F_FLOAT rijinv,rikinv;
1087   F_FLOAT delrij[3], delrik[3];
1088 
1089   delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1090   delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1091 
1092   rijinv = 1.0/rij;
1093   vec3_scale(rijinv,delrij,rij_hat);
1094 
1095   rikinv = 1.0/rik;
1096   vec3_scale(rikinv,delrik,rik_hat);
1097 
1098   F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1099   F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1100 
1101   fc = ters_fc_k(i,j,k,rik);
1102   dfc = ters_dfc(i,j,k,rik);
1103   const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
1104   if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1105   else tmp = param;//paramskk(i,j,k).lam3 * (rij-rik);
1106 
1107   if (tmp > 69.0776) ex_delr = 1.e30;
1108   else if (tmp < -69.0776) ex_delr = 0.0;
1109   else ex_delr = exp(tmp);
1110 
1111   if (int(paramskk(i,j,k).powerm) == 3)
1112     dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1113   else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1114 
1115   cos = vec3_dot(rij_hat,rik_hat);
1116   gijk = ters_gijk(i,j,k,cos);
1117   dgijk = ters_dgijk(i,j,k,cos);
1118 
1119   vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1120   vec3_scale(rijinv,dcosfj,dcosfj);
1121   vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1122   vec3_scale(rikinv,dcosfk,dcosfk);
1123   vec3_add(dcosfj,dcosfk,dcosfi);
1124   vec3_scale(-1.0,dcosfi,dcosfi);
1125 
1126   vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1127   vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1128   vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1129   vec3_scale(prefactor,fk,fk);
1130 
1131 }
1132 
1133 /* ---------------------------------------------------------------------- */
1134 
1135 template<class DeviceType>
1136 template<int NEIGHFLAG>
1137 KOKKOS_INLINE_FUNCTION
ev_tally(EV_FLOAT & ev,const int & i,const int & j,const F_FLOAT & epair,const F_FLOAT & fpair,const F_FLOAT & delx,const F_FLOAT & dely,const F_FLOAT & delz) const1138 void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
1139       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
1140                 const F_FLOAT &dely, const F_FLOAT &delz) const
1141 {
1142   const int VFLAG = vflag_either;
1143 
1144   // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
1145 
1146   auto v_eatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
1147   auto a_eatom = v_eatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1148 
1149   auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1150   auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1151 
1152   if (eflag_atom) {
1153     const E_FLOAT epairhalf = 0.5 * epair;
1154     a_eatom[i] += epairhalf;
1155     if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
1156   }
1157 
1158   if (VFLAG) {
1159     const E_FLOAT v0 = delx*delx*fpair;
1160     const E_FLOAT v1 = dely*dely*fpair;
1161     const E_FLOAT v2 = delz*delz*fpair;
1162     const E_FLOAT v3 = delx*dely*fpair;
1163     const E_FLOAT v4 = delx*delz*fpair;
1164     const E_FLOAT v5 = dely*delz*fpair;
1165 
1166     if (vflag_global) {
1167       if (NEIGHFLAG != FULL) {
1168         ev.v[0] += v0;
1169         ev.v[1] += v1;
1170         ev.v[2] += v2;
1171         ev.v[3] += v3;
1172         ev.v[4] += v4;
1173         ev.v[5] += v5;
1174       } else {
1175         ev.v[0] += 0.5*v0;
1176         ev.v[1] += 0.5*v1;
1177         ev.v[2] += 0.5*v2;
1178         ev.v[3] += 0.5*v3;
1179         ev.v[4] += 0.5*v4;
1180         ev.v[5] += 0.5*v5;
1181       }
1182     }
1183 
1184     if (vflag_atom) {
1185       a_vatom(i,0) += 0.5*v0;
1186       a_vatom(i,1) += 0.5*v1;
1187       a_vatom(i,2) += 0.5*v2;
1188       a_vatom(i,3) += 0.5*v3;
1189       a_vatom(i,4) += 0.5*v4;
1190       a_vatom(i,5) += 0.5*v5;
1191 
1192       if (NEIGHFLAG != FULL) {
1193         a_vatom(j,0) += 0.5*v0;
1194         a_vatom(j,1) += 0.5*v1;
1195         a_vatom(j,2) += 0.5*v2;
1196         a_vatom(j,3) += 0.5*v3;
1197         a_vatom(j,4) += 0.5*v4;
1198         a_vatom(j,5) += 0.5*v5;
1199       }
1200     }
1201   }
1202 }
1203 
1204 /* ---------------------------------------------------------------------- */
1205 
1206 template<class DeviceType>
1207 template<int NEIGHFLAG>
1208 KOKKOS_INLINE_FUNCTION
v_tally3(EV_FLOAT & ev,const int & i,const int & j,const int & k,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drij,F_FLOAT * drik) const1209 void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
1210         F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
1211 {
1212   // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
1213 
1214   auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1215   auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1216 
1217   F_FLOAT v[6];
1218 
1219   v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]);
1220   v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]);
1221   v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]);
1222   v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]);
1223   v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]);
1224   v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]);
1225 
1226   if (vflag_global) {
1227     ev.v[0] += v[0];
1228     ev.v[1] += v[1];
1229     ev.v[2] += v[2];
1230     ev.v[3] += v[3];
1231     ev.v[4] += v[4];
1232     ev.v[5] += v[5];
1233   }
1234 
1235   if (vflag_atom) {
1236     a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
1237     a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
1238     if (NEIGHFLAG != FULL) {
1239       a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
1240       a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
1241       a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
1242       a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
1243     }
1244   }
1245 
1246 }
1247 
1248 /* ---------------------------------------------------------------------- */
1249 
1250 template<class DeviceType>
1251 KOKKOS_INLINE_FUNCTION
v_tally3_atom(EV_FLOAT & ev,const int & i,const int &,const int &,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drji,F_FLOAT * drjk) const1252 void PairTersoffKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int & /*j*/,
1253                                                   const int & /*k*/, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
1254 {
1255   F_FLOAT v[6];
1256 
1257   v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
1258   v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
1259   v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
1260   v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
1261   v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
1262   v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
1263 
1264   if (vflag_global) {
1265     ev.v[0] += v[0];
1266     ev.v[1] += v[1];
1267     ev.v[2] += v[2];
1268     ev.v[3] += v[3];
1269     ev.v[4] += v[4];
1270     ev.v[5] += v[5];
1271   }
1272 
1273   if (vflag_atom) {
1274     d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2];
1275     d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5];
1276   }
1277 }
1278 
1279 /* ---------------------------------------------------------------------- */
1280 
1281 template<class DeviceType>
1282 KOKKOS_INLINE_FUNCTION
sbmask(const int & j) const1283 int PairTersoffKokkos<DeviceType>::sbmask(const int& j) const {
1284   return j >> SBBITS & 3;
1285 }
1286 
1287 namespace LAMMPS_NS {
1288 template class PairTersoffKokkos<LMPDeviceType>;
1289 #ifdef LMP_KOKKOS_GPU
1290 template class PairTersoffKokkos<LMPHostType>;
1291 #endif
1292 }
1293 
1294