1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Ray Shan (SNL) and Christian Trott (SNL)
17 ------------------------------------------------------------------------- */
18
19 #include "pair_tersoff_kokkos.h"
20 #include <cmath>
21 #include "kokkos.h"
22 #include "atom_kokkos.h"
23 #include "comm.h"
24 #include "force.h"
25 #include "neighbor.h"
26 #include "neigh_request.h"
27 #include "neigh_list_kokkos.h"
28 #include "math_const.h"
29 #include "memory_kokkos.h"
30 #include "error.h"
31 #include "atom_masks.h"
32 #include "suffix.h"
33
34 using namespace LAMMPS_NS;
35 using namespace MathConst;
36
37 #define KOKKOS_CUDA_MAX_THREADS 256
38 #define KOKKOS_CUDA_MIN_BLOCKS 8
39
40 /* ---------------------------------------------------------------------- */
41
42 template<class DeviceType>
PairTersoffKokkos(LAMMPS * lmp)43 PairTersoffKokkos<DeviceType>::PairTersoffKokkos(LAMMPS *lmp) : PairTersoff(lmp)
44 {
45 respa_enable = 0;
46 suffix_flag |= Suffix::KOKKOS;
47
48 kokkosable = 1;
49 atomKK = (AtomKokkos *) atom;
50 execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
51 datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
52 datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
53 }
54
55 /* ---------------------------------------------------------------------- */
56
57 template<class DeviceType>
~PairTersoffKokkos()58 PairTersoffKokkos<DeviceType>::~PairTersoffKokkos()
59 {
60 if (!copymode) {
61 memoryKK->destroy_kokkos(k_eatom,eatom);
62 memoryKK->destroy_kokkos(k_vatom,vatom);
63 }
64 }
65
66 /* ---------------------------------------------------------------------- */
67
68 template<class DeviceType>
allocate()69 void PairTersoffKokkos<DeviceType>::allocate()
70 {
71 PairTersoff::allocate();
72
73 int n = atom->ntypes;
74
75 k_params = Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType>
76 ("PairTersoff::paramskk",n+1,n+1,n+1);
77 paramskk = k_params.template view<DeviceType>();
78 }
79
80 /* ----------------------------------------------------------------------
81 init specific to this pair style
82 ------------------------------------------------------------------------- */
83
84 template<class DeviceType>
init_style()85 void PairTersoffKokkos<DeviceType>::init_style()
86 {
87 PairTersoff::init_style();
88
89 // irequest = neigh request made by parent class
90
91 neighflag = lmp->kokkos->neighflag;
92 int irequest = neighbor->nrequest - 1;
93
94 neighbor->requests[irequest]->
95 kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
96 !std::is_same<DeviceType,LMPDeviceType>::value;
97 neighbor->requests[irequest]->
98 kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
99
100 if (neighflag == FULL)
101 error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/kk");
102
103 if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
104 //if (neighflag == FULL || neighflag == HALFTHREAD) {
105 neighbor->requests[irequest]->full = 1;
106 neighbor->requests[irequest]->half = 0;
107 if (neighflag == FULL)
108 neighbor->requests[irequest]->ghost = 1;
109 else
110 neighbor->requests[irequest]->ghost = 0;
111 } else {
112 error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/kk");
113 }
114 }
115
116 /* ---------------------------------------------------------------------- */
117
118 template<class DeviceType>
setup_params()119 void PairTersoffKokkos<DeviceType>::setup_params()
120 {
121 PairTersoff::setup_params();
122
123 int i,j,k,m;
124 int n = atom->ntypes;
125
126 for (i = 1; i <= n; i++)
127 for (j = 1; j <= n; j++)
128 for (k = 1; k <= n; k++) {
129 m = elem3param[map[i]][map[j]][map[k]];
130 k_params.h_view(i,j,k).powerm = params[m].powerm;
131 k_params.h_view(i,j,k).gamma = params[m].gamma;
132 k_params.h_view(i,j,k).lam3 = params[m].lam3;
133 k_params.h_view(i,j,k).c = params[m].c;
134 k_params.h_view(i,j,k).d = params[m].d;
135 k_params.h_view(i,j,k).h = params[m].h;
136 k_params.h_view(i,j,k).powern = params[m].powern;
137 k_params.h_view(i,j,k).beta = params[m].beta;
138 k_params.h_view(i,j,k).lam2 = params[m].lam2;
139 k_params.h_view(i,j,k).bigb = params[m].bigb;
140 k_params.h_view(i,j,k).bigr = params[m].bigr;
141 k_params.h_view(i,j,k).bigd = params[m].bigd;
142 k_params.h_view(i,j,k).lam1 = params[m].lam1;
143 k_params.h_view(i,j,k).biga = params[m].biga;
144 k_params.h_view(i,j,k).cutsq = params[m].cutsq;
145 k_params.h_view(i,j,k).c1 = params[m].c1;
146 k_params.h_view(i,j,k).c2 = params[m].c2;
147 k_params.h_view(i,j,k).c3 = params[m].c3;
148 k_params.h_view(i,j,k).c4 = params[m].c4;
149 }
150
151 k_params.template modify<LMPHostType>();
152
153 }
154
155 /* ---------------------------------------------------------------------- */
156
157 template<class DeviceType>
compute(int eflag_in,int vflag_in)158 void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
159 {
160 eflag = eflag_in;
161 vflag = vflag_in;
162
163 if (neighflag == FULL) no_virial_fdotr_compute = 1;
164
165 ev_init(eflag,vflag,0);
166
167 // reallocate per-atom arrays if necessary
168
169 if (eflag_atom) {
170 memoryKK->destroy_kokkos(k_eatom,eatom);
171 memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
172 d_eatom = k_eatom.view<DeviceType>();
173 }
174 if (vflag_atom) {
175 memoryKK->destroy_kokkos(k_vatom,vatom);
176 memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
177 d_vatom = k_vatom.view<DeviceType>();
178 }
179
180 atomKK->sync(execution_space,datamask_read);
181 k_params.template sync<DeviceType>();
182 if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
183 else atomKK->modified(execution_space,F_MASK);
184
185 x = atomKK->k_x.view<DeviceType>();
186 f = atomKK->k_f.view<DeviceType>();
187 type = atomKK->k_type.view<DeviceType>();
188 tag = atomKK->k_tag.view<DeviceType>();
189 nlocal = atom->nlocal;
190 nall = atom->nlocal + atom->nghost;
191 newton_pair = force->newton_pair;
192
193 inum = list->inum;
194 const int ignum = inum + list->gnum;
195 NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
196 d_numneigh = k_list->d_numneigh;
197 d_neighbors = k_list->d_neighbors;
198 d_ilist = k_list->d_ilist;
199
200 need_dup = lmp->kokkos->need_dup<DeviceType>();
201 if (need_dup) {
202 dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
203 dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
204 dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
205 } else {
206 ndup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
207 ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
208 ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
209 }
210
211 copymode = 1;
212
213 EV_FLOAT ev;
214 EV_FLOAT ev_all;
215
216 // build short neighbor list
217
218 int max_neighs = d_neighbors.extent(1);
219
220 if (((int)d_neighbors_short.extent(1) != max_neighs) ||
221 ((int)d_neighbors_short.extent(0) != ignum)) {
222 d_neighbors_short = Kokkos::View<int**,DeviceType>("Tersoff::neighbors_short",ignum,max_neighs);
223 }
224 if ((int)d_numneigh_short.extent(0)!=ignum)
225 d_numneigh_short = Kokkos::View<int*,DeviceType>("Tersoff::numneighs_short",ignum);
226 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairTersoffComputeShortNeigh>(0,neighflag==FULL?ignum:inum), *this);
227
228 if (neighflag == HALF) {
229 if (evflag)
230 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,1> >(0,inum),*this,ev);
231 else
232 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALF,0> >(0,inum),*this);
233 ev_all += ev;
234 } else if (neighflag == HALFTHREAD) {
235 if (evflag)
236 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
237 else
238 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeHalf<HALFTHREAD,0> >(0,inum),*this);
239 ev_all += ev;
240 } else if (neighflag == FULL) {
241 if (evflag)
242 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,1> >(0,inum),*this,ev);
243 else
244 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullA<FULL,0> >(0,inum),*this);
245 ev_all += ev;
246
247 if (evflag)
248 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,1> >(0,ignum),*this,ev);
249 else
250 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffComputeFullB<FULL,0> >(0,ignum),*this);
251 ev_all += ev;
252 }
253
254 if (need_dup)
255 Kokkos::Experimental::contribute(f, dup_f);
256
257 if (eflag_global) eng_vdwl += ev_all.evdwl;
258 if (vflag_global) {
259 virial[0] += ev_all.v[0];
260 virial[1] += ev_all.v[1];
261 virial[2] += ev_all.v[2];
262 virial[3] += ev_all.v[3];
263 virial[4] += ev_all.v[4];
264 virial[5] += ev_all.v[5];
265 }
266
267 if (eflag_atom) {
268 if (need_dup)
269 Kokkos::Experimental::contribute(d_eatom, dup_eatom);
270 k_eatom.template modify<DeviceType>();
271 k_eatom.template sync<LMPHostType>();
272 }
273
274 if (vflag_atom) {
275 if (need_dup)
276 Kokkos::Experimental::contribute(d_vatom, dup_vatom);
277 k_vatom.template modify<DeviceType>();
278 k_vatom.template sync<LMPHostType>();
279 }
280
281 if (vflag_fdotr) pair_virial_fdotr_compute(this);
282
283 copymode = 0;
284
285 // free duplicated memory
286 if (need_dup) {
287 dup_f = decltype(dup_f)();
288 dup_eatom = decltype(dup_eatom)();
289 dup_vatom = decltype(dup_vatom)();
290 }
291 }
292
293 /* ---------------------------------------------------------------------- */
294
295 template<class DeviceType>
296 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeShortNeigh,const int & ii) const297 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeShortNeigh, const int& ii) const {
298 const int i = d_ilist[ii];
299 const X_FLOAT xtmp = x(i,0);
300 const X_FLOAT ytmp = x(i,1);
301 const X_FLOAT ztmp = x(i,2);
302
303 const int jnum = d_numneigh[i];
304 int inside = 0;
305 for (int jj = 0; jj < jnum; jj++) {
306 int j = d_neighbors(i,jj);
307 j &= NEIGHMASK;
308
309 const X_FLOAT delx = xtmp - x(j,0);
310 const X_FLOAT dely = ytmp - x(j,1);
311 const X_FLOAT delz = ztmp - x(j,2);
312 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
313
314 if (rsq < cutmax*cutmax) {
315 d_neighbors_short(i,inside) = j;
316 inside++;
317 }
318 }
319 d_numneigh_short(i) = inside;
320 }
321
322 /* ---------------------------------------------------------------------- */
323
324 template<class DeviceType>
325 template<int NEIGHFLAG, int EVFLAG>
326 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const327 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
328
329 // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
330
331 auto v_f = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
332 auto a_f = v_f.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
333
334 const int i = d_ilist[ii];
335 if (i >= nlocal) return;
336 const X_FLOAT xtmp = x(i,0);
337 const X_FLOAT ytmp = x(i,1);
338 const X_FLOAT ztmp = x(i,2);
339 const int itype = type(i);
340 const tagint itag = tag(i);
341
342 F_FLOAT fi[3], fj[3], fk[3];
343
344 //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
345 const int jnum = d_numneigh_short[i];
346
347 // repulsive
348
349 F_FLOAT f_x = 0.0;
350 F_FLOAT f_y = 0.0;
351 F_FLOAT f_z = 0.0;
352
353 for (int jj = 0; jj < jnum; jj++) {
354 int j = d_neighbors_short(i,jj);
355 j &= NEIGHMASK;
356 const int jtype = type(j);
357 const tagint jtag = tag(j);
358
359 if (itag > jtag) {
360 if ((itag+jtag) % 2 == 0) continue;
361 } else if (itag < jtag) {
362 if ((itag+jtag) % 2 == 1) continue;
363 } else {
364 if (x(j,2) < ztmp) continue;
365 if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
366 if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
367 }
368
369 const X_FLOAT delx = xtmp - x(j,0);
370 const X_FLOAT dely = ytmp - x(j,1);
371 const X_FLOAT delz = ztmp - x(j,2);
372 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
373 const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
374
375 if (rsq > cutsq) continue;
376
377 const F_FLOAT r = sqrt(rsq);
378 const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
379 const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
380 const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
381 const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp *
382 (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
383 const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
384
385 f_x += delx*frep;
386 f_y += dely*frep;
387 f_z += delz*frep;
388 a_f(j,0) -= delx*frep;
389 a_f(j,1) -= dely*frep;
390 a_f(j,2) -= delz*frep;
391
392 if (EVFLAG) {
393 if (eflag) ev.evdwl += eng;
394 if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
395 }
396 }
397
398 // attractive: bond order
399
400 for (int jj = 0; jj < jnum; jj++) {
401 int j = d_neighbors_short(i,jj);
402 j &= NEIGHMASK;
403 const int jtype = type(j);
404
405 const F_FLOAT delx1 = xtmp - x(j,0);
406 const F_FLOAT dely1 = ytmp - x(j,1);
407 const F_FLOAT delz1 = ztmp - x(j,2);
408 const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
409 const F_FLOAT cutsq1 = paramskk(itype,jtype,jtype).cutsq;
410
411 F_FLOAT bo_ij = 0.0;
412 if (rsq1 > cutsq1) continue;
413 const F_FLOAT rij = sqrt(rsq1);
414
415 for (int kk = 0; kk < jnum; kk++) {
416 if (jj == kk) continue;
417 int k = d_neighbors_short(i,kk);
418 k &= NEIGHMASK;
419 const int ktype = type(k);
420
421 const F_FLOAT delx2 = xtmp - x(k,0);
422 const F_FLOAT dely2 = ytmp - x(k,1);
423 const F_FLOAT delz2 = ztmp - x(k,2);
424 const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
425 const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
426
427 if (rsq2 > cutsq2) continue;
428 const F_FLOAT rik = sqrt(rsq2);
429 bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
430 }
431
432 // attractive: pairwise potential and force
433
434 const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
435 const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
436 const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
437 const F_FLOAT fatt = -0.5*bij * dfa / rij;
438 const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
439
440 f_x += delx1*fatt;
441 f_y += dely1*fatt;
442 f_z += delz1*fatt;
443 F_FLOAT fj_x = -delx1*fatt;
444 F_FLOAT fj_y = -dely1*fatt;
445 F_FLOAT fj_z = -delz1*fatt;
446
447 if (EVFLAG) {
448 const F_FLOAT eng = 0.5*bij * fa;
449 if (eflag) ev.evdwl += eng;
450 if (vflag_either || eflag_atom)
451 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
452 }
453
454 // attractive: three-body force
455
456 for (int kk = 0; kk < jnum; kk++) {
457 if (jj == kk) continue;
458 int k = d_neighbors_short(i,kk);
459 k &= NEIGHMASK;
460 const int ktype = type(k);
461
462 const F_FLOAT delx2 = xtmp - x(k,0);
463 const F_FLOAT dely2 = ytmp - x(k,1);
464 const F_FLOAT delz2 = ztmp - x(k,2);
465 const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
466 const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
467
468 if (rsq2 > cutsq2) continue;
469 const F_FLOAT rik = sqrt(rsq2);
470 ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
471 rik,delx2,dely2,delz2,fi,fj,fk);
472
473 f_x += fi[0];
474 f_y += fi[1];
475 f_z += fi[2];
476 fj_x += fj[0];
477 fj_y += fj[1];
478 fj_z += fj[2];
479 a_f(k,0) += fk[0];
480 a_f(k,1) += fk[1];
481 a_f(k,2) += fk[2];
482
483 if (vflag_atom) {
484 F_FLOAT delrij[3], delrik[3];
485 delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
486 delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
487 if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
488 }
489 }
490 a_f(j,0) += fj_x;
491 a_f(j,1) += fj_y;
492 a_f(j,2) += fj_z;
493 }
494 a_f(i,0) += f_x;
495 a_f(i,1) += f_y;
496 a_f(i,2) += f_z;
497 }
498
499 template<class DeviceType>
500 template<int NEIGHFLAG, int EVFLAG>
501 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii) const502 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
503 EV_FLOAT ev;
504 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeHalf<NEIGHFLAG,EVFLAG>(), ii, ev);
505 }
506
507 /* ---------------------------------------------------------------------- */
508
509 template<class DeviceType>
510 template<int NEIGHFLAG, int EVFLAG>
511 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const512 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
513
514 const int i = d_ilist[ii];
515 const X_FLOAT xtmp = x(i,0);
516 const X_FLOAT ytmp = x(i,1);
517 const X_FLOAT ztmp = x(i,2);
518 const int itype = type(i);
519
520 int j,k,jj,kk,jtype,ktype;
521 F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
522 F_FLOAT fi[3], fj[3], fk[3];
523 X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
524
525 //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
526 const int jnum = d_numneigh_short[i];
527
528 // repulsive
529
530 F_FLOAT f_x = 0.0;
531 F_FLOAT f_y = 0.0;
532 F_FLOAT f_z = 0.0;
533 for (jj = 0; jj < jnum; jj++) {
534 j = d_neighbors_short(i,jj);
535 j &= NEIGHMASK;
536 const int jtype = type(j);
537
538 const X_FLOAT delx = xtmp - x(j,0);
539 const X_FLOAT dely = ytmp - x(j,1);
540 const X_FLOAT delz = ztmp - x(j,2);
541 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
542 const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
543
544 if (rsq > cutsq) continue;
545
546 const F_FLOAT r = sqrt(rsq);
547 const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
548 const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
549 const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
550 const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp *
551 (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r;
552 const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
553
554 f_x += delx*frep;
555 f_y += dely*frep;
556 f_z += delz*frep;
557
558 if (EVFLAG) {
559 if (eflag)
560 ev.evdwl += 0.5*eng;
561 if (vflag_either || eflag_atom)
562 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
563 }
564 }
565
566 // attractive: bond order
567
568 for (jj = 0; jj < jnum; jj++) {
569 j = d_neighbors_short(i,jj);
570 j &= NEIGHMASK;
571 jtype = type(j);
572
573 delx1 = xtmp - x(j,0);
574 dely1 = ytmp - x(j,1);
575 delz1 = ztmp - x(j,2);
576 rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
577 cutsq1 = paramskk(itype,jtype,jtype).cutsq;
578
579 bo_ij = 0.0;
580 if (rsq1 > cutsq1) continue;
581 rij = sqrt(rsq1);
582
583 for (kk = 0; kk < jnum; kk++) {
584 if (jj == kk) continue;
585 k = d_neighbors_short(i,kk);
586 k &= NEIGHMASK;
587 ktype = type(k);
588
589 delx2 = xtmp - x(k,0);
590 dely2 = ytmp - x(k,1);
591 delz2 = ztmp - x(k,2);
592 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
593 cutsq2 = paramskk(itype,jtype,ktype).cutsq;
594
595 if (rsq2 > cutsq2) continue;
596 rik = sqrt(rsq2);
597 bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
598 }
599
600 // attractive: pairwise potential and force
601
602 const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
603 const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
604 const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
605 const F_FLOAT fatt = -0.5*bij * dfa / rij;
606 const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
607 const F_FLOAT eng = 0.5*bij * fa;
608
609 f_x += delx1*fatt;
610 f_y += dely1*fatt;
611 f_z += delz1*fatt;
612
613 if (EVFLAG) {
614 if (eflag) ev.evdwl += 0.5*eng;
615 if (vflag_either || eflag_atom)
616 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
617 }
618
619 // attractive: three-body force
620
621 for (kk = 0; kk < jnum; kk++) {
622 if (jj == kk) continue;
623 k = d_neighbors_short(i,kk);
624 k &= NEIGHMASK;
625 ktype = type(k);
626
627 delx2 = xtmp - x(k,0);
628 dely2 = ytmp - x(k,1);
629 delz2 = ztmp - x(k,2);
630 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
631 cutsq2 = paramskk(itype,jtype,ktype).cutsq;
632
633 if (rsq2 > cutsq2) continue;
634 rik = sqrt(rsq2);
635 ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
636 rik,delx2,dely2,delz2,fi,fj,fk);
637
638 f_x += fi[0];
639 f_y += fi[1];
640 f_z += fi[2];
641
642 if (vflag_atom) {
643 F_FLOAT delrij[3], delrik[3];
644 delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
645 delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
646 if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
647 }
648 }
649 }
650 f(i,0) += f_x;
651 f(i,1) += f_y;
652 f(i,2) += f_z;
653 }
654
655 template<class DeviceType>
656 template<int NEIGHFLAG, int EVFLAG>
657 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii) const658 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
659 EV_FLOAT ev;
660 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullA<NEIGHFLAG,EVFLAG>(), ii, ev);
661 }
662
663 /* ---------------------------------------------------------------------- */
664
665 template<class DeviceType>
666 template<int NEIGHFLAG, int EVFLAG>
667 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const668 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
669
670 const int i = d_ilist[ii];
671 const X_FLOAT xtmp = x(i,0);
672 const X_FLOAT ytmp = x(i,1);
673 const X_FLOAT ztmp = x(i,2);
674 const int itype = type(i);
675
676 int j,k,jj,kk,jtype,ktype,j_jnum;
677 F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
678 F_FLOAT fj[3], fk[3];
679 X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
680
681 const int jnum = d_numneigh_short[i];
682
683 F_FLOAT f_x = 0.0;
684 F_FLOAT f_y = 0.0;
685 F_FLOAT f_z = 0.0;
686
687 // attractive: bond order
688
689 for (jj = 0; jj < jnum; jj++) {
690 j = d_neighbors_short(i,jj);
691 j &= NEIGHMASK;
692 if (j >= nlocal) continue;
693 jtype = type(j);
694
695 delx1 = x(j,0) - xtmp;
696 dely1 = x(j,1) - ytmp;
697 delz1 = x(j,2) - ztmp;
698 rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
699 cutsq1 = paramskk(jtype,itype,itype).cutsq;
700
701 bo_ij = 0.0;
702 if (rsq1 > cutsq1) continue;
703 rij = sqrt(rsq1);
704
705 j_jnum = d_numneigh_short[j];
706
707 for (kk = 0; kk < j_jnum; kk++) {
708 k = d_neighbors_short(j,kk);
709 if (k == i) continue;
710 k &= NEIGHMASK;
711 ktype = type(k);
712
713 delx2 = x(j,0) - x(k,0);
714 dely2 = x(j,1) - x(k,1);
715 delz2 = x(j,2) - x(k,2);
716 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
717 cutsq2 = paramskk(jtype,itype,ktype).cutsq;
718
719 if (rsq2 > cutsq2) continue;
720 rik = sqrt(rsq2);
721 bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
722
723 }
724
725 // attractive: pairwise potential and force
726
727 const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
728 const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
729 const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
730 const F_FLOAT fatt = -0.5*bij * dfa / rij;
731 const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
732 const F_FLOAT eng = 0.5*bij * fa;
733
734 f_x -= delx1*fatt;
735 f_y -= dely1*fatt;
736 f_z -= delz1*fatt;
737
738 if (EVFLAG) {
739 if (eflag)
740 ev.evdwl += 0.5 * eng;
741 if (vflag_either || eflag_atom)
742 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
743 }
744
745 // attractive: three-body force
746
747 for (kk = 0; kk < j_jnum; kk++) {
748 k = d_neighbors_short(j,kk);
749 if (k == i) continue;
750 k &= NEIGHMASK;
751 ktype = type(k);
752
753 delx2 = x(j,0) - x(k,0);
754 dely2 = x(j,1) - x(k,1);
755 delz2 = x(j,2) - x(k,2);
756 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
757 cutsq2 = paramskk(jtype,itype,ktype).cutsq;
758
759 if (rsq2 > cutsq2) continue;
760 rik = sqrt(rsq2);
761 ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
762 rik,delx2,dely2,delz2,fj,fk);
763 f_x += fj[0];
764 f_y += fj[1];
765 f_z += fj[2];
766
767 if (vflag_atom) {
768 F_FLOAT delrji[3], delrjk[3];
769 delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
770 delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
771 if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
772 }
773
774 const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
775 const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
776 ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
777 rij,delx1,dely1,delz1,fk);
778 f_x += fk[0];
779 f_y += fk[1];
780 f_z += fk[2];
781 }
782 }
783 f(i,0) += f_x;
784 f(i,1) += f_y;
785 f(i,2) += f_z;
786 }
787
788 template<class DeviceType>
789 template<int NEIGHFLAG, int EVFLAG>
790 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii) const791 void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
792 EV_FLOAT ev;
793 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffComputeFullB<NEIGHFLAG,EVFLAG>(), ii, ev);
794 }
795
796 /* ---------------------------------------------------------------------- */
797
798 template<class DeviceType>
799 KOKKOS_INLINE_FUNCTION
ters_fc_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const800 double PairTersoffKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
801 const int &k, const F_FLOAT &r) const
802 {
803 const F_FLOAT ters_R = paramskk(i,j,k).bigr;
804 const F_FLOAT ters_D = paramskk(i,j,k).bigd;
805
806 if (r < ters_R-ters_D) return 1.0;
807 if (r > ters_R+ters_D) return 0.0;
808 return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D));
809 }
810
811 /* ---------------------------------------------------------------------- */
812
813 template<class DeviceType>
814 KOKKOS_INLINE_FUNCTION
ters_dfc(const int & i,const int & j,const int & k,const F_FLOAT & r) const815 double PairTersoffKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
816 const int &k, const F_FLOAT &r) const
817 {
818 const F_FLOAT ters_R = paramskk(i,j,k).bigr;
819 const F_FLOAT ters_D = paramskk(i,j,k).bigd;
820
821 if (r < ters_R-ters_D) return 0.0;
822 if (r > ters_R+ters_D) return 0.0;
823 return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D);
824 }
825
826 /* ---------------------------------------------------------------------- */
827
828 template<class DeviceType>
829 KOKKOS_INLINE_FUNCTION
bondorder(const int & i,const int & j,const int & k,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2) const830 double PairTersoffKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
831 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
832 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const
833 {
834 F_FLOAT arg, ex_delr;
835
836 const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
837
838 const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
839 if (int(paramskk(i,j,k).powerm) == 3) arg = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
840 else arg = param;
841
842 if (arg > 69.0776) ex_delr = 1.e30;
843 else if (arg < -69.0776) ex_delr = 0.0;
844 else ex_delr = exp(arg);
845
846 return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr;
847 }
848
849 /* ---------------------------------------------------------------------- */
850
851 template<class DeviceType>
852 KOKKOS_INLINE_FUNCTION
853 double PairTersoffKokkos<DeviceType>::
ters_gijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const854 ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
855 {
856 const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
857 const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
858 const F_FLOAT hcth = paramskk(i,j,k).h - cos;
859
860 return paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth));
861 }
862
863 /* ---------------------------------------------------------------------- */
864
865 template<class DeviceType>
866 KOKKOS_INLINE_FUNCTION
867 double PairTersoffKokkos<DeviceType>::
ters_dgijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const868 ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
869 {
870 const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
871 const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
872 const F_FLOAT hcth = paramskk(i,j,k).h - cos;
873 const F_FLOAT numerator = -2.0 * ters_c * hcth;
874 const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth);
875 return paramskk(i,j,k).gamma * numerator * denominator * denominator;
876 }
877
878 /* ---------------------------------------------------------------------- */
879
880 template<class DeviceType>
881 KOKKOS_INLINE_FUNCTION
ters_fa_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const882 double PairTersoffKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
883 const int &k, const F_FLOAT &r) const
884 {
885 if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
886 return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r)
887 * ters_fc_k(i,j,k,r);
888 }
889
890 /* ---------------------------------------------------------------------- */
891
892 template<class DeviceType>
893 KOKKOS_INLINE_FUNCTION
ters_dfa(const int & i,const int & j,const int & k,const F_FLOAT & r) const894 double PairTersoffKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
895 const int &k, const F_FLOAT &r) const
896 {
897 if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
898 return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) *
899 (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r));
900 }
901
902 /* ---------------------------------------------------------------------- */
903
904 template<class DeviceType>
905 KOKKOS_INLINE_FUNCTION
ters_bij_k(const int & i,const int & j,const int & k,const F_FLOAT & bo) const906 double PairTersoffKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
907 const int &k, const F_FLOAT &bo) const
908 {
909 const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
910 if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp);
911 if (tmp > paramskk(i,j,k).c2)
912 return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp);
913 if (tmp < paramskk(i,j,k).c4) return 1.0;
914 if (tmp < paramskk(i,j,k).c3)
915 return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern);
916 return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern));
917 }
918
919 /* ---------------------------------------------------------------------- */
920
921 template<class DeviceType>
922 KOKKOS_INLINE_FUNCTION
ters_dbij(const int & i,const int & j,const int & k,const F_FLOAT & bo) const923 double PairTersoffKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
924 const int &k, const F_FLOAT &bo) const
925 {
926 const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
927 if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5/sqrt(tmp*tmp);//*pow(tmp,-1.5);
928 if (tmp > paramskk(i,j,k).c2)
929 return paramskk(i,j,k).beta * (-0.5/sqrt(tmp*tmp) * //*pow(tmp,-1.5) *
930 (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) *
931 pow(tmp,-paramskk(i,j,k).powern)));
932 if (tmp < paramskk(i,j,k).c4) return 0.0;
933 if (tmp < paramskk(i,j,k).c3)
934 return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0);
935
936 const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern);
937 return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo;
938 }
939
940 /* ---------------------------------------------------------------------- */
941
942 template<class DeviceType>
943 KOKKOS_INLINE_FUNCTION
ters_dthb(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fi,F_FLOAT * fj,F_FLOAT * fk) const944 void PairTersoffKokkos<DeviceType>::ters_dthb(
945 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
946 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
947 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
948 F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
949 {
950 // from PairTersoff::attractive
951 F_FLOAT rij_hat[3],rik_hat[3];
952 F_FLOAT rijinv,rikinv;
953 F_FLOAT delrij[3], delrik[3];
954
955 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
956 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
957
958 //rij = sqrt(rsq1);
959 rijinv = 1.0/rij;
960 vec3_scale(rijinv,delrij,rij_hat);
961
962 //rik = sqrt(rsq2);
963 rikinv = 1.0/rik;
964 vec3_scale(rikinv,delrik,rik_hat);
965
966 // from PairTersoff::ters_zetaterm_d
967 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
968 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
969
970 fc = ters_fc_k(i,j,k,rik);
971 dfc = ters_dfc(i,j,k,rik);
972 const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
973 if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
974 else tmp = param;
975
976 if (tmp > 69.0776) ex_delr = 1.e30;
977 else if (tmp < -69.0776) ex_delr = 0.0;
978 else ex_delr = exp(tmp);
979
980 if (int(paramskk(i,j,k).powerm) == 3)
981 dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr;
982 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
983
984 cos = vec3_dot(rij_hat,rik_hat);
985 gijk = ters_gijk(i,j,k,cos);
986 dgijk = ters_dgijk(i,j,k,cos);
987
988 // from PairTersoff::costheta_d
989 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
990 vec3_scale(rijinv,dcosfj,dcosfj);
991 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
992 vec3_scale(rikinv,dcosfk,dcosfk);
993 vec3_add(dcosfj,dcosfk,dcosfi);
994 vec3_scale(-1.0,dcosfi,dcosfi);
995
996 vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi);
997 vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi);
998 vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi);
999 vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi);
1000 vec3_scale(prefactor,fi,fi);
1001
1002 vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1003 vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1004 vec3_scale(prefactor,fj,fj);
1005
1006 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1007 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1008 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1009 vec3_scale(prefactor,fk,fk);
1010
1011 }
1012
1013 /* ---------------------------------------------------------------------- */
1014
1015 template<class DeviceType>
1016 KOKKOS_INLINE_FUNCTION
ters_dthbj(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fj,F_FLOAT * fk) const1017 void PairTersoffKokkos<DeviceType>::ters_dthbj(
1018 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1019 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1020 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1021 F_FLOAT *fj, F_FLOAT *fk) const
1022 {
1023 F_FLOAT rij_hat[3],rik_hat[3];
1024 F_FLOAT rijinv,rikinv;
1025 F_FLOAT delrij[3], delrik[3];
1026
1027 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1028 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1029
1030 rijinv = 1.0/rij;
1031 vec3_scale(rijinv,delrij,rij_hat);
1032
1033 rikinv = 1.0/rik;
1034 vec3_scale(rikinv,delrik,rik_hat);
1035
1036 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1037 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1038
1039 fc = ters_fc_k(i,j,k,rik);
1040 dfc = ters_dfc(i,j,k,rik);
1041 const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
1042 if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1043 else tmp = param;//paramskk(i,j,k).lam3 * (rij-rik);
1044
1045 if (tmp > 69.0776) ex_delr = 1.e30;
1046 else if (tmp < -69.0776) ex_delr = 0.0;
1047 else ex_delr = exp(tmp);
1048
1049 if (int(paramskk(i,j,k).powerm) == 3)
1050 dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1051 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1052
1053 cos = vec3_dot(rij_hat,rik_hat);
1054 gijk = ters_gijk(i,j,k,cos);
1055 dgijk = ters_dgijk(i,j,k,cos);
1056
1057 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1058 vec3_scale(rijinv,dcosfj,dcosfj);
1059 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1060 vec3_scale(rikinv,dcosfk,dcosfk);
1061 vec3_add(dcosfj,dcosfk,dcosfi);
1062 vec3_scale(-1.0,dcosfi,dcosfi);
1063
1064 vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1065 vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1066 vec3_scale(prefactor,fj,fj);
1067
1068 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1069 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1070 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1071 vec3_scale(prefactor,fk,fk);
1072
1073 }
1074
1075 /* ---------------------------------------------------------------------- */
1076
1077 template<class DeviceType>
1078 KOKKOS_INLINE_FUNCTION
ters_dthbk(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fk) const1079 void PairTersoffKokkos<DeviceType>::ters_dthbk(
1080 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1081 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1082 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1083 F_FLOAT *fk) const
1084 {
1085 F_FLOAT rij_hat[3],rik_hat[3];
1086 F_FLOAT rijinv,rikinv;
1087 F_FLOAT delrij[3], delrik[3];
1088
1089 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1090 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1091
1092 rijinv = 1.0/rij;
1093 vec3_scale(rijinv,delrij,rij_hat);
1094
1095 rikinv = 1.0/rik;
1096 vec3_scale(rikinv,delrik,rik_hat);
1097
1098 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1099 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1100
1101 fc = ters_fc_k(i,j,k,rik);
1102 dfc = ters_dfc(i,j,k,rik);
1103 const F_FLOAT param = paramskk(i,j,k).lam3 * (rij-rik);
1104 if (int(paramskk(i,j,k).powerm) == 3) tmp = param*param*param;//pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1105 else tmp = param;//paramskk(i,j,k).lam3 * (rij-rik);
1106
1107 if (tmp > 69.0776) ex_delr = 1.e30;
1108 else if (tmp < -69.0776) ex_delr = 0.0;
1109 else ex_delr = exp(tmp);
1110
1111 if (int(paramskk(i,j,k).powerm) == 3)
1112 dex_delr = 3.0*param*param*paramskk(i,j,k).lam3*ex_delr;//pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1113 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1114
1115 cos = vec3_dot(rij_hat,rik_hat);
1116 gijk = ters_gijk(i,j,k,cos);
1117 dgijk = ters_dgijk(i,j,k,cos);
1118
1119 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1120 vec3_scale(rijinv,dcosfj,dcosfj);
1121 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1122 vec3_scale(rikinv,dcosfk,dcosfk);
1123 vec3_add(dcosfj,dcosfk,dcosfi);
1124 vec3_scale(-1.0,dcosfi,dcosfi);
1125
1126 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1127 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1128 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1129 vec3_scale(prefactor,fk,fk);
1130
1131 }
1132
1133 /* ---------------------------------------------------------------------- */
1134
1135 template<class DeviceType>
1136 template<int NEIGHFLAG>
1137 KOKKOS_INLINE_FUNCTION
ev_tally(EV_FLOAT & ev,const int & i,const int & j,const F_FLOAT & epair,const F_FLOAT & fpair,const F_FLOAT & delx,const F_FLOAT & dely,const F_FLOAT & delz) const1138 void PairTersoffKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
1139 const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
1140 const F_FLOAT &dely, const F_FLOAT &delz) const
1141 {
1142 const int VFLAG = vflag_either;
1143
1144 // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
1145
1146 auto v_eatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
1147 auto a_eatom = v_eatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1148
1149 auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1150 auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1151
1152 if (eflag_atom) {
1153 const E_FLOAT epairhalf = 0.5 * epair;
1154 a_eatom[i] += epairhalf;
1155 if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
1156 }
1157
1158 if (VFLAG) {
1159 const E_FLOAT v0 = delx*delx*fpair;
1160 const E_FLOAT v1 = dely*dely*fpair;
1161 const E_FLOAT v2 = delz*delz*fpair;
1162 const E_FLOAT v3 = delx*dely*fpair;
1163 const E_FLOAT v4 = delx*delz*fpair;
1164 const E_FLOAT v5 = dely*delz*fpair;
1165
1166 if (vflag_global) {
1167 if (NEIGHFLAG != FULL) {
1168 ev.v[0] += v0;
1169 ev.v[1] += v1;
1170 ev.v[2] += v2;
1171 ev.v[3] += v3;
1172 ev.v[4] += v4;
1173 ev.v[5] += v5;
1174 } else {
1175 ev.v[0] += 0.5*v0;
1176 ev.v[1] += 0.5*v1;
1177 ev.v[2] += 0.5*v2;
1178 ev.v[3] += 0.5*v3;
1179 ev.v[4] += 0.5*v4;
1180 ev.v[5] += 0.5*v5;
1181 }
1182 }
1183
1184 if (vflag_atom) {
1185 a_vatom(i,0) += 0.5*v0;
1186 a_vatom(i,1) += 0.5*v1;
1187 a_vatom(i,2) += 0.5*v2;
1188 a_vatom(i,3) += 0.5*v3;
1189 a_vatom(i,4) += 0.5*v4;
1190 a_vatom(i,5) += 0.5*v5;
1191
1192 if (NEIGHFLAG != FULL) {
1193 a_vatom(j,0) += 0.5*v0;
1194 a_vatom(j,1) += 0.5*v1;
1195 a_vatom(j,2) += 0.5*v2;
1196 a_vatom(j,3) += 0.5*v3;
1197 a_vatom(j,4) += 0.5*v4;
1198 a_vatom(j,5) += 0.5*v5;
1199 }
1200 }
1201 }
1202 }
1203
1204 /* ---------------------------------------------------------------------- */
1205
1206 template<class DeviceType>
1207 template<int NEIGHFLAG>
1208 KOKKOS_INLINE_FUNCTION
v_tally3(EV_FLOAT & ev,const int & i,const int & j,const int & k,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drij,F_FLOAT * drik) const1209 void PairTersoffKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
1210 F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
1211 {
1212 // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
1213
1214 auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1215 auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1216
1217 F_FLOAT v[6];
1218
1219 v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]);
1220 v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]);
1221 v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]);
1222 v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]);
1223 v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]);
1224 v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]);
1225
1226 if (vflag_global) {
1227 ev.v[0] += v[0];
1228 ev.v[1] += v[1];
1229 ev.v[2] += v[2];
1230 ev.v[3] += v[3];
1231 ev.v[4] += v[4];
1232 ev.v[5] += v[5];
1233 }
1234
1235 if (vflag_atom) {
1236 a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
1237 a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
1238 if (NEIGHFLAG != FULL) {
1239 a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
1240 a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
1241 a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
1242 a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
1243 }
1244 }
1245
1246 }
1247
1248 /* ---------------------------------------------------------------------- */
1249
1250 template<class DeviceType>
1251 KOKKOS_INLINE_FUNCTION
v_tally3_atom(EV_FLOAT & ev,const int & i,const int &,const int &,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drji,F_FLOAT * drjk) const1252 void PairTersoffKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int & /*j*/,
1253 const int & /*k*/, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
1254 {
1255 F_FLOAT v[6];
1256
1257 v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
1258 v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
1259 v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
1260 v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
1261 v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
1262 v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
1263
1264 if (vflag_global) {
1265 ev.v[0] += v[0];
1266 ev.v[1] += v[1];
1267 ev.v[2] += v[2];
1268 ev.v[3] += v[3];
1269 ev.v[4] += v[4];
1270 ev.v[5] += v[5];
1271 }
1272
1273 if (vflag_atom) {
1274 d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2];
1275 d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5];
1276 }
1277 }
1278
1279 /* ---------------------------------------------------------------------- */
1280
1281 template<class DeviceType>
1282 KOKKOS_INLINE_FUNCTION
sbmask(const int & j) const1283 int PairTersoffKokkos<DeviceType>::sbmask(const int& j) const {
1284 return j >> SBBITS & 3;
1285 }
1286
1287 namespace LAMMPS_NS {
1288 template class PairTersoffKokkos<LMPDeviceType>;
1289 #ifdef LMP_KOKKOS_GPU
1290 template class PairTersoffKokkos<LMPHostType>;
1291 #endif
1292 }
1293
1294