1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Ray Shan (SNL)
17 ------------------------------------------------------------------------- */
18
19 #include "pair_tersoff_zbl_kokkos.h"
20 #include <cmath>
21 #include <cstring>
22 #include "kokkos.h"
23 #include "atom_kokkos.h"
24 #include "comm.h"
25 #include "force.h"
26 #include "neighbor.h"
27 #include "neigh_request.h"
28 #include "neigh_list_kokkos.h"
29 #include "update.h"
30 #include "math_const.h"
31 #include "memory_kokkos.h"
32 #include "error.h"
33 #include "atom_masks.h"
34
35 using namespace LAMMPS_NS;
36 using namespace MathConst;
37
38 #define KOKKOS_CUDA_MAX_THREADS 256
39 #define KOKKOS_CUDA_MIN_BLOCKS 8
40
41 /* ---------------------------------------------------------------------- */
42
43 template<class DeviceType>
PairTersoffZBLKokkos(LAMMPS * lmp)44 PairTersoffZBLKokkos<DeviceType>::PairTersoffZBLKokkos(LAMMPS *lmp) : PairTersoffZBL(lmp)
45 {
46 respa_enable = 0;
47
48 kokkosable = 1;
49 atomKK = (AtomKokkos *) atom;
50 execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
51 datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
52 datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
53
54 if (strcmp(update->unit_style,"metal") == 0) {
55 global_a_0 = 0.529;
56 global_epsilon_0 = 0.00552635;
57 global_e = 1.0;
58 } else if (strcmp(update->unit_style,"real") == 0) {
59 global_a_0 = 0.529;
60 global_epsilon_0 = 0.00552635 * 0.043365121;
61 global_e = 1.0;
62 } else error->all(FLERR,"Pair tersoff/zbl/kk requires metal or real units");
63
64 }
65
66 /* ---------------------------------------------------------------------- */
67
68 template<class DeviceType>
~PairTersoffZBLKokkos()69 PairTersoffZBLKokkos<DeviceType>::~PairTersoffZBLKokkos()
70 {
71 if (!copymode) {
72 memoryKK->destroy_kokkos(k_eatom,eatom);
73 memoryKK->destroy_kokkos(k_vatom,vatom);
74 }
75 }
76
77 /* ---------------------------------------------------------------------- */
78
79 template<class DeviceType>
allocate()80 void PairTersoffZBLKokkos<DeviceType>::allocate()
81 {
82 PairTersoffZBL::allocate();
83
84 int n = atom->ntypes;
85
86 k_params = Kokkos::DualView<params_ters***,Kokkos::LayoutRight,DeviceType>
87 ("PairTersoffZBL::paramskk",n+1,n+1,n+1);
88 paramskk = k_params.template view<DeviceType>();
89 }
90
91 /* ----------------------------------------------------------------------
92 init specific to this pair style
93 ------------------------------------------------------------------------- */
94
95 template<class DeviceType>
init_style()96 void PairTersoffZBLKokkos<DeviceType>::init_style()
97 {
98 PairTersoffZBL::init_style();
99
100 // irequest = neigh request made by parent class
101
102 neighflag = lmp->kokkos->neighflag;
103 int irequest = neighbor->nrequest - 1;
104
105 neighbor->requests[irequest]->
106 kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
107 !std::is_same<DeviceType,LMPDeviceType>::value;
108 neighbor->requests[irequest]->
109 kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
110
111 if (neighflag == FULL)
112 error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/zbl/kk");
113
114 if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
115 neighbor->requests[irequest]->full = 1;
116 neighbor->requests[irequest]->half = 0;
117 if (neighflag == FULL)
118 neighbor->requests[irequest]->ghost = 1;
119 else
120 neighbor->requests[irequest]->ghost = 0;
121 } else {
122 error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/zbl/kk");
123 }
124 }
125
126 /* ---------------------------------------------------------------------- */
127
128 template<class DeviceType>
setup_params()129 void PairTersoffZBLKokkos<DeviceType>::setup_params()
130 {
131 PairTersoffZBL::setup_params();
132
133 int i,j,k,m;
134 int n = atom->ntypes;
135
136 for (i = 1; i <= n; i++)
137 for (j = 1; j <= n; j++)
138 for (k = 1; k <= n; k++) {
139 m = elem3param[map[i]][map[j]][map[k]];
140 k_params.h_view(i,j,k).powerm = params[m].powerm;
141 k_params.h_view(i,j,k).gamma = params[m].gamma;
142 k_params.h_view(i,j,k).lam3 = params[m].lam3;
143 k_params.h_view(i,j,k).c = params[m].c;
144 k_params.h_view(i,j,k).d = params[m].d;
145 k_params.h_view(i,j,k).h = params[m].h;
146 k_params.h_view(i,j,k).powern = params[m].powern;
147 k_params.h_view(i,j,k).beta = params[m].beta;
148 k_params.h_view(i,j,k).lam2 = params[m].lam2;
149 k_params.h_view(i,j,k).bigb = params[m].bigb;
150 k_params.h_view(i,j,k).bigr = params[m].bigr;
151 k_params.h_view(i,j,k).bigd = params[m].bigd;
152 k_params.h_view(i,j,k).lam1 = params[m].lam1;
153 k_params.h_view(i,j,k).biga = params[m].biga;
154 k_params.h_view(i,j,k).cutsq = params[m].cutsq;
155 k_params.h_view(i,j,k).c1 = params[m].c1;
156 k_params.h_view(i,j,k).c2 = params[m].c2;
157 k_params.h_view(i,j,k).c3 = params[m].c3;
158 k_params.h_view(i,j,k).c4 = params[m].c4;
159 k_params.h_view(i,j,k).Z_i = params[m].Z_i;
160 k_params.h_view(i,j,k).Z_j = params[m].Z_j;
161 k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut;
162 k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale;
163 }
164
165 k_params.template modify<LMPHostType>();
166
167 }
168
169 /* ---------------------------------------------------------------------- */
170
171 template<class DeviceType>
compute(int eflag_in,int vflag_in)172 void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
173 {
174 eflag = eflag_in;
175 vflag = vflag_in;
176
177 if (neighflag == FULL) no_virial_fdotr_compute = 1;
178
179 ev_init(eflag,vflag,0);
180
181 // reallocate per-atom arrays if necessary
182
183 if (eflag_atom) {
184 memoryKK->destroy_kokkos(k_eatom,eatom);
185 memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
186 d_eatom = k_eatom.view<DeviceType>();
187 }
188 if (vflag_atom) {
189 memoryKK->destroy_kokkos(k_vatom,vatom);
190 memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
191 d_vatom = k_vatom.view<DeviceType>();
192 }
193
194 atomKK->sync(execution_space,datamask_read);
195 k_params.template sync<DeviceType>();
196 if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
197 else atomKK->modified(execution_space,F_MASK);
198
199 x = atomKK->k_x.view<DeviceType>();
200 f = atomKK->k_f.view<DeviceType>();
201 type = atomKK->k_type.view<DeviceType>();
202 tag = atomKK->k_tag.view<DeviceType>();
203 nlocal = atom->nlocal;
204 nall = atom->nlocal + atom->nghost;
205 newton_pair = force->newton_pair;
206
207 inum = list->inum;
208 const int ignum = inum + list->gnum;
209 NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
210 d_numneigh = k_list->d_numneigh;
211 d_neighbors = k_list->d_neighbors;
212 d_ilist = k_list->d_ilist;
213
214 need_dup = lmp->kokkos->need_dup<DeviceType>();
215 if (need_dup) {
216 dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
217 dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
218 dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
219 } else {
220 ndup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
221 ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
222 ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
223 }
224
225 copymode = 1;
226
227 EV_FLOAT ev;
228 EV_FLOAT ev_all;
229
230 // build short neighbor list
231
232 int max_neighs = d_neighbors.extent(1);
233
234 if (((int)d_neighbors_short.extent(1) != max_neighs) ||
235 ((int)d_neighbors_short.extent(0) != ignum)) {
236 d_neighbors_short = Kokkos::View<int**,DeviceType>("Tersoff::neighbors_short",ignum,max_neighs);
237 }
238 if ((int)d_numneigh_short.extent(0)!=ignum)
239 d_numneigh_short = Kokkos::View<int*,DeviceType>("Tersoff::numneighs_short",ignum);
240 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairTersoffZBLComputeShortNeigh>(0,neighflag==FULL?ignum:inum), *this);
241
242 if (neighflag == HALF) {
243 if (evflag)
244 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,1> >(0,inum),*this,ev);
245 else
246 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALF,0> >(0,inum),*this);
247 ev_all += ev;
248 } else if (neighflag == HALFTHREAD) {
249 if (evflag)
250 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,1> >(0,inum),*this,ev);
251 else
252 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeHalf<HALFTHREAD,0> >(0,inum),*this);
253 ev_all += ev;
254 } else if (neighflag == FULL) {
255 if (evflag)
256 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,1> >(0,inum),*this,ev);
257 else
258 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullA<FULL,0> >(0,inum),*this);
259 ev_all += ev;
260
261 if (evflag)
262 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,1> >(0,ignum),*this,ev);
263 else
264 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairTersoffZBLComputeFullB<FULL,0> >(0,ignum),*this);
265 ev_all += ev;
266 }
267
268 if (need_dup)
269 Kokkos::Experimental::contribute(f, dup_f);
270
271 if (eflag_global) eng_vdwl += ev_all.evdwl;
272 if (vflag_global) {
273 virial[0] += ev_all.v[0];
274 virial[1] += ev_all.v[1];
275 virial[2] += ev_all.v[2];
276 virial[3] += ev_all.v[3];
277 virial[4] += ev_all.v[4];
278 virial[5] += ev_all.v[5];
279 }
280
281 if (eflag_atom) {
282 if (need_dup)
283 Kokkos::Experimental::contribute(d_eatom, dup_eatom);
284 k_eatom.template modify<DeviceType>();
285 k_eatom.template sync<LMPHostType>();
286 }
287
288 if (vflag_atom) {
289 if (need_dup)
290 Kokkos::Experimental::contribute(d_vatom, dup_vatom);
291 k_vatom.template modify<DeviceType>();
292 k_vatom.template sync<LMPHostType>();
293 }
294
295 if (vflag_fdotr) pair_virial_fdotr_compute(this);
296
297 copymode = 0;
298
299 // free duplicated memory
300 if (need_dup) {
301 dup_f = decltype(dup_f)();
302 dup_eatom = decltype(dup_eatom)();
303 dup_vatom = decltype(dup_vatom)();
304 }
305 }
306
307 /* ---------------------------------------------------------------------- */
308
309 template<class DeviceType>
310 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeShortNeigh,const int & ii) const311 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeShortNeigh, const int& ii) const {
312 const int i = d_ilist[ii];
313 const X_FLOAT xtmp = x(i,0);
314 const X_FLOAT ytmp = x(i,1);
315 const X_FLOAT ztmp = x(i,2);
316
317 const int jnum = d_numneigh[i];
318 int inside = 0;
319 for (int jj = 0; jj < jnum; jj++) {
320 int j = d_neighbors(i,jj);
321 j &= NEIGHMASK;
322
323 const X_FLOAT delx = xtmp - x(j,0);
324 const X_FLOAT dely = ytmp - x(j,1);
325 const X_FLOAT delz = ztmp - x(j,2);
326 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
327
328 if (rsq < cutmax*cutmax) {
329 d_neighbors_short(i,inside) = j;
330 inside++;
331 }
332 }
333 d_numneigh_short(i) = inside;
334 }
335
336 /* ---------------------------------------------------------------------- */
337
338 template<class DeviceType>
339 template<int NEIGHFLAG, int EVFLAG>
340 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const341 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
342
343 // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
344
345 auto v_f = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
346 auto a_f = v_f.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
347
348 const int i = d_ilist[ii];
349 if (i >= nlocal) return;
350 const X_FLOAT xtmp = x(i,0);
351 const X_FLOAT ytmp = x(i,1);
352 const X_FLOAT ztmp = x(i,2);
353 const int itype = type(i);
354 const tagint itag = tag(i);
355
356 F_FLOAT fi[3], fj[3], fk[3];
357
358 //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
359 const int jnum = d_numneigh_short[i];
360
361 // repulsive
362
363 F_FLOAT f_x = 0.0;
364 F_FLOAT f_y = 0.0;
365 F_FLOAT f_z = 0.0;
366
367 for (int jj = 0; jj < jnum; jj++) {
368 int j = d_neighbors_short(i,jj);
369 j &= NEIGHMASK;
370 const int jtype = type(j);
371 const tagint jtag = tag(j);
372
373 if (itag > jtag) {
374 if ((itag+jtag) % 2 == 0) continue;
375 } else if (itag < jtag) {
376 if ((itag+jtag) % 2 == 1) continue;
377 } else {
378 if (x(j,2) < ztmp) continue;
379 if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
380 if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
381 }
382
383 const X_FLOAT delx = xtmp - x(j,0);
384 const X_FLOAT dely = ytmp - x(j,1);
385 const X_FLOAT delz = ztmp - x(j,2);
386 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
387 const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
388
389 if (rsq > cutsq) continue;
390
391 // Tersoff repulsive portion
392
393 const F_FLOAT r = sqrt(rsq);
394 const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
395 const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
396 const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
397 const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp *
398 (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1);
399 const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
400
401 // ZBL repulsive portion
402
403 const F_FLOAT esq = pow(global_e,2.0);
404 const F_FLOAT a_ij = (0.8854*global_a_0) /
405 (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23));
406 const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/
407 (4.0*MY_PI*global_epsilon_0);
408 const F_FLOAT r_ov_a = r/a_ij;
409 const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) +
410 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a);
411 const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) -
412 0.9423*0.5099*exp(-0.9423*r_ov_a) -
413 0.4029*0.2802*exp(-0.4029*r_ov_a) -
414 0.2016*0.02817*exp(-0.2016*r_ov_a));
415 const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r;
416 const F_FLOAT eng_z = premult*(1.0/r)*phi;
417
418 // combine two parts with smoothing by Fermi-like function
419
420 F_FLOAT frep, eng;
421 frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z +
422 (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z +
423 fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r;
424
425 if (eflag)
426 eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z +
427 fermi_k(itype,jtype,jtype,r) * eng_t;
428
429 f_x += delx*frep;
430 f_y += dely*frep;
431 f_z += delz*frep;
432 a_f(j,0) -= delx*frep;
433 a_f(j,1) -= dely*frep;
434 a_f(j,2) -= delz*frep;
435
436 if (EVFLAG) {
437 if (eflag) ev.evdwl += eng;
438 if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
439 }
440 }
441
442 // attractive: bond order
443
444 for (int jj = 0; jj < jnum; jj++) {
445 int j = d_neighbors_short(i,jj);
446 j &= NEIGHMASK;
447 const int jtype = type(j);
448
449 const F_FLOAT delx1 = xtmp - x(j,0);
450 const F_FLOAT dely1 = ytmp - x(j,1);
451 const F_FLOAT delz1 = ztmp - x(j,2);
452 const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
453 const F_FLOAT cutsq1 = paramskk(itype,jtype,jtype).cutsq;
454
455 F_FLOAT bo_ij = 0.0;
456 if (rsq1 > cutsq1) continue;
457 const F_FLOAT rij = sqrt(rsq1);
458
459 for (int kk = 0; kk < jnum; kk++) {
460 if (jj == kk) continue;
461 int k = d_neighbors_short(i,kk);
462 k &= NEIGHMASK;
463 const int ktype = type(k);
464
465 const F_FLOAT delx2 = xtmp - x(k,0);
466 const F_FLOAT dely2 = ytmp - x(k,1);
467 const F_FLOAT delz2 = ztmp - x(k,2);
468 const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
469 const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
470
471 if (rsq2 > cutsq2) continue;
472 const F_FLOAT rik = sqrt(rsq2);
473 bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
474 }
475
476 // attractive: pairwise potential and force
477
478 const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
479 const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
480 const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
481 const F_FLOAT fatt = -0.5*bij * dfa / rij;
482 const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
483
484 f_x += delx1*fatt;
485 f_y += dely1*fatt;
486 f_z += delz1*fatt;
487 F_FLOAT fj_x = -delx1*fatt;
488 F_FLOAT fj_y = -dely1*fatt;
489 F_FLOAT fj_z = -delz1*fatt;
490
491 if (EVFLAG) {
492 const F_FLOAT eng = 0.5*bij * fa;
493 if (eflag) ev.evdwl += eng;
494 if (vflag_either || eflag_atom)
495 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
496 }
497
498 // attractive: three-body force
499
500 for (int kk = 0; kk < jnum; kk++) {
501 if (jj == kk) continue;
502 int k = d_neighbors_short(i,kk);
503 k &= NEIGHMASK;
504 const int ktype = type(k);
505
506 const F_FLOAT delx2 = xtmp - x(k,0);
507 const F_FLOAT dely2 = ytmp - x(k,1);
508 const F_FLOAT delz2 = ztmp - x(k,2);
509 const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
510 const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq;
511
512 if (rsq2 > cutsq2) continue;
513 const F_FLOAT rik = sqrt(rsq2);
514 ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
515 rik,delx2,dely2,delz2,fi,fj,fk);
516
517 f_x += fi[0];
518 f_y += fi[1];
519 f_z += fi[2];
520 fj_x += fj[0];
521 fj_y += fj[1];
522 fj_z += fj[2];
523 a_f(k,0) += fk[0];
524 a_f(k,1) += fk[1];
525 a_f(k,2) += fk[2];
526
527 if (vflag_atom) {
528 F_FLOAT delrij[3], delrik[3];
529 delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
530 delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
531 if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
532 }
533 }
534 a_f(j,0) += fj_x;
535 a_f(j,1) += fj_y;
536 a_f(j,2) += fj_z;
537 }
538 a_f(i,0) += f_x;
539 a_f(i,1) += f_y;
540 a_f(i,2) += f_z;
541 }
542
543 template<class DeviceType>
544 template<int NEIGHFLAG, int EVFLAG>
545 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>,const int & ii) const546 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>, const int &ii) const {
547 EV_FLOAT ev;
548 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffZBLComputeHalf<NEIGHFLAG,EVFLAG>(), ii, ev);
549 }
550
551 /* ---------------------------------------------------------------------- */
552
553 template<class DeviceType>
554 template<int NEIGHFLAG, int EVFLAG>
555 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const556 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
557
558 const int i = d_ilist[ii];
559 const X_FLOAT xtmp = x(i,0);
560 const X_FLOAT ytmp = x(i,1);
561 const X_FLOAT ztmp = x(i,2);
562 const int itype = type(i);
563
564 int j,k,jj,kk,jtype,ktype;
565 F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
566 F_FLOAT fi[3], fj[3], fk[3];
567 X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
568
569 //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
570 const int jnum = d_numneigh[i];
571
572 // repulsive
573
574 F_FLOAT f_x = 0.0;
575 F_FLOAT f_y = 0.0;
576 F_FLOAT f_z = 0.0;
577 for (jj = 0; jj < jnum; jj++) {
578 j = d_neighbors_short(i,jj);
579 j &= NEIGHMASK;
580 const int jtype = type(j);
581
582 const X_FLOAT delx = xtmp - x(j,0);
583 const X_FLOAT dely = ytmp - x(j,1);
584 const X_FLOAT delz = ztmp - x(j,2);
585 const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
586 const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq;
587
588 if (rsq > cutsq) continue;
589
590 // Tersoff repulsive portion
591
592 const F_FLOAT r = sqrt(rsq);
593 const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r);
594 const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r);
595 const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r);
596 const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp *
597 (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1);
598 const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp;
599
600 // ZBL repulsive portion
601
602 const F_FLOAT esq = pow(global_e,2.0);
603 const F_FLOAT a_ij = (0.8854*global_a_0) /
604 (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23));
605 const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/
606 (4.0*MY_PI*global_epsilon_0);
607 const F_FLOAT r_ov_a = r/a_ij;
608 const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) +
609 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a);
610 const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) -
611 0.9423*0.5099*exp(-0.9423*r_ov_a) -
612 0.4029*0.2802*exp(-0.4029*r_ov_a) -
613 0.2016*0.02817*exp(-0.2016*r_ov_a));
614 const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r;
615 const F_FLOAT eng_z = premult*(1.0/r)*phi;
616
617 // combine two parts with smoothing by Fermi-like function
618
619 F_FLOAT frep, eng;
620 frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z +
621 (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z +
622 fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r;
623
624 if (eflag)
625 eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z +
626 fermi_k(itype,jtype,jtype,r) * eng_t;
627
628 f_x += delx*frep;
629 f_y += dely*frep;
630 f_z += delz*frep;
631
632 if (EVFLAG) {
633 if (eflag)
634 ev.evdwl += 0.5*eng;
635 if (vflag_either || eflag_atom)
636 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,frep,delx,dely,delz);
637 }
638 }
639
640 // attractive: bond order
641
642 for (jj = 0; jj < jnum; jj++) {
643 j = d_neighbors_short(i,jj);
644 j &= NEIGHMASK;
645 jtype = type(j);
646
647 delx1 = xtmp - x(j,0);
648 dely1 = ytmp - x(j,1);
649 delz1 = ztmp - x(j,2);
650 rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
651 cutsq1 = paramskk(itype,jtype,jtype).cutsq;
652
653 bo_ij = 0.0;
654 if (rsq1 > cutsq1) continue;
655 rij = sqrt(rsq1);
656
657 for (kk = 0; kk < jnum; kk++) {
658 if (jj == kk) continue;
659 k = d_neighbors_short(i,kk);
660 k &= NEIGHMASK;
661 ktype = type(k);
662
663 delx2 = xtmp - x(k,0);
664 dely2 = ytmp - x(k,1);
665 delz2 = ztmp - x(k,2);
666 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
667 cutsq2 = paramskk(itype,jtype,ktype).cutsq;
668
669 if (rsq2 > cutsq2) continue;
670 rik = sqrt(rsq2);
671 bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
672 }
673
674 // attractive: pairwise potential and force
675
676 const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij);
677 const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij);
678 const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij);
679 const F_FLOAT fatt = -0.5*bij * dfa / rij;
680 const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij);
681 const F_FLOAT eng = 0.5*bij * fa;
682
683 f_x += delx1*fatt;
684 f_y += dely1*fatt;
685 f_z += delz1*fatt;
686
687 if (EVFLAG) {
688 if (eflag) ev.evdwl += 0.5*eng;
689 if (vflag_either || eflag_atom)
690 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
691 }
692
693 // attractive: three-body force
694
695 for (kk = 0; kk < jnum; kk++) {
696 if (jj == kk) continue;
697 k = d_neighbors_short(i,kk);
698 k &= NEIGHMASK;
699 ktype = type(k);
700
701 delx2 = xtmp - x(k,0);
702 dely2 = ytmp - x(k,1);
703 delz2 = ztmp - x(k,2);
704 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
705 cutsq2 = paramskk(itype,jtype,ktype).cutsq;
706
707 if (rsq2 > cutsq2) continue;
708 rik = sqrt(rsq2);
709 ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1,
710 rik,delx2,dely2,delz2,fi,fj,fk);
711
712 f_x += fi[0];
713 f_y += fi[1];
714 f_z += fi[2];
715
716 if (vflag_atom) {
717 F_FLOAT delrij[3], delrik[3];
718 delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1;
719 delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2;
720 if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj,fk,delrij,delrik);
721 }
722 }
723 }
724 f(i,0) += f_x;
725 f(i,1) += f_y;
726 f(i,2) += f_z;
727 }
728
729 template<class DeviceType>
730 template<int NEIGHFLAG, int EVFLAG>
731 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>,const int & ii) const732 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>, const int &ii) const {
733 EV_FLOAT ev;
734 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffZBLComputeFullA<NEIGHFLAG,EVFLAG>(), ii, ev);
735 }
736
737 /* ---------------------------------------------------------------------- */
738
739 template<class DeviceType>
740 template<int NEIGHFLAG, int EVFLAG>
741 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii,EV_FLOAT & ev) const742 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
743
744 const int i = d_ilist[ii];
745 const X_FLOAT xtmp = x(i,0);
746 const X_FLOAT ytmp = x(i,1);
747 const X_FLOAT ztmp = x(i,2);
748 const int itype = type(i);
749
750 int j,k,jj,kk,jtype,ktype,j_jnum;
751 F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij;
752 F_FLOAT fj[3], fk[3];
753 X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2;
754
755 const int jnum = d_numneigh_short[i];
756
757 F_FLOAT f_x = 0.0;
758 F_FLOAT f_y = 0.0;
759 F_FLOAT f_z = 0.0;
760
761 // attractive: bond order
762
763 for (jj = 0; jj < jnum; jj++) {
764 j = d_neighbors_short(i,jj);
765 j &= NEIGHMASK;
766 if (j >= nlocal) continue;
767 jtype = type(j);
768
769 delx1 = x(j,0) - xtmp;
770 dely1 = x(j,1) - ytmp;
771 delz1 = x(j,2) - ztmp;
772 rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
773 cutsq1 = paramskk(jtype,itype,itype).cutsq;
774
775 bo_ij = 0.0;
776 if (rsq1 > cutsq1) continue;
777 rij = sqrt(rsq1);
778
779 j_jnum = d_numneigh_short[j];
780
781 for (kk = 0; kk < j_jnum; kk++) {
782 k = d_neighbors_short(j,kk);
783 if (k == i) continue;
784 k &= NEIGHMASK;
785 ktype = type(k);
786
787 delx2 = x(j,0) - x(k,0);
788 dely2 = x(j,1) - x(k,1);
789 delz2 = x(j,2) - x(k,2);
790 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
791 cutsq2 = paramskk(jtype,itype,ktype).cutsq;
792
793 if (rsq2 > cutsq2) continue;
794 rik = sqrt(rsq2);
795 bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2);
796
797 }
798
799 // attractive: pairwise potential and force
800
801 const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij);
802 const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij);
803 const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij);
804 const F_FLOAT fatt = -0.5*bij * dfa / rij;
805 const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij);
806 const F_FLOAT eng = 0.5*bij * fa;
807
808 f_x -= delx1*fatt;
809 f_y -= dely1*fatt;
810 f_z -= delz1*fatt;
811
812 if (EVFLAG) {
813 if (eflag)
814 ev.evdwl += 0.5 * eng;
815 if (vflag_either || eflag_atom)
816 this->template ev_tally<NEIGHFLAG>(ev,i,j,eng,fatt,delx1,dely1,delz1);
817 }
818
819 // attractive: three-body force
820
821 for (kk = 0; kk < j_jnum; kk++) {
822 k = d_neighbors_short(j,kk);
823 if (k == i) continue;
824 k &= NEIGHMASK;
825 ktype = type(k);
826
827 delx2 = x(j,0) - x(k,0);
828 dely2 = x(j,1) - x(k,1);
829 delz2 = x(j,2) - x(k,2);
830 rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
831 cutsq2 = paramskk(jtype,itype,ktype).cutsq;
832
833 if (rsq2 > cutsq2) continue;
834 rik = sqrt(rsq2);
835 ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1,
836 rik,delx2,dely2,delz2,fj,fk);
837 f_x += fj[0];
838 f_y += fj[1];
839 f_z += fj[2];
840
841 if (vflag_atom) {
842 F_FLOAT delrji[3], delrjk[3];
843 delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1;
844 delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2;
845 if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk);
846 }
847
848 const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik);
849 const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij);
850 ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2,
851 rij,delx1,dely1,delz1,fk);
852 f_x += fk[0];
853 f_y += fk[1];
854 f_z += fk[2];
855 }
856 }
857 f(i,0) += f_x;
858 f(i,1) += f_y;
859 f(i,2) += f_z;
860 }
861
862 template<class DeviceType>
863 template<int NEIGHFLAG, int EVFLAG>
864 KOKKOS_INLINE_FUNCTION
operator ()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>,const int & ii) const865 void PairTersoffZBLKokkos<DeviceType>::operator()(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>, const int &ii) const {
866 EV_FLOAT ev;
867 this->template operator()<NEIGHFLAG,EVFLAG>(TagPairTersoffZBLComputeFullB<NEIGHFLAG,EVFLAG>(), ii, ev);
868 }
869
870 /* ---------------------------------------------------------------------- */
871
872 template<class DeviceType>
873 KOKKOS_INLINE_FUNCTION
ters_fc_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const874 double PairTersoffZBLKokkos<DeviceType>::ters_fc_k(const int &i, const int &j,
875 const int &k, const F_FLOAT &r) const
876 {
877 const F_FLOAT ters_R = paramskk(i,j,k).bigr;
878 const F_FLOAT ters_D = paramskk(i,j,k).bigd;
879
880 if (r < ters_R-ters_D) return 1.0;
881 if (r > ters_R+ters_D) return 0.0;
882 return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D));
883 }
884
885 /* ---------------------------------------------------------------------- */
886
887 template<class DeviceType>
888 KOKKOS_INLINE_FUNCTION
ters_dfc(const int & i,const int & j,const int & k,const F_FLOAT & r) const889 double PairTersoffZBLKokkos<DeviceType>::ters_dfc(const int &i, const int &j,
890 const int &k, const F_FLOAT &r) const
891 {
892 const F_FLOAT ters_R = paramskk(i,j,k).bigr;
893 const F_FLOAT ters_D = paramskk(i,j,k).bigd;
894
895 if (r < ters_R-ters_D) return 0.0;
896 if (r > ters_R+ters_D) return 0.0;
897 return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D);
898 }
899
900 /* ---------------------------------------------------------------------- */
901
902 template<class DeviceType>
903 KOKKOS_INLINE_FUNCTION
bondorder(const int & i,const int & j,const int & k,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2) const904 double PairTersoffZBLKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
905 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
906 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const
907 {
908 F_FLOAT arg, ex_delr;
909
910 const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik);
911
912 if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
913 else arg = paramskk(i,j,k).lam3 * (rij-rik);
914
915 if (arg > 69.0776) ex_delr = 1.e30;
916 else if (arg < -69.0776) ex_delr = 0.0;
917 else ex_delr = exp(arg);
918
919 return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr;
920 }
921
922 /* ---------------------------------------------------------------------- */
923
924 template<class DeviceType>
925 KOKKOS_INLINE_FUNCTION
926 double PairTersoffZBLKokkos<DeviceType>::
ters_gijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const927 ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
928 {
929 const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
930 const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
931 const F_FLOAT hcth = paramskk(i,j,k).h - cos;
932
933 return paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth));
934 }
935
936 /* ---------------------------------------------------------------------- */
937
938 template<class DeviceType>
939 KOKKOS_INLINE_FUNCTION
940 double PairTersoffZBLKokkos<DeviceType>::
ters_dgijk(const int & i,const int & j,const int & k,const F_FLOAT & cos) const941 ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const
942 {
943
944 const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c;
945 const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d;
946 const F_FLOAT hcth = paramskk(i,j,k).h - cos;
947 const F_FLOAT numerator = -2.0 * ters_c * hcth;
948 const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth);
949 return paramskk(i,j,k).gamma * numerator * denominator * denominator;
950 }
951
952 /* ---------------------------------------------------------------------- */
953
954 template<class DeviceType>
955 KOKKOS_INLINE_FUNCTION
ters_fa_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const956 double PairTersoffZBLKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
957 const int &k, const F_FLOAT &r) const
958 {
959 if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
960 return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r)
961 * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r);
962 }
963
964 /* ---------------------------------------------------------------------- */
965
966 template<class DeviceType>
967 KOKKOS_INLINE_FUNCTION
ters_dfa(const int & i,const int & j,const int & k,const F_FLOAT & r) const968 double PairTersoffZBLKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
969 const int &k, const F_FLOAT &r) const
970 {
971 if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0;
972 return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) *
973 (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r) -
974 ters_dfc(i,j,k,r) * fermi_k(i,j,k,r) - ters_fc_k(i,j,k,r) *
975 fermi_d_k(i,j,k,r));
976 }
977
978 /* ---------------------------------------------------------------------- */
979
980 template<class DeviceType>
981 KOKKOS_INLINE_FUNCTION
ters_bij_k(const int & i,const int & j,const int & k,const F_FLOAT & bo) const982 double PairTersoffZBLKokkos<DeviceType>::ters_bij_k(const int &i, const int &j,
983 const int &k, const F_FLOAT &bo) const
984 {
985 const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
986 if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp);
987 if (tmp > paramskk(i,j,k).c2)
988 return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp);
989 if (tmp < paramskk(i,j,k).c4) return 1.0;
990 if (tmp < paramskk(i,j,k).c3)
991 return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern);
992 return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern));
993 }
994
995 /* ---------------------------------------------------------------------- */
996
997 template<class DeviceType>
998 KOKKOS_INLINE_FUNCTION
ters_dbij(const int & i,const int & j,const int & k,const F_FLOAT & bo) const999 double PairTersoffZBLKokkos<DeviceType>::ters_dbij(const int &i, const int &j,
1000 const int &k, const F_FLOAT &bo) const
1001 {
1002 const F_FLOAT tmp = paramskk(i,j,k).beta * bo;
1003 if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5);
1004 if (tmp > paramskk(i,j,k).c2)
1005 return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) *
1006 (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) *
1007 pow(tmp,-paramskk(i,j,k).powern)));
1008 if (tmp < paramskk(i,j,k).c4) return 0.0;
1009 if (tmp < paramskk(i,j,k).c3)
1010 return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0);
1011
1012 const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern);
1013 return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo;
1014 }
1015
1016 /* ---------------------------------------------------------------------- */
1017
1018 template<class DeviceType>
1019 KOKKOS_INLINE_FUNCTION
ters_dthb(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fi,F_FLOAT * fj,F_FLOAT * fk) const1020 void PairTersoffZBLKokkos<DeviceType>::ters_dthb(
1021 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1022 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1023 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1024 F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const
1025 {
1026 // from PairTersoffZBL::attractive
1027 F_FLOAT rij_hat[3],rik_hat[3];
1028 F_FLOAT rijinv,rikinv;
1029 F_FLOAT delrij[3], delrik[3];
1030
1031 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1032 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1033
1034 //rij = sqrt(rsq1);
1035 rijinv = 1.0/rij;
1036 vec3_scale(rijinv,delrij,rij_hat);
1037
1038 //rik = sqrt(rsq2);
1039 rikinv = 1.0/rik;
1040 vec3_scale(rikinv,delrik,rik_hat);
1041
1042 // from PairTersoffZBL::ters_zetaterm_d
1043 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1044 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1045
1046 fc = ters_fc_k(i,j,k,rik);
1047 dfc = ters_dfc(i,j,k,rik);
1048 if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1049 else tmp = paramskk(i,j,k).lam3 * (rij-rik);
1050
1051 if (tmp > 69.0776) ex_delr = 1.e30;
1052 else if (tmp < -69.0776) ex_delr = 0.0;
1053 else ex_delr = exp(tmp);
1054
1055 if (int(paramskk(i,j,k).powerm) == 3)
1056 dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1057 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1058
1059 cos = vec3_dot(rij_hat,rik_hat);
1060 gijk = ters_gijk(i,j,k,cos);
1061 dgijk = ters_dgijk(i,j,k,cos);
1062
1063 // from PairTersoffZBL::costheta_d
1064 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1065 vec3_scale(rijinv,dcosfj,dcosfj);
1066 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1067 vec3_scale(rikinv,dcosfk,dcosfk);
1068 vec3_add(dcosfj,dcosfk,dcosfi);
1069 vec3_scale(-1.0,dcosfi,dcosfi);
1070
1071 vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi);
1072 vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi);
1073 vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi);
1074 vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi);
1075 vec3_scale(prefactor,fi,fi);
1076
1077 vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1078 vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1079 vec3_scale(prefactor,fj,fj);
1080
1081 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1082 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1083 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1084 vec3_scale(prefactor,fk,fk);
1085
1086 }
1087
1088 /* ---------------------------------------------------------------------- */
1089
1090 template<class DeviceType>
1091 KOKKOS_INLINE_FUNCTION
ters_dthbj(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fj,F_FLOAT * fk) const1092 void PairTersoffZBLKokkos<DeviceType>::ters_dthbj(
1093 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1094 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1095 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1096 F_FLOAT *fj, F_FLOAT *fk) const
1097 {
1098 F_FLOAT rij_hat[3],rik_hat[3];
1099 F_FLOAT rijinv,rikinv;
1100 F_FLOAT delrij[3], delrik[3];
1101
1102 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1103 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1104
1105 rijinv = 1.0/rij;
1106 vec3_scale(rijinv,delrij,rij_hat);
1107
1108 rikinv = 1.0/rik;
1109 vec3_scale(rikinv,delrik,rik_hat);
1110
1111 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1112 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1113
1114 fc = ters_fc_k(i,j,k,rik);
1115 dfc = ters_dfc(i,j,k,rik);
1116 if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1117 else tmp = paramskk(i,j,k).lam3 * (rij-rik);
1118
1119 if (tmp > 69.0776) ex_delr = 1.e30;
1120 else if (tmp < -69.0776) ex_delr = 0.0;
1121 else ex_delr = exp(tmp);
1122
1123 if (int(paramskk(i,j,k).powerm) == 3)
1124 dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1125 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1126
1127 cos = vec3_dot(rij_hat,rik_hat);
1128 gijk = ters_gijk(i,j,k,cos);
1129 dgijk = ters_dgijk(i,j,k,cos);
1130
1131 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1132 vec3_scale(rijinv,dcosfj,dcosfj);
1133 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1134 vec3_scale(rikinv,dcosfk,dcosfk);
1135 vec3_add(dcosfj,dcosfk,dcosfi);
1136 vec3_scale(-1.0,dcosfi,dcosfi);
1137
1138 vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
1139 vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
1140 vec3_scale(prefactor,fj,fj);
1141
1142 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1143 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1144 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1145 vec3_scale(prefactor,fk,fk);
1146
1147 }
1148
1149 /* ---------------------------------------------------------------------- */
1150
1151 template<class DeviceType>
1152 KOKKOS_INLINE_FUNCTION
ters_dthbk(const int & i,const int & j,const int & k,const F_FLOAT & prefactor,const F_FLOAT & rij,const F_FLOAT & dx1,const F_FLOAT & dy1,const F_FLOAT & dz1,const F_FLOAT & rik,const F_FLOAT & dx2,const F_FLOAT & dy2,const F_FLOAT & dz2,F_FLOAT * fk) const1153 void PairTersoffZBLKokkos<DeviceType>::ters_dthbk(
1154 const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
1155 const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
1156 const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
1157 F_FLOAT *fk) const
1158 {
1159 F_FLOAT rij_hat[3],rik_hat[3];
1160 F_FLOAT rijinv,rikinv;
1161 F_FLOAT delrij[3], delrik[3];
1162
1163 delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
1164 delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;
1165
1166 rijinv = 1.0/rij;
1167 vec3_scale(rijinv,delrij,rij_hat);
1168
1169 rikinv = 1.0/rik;
1170 vec3_scale(rikinv,delrik,rik_hat);
1171
1172 F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp;
1173 F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3];
1174
1175 fc = ters_fc_k(i,j,k,rik);
1176 dfc = ters_dfc(i,j,k,rik);
1177 if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
1178 else tmp = paramskk(i,j,k).lam3 * (rij-rik);
1179
1180 if (tmp > 69.0776) ex_delr = 1.e30;
1181 else if (tmp < -69.0776) ex_delr = 0.0;
1182 else ex_delr = exp(tmp);
1183
1184 if (int(paramskk(i,j,k).powerm) == 3)
1185 dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
1186 else dex_delr = paramskk(i,j,k).lam3 * ex_delr;
1187
1188 cos = vec3_dot(rij_hat,rik_hat);
1189 gijk = ters_gijk(i,j,k,cos);
1190 dgijk = ters_dgijk(i,j,k,cos);
1191
1192 vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj);
1193 vec3_scale(rijinv,dcosfj,dcosfj);
1194 vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk);
1195 vec3_scale(rikinv,dcosfk,dcosfk);
1196 vec3_add(dcosfj,dcosfk,dcosfi);
1197 vec3_scale(-1.0,dcosfi,dcosfi);
1198
1199 vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
1200 vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
1201 vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
1202 vec3_scale(prefactor,fk,fk);
1203
1204 }
1205
1206 /* ---------------------------------------------------------------------- */
1207
1208 template<class DeviceType>
1209 KOKKOS_INLINE_FUNCTION
fermi_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const1210 double PairTersoffZBLKokkos<DeviceType>::fermi_k(const int &i, const int &j,
1211 const int &k, const F_FLOAT &r) const
1212 {
1213 return 1.0 / (1.0 + exp(-paramskk(i,j,k).ZBLexpscale *
1214 (r - paramskk(i,j,k).ZBLcut)));
1215 }
1216
1217 /* ---------------------------------------------------------------------- */
1218
1219 template<class DeviceType>
1220 KOKKOS_INLINE_FUNCTION
fermi_d_k(const int & i,const int & j,const int & k,const F_FLOAT & r) const1221 double PairTersoffZBLKokkos<DeviceType>::fermi_d_k(const int &i, const int &j,
1222 const int &k, const F_FLOAT &r) const
1223 {
1224 return paramskk(i,j,k).ZBLexpscale * exp(-paramskk(i,j,k).ZBLexpscale *
1225 (r - paramskk(i,j,k).ZBLcut)) /
1226 pow(1.0 + exp(-paramskk(i,j,k).ZBLexpscale *
1227 (r - paramskk(i,j,k).ZBLcut)),2.0);
1228 }
1229
1230 /* ---------------------------------------------------------------------- */
1231
1232 template<class DeviceType>
1233 template<int NEIGHFLAG>
1234 KOKKOS_INLINE_FUNCTION
ev_tally(EV_FLOAT & ev,const int & i,const int & j,const F_FLOAT & epair,const F_FLOAT & fpair,const F_FLOAT & delx,const F_FLOAT & dely,const F_FLOAT & delz) const1235 void PairTersoffZBLKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
1236 const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
1237 const F_FLOAT &dely, const F_FLOAT &delz) const
1238 {
1239 const int VFLAG = vflag_either;
1240
1241 // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
1242
1243 auto v_eatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
1244 auto a_eatom = v_eatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1245
1246 auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1247 auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1248
1249 if (eflag_atom) {
1250 const E_FLOAT epairhalf = 0.5 * epair;
1251 a_eatom[i] += epairhalf;
1252 if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
1253 }
1254
1255 if (VFLAG) {
1256 const E_FLOAT v0 = delx*delx*fpair;
1257 const E_FLOAT v1 = dely*dely*fpair;
1258 const E_FLOAT v2 = delz*delz*fpair;
1259 const E_FLOAT v3 = delx*dely*fpair;
1260 const E_FLOAT v4 = delx*delz*fpair;
1261 const E_FLOAT v5 = dely*delz*fpair;
1262
1263 if (vflag_global) {
1264 if (NEIGHFLAG != FULL) {
1265 ev.v[0] += v0;
1266 ev.v[1] += v1;
1267 ev.v[2] += v2;
1268 ev.v[3] += v3;
1269 ev.v[4] += v4;
1270 ev.v[5] += v5;
1271 } else {
1272 ev.v[0] += 0.5*v0;
1273 ev.v[1] += 0.5*v1;
1274 ev.v[2] += 0.5*v2;
1275 ev.v[3] += 0.5*v3;
1276 ev.v[4] += 0.5*v4;
1277 ev.v[5] += 0.5*v5;
1278 }
1279 }
1280
1281 if (vflag_atom) {
1282 a_vatom(i,0) += 0.5*v0;
1283 a_vatom(i,1) += 0.5*v1;
1284 a_vatom(i,2) += 0.5*v2;
1285 a_vatom(i,3) += 0.5*v3;
1286 a_vatom(i,4) += 0.5*v4;
1287 a_vatom(i,5) += 0.5*v5;
1288
1289 if (NEIGHFLAG != FULL) {
1290 a_vatom(j,0) += 0.5*v0;
1291 a_vatom(j,1) += 0.5*v1;
1292 a_vatom(j,2) += 0.5*v2;
1293 a_vatom(j,3) += 0.5*v3;
1294 a_vatom(j,4) += 0.5*v4;
1295 a_vatom(j,5) += 0.5*v5;
1296 }
1297 }
1298 }
1299 }
1300
1301 /* ---------------------------------------------------------------------- */
1302
1303 template<class DeviceType>
1304 template<int NEIGHFLAG>
1305 KOKKOS_INLINE_FUNCTION
v_tally3(EV_FLOAT & ev,const int & i,const int & j,const int & k,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drij,F_FLOAT * drik) const1306 void PairTersoffZBLKokkos<DeviceType>::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k,
1307 F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
1308 {
1309 // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
1310
1311 auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
1312 auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
1313
1314 F_FLOAT v[6];
1315
1316 v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]);
1317 v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]);
1318 v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]);
1319 v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]);
1320 v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]);
1321 v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]);
1322
1323 if (vflag_global) {
1324 ev.v[0] += v[0];
1325 ev.v[1] += v[1];
1326 ev.v[2] += v[2];
1327 ev.v[3] += v[3];
1328 ev.v[4] += v[4];
1329 ev.v[5] += v[5];
1330 }
1331
1332 if (vflag_atom) {
1333 a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
1334 a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
1335 if (NEIGHFLAG != FULL) {
1336 a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
1337 a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
1338 a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
1339 a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
1340 }
1341 }
1342
1343 }
1344
1345 /* ---------------------------------------------------------------------- */
1346
1347 template<class DeviceType>
1348 KOKKOS_INLINE_FUNCTION
v_tally3_atom(EV_FLOAT & ev,const int & i,const int &,const int &,F_FLOAT * fj,F_FLOAT * fk,F_FLOAT * drji,F_FLOAT * drjk) const1349 void PairTersoffZBLKokkos<DeviceType>::v_tally3_atom(EV_FLOAT &ev, const int &i, const int & /*j*/,
1350 const int & /*k*/, F_FLOAT *fj, F_FLOAT *fk,
1351 F_FLOAT *drji, F_FLOAT *drjk) const
1352 {
1353 F_FLOAT v[6];
1354
1355 v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
1356 v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
1357 v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
1358 v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
1359 v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
1360 v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
1361
1362 if (vflag_global) {
1363 ev.v[0] += v[0];
1364 ev.v[1] += v[1];
1365 ev.v[2] += v[2];
1366 ev.v[3] += v[3];
1367 ev.v[4] += v[4];
1368 ev.v[5] += v[5];
1369 }
1370
1371 if (vflag_atom) {
1372 d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2];
1373 d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5];
1374 }
1375 }
1376
1377 /* ---------------------------------------------------------------------- */
1378
1379 template<class DeviceType>
1380 KOKKOS_INLINE_FUNCTION
sbmask(const int & j) const1381 int PairTersoffZBLKokkos<DeviceType>::sbmask(const int& j) const {
1382 return j >> SBBITS & 3;
1383 }
1384
1385 namespace LAMMPS_NS {
1386 template class PairTersoffZBLKokkos<LMPDeviceType>;
1387 #ifdef LMP_KOKKOS_GPU
1388 template class PairTersoffZBLKokkos<LMPHostType>;
1389 #endif
1390 }
1391
1392