1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing authors: Stan Moore (SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_eam_alloy_kokkos.h"
20 
21 #include "atom_kokkos.h"
22 #include "atom_masks.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "kokkos.h"
27 #include "memory_kokkos.h"
28 #include "neigh_list_kokkos.h"
29 #include "neigh_request.h"
30 #include "neighbor.h"
31 #include "pair_kokkos.h"
32 #include "potential_file_reader.h"
33 
34 #include <cmath>
35 #include <cstring>
36 
37 using namespace LAMMPS_NS;
38 
39 // Cannot use virtual inheritance on the GPU, so must duplicate code
40 
41 /* ---------------------------------------------------------------------- */
42 
43 template<class DeviceType>
PairEAMAlloyKokkos(LAMMPS * lmp)44 PairEAMAlloyKokkos<DeviceType>::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp)
45 {
46   respa_enable = 0;
47   single_enable = 0;
48   one_coeff = 1;
49 
50   kokkosable = 1;
51   atomKK = (AtomKokkos *) atom;
52   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
53   datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
54   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
55 }
56 
57 /* ---------------------------------------------------------------------- */
58 
59 template<class DeviceType>
~PairEAMAlloyKokkos()60 PairEAMAlloyKokkos<DeviceType>::~PairEAMAlloyKokkos()
61 {
62   if (!copymode) {
63     memoryKK->destroy_kokkos(k_eatom,eatom);
64     memoryKK->destroy_kokkos(k_vatom,vatom);
65   }
66 }
67 
68 /* ---------------------------------------------------------------------- */
69 
70 template<class DeviceType>
compute(int eflag_in,int vflag_in)71 void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
72 {
73   eflag = eflag_in;
74   vflag = vflag_in;
75 
76   if (neighflag == FULL) no_virial_fdotr_compute = 1;
77 
78   ev_init(eflag,vflag,0);
79 
80   // reallocate per-atom arrays if necessary
81 
82   if (eflag_atom) {
83     memoryKK->destroy_kokkos(k_eatom,eatom);
84     memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
85     d_eatom = k_eatom.view<DeviceType>();
86   }
87   if (vflag_atom) {
88     memoryKK->destroy_kokkos(k_vatom,vatom);
89     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
90     d_vatom = k_vatom.view<DeviceType>();
91   }
92 
93   atomKK->sync(execution_space,datamask_read);
94   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
95   else atomKK->modified(execution_space,F_MASK);
96 
97   // grow energy and fp arrays if necessary
98   // need to be atom->nmax in length
99 
100   if (atom->nmax > nmax) {
101     nmax = atom->nmax;
102     k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
103     k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax);
104     d_rho = k_rho.template view<DeviceType>();
105     d_fp = k_fp.template view<DeviceType>();
106     h_rho = k_rho.h_view;
107     h_fp = k_fp.h_view;
108   }
109 
110   x = atomKK->k_x.view<DeviceType>();
111   f = atomKK->k_f.view<DeviceType>();
112   type = atomKK->k_type.view<DeviceType>();
113   tag = atomKK->k_tag.view<DeviceType>();
114   nlocal = atom->nlocal;
115   nall = atom->nlocal + atom->nghost;
116   newton_pair = force->newton_pair;
117 
118   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
119   d_numneigh = k_list->d_numneigh;
120   d_neighbors = k_list->d_neighbors;
121   d_ilist = k_list->d_ilist;
122   int inum = list->inum;
123 
124   need_dup = lmp->kokkos->need_dup<DeviceType>();
125   if (need_dup) {
126     dup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho);
127     dup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(f);
128     dup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
129     dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
130   } else {
131     ndup_rho   = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho);
132     ndup_f     = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(f);
133     ndup_eatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
134     ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
135   }
136 
137   copymode = 1;
138 
139   // zero out density
140 
141   if (newton_pair)
142     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nall),*this);
143   else
144     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyInitialize>(0,nlocal),*this);
145 
146   // loop over neighbors of my atoms
147 
148   EV_FLOAT ev;
149 
150   // compute kernel A
151 
152   if (neighflag == HALF || neighflag == HALFTHREAD) {
153 
154     if (neighflag == HALF) {
155       if (newton_pair) {
156         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALF,1> >(0,inum),*this);
157       } else {
158         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALF,0> >(0,inum),*this);
159       }
160     } else if (neighflag == HALFTHREAD) {
161       if (newton_pair) {
162         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,1> >(0,inum),*this);
163       } else {
164         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelA<HALFTHREAD,0> >(0,inum),*this);
165       }
166     }
167 
168     if (need_dup)
169       Kokkos::Experimental::contribute(d_rho, dup_rho);
170 
171     // communicate and sum densities (on the host)
172 
173     if (newton_pair) {
174       k_rho.template modify<DeviceType>();
175       comm->reverse_comm_pair(this);
176       k_rho.template sync<DeviceType>();
177     }
178 
179     // compute kernel B
180 
181     if (eflag)
182       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<1> >(0,inum),*this,ev);
183     else
184       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelB<0> >(0,inum),*this);
185 
186   } else if (neighflag == FULL) {
187 
188     // compute kernel AB
189 
190     if (eflag)
191       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<1> >(0,inum),*this,ev);
192     else
193       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelAB<0> >(0,inum),*this);
194   }
195 
196   if (eflag) {
197     eng_vdwl += ev.evdwl;
198     ev.evdwl = 0.0;
199   }
200 
201   // communicate derivative of embedding function
202 
203   k_fp.template modify<DeviceType>();
204   comm->forward_comm_pair(this);
205   k_fp.template sync<DeviceType>();
206 
207   // compute kernel C
208 
209   if (evflag) {
210     if (neighflag == HALF) {
211       if (newton_pair) {
212         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,1,1> >(0,inum),*this,ev);
213       } else {
214         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,0,1> >(0,inum),*this,ev);
215       }
216     } else if (neighflag == HALFTHREAD) {
217       if (newton_pair) {
218         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,1,1> >(0,inum),*this,ev);
219       } else {
220         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,0,1> >(0,inum),*this,ev);
221       }
222     } else if (neighflag == FULL) {
223       if (newton_pair) {
224         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,1,1> >(0,inum),*this,ev);
225       } else {
226         Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,0,1> >(0,inum),*this,ev);
227       }
228     }
229   } else {
230     if (neighflag == HALF) {
231       if (newton_pair) {
232         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,1,0> >(0,inum),*this);
233       } else {
234         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALF,0,0> >(0,inum),*this);
235       }
236     } else if (neighflag == HALFTHREAD) {
237       if (newton_pair) {
238         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,1,0> >(0,inum),*this);
239       } else {
240         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<HALFTHREAD,0,0> >(0,inum),*this);
241       }
242     } else if (neighflag == FULL) {
243       if (newton_pair) {
244         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,1,0> >(0,inum),*this);
245       } else {
246         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyKernelC<FULL,0,0> >(0,inum),*this);
247       }
248     }
249   }
250 
251   if (need_dup)
252     Kokkos::Experimental::contribute(f, dup_f);
253 
254   if (eflag_global) eng_vdwl += ev.evdwl;
255   if (vflag_global) {
256     virial[0] += ev.v[0];
257     virial[1] += ev.v[1];
258     virial[2] += ev.v[2];
259     virial[3] += ev.v[3];
260     virial[4] += ev.v[4];
261     virial[5] += ev.v[5];
262   }
263 
264   if (vflag_fdotr) pair_virial_fdotr_compute(this);
265 
266   if (eflag_atom) {
267     if (need_dup)
268       Kokkos::Experimental::contribute(d_eatom, dup_eatom);
269     k_eatom.template modify<DeviceType>();
270     k_eatom.template sync<LMPHostType>();
271   }
272 
273   if (vflag_atom) {
274     if (need_dup)
275       Kokkos::Experimental::contribute(d_vatom, dup_vatom);
276     k_vatom.template modify<DeviceType>();
277     k_vatom.template sync<LMPHostType>();
278   }
279 
280   copymode = 0;
281 
282   // free duplicated memory
283   if (need_dup) {
284     dup_rho   = decltype(dup_rho)();
285     dup_f     = decltype(dup_f)();
286     dup_eatom = decltype(dup_eatom)();
287     dup_vatom = decltype(dup_vatom)();
288   }
289 }
290 
291 /* ----------------------------------------------------------------------
292    init specific to this pair style
293 ------------------------------------------------------------------------- */
294 
295 template<class DeviceType>
init_style()296 void PairEAMAlloyKokkos<DeviceType>::init_style()
297 {
298   // convert read-in file(s) to arrays and spline them
299 
300   PairEAM::init_style();
301 
302   // irequest = neigh request made by parent class
303 
304   neighflag = lmp->kokkos->neighflag;
305   int irequest = neighbor->nrequest - 1;
306 
307   neighbor->requests[irequest]->
308     kokkos_host = std::is_same<DeviceType,LMPHostType>::value &&
309     !std::is_same<DeviceType,LMPDeviceType>::value;
310   neighbor->requests[irequest]->
311     kokkos_device = std::is_same<DeviceType,LMPDeviceType>::value;
312 
313   if (neighflag == FULL) {
314     neighbor->requests[irequest]->full = 1;
315     neighbor->requests[irequest]->half = 0;
316   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
317     neighbor->requests[irequest]->full = 0;
318     neighbor->requests[irequest]->half = 1;
319   } else {
320     error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
321   }
322 
323 }
324 
325 /* ----------------------------------------------------------------------
326    convert read-in funcfl potential(s) to standard array format
327    interpolate all file values to a single grid and cutoff
328 ------------------------------------------------------------------------- */
329 
330 template<class DeviceType>
file2array()331 void PairEAMAlloyKokkos<DeviceType>::file2array()
332 {
333   file2array_alloy();
334 
335   int i,j;
336   int n = atom->ntypes;
337 
338   auto k_type2frho = DAT::tdual_int_1d("pair:type2frho",n+1);
339   auto k_type2rhor = DAT::tdual_int_2d_dl("pair:type2rhor",n+1,n+1);
340   auto k_type2z2r = DAT::tdual_int_2d_dl("pair:type2z2r",n+1,n+1);
341 
342   auto h_type2frho =  k_type2frho.h_view;
343   auto h_type2rhor = k_type2rhor.h_view;
344   auto h_type2z2r = k_type2z2r.h_view;
345 
346   for (i = 1; i <= n; i++) {
347     h_type2frho[i] = type2frho[i];
348     for (j = 1; j <= n; j++) {
349       h_type2rhor(i,j) = type2rhor[i][j];
350       h_type2z2r(i,j) = type2z2r[i][j];
351     }
352   }
353   k_type2frho.template modify<LMPHostType>();
354   k_type2frho.template sync<DeviceType>();
355   k_type2rhor.template modify<LMPHostType>();
356   k_type2rhor.template sync<DeviceType>();
357   k_type2z2r.template modify<LMPHostType>();
358   k_type2z2r.template sync<DeviceType>();
359 
360   d_type2frho = k_type2frho.template view<DeviceType>();
361   d_type2rhor = k_type2rhor.template view<DeviceType>();
362   d_type2z2r = k_type2z2r.template view<DeviceType>();
363 }
364 
365 /* ---------------------------------------------------------------------- */
366 
367 template<class DeviceType>
array2spline()368 void PairEAMAlloyKokkos<DeviceType>::array2spline()
369 {
370   rdr = 1.0/dr;
371   rdrho = 1.0/drho;
372 
373   tdual_ffloat_2d_n7 k_frho_spline = tdual_ffloat_2d_n7("pair:frho",nfrho,nrho+1);
374   tdual_ffloat_2d_n7 k_rhor_spline = tdual_ffloat_2d_n7("pair:rhor",nrhor,nr+1);
375   tdual_ffloat_2d_n7 k_z2r_spline = tdual_ffloat_2d_n7("pair:z2r",nz2r,nr+1);
376 
377   t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view;
378   t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view;
379   t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view;
380 
381   for (int i = 0; i < nfrho; i++)
382     interpolate(nrho,drho,frho[i],h_frho_spline,i);
383   k_frho_spline.template modify<LMPHostType>();
384   k_frho_spline.template sync<DeviceType>();
385 
386   for (int i = 0; i < nrhor; i++)
387     interpolate(nr,dr,rhor[i],h_rhor_spline,i);
388   k_rhor_spline.template modify<LMPHostType>();
389   k_rhor_spline.template sync<DeviceType>();
390 
391   for (int i = 0; i < nz2r; i++)
392     interpolate(nr,dr,z2r[i],h_z2r_spline,i);
393   k_z2r_spline.template modify<LMPHostType>();
394   k_z2r_spline.template sync<DeviceType>();
395 
396   d_frho_spline = k_frho_spline.template view<DeviceType>();
397   d_rhor_spline = k_rhor_spline.template view<DeviceType>();
398   d_z2r_spline = k_z2r_spline.template view<DeviceType>();
399 }
400 
401 /* ---------------------------------------------------------------------- */
402 
403 template<class DeviceType>
interpolate(int n,double delta,double * f,t_host_ffloat_2d_n7 h_spline,int i)404 void PairEAMAlloyKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i)
405 {
406   for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m];
407 
408   h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6);
409   h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6));
410   h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6));
411   h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6);
412 
413   for (int m = 3; m <= n-2; m++)
414     h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) +
415                     8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0;
416 
417   for (int m = 1; m <= n-1; m++) {
418     h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) -
419       2.0*h_spline(i,m,5) - h_spline(i,m+1,5);
420     h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) -
421       2.0*(h_spline(i,m+1,6)-h_spline(i,m,6));
422   }
423 
424   h_spline(i,n,4) = 0.0;
425   h_spline(i,n,3) = 0.0;
426 
427   for (int m = 1; m <= n; m++) {
428     h_spline(i,m,2) = h_spline(i,m,5)/delta;
429     h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta;
430     h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta;
431   }
432 }
433 
434 /* ---------------------------------------------------------------------- */
435 
436 template<class DeviceType>
pack_forward_comm_kokkos(int n,DAT::tdual_int_2d k_sendlist,int iswap_in,DAT::tdual_xfloat_1d & buf,int,int *)437 int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
438                                                              int iswap_in, DAT::tdual_xfloat_1d &buf,
439                                                              int /*pbc_flag*/, int * /*pbc*/)
440 {
441   d_sendlist = k_sendlist.view<DeviceType>();
442   iswap = iswap_in;
443   v_buf = buf.view<DeviceType>();
444   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this);
445   return n;
446 }
447 
448 template<class DeviceType>
449 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyPackForwardComm,const int & i) const450 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyPackForwardComm, const int &i) const {
451   int j = d_sendlist(iswap, i);
452   v_buf[i] = d_fp[j];
453 }
454 
455 /* ---------------------------------------------------------------------- */
456 
457 template<class DeviceType>
unpack_forward_comm_kokkos(int n,int first_in,DAT::tdual_xfloat_1d & buf)458 void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
459 {
460   first = first_in;
461   v_buf = buf.view<DeviceType>();
462   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this);
463 }
464 
465 template<class DeviceType>
466 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyUnpackForwardComm,const int & i) const467 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyUnpackForwardComm, const int &i) const {
468   d_fp[i + first] = v_buf[i];
469 }
470 
471 /* ---------------------------------------------------------------------- */
472 
473 template<class DeviceType>
pack_forward_comm(int n,int * list,double * buf,int,int *)474 int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
475                                                       int /*pbc_flag*/, int * /*pbc*/)
476 {
477   k_fp.sync_host();
478 
479   int i,j;
480 
481   for (i = 0; i < n; i++) {
482     j = list[i];
483     buf[i] = h_fp[j];
484   }
485   return n;
486 }
487 
488 /* ---------------------------------------------------------------------- */
489 
490 template<class DeviceType>
unpack_forward_comm(int n,int first,double * buf)491 void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
492 {
493   k_fp.sync_host();
494 
495   for (int i = 0; i < n; i++) {
496     h_fp[i + first] = buf[i];
497   }
498 
499   k_fp.modify_host();
500 }
501 
502 /* ---------------------------------------------------------------------- */
503 
504 template<class DeviceType>
pack_reverse_comm(int n,int first,double * buf)505 int PairEAMAlloyKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
506 {
507   k_rho.sync_host();
508 
509   int i,m,last;
510 
511   m = 0;
512   last = first + n;
513   for (i = first; i < last; i++) buf[m++] = h_rho[i];
514   return m;
515 }
516 
517 /* ---------------------------------------------------------------------- */
518 
519 template<class DeviceType>
unpack_reverse_comm(int n,int * list,double * buf)520 void PairEAMAlloyKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
521 {
522   k_rho.sync_host();
523 
524   int i,j,m;
525 
526   m = 0;
527   for (i = 0; i < n; i++) {
528     j = list[i];
529     h_rho[j] += buf[m++];
530   }
531 
532   k_rho.modify_host();
533 }
534 
535 /* ---------------------------------------------------------------------- */
536 
537 template<class DeviceType>
538 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyInitialize,const int & i) const539 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyInitialize, const int &i) const {
540   d_rho[i] = 0.0;
541 }
542 
543 /* ---------------------------------------------------------------------- */
544 
545 ////Specialisation for Neighborlist types Half, HalfThread, Full
546 template<class DeviceType>
547 template<int NEIGHFLAG, int NEWTON_PAIR>
548 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelA<NEIGHFLAG,NEWTON_PAIR>,const int & ii) const549 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelA<NEIGHFLAG,NEWTON_PAIR>, const int &ii) const {
550 
551   // rho = density at each atom
552   // loop over neighbors of my atoms
553 
554   // The rho array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
555 
556   auto v_rho = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_rho),decltype(ndup_rho)>::get(dup_rho,ndup_rho);
557   auto a_rho = v_rho.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
558 
559   const int i = d_ilist[ii];
560   const X_FLOAT xtmp = x(i,0);
561   const X_FLOAT ytmp = x(i,1);
562   const X_FLOAT ztmp = x(i,2);
563   const int itype = type(i);
564 
565   const int jnum = d_numneigh[i];
566 
567   F_FLOAT rhotmp = 0.0;
568 
569   for (int jj = 0; jj < jnum; jj++) {
570     int j = d_neighbors(i,jj);
571     j &= NEIGHMASK;
572     const X_FLOAT delx = xtmp - x(j,0);
573     const X_FLOAT dely = ytmp - x(j,1);
574     const X_FLOAT delz = ztmp - x(j,2);
575     const int jtype = type(j);
576     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
577 
578     if (rsq < cutforcesq) {
579       F_FLOAT p = sqrt(rsq)*rdr + 1.0;
580       int m = static_cast<int> (p);
581       m = MIN(m,nr-1);
582       p -= m;
583       p = MIN(p,1.0);
584       const int d_type2rhor_ji = d_type2rhor(jtype,itype);
585       rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
586                   d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
587       if (NEWTON_PAIR || j < nlocal) {
588         const int d_type2rhor_ij = d_type2rhor(itype,jtype);
589         a_rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
590                       d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
591       }
592     }
593 
594   }
595   a_rho[i] += rhotmp;
596 }
597 
598 /* ---------------------------------------------------------------------- */
599 
600 ////Specialisation for Neighborlist types Half, HalfThread, Full
601 template<class DeviceType>
602 template<int EFLAG>
603 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelB<EFLAG>,const int & ii,EV_FLOAT & ev) const604 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
605   // fp = derivative of embedding energy at each atom
606   // phi = embedding energy at each atom
607   // if rho > rhomax (e.g. due to close approach of two atoms),
608   //   will exceed table, so add linear term to conserve energy
609 
610   const int i = d_ilist[ii];
611   const int itype = type(i);
612 
613   F_FLOAT p = d_rho[i]*rdrho + 1.0;
614   int m = static_cast<int> (p);
615   m = MAX(1,MIN(m,nrho-1));
616   p -= m;
617   p = MIN(p,1.0);
618   const int d_type2frho_i = d_type2frho[itype];
619   d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
620   if (EFLAG) {
621     F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
622                     d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
623     if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
624     if (eflag_global) ev.evdwl += phi;
625     if (eflag_atom) d_eatom[i] += phi;
626   }
627 }
628 
629 template<class DeviceType>
630 template<int EFLAG>
631 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelB<EFLAG>,const int & ii) const632 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelB<EFLAG>, const int &ii) const {
633   EV_FLOAT ev;
634   this->template operator()<EFLAG>(TagPairEAMAlloyKernelB<EFLAG>(), ii, ev);
635 }
636 
637 /* ---------------------------------------------------------------------- */
638 
639 ////Specialisation for Neighborlist types Half, HalfThread, Full
640 template<class DeviceType>
641 template<int EFLAG>
642 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelAB<EFLAG>,const int & ii,EV_FLOAT & ev) const643 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int &ii, EV_FLOAT& ev) const {
644 
645   // rho = density at each atom
646   // loop over neighbors of my atoms
647 
648   const int i = d_ilist[ii];
649   const X_FLOAT xtmp = x(i,0);
650   const X_FLOAT ytmp = x(i,1);
651   const X_FLOAT ztmp = x(i,2);
652   const int itype = type(i);
653 
654   const int jnum = d_numneigh[i];
655 
656   F_FLOAT rhotmp = 0.0;
657 
658   for (int jj = 0; jj < jnum; jj++) {
659     int j = d_neighbors(i,jj);
660     j &= NEIGHMASK;
661     const X_FLOAT delx = xtmp - x(j,0);
662     const X_FLOAT dely = ytmp - x(j,1);
663     const X_FLOAT delz = ztmp - x(j,2);
664     const int jtype = type(j);
665     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
666 
667     if (rsq < cutforcesq) {
668       F_FLOAT p = sqrt(rsq)*rdr + 1.0;
669       int m = static_cast<int> (p);
670       m = MIN(m,nr-1);
671       p -= m;
672       p = MIN(p,1.0);
673       const int d_type2rhor_ji = d_type2rhor(jtype,itype);
674       rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
675                   d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
676     }
677 
678   }
679   d_rho[i] += rhotmp;
680 
681   // fp = derivative of embedding energy at each atom
682   // phi = embedding energy at each atom
683   // if rho > rhomax (e.g. due to close approach of two atoms),
684   //   will exceed table, so add linear term to conserve energy
685 
686   F_FLOAT p = d_rho[i]*rdrho + 1.0;
687   int m = static_cast<int> (p);
688   m = MAX(1,MIN(m,nrho-1));
689   p -= m;
690   p = MIN(p,1.0);
691   const int d_type2frho_i = d_type2frho[itype];
692   d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
693   if (EFLAG) {
694     F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
695                     d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
696     if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
697     if (eflag_global) ev.evdwl += phi;
698     if (eflag_atom) d_eatom[i] += phi;
699   }
700 
701 }
702 
703 template<class DeviceType>
704 template<int EFLAG>
705 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelAB<EFLAG>,const int & ii) const706 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelAB<EFLAG>, const int &ii) const {
707   EV_FLOAT ev;
708   this->template operator()<EFLAG>(TagPairEAMAlloyKernelAB<EFLAG>(), ii, ev);
709 }
710 
711 /* ---------------------------------------------------------------------- */
712 
713 ////Specialisation for Neighborlist types Half, HalfThread, Full
714 template<class DeviceType>
715 template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
716 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>,const int & ii,EV_FLOAT & ev) const717 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
718 
719   // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
720 
721   auto v_f = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
722   auto a_f = v_f.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
723 
724   const int i = d_ilist[ii];
725   const X_FLOAT xtmp = x(i,0);
726   const X_FLOAT ytmp = x(i,1);
727   const X_FLOAT ztmp = x(i,2);
728   const int itype = type(i);
729 
730   const int jnum = d_numneigh[i];
731 
732   F_FLOAT fxtmp = 0.0;
733   F_FLOAT fytmp = 0.0;
734   F_FLOAT fztmp = 0.0;
735 
736   for (int jj = 0; jj < jnum; jj++) {
737     int j = d_neighbors(i,jj);
738     j &= NEIGHMASK;
739     const X_FLOAT delx = xtmp - x(j,0);
740     const X_FLOAT dely = ytmp - x(j,1);
741     const X_FLOAT delz = ztmp - x(j,2);
742     const int jtype = type(j);
743     const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
744 
745     if (rsq < cutforcesq) {
746       const F_FLOAT r = sqrt(rsq);
747       F_FLOAT p = r*rdr + 1.0;
748       int m = static_cast<int> (p);
749       m = MIN(m,nr-1);
750       p -= m;
751       p = MIN(p,1.0);
752 
753       // rhoip = derivative of (density at atom j due to atom i)
754       // rhojp = derivative of (density at atom i due to atom j)
755       // phi = pair potential energy
756       // phip = phi'
757       // z2 = phi * r
758       // z2p = (phi * r)' = (phi' r) + phi
759       // psip needs both fp[i] and fp[j] terms since r_ij appears in two
760       //   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
761       //   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
762 
763       const int d_type2rhor_ij = d_type2rhor(itype,jtype);
764       const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p +
765                              d_rhor_spline(d_type2rhor_ij,m,2);
766       const int d_type2rhor_ji = d_type2rhor(jtype,itype);
767       const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p +
768                              d_rhor_spline(d_type2rhor_ji,m,2);
769       const int d_type2z2r_ij = d_type2z2r(itype,jtype);
770 
771       const auto z2r_spline_3 = d_z2r_spline(d_type2z2r_ij,m,3);
772       const auto z2r_spline_4 = d_z2r_spline(d_type2z2r_ij,m,4);
773       const auto z2r_spline_5 = d_z2r_spline(d_type2z2r_ij,m,5);
774       const auto z2r_spline_6 = d_z2r_spline(d_type2z2r_ij,m,6);
775 
776       const F_FLOAT z2p = (3.0*rdr*z2r_spline_3*p + 2.0*rdr*z2r_spline_4)*p +
777                            rdr*z2r_spline_5; // the rdr and the factors of 3.0 and 2.0 come out of the interpolate function
778       const F_FLOAT z2 = ((z2r_spline_3*p + z2r_spline_4)*p +
779                            z2r_spline_5)*p + z2r_spline_6;
780 
781       const F_FLOAT recip = 1.0/r;
782       const F_FLOAT phi = z2*recip;
783       const F_FLOAT phip = z2p*recip - phi*recip;
784       const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip;
785       const F_FLOAT fpair = -psip*recip;
786 
787       fxtmp += delx*fpair;
788       fytmp += dely*fpair;
789       fztmp += delz*fpair;
790 
791       if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
792         a_f(j,0) -= delx*fpair;
793         a_f(j,1) -= dely*fpair;
794         a_f(j,2) -= delz*fpair;
795       }
796 
797       if (EVFLAG) {
798         if (eflag) {
799           ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*phi;
800         }
801 
802         if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,phi,fpair,delx,dely,delz);
803       }
804 
805     }
806   }
807 
808   a_f(i,0) += fxtmp;
809   a_f(i,1) += fytmp;
810   a_f(i,2) += fztmp;
811 }
812 
813 template<class DeviceType>
814 template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
815 KOKKOS_INLINE_FUNCTION
operator ()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>,const int & ii) const816 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
817   EV_FLOAT ev;
818   this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairEAMAlloyKernelC<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev);
819 }
820 
821 /* ---------------------------------------------------------------------- */
822 
823 template<class DeviceType>
824 template<int NEIGHFLAG, int NEWTON_PAIR>
825 KOKKOS_INLINE_FUNCTION
ev_tally(EV_FLOAT & ev,const int & i,const int & j,const F_FLOAT & epair,const F_FLOAT & fpair,const F_FLOAT & delx,const F_FLOAT & dely,const F_FLOAT & delz) const826 void PairEAMAlloyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
827       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
828                 const F_FLOAT &dely, const F_FLOAT &delz) const
829 {
830   const int EFLAG = eflag;
831   const int VFLAG = vflag_either;
832 
833   // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
834 
835   auto v_eatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom);
836   auto a_eatom = v_eatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
837 
838   auto v_vatom = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
839   auto a_vatom = v_vatom.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
840 
841   if (EFLAG) {
842     if (eflag_atom) {
843       const E_FLOAT epairhalf = 0.5 * epair;
844       if (NEIGHFLAG!=FULL) {
845         if (NEWTON_PAIR || i < nlocal) a_eatom[i] += epairhalf;
846         if (NEWTON_PAIR || j < nlocal) a_eatom[j] += epairhalf;
847       } else {
848         a_eatom[i] += epairhalf;
849       }
850     }
851   }
852 
853   if (VFLAG) {
854     const E_FLOAT v0 = delx*delx*fpair;
855     const E_FLOAT v1 = dely*dely*fpair;
856     const E_FLOAT v2 = delz*delz*fpair;
857     const E_FLOAT v3 = delx*dely*fpair;
858     const E_FLOAT v4 = delx*delz*fpair;
859     const E_FLOAT v5 = dely*delz*fpair;
860 
861     if (vflag_global) {
862       if (NEIGHFLAG!=FULL) {
863         if (NEWTON_PAIR || i < nlocal) {
864           ev.v[0] += 0.5*v0;
865           ev.v[1] += 0.5*v1;
866           ev.v[2] += 0.5*v2;
867           ev.v[3] += 0.5*v3;
868           ev.v[4] += 0.5*v4;
869           ev.v[5] += 0.5*v5;
870         }
871         if (NEWTON_PAIR || j < nlocal) {
872         ev.v[0] += 0.5*v0;
873         ev.v[1] += 0.5*v1;
874         ev.v[2] += 0.5*v2;
875         ev.v[3] += 0.5*v3;
876         ev.v[4] += 0.5*v4;
877         ev.v[5] += 0.5*v5;
878         }
879       } else {
880         ev.v[0] += 0.5*v0;
881         ev.v[1] += 0.5*v1;
882         ev.v[2] += 0.5*v2;
883         ev.v[3] += 0.5*v3;
884         ev.v[4] += 0.5*v4;
885         ev.v[5] += 0.5*v5;
886       }
887     }
888 
889     if (vflag_atom) {
890       if (NEIGHFLAG!=FULL) {
891         if (NEWTON_PAIR || i < nlocal) {
892           a_vatom(i,0) += 0.5*v0;
893           a_vatom(i,1) += 0.5*v1;
894           a_vatom(i,2) += 0.5*v2;
895           a_vatom(i,3) += 0.5*v3;
896           a_vatom(i,4) += 0.5*v4;
897           a_vatom(i,5) += 0.5*v5;
898         }
899         if (NEWTON_PAIR || j < nlocal) {
900         a_vatom(j,0) += 0.5*v0;
901         a_vatom(j,1) += 0.5*v1;
902         a_vatom(j,2) += 0.5*v2;
903         a_vatom(j,3) += 0.5*v3;
904         a_vatom(j,4) += 0.5*v4;
905         a_vatom(j,5) += 0.5*v5;
906         }
907       } else {
908         a_vatom(i,0) += 0.5*v0;
909         a_vatom(i,1) += 0.5*v1;
910         a_vatom(i,2) += 0.5*v2;
911         a_vatom(i,3) += 0.5*v3;
912         a_vatom(i,4) += 0.5*v4;
913         a_vatom(i,5) += 0.5*v5;
914       }
915     }
916   }
917 }
918 
919 /* ---------------------------------------------------------------------- */
920 
921 // Duplicate PairEAMAlloy functions
922 
923 /* ----------------------------------------------------------------------
924    set coeffs for one or more type pairs
925    read DYNAMO setfl file
926 ------------------------------------------------------------------------- */
927 
928 template<class DeviceType>
coeff(int narg,char ** arg)929 void PairEAMAlloyKokkos<DeviceType>::coeff(int narg, char **arg)
930 {
931   int i,j;
932 
933   if (!allocated) allocate();
934 
935   if (narg != 3 + atom->ntypes)
936     error->all(FLERR,"Incorrect args for pair coefficients");
937 
938   // insure I,J args are * *
939 
940   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
941     error->all(FLERR,"Incorrect args for pair coefficients");
942 
943   // read EAM setfl file
944 
945   if (setfl) {
946     for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
947     delete [] setfl->elements;
948     delete [] setfl->mass;
949     memory->destroy(setfl->frho);
950     memory->destroy(setfl->rhor);
951     memory->destroy(setfl->z2r);
952     delete setfl;
953   }
954   setfl = new Setfl();
955   read_file(arg[2]);
956 
957   // read args that map atom types to elements in potential file
958   // map[i] = which element the Ith atom type is, -1 if "NULL"
959 
960   for (i = 3; i < narg; i++) {
961     if (strcmp(arg[i],"NULL") == 0) {
962       map[i-2] = -1;
963       continue;
964     }
965     for (j = 0; j < setfl->nelements; j++)
966       if (strcmp(arg[i],setfl->elements[j]) == 0) break;
967     if (j < setfl->nelements) map[i-2] = j;
968     else error->all(FLERR,"No matching element in EAM potential file");
969   }
970 
971   // clear setflag since coeff() called once with I,J = * *
972 
973   int n = atom->ntypes;
974   for (i = 1; i <= n; i++)
975     for (j = i; j <= n; j++)
976       setflag[i][j] = 0;
977 
978   // set setflag i,j for type pairs where both are mapped to elements
979   // set mass of atom type if i = j
980 
981   int count = 0;
982   for (i = 1; i <= n; i++) {
983     for (j = i; j <= n; j++) {
984       if (map[i] >= 0 && map[j] >= 0) {
985         setflag[i][j] = 1;
986         if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
987         count++;
988       }
989     }
990   }
991 
992   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
993 }
994 
995 /* ----------------------------------------------------------------------
996    read a multi-element DYNAMO setfl file
997 ------------------------------------------------------------------------- */
998 
999 template<class DeviceType>
read_file(char * filename)1000 void PairEAMAlloyKokkos<DeviceType>::read_file(char *filename)
1001 {
1002   Setfl *file = setfl;
1003 
1004   // read potential file
1005   if (comm->me == 0) {
1006     PotentialFileReader reader(lmp, filename, "eam/alloy", unit_convert_flag);
1007 
1008     // transparently convert units for supported conversions
1009 
1010     int unit_convert = reader.get_unit_convert();
1011     double conversion_factor = utils::get_conversion_factor(utils::ENERGY,
1012                                                             unit_convert);
1013     try {
1014       reader.skip_line();
1015       reader.skip_line();
1016       reader.skip_line();
1017 
1018       // extract element names from nelements line
1019       ValueTokenizer values = reader.next_values(1);
1020       file->nelements = values.next_int();
1021 
1022       if ((int)values.count() != file->nelements + 1)
1023         error->one(FLERR,"Incorrect element names in EAM potential file");
1024 
1025       file->elements = new char*[file->nelements];
1026       for (int i = 0; i < file->nelements; i++) {
1027         const std::string word = values.next_string();
1028         const int n = word.length() + 1;
1029         file->elements[i] = new char[n];
1030         strcpy(file->elements[i], word.c_str());
1031       }
1032 
1033       //
1034 
1035       values = reader.next_values(5);
1036       file->nrho = values.next_int();
1037       file->drho = values.next_double();
1038       file->nr   = values.next_int();
1039       file->dr   = values.next_double();
1040       file->cut  = values.next_double();
1041 
1042       if ((file->nrho <= 0) || (file->nr <= 0) || (file->dr <= 0.0))
1043         error->one(FLERR,"Invalid EAM potential file");
1044 
1045       memory->create(file->mass, file->nelements, "pair:mass");
1046       memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
1047       memory->create(file->rhor, file->nelements, file->nr + 1, "pair:rhor");
1048       memory->create(file->z2r, file->nelements, file->nelements, file->nr + 1, "pair:z2r");
1049 
1050       for (int i = 0; i < file->nelements; i++) {
1051         values = reader.next_values(2);
1052         values.next_int(); // ignore
1053         file->mass[i] = values.next_double();
1054 
1055         reader.next_dvector(&file->frho[i][1], file->nrho);
1056         reader.next_dvector(&file->rhor[i][1], file->nr);
1057         if (unit_convert) {
1058           for (int j = 1; j < file->nrho; ++j)
1059             file->frho[i][j] *= conversion_factor;
1060         }
1061       }
1062 
1063       for (int i = 0; i < file->nelements; i++) {
1064         for (int j = 0; j <= i; j++) {
1065           reader.next_dvector(&file->z2r[i][j][1], file->nr);
1066           if (unit_convert) {
1067             for (int k = 1; k < file->nr; ++k)
1068               file->z2r[i][j][k] *= conversion_factor;
1069           }
1070         }
1071       }
1072     } catch (TokenizerException &e) {
1073       error->one(FLERR, e.what());
1074     }
1075   }
1076 
1077   // broadcast potential information
1078   MPI_Bcast(&file->nelements, 1, MPI_INT, 0, world);
1079 
1080   MPI_Bcast(&file->nrho, 1, MPI_INT, 0, world);
1081   MPI_Bcast(&file->drho, 1, MPI_DOUBLE, 0, world);
1082   MPI_Bcast(&file->nr, 1, MPI_INT, 0, world);
1083   MPI_Bcast(&file->dr, 1, MPI_DOUBLE, 0, world);
1084   MPI_Bcast(&file->cut, 1, MPI_DOUBLE, 0, world);
1085 
1086   // allocate memory on other procs
1087   if (comm->me != 0) {
1088     file->elements = new char*[file->nelements];
1089     for (int i = 0; i < file->nelements; i++) file->elements[i] = nullptr;
1090     memory->create(file->mass, file->nelements, "pair:mass");
1091     memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
1092     memory->create(file->rhor, file->nelements, file->nr + 1, "pair:rhor");
1093     memory->create(file->z2r, file->nelements, file->nelements, file->nr + 1, "pair:z2r");
1094   }
1095 
1096   // broadcast file->elements string array
1097   for (int i = 0; i < file->nelements; i++) {
1098     int n;
1099     if (comm->me == 0) n = strlen(file->elements[i]) + 1;
1100     MPI_Bcast(&n, 1, MPI_INT, 0, world);
1101     if (comm->me != 0) file->elements[i] = new char[n];
1102     MPI_Bcast(file->elements[i], n, MPI_CHAR, 0, world);
1103   }
1104 
1105   // broadcast file->mass, frho, rhor
1106   for (int i = 0; i < file->nelements; i++) {
1107     MPI_Bcast(&file->mass[i], 1, MPI_DOUBLE, 0, world);
1108     MPI_Bcast(&file->frho[i][1], file->nrho, MPI_DOUBLE, 0, world);
1109     MPI_Bcast(&file->rhor[i][1], file->nr, MPI_DOUBLE, 0, world);
1110   }
1111 
1112   // broadcast file->z2r
1113   for (int i = 0; i < file->nelements; i++) {
1114     for (int j = 0; j <= i; j++) {
1115       MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world);
1116     }
1117   }
1118 }
1119 
1120 /* ----------------------------------------------------------------------
1121    copy read-in setfl potential to standard array format
1122 ------------------------------------------------------------------------- */
1123 
1124 template<class DeviceType>
file2array_alloy()1125 void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
1126 {
1127   int i,j,m,n;
1128   int ntypes = atom->ntypes;
1129 
1130   // set function params directly from setfl file
1131 
1132   nrho = setfl->nrho;
1133   nr = setfl->nr;
1134   drho = setfl->drho;
1135   dr = setfl->dr;
1136   rhomax = (nrho-1) * drho;
1137 
1138   // ------------------------------------------------------------------
1139   // setup frho arrays
1140   // ------------------------------------------------------------------
1141 
1142   // allocate frho arrays
1143   // nfrho = # of setfl elements + 1 for zero array
1144 
1145   nfrho = setfl->nelements + 1;
1146   memory->destroy(frho);
1147   memory->create(frho,nfrho,nrho+1,"pair:frho");
1148 
1149   // copy each element's frho to global frho
1150 
1151   for (i = 0; i < setfl->nelements; i++)
1152     for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
1153 
1154   // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
1155   // this is necessary b/c fp is still computed for non-EAM atoms
1156 
1157   for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
1158 
1159   // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
1160   // if atom type doesn't point to element (non-EAM atom in pair hybrid)
1161   // then map it to last frho array of zeroes
1162 
1163   for (i = 1; i <= ntypes; i++)
1164     if (map[i] >= 0) type2frho[i] = map[i];
1165     else type2frho[i] = nfrho-1;
1166 
1167   // ------------------------------------------------------------------
1168   // setup rhor arrays
1169   // ------------------------------------------------------------------
1170 
1171   // allocate rhor arrays
1172   // nrhor = # of setfl elements
1173 
1174   nrhor = setfl->nelements;
1175   memory->destroy(rhor);
1176   memory->create(rhor,nrhor,nr+1,"pair:rhor");
1177 
1178   // copy each element's rhor to global rhor
1179 
1180   for (i = 0; i < setfl->nelements; i++)
1181     for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
1182 
1183   // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
1184   // for setfl files, I,J mapping only depends on I
1185   // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
1186 
1187   for (i = 1; i <= ntypes; i++)
1188     for (j = 1; j <= ntypes; j++)
1189       type2rhor[i][j] = map[i];
1190 
1191   // ------------------------------------------------------------------
1192   // setup z2r arrays
1193   // ------------------------------------------------------------------
1194 
1195   // allocate z2r arrays
1196   // nz2r = N*(N+1)/2 where N = # of setfl elements
1197 
1198   nz2r = setfl->nelements * (setfl->nelements+1) / 2;
1199   memory->destroy(z2r);
1200   memory->create(z2r,nz2r,nr+1,"pair:z2r");
1201 
1202   // copy each element pair z2r to global z2r, only for I >= J
1203 
1204   n = 0;
1205   for (i = 0; i < setfl->nelements; i++)
1206     for (j = 0; j <= i; j++) {
1207       for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
1208       n++;
1209     }
1210 
1211   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
1212   // set of z2r arrays only fill lower triangular Nelement matrix
1213   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
1214   // swap indices when irow < icol to stay lower triangular
1215   // if map = -1 (non-EAM atom in pair hybrid):
1216   //   type2z2r is not used by non-opt
1217   //   but set type2z2r to 0 since accessed by opt
1218 
1219   int irow,icol;
1220   for (i = 1; i <= ntypes; i++) {
1221     for (j = 1; j <= ntypes; j++) {
1222       irow = map[i];
1223       icol = map[j];
1224       if (irow == -1 || icol == -1) {
1225         type2z2r[i][j] = 0;
1226         continue;
1227       }
1228       if (irow < icol) {
1229         irow = map[j];
1230         icol = map[i];
1231       }
1232       n = 0;
1233       for (m = 0; m < irow; m++) n += m + 1;
1234       n += icol;
1235       type2z2r[i][j] = n;
1236     }
1237   }
1238 }
1239 
1240 /* ---------------------------------------------------------------------- */
1241 
1242 namespace LAMMPS_NS {
1243 template class PairEAMAlloyKokkos<LMPDeviceType>;
1244 #ifdef LMP_KOKKOS_GPU
1245 template class PairEAMAlloyKokkos<LMPHostType>;
1246 #endif
1247 }
1248