1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Trung Dac Nguyen (ndactrung@gmail.com)
17 ------------------------------------------------------------------------- */
18 
19 #include "pair_tersoff_zbl_gpu.h"
20 
21 #include "atom.h"
22 #include "comm.h"
23 #include "domain.h"
24 #include "error.h"
25 #include "force.h"
26 #include "gpu_extra.h"
27 #include "memory.h"
28 #include "neigh_list.h"
29 #include "neigh_request.h"
30 #include "neighbor.h"
31 #include "suffix.h"
32 
33 using namespace LAMMPS_NS;
34 
35 // External functions from cuda library for atom decomposition
36 
37 int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
38                      const int max_nbors, const double cell_size, int &gpu_mode,
39                      FILE *screen, int* host_map, const int nelements,
40                      int*** host_elem3param, const int nparams,
41                      const double* ts_lam1, const double* ts_lam2,
42                      const double* ts_lam3, const double* ts_powermint,
43                      const double* ts_biga, const double* ts_bigb,
44                      const double* ts_bigr, const double* ts_bigd,
45                      const double* ts_c1, const double* ts_c2,
46                      const double* ts_c3, const double* ts_c4,
47                      const double* ts_c, const double* ts_d,
48                      const double* ts_h, const double* ts_gamma,
49                      const double* ts_beta, const double* ts_powern,
50                      const double* ts_Z_i, const double* ts_Z_j,
51                      const double* ts_ZBLcut, const double* ts_ZBLexpscale,
52                      const double global_e, const double global_a_0,
53                      const double global_epsilon_0, const double* ts_cutsq);
54 void tersoff_zbl_gpu_clear();
55 int ** tersoff_zbl_gpu_compute_n(const int ago, const int inum_full,
56                     const int nall, double **host_x, int *host_type,
57                     double *sublo, double *subhi, tagint *tag, int **nspecial,
58                     tagint **special, const bool eflag, const bool vflag,
59                     const bool eatom, const bool vatom, int &host_start,
60                     int **ilist, int **jnum, const double cpu_time,
61                     bool &success);
62 void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall,
63                     const int nlist, double **host_x, int *host_type,
64                     int *ilist, int *numj, int **firstneigh, const bool eflag,
65                     const bool vflag, const bool eatom, const bool vatom,
66                     int &host_start, const double cpu_time, bool &success);
67 double tersoff_zbl_gpu_bytes();
68 
69 /* ---------------------------------------------------------------------- */
70 
PairTersoffZBLGPU(LAMMPS * lmp)71 PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
72   gpu_mode(GPU_FORCE)
73 {
74   cpu_time = 0.0;
75   suffix_flag |= Suffix::GPU;
76   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
77 
78   cutghost = nullptr;
79   ghostneigh = 1;
80 }
81 
82 /* ----------------------------------------------------------------------
83    check if allocated, since class can be destructed when incomplete
84 ------------------------------------------------------------------------- */
85 
~PairTersoffZBLGPU()86 PairTersoffZBLGPU::~PairTersoffZBLGPU()
87 {
88   tersoff_zbl_gpu_clear();
89   if (allocated)
90     memory->destroy(cutghost);
91 }
92 
93 /* ---------------------------------------------------------------------- */
94 
compute(int eflag,int vflag)95 void PairTersoffZBLGPU::compute(int eflag, int vflag)
96 {
97   ev_init(eflag,vflag);
98 
99   int nall = atom->nlocal + atom->nghost;
100   int inum, host_start;
101 
102   bool success = true;
103   int *ilist, *numneigh, **firstneigh;
104   if (gpu_mode != GPU_FORCE) {
105     double sublo[3],subhi[3];
106     if (domain->triclinic == 0) {
107       sublo[0] = domain->sublo[0];
108       sublo[1] = domain->sublo[1];
109       sublo[2] = domain->sublo[2];
110       subhi[0] = domain->subhi[0];
111       subhi[1] = domain->subhi[1];
112       subhi[2] = domain->subhi[2];
113     } else {
114       domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
115     }
116     inum = atom->nlocal;
117     firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall,
118                                   atom->x, atom->type, sublo,
119                                   subhi, atom->tag, atom->nspecial,
120                                   atom->special, eflag, vflag, eflag_atom,
121                                   vflag_atom, host_start,
122                                   &ilist, &numneigh, cpu_time, success);
123   } else {
124     inum = list->inum;
125     ilist = list->ilist;
126     numneigh = list->numneigh;
127     firstneigh = list->firstneigh;
128 
129     tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
130                    atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
131                    vflag, eflag_atom, vflag_atom, host_start, cpu_time,
132                    success);
133   }
134   if (!success)
135     error->one(FLERR,"Insufficient memory on accelerator");
136 }
137 
138 /* ---------------------------------------------------------------------- */
139 
allocate()140 void PairTersoffZBLGPU::allocate()
141 {
142   PairTersoffZBL::allocate();
143   int n = atom->ntypes;
144 
145   memory->create(cutghost,n+1,n+1,"pair:cutghost");
146 }
147 
148 /* ----------------------------------------------------------------------
149    init specific to this pair style
150 ------------------------------------------------------------------------- */
151 
init_style()152 void PairTersoffZBLGPU::init_style()
153 {
154   double cell_size = cutmax + neighbor->skin;
155 
156   if (atom->tag_enable == 0)
157     error->all(FLERR,"Pair style tersoff/zbl/gpu requires atom IDs");
158   if (force->newton_pair != 0)
159     error->all(FLERR,"Pair style tersoff/zbl/gpu requires newton pair off");
160 
161   double *lam1, *lam2, *lam3, *powermint;
162   double *biga, *bigb, *bigr, *bigd;
163   double *c1, *c2, *c3, *c4;
164   double *c, *d, *h, *gamma;
165   double *beta, *powern, *Z_i, *Z_j, *ZBLcut, *ZBLexpscale, *_cutsq;
166   lam1 = lam2 = lam3 = powermint = nullptr;
167   biga = bigb = bigr = bigd = nullptr;
168   c1 = c2 = c3 = c4 = nullptr;
169   c = d = h = gamma = nullptr;
170   beta = powern = Z_i = Z_j = ZBLcut = ZBLexpscale = _cutsq = nullptr;
171 
172   memory->create(lam1,nparams,"pair:lam1");
173   memory->create(lam2,nparams,"pair:lam2");
174   memory->create(lam3,nparams,"pair:lam3");
175   memory->create(powermint,nparams,"pair:powermint");
176   memory->create(biga,nparams,"pair:biga");
177   memory->create(bigb,nparams,"pair:bigb");
178   memory->create(bigr,nparams,"pair:bigr");
179   memory->create(bigd,nparams,"pair:bigd");
180   memory->create(c1,nparams,"pair:c1");
181   memory->create(c2,nparams,"pair:c2");
182   memory->create(c3,nparams,"pair:c3");
183   memory->create(c4,nparams,"pair:c4");
184   memory->create(c,nparams,"pair:c");
185   memory->create(d,nparams,"pair:d");
186   memory->create(h,nparams,"pair:h");
187   memory->create(gamma,nparams,"pair:gamma");
188   memory->create(beta,nparams,"pair:beta");
189   memory->create(powern,nparams,"pair:powern");
190   memory->create(Z_i,nparams,"pair:Z_i");
191   memory->create(Z_j,nparams,"pair:Z_j");
192   memory->create(ZBLcut,nparams,"pair:ZBLcut");
193   memory->create(ZBLexpscale,nparams,"pair:ZBLexpscale");
194   memory->create(_cutsq,nparams,"pair:_cutsq");
195 
196   for (int i = 0; i < nparams; i++) {
197     lam1[i] = params[i].lam1;
198     lam2[i] = params[i].lam2;
199     lam3[i] = params[i].lam3;
200     powermint[i] = params[i].powermint;
201     biga[i] = params[i].biga;
202     bigb[i] = params[i].bigb;
203     bigr[i] = params[i].bigr;
204     bigd[i] = params[i].bigd;
205     c1[i] = params[i].c1;
206     c2[i] = params[i].c2;
207     c3[i] = params[i].c3;
208     c4[i] = params[i].c4;
209     c[i] = params[i].c;
210     d[i] = params[i].d;
211     h[i] = params[i].h;
212     gamma[i] = params[i].gamma;
213     beta[i] = params[i].beta;
214     powern[i] = params[i].powern;
215     Z_i[i] = params[i].Z_i;
216     Z_j[i] = params[i].Z_j;
217     ZBLcut[i] = params[i].ZBLcut;
218     ZBLexpscale[i] = params[i].ZBLexpscale;
219     _cutsq[i] = params[i].cutsq;
220   }
221 
222   int mnf = 5e-2 * neighbor->oneatom;
223   int success = tersoff_zbl_gpu_init(atom->ntypes+1, atom->nlocal,
224                                  atom->nlocal+atom->nghost, mnf,
225                                  cell_size, gpu_mode, screen, map, nelements,
226                                  elem3param, nparams, lam1, lam2, lam3,
227                                  powermint, biga, bigb, bigr, bigd,
228                                  c1, c2, c3, c4, c, d, h, gamma,
229                                  beta, powern, Z_i, Z_j, ZBLcut, ZBLexpscale,
230                                  global_e, global_a_0, global_epsilon_0, _cutsq);
231 
232   memory->destroy(lam1);
233   memory->destroy(lam2);
234   memory->destroy(lam3);
235   memory->destroy(powermint);
236   memory->destroy(biga);
237   memory->destroy(bigb);
238   memory->destroy(bigr);
239   memory->destroy(bigd);
240   memory->destroy(c1);
241   memory->destroy(c2);
242   memory->destroy(c3);
243   memory->destroy(c4);
244   memory->destroy(c);
245   memory->destroy(d);
246   memory->destroy(h);
247   memory->destroy(gamma);
248   memory->destroy(beta);
249   memory->destroy(powern);
250   memory->destroy(Z_i);
251   memory->destroy(Z_j);
252   memory->destroy(ZBLcut);
253   memory->destroy(ZBLexpscale);
254   memory->destroy(_cutsq);
255 
256   GPU_EXTRA::check_flag(success,error,world);
257 
258   if (gpu_mode == GPU_FORCE) {
259     int irequest = neighbor->request(this);
260     neighbor->requests[irequest]->half = 0;
261     neighbor->requests[irequest]->full = 1;
262     neighbor->requests[irequest]->ghost = 1;
263   }
264   if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
265     comm->cutghostuser = 2.0*cutmax + neighbor->skin;
266     if (comm->me == 0)
267        error->warning(FLERR,"Increasing communication cutoff for GPU style");
268   }
269 }
270 
271 /* ----------------------------------------------------------------------
272    init for one type pair i,j and corresponding j,i
273 ------------------------------------------------------------------------- */
274 
init_one(int i,int j)275 double PairTersoffZBLGPU::init_one(int i, int j)
276 {
277   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
278   cutghost[i][j] = cutmax;
279   cutghost[j][i] = cutmax;
280 
281   return cutmax;
282 }
283 
284