1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Trung Dac Nguyen (ndactrung@gmail.com)
17 ------------------------------------------------------------------------- */
18
19 #include "pair_tersoff_zbl_gpu.h"
20
21 #include "atom.h"
22 #include "comm.h"
23 #include "domain.h"
24 #include "error.h"
25 #include "force.h"
26 #include "gpu_extra.h"
27 #include "memory.h"
28 #include "neigh_list.h"
29 #include "neigh_request.h"
30 #include "neighbor.h"
31 #include "suffix.h"
32
33 using namespace LAMMPS_NS;
34
35 // External functions from cuda library for atom decomposition
36
37 int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
38 const int max_nbors, const double cell_size, int &gpu_mode,
39 FILE *screen, int* host_map, const int nelements,
40 int*** host_elem3param, const int nparams,
41 const double* ts_lam1, const double* ts_lam2,
42 const double* ts_lam3, const double* ts_powermint,
43 const double* ts_biga, const double* ts_bigb,
44 const double* ts_bigr, const double* ts_bigd,
45 const double* ts_c1, const double* ts_c2,
46 const double* ts_c3, const double* ts_c4,
47 const double* ts_c, const double* ts_d,
48 const double* ts_h, const double* ts_gamma,
49 const double* ts_beta, const double* ts_powern,
50 const double* ts_Z_i, const double* ts_Z_j,
51 const double* ts_ZBLcut, const double* ts_ZBLexpscale,
52 const double global_e, const double global_a_0,
53 const double global_epsilon_0, const double* ts_cutsq);
54 void tersoff_zbl_gpu_clear();
55 int ** tersoff_zbl_gpu_compute_n(const int ago, const int inum_full,
56 const int nall, double **host_x, int *host_type,
57 double *sublo, double *subhi, tagint *tag, int **nspecial,
58 tagint **special, const bool eflag, const bool vflag,
59 const bool eatom, const bool vatom, int &host_start,
60 int **ilist, int **jnum, const double cpu_time,
61 bool &success);
62 void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall,
63 const int nlist, double **host_x, int *host_type,
64 int *ilist, int *numj, int **firstneigh, const bool eflag,
65 const bool vflag, const bool eatom, const bool vatom,
66 int &host_start, const double cpu_time, bool &success);
67 double tersoff_zbl_gpu_bytes();
68
69 /* ---------------------------------------------------------------------- */
70
PairTersoffZBLGPU(LAMMPS * lmp)71 PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
72 gpu_mode(GPU_FORCE)
73 {
74 cpu_time = 0.0;
75 suffix_flag |= Suffix::GPU;
76 GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
77
78 cutghost = nullptr;
79 ghostneigh = 1;
80 }
81
82 /* ----------------------------------------------------------------------
83 check if allocated, since class can be destructed when incomplete
84 ------------------------------------------------------------------------- */
85
~PairTersoffZBLGPU()86 PairTersoffZBLGPU::~PairTersoffZBLGPU()
87 {
88 tersoff_zbl_gpu_clear();
89 if (allocated)
90 memory->destroy(cutghost);
91 }
92
93 /* ---------------------------------------------------------------------- */
94
compute(int eflag,int vflag)95 void PairTersoffZBLGPU::compute(int eflag, int vflag)
96 {
97 ev_init(eflag,vflag);
98
99 int nall = atom->nlocal + atom->nghost;
100 int inum, host_start;
101
102 bool success = true;
103 int *ilist, *numneigh, **firstneigh;
104 if (gpu_mode != GPU_FORCE) {
105 double sublo[3],subhi[3];
106 if (domain->triclinic == 0) {
107 sublo[0] = domain->sublo[0];
108 sublo[1] = domain->sublo[1];
109 sublo[2] = domain->sublo[2];
110 subhi[0] = domain->subhi[0];
111 subhi[1] = domain->subhi[1];
112 subhi[2] = domain->subhi[2];
113 } else {
114 domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
115 }
116 inum = atom->nlocal;
117 firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall,
118 atom->x, atom->type, sublo,
119 subhi, atom->tag, atom->nspecial,
120 atom->special, eflag, vflag, eflag_atom,
121 vflag_atom, host_start,
122 &ilist, &numneigh, cpu_time, success);
123 } else {
124 inum = list->inum;
125 ilist = list->ilist;
126 numneigh = list->numneigh;
127 firstneigh = list->firstneigh;
128
129 tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
130 atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
131 vflag, eflag_atom, vflag_atom, host_start, cpu_time,
132 success);
133 }
134 if (!success)
135 error->one(FLERR,"Insufficient memory on accelerator");
136 }
137
138 /* ---------------------------------------------------------------------- */
139
allocate()140 void PairTersoffZBLGPU::allocate()
141 {
142 PairTersoffZBL::allocate();
143 int n = atom->ntypes;
144
145 memory->create(cutghost,n+1,n+1,"pair:cutghost");
146 }
147
148 /* ----------------------------------------------------------------------
149 init specific to this pair style
150 ------------------------------------------------------------------------- */
151
init_style()152 void PairTersoffZBLGPU::init_style()
153 {
154 double cell_size = cutmax + neighbor->skin;
155
156 if (atom->tag_enable == 0)
157 error->all(FLERR,"Pair style tersoff/zbl/gpu requires atom IDs");
158 if (force->newton_pair != 0)
159 error->all(FLERR,"Pair style tersoff/zbl/gpu requires newton pair off");
160
161 double *lam1, *lam2, *lam3, *powermint;
162 double *biga, *bigb, *bigr, *bigd;
163 double *c1, *c2, *c3, *c4;
164 double *c, *d, *h, *gamma;
165 double *beta, *powern, *Z_i, *Z_j, *ZBLcut, *ZBLexpscale, *_cutsq;
166 lam1 = lam2 = lam3 = powermint = nullptr;
167 biga = bigb = bigr = bigd = nullptr;
168 c1 = c2 = c3 = c4 = nullptr;
169 c = d = h = gamma = nullptr;
170 beta = powern = Z_i = Z_j = ZBLcut = ZBLexpscale = _cutsq = nullptr;
171
172 memory->create(lam1,nparams,"pair:lam1");
173 memory->create(lam2,nparams,"pair:lam2");
174 memory->create(lam3,nparams,"pair:lam3");
175 memory->create(powermint,nparams,"pair:powermint");
176 memory->create(biga,nparams,"pair:biga");
177 memory->create(bigb,nparams,"pair:bigb");
178 memory->create(bigr,nparams,"pair:bigr");
179 memory->create(bigd,nparams,"pair:bigd");
180 memory->create(c1,nparams,"pair:c1");
181 memory->create(c2,nparams,"pair:c2");
182 memory->create(c3,nparams,"pair:c3");
183 memory->create(c4,nparams,"pair:c4");
184 memory->create(c,nparams,"pair:c");
185 memory->create(d,nparams,"pair:d");
186 memory->create(h,nparams,"pair:h");
187 memory->create(gamma,nparams,"pair:gamma");
188 memory->create(beta,nparams,"pair:beta");
189 memory->create(powern,nparams,"pair:powern");
190 memory->create(Z_i,nparams,"pair:Z_i");
191 memory->create(Z_j,nparams,"pair:Z_j");
192 memory->create(ZBLcut,nparams,"pair:ZBLcut");
193 memory->create(ZBLexpscale,nparams,"pair:ZBLexpscale");
194 memory->create(_cutsq,nparams,"pair:_cutsq");
195
196 for (int i = 0; i < nparams; i++) {
197 lam1[i] = params[i].lam1;
198 lam2[i] = params[i].lam2;
199 lam3[i] = params[i].lam3;
200 powermint[i] = params[i].powermint;
201 biga[i] = params[i].biga;
202 bigb[i] = params[i].bigb;
203 bigr[i] = params[i].bigr;
204 bigd[i] = params[i].bigd;
205 c1[i] = params[i].c1;
206 c2[i] = params[i].c2;
207 c3[i] = params[i].c3;
208 c4[i] = params[i].c4;
209 c[i] = params[i].c;
210 d[i] = params[i].d;
211 h[i] = params[i].h;
212 gamma[i] = params[i].gamma;
213 beta[i] = params[i].beta;
214 powern[i] = params[i].powern;
215 Z_i[i] = params[i].Z_i;
216 Z_j[i] = params[i].Z_j;
217 ZBLcut[i] = params[i].ZBLcut;
218 ZBLexpscale[i] = params[i].ZBLexpscale;
219 _cutsq[i] = params[i].cutsq;
220 }
221
222 int mnf = 5e-2 * neighbor->oneatom;
223 int success = tersoff_zbl_gpu_init(atom->ntypes+1, atom->nlocal,
224 atom->nlocal+atom->nghost, mnf,
225 cell_size, gpu_mode, screen, map, nelements,
226 elem3param, nparams, lam1, lam2, lam3,
227 powermint, biga, bigb, bigr, bigd,
228 c1, c2, c3, c4, c, d, h, gamma,
229 beta, powern, Z_i, Z_j, ZBLcut, ZBLexpscale,
230 global_e, global_a_0, global_epsilon_0, _cutsq);
231
232 memory->destroy(lam1);
233 memory->destroy(lam2);
234 memory->destroy(lam3);
235 memory->destroy(powermint);
236 memory->destroy(biga);
237 memory->destroy(bigb);
238 memory->destroy(bigr);
239 memory->destroy(bigd);
240 memory->destroy(c1);
241 memory->destroy(c2);
242 memory->destroy(c3);
243 memory->destroy(c4);
244 memory->destroy(c);
245 memory->destroy(d);
246 memory->destroy(h);
247 memory->destroy(gamma);
248 memory->destroy(beta);
249 memory->destroy(powern);
250 memory->destroy(Z_i);
251 memory->destroy(Z_j);
252 memory->destroy(ZBLcut);
253 memory->destroy(ZBLexpscale);
254 memory->destroy(_cutsq);
255
256 GPU_EXTRA::check_flag(success,error,world);
257
258 if (gpu_mode == GPU_FORCE) {
259 int irequest = neighbor->request(this);
260 neighbor->requests[irequest]->half = 0;
261 neighbor->requests[irequest]->full = 1;
262 neighbor->requests[irequest]->ghost = 1;
263 }
264 if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
265 comm->cutghostuser = 2.0*cutmax + neighbor->skin;
266 if (comm->me == 0)
267 error->warning(FLERR,"Increasing communication cutoff for GPU style");
268 }
269 }
270
271 /* ----------------------------------------------------------------------
272 init for one type pair i,j and corresponding j,i
273 ------------------------------------------------------------------------- */
274
init_one(int i,int j)275 double PairTersoffZBLGPU::init_one(int i, int j)
276 {
277 if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
278 cutghost[i][j] = cutmax;
279 cutghost[j][i] = cutmax;
280
281 return cutmax;
282 }
283
284