1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: W. Michael Brown (Intel)
17 ------------------------------------------------------------------------- */
18 
19 #include "nbin_intel.h"
20 
21 #include "atom.h"
22 #include "comm.h"
23 #include "error.h"
24 #include "group.h"
25 #include "modify.h"
26 #include "update.h"
27 
28 using namespace LAMMPS_NS;
29 
30 /* ---------------------------------------------------------------------- */
31 
NBinIntel(LAMMPS * lmp)32 NBinIntel::NBinIntel(LAMMPS *lmp) : NBinStandard(lmp) {
33   int ifix = modify->find_fix("package_intel");
34   if (ifix < 0)
35     error->all(FLERR,
36                "The 'package intel' command is required for /intel styles");
37   _fix = static_cast<FixIntel *>(modify->fix[ifix]);
38   _precision_mode = _fix->precision();
39   _atombin = nullptr;
40   _binpacked = nullptr;
41   #ifdef _LMP_INTEL_OFFLOAD
42   _cop = _fix->coprocessor_number();
43   _offload_alloc = 0;
44   #endif
45 }
46 
47 /* ---------------------------------------------------------------------- */
48 
~NBinIntel()49 NBinIntel::~NBinIntel() {
50   #ifdef _LMP_INTEL_OFFLOAD
51   if (_offload_alloc) {
52     const int * binhead = this->binhead;
53     const int * bins = this->bins;
54     const int * _atombin = this->_atombin;
55     const int * _binpacked = this->_binpacked;
56     #pragma offload_transfer target(mic:_cop)   \
57       nocopy(binhead,bins,_atombin,_binpacked:alloc_if(0) free_if(1))
58   }
59   #endif
60   memory->destroy(_atombin);
61   memory->destroy(_binpacked);
62 }
63 
64 /* ----------------------------------------------------------------------
65    setup for bin_atoms()
66 ------------------------------------------------------------------------- */
67 
bin_atoms_setup(int nall)68 void NBinIntel::bin_atoms_setup(int nall)
69 {
70   // binhead = per-bin vector, mbins in length
71   // add 1 bin for INTEL package
72 
73   if (mbins > maxbin) {
74     #ifdef _LMP_INTEL_OFFLOAD
75     if (_offload_alloc) {
76       const int * binhead = this->binhead;
77       #pragma offload_transfer target(mic:_cop) \
78         nocopy(binhead:alloc_if(0) free_if(1))
79     }
80     #endif
81 
82     maxbin = mbins;
83     memory->destroy(binhead);
84     memory->create(binhead,maxbin+1,"neigh:binhead");
85 
86     #ifdef _LMP_INTEL_OFFLOAD
87     if (_fix->offload_balance() != 0) {
88       int * binhead = this->binhead;
89       #pragma offload_transfer target(mic:_cop) \
90          nocopy(binhead:length(maxbin+1) alloc_if(1) free_if(0))
91     }
92     #endif
93   }
94 
95   // bins = per-atom vector
96 
97   if (nall > maxatom) {
98     maxatom = nall;
99 
100     #ifdef _LMP_INTEL_OFFLOAD
101     if (_offload_alloc) {
102       const int * bins = this->bins;
103       const int * _atombin = this->_atombin;
104       const int * _binpacked = this->_binpacked;
105       #pragma offload_transfer target(mic:_cop) \
106         nocopy(bins,_atombin,_binpacked:alloc_if(0) free_if(1))
107     }
108     #endif
109     memory->destroy(bins);
110     memory->destroy(_atombin);
111     memory->destroy(_binpacked);
112 
113     memory->create(bins,maxatom,"neigh:bins");
114     memory->create(_atombin,maxatom,"neigh:bins");
115     memory->create(_binpacked,maxatom,"neigh:bins");
116     #ifdef _LMP_INTEL_OFFLOAD
117     if (_fix->offload_balance() != 0) {
118       const int * bins = this->bins;
119       const int * _atombin = this->_atombin;
120       const int * _binpacked = this->_binpacked;
121       #pragma offload_transfer target(mic:_cop) \
122         nocopy(bins,_atombin,_binpacked:length(maxatom) alloc_if(1) free_if(0))
123       _offload_alloc=1;
124     }
125     #endif
126 
127     if (_precision_mode == FixIntel::PREC_MODE_MIXED)
128       _fix->get_mixed_buffers()->set_bininfo(_atombin,_binpacked);
129     else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
130       _fix->get_single_buffers()->set_bininfo(_atombin,_binpacked);
131     else
132       _fix->get_double_buffers()->set_bininfo(_atombin,_binpacked);
133   }
134 }
135 
136 /* ----------------------------------------------------------------------
137    bin owned and ghost atoms
138 ------------------------------------------------------------------------- */
139 
bin_atoms()140 void NBinIntel::bin_atoms()
141 {
142   last_bin = update->ntimestep;
143 
144   if (_precision_mode == FixIntel::PREC_MODE_MIXED)
145     bin_atoms(_fix->get_mixed_buffers());
146   else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
147     bin_atoms(_fix->get_single_buffers());
148   else
149     bin_atoms(_fix->get_double_buffers());
150 }
151 
152 template <class flt_t, class acc_t>
bin_atoms(IntelBuffers<flt_t,acc_t> * buffers)153 void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
154   const int nlocal = atom->nlocal;
155   const int nall = nlocal + atom->nghost;
156   const int aend = _fix->offload_end_neighbor();
157 
158 
159   // ---------- Sanity check for padding --------------
160   {
161     const flt_t dx = (INTEL_BIGP - bboxhi[0]);
162     const flt_t dy = (INTEL_BIGP - bboxhi[1]);
163     const flt_t dz = (INTEL_BIGP - bboxhi[2]);
164     if (dx * dx + dy * dy + dz * dz <
165         static_cast<flt_t>(neighbor->cutneighmaxsq))
166       error->one(FLERR,
167         "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
168   }
169 
170   // ---------- Grow and cast/pack buffers -------------
171   _fix->start_watch(TIME_PACK);
172   buffers->grow(nall, atom->nlocal, comm->nthreads, aend);
173 
174   ATOM_T biga;
175   biga.x = INTEL_BIGP;
176   biga.y = INTEL_BIGP;
177   biga.z = INTEL_BIGP;
178   biga.w = 1;
179   buffers->get_x()[nall] = biga;
180 
181   int nthreads;
182   if (comm->nthreads > INTEL_HTHREADS) nthreads = comm->nthreads;
183   else nthreads = 1;
184   #if defined(_OPENMP)
185   #pragma omp parallel if (nthreads > INTEL_HTHREADS)
186   #endif
187   {
188     int ifrom, ito, tid;
189     IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads,
190                               sizeof(ATOM_T));
191     buffers->thr_pack(ifrom, ito, 0);
192   }
193   _fix->stop_watch(TIME_PACK);
194 
195 
196   // ---------- Bin Atoms -------------
197   _fix->start_watch(TIME_HOST_NEIGHBOR);
198   int * _noalias const atombin = this->_atombin;
199   int * _noalias const binpacked = this->_binpacked;
200 
201   int i, ibin;
202 
203   for (i = 0; i < mbins; i++) binhead[i] = -1;
204 
205   int *mask = atom->mask;
206 
207   if (includegroup) {
208     int bitmask = group->bitmask[includegroup];
209     for (i = nall-1; i >= nlocal; i--) {
210       if (mask[i] & bitmask) {
211         ibin = coord2bin(atom->x[i]);
212         // Only necessary to store when neighboring ghost
213         atombin[i] = ibin;
214         bins[i] = binhead[ibin];
215         binhead[ibin] = i;
216       }
217     }
218     for (i = atom->nfirst-1; i >= 0; i--) {
219       ibin = coord2bin(atom->x[i]);
220       atombin[i] = ibin;
221       bins[i] = binhead[ibin];
222       binhead[ibin] = i;
223     }
224   } else {
225     for (i = nall-1; i >= 0; i--) {
226       ibin = coord2bin(atom->x[i]);
227       // Only necessary to store for ghost when neighboring ghost
228       atombin[i] = ibin;
229       bins[i] = binhead[ibin];
230       binhead[ibin] = i;
231     }
232   }
233   int newhead = 0;
234   for (i = 0; i < mbins; i++) {
235     int j = binhead[i];
236     binhead[i] = newhead;
237     for ( ; j >= 0; j = bins[j])
238       binpacked[newhead++] = j;
239   }
240   binhead[mbins] = newhead;
241 }
242 
243 /* ---------------------------------------------------------------------- */
244 
memory_usage()245 double NBinIntel::memory_usage()
246 {
247   return NBinStandard::memory_usage() + maxatom*2*sizeof(int);
248 }
249