1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "mpi.h"
16 #include "math.h"
17 #include "string.h"
18 #include "stdlib.h"
19 #include "ctype.h"
20 #include "particle_kokkos.h"
21 #include "grid_kokkos.h"
22 #include "update.h"
23 #include "comm.h"
24 #include "mixture.h"
25 #include "collide.h"
26 #include "random_mars.h"
27 #include "random_knuth.h"
28 #include "memory_kokkos.h"
29 #include "error.h"
30 #include "kokkos.h"
31 #include "sparta_masks.h"
32
33 #include <Kokkos_Vector.hpp>
34
35 using namespace SPARTA_NS;
36
37 enum{PKEEP,PINSERT,PDONE,PDISCARD,PENTRY,PEXIT,PSURF}; // several files
38 enum{NONE,DISCRETE,SMOOTH}; // several files
39 enum{INT,DOUBLE}; // several files
40 enum{COPYPARTICLELIST,FIXEDMEMORY};
41
42 #define DELTA 16384
43 #define DELTASPECIES 16
44 #define DELTAMIXTURE 8
45 #define DELTACELLCOUNT 10
46 #define MAXLINE 1024
47
48 // customize by adding an abbreviation string
49 // also add a check for the keyword in 2 places in add_species()
50
51 #define AIR "N O NO"
52
53 /* ---------------------------------------------------------------------- */
54
ParticleKokkos(SPARTA * sparta)55 ParticleKokkos::ParticleKokkos(SPARTA *sparta) : Particle(sparta)
56 {
57 d_fail_flag = DAT::t_int_scalar("particle:fail_flag");
58 h_fail_flag = HAT::t_int_scalar("particle:fail_flag_mirror");
59
60 k_reorder_pass = DAT::tdual_int_scalar("particle:reorder_pass");
61 d_reorder_pass = k_reorder_pass.d_view;
62 h_reorder_pass = k_reorder_pass.h_view;
63
64 sorted_kk = 0;
65 maxcellcount = 10;
66
67 k_eivec = tdual_struct_tdual_int_1d_1d("particle:eivec",0);
68 k_eiarray = tdual_struct_tdual_int_2d_1d("particle:eiarray",0);
69 k_edvec = tdual_struct_tdual_float_1d_1d("particle:edvec",0);
70 k_edarray = tdual_struct_tdual_float_2d_1d("particle:edarray",0);
71 }
72
73 /* ---------------------------------------------------------------------- */
74
~ParticleKokkos()75 ParticleKokkos::~ParticleKokkos()
76 {
77 if (copy || copymode) return;
78
79 particles = NULL;
80 species = NULL;
81
82
83 // deallocate views of views in serial to prevent race condition in profiling tools
84
85 for (int i = 0; i < k_eivec.extent(0); i++)
86 k_eivec.h_view(i).k_view = decltype(k_eivec.h_view(i).k_view)();
87
88 for (int i = 0; i < k_eiarray.extent(0); i++)
89 k_eiarray.h_view(i).k_view = decltype(k_eiarray.h_view(i).k_view)();
90
91 for (int i = 0; i < k_edvec.extent(0); i++)
92 k_edvec.h_view(i).k_view = decltype(k_edvec.h_view(i).k_view)();
93
94 for (int i = 0; i < k_edarray.extent(0); i++)
95 k_edarray.h_view(i).k_view = decltype(k_edarray.h_view(i).k_view)();
96
97 ewhich = NULL;
98 eicol = NULL;
99 edcol = NULL;
100 }
101
102 #ifndef SPARTA_KOKKOS_EXACT
103 /* ----------------------------------------------------------------------
104 compress particle list to remove particles with indices in dellist
105 dellist indices can be in ANY order
106 ------------------------------------------------------------------------- */
107
compress_migrate(int ndelete,int * dellist)108 void ParticleKokkos::compress_migrate(int ndelete, int *dellist)
109 {
110 // reallocate next list as needed
111
112 if (maxsort < maxlocal) {
113 maxsort = maxlocal;
114 memory->destroy(next);
115 memory->create(next,maxsort,"particle:next");
116 }
117
118 int i;
119
120 nbytes = sizeof(OnePart);
121
122 if (ndelete > d_lists.extent(1)) {
123 d_lists = DAT::t_int_2d(Kokkos::view_alloc("particle:lists",Kokkos::WithoutInitializing),2,ndelete);
124 d_mlist = Kokkos::subview(d_lists,0,Kokkos::ALL);
125 d_slist = Kokkos::subview(d_lists,1,Kokkos::ALL);
126
127 h_lists = HAT::t_int_2d(Kokkos::view_alloc("particle:lists_mirror",Kokkos::WithoutInitializing),2,ndelete);
128 h_mlist = Kokkos::subview(h_lists,0,Kokkos::ALL);
129 h_slist = Kokkos::subview(h_lists,1,Kokkos::ALL);
130 }
131
132 // use next as a scratch vector
133 // next is only used for upper locs from nlocal-ndelete to nlocal
134 // next[i] = 0 if deleted particle, 1 otherwise
135
136 int upper = nlocal - ndelete;
137 for (i = upper; i < nlocal; i++) next[i] = 1;
138
139 for (int m = 0; m < ndelete; m++) {
140 i = dellist[m];
141 if (i >= upper)
142 next[i] = 0;
143 }
144
145 int ncopy = 0;
146 int ncount = 0;
147 for (int j = upper; j < nlocal; j++) {
148 if (!next[j]) continue;
149
150 int i = dellist[ncount];
151 while (i >= upper) {
152 ncount++;
153 i = dellist[ncount];
154 }
155 h_mlist[ncopy] = i;
156 h_slist[ncopy] = j;
157 ncopy++;
158 ncount++;
159 }
160
161 nlocal = upper;
162
163 Kokkos::deep_copy(d_lists,h_lists);
164
165 this->sync(Device,PARTICLE_MASK);
166 d_particles = k_particles.d_view;
167
168 copymode = 1;
169 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleCompressReactions>(0,ncopy),*this);
170 DeviceType().fence();
171 copymode = 0;
172
173 this->modify(Device,PARTICLE_MASK);
174 d_particles = t_particle_1d(); // destroy reference to reduce memory use
175
176 sorted = 0;
177 sorted_kk = 0;
178 }
179 #endif
180
181 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleCompressReactions,const int & i) const182 void ParticleKokkos::operator()(TagParticleCompressReactions, const int &i) const {
183 const int j = d_mlist[i];
184 const int k = d_slist[i];
185 //memcpy(&d_particles[j],&d_particles[k],nbytes);
186 d_particles[j] = d_particles[k];
187 }
188
189 /* ----------------------------------------------------------------------
190 sort particles into grid cells
191 set cinfo.first = index of first particle in cell
192 set cinfo.count = # of particles in cell
193 next[] = index of next particle in same cell, -1 for no more
194 ------------------------------------------------------------------------- */
195
sort_kokkos()196 void ParticleKokkos::sort_kokkos()
197 {
198 sorted_kk = 1;
199 int reorder_scheme = COPYPARTICLELIST;
200 if (update->have_mem_limit())
201 reorder_scheme = FIXEDMEMORY;
202
203 ngrid = grid->nlocal;
204 GridKokkos* grid_kk = (GridKokkos*)grid;
205 d_cellcount = grid_kk->d_cellcount;
206 d_plist = grid_kk->d_plist;
207
208 if (ngrid > int(d_cellcount.extent(0))) {
209 grid_kk->d_cellcount = DAT::t_int_1d("particle:cellcount",ngrid);
210 d_cellcount = grid_kk->d_cellcount;
211 } else {
212 copymode = 1;
213 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleZero_cellcount>(0,d_cellcount.extent(0)),*this);
214 DeviceType().fence();
215 copymode = 0;
216 }
217
218 if (ngrid > int(d_plist.extent(0)) || maxcellcount > int(d_plist.extent(1))) {
219 grid_kk->d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
220 grid_kk->d_plist = DAT::t_int_2d(Kokkos::view_alloc("particle:plist",Kokkos::WithoutInitializing),ngrid,maxcellcount);
221 d_plist = grid_kk->d_plist;
222 }
223
224 this->sync(Device,PARTICLE_MASK);
225 d_particles = k_particles.d_view;
226
227 // icell = global cell the particle is in
228
229 // Cannot grow a Kokkos view in a parallel loop, so
230 // if the capacity of the list is exceeded, count the size
231 // needed, reallocate on the host, and then
232 // repeat the parallel loop again
233
234 do {
235 copymode = 1;
236 if (sparta->kokkos->need_atomics)
237 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleSort<1> >(0,nlocal),*this);
238 else
239 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleSort<0> >(0,nlocal),*this);
240 DeviceType().fence();
241 copymode = 0;
242
243 Kokkos::deep_copy(h_fail_flag,d_fail_flag);
244
245 if (h_fail_flag()) {
246 copymode = 1;
247 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleZero_cellcount>(0,ngrid),*this);
248 DeviceType().fence();
249 copymode = 0;
250 maxcellcount += DELTACELLCOUNT;
251 grid_kk->d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
252 grid_kk->d_plist = DAT::t_int_2d(Kokkos::view_alloc("particle:plist",Kokkos::WithoutInitializing),ngrid,maxcellcount);
253 d_plist = grid_kk->d_plist;
254
255 Kokkos::deep_copy(d_fail_flag,0);
256 }
257 } while (h_fail_flag());
258
259 if (update->reorder_period &&
260 (update->ntimestep % update->reorder_period == 0)) {
261
262 if (reorder_scheme == COPYPARTICLELIST && d_particles.extent(0) > d_sorted.extent(0)) {
263 d_sorted = t_particle_1d();
264 d_sorted = t_particle_1d("particle:sorted",d_particles.extent(0));
265 }
266 else if (reorder_scheme == FIXEDMEMORY && d_pswap1.size() == 0){
267 nParticlesWksp = (double)update->global_mem_limit/sizeof(Particle::OnePart);
268 d_pswap1 = t_particle_1d(Kokkos::view_alloc("particle:swap1",Kokkos::WithoutInitializing),nParticlesWksp);
269 d_pswap2 = t_particle_1d(Kokkos::view_alloc("particle:swap2",Kokkos::WithoutInitializing),nParticlesWksp);
270 }
271
272 nbytes = sizeof(OnePart);
273
274 if (reorder_scheme == COPYPARTICLELIST) {
275 copymode = 1;
276 Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType, TagParticleReorder_COPYPARTICLELIST>(0,ngrid),*this);
277 copymode = 0;
278 Kokkos::deep_copy(k_particles.d_view,d_sorted);
279 this->modify(Device,PARTICLE_MASK);
280 }
281 else if (reorder_scheme == FIXEDMEMORY) {
282 // Copy particle destinations into the particle list cell locations
283 // (to avoid adding a "destination" integer in OnePart for the fixed memory reorder)
284 // After the particle list has been reordered, reset the icell values to correctly reflect
285 // the variable naming.
286 copymode = 1;
287 Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType, TagCopyParticleReorderDestinations>(0,ngrid),*this);
288 DeviceType().fence();
289 copymode = 0;
290
291 int npasses = (nlocal-1)/nParticlesWksp + 1;
292 for (int ipass=0; ipass < npasses; ++ipass) {
293
294 h_reorder_pass() = ipass;
295 k_reorder_pass.modify_host();
296 k_reorder_pass.sync_device();
297
298 // identify next set of particles to reorder
299 copymode = 1;
300 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixedMemoryReorderInit>(0,nParticlesWksp),*this);
301 DeviceType().fence();
302 copymode = 0;
303
304 // reorder this set of particles
305 copymode = 1;
306 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixedMemoryReorder>(0,nParticlesWksp),*this);
307 DeviceType().fence();
308 copymode = 0;
309 }
310
311 // reset the icell values in the particle list
312 copymode = 1;
313 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagSetIcellFromPlist>(0,ngrid),*this);
314 DeviceType().fence();
315 copymode = 0;
316 this->modify(Device,PARTICLE_MASK);
317
318 // destroy references to reduce memory use
319 d_pswap1 = t_particle_1d();
320 d_pswap2 = t_particle_1d();
321 }
322 }
323
324 d_particles = t_particle_1d(); // destroy reference to reduce memory use
325 }
326
327 KOKKOS_INLINE_FUNCTION
operator ()(TagCopyParticleReorderDestinations,const int icell,int & m_fill,const bool & final) const328 void ParticleKokkos::operator()(TagCopyParticleReorderDestinations, const int icell, int &m_fill, const bool &final) const
329 {
330 // load icell values with reorder destination index for particle reordering
331 // (location in d_particles where particle is moving)
332 for (int j = 0; j < d_cellcount[icell]; j++) {
333 if (final) {
334 const int iparticle = d_plist(icell,j);
335 d_particles[iparticle].icell = m_fill;
336 d_plist(icell,j) = m_fill; // the new plist after reordering
337 }
338 m_fill++;
339 }
340 }
341
342 KOKKOS_INLINE_FUNCTION
operator ()(TagFixedMemoryReorderInit,const int & i) const343 void ParticleKokkos::operator()(TagFixedMemoryReorderInit, const int &i) const
344 {
345 // note: "i" is a thread id (cannot be greater than number of particles allocated in d_pswap* workspaces)
346
347 // assign batch of threads to next batch of particles (batch size = nParticlesWksp)
348 d_pswap1[i].icell = -999; // default to thread isn't moving a particle
349 int nextParticleToCheckForReordering = d_reorder_pass() * nParticlesWksp;
350 int n = nextParticleToCheckForReordering + i;
351 if (n < nlocal) {
352 if (d_particles[n].icell != n) {
353 d_pswap1[i] = d_particles[n]; // copy the moving particle into the work space so the thread can move it later.
354 d_particles[n].icell = -1; // current location of moving particle is marked as vacant
355 }
356 }
357 }
358
359 KOKKOS_INLINE_FUNCTION
operator ()(TagFixedMemoryReorder,const int & i) const360 void ParticleKokkos::operator()(TagFixedMemoryReorder, const int &i) const
361 {
362 // particle movement for this thread continues until a particle is moved to a vacant location (indicated by icell = -1)
363 OnePart *movePtr;
364 int newParticleLoc;
365 bool iHaveAnotherParticle = false;
366 if (d_pswap1[i].icell != -999)
367 iHaveAnotherParticle = true;
368
369 int count = 0;
370 while (iHaveAnotherParticle){
371 if (count % 2 == 0) { // even
372 movePtr = &d_pswap1[i];
373 newParticleLoc = movePtr->icell; // note: this is a particle location, NOT a cell location
374 d_pswap2[i] = d_particles[newParticleLoc];
375 if (d_pswap2[i].icell == -1)
376 iHaveAnotherParticle = false;
377 }
378 else { // odd
379 movePtr = &d_pswap2[i];
380 newParticleLoc = movePtr->icell; // note: this is a particle location, NOT a cell location
381 d_pswap1[i] = d_particles[newParticleLoc];
382 if (d_pswap1[i].icell == -1)
383 iHaveAnotherParticle = false;
384 }
385 d_particles[newParticleLoc] = *movePtr;
386 count++;
387 }
388 }
389
390 KOKKOS_INLINE_FUNCTION
operator ()(TagSetIcellFromPlist,const int & icell) const391 void ParticleKokkos::operator()(TagSetIcellFromPlist, const int &icell) const
392 {
393 for (int j = 0; j < d_cellcount[icell]; j++) {
394 const int iparticle = d_plist(icell,j);
395 d_particles[iparticle].icell = icell;
396 }
397 }
398
399 template<int NEED_ATOMICS>
400 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleSort<NEED_ATOMICS>,const int & i) const401 void ParticleKokkos::operator()(TagParticleSort<NEED_ATOMICS>, const int &i) const
402 {
403 const int icell = d_particles[i].icell;
404 int j;
405 if (NEED_ATOMICS)
406 j = Kokkos::atomic_fetch_add(&d_cellcount[icell],1);
407 else {
408 j = d_cellcount[icell];
409 d_cellcount[icell]++;
410 }
411 if (j+1 > maxcellcount)
412 d_fail_flag() = 1;
413 if (d_fail_flag()) return;
414 d_plist(icell,j) = i;
415 }
416
417 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleReorder_COPYPARTICLELIST,const int icell,int & m_fill,const bool & final) const418 void ParticleKokkos::operator()(TagParticleReorder_COPYPARTICLELIST, const int icell, int &m_fill, const bool &final) const
419 {
420 for (int j = 0; j < d_cellcount[icell]; j++) {
421 if (final) {
422 const int iparticle = d_plist(icell,j);
423 //memcpy(&d_sorted[m_fill],&d_particles[iparticle],nbytes);
424 d_sorted[m_fill] = d_particles[iparticle];
425 d_plist(icell,j) = m_fill;
426 }
427 m_fill++;
428 }
429 }
430
431 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleZero_cellcount,const int & i) const432 void ParticleKokkos::operator()(TagParticleZero_cellcount, const int &i) const {
433 d_cellcount[i] = 0.0;
434 }
435
436 /* ----------------------------------------------------------------------
437 set the initial weight of each particle
438 called by Update before particle move
439 only called if particle weighting is enabled
440 only grid-based weighting is currently implemented
441 ------------------------------------------------------------------------- */
442
pre_weight()443 void ParticleKokkos::pre_weight()
444 {
445 auto grid_kk = dynamic_cast<GridKokkos*>(grid);
446 auto& k_cinfo = grid_kk->k_cinfo;
447 grid_kk->sync(Device,CINFO_MASK);
448 this->sync(Device,PARTICLE_MASK);
449 auto d_cinfo = k_cinfo.d_view;
450 auto d_particles = k_particles.d_view;
451
452 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
453 auto icell = d_particles[i].icell;
454 d_particles[i].weight = d_cinfo[icell].weight;
455 });
456 this->modify(Device,PARTICLE_MASK);
457 }
458
459 /* ----------------------------------------------------------------------
460 clone/delete each particle based on ratio of its initial/final weights
461 called by Update after particle move and migration
462 only called if particle weighting is enabled
463 only grid-based weighting is currently implemented
464 ------------------------------------------------------------------------- */
465
466 struct PostWeightPair { int i; int id; };
467
post_weight()468 void ParticleKokkos::post_weight()
469 {
470 if (ncustom) {
471 error->all(FLERR,"Custom per-particles attributes not yet supported with Kokkos");
472 }
473 constexpr int METHOD = 1;
474 if (METHOD == 1) { // just call the host one
475 this->sync(Host,PARTICLE_MASK);
476 Particle::post_weight();
477 this->modify(Host,PARTICLE_MASK);
478 } else if (METHOD == 2) { // Kokkos-parallel, gives same (correct) answer
479 Kokkos::View<double*> d_ratios("post_weight:ratios", nlocal);
480 auto h_ratios = Kokkos::create_mirror_view(d_ratios);
481 auto grid_kk = dynamic_cast<GridKokkos*>(grid);
482 auto& k_cinfo = grid_kk->k_cinfo;
483 grid_kk->sync(Device,CINFO_MASK);
484 this->sync(Device,PARTICLE_MASK);
485 auto d_particles = k_particles.d_view;
486 auto d_cinfo = k_cinfo.d_view;
487 Kokkos::vector<PostWeightPair> map;
488 map.set_overallocation(0.5);
489 map.resize(nlocal);
490 auto d_map = map.d_view;
491 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
492 auto icell = d_particles[i].icell;
493 d_ratios[i] = d_particles[i].weight / d_cinfo[icell].weight;
494 d_map[i].id = d_particles[i].id;
495 d_map[i].i = i;
496 });
497 Kokkos::deep_copy(h_ratios, d_ratios);
498 map.device_to_host();
499 int nlocal_original = nlocal;
500 int i = 0;
501 while (i < nlocal_original) {
502 auto ratio = h_ratios[map[i].i];
503 if (ratio == 1.0) {
504 i++;
505 } else if (ratio < 1.0) {
506 if (wrandom->uniform() > ratio) {
507 map[i] = map.back();
508 if (map.size() > nlocal_original) ++i;
509 else --nlocal_original;
510 map.pop_back();
511 } else {
512 ++i;
513 }
514 } else if (ratio > 1.0) {
515 int nclone = int(ratio);
516 double fraction = ratio - nclone;
517 --nclone;
518 if (wrandom->uniform() < fraction) ++nclone;
519 for (int m = 0; m < nclone; ++m) {
520 map.push_back(map[i]);
521 map.back().id = MAXSMALLINT * wrandom->uniform();
522 }
523 ++i;
524 }
525 }
526 map.host_to_device();
527 nlocal = (int(map.size()));
528 grow(0);
529 Kokkos::View<OnePart*> d_newparticles("post_weight:newparticles", maxlocal);
530 d_map = map.d_view;
531 Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
532 d_newparticles[i] = d_particles[d_map[i].i];
533 d_newparticles[i].id = d_map[i].id;
534 });
535 Kokkos::deep_copy(k_particles.d_view,d_newparticles);
536 this->modify(Device,PARTICLE_MASK);
537 }
538 }
539
540 /* ---------------------------------------------------------------------- */
541
update_class_variables()542 void ParticleKokkos::update_class_variables() {
543 d_species = k_species.d_view;
544 this->sync(Device,SPECIES_MASK);
545
546 boltz = update->boltz;
547 collide_rot = 0;
548 vibstyle = NONE;
549 if (collide) {
550 vibstyle = collide->vibstyle;
551 if (collide->rotstyle != NONE) collide_rot = 1;
552 }
553 }
554
555 /* ----------------------------------------------------------------------
556 insure particle list can hold nextra new particles
557 if defined, also grow custom particle arrays and initialize with zeroes
558 ------------------------------------------------------------------------- */
559
grow(int nextra)560 void ParticleKokkos::grow(int nextra)
561 {
562 bigint target = (bigint) nlocal + nextra;
563 if (target <= maxlocal) return;
564
565 bigint newmax = maxlocal;
566 while (newmax < target) newmax += MAX(DELTA, newmax*1.1);
567 int oldmax = maxlocal;
568
569 if (newmax > MAXSMALLINT)
570 error->one(FLERR,"Per-processor particle count is too big");
571
572 maxlocal = newmax;
573 if (particles == NULL)
574 k_particles = tdual_particle_1d("particle:particles",maxlocal);
575 else {
576 this->sync(Device,PARTICLE_MASK); // force resize on device
577 k_particles.resize(maxlocal);
578 this->modify(Device,PARTICLE_MASK); // needed for auto sync
579 }
580 d_particles = k_particles.d_view;
581 particles = k_particles.h_view.data();
582
583 if (ncustom == 0) return;
584
585 for (int i = 0; i < ncustom; i++) {
586 if (ename[i] == NULL) continue;
587 grow_custom(i,oldmax,maxlocal);
588 }
589 }
590
591 /* ----------------------------------------------------------------------
592 insure species list can hold maxspecies species
593 assumes that maxspecies has already been increased
594 ------------------------------------------------------------------------- */
595
grow_species()596 void ParticleKokkos::grow_species()
597 {
598 if (sparta->kokkos->prewrap) {
599 Particle::grow_species();
600 } else {
601 if (species == NULL)
602 k_species = tdual_species_1d("particle:species",maxspecies);
603 else
604 k_species.resize(maxspecies);
605 species = k_species.h_view.data();
606 }
607 }
608
609 /* ---------------------------------------------------------------------- */
610
wrap_kokkos()611 void ParticleKokkos::wrap_kokkos()
612 {
613 // species
614
615 if (species != k_species.h_view.data()) {
616 memoryKK->wrap_kokkos(k_species,species,nspecies,"particle:species");
617 k_species.modify_host();
618 k_species.sync_device();
619 memory->sfree(species);
620 species = k_species.h_view.data();
621 }
622
623 // mixtures
624
625 k_species2group = DAT::tdual_int_2d("particle:species2group",nmixture,nspecies);
626 for (int i = 0; i < nmixture; i++)
627 for (int j = 0; j < nspecies; j++)
628 k_species2group.h_view(i,j) = mixture[i]->species2group[j];
629 k_species2group.modify_host();
630 k_species2group.sync_device();
631
632 //if (mixtures != k_mixtures.h_view.data()) {
633 // memoryKK->wrap_kokkos(k_mixtures,mixture,nmixture,"particle:mixture");
634 // k_mixtures.modify_host();
635 // k_mixtures.sync_device();
636 // memory->sfree(mixtures);
637 // mixtures = k_mixtures.h_view.data();
638 //}
639 }
640
641 /* ----------------------------------------------------------------------
642 add a custom attribute with name
643 assumes name does not already exist, except in case of restart
644 type = 0/1 for int/double
645 size = 0 for vector, size > 0 for array with size columns
646 allocate the vector or array to current maxlocal via grow_custom()
647 return index of its location;
648 ------------------------------------------------------------------------- */
649
add_custom(char * name,int type,int size)650 int ParticleKokkos::add_custom(char *name, int type, int size)
651 {
652 ///modifies eivec,eiarray,edvec,edarray on either host or device, probably device since host isn't modified. May just want to use host
653 ///modifies ewhich on host, sync to device here since it is never modified on the device
654 //
655
656 int index;
657
658 // if name already exists
659 // just return index if a restart script and re-defining the name
660 // else error
661
662 index = find_custom(name);
663 if (index >= 0) {
664 if (custom_restart_flag == NULL || custom_restart_flag[index] == 1)
665 error->all(FLERR,"Custom particle attribute name already exists");
666 custom_restart_flag[index] = 1;
667 return index;
668 }
669
670 // use first available NULL entry or allocate a new one
671
672 for (index = 0; index < ncustom; index++)
673 if (ename[index] == NULL) break;
674
675 if (index == ncustom) {
676 ncustom++;
677 ename = (char **) memory->srealloc(ename,ncustom*sizeof(char *),
678 "particle:ename");
679 memory->grow(etype,ncustom,"particle:etype");
680 memory->grow(esize,ncustom,"particle:esize");
681 memoryKK->grow_kokkos(k_ewhich,ewhich,ncustom,"particle:ewhich");
682 }
683
684 int n = strlen(name) + 1;
685 ename[index] = new char[n];
686 strcpy(ename[index],name);
687 etype[index] = type;
688 esize[index] = size;
689
690 if (type == INT) {
691 if (size == 0) {
692 ewhich[index] = ncustom_ivec++;
693 eivec = (int **)
694 memory->srealloc(eivec,ncustom_ivec*sizeof(int *),"particle:eivec");
695 eivec[ncustom_ivec-1] = NULL;
696 k_eivec.resize(ncustom_ivec);
697 memory->grow(icustom_ivec,ncustom_ivec,"particle:icustom_ivec");
698 icustom_ivec[ncustom_ivec-1] = index;
699 } else {
700 ewhich[index] = ncustom_iarray++;
701 eiarray = (int ***)
702 memory->srealloc(eiarray,ncustom_iarray*sizeof(int **),
703 "particle:eiarray");
704 eiarray[ncustom_iarray-1] = NULL;
705 k_eiarray.resize(ncustom_iarray);
706 memory->grow(icustom_iarray,ncustom_iarray,"particle:icustom_iarray");
707 icustom_iarray[ncustom_iarray-1] = index;
708 memoryKK->grow_kokkos(k_eicol,eicol,ncustom_iarray,"particle:eicol");
709 eicol[ncustom_iarray-1] = size;
710 }
711 } else if (type == DOUBLE) {
712 if (size == 0) {
713 ewhich[index] = ncustom_dvec++;
714 edvec = (double **)
715 memory->srealloc(edvec,ncustom_dvec*sizeof(double *),"particle:edvec");
716 edvec[ncustom_dvec-1] = NULL;
717 k_edvec.resize(ncustom_dvec);
718 memory->grow(icustom_dvec,ncustom_dvec,"particle:icustom_dvec");
719 icustom_dvec[ncustom_dvec-1] = index;
720 } else {
721 ewhich[index] = ncustom_darray++;
722 edarray = (double ***)
723 memory->srealloc(edarray,ncustom_darray*sizeof(double **),
724 "particle:edarray");
725 edarray[ncustom_darray-1] = NULL;
726 k_edarray.resize(ncustom_darray);
727 memory->grow(icustom_darray,ncustom_darray,"particle:icustom_darray");
728 icustom_darray[ncustom_darray-1] = index;
729 memoryKK->grow_kokkos(k_edcol,edcol,ncustom_darray,"particle:edcol");
730 edcol[ncustom_darray-1] = size;
731 }
732 }
733
734 // ewhich,eicol,edcol never modified on the device, so sync here
735
736 k_ewhich.modify_host();
737 k_ewhich.sync_device();
738
739 k_eicol.modify_host();
740 k_eicol.sync_device();
741
742 k_edcol.modify_host();
743 k_edcol.sync_device();
744
745 grow_custom(index,0,maxlocal);
746
747 return index;
748 }
749
750 /* ----------------------------------------------------------------------
751 grow the vector/array associated with custom attribute with index
752 nold = old length, nnew = new length (typically maxlocal)
753 set new values to 0 via memset()
754 ------------------------------------------------------------------------- */
755
grow_custom(int index,int nold,int nnew)756 void ParticleKokkos::grow_custom(int index, int nold, int nnew)
757 {
758 // modifies the inner part of eivec,eiarray,edvec,edarray on whatever, and the outer view on the host
759
760 k_eivec.sync_host();
761 k_eiarray.sync_host();
762 k_edvec.sync_host();
763 k_edarray.sync_host();
764
765 if (etype[index] == INT) {
766 if (esize[index] == 0) {
767 int *ivector = eivec[ewhich[index]];
768 auto k_ivector = k_eivec.h_view[ewhich[index]].k_view;
769 memoryKK->grow_kokkos(k_ivector,ivector,nold+nnew,"particle:eivec");
770 eivec[ewhich[index]] = ivector;
771 } else {
772 int **iarray = eiarray[ewhich[index]];
773 auto &k_iarray = k_eiarray.h_view[ewhich[index]].k_view;
774 memoryKK->grow_kokkos(k_iarray,iarray,nold+nnew,esize[index],"particle:eiarray");
775 eiarray[ewhich[index]] = iarray;
776 }
777
778 } else {
779 if (esize[index] == 0) {
780 double *dvector = edvec[ewhich[index]];
781 auto k_dvector = k_edvec.h_view[ewhich[index]].k_view;
782 memoryKK->grow_kokkos(k_dvector,dvector,nold+nnew,"particle:edvec");
783 edvec[ewhich[index]] = dvector;
784 } else {
785 double **darray = edarray[ewhich[index]];
786 auto k_darray = k_edarray.h_view[ewhich[index]].k_view;
787 memoryKK->grow_kokkos(k_darray,darray,nold+nnew,esize[index],"particle:edarray");
788 edarray[ewhich[index]] = darray;
789 }
790 }
791
792 k_eivec.modify_host();
793 k_eiarray.modify_host();
794 k_edvec.modify_host();
795 k_edarray.modify_host();
796
797 k_eivec.sync_device();
798 k_eiarray.sync_device();
799 k_edvec.sync_device();
800 k_edarray.sync_device();
801 }
802
803 /* ----------------------------------------------------------------------
804 remove a custom attribute at location index
805 free memory for name and vector/array and set ptrs to NULL
806 ncustom lists never shrink, but indices stored between
807 the ncustom list and the dense vector/array lists must be reset
808 ------------------------------------------------------------------------- */
809
remove_custom(int index)810 void ParticleKokkos::remove_custom(int index)
811 {
812 // modifies the outer host view, deletes the inner dual view
813 //
814 delete [] ename[index];
815 ename[index] = NULL;
816
817 if (etype[index] == INT) {
818 if (esize[index] == 0) {
819 memoryKK->destroy_kokkos(k_eivec.h_view[ewhich[index]].k_view,eivec[ewhich[index]]);
820 ncustom_ivec--;
821 for (int i = ewhich[index]; i < ncustom_ivec; i++) {
822 icustom_ivec[i] = icustom_ivec[i+1];
823 ewhich[icustom_ivec[i]] = i;
824 eivec[i] = eivec[i+1];
825 k_eivec.h_view[i] = k_eivec.h_view[i+1];
826 }
827 } else {
828 memoryKK->destroy_kokkos(eiarray[ewhich[index]]);
829 ncustom_iarray--;
830 for (int i = ewhich[index]; i < ncustom_iarray; i++) {
831 icustom_iarray[i] = icustom_iarray[i+1];
832 ewhich[icustom_iarray[i]] = i;
833 eiarray[i] = eiarray[i+1];
834 eicol[i] = eicol[i+1];
835 k_eiarray.h_view[i] = k_eiarray.h_view[i+1];
836 }
837 }
838 } else if (etype[index] == DOUBLE) {
839 if (esize[index] == 0) {
840 memoryKK->destroy(edvec[ewhich[index]]);
841 ncustom_dvec--;
842 for (int i = ewhich[index]; i < ncustom_dvec; i++) {
843 icustom_dvec[i] = icustom_dvec[i+1];
844 ewhich[icustom_dvec[i]] = i;
845 edvec[i] = edvec[i+1];
846 k_edvec.h_view[i] = k_edvec.h_view[i+1];
847 }
848 k_edvec.modify_host();
849 } else {
850 memoryKK->destroy(edarray[ewhich[index]]);
851 ncustom_darray--;
852 for (int i = ewhich[index]; i < ncustom_darray; i++) {
853 icustom_darray[i] = icustom_darray[i+1];
854 ewhich[icustom_darray[i]] = i;
855 edarray[i] = edarray[i+1];
856 edcol[i] = edcol[i+1];
857 k_edarray.h_view[i] = k_edarray.h_view[i+1];
858 }
859 k_edarray.modify_host();
860 }
861 }
862
863 // set ncustom = 0 if custom list is now entirely empty
864
865 int empty = 1;
866 for (int i = 0; i < ncustom; i++)
867 if (ename[i]) empty = 0;
868 if (empty) ncustom = 0;
869
870 k_eivec.sync_device();
871 k_eiarray.sync_device();
872 k_edvec.sync_device();
873 k_edarray.sync_device();
874 }
875
876 /* ----------------------------------------------------------------------
877 copy info for one particle in custom attribute vectors/arrays
878 into location I from location J
879 ------------------------------------------------------------------------- */
880
copy_custom(int i,int j)881 void ParticleKokkos::copy_custom(int i, int j)
882 {
883 // need sync/modify host of inner view
884 int m;
885
886 // caller does not always check this
887 // shouldn't be a problem, but valgrind can complain if memcpy to self
888 // oddly memcpy(&particles[i],&particles[j],sizeof(OnePart)) seems OK
889
890 if (i == j) return;
891
892 // 4 flavors of vectors/arrays
893
894 if (ncustom_ivec) {
895 for (m = 0; m < ncustom_ivec; m++) eivec[m][i] = eivec[m][j];
896 }
897 if (ncustom_iarray) {
898 for (m = 0; m < ncustom_iarray; m++)
899 memcpy(eiarray[m][i],eiarray[m][j],eicol[m]*sizeof(int));
900 }
901 if (ncustom_dvec) {
902 for (m = 0; m < ncustom_dvec; m++) edvec[m][i] = edvec[m][j];
903 }
904 if (ncustom_darray) {
905 for (m = 0; m < ncustom_darray; m++)
906 memcpy(edarray[m][i],edarray[m][j],edcol[m]*sizeof(double));
907 }
908 }
909
910 /* ----------------------------------------------------------------------
911 pack a custom attributes for a single particle N into buf
912 this is done in order of 4 styles of vectors/arrays, not in ncustom order
913 ------------------------------------------------------------------------- */
914
pack_custom(int n,char * buf)915 void ParticleKokkos::pack_custom(int n, char *buf)
916 {
917 this->sync(Host,CUSTOM_MASK);
918 Particle::pack_custom(n,buf);
919 }
920
921 /* ----------------------------------------------------------------------
922 unpack custom attributes for a single particle N from buf
923 this is done in order of 4 styles of vectors/arrays, not in ncustom order
924 ------------------------------------------------------------------------- */
925
unpack_custom(char * buf,int n)926 void ParticleKokkos::unpack_custom(char *buf, int n)
927 {
928 Particle::unpack_custom(buf,n);
929 this->modify(Host,CUSTOM_MASK);
930 }
931
932 /* ---------------------------------------------------------------------- */
933
sync(ExecutionSpace space,unsigned int mask)934 void ParticleKokkos::sync(ExecutionSpace space, unsigned int mask)
935 {
936 if (space == Device) {
937 if (sparta->kokkos->auto_sync)
938 modify(Host,mask);
939 if (mask & PARTICLE_MASK) k_particles.sync_device();
940 if (mask & SPECIES_MASK) k_species.sync_device();
941 if (mask & CUSTOM_MASK) {
942 if (ncustom) {
943 if (ncustom_ivec)
944 for (int i = 0; i < ncustom_ivec; i++)
945 k_eivec.h_view[i].k_view.sync_device();
946
947 if (ncustom_iarray)
948 for (int i = 0; i < ncustom_iarray; i++)
949 k_eiarray.h_view[i].k_view.sync_device();
950
951 if (ncustom_dvec)
952 for (int i = 0; i < ncustom_dvec; i++)
953 k_edvec.h_view[i].k_view.sync_device();
954
955 if (ncustom_darray)
956 for (int i = 0; i < ncustom_darray; i++)
957 k_edarray.h_view[i].k_view.sync_device();
958 }
959 }
960 } else {
961 if (mask & PARTICLE_MASK) k_particles.sync_host();
962 if (mask & SPECIES_MASK) k_species.sync_host();
963 if (mask & CUSTOM_MASK) {
964 if (ncustom_ivec)
965 for (int i = 0; i < ncustom_ivec; i++)
966 k_eivec.h_view[i].k_view.sync_host();
967
968 if (ncustom_iarray)
969 for (int i = 0; i < ncustom_iarray; i++)
970 k_eiarray.h_view[i].k_view.sync_host();
971
972 if (ncustom_dvec)
973 for (int i = 0; i < ncustom_dvec; i++)
974 k_edvec.h_view[i].k_view.sync_host();
975
976 if (ncustom_darray)
977 for (int i = 0; i < ncustom_darray; i++)
978 k_edarray.h_view[i].k_view.sync_host();
979 }
980 }
981 }
982
983 /* ---------------------------------------------------------------------- */
984
modify(ExecutionSpace space,unsigned int mask)985 void ParticleKokkos::modify(ExecutionSpace space, unsigned int mask)
986 {
987 if (space == Device) {
988 if (mask & PARTICLE_MASK) k_particles.modify_device();
989 if (mask & SPECIES_MASK) k_species.modify_device();
990 if (mask & CUSTOM_MASK) {
991 if (ncustom) {
992 if (ncustom_ivec)
993 for (int i = 0; i < ncustom_ivec; i++)
994 k_eivec.h_view[i].k_view.modify_device();
995
996 if (ncustom_iarray)
997 for (int i = 0; i < ncustom_iarray; i++)
998 k_eiarray.h_view[i].k_view.modify_device();
999
1000 if (ncustom_dvec)
1001 for (int i = 0; i < ncustom_dvec; i++)
1002 k_edvec.h_view[i].k_view.modify_device();
1003
1004 if (ncustom_darray)
1005 for (int i = 0; i < ncustom_darray; i++)
1006 k_edarray.h_view[i].k_view.modify_device();
1007 }
1008 }
1009 if (sparta->kokkos->auto_sync)
1010 sync(Host,mask);
1011 } else {
1012 if (mask & PARTICLE_MASK) k_particles.modify_host();
1013 if (mask & SPECIES_MASK) k_species.modify_host();
1014 if (mask & CUSTOM_MASK) {
1015 if (ncustom) {
1016 if (ncustom_ivec)
1017 for (int i = 0; i < ncustom_ivec; i++)
1018 k_eivec.h_view[i].k_view.modify_host();
1019
1020 if (ncustom_iarray)
1021 for (int i = 0; i < ncustom_iarray; i++)
1022 k_eiarray.h_view[i].k_view.modify_host();
1023
1024 if (ncustom_dvec)
1025 for (int i = 0; i < ncustom_dvec; i++)
1026 k_edvec.h_view[i].k_view.modify_host();
1027
1028 if (ncustom_darray)
1029 for (int i = 0; i < ncustom_darray; i++)
1030 k_edarray.h_view[i].k_view.modify_host();
1031 }
1032 }
1033 }
1034 }
1035