1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "mpi.h"
16 #include "math.h"
17 #include "string.h"
18 #include "stdlib.h"
19 #include "ctype.h"
20 #include "particle_kokkos.h"
21 #include "grid_kokkos.h"
22 #include "update.h"
23 #include "comm.h"
24 #include "mixture.h"
25 #include "collide.h"
26 #include "random_mars.h"
27 #include "random_knuth.h"
28 #include "memory_kokkos.h"
29 #include "error.h"
30 #include "kokkos.h"
31 #include "sparta_masks.h"
32 
33 #include <Kokkos_Vector.hpp>
34 
35 using namespace SPARTA_NS;
36 
37 enum{PKEEP,PINSERT,PDONE,PDISCARD,PENTRY,PEXIT,PSURF};  // several files
38 enum{NONE,DISCRETE,SMOOTH};            // several files
39 enum{INT,DOUBLE};                      // several files
40 enum{COPYPARTICLELIST,FIXEDMEMORY};
41 
42 #define DELTA 16384
43 #define DELTASPECIES 16
44 #define DELTAMIXTURE 8
45 #define DELTACELLCOUNT 10
46 #define MAXLINE 1024
47 
48 // customize by adding an abbreviation string
49 // also add a check for the keyword in 2 places in add_species()
50 
51 #define AIR "N O NO"
52 
53 /* ---------------------------------------------------------------------- */
54 
ParticleKokkos(SPARTA * sparta)55 ParticleKokkos::ParticleKokkos(SPARTA *sparta) : Particle(sparta)
56 {
57   d_fail_flag = DAT::t_int_scalar("particle:fail_flag");
58   h_fail_flag = HAT::t_int_scalar("particle:fail_flag_mirror");
59 
60   k_reorder_pass = DAT::tdual_int_scalar("particle:reorder_pass");
61   d_reorder_pass = k_reorder_pass.d_view;
62   h_reorder_pass = k_reorder_pass.h_view;
63 
64   sorted_kk = 0;
65   maxcellcount = 10;
66 
67   k_eivec = tdual_struct_tdual_int_1d_1d("particle:eivec",0);
68   k_eiarray = tdual_struct_tdual_int_2d_1d("particle:eiarray",0);
69   k_edvec = tdual_struct_tdual_float_1d_1d("particle:edvec",0);
70   k_edarray = tdual_struct_tdual_float_2d_1d("particle:edarray",0);
71 }
72 
73 /* ---------------------------------------------------------------------- */
74 
~ParticleKokkos()75 ParticleKokkos::~ParticleKokkos()
76 {
77   if (copy || copymode) return;
78 
79   particles = NULL;
80   species = NULL;
81 
82 
83   // deallocate views of views in serial to prevent race condition in profiling tools
84 
85   for (int i = 0; i < k_eivec.extent(0); i++)
86     k_eivec.h_view(i).k_view = decltype(k_eivec.h_view(i).k_view)();
87 
88   for (int i = 0; i < k_eiarray.extent(0); i++)
89     k_eiarray.h_view(i).k_view = decltype(k_eiarray.h_view(i).k_view)();
90 
91   for (int i = 0; i < k_edvec.extent(0); i++)
92     k_edvec.h_view(i).k_view = decltype(k_edvec.h_view(i).k_view)();
93 
94   for (int i = 0; i < k_edarray.extent(0); i++)
95     k_edarray.h_view(i).k_view = decltype(k_edarray.h_view(i).k_view)();
96 
97   ewhich = NULL;
98   eicol = NULL;
99   edcol = NULL;
100 }
101 
102 #ifndef SPARTA_KOKKOS_EXACT
103 /* ----------------------------------------------------------------------
104    compress particle list to remove particles with indices in dellist
105    dellist indices can be in ANY order
106 ------------------------------------------------------------------------- */
107 
compress_migrate(int ndelete,int * dellist)108 void ParticleKokkos::compress_migrate(int ndelete, int *dellist)
109 {
110   // reallocate next list as needed
111 
112   if (maxsort < maxlocal) {
113     maxsort = maxlocal;
114     memory->destroy(next);
115     memory->create(next,maxsort,"particle:next");
116   }
117 
118   int i;
119 
120   nbytes = sizeof(OnePart);
121 
122   if (ndelete > d_lists.extent(1)) {
123     d_lists = DAT::t_int_2d(Kokkos::view_alloc("particle:lists",Kokkos::WithoutInitializing),2,ndelete);
124     d_mlist = Kokkos::subview(d_lists,0,Kokkos::ALL);
125     d_slist = Kokkos::subview(d_lists,1,Kokkos::ALL);
126 
127     h_lists = HAT::t_int_2d(Kokkos::view_alloc("particle:lists_mirror",Kokkos::WithoutInitializing),2,ndelete);
128     h_mlist = Kokkos::subview(h_lists,0,Kokkos::ALL);
129     h_slist = Kokkos::subview(h_lists,1,Kokkos::ALL);
130   }
131 
132   // use next as a scratch vector
133   // next is only used for upper locs from nlocal-ndelete to nlocal
134   // next[i] = 0 if deleted particle, 1 otherwise
135 
136   int upper = nlocal - ndelete;
137   for (i = upper; i < nlocal; i++) next[i] = 1;
138 
139   for (int m = 0; m < ndelete; m++) {
140     i = dellist[m];
141     if (i >= upper)
142       next[i] = 0;
143   }
144 
145   int ncopy = 0;
146   int ncount = 0;
147   for (int j = upper; j < nlocal; j++) {
148     if (!next[j]) continue;
149 
150     int i = dellist[ncount];
151     while (i >= upper) {
152       ncount++;
153       i = dellist[ncount];
154     }
155     h_mlist[ncopy] = i;
156     h_slist[ncopy] = j;
157     ncopy++;
158     ncount++;
159   }
160 
161   nlocal = upper;
162 
163   Kokkos::deep_copy(d_lists,h_lists);
164 
165   this->sync(Device,PARTICLE_MASK);
166   d_particles = k_particles.d_view;
167 
168   copymode = 1;
169   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleCompressReactions>(0,ncopy),*this);
170   DeviceType().fence();
171   copymode = 0;
172 
173   this->modify(Device,PARTICLE_MASK);
174   d_particles = t_particle_1d(); // destroy reference to reduce memory use
175 
176   sorted = 0;
177   sorted_kk = 0;
178 }
179 #endif
180 
181 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleCompressReactions,const int & i) const182 void ParticleKokkos::operator()(TagParticleCompressReactions, const int &i) const {
183   const int j = d_mlist[i];
184   const int k = d_slist[i];
185   //memcpy(&d_particles[j],&d_particles[k],nbytes);
186   d_particles[j] = d_particles[k];
187 }
188 
189 /* ----------------------------------------------------------------------
190    sort particles into grid cells
191    set cinfo.first = index of first particle in cell
192    set cinfo.count = # of particles in cell
193    next[] = index of next particle in same cell, -1 for no more
194 ------------------------------------------------------------------------- */
195 
sort_kokkos()196 void ParticleKokkos::sort_kokkos()
197 {
198   sorted_kk = 1;
199   int reorder_scheme = COPYPARTICLELIST;
200   if (update->have_mem_limit())
201     reorder_scheme = FIXEDMEMORY;
202 
203   ngrid = grid->nlocal;
204   GridKokkos* grid_kk = (GridKokkos*)grid;
205   d_cellcount = grid_kk->d_cellcount;
206   d_plist = grid_kk->d_plist;
207 
208   if (ngrid > int(d_cellcount.extent(0))) {
209     grid_kk->d_cellcount = DAT::t_int_1d("particle:cellcount",ngrid);
210     d_cellcount = grid_kk->d_cellcount;
211   } else {
212     copymode = 1;
213     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleZero_cellcount>(0,d_cellcount.extent(0)),*this);
214     DeviceType().fence();
215     copymode = 0;
216   }
217 
218   if (ngrid > int(d_plist.extent(0)) || maxcellcount > int(d_plist.extent(1))) {
219     grid_kk->d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
220     grid_kk->d_plist = DAT::t_int_2d(Kokkos::view_alloc("particle:plist",Kokkos::WithoutInitializing),ngrid,maxcellcount);
221     d_plist = grid_kk->d_plist;
222   }
223 
224   this->sync(Device,PARTICLE_MASK);
225   d_particles = k_particles.d_view;
226 
227   // icell = global cell the particle is in
228 
229   // Cannot grow a Kokkos view in a parallel loop, so
230   //  if the capacity of the list is exceeded, count the size
231   //  needed, reallocate on the host, and then
232   //  repeat the parallel loop again
233 
234   do {
235     copymode = 1;
236     if (sparta->kokkos->need_atomics)
237       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleSort<1> >(0,nlocal),*this);
238     else
239       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleSort<0> >(0,nlocal),*this);
240     DeviceType().fence();
241     copymode = 0;
242 
243     Kokkos::deep_copy(h_fail_flag,d_fail_flag);
244 
245     if (h_fail_flag()) {
246       copymode = 1;
247       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagParticleZero_cellcount>(0,ngrid),*this);
248       DeviceType().fence();
249       copymode = 0;
250       maxcellcount += DELTACELLCOUNT;
251       grid_kk->d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
252       grid_kk->d_plist = DAT::t_int_2d(Kokkos::view_alloc("particle:plist",Kokkos::WithoutInitializing),ngrid,maxcellcount);
253       d_plist = grid_kk->d_plist;
254 
255       Kokkos::deep_copy(d_fail_flag,0);
256     }
257   } while (h_fail_flag());
258 
259   if (update->reorder_period &&
260       (update->ntimestep % update->reorder_period == 0)) {
261 
262     if (reorder_scheme == COPYPARTICLELIST && d_particles.extent(0) > d_sorted.extent(0)) {
263       d_sorted = t_particle_1d();
264       d_sorted = t_particle_1d("particle:sorted",d_particles.extent(0));
265     }
266     else if (reorder_scheme == FIXEDMEMORY && d_pswap1.size() == 0){
267       nParticlesWksp = (double)update->global_mem_limit/sizeof(Particle::OnePart);
268       d_pswap1 = t_particle_1d(Kokkos::view_alloc("particle:swap1",Kokkos::WithoutInitializing),nParticlesWksp);
269       d_pswap2 = t_particle_1d(Kokkos::view_alloc("particle:swap2",Kokkos::WithoutInitializing),nParticlesWksp);
270     }
271 
272     nbytes = sizeof(OnePart);
273 
274     if (reorder_scheme == COPYPARTICLELIST) {
275       copymode = 1;
276       Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType, TagParticleReorder_COPYPARTICLELIST>(0,ngrid),*this);
277       copymode = 0;
278       Kokkos::deep_copy(k_particles.d_view,d_sorted);
279       this->modify(Device,PARTICLE_MASK);
280     }
281     else if (reorder_scheme == FIXEDMEMORY) {
282       // Copy particle destinations into the particle list cell locations
283       //  (to avoid adding a "destination" integer in OnePart for the fixed memory reorder)
284       // After the particle list has been reordered, reset the icell values to correctly reflect
285       // the variable naming.
286       copymode = 1;
287       Kokkos::parallel_scan(Kokkos::RangePolicy<DeviceType, TagCopyParticleReorderDestinations>(0,ngrid),*this);
288       DeviceType().fence();
289       copymode = 0;
290 
291       int npasses = (nlocal-1)/nParticlesWksp + 1;
292       for (int ipass=0; ipass < npasses; ++ipass) {
293 
294         h_reorder_pass() = ipass;
295         k_reorder_pass.modify_host();
296         k_reorder_pass.sync_device();
297 
298         // identify next set of particles to reorder
299         copymode = 1;
300         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixedMemoryReorderInit>(0,nParticlesWksp),*this);
301         DeviceType().fence();
302         copymode = 0;
303 
304         // reorder this set of particles
305         copymode = 1;
306         Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixedMemoryReorder>(0,nParticlesWksp),*this);
307         DeviceType().fence();
308         copymode = 0;
309       }
310 
311       // reset the icell values in the particle list
312       copymode = 1;
313       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagSetIcellFromPlist>(0,ngrid),*this);
314       DeviceType().fence();
315       copymode = 0;
316       this->modify(Device,PARTICLE_MASK);
317 
318       // destroy references to reduce memory use
319       d_pswap1 = t_particle_1d();
320       d_pswap2 = t_particle_1d();
321     }
322   }
323 
324   d_particles = t_particle_1d(); // destroy reference to reduce memory use
325 }
326 
327 KOKKOS_INLINE_FUNCTION
operator ()(TagCopyParticleReorderDestinations,const int icell,int & m_fill,const bool & final) const328 void ParticleKokkos::operator()(TagCopyParticleReorderDestinations, const int icell, int &m_fill, const bool &final) const
329 {
330   // load icell values with reorder destination index for particle reordering
331   //  (location in d_particles where particle is moving)
332   for (int j = 0; j < d_cellcount[icell]; j++) {
333     if (final) {
334       const int iparticle = d_plist(icell,j);
335       d_particles[iparticle].icell = m_fill;
336       d_plist(icell,j) = m_fill; // the new plist after reordering
337     }
338     m_fill++;
339   }
340 }
341 
342 KOKKOS_INLINE_FUNCTION
operator ()(TagFixedMemoryReorderInit,const int & i) const343 void ParticleKokkos::operator()(TagFixedMemoryReorderInit, const int &i) const
344 {
345   // note:  "i" is a thread id (cannot be greater than number of particles allocated in d_pswap* workspaces)
346 
347   // assign batch of threads to next batch of particles (batch size = nParticlesWksp)
348   d_pswap1[i].icell = -999; // default to thread isn't moving a particle
349   int nextParticleToCheckForReordering = d_reorder_pass() * nParticlesWksp;
350   int n = nextParticleToCheckForReordering + i;
351   if (n < nlocal) {
352     if (d_particles[n].icell != n) {
353       d_pswap1[i] = d_particles[n]; // copy the moving particle into the work space so the thread can move it later.
354       d_particles[n].icell = -1;    // current location of moving particle is marked as vacant
355     }
356   }
357 }
358 
359 KOKKOS_INLINE_FUNCTION
operator ()(TagFixedMemoryReorder,const int & i) const360 void ParticleKokkos::operator()(TagFixedMemoryReorder, const int &i) const
361 {
362   // particle movement for this thread continues until a particle is moved to a vacant location (indicated by icell = -1)
363   OnePart *movePtr;
364   int newParticleLoc;
365   bool iHaveAnotherParticle = false;
366   if (d_pswap1[i].icell != -999)
367     iHaveAnotherParticle = true;
368 
369   int count = 0;
370   while (iHaveAnotherParticle){
371     if (count % 2 == 0) { // even
372       movePtr = &d_pswap1[i];
373       newParticleLoc = movePtr->icell; // note:  this is a particle location, NOT a cell location
374       d_pswap2[i] = d_particles[newParticleLoc];
375       if (d_pswap2[i].icell == -1)
376         iHaveAnotherParticle = false;
377     }
378     else { // odd
379       movePtr = &d_pswap2[i];
380       newParticleLoc = movePtr->icell; // note:  this is a particle location, NOT a cell location
381       d_pswap1[i] = d_particles[newParticleLoc];
382       if (d_pswap1[i].icell == -1)
383         iHaveAnotherParticle = false;
384     }
385     d_particles[newParticleLoc] = *movePtr;
386     count++;
387   }
388 }
389 
390 KOKKOS_INLINE_FUNCTION
operator ()(TagSetIcellFromPlist,const int & icell) const391 void ParticleKokkos::operator()(TagSetIcellFromPlist, const int &icell) const
392 {
393   for (int j = 0; j < d_cellcount[icell]; j++) {
394     const int iparticle = d_plist(icell,j);
395     d_particles[iparticle].icell = icell;
396   }
397 }
398 
399 template<int NEED_ATOMICS>
400 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleSort<NEED_ATOMICS>,const int & i) const401 void ParticleKokkos::operator()(TagParticleSort<NEED_ATOMICS>, const int &i) const
402 {
403   const int icell = d_particles[i].icell;
404   int j;
405   if (NEED_ATOMICS)
406     j = Kokkos::atomic_fetch_add(&d_cellcount[icell],1);
407   else {
408     j = d_cellcount[icell];
409     d_cellcount[icell]++;
410   }
411   if (j+1 > maxcellcount)
412     d_fail_flag() = 1;
413   if (d_fail_flag()) return;
414   d_plist(icell,j) = i;
415 }
416 
417 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleReorder_COPYPARTICLELIST,const int icell,int & m_fill,const bool & final) const418 void ParticleKokkos::operator()(TagParticleReorder_COPYPARTICLELIST, const int icell, int &m_fill, const bool &final) const
419 {
420   for (int j = 0; j < d_cellcount[icell]; j++) {
421     if (final) {
422       const int iparticle = d_plist(icell,j);
423       //memcpy(&d_sorted[m_fill],&d_particles[iparticle],nbytes);
424       d_sorted[m_fill] = d_particles[iparticle];
425       d_plist(icell,j) = m_fill;
426     }
427     m_fill++;
428   }
429 }
430 
431 KOKKOS_INLINE_FUNCTION
operator ()(TagParticleZero_cellcount,const int & i) const432 void ParticleKokkos::operator()(TagParticleZero_cellcount, const int &i) const {
433   d_cellcount[i] = 0.0;
434 }
435 
436 /* ----------------------------------------------------------------------
437    set the initial weight of each particle
438    called by Update before particle move
439    only called if particle weighting is enabled
440    only grid-based weighting is currently implemented
441 ------------------------------------------------------------------------- */
442 
pre_weight()443 void ParticleKokkos::pre_weight()
444 {
445   auto grid_kk = dynamic_cast<GridKokkos*>(grid);
446   auto& k_cinfo = grid_kk->k_cinfo;
447   grid_kk->sync(Device,CINFO_MASK);
448   this->sync(Device,PARTICLE_MASK);
449   auto d_cinfo = k_cinfo.d_view;
450   auto d_particles = k_particles.d_view;
451 
452   Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
453     auto icell = d_particles[i].icell;
454     d_particles[i].weight = d_cinfo[icell].weight;
455   });
456   this->modify(Device,PARTICLE_MASK);
457 }
458 
459 /* ----------------------------------------------------------------------
460    clone/delete each particle based on ratio of its initial/final weights
461    called by Update after particle move and migration
462    only called if particle weighting is enabled
463    only grid-based weighting is currently implemented
464 ------------------------------------------------------------------------- */
465 
466 struct PostWeightPair { int i; int id; };
467 
post_weight()468 void ParticleKokkos::post_weight()
469 {
470   if (ncustom) {
471     error->all(FLERR,"Custom per-particles attributes not yet supported with Kokkos");
472   }
473   constexpr int METHOD = 1;
474   if (METHOD == 1) { // just call the host one
475     this->sync(Host,PARTICLE_MASK);
476     Particle::post_weight();
477     this->modify(Host,PARTICLE_MASK);
478   } else if (METHOD == 2) { // Kokkos-parallel, gives same (correct) answer
479     Kokkos::View<double*> d_ratios("post_weight:ratios", nlocal);
480     auto h_ratios = Kokkos::create_mirror_view(d_ratios);
481     auto grid_kk = dynamic_cast<GridKokkos*>(grid);
482     auto& k_cinfo = grid_kk->k_cinfo;
483     grid_kk->sync(Device,CINFO_MASK);
484     this->sync(Device,PARTICLE_MASK);
485     auto d_particles = k_particles.d_view;
486     auto d_cinfo = k_cinfo.d_view;
487     Kokkos::vector<PostWeightPair> map;
488     map.set_overallocation(0.5);
489     map.resize(nlocal);
490     auto d_map = map.d_view;
491     Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
492       auto icell = d_particles[i].icell;
493       d_ratios[i] = d_particles[i].weight / d_cinfo[icell].weight;
494       d_map[i].id = d_particles[i].id;
495       d_map[i].i = i;
496     });
497     Kokkos::deep_copy(h_ratios, d_ratios);
498     map.device_to_host();
499     int nlocal_original = nlocal;
500     int i = 0;
501     while (i < nlocal_original) {
502       auto ratio = h_ratios[map[i].i];
503       if (ratio == 1.0) {
504         i++;
505       } else if (ratio < 1.0) {
506         if (wrandom->uniform() > ratio) {
507           map[i] = map.back();
508           if (map.size() > nlocal_original) ++i;
509           else --nlocal_original;
510           map.pop_back();
511         } else {
512           ++i;
513         }
514       } else if (ratio > 1.0) {
515         int nclone = int(ratio);
516         double fraction = ratio - nclone;
517         --nclone;
518         if (wrandom->uniform() < fraction) ++nclone;
519         for (int m = 0; m < nclone; ++m) {
520           map.push_back(map[i]);
521           map.back().id = MAXSMALLINT * wrandom->uniform();
522         }
523         ++i;
524       }
525     }
526     map.host_to_device();
527     nlocal = (int(map.size()));
528     grow(0);
529     Kokkos::View<OnePart*> d_newparticles("post_weight:newparticles", maxlocal);
530     d_map = map.d_view;
531     Kokkos::parallel_for(nlocal, KOKKOS_LAMBDA(int i) {
532       d_newparticles[i] = d_particles[d_map[i].i];
533       d_newparticles[i].id = d_map[i].id;
534     });
535     Kokkos::deep_copy(k_particles.d_view,d_newparticles);
536     this->modify(Device,PARTICLE_MASK);
537   }
538 }
539 
540 /* ---------------------------------------------------------------------- */
541 
update_class_variables()542 void ParticleKokkos::update_class_variables() {
543   d_species = k_species.d_view;
544   this->sync(Device,SPECIES_MASK);
545 
546   boltz = update->boltz;
547   collide_rot = 0;
548   vibstyle = NONE;
549   if (collide) {
550     vibstyle = collide->vibstyle;
551     if (collide->rotstyle != NONE) collide_rot = 1;
552   }
553 }
554 
555 /* ----------------------------------------------------------------------
556    insure particle list can hold nextra new particles
557    if defined, also grow custom particle arrays and initialize with zeroes
558 ------------------------------------------------------------------------- */
559 
grow(int nextra)560 void ParticleKokkos::grow(int nextra)
561 {
562   bigint target = (bigint) nlocal + nextra;
563   if (target <= maxlocal) return;
564 
565   bigint newmax = maxlocal;
566   while (newmax < target) newmax += MAX(DELTA, newmax*1.1);
567   int oldmax = maxlocal;
568 
569   if (newmax > MAXSMALLINT)
570     error->one(FLERR,"Per-processor particle count is too big");
571 
572   maxlocal = newmax;
573   if (particles == NULL)
574     k_particles = tdual_particle_1d("particle:particles",maxlocal);
575   else {
576     this->sync(Device,PARTICLE_MASK); // force resize on device
577     k_particles.resize(maxlocal);
578     this->modify(Device,PARTICLE_MASK); // needed for auto sync
579   }
580   d_particles = k_particles.d_view;
581   particles = k_particles.h_view.data();
582 
583   if (ncustom == 0) return;
584 
585   for (int i = 0; i < ncustom; i++) {
586     if (ename[i] == NULL) continue;
587     grow_custom(i,oldmax,maxlocal);
588   }
589 }
590 
591 /* ----------------------------------------------------------------------
592    insure species list can hold maxspecies species
593    assumes that maxspecies has already been increased
594 ------------------------------------------------------------------------- */
595 
grow_species()596 void ParticleKokkos::grow_species()
597 {
598   if (sparta->kokkos->prewrap) {
599     Particle::grow_species();
600   } else {
601     if (species == NULL)
602       k_species = tdual_species_1d("particle:species",maxspecies);
603     else
604       k_species.resize(maxspecies);
605     species = k_species.h_view.data();
606   }
607 }
608 
609 /* ---------------------------------------------------------------------- */
610 
wrap_kokkos()611 void ParticleKokkos::wrap_kokkos()
612 {
613   // species
614 
615   if (species != k_species.h_view.data()) {
616     memoryKK->wrap_kokkos(k_species,species,nspecies,"particle:species");
617     k_species.modify_host();
618     k_species.sync_device();
619     memory->sfree(species);
620     species = k_species.h_view.data();
621   }
622 
623   // mixtures
624 
625   k_species2group = DAT::tdual_int_2d("particle:species2group",nmixture,nspecies);
626   for (int i = 0; i < nmixture; i++)
627     for (int j = 0; j < nspecies; j++)
628       k_species2group.h_view(i,j) = mixture[i]->species2group[j];
629   k_species2group.modify_host();
630   k_species2group.sync_device();
631 
632   //if (mixtures != k_mixtures.h_view.data()) {
633   //  memoryKK->wrap_kokkos(k_mixtures,mixture,nmixture,"particle:mixture");
634   //  k_mixtures.modify_host();
635   //  k_mixtures.sync_device();
636   //  memory->sfree(mixtures);
637   //  mixtures = k_mixtures.h_view.data();
638   //}
639 }
640 
641 /* ----------------------------------------------------------------------
642    add a custom attribute with name
643    assumes name does not already exist, except in case of restart
644    type = 0/1 for int/double
645    size = 0 for vector, size > 0 for array with size columns
646    allocate the vector or array to current maxlocal via grow_custom()
647    return index of its location;
648 ------------------------------------------------------------------------- */
649 
add_custom(char * name,int type,int size)650 int ParticleKokkos::add_custom(char *name, int type, int size)
651 {
652   ///modifies eivec,eiarray,edvec,edarray on either host or device, probably device since host isn't modified. May just want to use host
653   ///modifies ewhich on host, sync to device here since it is never modified on the device
654   //
655 
656   int index;
657 
658   // if name already exists
659   // just return index if a restart script and re-defining the name
660   // else error
661 
662   index = find_custom(name);
663   if (index >= 0) {
664     if (custom_restart_flag == NULL || custom_restart_flag[index] == 1)
665       error->all(FLERR,"Custom particle attribute name already exists");
666     custom_restart_flag[index] = 1;
667     return index;
668   }
669 
670   // use first available NULL entry or allocate a new one
671 
672   for (index = 0; index < ncustom; index++)
673     if (ename[index] == NULL) break;
674 
675   if (index == ncustom) {
676     ncustom++;
677     ename = (char **) memory->srealloc(ename,ncustom*sizeof(char *),
678                                        "particle:ename");
679     memory->grow(etype,ncustom,"particle:etype");
680     memory->grow(esize,ncustom,"particle:esize");
681     memoryKK->grow_kokkos(k_ewhich,ewhich,ncustom,"particle:ewhich");
682   }
683 
684   int n = strlen(name) + 1;
685   ename[index] = new char[n];
686   strcpy(ename[index],name);
687   etype[index] = type;
688   esize[index] = size;
689 
690   if (type == INT) {
691     if (size == 0) {
692       ewhich[index] = ncustom_ivec++;
693       eivec = (int **)
694         memory->srealloc(eivec,ncustom_ivec*sizeof(int *),"particle:eivec");
695       eivec[ncustom_ivec-1] = NULL;
696       k_eivec.resize(ncustom_ivec);
697       memory->grow(icustom_ivec,ncustom_ivec,"particle:icustom_ivec");
698       icustom_ivec[ncustom_ivec-1] = index;
699     } else {
700       ewhich[index] = ncustom_iarray++;
701       eiarray = (int ***)
702         memory->srealloc(eiarray,ncustom_iarray*sizeof(int **),
703                          "particle:eiarray");
704       eiarray[ncustom_iarray-1] = NULL;
705       k_eiarray.resize(ncustom_iarray);
706       memory->grow(icustom_iarray,ncustom_iarray,"particle:icustom_iarray");
707       icustom_iarray[ncustom_iarray-1] = index;
708       memoryKK->grow_kokkos(k_eicol,eicol,ncustom_iarray,"particle:eicol");
709       eicol[ncustom_iarray-1] = size;
710     }
711   } else if (type == DOUBLE) {
712     if (size == 0) {
713       ewhich[index] = ncustom_dvec++;
714       edvec = (double **)
715         memory->srealloc(edvec,ncustom_dvec*sizeof(double *),"particle:edvec");
716       edvec[ncustom_dvec-1] = NULL;
717       k_edvec.resize(ncustom_dvec);
718       memory->grow(icustom_dvec,ncustom_dvec,"particle:icustom_dvec");
719       icustom_dvec[ncustom_dvec-1] = index;
720     } else {
721       ewhich[index] = ncustom_darray++;
722       edarray = (double ***)
723         memory->srealloc(edarray,ncustom_darray*sizeof(double **),
724                          "particle:edarray");
725       edarray[ncustom_darray-1] = NULL;
726       k_edarray.resize(ncustom_darray);
727       memory->grow(icustom_darray,ncustom_darray,"particle:icustom_darray");
728       icustom_darray[ncustom_darray-1] = index;
729       memoryKK->grow_kokkos(k_edcol,edcol,ncustom_darray,"particle:edcol");
730       edcol[ncustom_darray-1] = size;
731     }
732   }
733 
734   // ewhich,eicol,edcol never modified on the device, so sync here
735 
736   k_ewhich.modify_host();
737   k_ewhich.sync_device();
738 
739   k_eicol.modify_host();
740   k_eicol.sync_device();
741 
742   k_edcol.modify_host();
743   k_edcol.sync_device();
744 
745   grow_custom(index,0,maxlocal);
746 
747   return index;
748 }
749 
750 /* ----------------------------------------------------------------------
751    grow the vector/array associated with custom attribute with index
752    nold = old length, nnew = new length (typically maxlocal)
753    set new values to 0 via memset()
754 ------------------------------------------------------------------------- */
755 
grow_custom(int index,int nold,int nnew)756 void ParticleKokkos::grow_custom(int index, int nold, int nnew)
757 {
758   // modifies the inner part of eivec,eiarray,edvec,edarray on whatever, and the outer view on the host
759 
760   k_eivec.sync_host();
761   k_eiarray.sync_host();
762   k_edvec.sync_host();
763   k_edarray.sync_host();
764 
765   if (etype[index] == INT) {
766     if (esize[index] == 0) {
767       int *ivector = eivec[ewhich[index]];
768       auto k_ivector = k_eivec.h_view[ewhich[index]].k_view;
769       memoryKK->grow_kokkos(k_ivector,ivector,nold+nnew,"particle:eivec");
770       eivec[ewhich[index]] = ivector;
771     } else {
772       int **iarray = eiarray[ewhich[index]];
773       auto &k_iarray = k_eiarray.h_view[ewhich[index]].k_view;
774       memoryKK->grow_kokkos(k_iarray,iarray,nold+nnew,esize[index],"particle:eiarray");
775       eiarray[ewhich[index]] = iarray;
776     }
777 
778   } else {
779     if (esize[index] == 0) {
780       double *dvector = edvec[ewhich[index]];
781       auto k_dvector = k_edvec.h_view[ewhich[index]].k_view;
782       memoryKK->grow_kokkos(k_dvector,dvector,nold+nnew,"particle:edvec");
783       edvec[ewhich[index]] = dvector;
784     } else {
785       double **darray = edarray[ewhich[index]];
786       auto k_darray = k_edarray.h_view[ewhich[index]].k_view;
787       memoryKK->grow_kokkos(k_darray,darray,nold+nnew,esize[index],"particle:edarray");
788       edarray[ewhich[index]] = darray;
789     }
790   }
791 
792   k_eivec.modify_host();
793   k_eiarray.modify_host();
794   k_edvec.modify_host();
795   k_edarray.modify_host();
796 
797   k_eivec.sync_device();
798   k_eiarray.sync_device();
799   k_edvec.sync_device();
800   k_edarray.sync_device();
801 }
802 
803 /* ----------------------------------------------------------------------
804    remove a custom attribute at location index
805    free memory for name and vector/array and set ptrs to NULL
806    ncustom lists never shrink, but indices stored between
807      the ncustom list and the dense vector/array lists must be reset
808 ------------------------------------------------------------------------- */
809 
remove_custom(int index)810 void ParticleKokkos::remove_custom(int index)
811 {
812   // modifies the outer host view, deletes the inner dual view
813   //
814   delete [] ename[index];
815   ename[index] = NULL;
816 
817   if (etype[index] == INT) {
818     if (esize[index] == 0) {
819       memoryKK->destroy_kokkos(k_eivec.h_view[ewhich[index]].k_view,eivec[ewhich[index]]);
820       ncustom_ivec--;
821       for (int i = ewhich[index]; i < ncustom_ivec; i++) {
822         icustom_ivec[i] = icustom_ivec[i+1];
823         ewhich[icustom_ivec[i]] = i;
824         eivec[i] = eivec[i+1];
825         k_eivec.h_view[i] = k_eivec.h_view[i+1];
826       }
827     } else {
828       memoryKK->destroy_kokkos(eiarray[ewhich[index]]);
829       ncustom_iarray--;
830       for (int i = ewhich[index]; i < ncustom_iarray; i++) {
831         icustom_iarray[i] = icustom_iarray[i+1];
832         ewhich[icustom_iarray[i]] = i;
833         eiarray[i] = eiarray[i+1];
834         eicol[i] = eicol[i+1];
835         k_eiarray.h_view[i] = k_eiarray.h_view[i+1];
836       }
837     }
838   } else if (etype[index] == DOUBLE) {
839     if (esize[index] == 0) {
840       memoryKK->destroy(edvec[ewhich[index]]);
841       ncustom_dvec--;
842       for (int i = ewhich[index]; i < ncustom_dvec; i++) {
843         icustom_dvec[i] = icustom_dvec[i+1];
844         ewhich[icustom_dvec[i]] = i;
845         edvec[i] = edvec[i+1];
846         k_edvec.h_view[i] = k_edvec.h_view[i+1];
847       }
848       k_edvec.modify_host();
849     } else {
850       memoryKK->destroy(edarray[ewhich[index]]);
851       ncustom_darray--;
852       for (int i = ewhich[index]; i < ncustom_darray; i++) {
853         icustom_darray[i] = icustom_darray[i+1];
854         ewhich[icustom_darray[i]] = i;
855         edarray[i] = edarray[i+1];
856         edcol[i] = edcol[i+1];
857         k_edarray.h_view[i] = k_edarray.h_view[i+1];
858       }
859       k_edarray.modify_host();
860     }
861   }
862 
863   // set ncustom = 0 if custom list is now entirely empty
864 
865   int empty = 1;
866   for (int i = 0; i < ncustom; i++)
867     if (ename[i]) empty = 0;
868   if (empty) ncustom = 0;
869 
870   k_eivec.sync_device();
871   k_eiarray.sync_device();
872   k_edvec.sync_device();
873   k_edarray.sync_device();
874 }
875 
876 /* ----------------------------------------------------------------------
877    copy info for one particle in custom attribute vectors/arrays
878    into location I from location J
879 ------------------------------------------------------------------------- */
880 
copy_custom(int i,int j)881 void ParticleKokkos::copy_custom(int i, int j)
882 {
883   // need sync/modify host of inner view
884   int m;
885 
886   // caller does not always check this
887   // shouldn't be a problem, but valgrind can complain if memcpy to self
888   // oddly memcpy(&particles[i],&particles[j],sizeof(OnePart)) seems OK
889 
890   if (i == j) return;
891 
892   // 4 flavors of vectors/arrays
893 
894   if (ncustom_ivec) {
895     for (m = 0; m < ncustom_ivec; m++) eivec[m][i] = eivec[m][j];
896   }
897   if (ncustom_iarray) {
898     for (m = 0; m < ncustom_iarray; m++)
899       memcpy(eiarray[m][i],eiarray[m][j],eicol[m]*sizeof(int));
900   }
901   if (ncustom_dvec) {
902     for (m = 0; m < ncustom_dvec; m++) edvec[m][i] = edvec[m][j];
903   }
904   if (ncustom_darray) {
905     for (m = 0; m < ncustom_darray; m++)
906       memcpy(edarray[m][i],edarray[m][j],edcol[m]*sizeof(double));
907   }
908 }
909 
910 /* ----------------------------------------------------------------------
911    pack a custom attributes for a single particle N into buf
912    this is done in order of 4 styles of vectors/arrays, not in ncustom order
913 ------------------------------------------------------------------------- */
914 
pack_custom(int n,char * buf)915 void ParticleKokkos::pack_custom(int n, char *buf)
916 {
917   this->sync(Host,CUSTOM_MASK);
918   Particle::pack_custom(n,buf);
919 }
920 
921 /* ----------------------------------------------------------------------
922    unpack custom attributes for a single particle N from buf
923    this is done in order of 4 styles of vectors/arrays, not in ncustom order
924 ------------------------------------------------------------------------- */
925 
unpack_custom(char * buf,int n)926 void ParticleKokkos::unpack_custom(char *buf, int n)
927 {
928   Particle::unpack_custom(buf,n);
929   this->modify(Host,CUSTOM_MASK);
930 }
931 
932 /* ---------------------------------------------------------------------- */
933 
sync(ExecutionSpace space,unsigned int mask)934 void ParticleKokkos::sync(ExecutionSpace space, unsigned int mask)
935 {
936   if (space == Device) {
937     if (sparta->kokkos->auto_sync)
938       modify(Host,mask);
939     if (mask & PARTICLE_MASK) k_particles.sync_device();
940     if (mask & SPECIES_MASK) k_species.sync_device();
941     if (mask & CUSTOM_MASK) {
942       if (ncustom) {
943         if (ncustom_ivec)
944           for (int i = 0; i < ncustom_ivec; i++)
945             k_eivec.h_view[i].k_view.sync_device();
946 
947         if (ncustom_iarray)
948           for (int i = 0; i < ncustom_iarray; i++)
949             k_eiarray.h_view[i].k_view.sync_device();
950 
951         if (ncustom_dvec)
952           for (int i = 0; i < ncustom_dvec; i++)
953             k_edvec.h_view[i].k_view.sync_device();
954 
955         if (ncustom_darray)
956           for (int i = 0; i < ncustom_darray; i++)
957             k_edarray.h_view[i].k_view.sync_device();
958       }
959     }
960   } else {
961     if (mask & PARTICLE_MASK) k_particles.sync_host();
962     if (mask & SPECIES_MASK) k_species.sync_host();
963     if (mask & CUSTOM_MASK) {
964       if (ncustom_ivec)
965         for (int i = 0; i < ncustom_ivec; i++)
966           k_eivec.h_view[i].k_view.sync_host();
967 
968       if (ncustom_iarray)
969         for (int i = 0; i < ncustom_iarray; i++)
970           k_eiarray.h_view[i].k_view.sync_host();
971 
972       if (ncustom_dvec)
973         for (int i = 0; i < ncustom_dvec; i++)
974           k_edvec.h_view[i].k_view.sync_host();
975 
976       if (ncustom_darray)
977         for (int i = 0; i < ncustom_darray; i++)
978           k_edarray.h_view[i].k_view.sync_host();
979     }
980   }
981 }
982 
983 /* ---------------------------------------------------------------------- */
984 
modify(ExecutionSpace space,unsigned int mask)985 void ParticleKokkos::modify(ExecutionSpace space, unsigned int mask)
986 {
987   if (space == Device) {
988     if (mask & PARTICLE_MASK) k_particles.modify_device();
989     if (mask & SPECIES_MASK) k_species.modify_device();
990     if (mask & CUSTOM_MASK) {
991       if (ncustom) {
992         if (ncustom_ivec)
993           for (int i = 0; i < ncustom_ivec; i++)
994             k_eivec.h_view[i].k_view.modify_device();
995 
996         if (ncustom_iarray)
997           for (int i = 0; i < ncustom_iarray; i++)
998             k_eiarray.h_view[i].k_view.modify_device();
999 
1000         if (ncustom_dvec)
1001           for (int i = 0; i < ncustom_dvec; i++)
1002             k_edvec.h_view[i].k_view.modify_device();
1003 
1004         if (ncustom_darray)
1005           for (int i = 0; i < ncustom_darray; i++)
1006             k_edarray.h_view[i].k_view.modify_device();
1007       }
1008     }
1009     if (sparta->kokkos->auto_sync)
1010       sync(Host,mask);
1011   } else {
1012     if (mask & PARTICLE_MASK) k_particles.modify_host();
1013     if (mask & SPECIES_MASK) k_species.modify_host();
1014     if (mask & CUSTOM_MASK) {
1015       if (ncustom) {
1016         if (ncustom_ivec)
1017           for (int i = 0; i < ncustom_ivec; i++)
1018             k_eivec.h_view[i].k_view.modify_host();
1019 
1020         if (ncustom_iarray)
1021           for (int i = 0; i < ncustom_iarray; i++)
1022             k_eiarray.h_view[i].k_view.modify_host();
1023 
1024         if (ncustom_dvec)
1025           for (int i = 0; i < ncustom_dvec; i++)
1026             k_edvec.h_view[i].k_view.modify_host();
1027 
1028         if (ncustom_darray)
1029           for (int i = 0; i < ncustom_darray; i++)
1030             k_edarray.h_view[i].k_view.modify_host();
1031       }
1032     }
1033   }
1034 }
1035