1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "string.h"
16 #include "compute_grid_kokkos.h"
17 #include "particle_kokkos.h"
18 #include "mixture.h"
19 #include "grid_kokkos.h"
20 #include "update.h"
21 #include "modify.h"
22 #include "memory_kokkos.h"
23 #include "error.h"
24 #include "sparta_masks.h"
25 #include "kokkos.h"
26 
27 using namespace SPARTA_NS;
28 
29 // user keywords
30 
31 enum{NUM,NRHO,NFRAC,MASS,MASSRHO,MASSFRAC,
32      U,V,W,USQ,VSQ,WSQ,KE,TEMPERATURE,EROT,TROT,EVIB,TVIB,
33      PXRHO,PYRHO,PZRHO,KERHO};
34 
35 // internal accumulators
36 
37 enum{COUNT,MASSSUM,MVX,MVY,MVZ,MVXSQ,MVYSQ,MVZSQ,MVSQ,
38      ENGROT,ENGVIB,DOFROT,DOFVIB,CELLCOUNT,CELLMASS,LASTSIZE};
39 
40 // max # of quantities to accumulate for any user value
41 
42 #define MAXACCUMULATE 2
43 
44 /* ---------------------------------------------------------------------- */
45 
ComputeGridKokkos(SPARTA * sparta,int narg,char ** arg)46 ComputeGridKokkos::ComputeGridKokkos(SPARTA *sparta, int narg, char **arg) :
47   ComputeGrid(sparta, narg, arg)
48 {
49   kokkos_flag = 1;
50 
51   k_unique = DAT::tdual_int_1d("compute/grid:unique",npergroup);
52   for (int m = 0; m < npergroup; m++)
53     k_unique.h_view(m) = unique[m];
54   k_unique.modify_host();
55   k_unique.sync_device();
56   d_unique = k_unique.d_view;
57 }
58 
59 /* ---------------------------------------------------------------------- */
60 
~ComputeGridKokkos()61 ComputeGridKokkos::~ComputeGridKokkos()
62 {
63   if (copymode) return;
64 
65   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
66   memoryKK->destroy_kokkos(k_tally,tally);
67   vector_grid = NULL;
68   tally = NULL;
69 }
70 
71 /* ---------------------------------------------------------------------- */
72 
compute_per_grid()73 void ComputeGridKokkos::compute_per_grid()
74 {
75   if (sparta->kokkos->prewrap) {
76     ComputeGrid::compute_per_grid();
77   } else {
78     compute_per_grid_kokkos();
79     k_tally.modify_device();
80     k_tally.sync_host();
81   }
82 }
83 
84 /* ---------------------------------------------------------------------- */
85 
compute_per_grid_kokkos()86 void ComputeGridKokkos::compute_per_grid_kokkos()
87 {
88   invoked_per_grid = update->ntimestep;
89 
90   ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
91   particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
92   d_particles = particle_kk->k_particles.d_view;
93   d_species = particle_kk->k_species.d_view;
94 
95   GridKokkos* grid_kk = (GridKokkos*) grid;
96   d_cellcount = grid_kk->d_cellcount;
97   d_plist = grid_kk->d_plist;
98   grid_kk->sync(Device,CINFO_MASK);
99   d_cinfo = grid_kk->k_cinfo.d_view;
100 
101   d_s2g = particle_kk->k_species2group.d_view;
102   int nlocal = particle->nlocal;
103 
104   // zero all accumulators
105 
106   Kokkos::deep_copy(d_tally,0.0);
107 
108   // loop over all particles, skip species not in mixture group
109   // skip cells not in grid group
110   // perform all tallies needed for each particle
111   // depends on its species group and the user-requested values
112 
113   need_dup = sparta->kokkos->need_dup<DeviceType>();
114   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
115     need_dup = 0;
116 
117   if (need_dup)
118     dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
119   else
120     ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
121 
122   copymode = 1;
123   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
124     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid>(0,nglocal),*this);
125   else {
126     if (sparta->kokkos->need_atomics)
127       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
128     else
129       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
130   }
131   DeviceType().fence();
132   copymode = 0;
133 
134   if (need_dup) {
135     Kokkos::Experimental::contribute(d_tally, dup_tally);
136     dup_tally = decltype(dup_tally)(); // free duplicated memory
137   }
138 
139   d_particles = t_particle_1d(); // destroy reference to reduce memory use
140   d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
141 }
142 
143 /* ---------------------------------------------------------------------- */
144 
145 template<int NEED_ATOMICS>
146 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const147 void ComputeGridKokkos::operator()(TagComputeGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
148 
149   // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
150 
151   auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
152   auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
153 
154   const int ispecies = d_particles[i].ispecies;
155   const int igroup = d_s2g(imix,ispecies);
156   if (igroup < 0) return;
157 
158   const int icell = d_particles[i].icell;
159   if (!(d_cinfo[icell].mask & groupbit)) return;
160 
161   const double mass = d_species[ispecies].mass;
162   double* v = d_particles[i].v;
163 
164   if (cellmass) a_tally(icell,cellmass) += mass;
165   if (cellcount) a_tally(icell,cellcount) += 1.0;
166 
167   // loop has all possible values particle needs to accumulate
168   // subset defined by user values are indexed by accumulate vector
169 
170   int k = igroup*npergroup;
171 
172   for (int m = 0; m < npergroup; m++) {
173     switch (d_unique[m]) {
174     case COUNT:
175       a_tally(icell,k++) += 1.0;
176       break;
177     case MASSSUM:
178       a_tally(icell,k++) += mass;
179       break;
180     case MVX:
181       a_tally(icell,k++) += mass*v[0];
182       break;
183     case MVY:
184       a_tally(icell,k++) += mass*v[1];
185       break;
186     case MVZ:
187       a_tally(icell,k++) += mass*v[2];
188       break;
189     case MVXSQ:
190       a_tally(icell,k++) += mass*v[0]*v[0];
191       break;
192     case MVYSQ:
193       a_tally(icell,k++) += mass*v[1]*v[1];
194       break;
195     case MVZSQ:
196       a_tally(icell,k++) += mass*v[2]*v[2];
197       break;
198     case MVSQ:
199       a_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
200       break;
201     case ENGROT:
202       a_tally(icell,k++) += d_particles[i].erot;
203       break;
204     case ENGVIB:
205       a_tally(icell,k++) += d_particles[i].evib;
206       break;
207     case DOFROT:
208       a_tally(icell,k++) += d_species[ispecies].rotdof;
209       break;
210     case DOFVIB:
211       a_tally(icell,k++) += d_species[ispecies].vibdof;
212       break;
213     }
214   }
215 }
216 
217 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_compute_per_grid,const int & icell) const218 void ComputeGridKokkos::operator()(TagComputeGrid_compute_per_grid, const int &icell) const {
219   if (!(d_cinfo[icell].mask & groupbit)) return;
220 
221   const int np = d_cellcount[icell];
222 
223   for (int n = 0; n < np; n++) {
224 
225     const int i = d_plist(icell,n);
226 
227     const int ispecies = d_particles[i].ispecies;
228     const int igroup = d_s2g(imix,ispecies);
229     if (igroup < 0) return;
230 
231     const double mass = d_species[ispecies].mass;
232     double* v = d_particles[i].v;
233 
234     if (cellmass) d_tally(icell,cellmass) += mass;
235     if (cellcount) d_tally(icell,cellcount) += 1.0;
236 
237     // loop has all possible values particle needs to accumulate
238     // subset defined by user values are indexed by accumulate vector
239 
240     int k = igroup*npergroup;
241 
242     for (int m = 0; m < npergroup; m++) {
243       switch (d_unique[m]) {
244       case COUNT:
245         d_tally(icell,k++) += 1.0;
246         break;
247       case MASSSUM:
248         d_tally(icell,k++) += mass;
249         break;
250       case MVX:
251         d_tally(icell,k++) += mass*v[0];
252         break;
253       case MVY:
254         d_tally(icell,k++) += mass*v[1];
255         break;
256       case MVZ:
257         d_tally(icell,k++) += mass*v[2];
258         break;
259       case MVXSQ:
260         d_tally(icell,k++) += mass*v[0]*v[0];
261         break;
262       case MVYSQ:
263         d_tally(icell,k++) += mass*v[1]*v[1];
264         break;
265       case MVZSQ:
266         d_tally(icell,k++) += mass*v[2]*v[2];
267         break;
268       case MVSQ:
269         d_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
270         break;
271       case ENGROT:
272         d_tally(icell,k++) += d_particles[i].erot;
273         break;
274       case ENGVIB:
275         d_tally(icell,k++) += d_particles[i].evib;
276         break;
277       case DOFROT:
278         d_tally(icell,k++) += d_species[ispecies].rotdof;
279         break;
280       case DOFVIB:
281         d_tally(icell,k++) += d_species[ispecies].vibdof;
282         break;
283       }
284     }
285   }
286 }
287 
288 /* ----------------------------------------------------------------------
289    query info about internal tally array for this compute
290    index = which column of output (0 for vec, 1 to N for array)
291    return # of tally quantities for this index
292    also return array = ptr to tally array
293    also return cols = ptr to list of columns in tally for this index
294 ------------------------------------------------------------------------- */
295 
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)296 int ComputeGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
297 {
298   d_array = d_tally;
299   return 0;
300 }
301 
302 /* ----------------------------------------------------------------------
303    tally accumulated info to compute final normalized values
304    index = which column of output (0 for vec, 1 to N for array)
305    for etally = NULL:
306      use internal tallied info for single timestep, set nsample = 1
307      if onecell = -1, compute values for all grid cells
308        store results in vector_grid with nstride = 1 (single col of array_grid)
309      if onecell >= 0, compute single value for onecell and return it
310    for etally = ptr to caller array:
311      use external tallied info for many timesteps
312      nsample = additional normalization factor used by some values
313      emap = list of etally columns to use, # of columns determined by index
314      store results in caller's vec, spaced by nstride
315    if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
316 ------------------------------------------------------------------------- */
317 
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)318 void ComputeGridKokkos::post_process_grid_kokkos(int index, int nsample,
319                                       DAT::t_float_2d_lr d_etally, int *emap,
320                                       DAT::t_float_1d_strided d_vec)
321 {
322   index--;
323   int ivalue = index % nvalue;
324 
325   int lo = 0;
326   int hi = nglocal;
327 
328   if (!d_etally.data()) {
329     nsample = 1;
330     d_etally = d_tally;
331     emap = map[index];
332     d_vec = d_vector;
333     nstride = 1;
334   }
335 
336   this->nsample = nsample;
337   this->d_etally = d_etally;
338   this->d_vec = d_vec;
339   this->nstride = nstride;
340 
341   GridKokkos* grid_kk = (GridKokkos*) grid;
342   grid_kk->sync(Device,CINFO_MASK);
343   d_cinfo = grid_kk->k_cinfo.d_view;
344 
345   fnum = update->fnum;
346 
347   copymode = 1;
348 
349   // compute normalized final value for each grid cell
350 
351   switch (value[ivalue]) {
352 
353   case NUM:
354     {
355       count = emap[0];
356       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NUM>(lo,hi),*this);
357       DeviceType().fence();
358       break;
359     }
360 
361   case MASS:
362     {
363       mass = emap[0];
364       count = emap[1];
365       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_MASS>(lo,hi),*this);
366       DeviceType().fence();
367       break;
368     }
369 
370   case NRHO:
371     {
372       count = emap[0];
373       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NRHO>(lo,hi),*this);
374       DeviceType().fence();
375       break;
376     }
377 
378   case MASSRHO:
379     {
380       mass = emap[0];
381       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_MASSRHO>(lo,hi),*this);
382       DeviceType().fence();
383       break;
384     }
385 
386   case NFRAC:
387   case MASSFRAC:
388     {
389       count_or_mass = emap[0];
390       cell_count_or_mass = emap[1];
391       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NFRAC>(lo,hi),*this);
392       DeviceType().fence();
393       break;
394     }
395 
396   case U:
397   case V:
398   case W:
399   case USQ:
400   case VSQ:
401   case WSQ:
402     {
403       velocity = emap[0];
404       mass = emap[1];
405       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_U>(lo,hi),*this);
406       DeviceType().fence();
407       break;
408     }
409 
410   case KE:
411     {
412       mvsq = emap[0];
413       count = emap[1];
414       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_KE>(lo,hi),*this);
415       DeviceType().fence();
416       break;
417     }
418 
419   case TEMPERATURE:
420     {
421       mvsq = emap[0];
422       count = emap[1];
423       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_TEMPERATURE>(lo,hi),*this);
424       DeviceType().fence();
425       break;
426     }
427 
428   case EROT:
429   case EVIB:
430     {
431       eng = emap[0];
432       count = emap[1];
433       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_EROT>(lo,hi),*this);
434       DeviceType().fence();
435       break;
436     }
437 
438   case TROT:
439   case TVIB:
440     {
441       eng = emap[0];
442       dof = emap[1];
443       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_TROT>(lo,hi),*this);
444       DeviceType().fence();
445       break;
446     }
447 
448   case PXRHO:
449   case PYRHO:
450   case PZRHO:
451     {
452       mom = emap[0];
453       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_PXRHO>(lo,hi),*this);
454       DeviceType().fence();
455       break;
456     }
457 
458   case KERHO:
459     {
460       ke = emap[0];
461       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_KERHO>(lo,hi),*this);
462       DeviceType().fence();
463       break;
464     }
465   }
466   copymode = 0;
467 }
468 
469 /* ---------------------------------------------------------------------- */
470 
471 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NUM,const int & icell) const472 void ComputeGridKokkos::operator()(TagComputeGrid_NUM, const int &icell) const {
473   d_vec[icell] = d_etally(icell,count) / nsample;
474 }
475 
476 /* ---------------------------------------------------------------------- */
477 
478 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_MASS,const int & icell) const479 void ComputeGridKokkos::operator()(TagComputeGrid_MASS, const int &icell) const {
480   const double norm = d_etally(icell,count);
481   if (norm == 0.0) d_vec[icell] = 0.0;
482   else d_vec[icell] = d_etally(icell,mass) / norm;
483 }
484 
485 /* ---------------------------------------------------------------------- */
486 
487 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NRHO,const int & icell) const488 void ComputeGridKokkos::operator()(TagComputeGrid_NRHO, const int &icell) const {
489   const double norm = d_cinfo[icell].volume;
490   if (norm == 0.0) d_vec[icell] = 0.0;
491   else {
492     const double wt = fnum * d_cinfo[icell].weight / norm;
493     d_vec[icell] = wt * d_etally(icell,count) / nsample;
494   }
495 }
496 
497 /* ---------------------------------------------------------------------- */
498 
499 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_MASSRHO,const int & icell) const500 void ComputeGridKokkos::operator()(TagComputeGrid_MASSRHO, const int &icell) const {
501   const double norm = d_cinfo[icell].volume;
502   if (norm == 0.0) d_vec[icell] = 0.0;
503   else {
504     const double wt = fnum * d_cinfo[icell].weight / norm;
505     d_vec[icell] = wt * d_etally(icell,mass) / nsample;
506   }
507 }
508 
509 /* ---------------------------------------------------------------------- */
510 
511 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NFRAC,const int & icell) const512 void ComputeGridKokkos::operator()(TagComputeGrid_NFRAC, const int &icell) const {
513   const double norm = d_etally(icell,cell_count_or_mass);
514   if (norm == 0.0) d_vec[icell] = 0.0;
515   else d_vec[icell] = d_etally(icell,count_or_mass) / norm;
516 }
517 
518 /* ---------------------------------------------------------------------- */
519 
520 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_U,const int & icell) const521 void ComputeGridKokkos::operator()(TagComputeGrid_U, const int &icell) const {
522   const double norm = d_etally(icell,mass);
523   if (norm == 0.0) d_vec[icell] = 0.0;
524   else d_vec[icell] = d_etally(icell,velocity) / norm;
525 }
526 
527 /* ---------------------------------------------------------------------- */
528 
529 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_KE,const int & icell) const530 void ComputeGridKokkos::operator()(TagComputeGrid_KE, const int &icell) const {
531   const double norm = d_etally(icell,count);
532   if (norm == 0.0) d_vec[icell] = 0.0;
533   else d_vec[icell] = eprefactor * d_etally(icell,mvsq) / norm;
534 }
535 
536 /* ---------------------------------------------------------------------- */
537 
538 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_TEMPERATURE,const int & icell) const539 void ComputeGridKokkos::operator()(TagComputeGrid_TEMPERATURE, const int &icell) const {
540   const double norm = d_etally(icell,count);
541   if (norm == 0.0) d_vec[icell] = 0.0;
542   else d_vec[icell] = tprefactor * d_etally(icell,mvsq) / norm;
543 }
544 
545 /* ---------------------------------------------------------------------- */
546 
547 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_EROT,const int & icell) const548 void ComputeGridKokkos::operator()(TagComputeGrid_EROT, const int &icell) const {
549   const double norm = d_etally(icell,count);
550   if (norm == 0.0) d_vec[icell] = 0.0;
551   else d_vec[icell] = d_etally(icell,eng) / norm;
552 }
553 
554 /* ---------------------------------------------------------------------- */
555 
556 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_TROT,const int & icell) const557 void ComputeGridKokkos::operator()(TagComputeGrid_TROT, const int &icell) const {
558   const double norm = d_etally(icell,dof);
559   if (norm == 0.0) d_vec[icell] = 0.0;
560   else d_vec[icell] = rvprefactor * d_etally(icell,eng) / norm;
561 }
562 
563 /* ---------------------------------------------------------------------- */
564 
565 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_PXRHO,const int & icell) const566 void ComputeGridKokkos::operator()(TagComputeGrid_PXRHO, const int &icell) const {
567   const double norm = d_cinfo[icell].volume;
568   if (norm == 0.0) d_vec[icell] = 0.0;
569   else {
570     const double wt = fnum * d_cinfo[icell].weight / norm;
571     d_vec[icell] = wt * d_etally(icell,mom) / nsample;
572   }
573 }
574 
575 /* ---------------------------------------------------------------------- */
576 
577 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_KERHO,const int & icell) const578 void ComputeGridKokkos::operator()(TagComputeGrid_KERHO, const int &icell) const {
579   const double norm = d_cinfo[icell].volume;
580   if (norm == 0.0) d_vec[icell] = 0.0;
581   else {
582     const double wt = fnum * d_cinfo[icell].weight / norm;
583     d_vec[icell] = eprefactor * wt * d_etally(icell,ke) / nsample;
584   }
585 }
586 
587 /* ----------------------------------------------------------------------
588    reallocate data storage if nglocal has changed
589    called by init() and whenever grid changes
590 ------------------------------------------------------------------------- */
591 
reallocate()592 void ComputeGridKokkos::reallocate()
593 {
594   if (grid->nlocal == nglocal) return;
595 
596   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
597   memoryKK->destroy_kokkos(k_tally,tally);
598   nglocal = grid->nlocal;
599   memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"compute_grid:vector_grid");
600   d_vector = k_vector_grid.d_view;
601   memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"compute_grid:tally");
602   d_tally = k_tally.d_view;
603 }
604