/* ----------------------------------------------------------------------
   SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
   http://sparta.sandia.gov
   Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
   Sandia National Laboratories

   Copyright (2014) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level SPARTA directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Alan Stagg (Sandia)
------------------------------------------------------------------------- */
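
/* ----------------------------------------------------------------------
   Example input script usage (a sketch based on the compute pflux/grid
   doc page; this Kokkos variant is selected when the kk suffix is active):

     compute 1 pflux/grid all species momxx momyy momzz momxy

   each momxx/momyy/... value becomes one column of per-cell output
------------------------------------------------------------------------- */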

#include "math.h"
#include "stdlib.h"
#include "string.h"
#include "compute_pflux_grid_kokkos.h"
#include "particle_kokkos.h"
#include "mixture.h"
#include "grid_kokkos.h"
#include "update.h"
#include "modify.h"
#include "comm.h"
#include "memory_kokkos.h"
#include "error.h"
#include "sparta_masks.h"
#include "kokkos.h"

using namespace SPARTA_NS;

// user keywords

enum{MOMXX,MOMYY,MOMZZ,MOMXY,MOMYZ,MOMXZ};

// internal accumulators

enum{MASSSUM,MVX,MVY,MVZ,MVXSQ,MVYSQ,MVZSQ,MVXVY,MVYVZ,MVXVZ,LASTSIZE};
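
// each user keyword expands to the accumulators its formula needs,
// e.g. momxx needs MASSSUM, MVX, MVXSQ and momxy needs MASSSUM, MVX,
// MVY, MVXVY; unique[] (built by the base class) holds the
// de-duplicated union across all requested values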

/* ---------------------------------------------------------------------- */

ComputePFluxGridKokkos::ComputePFluxGridKokkos(SPARTA *sparta, int narg, char **arg) :
  ComputePFluxGrid(sparta, narg, arg)
{
  kokkos_flag = 1;

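  // mirror the unique[] accumulator list built by the base-class
  // constructor into a dual view so device kernels can index it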
  k_unique = DAT::tdual_int_1d("compute/pflux/grid:unique",npergroup);
  for (int m = 0; m < npergroup; m++)
    k_unique.h_view(m) = unique[m];
  k_unique.modify_host();
  k_unique.sync_device();
  d_unique = k_unique.d_view;
}

/* ---------------------------------------------------------------------- */

ComputePFluxGridKokkos::~ComputePFluxGridKokkos()
{
  if (copymode) return;

  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  vector_grid = NULL;
  tally = NULL;
}

/* ---------------------------------------------------------------------- */

void ComputePFluxGridKokkos::compute_per_grid()
{
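  // before the Kokkos wrap of the run is in place, fall back to the
  // host implementation; otherwise tally on the device and sync the
  // result back to the host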
  if (sparta->kokkos->prewrap) {
    ComputePFluxGrid::compute_per_grid();
  } else {
    compute_per_grid_kokkos();
    k_tally.modify_device();
    k_tally.sync_host();
  }
}

/* ---------------------------------------------------------------------- */

void ComputePFluxGridKokkos::compute_per_grid_kokkos()
{
  invoked_per_grid = update->ntimestep;

  ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
  particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
  d_particles = particle_kk->k_particles.d_view;
  d_species = particle_kk->k_species.d_view;

  GridKokkos* grid_kk = (GridKokkos*) grid;
  d_cellcount = grid_kk->d_cellcount;
  d_plist = grid_kk->d_plist;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;

  d_s2g = particle_kk->k_species2group.d_view;
  int nlocal = particle->nlocal;

  // zero all accumulators

  Kokkos::deep_copy(d_tally,0.0);

  // loop over all particles, skip species not in mixture group

  need_dup = sparta->kokkos->need_dup<DeviceType>();
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    need_dup = 0;

  if (need_dup)
    dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
  else
    ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
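
  // with ScatterDuplicated, each thread accumulates into its own copy of
  // d_tally and Kokkos::Experimental::contribute() below reduces the
  // copies; with ScatterNonDuplicated, the sums decay to plain or atomic
  // adds on d_tally itself, e.g. inside a kernel (a minimal sketch):
  //   auto a_tally = dup_tally.access(); // per-thread accumulator
  //   a_tally(icell,k) += mass;          // race-free without atomics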

  copymode = 1;
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid>(0,nglocal),*this);
  else {
    if (sparta->kokkos->need_atomics)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
    else
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
  }
  DeviceType().fence();
  copymode = 0;

  if (need_dup) {
    Kokkos::Experimental::contribute(d_tally, dup_tally);
    dup_tally = decltype(dup_tally)(); // free duplicated memory
  }

  d_particles = t_particle_1d(); // destroy reference to reduce memory use
  d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
}

/* ---------------------------------------------------------------------- */

template<int NEED_ATOMICS>
KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {

  // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

  auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
  auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();

  const int ispecies = d_particles[i].ispecies;
  const int igroup = d_s2g(imix,ispecies);
  if (igroup < 0) return;

  const int icell = d_particles[i].icell;
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const double mass = d_species[ispecies].mass;
  double *v = d_particles[i].v;

  // loop over all accumulators this particle could contribute to;
  // the subset needed by the user's values is selected via d_unique

  int k = igroup*npergroup;

  for (int m = 0; m < npergroup; m++) {
    switch (d_unique[m]) {
    case MASSSUM:
      a_tally(icell,k++) += mass;
      break;
    case MVX:
      a_tally(icell,k++) += mass*v[0];
      break;
    case MVY:
      a_tally(icell,k++) += mass*v[1];
      break;
    case MVZ:
      a_tally(icell,k++) += mass*v[2];
      break;
    case MVXSQ:
      a_tally(icell,k++) += mass*v[0]*v[0];
      break;
    case MVYSQ:
      a_tally(icell,k++) += mass*v[1]*v[1];
      break;
    case MVZSQ:
      a_tally(icell,k++) += mass*v[2]*v[2];
      break;
    case MVXVY:
      a_tally(icell,k++) += mass*v[0]*v[1];
      break;
    case MVYVZ:
      a_tally(icell,k++) += mass*v[1]*v[2];
      break;
    case MVXVZ:
      a_tally(icell,k++) += mass*v[0]*v[2];
      break;
    }
  }
}

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_compute_per_grid, const int &icell) const {
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const int np = d_cellcount[icell];

  for (int n = 0; n < np; n++) {

    const int i = d_plist(icell,n);

    const int ispecies = d_particles[i].ispecies;
    const int igroup = d_s2g(imix,ispecies);
    if (igroup < 0) continue;

    const double mass = d_species[ispecies].mass;
    double *v = d_particles[i].v;

    int k = igroup*npergroup;

    for (int m = 0; m < npergroup; m++) {
      switch (d_unique[m]) {
      case MASSSUM:
        d_tally(icell,k++) += mass;
        break;
      case MVX:
        d_tally(icell,k++) += mass*v[0];
        break;
      case MVY:
        d_tally(icell,k++) += mass*v[1];
        break;
      case MVZ:
        d_tally(icell,k++) += mass*v[2];
        break;
      case MVXSQ:
        d_tally(icell,k++) += mass*v[0]*v[0];
        break;
      case MVYSQ:
        d_tally(icell,k++) += mass*v[1]*v[1];
        break;
      case MVZSQ:
        d_tally(icell,k++) += mass*v[2]*v[2];
        break;
      case MVXVY:
        d_tally(icell,k++) += mass*v[0]*v[1];
        break;
      case MVYVZ:
        d_tally(icell,k++) += mass*v[1]*v[2];
        break;
      case MVXVZ:
        d_tally(icell,k++) += mass*v[0]*v[2];
        break;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   query info about internal tally array for this compute
   Kokkos variant: return ptr to the device tally array via d_array
   (column info comes from the base-class query_tally_grid())
------------------------------------------------------------------------- */

int ComputePFluxGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
{
  d_array = d_tally;
  return 0;
}

/* ----------------------------------------------------------------------
   tally accumulated info to compute final normalized values
   index = which column of output (0 for vec, 1 to N for array)
   for etally = NULL:
     use internal tallied info for single timestep
     compute values for all grid cells
       store results in vector_grid with nstride = 1 (single col of array_grid)
   for etally = ptr to caller array:
     use external tallied info for many timesteps
     emap = list of etally columns to use, # of columns determined by index
     store results in caller's vec, spaced by nstride
   if norm = 0.0, set result to 0.0 directly to avoid dividing by zero
------------------------------------------------------------------------- */

void ComputePFluxGridKokkos::post_process_grid_kokkos(int index, int nsample,
                         DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
{
  index--;
  int ivalue = index % nvalue;

  int lo = 0;
  int hi = nglocal;

  if (!d_etally.data()) {
    nsample = 1;
    d_etally = d_tally;
    emap = map[index];
    d_vec = d_vector;
    nstride = 1;
  }

  this->d_etally = d_etally;
  this->d_vec = d_vec;
  this->nstride = nstride;
  this->nsample = nsample;

  // compute normalized final value for each grid cell
  // Vcm = Sum mv / Sum m = (Wx,Wy,Wz)
  // Wi = Sum mVi / M
  // momii = F/V Sum m(Vi - Wi)^2
  // momii = F/V [ Sum mVi^2 + M Wi^2 - 2 Wi Sum mVi ]
  // momii = F/V [ Sum mVi^2 - (Sum mVi)^2 / M ]
  // momij = F/V Sum m(Vi - Wi)(Vj - Wj)
  // momij = F/V [ Sum mViVj + M Wi Wj - Wj Sum mVi - Wi Sum mVj ]
  // momij = F/V [ Sum mViVj - Sum mVi Sum mVj / M ]
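  // the final forms follow by substituting Wi = Sum mVi / M into the
  // expanded products, so only raw per-cell sums need to be tallied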

  GridKokkos* grid_kk = (GridKokkos*) grid;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;
  fnum = update->fnum;

  mass = emap[0];
  switch (value[ivalue]) {

  case MOMXX:
  case MOMYY:
  case MOMZZ:
    {
      mv = emap[1];
      mvv = emap[2];
      copymode = 1;
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_post_process_grid_diag>(lo,hi),*this);
      DeviceType().fence();
      copymode = 0;
      break;
    }
  case MOMXY:
  case MOMYZ:
  case MOMXZ:
    {
      mv1 = emap[1];
      mv2 = emap[2];
      mvv = emap[3];
      copymode = 1;
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_post_process_grid_offdiag>(lo,hi),*this);
      DeviceType().fence();
      copymode = 0;
      break;
    }
  } // end switch
}
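
// worked example: for a momxx value, emap selects the MASSSUM, MVX, and
// MVXSQ tally columns, so the diagonal functor below evaluates
//   wt/nsample * [ Sum mVx^2 - (Sum mVx)^2 / M ]
// per cell, with wt = fnum * weight / volume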

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_post_process_grid_diag, const int &icell) const {
  double summass, summv, wt;
  summass = d_etally(icell,mass);
  if (summass == 0.0) d_vec[icell] = 0.0;
  else {
    wt = fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
    summv = d_etally(icell,mv);
    d_vec[icell] = wt/nsample * (d_etally(icell,mvv) - summv*summv/summass);
  }
}

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_post_process_grid_offdiag, const int &icell) const {
  double summass, wt;
  summass = d_etally(icell,mass);
  if (summass == 0.0) d_vec[icell] = 0.0;
  else {
    wt = fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
    d_vec[icell] = wt/nsample * (d_etally(icell,mvv) -
                                 d_etally(icell,mv1)*d_etally(icell,mv2)/summass);
  }
}

/* ----------------------------------------------------------------------
   reallocate arrays if nglocal has changed
   called by init() and whenever grid changes
------------------------------------------------------------------------- */

void ComputePFluxGridKokkos::reallocate()
{
  if (grid->nlocal == nglocal) return;

  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  nglocal = grid->nlocal;
  memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"pflux/grid:vector_grid");
  d_vector = k_vector_grid.d_view;
  memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"pflux/grid:tally");
  d_tally = k_tally.d_view;
}