1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Alan Stagg (Sandia)
17 ------------------------------------------------------------------------- */
18 
19 #include "math.h"
20 #include "stdlib.h"
21 #include "string.h"
22 #include "compute_eflux_grid_kokkos.h"
23 #include "particle_kokkos.h"
24 #include "mixture.h"
25 #include "grid_kokkos.h"
26 #include "update.h"
27 #include "modify.h"
28 #include "comm.h"
29 #include "memory_kokkos.h"
30 #include "error.h"
31 #include "sparta_masks.h"
32 #include "kokkos.h"
33 
34 using namespace SPARTA_NS;
35 
36 // user keywords
37 
38 enum{HEATX,HEATY,HEATZ};
39 
40 // internal accumulators
41 
42 enum{MASSSUM,MVX,MVY,MVZ,MVXVX,MVYVY,MVZVZ,MVXVY,MVYVZ,MVXVZ,
43      MVXVXVX,MVYVYVY,MVZVZVZ,
44      MVXVYVY,MVXVZVZ,MVYVXVX,MVYVZVZ,MVZVXVX,MVZVYVY,LASTSIZE};
45 
46 
47 
48 /* ---------------------------------------------------------------------- */
49 
ComputeEFluxGridKokkos(SPARTA * sparta,int narg,char ** arg)50 ComputeEFluxGridKokkos::ComputeEFluxGridKokkos(SPARTA *sparta, int narg, char **arg) :
51   ComputeEFluxGrid(sparta, narg, arg)
52 {
53   kokkos_flag = 1;
54 
55   k_unique = DAT::tdual_int_1d("compute/eflux/grid:unique",npergroup);
56   for (int m = 0; m < npergroup; m++)
57     k_unique.h_view(m) = unique[m];
58   k_unique.modify_host();
59   k_unique.sync_device();
60   d_unique = k_unique.d_view;
61 }
62 
63 /* ---------------------------------------------------------------------- */
64 
~ComputeEFluxGridKokkos()65 ComputeEFluxGridKokkos::~ComputeEFluxGridKokkos()
66 {
67   if (copymode) return;
68 
69   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
70   memoryKK->destroy_kokkos(k_tally,tally);
71   vector_grid = NULL;
72   tally = NULL;
73 }
74 
75 /* ---------------------------------------------------------------------- */
76 
compute_per_grid()77 void ComputeEFluxGridKokkos::compute_per_grid()
78 {
79   if (sparta->kokkos->prewrap) {
80     ComputeEFluxGrid::compute_per_grid();
81   } else {
82     compute_per_grid_kokkos();
83     k_tally.modify_device();
84     k_tally.sync_host();
85   }
86 }
87 
88 /* ---------------------------------------------------------------------- */
89 
compute_per_grid_kokkos()90 void ComputeEFluxGridKokkos::compute_per_grid_kokkos()
91 {
92   invoked_per_grid = update->ntimestep;
93 
94   ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
95   particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
96   d_particles = particle_kk->k_particles.d_view;
97   d_species = particle_kk->k_species.d_view;
98 
99   GridKokkos* grid_kk = (GridKokkos*) grid;
100   d_cellcount = grid_kk->d_cellcount;
101   d_plist = grid_kk->d_plist;
102   grid_kk->sync(Device,CINFO_MASK);
103   d_cinfo = grid_kk->k_cinfo.d_view;
104 
105   d_s2g = particle_kk->k_species2group.d_view;
106   int nlocal = particle->nlocal;
107 
108   // zero all accumulators
109 
110   Kokkos::deep_copy(d_tally,0.0);
111 
112   // loop over all particles, skip species not in mixture group
113 
114   need_dup = sparta->kokkos->need_dup<DeviceType>();
115   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
116     need_dup = 0;
117 
118   if (need_dup)
119     dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
120   else
121     ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
122 
123   copymode = 1;
124   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
125     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid>(0,nglocal),*this);
126   else {
127     if (sparta->kokkos->need_atomics)
128       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
129     else
130       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
131   }
132   DeviceType().fence();
133   copymode = 0;
134 
135   if (need_dup) {
136     Kokkos::Experimental::contribute(d_tally, dup_tally);
137     dup_tally = decltype(dup_tally)(); // free duplicated memory
138   }
139 
140   d_particles = t_particle_1d(); // destroy reference to reduce memory use
141   d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
142 }
143 
144 /* ---------------------------------------------------------------------- */
145 
146 template<int NEED_ATOMICS>
147 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const148 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
149 
150   // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
151 
152   auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
153   auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
154 
155   const int ispecies = d_particles[i].ispecies;
156   const int igroup = d_s2g(imix,ispecies);
157   if (igroup < 0) return;
158 
159   const int icell = d_particles[i].icell;
160   if (!(d_cinfo[icell].mask & groupbit)) return;
161 
162   const double mass = d_species[ispecies].mass;
163   double *v = d_particles[i].v;
164 
165   // loop has all possible values particle needs to accumulate
166   // subset defined by user values are indexed by accumulate vector
167 
168   int k = igroup*npergroup;
169 
170   for (int m = 0; m < npergroup; m++) {
171     switch (d_unique[m]) {
172     case MASSSUM:
173       a_tally(icell,k++) += mass;
174       break;
175     case MVX:
176       a_tally(icell,k++) += mass*v[0];
177       break;
178     case MVY:
179       a_tally(icell,k++) += mass*v[1];
180       break;
181     case MVZ:
182       a_tally(icell,k++) += mass*v[2];
183       break;
184     case MVXVX:
185       a_tally(icell,k++) += mass*v[0]*v[0];
186       break;
187     case MVYVY:
188       a_tally(icell,k++) += mass*v[1]*v[1];
189       break;
190     case MVZVZ:
191       a_tally(icell,k++) += mass*v[2]*v[2];
192       break;
193     case MVXVY:
194       a_tally(icell,k++) += mass*v[0]*v[1];
195       break;
196     case MVYVZ:
197       a_tally(icell,k++) += mass*v[1]*v[2];
198       break;
199     case MVXVZ:
200       a_tally(icell,k++) += mass*v[0]*v[2];
201       break;
202     case MVXVXVX:
203       a_tally(icell,k++) += mass*v[0]*v[0]*v[0];
204       break;
205     case MVYVYVY:
206       a_tally(icell,k++) += mass*v[1]*v[1]*v[1];
207       break;
208     case MVZVZVZ:
209       a_tally(icell,k++) += mass*v[2]*v[2]*v[2];
210       break;
211     case MVXVYVY:
212       a_tally(icell,k++) += mass*v[0]*v[1]*v[1];
213       break;
214     case MVXVZVZ:
215       a_tally(icell,k++) += mass*v[0]*v[2]*v[2];
216       break;
217     case MVYVXVX:
218       a_tally(icell,k++) += mass*v[1]*v[0]*v[0];
219       break;
220     case MVYVZVZ:
221       a_tally(icell,k++) += mass*v[1]*v[2]*v[2];
222       break;
223     case MVZVXVX:
224       a_tally(icell,k++) += mass*v[2]*v[0]*v[0];
225       break;
226     case MVZVYVY:
227       a_tally(icell,k++) += mass*v[2]*v[1]*v[1];
228       break;
229     }
230   }
231 }
232 
233 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_compute_per_grid,const int & icell) const234 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_compute_per_grid, const int &icell) const {
235   if (!(d_cinfo[icell].mask & groupbit)) return;
236 
237   const int np = d_cellcount[icell];
238 
239   for (int n = 0; n < np; n++) {
240 
241     const int i = d_plist(icell,n);
242 
243     const int ispecies = d_particles[i].ispecies;
244     const int igroup = d_s2g(imix,ispecies);
245     if (igroup < 0) return;
246 
247     const double mass = d_species[ispecies].mass;
248     double *v = d_particles[i].v;
249 
250     int k = igroup*npergroup;
251 
252     for (int m = 0; m < npergroup; m++) {
253       switch (d_unique[m]) {
254       case MASSSUM:
255         d_tally(icell,k++) += mass;
256         break;
257       case MVX:
258         d_tally(icell,k++) += mass*v[0];
259         break;
260       case MVY:
261         d_tally(icell,k++) += mass*v[1];
262         break;
263       case MVZ:
264         d_tally(icell,k++) += mass*v[2];
265         break;
266       case MVXVX:
267         d_tally(icell,k++) += mass*v[0]*v[0];
268         break;
269       case MVYVY:
270         d_tally(icell,k++) += mass*v[1]*v[1];
271         break;
272       case MVZVZ:
273         d_tally(icell,k++) += mass*v[2]*v[2];
274         break;
275       case MVXVY:
276         d_tally(icell,k++) += mass*v[0]*v[1];
277         break;
278       case MVYVZ:
279         d_tally(icell,k++) += mass*v[1]*v[2];
280         break;
281       case MVXVZ:
282         d_tally(icell,k++) += mass*v[0]*v[2];
283         break;
284       case MVXVXVX:
285         d_tally(icell,k++) += mass*v[0]*v[0]*v[0];
286         break;
287       case MVYVYVY:
288         d_tally(icell,k++) += mass*v[1]*v[1]*v[1];
289         break;
290       case MVZVZVZ:
291         d_tally(icell,k++) += mass*v[2]*v[2]*v[2];
292         break;
293       case MVXVYVY:
294         d_tally(icell,k++) += mass*v[0]*v[1]*v[1];
295         break;
296       case MVXVZVZ:
297         d_tally(icell,k++) += mass*v[0]*v[2]*v[2];
298         break;
299       case MVYVXVX:
300         d_tally(icell,k++) += mass*v[1]*v[0]*v[0];
301         break;
302       case MVYVZVZ:
303         d_tally(icell,k++) += mass*v[1]*v[2]*v[2];
304         break;
305       case MVZVXVX:
306         d_tally(icell,k++) += mass*v[2]*v[0]*v[0];
307         break;
308       case MVZVYVY:
309         d_tally(icell,k++) += mass*v[2]*v[1]*v[1];
310         break;
311       }
312     }
313   }
314 }
315 
316 /* ----------------------------------------------------------------------
317    query info about internal tally array for this compute
318    index = which column of output (0 for vec, 1 to N for array)
319    return # of tally quantities for this index
320    also return array = ptr to tally array
321    also return cols = ptr to list of columns in tally for this index
322 ------------------------------------------------------------------------- */
323 
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)324 int ComputeEFluxGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
325 {
326   d_array = d_tally;
327   return 0;
328 }
329 
330 /* ----------------------------------------------------------------------
331    tally accumulated info to compute final normalized values
332    index = which column of output (0 for vec, 1 to N for array)
333    for etally = NULL:
334      use internal tallied info for single timestep, set nsample = 1
335      if onecell = -1, compute values for all grid cells
336        store results in vector_grid with nstride = 1 (single col of array_grid)
337      if onecell >= 0, compute single value for onecell and return it
338    for etaylly = ptr to caller array:
339      use external tallied info for many timesteps
340      nsample = additional normalization factor used by some values
341      emap = list of etally columns to use, # of columns determined by index
342      store results in caller's vec, spaced by nstride
343    if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
344 ------------------------------------------------------------------------- */
345 
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)346 void ComputeEFluxGridKokkos::post_process_grid_kokkos(int index, int nsample,
347                          DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
348 {
349   index--;
350 
351   int lo = 0;
352   int hi = nglocal;
353 
354   if (!d_etally.data()) {
355     nsample = 1;
356     d_etally = d_tally;
357     emap = map[index];
358     d_vec = d_vector;
359     nstride = 1;
360   }
361 
362   this->d_etally = d_etally;
363   this->d_vec = d_vec;
364   this->nstride = nstride;
365   this->nsample = nsample;
366 
367   // compute normalized final value for each grid cell
368   // Vcm = Sum mv / Sum m = (Wx,Wy,Wz)
369   // Wi = Sum mVi / M
370   // heati = 0.5 * F/V Sum m (Vi - Wi) (V - W)^2
371   // (Vi - Wi) (V - W)^2 = (Vi - Wi)
372   //                       [ (Vi - Wi)^2 + (V1 - W1)^2 + (V2 - W2)^2 ]
373   // i = xyz and 1,2 = xyx indices different than i
374   // heati = 3 terms = h+h1+h2 where h2 is same as h1 with V1 replaced by V2
375   // h = F/V Sum m (Vi-Wi)^3
376   //   (Vi-Wi)^3 = Vi^3 - 3WiVi^2 + 3Wi^2Vi - Wi^3
377   //   Sum m (Vi-Wi)^3 = Sum(mVi^3) - 3 Sum(mVi^2) Sum(mVi) / M +
378   //                     2 Sum(mVi)^3 / M^2
379   // h1 = F/V Sum m (Vi-Wi) (V1-W1)^2
380   //   (Vi-Wi) (V1-W1)^2 = ViV1^2 - 2ViV1W1 + ViW1^2 - WiV1^2 + 2V1WiW1 - WiW1^2
381   //   3 terms in previous equation combine to 1 term in next equation
382   //   Sum m (Vi-Wi) (V1-W1)^2 = Sum(mViV1^2) - 2 Sum(mViV1) Sum(mV1) / M -
383   //                             Sum(mVi) Sum(mV1^2) / M +
384   //                             2 Sum(mVi) Sum(mV1)^2 / M^2
385 
386   GridKokkos* grid_kk = (GridKokkos*) grid;
387   grid_kk->sync(Device,CINFO_MASK);
388   d_cinfo = grid_kk->k_cinfo.d_view;
389   fnum = update->fnum;
390 
391   mass = emap[0];
392   mv = emap[1];
393   mv1 = emap[2];
394   mv2 = emap[3];
395   mvv = emap[4];
396   mv1v1 = emap[5];
397   mv2v2 = emap[6];
398   mvv1 = emap[7];
399   mvv2 = emap[8];
400   mvvv = emap[9];
401   mvv1v1 = emap[10];
402   mvv2v2 = emap[11];
403 
404   copymode = 1;
405   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_post_process_grid>(lo,hi),*this);
406   DeviceType().fence();
407   copymode = 0;
408 }
409 
410 /* ---------------------------------------------------------------------- */
411 
412 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_post_process_grid,const int & icell) const413 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_post_process_grid, const int &icell) const {
414   double h, h1, h2, wt;
415   const double summass = d_etally(icell,mass);
416   if (summass == 0.0) d_vec[icell] = 0.0;
417   else{
418     h = d_etally(icell,mvvv) - 3.0*d_etally(icell,mv)*d_etally(icell,mvv)/summass +
419       2.0*d_etally(icell,mv)*d_etally(icell,mv)*d_etally(icell,mv)/summass/summass;
420     h1 = d_etally(icell,mvv1v1) - 2.0*d_etally(icell,mvv1)*d_etally(icell,mv1)/summass -
421       d_etally(icell,mv)*d_etally(icell,mv1v1)/summass +
422       2.0*d_etally(icell,mv)*d_etally(icell,mv1)*d_etally(icell,mv1)/summass/summass;
423     h2 = d_etally(icell,mvv2v2) - 2.0*d_etally(icell,mvv2)*d_etally(icell,mv2)/summass -
424       d_etally(icell,mv)*d_etally(icell,mv2v2)/summass +
425       2.0*d_etally(icell,mv)*d_etally(icell,mv2)*d_etally(icell,mv2)/summass/summass;
426     wt = 0.5 * fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
427     d_vec[icell] = wt/nsample * (h + h1 + h2);
428   }
429 }
430 
431 /* ----------------------------------------------------------------------
432    reallocate arrays if nglocal has changed
433    called by init() and whenever grid changes
434 ------------------------------------------------------------------------- */
435 
reallocate()436 void ComputeEFluxGridKokkos::reallocate()
437 {
438   if (grid->nlocal == nglocal) return;
439 
440   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
441   memoryKK->destroy_kokkos(k_tally,tally);
442   nglocal = grid->nlocal;
443   memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"eflux/grid:vector_grid");
444   d_vector = k_vector_grid.d_view;
445   memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"eflux/grid:tally");
446   d_tally = k_tally.d_view;
447 }
448