/* ----------------------------------------------------------------------
   SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
   http://sparta.sandia.gov
   Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
   Sandia National Laboratories

   Copyright (2014) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level SPARTA directory.
------------------------------------------------------------------------- */

#include "math.h"
#include "stdlib.h"
#include "string.h"
#include "compute_thermal_grid_kokkos.h"
#include "particle_kokkos.h"
#include "mixture.h"
#include "grid_kokkos.h"
#include "update.h"
#include "modify.h"
#include "comm.h"
#include "memory_kokkos.h"
#include "error.h"
#include "sparta_masks.h"
#include "kokkos.h"

using namespace SPARTA_NS;

// user keywords

enum{TEMP,PRESS};

/* ---------------------------------------------------------------------- */

ComputeThermalGridKokkos::ComputeThermalGridKokkos(SPARTA *sparta, int narg, char **arg) :
  ComputeThermalGrid(sparta, narg, arg)
{
  kokkos_flag = 1;
}

/* ---------------------------------------------------------------------- */

ComputeThermalGridKokkos::~ComputeThermalGridKokkos()
{
  if (copymode) return;

  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  vector_grid = NULL;
  tally = NULL;
}

/* ---------------------------------------------------------------------- */

void ComputeThermalGridKokkos::compute_per_grid()
{
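  // before the Kokkos data structures are wrapped (prewrap), defer to the
  // parent host implementation; otherwise tally on the device and sync
  // the tally array back to the host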
  if (sparta->kokkos->prewrap) {
    ComputeThermalGrid::compute_per_grid();
  } else {
    compute_per_grid_kokkos();
    k_tally.modify_device();
    k_tally.sync_host();
  }
}

/* ---------------------------------------------------------------------- */

void ComputeThermalGridKokkos::compute_per_grid_kokkos()
{
  invoked_per_grid = update->ntimestep;

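  // sync particle, species, and grid cell info to the device and
  // grab the device-side views used by the tally kernels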
  ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
  particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
  d_particles = particle_kk->k_particles.d_view;
  d_species = particle_kk->k_species.d_view;

  GridKokkos* grid_kk = (GridKokkos*) grid;
  d_cellcount = grid_kk->d_cellcount;
  d_plist = grid_kk->d_plist;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;

  d_s2g = particle_kk->k_species2group.d_view;
  int nlocal = particle->nlocal;

  // zero all accumulators

  Kokkos::deep_copy(d_tally,0.0);

  // loop over all particles, skip species not in mixture group

  need_dup = sparta->kokkos->need_dup<DeviceType>();
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    need_dup = 0;

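  // wrap d_tally in a ScatterView: duplicated per-thread copies when
  // need_dup is set, a plain non-duplicated view otherwise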
  if (need_dup)
    dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
  else
    ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);

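  // if particles are sorted by cell (and atomics would otherwise be needed),
  // tally one grid cell per iteration so no atomic updates are required;
  // otherwise tally one particle per iteration, with or without atomics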
  copymode = 1;
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid>(0,nglocal),*this);
  else {
    if (sparta->kokkos->need_atomics)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
    else
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
  }
  DeviceType().fence();
  copymode = 0;

  if (need_dup) {
    Kokkos::Experimental::contribute(d_tally, dup_tally);
    dup_tally = decltype(dup_tally)(); // free duplicated memory
  }

  d_particles = t_particle_1d(); // destroy reference to reduce memory use
  d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
}

/* ---------------------------------------------------------------------- */

template<int NEED_ATOMICS>
KOKKOS_INLINE_FUNCTION
void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {

  // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

  auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
  auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();

  const int ispecies = d_particles[i].ispecies;
  const int igroup = d_s2g(imix,ispecies);
  if (igroup < 0) return;

  const int icell = d_particles[i].icell;
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const double mass = d_species[ispecies].mass;
  double *v = d_particles[i].v;

  // 6 tallies per particle: N, Mass, mVx, mVy, mVz, mV^2

  int k = igroup*npergroup;

  a_tally(icell,k++) += 1.0;
  a_tally(icell,k++) += mass;
  a_tally(icell,k++) += mass*v[0];
  a_tally(icell,k++) += mass*v[1];
  a_tally(icell,k++) += mass*v[2];
  a_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
}
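
/* ----------------------------------------------------------------------
   per-cell form of the tally kernel, used when particles are sorted by
   grid cell: each iteration tallies all particles in one cell, so no
   atomic updates are needed
------------------------------------------------------------------------- */
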
KOKKOS_INLINE_FUNCTION
void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_compute_per_grid, const int &icell) const {
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const int np = d_cellcount[icell];

  for (int n = 0; n < np; n++) {

    const int i = d_plist(icell,n);

    const int ispecies = d_particles[i].ispecies;
    const int igroup = d_s2g(imix,ispecies);
    if (igroup < 0) continue;

    const double mass = d_species[ispecies].mass;
    double *v = d_particles[i].v;

    // 6 tallies per particle: N, Mass, mVx, mVy, mVz, mV^2

    int k = igroup*npergroup;

    d_tally(icell,k++) += 1.0;
    d_tally(icell,k++) += mass;
    d_tally(icell,k++) += mass*v[0];
    d_tally(icell,k++) += mass*v[1];
    d_tally(icell,k++) += mass*v[2];
    d_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
  }
}

/* ----------------------------------------------------------------------
   query info about internal tally array for this compute
   index = which column of output (0 for vec, 1 to N for array)
   return # of tally quantities for this index
   also return array = ptr to tally array
   also return cols = ptr to list of columns in tally for this index
------------------------------------------------------------------------- */

int ComputeThermalGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
{
  d_array = d_tally;
  return 0;
}

/* ----------------------------------------------------------------------
   tally accumulated info to compute final normalized values
   index = which column of output (0 for vec, 1 to N for array)
   for etally = NULL:
     use internal tallied info for single timestep, set nsample = 1
     compute values for all grid cells
       store results in vector_grid with nstride = 1 (single col of array_grid)
   for etally = ptr to caller array:
     use external tallied info for many timesteps
     nsample = additional normalization factor used by some values
     emap = list of etally columns to use, # of columns determined by index
     store results in caller's vec, spaced by nstride
   if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
------------------------------------------------------------------------- */

void ComputeThermalGridKokkos::
post_process_grid_kokkos(int index, int nsample,
                         DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
{
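  // index is the 1-based output column; convert to 0-based and map it to
  // the requested value keyword (TEMP or PRESS) for this compute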
  index--;
  int ivalue = index % nvalue;

  int lo = 0;
  int hi = nglocal;
  //k = 0;

  if (!d_etally.data()) {
    nsample = 1;
    d_etally = d_tally;
    emap = map[index];
    d_vec = d_vector;
    nstride = 1;
  }

  this->d_etally = d_etally;
  this->d_vec = d_vec;
  this->nstride = nstride;
  this->nsample = nsample;

  // compute normalized final value for each grid cell
  // Vcm = Sum mv / Sum m
  // total KE = 0.5 * Sum m(v - Vcm)^2
  // KE = 0.5 * Sum(i=xyz) [(Sum mVi^2) - M Vcmi^2]
  // KE = 0.5 * Sum(i=xyz) [(Sum mVi^2) - (Sum mVi)^2 / M]
  // KE = 0.5 * [Sum (mv^2) - ((Sum mVx)^2 + (Sum mVy)^2 + (Sum mVz)^2) / M]
  // thermal temp = (2/(3NkB)) * KE
  // press = (2/(3V)) * KE

  tflag = 1;
  if (value[ivalue] == PRESS) tflag = 0;

  if (value[ivalue] == TEMP) prefactor = tprefactor;
  else if (value[ivalue] == PRESS) prefactor = pprefactor;

  GridKokkos* grid_kk = (GridKokkos*) grid;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;

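  // emap[0] = first of the 6 tally columns (N, mass, mVx, mVy, mVz, mV^2)
  // used for this value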
  n = emap[0];

  copymode = 1;
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_post_process_grid>(lo,hi),*this);
  DeviceType().fence();
  copymode = 0;
}

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_post_process_grid, const int &icell) const {
  const double ncount = d_etally(icell,n);
  if (ncount == 0.0) d_vec[icell] = 0.0;
  else {
    const double mass = d_etally(icell,n+1);
    const double mvx = d_etally(icell,n+2);
    const double mvy = d_etally(icell,n+3);
    const double mvz = d_etally(icell,n+4);
    const double mvsq = d_etally(icell,n+5);
    d_vec[icell] = mvsq - (mvx*mvx + mvy*mvy + mvz*mvz)/mass;
    d_vec[icell] *= prefactor;
    if (tflag) d_vec[icell] /= ncount;
    else d_vec[icell] *= d_cinfo[icell].weight / d_cinfo[icell].volume / nsample;
  }
}

/* ----------------------------------------------------------------------
   reallocate arrays if nglocal has changed
   called by init() and whenever grid changes
------------------------------------------------------------------------- */

void ComputeThermalGridKokkos::reallocate()
{
  if (grid->nlocal == nglocal) return;

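  // free the old per-cell arrays, then recreate them sized to the new
  // number of local grid cells and refresh the device views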
  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  nglocal = grid->nlocal;
  memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"thermal/grid:vector_grid");
  d_vector = k_vector_grid.d_view;
  memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"thermal/grid:tally");
  d_tally = k_tally.d_view;
}