/* ----------------------------------------------------------------------
   SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
   http://sparta.sandia.gov
   Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
   Sandia National Laboratories

   Copyright (2014) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level SPARTA directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Alan Stagg (Sandia)
------------------------------------------------------------------------- */

#include "math.h"
#include "stdlib.h"
#include "string.h"
#include "compute_pflux_grid_kokkos.h"
#include "particle_kokkos.h"
#include "mixture.h"
#include "grid_kokkos.h"
#include "update.h"
#include "modify.h"
#include "comm.h"
#include "memory_kokkos.h"
#include "error.h"
#include "sparta_masks.h"
#include "kokkos.h"

using namespace SPARTA_NS;

// user keywords

enum{MOMXX,MOMYY,MOMZZ,MOMXY,MOMYZ,MOMXZ};

// internal accumulators

enum{MASSSUM,MVX,MVY,MVZ,MVXSQ,MVYSQ,MVZSQ,MVXVY,MVYVZ,MVXVZ,LASTSIZE};

/* ---------------------------------------------------------------------- */

ComputePFluxGridKokkos::ComputePFluxGridKokkos(SPARTA *sparta, int narg, char **arg) :
  ComputePFluxGrid(sparta, narg, arg)
{
  kokkos_flag = 1;

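  // mirror the host-side list of unique accumulators into a Kokkos
  // dual view so the device kernels can index it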
  k_unique = DAT::tdual_int_1d("compute/pflux/grid:unique",npergroup);
  for (int m = 0; m < npergroup; m++)
    k_unique.h_view(m) = unique[m];
  k_unique.modify_host();
  k_unique.sync_device();
  d_unique = k_unique.d_view;
}

/* ---------------------------------------------------------------------- */

ComputePFluxGridKokkos::~ComputePFluxGridKokkos()
{
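  // this destructor also runs on shallow copies of the compute made for
  // device kernels; skip deallocation then so memory is freed only once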
  if (copymode) return;

  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  vector_grid = NULL;
  tally = NULL;
}

/* ---------------------------------------------------------------------- */

void ComputePFluxGridKokkos::compute_per_grid()
{
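  // before the Kokkos wrap of the run is active, fall back to the CPU
  // version; otherwise tally on the device and mirror results to the host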
  if (sparta->kokkos->prewrap) {
    ComputePFluxGrid::compute_per_grid();
  } else {
    compute_per_grid_kokkos();
    k_tally.modify_device();
    k_tally.sync_host();
  }
}

/* ---------------------------------------------------------------------- */

void ComputePFluxGridKokkos::compute_per_grid_kokkos()
{
  invoked_per_grid = update->ntimestep;

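  // acquire device views of particle, species, and grid data,
  // syncing any host-side changes to the device first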
  ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
  particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
  d_particles = particle_kk->k_particles.d_view;
  d_species = particle_kk->k_species.d_view;

  GridKokkos* grid_kk = (GridKokkos*) grid;
  d_cellcount = grid_kk->d_cellcount;
  d_plist = grid_kk->d_plist;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;

  d_s2g = particle_kk->k_species2group.d_view;
  int nlocal = particle->nlocal;

  // zero all accumulators

  Kokkos::deep_copy(d_tally,0.0);

  // loop over all particles, skip species not in mixture group

  need_dup = sparta->kokkos->need_dup<DeviceType>();
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    need_dup = 0;

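  // wrap the tally array in a ScatterView: duplicated per-thread copies
  // when duplication is needed to avoid write conflicts, a single shared
  // view otherwise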
  if (need_dup)
    dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
  else
    ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);

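  // if particles are presorted by grid cell, tally one cell per thread
  // so no atomics are needed; otherwise loop over particles, with or
  // without atomic updates to the tally array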
  copymode = 1;
  if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid>(0,nglocal),*this);
  else {
    if (sparta->kokkos->need_atomics)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
    else
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
  }
  DeviceType().fence();
  copymode = 0;

  if (need_dup) {
    Kokkos::Experimental::contribute(d_tally, dup_tally);
    dup_tally = decltype(dup_tally)(); // free duplicated memory
  }

  d_particles = t_particle_1d(); // destroy reference to reduce memory use
  d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
}

/* ---------------------------------------------------------------------- */

template<int NEED_ATOMICS>
KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {

  // the tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial

  auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
  auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();

  const int ispecies = d_particles[i].ispecies;
  const int igroup = d_s2g(imix,ispecies);
  if (igroup < 0) return;

  const int icell = d_particles[i].icell;
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const double mass = d_species[ispecies].mass;
  double *v = d_particles[i].v;

  // loop over all accumulators this particle may contribute to
  // the subset selected by the user's values is listed in d_unique

  int k = igroup*npergroup;

  for (int m = 0; m < npergroup; m++) {
    switch (d_unique[m]) {
    case MASSSUM:
      a_tally(icell,k++) += mass;
      break;
    case MVX:
      a_tally(icell,k++) += mass*v[0];
      break;
    case MVY:
      a_tally(icell,k++) += mass*v[1];
      break;
    case MVZ:
      a_tally(icell,k++) += mass*v[2];
      break;
    case MVXSQ:
      a_tally(icell,k++) += mass*v[0]*v[0];
      break;
    case MVYSQ:
      a_tally(icell,k++) += mass*v[1]*v[1];
      break;
    case MVZSQ:
      a_tally(icell,k++) += mass*v[2]*v[2];
      break;
    case MVXVY:
      a_tally(icell,k++) += mass*v[0]*v[1];
      break;
    case MVYVZ:
      a_tally(icell,k++) += mass*v[1]*v[2];
      break;
    case MVXVZ:
      a_tally(icell,k++) += mass*v[0]*v[2];
      break;
    }
  }
}

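/* ----------------------------------------------------------------------
   cell-based version of the tally kernel for presorted particles:
   each thread owns a grid cell and its particle list, so plain writes
   to d_tally are safe without atomics or duplication
------------------------------------------------------------------------- */
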
KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_compute_per_grid, const int &icell) const {
  if (!(d_cinfo[icell].mask & groupbit)) return;

  const int np = d_cellcount[icell];

  for (int n = 0; n < np; n++) {

    const int i = d_plist(icell,n);

    const int ispecies = d_particles[i].ispecies;
    const int igroup = d_s2g(imix,ispecies);
    if (igroup < 0) continue;
216
217 const double mass = d_species[ispecies].mass;
218 double *v = d_particles[i].v;
219
220 int k = igroup*npergroup;
221
222 for (int m = 0; m < npergroup; m++) {
223 switch (d_unique[m]) {
224 case MASSSUM:
225 d_tally(icell,k++) += mass;
226 break;
227 case MVX:
228 d_tally(icell,k++) += mass*v[0];
229 break;
230 case MVY:
231 d_tally(icell,k++) += mass*v[1];
232 break;
233 case MVZ:
234 d_tally(icell,k++) += mass*v[2];
235 break;
236 case MVXSQ:
237 d_tally(icell,k++) += mass*v[0]*v[0];
238 break;
239 case MVYSQ:
240 d_tally(icell,k++) += mass*v[1]*v[1];
241 break;
242 case MVZSQ:
243 d_tally(icell,k++) += mass*v[2]*v[2];
244 break;
245 case MVXVY:
246 d_tally(icell,k++) += mass*v[0]*v[1];
247 break;
248 case MVYVZ:
249 d_tally(icell,k++) += mass*v[1]*v[2];
250 break;
251 case MVXVZ:
252 d_tally(icell,k++) += mass*v[0]*v[2];
253 break;
254 }
255 }
256 }
257 }
258
/* ----------------------------------------------------------------------
   query info about internal tally array for this compute
   d_array is set to the internal tally array
   return 0; the Kokkos variant exposes the full tally array directly,
   so per-index column info is not returned here
------------------------------------------------------------------------- */

int ComputePFluxGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
{
  d_array = d_tally;
  return 0;
}

/* ----------------------------------------------------------------------
   tally accumulated info to compute final normalized values
   index = which column of output (0 for vec, 1 to N for array)
   for etally = NULL:
     use internal tallied info for single timestep
     compute values for all grid cells
     store results in vector_grid with nstride = 1 (single col of array_grid)
   for etally = ptr to caller array:
     use external tallied info for many timesteps
     emap = list of etally columns to use, # of columns determined by index
     store results in caller's vec, spaced by nstride
   if norm = 0.0, set result to 0.0 directly to avoid dividing by 0.0
------------------------------------------------------------------------- */

void ComputePFluxGridKokkos::post_process_grid_kokkos(int index, int nsample,
                                                      DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
{
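  // convert the caller's 1-based column index to a 0-based value index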
  index--;
  int ivalue = index % nvalue;

  int lo = 0;
  int hi = nglocal;

  if (!d_etally.data()) {
    nsample = 1;
    d_etally = d_tally;
    emap = map[index];
    d_vec = d_vector;
    nstride = 1;
  }

  this->d_etally = d_etally;
  this->d_vec = d_vec;
  this->nstride = nstride;
  this->nsample = nsample;

  // compute normalized final value for each grid cell
  // Vcm = Sum mv / Sum m = (Wx,Wy,Wz)
  // Wi = Sum mVi / M
  // momii = F/V Sum m(Vi - Wi)^2
  // momii = F/V [ Sum mVi^2 + M Wi^2 - 2 Wi Sum mVi ]
  // momii = F/V [ Sum mVi^2 - (Sum mVi)^2 / M ]
  // momij = F/V Sum m(Vi - Wi)(Vj - Wj)
  // momij = F/V [ Sum mViVj + M Wi Wj - Wj Sum mVi - Wi Sum mVj ]
  // momij = F/V [ Sum mViVj - Sum mVi Sum mVj / M ]

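  // the F/V factor below = fnum (ratio of physical to simulated particles)
  // times the cell weight, divided by the cell volume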
  GridKokkos* grid_kk = (GridKokkos*) grid;
  grid_kk->sync(Device,CINFO_MASK);
  d_cinfo = grid_kk->k_cinfo.d_view;
  fnum = update->fnum;

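  // emap lists the tally columns this value needs: the mass sum first,
  // then the momentum moments consumed by the kernels below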
  mass = emap[0];
  switch (value[ivalue]) {

  case MOMXX:
  case MOMYY:
  case MOMZZ:
    {
      mv = emap[1];
      mvv = emap[2];
      copymode = 1;
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_post_process_grid_diag>(lo,hi),*this);
      DeviceType().fence();
      copymode = 0;
      break;
    }
  case MOMXY:
  case MOMYZ:
  case MOMXZ:
    {
      mv1 = emap[1];
      mv2 = emap[2];
      mvv = emap[3];
      copymode = 1;
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputePFluxGrid_post_process_grid_offdiag>(lo,hi),*this);
      DeviceType().fence();
      copymode = 0;
      break;
    }
  } // end switch
}

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_post_process_grid_diag, const int &icell) const {
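  // diagonal momentum flux for one cell:
  //   momii = F/V [ Sum mVi^2 - (Sum mVi)^2 / M ], averaged over nsample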
  double summass, summv, wt;
  summass = d_etally(icell,mass);
  if (summass == 0.0) d_vec[icell] = 0.0;
  else {
    wt = fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
    summv = d_etally(icell,mv);
    d_vec[icell] = wt/nsample * (d_etally(icell,mvv) - summv*summv/summass);
  }
}

/* ---------------------------------------------------------------------- */

KOKKOS_INLINE_FUNCTION
void ComputePFluxGridKokkos::operator()(TagComputePFluxGrid_post_process_grid_offdiag, const int &icell) const {
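  // off-diagonal momentum flux for one cell:
  //   momij = F/V [ Sum mViVj - Sum mVi Sum mVj / M ], averaged over nsample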
  double summass, wt;
  summass = d_etally(icell,mass);
  if (summass == 0.0) d_vec[icell] = 0.0;
  else {
    wt = fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
    d_vec[icell] = wt/nsample * (d_etally(icell,mvv) -
                                 d_etally(icell,mv1)*d_etally(icell,mv2)/summass);
  }
}

/* ----------------------------------------------------------------------
   reallocate arrays if nglocal has changed
   called by init() and whenever grid changes
------------------------------------------------------------------------- */

void ComputePFluxGridKokkos::reallocate()
{
  if (grid->nlocal == nglocal) return;

  memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
  memoryKK->destroy_kokkos(k_tally,tally);
  nglocal = grid->nlocal;
  memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"pflux/grid:vector_grid");
  d_vector = k_vector_grid.d_view;
  memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"pflux/grid:tally");
  d_tally = k_tally.d_view;
}