1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Alan Stagg (Sandia)
17 ------------------------------------------------------------------------- */
18
19 #include "math.h"
20 #include "stdlib.h"
21 #include "string.h"
22 #include "compute_eflux_grid_kokkos.h"
23 #include "particle_kokkos.h"
24 #include "mixture.h"
25 #include "grid_kokkos.h"
26 #include "update.h"
27 #include "modify.h"
28 #include "comm.h"
29 #include "memory_kokkos.h"
30 #include "error.h"
31 #include "sparta_masks.h"
32 #include "kokkos.h"
33
34 using namespace SPARTA_NS;
35
36 // user keywords
37
38 enum{HEATX,HEATY,HEATZ};
39
40 // internal accumulators
41
42 enum{MASSSUM,MVX,MVY,MVZ,MVXVX,MVYVY,MVZVZ,MVXVY,MVYVZ,MVXVZ,
43 MVXVXVX,MVYVYVY,MVZVZVZ,
44 MVXVYVY,MVXVZVZ,MVYVXVX,MVYVZVZ,MVZVXVX,MVZVYVY,LASTSIZE};
45
46
47
48 /* ---------------------------------------------------------------------- */
49
ComputeEFluxGridKokkos(SPARTA * sparta,int narg,char ** arg)50 ComputeEFluxGridKokkos::ComputeEFluxGridKokkos(SPARTA *sparta, int narg, char **arg) :
51 ComputeEFluxGrid(sparta, narg, arg)
52 {
53 kokkos_flag = 1;
54
55 k_unique = DAT::tdual_int_1d("compute/eflux/grid:unique",npergroup);
56 for (int m = 0; m < npergroup; m++)
57 k_unique.h_view(m) = unique[m];
58 k_unique.modify_host();
59 k_unique.sync_device();
60 d_unique = k_unique.d_view;
61 }
62
63 /* ---------------------------------------------------------------------- */
64
~ComputeEFluxGridKokkos()65 ComputeEFluxGridKokkos::~ComputeEFluxGridKokkos()
66 {
67 if (copymode) return;
68
69 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
70 memoryKK->destroy_kokkos(k_tally,tally);
71 vector_grid = NULL;
72 tally = NULL;
73 }
74
75 /* ---------------------------------------------------------------------- */
76
compute_per_grid()77 void ComputeEFluxGridKokkos::compute_per_grid()
78 {
79 if (sparta->kokkos->prewrap) {
80 ComputeEFluxGrid::compute_per_grid();
81 } else {
82 compute_per_grid_kokkos();
83 k_tally.modify_device();
84 k_tally.sync_host();
85 }
86 }
87
88 /* ---------------------------------------------------------------------- */
89
compute_per_grid_kokkos()90 void ComputeEFluxGridKokkos::compute_per_grid_kokkos()
91 {
92 invoked_per_grid = update->ntimestep;
93
94 ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
95 particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
96 d_particles = particle_kk->k_particles.d_view;
97 d_species = particle_kk->k_species.d_view;
98
99 GridKokkos* grid_kk = (GridKokkos*) grid;
100 d_cellcount = grid_kk->d_cellcount;
101 d_plist = grid_kk->d_plist;
102 grid_kk->sync(Device,CINFO_MASK);
103 d_cinfo = grid_kk->k_cinfo.d_view;
104
105 d_s2g = particle_kk->k_species2group.d_view;
106 int nlocal = particle->nlocal;
107
108 // zero all accumulators
109
110 Kokkos::deep_copy(d_tally,0.0);
111
112 // loop over all particles, skip species not in mixture group
113
114 need_dup = sparta->kokkos->need_dup<DeviceType>();
115 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
116 need_dup = 0;
117
118 if (need_dup)
119 dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
120 else
121 ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
122
123 copymode = 1;
124 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
125 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid>(0,nglocal),*this);
126 else {
127 if (sparta->kokkos->need_atomics)
128 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
129 else
130 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
131 }
132 DeviceType().fence();
133 copymode = 0;
134
135 if (need_dup) {
136 Kokkos::Experimental::contribute(d_tally, dup_tally);
137 dup_tally = decltype(dup_tally)(); // free duplicated memory
138 }
139
140 d_particles = t_particle_1d(); // destroy reference to reduce memory use
141 d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
142 }
143
144 /* ---------------------------------------------------------------------- */
145
146 template<int NEED_ATOMICS>
147 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const148 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
149
150 // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
151
152 auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
153 auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
154
155 const int ispecies = d_particles[i].ispecies;
156 const int igroup = d_s2g(imix,ispecies);
157 if (igroup < 0) return;
158
159 const int icell = d_particles[i].icell;
160 if (!(d_cinfo[icell].mask & groupbit)) return;
161
162 const double mass = d_species[ispecies].mass;
163 double *v = d_particles[i].v;
164
165 // loop has all possible values particle needs to accumulate
166 // subset defined by user values are indexed by accumulate vector
167
168 int k = igroup*npergroup;
169
170 for (int m = 0; m < npergroup; m++) {
171 switch (d_unique[m]) {
172 case MASSSUM:
173 a_tally(icell,k++) += mass;
174 break;
175 case MVX:
176 a_tally(icell,k++) += mass*v[0];
177 break;
178 case MVY:
179 a_tally(icell,k++) += mass*v[1];
180 break;
181 case MVZ:
182 a_tally(icell,k++) += mass*v[2];
183 break;
184 case MVXVX:
185 a_tally(icell,k++) += mass*v[0]*v[0];
186 break;
187 case MVYVY:
188 a_tally(icell,k++) += mass*v[1]*v[1];
189 break;
190 case MVZVZ:
191 a_tally(icell,k++) += mass*v[2]*v[2];
192 break;
193 case MVXVY:
194 a_tally(icell,k++) += mass*v[0]*v[1];
195 break;
196 case MVYVZ:
197 a_tally(icell,k++) += mass*v[1]*v[2];
198 break;
199 case MVXVZ:
200 a_tally(icell,k++) += mass*v[0]*v[2];
201 break;
202 case MVXVXVX:
203 a_tally(icell,k++) += mass*v[0]*v[0]*v[0];
204 break;
205 case MVYVYVY:
206 a_tally(icell,k++) += mass*v[1]*v[1]*v[1];
207 break;
208 case MVZVZVZ:
209 a_tally(icell,k++) += mass*v[2]*v[2]*v[2];
210 break;
211 case MVXVYVY:
212 a_tally(icell,k++) += mass*v[0]*v[1]*v[1];
213 break;
214 case MVXVZVZ:
215 a_tally(icell,k++) += mass*v[0]*v[2]*v[2];
216 break;
217 case MVYVXVX:
218 a_tally(icell,k++) += mass*v[1]*v[0]*v[0];
219 break;
220 case MVYVZVZ:
221 a_tally(icell,k++) += mass*v[1]*v[2]*v[2];
222 break;
223 case MVZVXVX:
224 a_tally(icell,k++) += mass*v[2]*v[0]*v[0];
225 break;
226 case MVZVYVY:
227 a_tally(icell,k++) += mass*v[2]*v[1]*v[1];
228 break;
229 }
230 }
231 }
232
233 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_compute_per_grid,const int & icell) const234 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_compute_per_grid, const int &icell) const {
235 if (!(d_cinfo[icell].mask & groupbit)) return;
236
237 const int np = d_cellcount[icell];
238
239 for (int n = 0; n < np; n++) {
240
241 const int i = d_plist(icell,n);
242
243 const int ispecies = d_particles[i].ispecies;
244 const int igroup = d_s2g(imix,ispecies);
245 if (igroup < 0) return;
246
247 const double mass = d_species[ispecies].mass;
248 double *v = d_particles[i].v;
249
250 int k = igroup*npergroup;
251
252 for (int m = 0; m < npergroup; m++) {
253 switch (d_unique[m]) {
254 case MASSSUM:
255 d_tally(icell,k++) += mass;
256 break;
257 case MVX:
258 d_tally(icell,k++) += mass*v[0];
259 break;
260 case MVY:
261 d_tally(icell,k++) += mass*v[1];
262 break;
263 case MVZ:
264 d_tally(icell,k++) += mass*v[2];
265 break;
266 case MVXVX:
267 d_tally(icell,k++) += mass*v[0]*v[0];
268 break;
269 case MVYVY:
270 d_tally(icell,k++) += mass*v[1]*v[1];
271 break;
272 case MVZVZ:
273 d_tally(icell,k++) += mass*v[2]*v[2];
274 break;
275 case MVXVY:
276 d_tally(icell,k++) += mass*v[0]*v[1];
277 break;
278 case MVYVZ:
279 d_tally(icell,k++) += mass*v[1]*v[2];
280 break;
281 case MVXVZ:
282 d_tally(icell,k++) += mass*v[0]*v[2];
283 break;
284 case MVXVXVX:
285 d_tally(icell,k++) += mass*v[0]*v[0]*v[0];
286 break;
287 case MVYVYVY:
288 d_tally(icell,k++) += mass*v[1]*v[1]*v[1];
289 break;
290 case MVZVZVZ:
291 d_tally(icell,k++) += mass*v[2]*v[2]*v[2];
292 break;
293 case MVXVYVY:
294 d_tally(icell,k++) += mass*v[0]*v[1]*v[1];
295 break;
296 case MVXVZVZ:
297 d_tally(icell,k++) += mass*v[0]*v[2]*v[2];
298 break;
299 case MVYVXVX:
300 d_tally(icell,k++) += mass*v[1]*v[0]*v[0];
301 break;
302 case MVYVZVZ:
303 d_tally(icell,k++) += mass*v[1]*v[2]*v[2];
304 break;
305 case MVZVXVX:
306 d_tally(icell,k++) += mass*v[2]*v[0]*v[0];
307 break;
308 case MVZVYVY:
309 d_tally(icell,k++) += mass*v[2]*v[1]*v[1];
310 break;
311 }
312 }
313 }
314 }
315
316 /* ----------------------------------------------------------------------
317 query info about internal tally array for this compute
318 index = which column of output (0 for vec, 1 to N for array)
319 return # of tally quantities for this index
320 also return array = ptr to tally array
321 also return cols = ptr to list of columns in tally for this index
322 ------------------------------------------------------------------------- */
323
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)324 int ComputeEFluxGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
325 {
326 d_array = d_tally;
327 return 0;
328 }
329
330 /* ----------------------------------------------------------------------
331 tally accumulated info to compute final normalized values
332 index = which column of output (0 for vec, 1 to N for array)
333 for etally = NULL:
334 use internal tallied info for single timestep, set nsample = 1
335 if onecell = -1, compute values for all grid cells
336 store results in vector_grid with nstride = 1 (single col of array_grid)
337 if onecell >= 0, compute single value for onecell and return it
338 for etaylly = ptr to caller array:
339 use external tallied info for many timesteps
340 nsample = additional normalization factor used by some values
341 emap = list of etally columns to use, # of columns determined by index
342 store results in caller's vec, spaced by nstride
343 if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
344 ------------------------------------------------------------------------- */
345
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)346 void ComputeEFluxGridKokkos::post_process_grid_kokkos(int index, int nsample,
347 DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
348 {
349 index--;
350
351 int lo = 0;
352 int hi = nglocal;
353
354 if (!d_etally.data()) {
355 nsample = 1;
356 d_etally = d_tally;
357 emap = map[index];
358 d_vec = d_vector;
359 nstride = 1;
360 }
361
362 this->d_etally = d_etally;
363 this->d_vec = d_vec;
364 this->nstride = nstride;
365 this->nsample = nsample;
366
367 // compute normalized final value for each grid cell
368 // Vcm = Sum mv / Sum m = (Wx,Wy,Wz)
369 // Wi = Sum mVi / M
370 // heati = 0.5 * F/V Sum m (Vi - Wi) (V - W)^2
371 // (Vi - Wi) (V - W)^2 = (Vi - Wi)
372 // [ (Vi - Wi)^2 + (V1 - W1)^2 + (V2 - W2)^2 ]
373 // i = xyz and 1,2 = xyx indices different than i
374 // heati = 3 terms = h+h1+h2 where h2 is same as h1 with V1 replaced by V2
375 // h = F/V Sum m (Vi-Wi)^3
376 // (Vi-Wi)^3 = Vi^3 - 3WiVi^2 + 3Wi^2Vi - Wi^3
377 // Sum m (Vi-Wi)^3 = Sum(mVi^3) - 3 Sum(mVi^2) Sum(mVi) / M +
378 // 2 Sum(mVi)^3 / M^2
379 // h1 = F/V Sum m (Vi-Wi) (V1-W1)^2
380 // (Vi-Wi) (V1-W1)^2 = ViV1^2 - 2ViV1W1 + ViW1^2 - WiV1^2 + 2V1WiW1 - WiW1^2
381 // 3 terms in previous equation combine to 1 term in next equation
382 // Sum m (Vi-Wi) (V1-W1)^2 = Sum(mViV1^2) - 2 Sum(mViV1) Sum(mV1) / M -
383 // Sum(mVi) Sum(mV1^2) / M +
384 // 2 Sum(mVi) Sum(mV1)^2 / M^2
385
386 GridKokkos* grid_kk = (GridKokkos*) grid;
387 grid_kk->sync(Device,CINFO_MASK);
388 d_cinfo = grid_kk->k_cinfo.d_view;
389 fnum = update->fnum;
390
391 mass = emap[0];
392 mv = emap[1];
393 mv1 = emap[2];
394 mv2 = emap[3];
395 mvv = emap[4];
396 mv1v1 = emap[5];
397 mv2v2 = emap[6];
398 mvv1 = emap[7];
399 mvv2 = emap[8];
400 mvvv = emap[9];
401 mvv1v1 = emap[10];
402 mvv2v2 = emap[11];
403
404 copymode = 1;
405 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeEFluxGrid_post_process_grid>(lo,hi),*this);
406 DeviceType().fence();
407 copymode = 0;
408 }
409
410 /* ---------------------------------------------------------------------- */
411
412 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeEFluxGrid_post_process_grid,const int & icell) const413 void ComputeEFluxGridKokkos::operator()(TagComputeEFluxGrid_post_process_grid, const int &icell) const {
414 double h, h1, h2, wt;
415 const double summass = d_etally(icell,mass);
416 if (summass == 0.0) d_vec[icell] = 0.0;
417 else{
418 h = d_etally(icell,mvvv) - 3.0*d_etally(icell,mv)*d_etally(icell,mvv)/summass +
419 2.0*d_etally(icell,mv)*d_etally(icell,mv)*d_etally(icell,mv)/summass/summass;
420 h1 = d_etally(icell,mvv1v1) - 2.0*d_etally(icell,mvv1)*d_etally(icell,mv1)/summass -
421 d_etally(icell,mv)*d_etally(icell,mv1v1)/summass +
422 2.0*d_etally(icell,mv)*d_etally(icell,mv1)*d_etally(icell,mv1)/summass/summass;
423 h2 = d_etally(icell,mvv2v2) - 2.0*d_etally(icell,mvv2)*d_etally(icell,mv2)/summass -
424 d_etally(icell,mv)*d_etally(icell,mv2v2)/summass +
425 2.0*d_etally(icell,mv)*d_etally(icell,mv2)*d_etally(icell,mv2)/summass/summass;
426 wt = 0.5 * fnum * d_cinfo[icell].weight / d_cinfo[icell].volume;
427 d_vec[icell] = wt/nsample * (h + h1 + h2);
428 }
429 }
430
431 /* ----------------------------------------------------------------------
432 reallocate arrays if nglocal has changed
433 called by init() and whenever grid changes
434 ------------------------------------------------------------------------- */
435
reallocate()436 void ComputeEFluxGridKokkos::reallocate()
437 {
438 if (grid->nlocal == nglocal) return;
439
440 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
441 memoryKK->destroy_kokkos(k_tally,tally);
442 nglocal = grid->nlocal;
443 memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"eflux/grid:vector_grid");
444 d_vector = k_vector_grid.d_view;
445 memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"eflux/grid:tally");
446 d_tally = k_tally.d_view;
447 }
448