1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "string.h"
16 #include "compute_grid_kokkos.h"
17 #include "particle_kokkos.h"
18 #include "mixture.h"
19 #include "grid_kokkos.h"
20 #include "update.h"
21 #include "modify.h"
22 #include "memory_kokkos.h"
23 #include "error.h"
24 #include "sparta_masks.h"
25 #include "kokkos.h"
26
27 using namespace SPARTA_NS;
28
29 // user keywords
30
31 enum{NUM,NRHO,NFRAC,MASS,MASSRHO,MASSFRAC,
32 U,V,W,USQ,VSQ,WSQ,KE,TEMPERATURE,EROT,TROT,EVIB,TVIB,
33 PXRHO,PYRHO,PZRHO,KERHO};
34
35 // internal accumulators
36
37 enum{COUNT,MASSSUM,MVX,MVY,MVZ,MVXSQ,MVYSQ,MVZSQ,MVSQ,
38 ENGROT,ENGVIB,DOFROT,DOFVIB,CELLCOUNT,CELLMASS,LASTSIZE};
39
40 // max # of quantities to accumulate for any user value
41
42 #define MAXACCUMULATE 2
43
44 /* ---------------------------------------------------------------------- */
45
ComputeGridKokkos(SPARTA * sparta,int narg,char ** arg)46 ComputeGridKokkos::ComputeGridKokkos(SPARTA *sparta, int narg, char **arg) :
47 ComputeGrid(sparta, narg, arg)
48 {
49 kokkos_flag = 1;
50
51 k_unique = DAT::tdual_int_1d("compute/grid:unique",npergroup);
52 for (int m = 0; m < npergroup; m++)
53 k_unique.h_view(m) = unique[m];
54 k_unique.modify_host();
55 k_unique.sync_device();
56 d_unique = k_unique.d_view;
57 }
58
59 /* ---------------------------------------------------------------------- */
60
~ComputeGridKokkos()61 ComputeGridKokkos::~ComputeGridKokkos()
62 {
63 if (copymode) return;
64
65 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
66 memoryKK->destroy_kokkos(k_tally,tally);
67 vector_grid = NULL;
68 tally = NULL;
69 }
70
71 /* ---------------------------------------------------------------------- */
72
compute_per_grid()73 void ComputeGridKokkos::compute_per_grid()
74 {
75 if (sparta->kokkos->prewrap) {
76 ComputeGrid::compute_per_grid();
77 } else {
78 compute_per_grid_kokkos();
79 k_tally.modify_device();
80 k_tally.sync_host();
81 }
82 }
83
84 /* ---------------------------------------------------------------------- */
85
compute_per_grid_kokkos()86 void ComputeGridKokkos::compute_per_grid_kokkos()
87 {
88 invoked_per_grid = update->ntimestep;
89
90 ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
91 particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
92 d_particles = particle_kk->k_particles.d_view;
93 d_species = particle_kk->k_species.d_view;
94
95 GridKokkos* grid_kk = (GridKokkos*) grid;
96 d_cellcount = grid_kk->d_cellcount;
97 d_plist = grid_kk->d_plist;
98 grid_kk->sync(Device,CINFO_MASK);
99 d_cinfo = grid_kk->k_cinfo.d_view;
100
101 d_s2g = particle_kk->k_species2group.d_view;
102 int nlocal = particle->nlocal;
103
104 // zero all accumulators
105
106 Kokkos::deep_copy(d_tally,0.0);
107
108 // loop over all particles, skip species not in mixture group
109 // skip cells not in grid group
110 // perform all tallies needed for each particle
111 // depends on its species group and the user-requested values
112
113 need_dup = sparta->kokkos->need_dup<DeviceType>();
114 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
115 need_dup = 0;
116
117 if (need_dup)
118 dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
119 else
120 ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
121
122 copymode = 1;
123 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
124 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid>(0,nglocal),*this);
125 else {
126 if (sparta->kokkos->need_atomics)
127 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
128 else
129 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
130 }
131 DeviceType().fence();
132 copymode = 0;
133
134 if (need_dup) {
135 Kokkos::Experimental::contribute(d_tally, dup_tally);
136 dup_tally = decltype(dup_tally)(); // free duplicated memory
137 }
138
139 d_particles = t_particle_1d(); // destroy reference to reduce memory use
140 d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
141 }
142
143 /* ---------------------------------------------------------------------- */
144
145 template<int NEED_ATOMICS>
146 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const147 void ComputeGridKokkos::operator()(TagComputeGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
148
149 // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
150
151 auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
152 auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
153
154 const int ispecies = d_particles[i].ispecies;
155 const int igroup = d_s2g(imix,ispecies);
156 if (igroup < 0) return;
157
158 const int icell = d_particles[i].icell;
159 if (!(d_cinfo[icell].mask & groupbit)) return;
160
161 const double mass = d_species[ispecies].mass;
162 double* v = d_particles[i].v;
163
164 if (cellmass) a_tally(icell,cellmass) += mass;
165 if (cellcount) a_tally(icell,cellcount) += 1.0;
166
167 // loop has all possible values particle needs to accumulate
168 // subset defined by user values are indexed by accumulate vector
169
170 int k = igroup*npergroup;
171
172 for (int m = 0; m < npergroup; m++) {
173 switch (d_unique[m]) {
174 case COUNT:
175 a_tally(icell,k++) += 1.0;
176 break;
177 case MASSSUM:
178 a_tally(icell,k++) += mass;
179 break;
180 case MVX:
181 a_tally(icell,k++) += mass*v[0];
182 break;
183 case MVY:
184 a_tally(icell,k++) += mass*v[1];
185 break;
186 case MVZ:
187 a_tally(icell,k++) += mass*v[2];
188 break;
189 case MVXSQ:
190 a_tally(icell,k++) += mass*v[0]*v[0];
191 break;
192 case MVYSQ:
193 a_tally(icell,k++) += mass*v[1]*v[1];
194 break;
195 case MVZSQ:
196 a_tally(icell,k++) += mass*v[2]*v[2];
197 break;
198 case MVSQ:
199 a_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
200 break;
201 case ENGROT:
202 a_tally(icell,k++) += d_particles[i].erot;
203 break;
204 case ENGVIB:
205 a_tally(icell,k++) += d_particles[i].evib;
206 break;
207 case DOFROT:
208 a_tally(icell,k++) += d_species[ispecies].rotdof;
209 break;
210 case DOFVIB:
211 a_tally(icell,k++) += d_species[ispecies].vibdof;
212 break;
213 }
214 }
215 }
216
217 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_compute_per_grid,const int & icell) const218 void ComputeGridKokkos::operator()(TagComputeGrid_compute_per_grid, const int &icell) const {
219 if (!(d_cinfo[icell].mask & groupbit)) return;
220
221 const int np = d_cellcount[icell];
222
223 for (int n = 0; n < np; n++) {
224
225 const int i = d_plist(icell,n);
226
227 const int ispecies = d_particles[i].ispecies;
228 const int igroup = d_s2g(imix,ispecies);
229 if (igroup < 0) return;
230
231 const double mass = d_species[ispecies].mass;
232 double* v = d_particles[i].v;
233
234 if (cellmass) d_tally(icell,cellmass) += mass;
235 if (cellcount) d_tally(icell,cellcount) += 1.0;
236
237 // loop has all possible values particle needs to accumulate
238 // subset defined by user values are indexed by accumulate vector
239
240 int k = igroup*npergroup;
241
242 for (int m = 0; m < npergroup; m++) {
243 switch (d_unique[m]) {
244 case COUNT:
245 d_tally(icell,k++) += 1.0;
246 break;
247 case MASSSUM:
248 d_tally(icell,k++) += mass;
249 break;
250 case MVX:
251 d_tally(icell,k++) += mass*v[0];
252 break;
253 case MVY:
254 d_tally(icell,k++) += mass*v[1];
255 break;
256 case MVZ:
257 d_tally(icell,k++) += mass*v[2];
258 break;
259 case MVXSQ:
260 d_tally(icell,k++) += mass*v[0]*v[0];
261 break;
262 case MVYSQ:
263 d_tally(icell,k++) += mass*v[1]*v[1];
264 break;
265 case MVZSQ:
266 d_tally(icell,k++) += mass*v[2]*v[2];
267 break;
268 case MVSQ:
269 d_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
270 break;
271 case ENGROT:
272 d_tally(icell,k++) += d_particles[i].erot;
273 break;
274 case ENGVIB:
275 d_tally(icell,k++) += d_particles[i].evib;
276 break;
277 case DOFROT:
278 d_tally(icell,k++) += d_species[ispecies].rotdof;
279 break;
280 case DOFVIB:
281 d_tally(icell,k++) += d_species[ispecies].vibdof;
282 break;
283 }
284 }
285 }
286 }
287
288 /* ----------------------------------------------------------------------
289 query info about internal tally array for this compute
290 index = which column of output (0 for vec, 1 to N for array)
291 return # of tally quantities for this index
292 also return array = ptr to tally array
293 also return cols = ptr to list of columns in tally for this index
294 ------------------------------------------------------------------------- */
295
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)296 int ComputeGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
297 {
298 d_array = d_tally;
299 return 0;
300 }
301
302 /* ----------------------------------------------------------------------
303 tally accumulated info to compute final normalized values
304 index = which column of output (0 for vec, 1 to N for array)
305 for etally = NULL:
306 use internal tallied info for single timestep, set nsample = 1
307 if onecell = -1, compute values for all grid cells
308 store results in vector_grid with nstride = 1 (single col of array_grid)
309 if onecell >= 0, compute single value for onecell and return it
310 for etally = ptr to caller array:
311 use external tallied info for many timesteps
312 nsample = additional normalization factor used by some values
313 emap = list of etally columns to use, # of columns determined by index
314 store results in caller's vec, spaced by nstride
315 if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
316 ------------------------------------------------------------------------- */
317
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)318 void ComputeGridKokkos::post_process_grid_kokkos(int index, int nsample,
319 DAT::t_float_2d_lr d_etally, int *emap,
320 DAT::t_float_1d_strided d_vec)
321 {
322 index--;
323 int ivalue = index % nvalue;
324
325 int lo = 0;
326 int hi = nglocal;
327
328 if (!d_etally.data()) {
329 nsample = 1;
330 d_etally = d_tally;
331 emap = map[index];
332 d_vec = d_vector;
333 nstride = 1;
334 }
335
336 this->nsample = nsample;
337 this->d_etally = d_etally;
338 this->d_vec = d_vec;
339 this->nstride = nstride;
340
341 GridKokkos* grid_kk = (GridKokkos*) grid;
342 grid_kk->sync(Device,CINFO_MASK);
343 d_cinfo = grid_kk->k_cinfo.d_view;
344
345 fnum = update->fnum;
346
347 copymode = 1;
348
349 // compute normalized final value for each grid cell
350
351 switch (value[ivalue]) {
352
353 case NUM:
354 {
355 count = emap[0];
356 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NUM>(lo,hi),*this);
357 DeviceType().fence();
358 break;
359 }
360
361 case MASS:
362 {
363 mass = emap[0];
364 count = emap[1];
365 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_MASS>(lo,hi),*this);
366 DeviceType().fence();
367 break;
368 }
369
370 case NRHO:
371 {
372 count = emap[0];
373 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NRHO>(lo,hi),*this);
374 DeviceType().fence();
375 break;
376 }
377
378 case MASSRHO:
379 {
380 mass = emap[0];
381 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_MASSRHO>(lo,hi),*this);
382 DeviceType().fence();
383 break;
384 }
385
386 case NFRAC:
387 case MASSFRAC:
388 {
389 count_or_mass = emap[0];
390 cell_count_or_mass = emap[1];
391 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_NFRAC>(lo,hi),*this);
392 DeviceType().fence();
393 break;
394 }
395
396 case U:
397 case V:
398 case W:
399 case USQ:
400 case VSQ:
401 case WSQ:
402 {
403 velocity = emap[0];
404 mass = emap[1];
405 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_U>(lo,hi),*this);
406 DeviceType().fence();
407 break;
408 }
409
410 case KE:
411 {
412 mvsq = emap[0];
413 count = emap[1];
414 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_KE>(lo,hi),*this);
415 DeviceType().fence();
416 break;
417 }
418
419 case TEMPERATURE:
420 {
421 mvsq = emap[0];
422 count = emap[1];
423 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_TEMPERATURE>(lo,hi),*this);
424 DeviceType().fence();
425 break;
426 }
427
428 case EROT:
429 case EVIB:
430 {
431 eng = emap[0];
432 count = emap[1];
433 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_EROT>(lo,hi),*this);
434 DeviceType().fence();
435 break;
436 }
437
438 case TROT:
439 case TVIB:
440 {
441 eng = emap[0];
442 dof = emap[1];
443 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_TROT>(lo,hi),*this);
444 DeviceType().fence();
445 break;
446 }
447
448 case PXRHO:
449 case PYRHO:
450 case PZRHO:
451 {
452 mom = emap[0];
453 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_PXRHO>(lo,hi),*this);
454 DeviceType().fence();
455 break;
456 }
457
458 case KERHO:
459 {
460 ke = emap[0];
461 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeGrid_KERHO>(lo,hi),*this);
462 DeviceType().fence();
463 break;
464 }
465 }
466 copymode = 0;
467 }
468
469 /* ---------------------------------------------------------------------- */
470
471 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NUM,const int & icell) const472 void ComputeGridKokkos::operator()(TagComputeGrid_NUM, const int &icell) const {
473 d_vec[icell] = d_etally(icell,count) / nsample;
474 }
475
476 /* ---------------------------------------------------------------------- */
477
478 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_MASS,const int & icell) const479 void ComputeGridKokkos::operator()(TagComputeGrid_MASS, const int &icell) const {
480 const double norm = d_etally(icell,count);
481 if (norm == 0.0) d_vec[icell] = 0.0;
482 else d_vec[icell] = d_etally(icell,mass) / norm;
483 }
484
485 /* ---------------------------------------------------------------------- */
486
487 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NRHO,const int & icell) const488 void ComputeGridKokkos::operator()(TagComputeGrid_NRHO, const int &icell) const {
489 const double norm = d_cinfo[icell].volume;
490 if (norm == 0.0) d_vec[icell] = 0.0;
491 else {
492 const double wt = fnum * d_cinfo[icell].weight / norm;
493 d_vec[icell] = wt * d_etally(icell,count) / nsample;
494 }
495 }
496
497 /* ---------------------------------------------------------------------- */
498
499 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_MASSRHO,const int & icell) const500 void ComputeGridKokkos::operator()(TagComputeGrid_MASSRHO, const int &icell) const {
501 const double norm = d_cinfo[icell].volume;
502 if (norm == 0.0) d_vec[icell] = 0.0;
503 else {
504 const double wt = fnum * d_cinfo[icell].weight / norm;
505 d_vec[icell] = wt * d_etally(icell,mass) / nsample;
506 }
507 }
508
509 /* ---------------------------------------------------------------------- */
510
511 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_NFRAC,const int & icell) const512 void ComputeGridKokkos::operator()(TagComputeGrid_NFRAC, const int &icell) const {
513 const double norm = d_etally(icell,cell_count_or_mass);
514 if (norm == 0.0) d_vec[icell] = 0.0;
515 else d_vec[icell] = d_etally(icell,count_or_mass) / norm;
516 }
517
518 /* ---------------------------------------------------------------------- */
519
520 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_U,const int & icell) const521 void ComputeGridKokkos::operator()(TagComputeGrid_U, const int &icell) const {
522 const double norm = d_etally(icell,mass);
523 if (norm == 0.0) d_vec[icell] = 0.0;
524 else d_vec[icell] = d_etally(icell,velocity) / norm;
525 }
526
527 /* ---------------------------------------------------------------------- */
528
529 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_KE,const int & icell) const530 void ComputeGridKokkos::operator()(TagComputeGrid_KE, const int &icell) const {
531 const double norm = d_etally(icell,count);
532 if (norm == 0.0) d_vec[icell] = 0.0;
533 else d_vec[icell] = eprefactor * d_etally(icell,mvsq) / norm;
534 }
535
536 /* ---------------------------------------------------------------------- */
537
538 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_TEMPERATURE,const int & icell) const539 void ComputeGridKokkos::operator()(TagComputeGrid_TEMPERATURE, const int &icell) const {
540 const double norm = d_etally(icell,count);
541 if (norm == 0.0) d_vec[icell] = 0.0;
542 else d_vec[icell] = tprefactor * d_etally(icell,mvsq) / norm;
543 }
544
545 /* ---------------------------------------------------------------------- */
546
547 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_EROT,const int & icell) const548 void ComputeGridKokkos::operator()(TagComputeGrid_EROT, const int &icell) const {
549 const double norm = d_etally(icell,count);
550 if (norm == 0.0) d_vec[icell] = 0.0;
551 else d_vec[icell] = d_etally(icell,eng) / norm;
552 }
553
554 /* ---------------------------------------------------------------------- */
555
556 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_TROT,const int & icell) const557 void ComputeGridKokkos::operator()(TagComputeGrid_TROT, const int &icell) const {
558 const double norm = d_etally(icell,dof);
559 if (norm == 0.0) d_vec[icell] = 0.0;
560 else d_vec[icell] = rvprefactor * d_etally(icell,eng) / norm;
561 }
562
563 /* ---------------------------------------------------------------------- */
564
565 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_PXRHO,const int & icell) const566 void ComputeGridKokkos::operator()(TagComputeGrid_PXRHO, const int &icell) const {
567 const double norm = d_cinfo[icell].volume;
568 if (norm == 0.0) d_vec[icell] = 0.0;
569 else {
570 const double wt = fnum * d_cinfo[icell].weight / norm;
571 d_vec[icell] = wt * d_etally(icell,mom) / nsample;
572 }
573 }
574
575 /* ---------------------------------------------------------------------- */
576
577 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeGrid_KERHO,const int & icell) const578 void ComputeGridKokkos::operator()(TagComputeGrid_KERHO, const int &icell) const {
579 const double norm = d_cinfo[icell].volume;
580 if (norm == 0.0) d_vec[icell] = 0.0;
581 else {
582 const double wt = fnum * d_cinfo[icell].weight / norm;
583 d_vec[icell] = eprefactor * wt * d_etally(icell,ke) / nsample;
584 }
585 }
586
587 /* ----------------------------------------------------------------------
588 reallocate data storage if nglocal has changed
589 called by init() and whenever grid changes
590 ------------------------------------------------------------------------- */
591
reallocate()592 void ComputeGridKokkos::reallocate()
593 {
594 if (grid->nlocal == nglocal) return;
595
596 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
597 memoryKK->destroy_kokkos(k_tally,tally);
598 nglocal = grid->nlocal;
599 memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"compute_grid:vector_grid");
600 d_vector = k_vector_grid.d_view;
601 memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"compute_grid:tally");
602 d_tally = k_tally.d_view;
603 }
604