1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "stdlib.h"
16 #include "string.h"
17 #include "fix_emit_face_kokkos.h"
18 #include "update.h"
19 #include "domain.h"
20 #include "region.h"
21 #include "grid.h"
22 #include "surf.h"
23 #include "particle.h"
24 #include "mixture.h"
25 #include "comm.h"
26 #include "modify.h"
27 #include "geometry.h"
28 #include "input.h"
29 #include "random_knuth.h"
30 #include "math_const.h"
31 #include "memory_kokkos.h"
32 #include "error.h"
33 #include "kokkos_type.h"
34 #include "particle_kokkos.h"
35 #include "sparta_masks.h"
36 #include "Kokkos_Random.hpp"
37
38 using namespace SPARTA_NS;
39 using namespace MathConst;
40
41 enum{XLO,XHI,YLO,YHI,ZLO,ZHI,INTERIOR}; // same as Domain
42 enum{PERIODIC,OUTFLOW,REFLECT,SURFACE,AXISYM}; // same as Domain
43 enum{UNKNOWN,OUTSIDE,INSIDE,OVERLAP}; // same as Grid
44 enum{PKEEP,PINSERT,PDONE,PDISCARD,PENTRY,PEXIT,PSURF}; // several files
45 enum{NCHILD,NPARENT,NUNKNOWN,NPBCHILD,NPBPARENT,NPBUNKNOWN,NBOUND}; // Grid
46 enum{NOSUBSONIC,PTBOTH,PONLY};
47
48 #define DELTATASK 256
49 #define TEMPLIMIT 1.0e5
50
51 /* ----------------------------------------------------------------------
52 insert particles in grid cells with faces touching inflow boundaries
53 ------------------------------------------------------------------------- */
54
FixEmitFaceKokkos(SPARTA * sparta,int narg,char ** arg)55 FixEmitFaceKokkos::FixEmitFaceKokkos(SPARTA *sparta, int narg, char **arg) :
56 FixEmitFace(sparta, narg, arg),
57 rand_pool(12345 + comm->me
58 #ifdef SPARTA_KOKKOS_EXACT
59 , sparta
60 #endif
61 ),
62 particle_kk_copy(sparta)
63 {
64 kokkos_flag = 1;
65 execution_space = Device;
66 datamask_read = EMPTY_MASK;
67 datamask_modify = EMPTY_MASK;
68 }
69
70 /* ---------------------------------------------------------------------- */
71
~FixEmitFaceKokkos()72 FixEmitFaceKokkos::~FixEmitFaceKokkos()
73 {
74 if (copymode) return;
75
76 particle_kk_copy.uncopy();
77
78 #ifdef SPARTA_KOKKOS_EXACT
79 rand_pool.destroy();
80 #endif
81
82 for (int i = 0; i < ntaskmax; i++) {
83 tasks[i].ntargetsp = NULL;
84 tasks[i].vscale = NULL;
85 }
86
87 tasks = NULL;
88 }
89
90 /* ---------------------------------------------------------------------- */
91
init()92 void FixEmitFaceKokkos::init()
93 {
94 k_tasks.sync_host();
95 if (perspecies) k_ntargetsp.sync_host();
96 if (subsonic_style == PONLY) k_vscale.sync_host();
97
98 FixEmitFace::init();
99
100 k_tasks.modify_host();
101 if (perspecies) k_ntargetsp.modify_host();
102 if (subsonic_style == PONLY) k_vscale.modify_host();
103
104 #ifdef SPARTA_KOKKOS_EXACT
105 rand_pool.init(random);
106 #endif
107
108 k_mix_vscale = DAT::tdual_float_1d("mix_vscale", nspecies);
109 k_cummulative = DAT::tdual_float_1d("cummulative", nspecies);
110 k_species = DAT::tdual_int_1d("species", nspecies);
111
112 d_mix_vscale = k_mix_vscale .d_view;
113 d_cummulative = k_cummulative.d_view;
114 d_species = k_species .d_view;
115
116 auto h_mix_vscale = k_mix_vscale .h_view;
117 auto h_cummulative = k_cummulative.h_view;
118 auto h_species = k_species .h_view;
119
120 for (int isp = 0; isp < nspecies; ++isp) {
121 h_mix_vscale(isp) = particle->mixture[imix]->vscale[isp];
122 h_cummulative(isp) = particle->mixture[imix]->cummulative[isp];
123 h_species(isp) = particle->mixture[imix]->species[isp];
124 }
125
126 k_mix_vscale .modify_host();
127 k_cummulative.modify_host();
128 k_species .modify_host();
129 }
130
131 /* ----------------------------------------------------------------------
132 create tasks for one grid cell
133 add them to tasks list and increment ntasks
134 ------------------------------------------------------------------------- */
135
create_task(int icell)136 void FixEmitFaceKokkos::create_task(int icell)
137 {
138 FixEmitFace::create_task(icell);
139 k_tasks.modify_host();
140 k_ntargetsp.modify_host();
141 k_vscale.modify_host();
142 }
143
144 /* ---------------------------------------------------------------------- */
145
perform_task()146 void FixEmitFaceKokkos::perform_task()
147 {
148 dt = update->dt;
149 auto l_dimension = this->dimension;
150 auto l_subsonic_style = this->subsonic_style;
151
152 // if subsonic, re-compute particle inflow counts for each task
153 // also computes current per-task temp_thermal and vstream
154
155 if (subsonic)
156 error->one(FLERR,"Cannot yet use fix emit/face/kk with subsonic emission");
157 //if (subsonic) subsonic_inflow(); ////////////////////////
158
159 // insert particles for each task = cell/face pair
160 // ntarget/ninsert is either perspecies or for all species
161
162 // copy needed task data to device
163
164 if (perspecies) k_ntargetsp.sync_device();
165 else k_tasks.sync_device();
166
167 auto ninsert_dim1 = perspecies ? nspecies : 1;
168 if (d_ninsert.extent(0) < ntask * ninsert_dim1)
169 d_ninsert = DAT::t_int_1d("ninsert", ntask * ninsert_dim1);
170
171 copymode = 1;
172 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEmitFace_ninsert>(0,ntask),*this);
173 copymode = 0;
174
175 int ncands;
176 d_task2cand = offset_scan(d_ninsert, ncands);
177
178 if (ncands == 0) return;
179
180 // for one particle:
181 // x = random position on face
182 // v = randomized thermal velocity + vstream
183 // first stage: normal dimension (ndim)
184 // second stage: parallel dimensions (pdim,qdim)
185
186 // double while loop until randomized particle velocity meets 2 criteria
187 // inner do-while loop:
188 // v = vstream-component + vthermal is into simulation box
189 // see Bird 1994, p 425
190 // outer do-while loop:
191 // shift Maxwellian distribution by stream velocity component
192 // see Bird 1994, p 259, eq 12.5
193
194 if (d_x.extent(0) < ncands || d_x.extent(1) < l_dimension)
195 d_x = DAT::t_float_2d("x", ncands, l_dimension);
196
197 if (d_task.extent(0) < ncands) {
198 d_beta_un = DAT::t_float_1d("beta_un", ncands);
199 d_theta = DAT::t_float_1d("theta", ncands);
200 d_vr = DAT::t_float_1d("vr", ncands);
201 d_erot = DAT::t_float_1d("erot", ncands);
202 d_evib = DAT::t_float_1d("evib", ncands);
203 d_dtremain = DAT::t_float_1d("dtremain", ncands);
204 d_id = DAT::t_int_1d("id", ncands);
205 d_isp = DAT::t_int_1d("isp", ncands);
206 d_task = DAT::t_int_1d("task", ncands);
207 d_keep = DAT::t_int_1d("keep", ncands);
208 }
209 Kokkos::deep_copy(d_keep,0); // needs to be initialized with zeros
210
211 auto ld_x = d_x ;
212 auto ld_beta_un = d_beta_un ;
213 auto ld_theta = d_theta ;
214 auto ld_vr = d_vr ;
215 auto ld_erot = d_erot ;
216 auto ld_evib = d_evib ;
217 auto ld_dtremain = d_dtremain;
218 auto ld_id = d_id ;
219 auto ld_isp = d_isp ;
220 auto ld_task = d_task ;
221 auto ld_keep = d_keep ;
222
223 // copy needed task data to device
224
225 k_tasks.sync_device();
226 if (perspecies) k_ntargetsp.sync_device();
227 if (subsonic_style == PONLY) k_vscale.sync_device();
228 auto ld_tasks = d_tasks;
229 auto ld_vscale = d_vscale;
230
231 // copy needed mixture data to device
232
233 k_mix_vscale .sync_device();
234 k_species .sync_device();
235 k_cummulative.sync_device();
236
237 auto ld_mix_vscale = d_mix_vscale;
238 auto ld_species = d_species ;
239
240 ParticleKokkos* particle_kk = ((ParticleKokkos*)particle);
241 particle_kk->update_class_variables();
242 particle_kk_copy.copy(particle_kk);
243
244 if (region)
245 error->one(FLERR,"Cannot yet use fix emit/face/kk with regions");
246
247 int nsingle_reduce = 0;
248 copymode = 1;
249 Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEmitFace_perform_task>(0,ntask),*this,nsingle_reduce);
250 copymode = 0;
251 nsingle += nsingle_reduce;
252
253 int nnew;
254 auto ld_cands2new = offset_scan(d_keep, nnew);
255
256 auto particleKK = dynamic_cast<ParticleKokkos*>(particle);
257 auto nlocal_before = particleKK->nlocal;
258 particleKK->grow(nnew);
259 particleKK->sync(SPARTA_NS::Device, PARTICLE_MASK);
260 auto ld_particles = particleKK->k_particles.d_view;
261
262 Kokkos::parallel_for(ncands, SPARTA_LAMBDA(int cand) {
263 if (!ld_keep(cand)) return;
264
265 double *normal,*vstream;
266
267 auto i = ld_task(cand);
268 Task task_i = ld_tasks(i);
269
270 const int pcell = task_i.pcell;
271 const int ndim = task_i.ndim;
272 const int pdim = task_i.pdim;
273 const int qdim = task_i.qdim;
274 normal = task_i.normal;
275 vstream = task_i.vstream;
276
277 auto isp = ld_isp(cand);
278
279 auto vscale_val = (l_subsonic_style == PONLY) ?
280 ld_vscale(i, isp) : ld_mix_vscale(isp);
281
282 auto ispecies = ld_species(isp);
283
284 double x[3];
285 for (int d = 0; d < l_dimension; ++d) x[d] = ld_x(cand, d);
286 for (int d = l_dimension; d < 3; ++d) x[d] = 0;
287
288 auto beta_un = ld_beta_un(cand);
289
290 auto theta = ld_theta(cand);
291 auto vr = ld_vr(cand);
292 auto erot = ld_erot(cand);
293 auto evib = ld_evib(cand);
294 auto id = ld_id(cand);
295 auto dtremain = ld_dtremain(cand);
296
297 double v[3];
298 v[ndim] = beta_un*vscale_val*normal[ndim] + vstream[ndim];
299 v[pdim] = vr * sin(theta) + vstream[pdim];
300 v[qdim] = vr * cos(theta) + vstream[qdim];
301
302 auto inew = ld_cands2new(cand);
303 auto ilocal = nlocal_before + inew;
304
305 ParticleKokkos::add_particle_kokkos(ld_particles,ilocal,
306 id,ispecies,pcell,x,v,erot,evib);
307
308 ld_particles(ilocal).flag = PINSERT;
309 ld_particles(ilocal).dtremain = dtremain;
310 });
311 particleKK->nlocal = nlocal_before + nnew;
312 particleKK->modify(SPARTA_NS::Device, PARTICLE_MASK);
313
314 if (modify->n_update_custom) {
315 auto h_keep = Kokkos::create_mirror_view(d_keep);
316 auto h_task = Kokkos::create_mirror_view(d_task);
317 Kokkos::deep_copy(h_keep, d_keep);
318 Kokkos::deep_copy(h_task, d_task);
319
320 // copy needed task data to host
321
322 k_tasks.sync_host();
323
324 auto h_cands2new = Kokkos::create_mirror_view(ld_cands2new);
325 Kokkos::deep_copy(h_cands2new, ld_cands2new);
326 for (int cand = 0; cand < ncands; ++cand) {
327 if (!h_keep(cand)) continue;
328
329 auto task = h_task(cand);
330
331 auto temp_thermal = tasks[task].temp_thermal;
332 auto temp_rot = tasks[task].temp_rot;
333 auto temp_vib = tasks[task].temp_vib;
334 auto vstream = tasks[task].vstream;
335
336 auto inew = h_cands2new(cand);
337 auto ilocal = nlocal_before + inew;
338
339 modify->update_custom(ilocal,temp_thermal,
340 temp_rot,temp_vib,vstream);
341 }
342 }
343 }
344
345 KOKKOS_INLINE_FUNCTION
operator ()(TagFixEmitFace_ninsert,const int & i) const346 void FixEmitFaceKokkos::operator()(TagFixEmitFace_ninsert, const int &i) const
347 {
348 int ninsert;
349
350 rand_type rand_gen = rand_pool.get_state();
351
352 if (perspecies) {
353 for (int isp = 0; isp < nspecies; isp++) {
354 auto ntarget = d_ntargetsp(i,isp)+rand_gen.drand();
355 ninsert = static_cast<int> (ntarget);
356 d_ninsert(i * nspecies + isp) = ninsert;
357 }
358 } else {
359 if (np == 0) {
360 auto ntarget = d_tasks(i).ntarget+rand_gen.drand();
361 ninsert = static_cast<int> (ntarget);
362 } else {
363 ninsert = npertask;
364 if (i >= nthresh) ninsert++;
365 }
366 d_ninsert(i) = ninsert;
367 }
368
369 rand_pool.free_state(rand_gen);
370 }
371
372 KOKKOS_INLINE_FUNCTION
operator ()(TagFixEmitFace_perform_task,const int & i,int & nsingle) const373 void FixEmitFaceKokkos::operator()(TagFixEmitFace_perform_task, const int &i, int &nsingle) const
374 {
375 double *lo,*hi,*normal,*vstream;
376
377 rand_type rand_gen = rand_pool.get_state();
378
379 Task task_i = d_tasks(i);
380
381 lo = task_i.lo;
382 hi = task_i.hi;
383 normal = task_i.normal;
384
385 const double temp_rot = task_i.temp_rot;
386 const double temp_vib = task_i.temp_vib;
387 vstream = task_i.vstream;
388
389 auto indot = vstream[0]*normal[0] + vstream[1]*normal[1] + vstream[2]*normal[2];
390
391 if (perspecies) {
392 for (int isp = 0; isp < nspecies; isp++) {
393 auto vscale_val = (subsonic_style == PONLY) ?
394 d_vscale(i, isp) : d_mix_vscale(isp);
395
396 auto ispecies = d_species[isp];
397 auto ninsert = d_ninsert(i * nspecies + isp);
398 auto start = d_task2cand(i * nspecies + isp);
399 auto scosine = indot / vscale_val;
400
401 int nactual = 0;
402 for (int m = 0; m < ninsert; m++) {
403 auto cand = start + m;
404 double x[3];
405 x[0] = lo[0] + rand_gen.drand() * (hi[0]-lo[0]);
406 x[1] = lo[1] + rand_gen.drand() * (hi[1]-lo[1]);
407 if (dimension == 3) x[2] = lo[2] + rand_gen.drand() * (hi[2]-lo[2]);
408 else x[2] = 0.0;
409
410 //if (region && !region->match(x)) continue; ////////////////////////
411 nactual++;
412 d_keep(cand) = 1;
413 d_task(cand) = i;
414 d_isp(cand) = isp;
415 for (int d = 0; d < dimension; ++d) d_x(cand, d) = x[d];
416
417 double beta_un, normalized_distbn_fn;
418 do {
419 do beta_un = (6.0*rand_gen.drand() - 3.0);
420 while (beta_un + scosine < 0.0);
421 normalized_distbn_fn = 2.0 * (beta_un + scosine) /
422 (scosine + sqrt(scosine*scosine + 2.0)) *
423 exp(0.5 + (0.5*scosine)*(scosine-sqrt(scosine*scosine + 2.0)) -
424 beta_un*beta_un);
425 } while (normalized_distbn_fn < rand_gen.drand());
426
427 d_beta_un(cand) = beta_un;
428
429 d_theta(cand) = MY_2PI * rand_gen.drand();
430 d_vr(cand) = vscale_val * sqrt(-log(rand_gen.drand()));
431 d_erot(cand) = particle_kk_copy.obj.erot(ispecies,temp_rot,rand_gen);
432 d_evib(cand) = particle_kk_copy.obj.evib(ispecies,temp_vib,rand_gen);
433 d_id(cand) = MAXSMALLINT*rand_gen.drand();
434 d_dtremain(cand) = dt * rand_gen.drand();
435 }
436 nsingle += nactual;
437 }
438 } else {
439 auto ninsert = d_ninsert(i);
440 auto start = d_task2cand(i);
441
442 int nactual = 0;
443 for (int m = 0; m < ninsert; m++) {
444 auto cand = start + m;
445 auto rn = rand_gen.drand();
446 int isp = 0;
447 while (d_cummulative[isp] < rn) isp++;
448 auto vscale_val = (subsonic_style == PONLY) ?
449 d_vscale(i, isp) : d_mix_vscale(isp);
450 auto ispecies = d_species[isp];
451 auto scosine = indot / vscale_val;
452
453 double x[3];
454 x[0] = lo[0] + rand_gen.drand() * (hi[0]-lo[0]);
455 x[1] = lo[1] + rand_gen.drand() * (hi[1]-lo[1]);
456 if (dimension == 3) x[2] = lo[2] + rand_gen.drand() * (hi[2]-lo[2]);
457 else x[2] = 0.0;
458
459 //if (region && !region->match(x)) continue; ////////////////////////
460 nactual++;
461 d_keep(cand) = 1;
462 d_task(cand) = i;
463 d_isp(cand) = isp;
464 for (int d = 0; d < dimension; ++d) d_x(cand, d) = x[d];
465
466 double beta_un, normalized_distbn_fn;
467 do {
468 do {
469 beta_un = (6.0*rand_gen.drand() - 3.0);
470 } while (beta_un + scosine < 0.0);
471 normalized_distbn_fn = 2.0 * (beta_un + scosine) /
472 (scosine + sqrt(scosine*scosine + 2.0)) *
473 exp(0.5 + (0.5*scosine)*(scosine-sqrt(scosine*scosine + 2.0)) -
474 beta_un*beta_un);
475 } while (normalized_distbn_fn < rand_gen.drand());
476
477 d_beta_un(cand) = beta_un;
478
479 d_theta(cand) = MY_2PI * rand_gen.drand();
480 d_vr(cand) = vscale_val * sqrt(-log(rand_gen.drand()));
481 d_erot(cand) = particle_kk_copy.obj.erot(ispecies,temp_rot,rand_gen);
482 d_evib(cand) = particle_kk_copy.obj.evib(ispecies,temp_vib,rand_gen);
483 d_id(cand) = MAXSMALLINT*rand_gen.drand();
484 d_dtremain(cand) = dt * rand_gen.drand();
485 }
486 nsingle += nactual;
487 }
488 rand_pool.free_state(rand_gen);
489 }
490
491 /* ----------------------------------------------------------------------
492 grow task list
493 ------------------------------------------------------------------------- */
494
grow_task()495 void FixEmitFaceKokkos::grow_task()
496 {
497 ntaskmax += DELTATASK;
498
499 if (tasks == NULL)
500 k_tasks = tdual_task_1d("emit/face:tasks",ntaskmax);
501 else {
502 k_tasks.sync_host();
503 k_tasks.modify_host(); // force resize on host
504 k_tasks.resize(ntaskmax);
505 }
506 d_tasks = k_tasks.d_view;
507 tasks = k_tasks.h_view.data();
508
509 // set all new task bytes to 0 so valgrind won't complain
510 // if bytes between fields are uninitialized
511
512 //memset(&tasks[oldmax],0,(ntaskmax-oldmax)*sizeof(Task));
513
514 // allocate vectors in each new task or set to NULL
515
516 if (perspecies) {
517 k_ntargetsp.sync_host();
518 k_ntargetsp.modify_host(); // force resize on host
519 k_ntargetsp.resize(ntaskmax,nspecies);
520 d_ntargetsp = k_ntargetsp.d_view;
521 for (int i = 0; i < ntaskmax; i++)
522 tasks[i].ntargetsp = k_ntargetsp.h_view.data() + i*k_ntargetsp.h_view.extent(1);
523 }
524
525 if (subsonic_style == PONLY) {
526 k_vscale.modify_host(); // force resize on host
527 k_vscale.sync_host();
528 k_vscale.resize(ntaskmax,nspecies);
529 d_vscale = k_vscale.d_view;
530 for (int i = 0; i < ntaskmax; i++)
531 tasks[i].vscale = k_vscale.h_view.data() + i*k_vscale.h_view.extent(1);
532 }
533 }
534
535 /* ----------------------------------------------------------------------
536 reallocate nspecies arrays
537 ------------------------------------------------------------------------- */
538
realloc_nspecies()539 void FixEmitFaceKokkos::realloc_nspecies()
540 {
541 if (perspecies) {
542 k_ntargetsp = DAT::tdual_float_2d_lr("emit/face:ntargetsp",ntaskmax,nspecies);
543 d_ntargetsp = k_ntargetsp.d_view;
544 for (int i = 0; i < ntaskmax; i++)
545 tasks[i].ntargetsp = k_ntargetsp.h_view.data() + i*k_ntargetsp.h_view.extent(1);
546 }
547 if (subsonic_style == PONLY) {
548 k_vscale = DAT::tdual_float_2d_lr("emit/face:vscale",ntaskmax,nspecies);
549 d_vscale = k_vscale.d_view;
550 for (int i = 0; i < ntaskmax; i++)
551 tasks[i].vscale = k_vscale.h_view.data() + i*k_vscale.h_view.extent(1);
552 }
553 }
554