1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "stdlib.h"
16 #include "string.h"
17 #include "fix_emit_face_kokkos.h"
18 #include "update.h"
19 #include "domain.h"
20 #include "region.h"
21 #include "grid.h"
22 #include "surf.h"
23 #include "particle.h"
24 #include "mixture.h"
25 #include "comm.h"
26 #include "modify.h"
27 #include "geometry.h"
28 #include "input.h"
29 #include "random_knuth.h"
30 #include "math_const.h"
31 #include "memory_kokkos.h"
32 #include "error.h"
33 #include "kokkos_type.h"
34 #include "particle_kokkos.h"
35 #include "sparta_masks.h"
36 #include "Kokkos_Random.hpp"
37 
38 using namespace SPARTA_NS;
39 using namespace MathConst;
40 
41 enum{XLO,XHI,YLO,YHI,ZLO,ZHI,INTERIOR};         // same as Domain
42 enum{PERIODIC,OUTFLOW,REFLECT,SURFACE,AXISYM};  // same as Domain
43 enum{UNKNOWN,OUTSIDE,INSIDE,OVERLAP};           // same as Grid
44 enum{PKEEP,PINSERT,PDONE,PDISCARD,PENTRY,PEXIT,PSURF};   // several files
45 enum{NCHILD,NPARENT,NUNKNOWN,NPBCHILD,NPBPARENT,NPBUNKNOWN,NBOUND};  // Grid
46 enum{NOSUBSONIC,PTBOTH,PONLY};
47 
48 #define DELTATASK 256
49 #define TEMPLIMIT 1.0e5
50 
51 /* ----------------------------------------------------------------------
52    insert particles in grid cells with faces touching inflow boundaries
53 ------------------------------------------------------------------------- */
54 
FixEmitFaceKokkos(SPARTA * sparta,int narg,char ** arg)55 FixEmitFaceKokkos::FixEmitFaceKokkos(SPARTA *sparta, int narg, char **arg) :
56   FixEmitFace(sparta, narg, arg),
57   rand_pool(12345 + comm->me
58 #ifdef SPARTA_KOKKOS_EXACT
59             , sparta
60 #endif
61             ),
62   particle_kk_copy(sparta)
63 {
64   kokkos_flag = 1;
65   execution_space = Device;
66   datamask_read = EMPTY_MASK;
67   datamask_modify = EMPTY_MASK;
68 }
69 
70 /* ---------------------------------------------------------------------- */
71 
~FixEmitFaceKokkos()72 FixEmitFaceKokkos::~FixEmitFaceKokkos()
73 {
74   if (copymode) return;
75 
76   particle_kk_copy.uncopy();
77 
78 #ifdef SPARTA_KOKKOS_EXACT
79   rand_pool.destroy();
80 #endif
81 
82   for (int i = 0; i < ntaskmax; i++) {
83     tasks[i].ntargetsp = NULL;
84     tasks[i].vscale = NULL;
85   }
86 
87   tasks = NULL;
88 }
89 
90 /* ---------------------------------------------------------------------- */
91 
init()92 void FixEmitFaceKokkos::init()
93 {
94   k_tasks.sync_host();
95   if (perspecies) k_ntargetsp.sync_host();
96   if (subsonic_style == PONLY) k_vscale.sync_host();
97 
98   FixEmitFace::init();
99 
100   k_tasks.modify_host();
101   if (perspecies) k_ntargetsp.modify_host();
102   if (subsonic_style == PONLY) k_vscale.modify_host();
103 
104 #ifdef SPARTA_KOKKOS_EXACT
105   rand_pool.init(random);
106 #endif
107 
108   k_mix_vscale  = DAT::tdual_float_1d("mix_vscale", nspecies);
109   k_cummulative = DAT::tdual_float_1d("cummulative", nspecies);
110   k_species     = DAT::tdual_int_1d("species", nspecies);
111 
112   d_mix_vscale  = k_mix_vscale .d_view;
113   d_cummulative = k_cummulative.d_view;
114   d_species     = k_species    .d_view;
115 
116   auto h_mix_vscale  = k_mix_vscale .h_view;
117   auto h_cummulative = k_cummulative.h_view;
118   auto h_species     = k_species    .h_view;
119 
120   for (int isp = 0; isp < nspecies; ++isp) {
121     h_mix_vscale(isp) = particle->mixture[imix]->vscale[isp];
122     h_cummulative(isp) = particle->mixture[imix]->cummulative[isp];
123     h_species(isp) = particle->mixture[imix]->species[isp];
124   }
125 
126   k_mix_vscale .modify_host();
127   k_cummulative.modify_host();
128   k_species    .modify_host();
129 }
130 
131 /* ----------------------------------------------------------------------
132    create tasks for one grid cell
133    add them to tasks list and increment ntasks
134 ------------------------------------------------------------------------- */
135 
create_task(int icell)136 void FixEmitFaceKokkos::create_task(int icell)
137 {
138   FixEmitFace::create_task(icell);
139   k_tasks.modify_host();
140   k_ntargetsp.modify_host();
141   k_vscale.modify_host();
142 }
143 
144 /* ---------------------------------------------------------------------- */
145 
perform_task()146 void FixEmitFaceKokkos::perform_task()
147 {
148   dt = update->dt;
149   auto l_dimension = this->dimension;
150   auto l_subsonic_style = this->subsonic_style;
151 
152   // if subsonic, re-compute particle inflow counts for each task
153   // also computes current per-task temp_thermal and vstream
154 
155   if (subsonic)
156     error->one(FLERR,"Cannot yet use fix emit/face/kk with subsonic emission");
157   //if (subsonic) subsonic_inflow(); ////////////////////////
158 
159   // insert particles for each task = cell/face pair
160   // ntarget/ninsert is either perspecies or for all species
161 
162   // copy needed task data to device
163 
164   if (perspecies) k_ntargetsp.sync_device();
165   else k_tasks.sync_device();
166 
167   auto ninsert_dim1 = perspecies ? nspecies : 1;
168   if (d_ninsert.extent(0) < ntask * ninsert_dim1)
169     d_ninsert = DAT::t_int_1d("ninsert", ntask * ninsert_dim1);
170 
171   copymode = 1;
172   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEmitFace_ninsert>(0,ntask),*this);
173   copymode = 0;
174 
175   int ncands;
176   d_task2cand = offset_scan(d_ninsert, ncands);
177 
178   if (ncands == 0) return;
179 
180   // for one particle:
181   //   x = random position on face
182   //   v = randomized thermal velocity + vstream
183   //       first stage: normal dimension (ndim)
184   //       second stage: parallel dimensions (pdim,qdim)
185 
186   // double while loop until randomized particle velocity meets 2 criteria
187   // inner do-while loop:
188   //   v = vstream-component + vthermal is into simulation box
189   //   see Bird 1994, p 425
190   // outer do-while loop:
191   //   shift Maxwellian distribution by stream velocity component
192   //   see Bird 1994, p 259, eq 12.5
193 
194   if (d_x.extent(0) < ncands || d_x.extent(1) < l_dimension)
195     d_x = DAT::t_float_2d("x", ncands, l_dimension);
196 
197   if (d_task.extent(0) < ncands) {
198     d_beta_un  = DAT::t_float_1d("beta_un", ncands);
199     d_theta    = DAT::t_float_1d("theta", ncands);
200     d_vr       = DAT::t_float_1d("vr", ncands);
201     d_erot     = DAT::t_float_1d("erot", ncands);
202     d_evib     = DAT::t_float_1d("evib", ncands);
203     d_dtremain = DAT::t_float_1d("dtremain", ncands);
204     d_id       = DAT::t_int_1d("id", ncands);
205     d_isp      = DAT::t_int_1d("isp", ncands);
206     d_task     = DAT::t_int_1d("task", ncands);
207     d_keep     = DAT::t_int_1d("keep", ncands);
208   }
209   Kokkos::deep_copy(d_keep,0); // needs to be initialized with zeros
210 
211   auto ld_x        = d_x       ;
212   auto ld_beta_un  = d_beta_un ;
213   auto ld_theta    = d_theta   ;
214   auto ld_vr       = d_vr      ;
215   auto ld_erot     = d_erot    ;
216   auto ld_evib     = d_evib    ;
217   auto ld_dtremain = d_dtremain;
218   auto ld_id       = d_id      ;
219   auto ld_isp      = d_isp     ;
220   auto ld_task     = d_task    ;
221   auto ld_keep     = d_keep    ;
222 
223   // copy needed task data to device
224 
225   k_tasks.sync_device();
226   if (perspecies) k_ntargetsp.sync_device();
227   if (subsonic_style == PONLY) k_vscale.sync_device();
228   auto ld_tasks = d_tasks;
229   auto ld_vscale = d_vscale;
230 
231   // copy needed mixture data to device
232 
233   k_mix_vscale .sync_device();
234   k_species    .sync_device();
235   k_cummulative.sync_device();
236 
237   auto ld_mix_vscale = d_mix_vscale;
238   auto ld_species    = d_species   ;
239 
240   ParticleKokkos* particle_kk = ((ParticleKokkos*)particle);
241   particle_kk->update_class_variables();
242   particle_kk_copy.copy(particle_kk);
243 
244   if (region)
245     error->one(FLERR,"Cannot yet use fix emit/face/kk with regions");
246 
247   int nsingle_reduce = 0;
248   copymode = 1;
249   Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEmitFace_perform_task>(0,ntask),*this,nsingle_reduce);
250   copymode = 0;
251   nsingle += nsingle_reduce;
252 
253   int nnew;
254   auto ld_cands2new = offset_scan(d_keep, nnew);
255 
256   auto particleKK = dynamic_cast<ParticleKokkos*>(particle);
257   auto nlocal_before = particleKK->nlocal;
258   particleKK->grow(nnew);
259   particleKK->sync(SPARTA_NS::Device, PARTICLE_MASK);
260   auto ld_particles = particleKK->k_particles.d_view;
261 
262   Kokkos::parallel_for(ncands, SPARTA_LAMBDA(int cand) {
263     if (!ld_keep(cand)) return;
264 
265     double *normal,*vstream;
266 
267     auto i = ld_task(cand);
268     Task task_i = ld_tasks(i);
269 
270     const int pcell = task_i.pcell;
271     const int ndim = task_i.ndim;
272     const int pdim = task_i.pdim;
273     const int qdim = task_i.qdim;
274     normal = task_i.normal;
275     vstream = task_i.vstream;
276 
277     auto isp = ld_isp(cand);
278 
279     auto vscale_val = (l_subsonic_style == PONLY) ?
280       ld_vscale(i, isp) : ld_mix_vscale(isp);
281 
282     auto ispecies = ld_species(isp);
283 
284     double x[3];
285     for (int d = 0; d < l_dimension; ++d) x[d] = ld_x(cand, d);
286     for (int d = l_dimension; d < 3; ++d) x[d] = 0;
287 
288     auto beta_un = ld_beta_un(cand);
289 
290     auto theta = ld_theta(cand);
291     auto vr = ld_vr(cand);
292     auto erot = ld_erot(cand);
293     auto evib = ld_evib(cand);
294     auto id = ld_id(cand);
295     auto dtremain = ld_dtremain(cand);
296 
297     double v[3];
298     v[ndim] = beta_un*vscale_val*normal[ndim] + vstream[ndim];
299     v[pdim] = vr * sin(theta) + vstream[pdim];
300     v[qdim] = vr * cos(theta) + vstream[qdim];
301 
302     auto inew = ld_cands2new(cand);
303     auto ilocal = nlocal_before + inew;
304 
305     ParticleKokkos::add_particle_kokkos(ld_particles,ilocal,
306         id,ispecies,pcell,x,v,erot,evib);
307 
308     ld_particles(ilocal).flag = PINSERT;
309     ld_particles(ilocal).dtremain = dtremain;
310   });
311   particleKK->nlocal = nlocal_before + nnew;
312   particleKK->modify(SPARTA_NS::Device, PARTICLE_MASK);
313 
314   if (modify->n_update_custom) {
315     auto h_keep = Kokkos::create_mirror_view(d_keep);
316     auto h_task = Kokkos::create_mirror_view(d_task);
317     Kokkos::deep_copy(h_keep, d_keep);
318     Kokkos::deep_copy(h_task, d_task);
319 
320     // copy needed task data to host
321 
322     k_tasks.sync_host();
323 
324     auto h_cands2new = Kokkos::create_mirror_view(ld_cands2new);
325     Kokkos::deep_copy(h_cands2new, ld_cands2new);
326     for (int cand = 0; cand < ncands; ++cand) {
327       if (!h_keep(cand)) continue;
328 
329       auto task = h_task(cand);
330 
331       auto temp_thermal = tasks[task].temp_thermal;
332       auto temp_rot = tasks[task].temp_rot;
333       auto temp_vib = tasks[task].temp_vib;
334       auto vstream = tasks[task].vstream;
335 
336       auto inew = h_cands2new(cand);
337       auto ilocal = nlocal_before + inew;
338 
339       modify->update_custom(ilocal,temp_thermal,
340           temp_rot,temp_vib,vstream);
341     }
342   }
343 }
344 
345 KOKKOS_INLINE_FUNCTION
operator ()(TagFixEmitFace_ninsert,const int & i) const346 void FixEmitFaceKokkos::operator()(TagFixEmitFace_ninsert, const int &i) const
347 {
348   int ninsert;
349 
350   rand_type rand_gen = rand_pool.get_state();
351 
352   if (perspecies) {
353     for (int isp = 0; isp < nspecies; isp++) {
354       auto ntarget = d_ntargetsp(i,isp)+rand_gen.drand();
355       ninsert = static_cast<int> (ntarget);
356       d_ninsert(i * nspecies + isp) = ninsert;
357     }
358   } else {
359     if (np == 0) {
360       auto ntarget = d_tasks(i).ntarget+rand_gen.drand();
361       ninsert = static_cast<int> (ntarget);
362     } else {
363       ninsert = npertask;
364       if (i >= nthresh) ninsert++;
365     }
366     d_ninsert(i) = ninsert;
367   }
368 
369   rand_pool.free_state(rand_gen);
370 }
371 
372 KOKKOS_INLINE_FUNCTION
operator ()(TagFixEmitFace_perform_task,const int & i,int & nsingle) const373 void FixEmitFaceKokkos::operator()(TagFixEmitFace_perform_task, const int &i, int &nsingle) const
374 {
375   double *lo,*hi,*normal,*vstream;
376 
377   rand_type rand_gen = rand_pool.get_state();
378 
379   Task task_i = d_tasks(i);
380 
381   lo = task_i.lo;
382   hi = task_i.hi;
383   normal = task_i.normal;
384 
385   const double temp_rot = task_i.temp_rot;
386   const double temp_vib = task_i.temp_vib;
387   vstream = task_i.vstream;
388 
389   auto indot = vstream[0]*normal[0] + vstream[1]*normal[1] + vstream[2]*normal[2];
390 
391   if (perspecies) {
392     for (int isp = 0; isp < nspecies; isp++) {
393       auto vscale_val = (subsonic_style == PONLY) ?
394         d_vscale(i, isp) : d_mix_vscale(isp);
395 
396       auto ispecies = d_species[isp];
397       auto ninsert = d_ninsert(i * nspecies + isp);
398       auto start = d_task2cand(i * nspecies + isp);
399       auto scosine = indot / vscale_val;
400 
401       int nactual = 0;
402       for (int m = 0; m < ninsert; m++) {
403         auto cand = start + m;
404         double x[3];
405         x[0] = lo[0] + rand_gen.drand() * (hi[0]-lo[0]);
406         x[1] = lo[1] + rand_gen.drand() * (hi[1]-lo[1]);
407         if (dimension == 3) x[2] = lo[2] + rand_gen.drand() * (hi[2]-lo[2]);
408         else x[2] = 0.0;
409 
410         //if (region && !region->match(x)) continue; ////////////////////////
411         nactual++;
412         d_keep(cand) = 1;
413         d_task(cand) = i;
414         d_isp(cand) = isp;
415         for (int d = 0; d < dimension; ++d) d_x(cand, d) = x[d];
416 
417         double beta_un, normalized_distbn_fn;
418         do {
419           do beta_un = (6.0*rand_gen.drand() - 3.0);
420           while (beta_un + scosine < 0.0);
421           normalized_distbn_fn = 2.0 * (beta_un + scosine) /
422             (scosine + sqrt(scosine*scosine + 2.0)) *
423             exp(0.5 + (0.5*scosine)*(scosine-sqrt(scosine*scosine + 2.0)) -
424                 beta_un*beta_un);
425         } while (normalized_distbn_fn < rand_gen.drand());
426 
427         d_beta_un(cand) = beta_un;
428 
429         d_theta(cand) = MY_2PI * rand_gen.drand();
430         d_vr(cand) = vscale_val * sqrt(-log(rand_gen.drand()));
431         d_erot(cand) = particle_kk_copy.obj.erot(ispecies,temp_rot,rand_gen);
432         d_evib(cand) = particle_kk_copy.obj.evib(ispecies,temp_vib,rand_gen);
433         d_id(cand) = MAXSMALLINT*rand_gen.drand();
434         d_dtremain(cand) = dt * rand_gen.drand();
435       }
436       nsingle += nactual;
437     }
438   } else {
439     auto ninsert = d_ninsert(i);
440     auto start = d_task2cand(i);
441 
442     int nactual = 0;
443     for (int m = 0; m < ninsert; m++) {
444       auto cand = start + m;
445       auto rn = rand_gen.drand();
446       int isp = 0;
447       while (d_cummulative[isp] < rn) isp++;
448       auto vscale_val = (subsonic_style == PONLY) ?
449         d_vscale(i, isp) : d_mix_vscale(isp);
450       auto ispecies = d_species[isp];
451       auto scosine = indot / vscale_val;
452 
453       double x[3];
454       x[0] = lo[0] + rand_gen.drand() * (hi[0]-lo[0]);
455       x[1] = lo[1] + rand_gen.drand() * (hi[1]-lo[1]);
456       if (dimension == 3) x[2] = lo[2] + rand_gen.drand() * (hi[2]-lo[2]);
457       else x[2] = 0.0;
458 
459       //if (region && !region->match(x)) continue; ////////////////////////
460       nactual++;
461       d_keep(cand) = 1;
462       d_task(cand) = i;
463       d_isp(cand) = isp;
464       for (int d = 0; d < dimension; ++d) d_x(cand, d) = x[d];
465 
466       double beta_un, normalized_distbn_fn;
467       do {
468         do {
469           beta_un = (6.0*rand_gen.drand() - 3.0);
470         } while (beta_un + scosine < 0.0);
471         normalized_distbn_fn = 2.0 * (beta_un + scosine) /
472           (scosine + sqrt(scosine*scosine + 2.0)) *
473           exp(0.5 + (0.5*scosine)*(scosine-sqrt(scosine*scosine + 2.0)) -
474               beta_un*beta_un);
475       } while (normalized_distbn_fn < rand_gen.drand());
476 
477       d_beta_un(cand) = beta_un;
478 
479       d_theta(cand) = MY_2PI * rand_gen.drand();
480       d_vr(cand) = vscale_val * sqrt(-log(rand_gen.drand()));
481       d_erot(cand) = particle_kk_copy.obj.erot(ispecies,temp_rot,rand_gen);
482       d_evib(cand) = particle_kk_copy.obj.evib(ispecies,temp_vib,rand_gen);
483       d_id(cand) = MAXSMALLINT*rand_gen.drand();
484       d_dtremain(cand) = dt * rand_gen.drand();
485     }
486     nsingle += nactual;
487   }
488   rand_pool.free_state(rand_gen);
489 }
490 
491 /* ----------------------------------------------------------------------
492    grow task list
493 ------------------------------------------------------------------------- */
494 
grow_task()495 void FixEmitFaceKokkos::grow_task()
496 {
497   ntaskmax += DELTATASK;
498 
499   if (tasks == NULL)
500     k_tasks = tdual_task_1d("emit/face:tasks",ntaskmax);
501   else {
502     k_tasks.sync_host();
503     k_tasks.modify_host(); // force resize on host
504     k_tasks.resize(ntaskmax);
505   }
506   d_tasks = k_tasks.d_view;
507   tasks = k_tasks.h_view.data();
508 
509   // set all new task bytes to 0 so valgrind won't complain
510   // if bytes between fields are uninitialized
511 
512   //memset(&tasks[oldmax],0,(ntaskmax-oldmax)*sizeof(Task));
513 
514   // allocate vectors in each new task or set to NULL
515 
516   if (perspecies) {
517     k_ntargetsp.sync_host();
518     k_ntargetsp.modify_host(); // force resize on host
519     k_ntargetsp.resize(ntaskmax,nspecies);
520     d_ntargetsp = k_ntargetsp.d_view;
521     for (int i = 0; i < ntaskmax; i++)
522       tasks[i].ntargetsp = k_ntargetsp.h_view.data() + i*k_ntargetsp.h_view.extent(1);
523   }
524 
525   if (subsonic_style == PONLY) {
526     k_vscale.modify_host(); // force resize on host
527     k_vscale.sync_host();
528     k_vscale.resize(ntaskmax,nspecies);
529     d_vscale = k_vscale.d_view;
530     for (int i = 0; i < ntaskmax; i++)
531       tasks[i].vscale = k_vscale.h_view.data() + i*k_vscale.h_view.extent(1);
532   }
533 }
534 
535 /* ----------------------------------------------------------------------
536    reallocate nspecies arrays
537 ------------------------------------------------------------------------- */
538 
realloc_nspecies()539 void FixEmitFaceKokkos::realloc_nspecies()
540 {
541   if (perspecies) {
542     k_ntargetsp = DAT::tdual_float_2d_lr("emit/face:ntargetsp",ntaskmax,nspecies);
543     d_ntargetsp = k_ntargetsp.d_view;
544     for (int i = 0; i < ntaskmax; i++)
545       tasks[i].ntargetsp = k_ntargetsp.h_view.data() + i*k_ntargetsp.h_view.extent(1);
546   }
547   if (subsonic_style == PONLY) {
548     k_vscale = DAT::tdual_float_2d_lr("emit/face:vscale",ntaskmax,nspecies);
549     d_vscale = k_vscale.d_view;
550     for (int i = 0; i < ntaskmax; i++)
551       tasks[i].vscale = k_vscale.h_view.data() + i*k_vscale.h_view.extent(1);
552   }
553 }
554