1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "math.h"
16 #include "stdlib.h"
17 #include "string.h"
18 #include "create_particles_kokkos.h"
19 #include "update.h"
20 #include "particle_kokkos.h"
21 #include "mixture.h"
22 #include "grid_kokkos.h"
23 #include "modify.h"
24 #include "comm.h"
25 #include "domain.h"
26 #include "region.h"
27 #include "input.h"
28 #include "variable.h"
29 #include "random_mars.h"
30 #include "random_knuth.h"
31 #include "math_const.h"
32 #include "memory_kokkos.h"
33 #include "error.h"
34 #include "kokkos_type.h"
35 #include "sparta_masks.h"
36 #include "kokkos.h"
37 
38 using namespace SPARTA_NS;
39 using namespace MathConst;
40 
41 enum{UNKNOWN,OUTSIDE,INSIDE,OVERLAP};   // same as Grid
42 
43 #define EPSZERO 1.0e-14
44 
CreateParticlesKokkos(SPARTA * spa)45 CreateParticlesKokkos::CreateParticlesKokkos(SPARTA* spa):
46   CreateParticles(spa)
47 {
48 }
49 
create_local(bigint np)50 void CreateParticlesKokkos::create_local(bigint np)
51 {
52   int dimension = domain->dimension;
53 
54   int me = comm->me;
55   RanKnuth *random = new RanKnuth(update->ranmaster->uniform());
56   double seed = update->ranmaster->uniform();
57   random->reset(seed,me,100);
58   Grid::ChildCell *cells = grid->cells;
59   Grid::ChildInfo *cinfo = grid->cinfo;
60   GridKokkos* grid_kk = ((GridKokkos*)grid);
61   grid_kk->sync(Host,CINFO_MASK|CELL_MASK);
62   int nglocal = grid->nlocal;
63 
64   // volme = volume of grid cells I own that are OUTSIDE surfs
65   // skip cells entirely outside region
66   // Nme = # of particles I will create
67   // MPI_Scan() logic insures sum of nme = Np
68 
69   double *lo,*hi;
70   double volone;
71 
72   double volme = 0.0;
73   for (int i = 0; i < nglocal; i++) {
74     if (cinfo[i].type != OUTSIDE) continue;
75     lo = cells[i].lo;
76     hi = cells[i].hi;
77     if (region && region->bboxflag && outside_region(dimension,lo,hi))
78       continue;
79 
80     if (dimension == 3) volone = (hi[0]-lo[0]) * (hi[1]-lo[1]) * (hi[2]-lo[2]);
81     else if (domain->axisymmetric)
82       volone = (hi[0]-lo[0]) * (hi[1]*hi[1]-lo[1]*lo[1])*MY_PI;
83     else volone = (hi[0]-lo[0]) * (hi[1]-lo[1]);
84     volme += volone / cinfo[i].weight;
85   }
86 
87   double volupto;
88   MPI_Scan(&volme,&volupto,1,MPI_DOUBLE,MPI_SUM,world);
89 
90   double *vols;
91   int nprocs = comm->nprocs;
92   memory->create(vols,nprocs,"create_particles:vols");
93   MPI_Allgather(&volupto,1,MPI_DOUBLE,vols,1,MPI_DOUBLE,world);
94 
95   // nme = # of particles for me to create
96   // gathered Scan results not guaranteed to be monotonically increasing
97   // can cause epsilon mis-counts for huge particle counts
98   // enforce that by brute force
99 
100   for (int i = 1; i < nprocs; i++)
101     if (vols[i] != vols[i-1] &&
102         fabs(vols[i]-vols[i-1])/vols[nprocs-1] < EPSZERO)
103       vols[i] = vols[i-1];
104 
105   bigint nstart,nstop;
106   if (me > 0) nstart = static_cast<bigint> (np * (vols[me-1]/vols[nprocs-1]));
107   else nstart = 0;
108   nstop = static_cast<bigint> (np * (vols[me]/vols[nprocs-1]));
109   bigint nme = nstop-nstart;
110 
111   memory->destroy(vols);
112 
113   // nfix_update_custom = # of fixes with update_custom() method
114 
115   modify->list_init_fixes();
116   int nfix_update_custom = modify->n_update_custom;
117 
118   // loop over cells I own
119   // only add particles to OUTSIDE cells
120   // skip cells entirely outside region
121   // ntarget = floating point # of particles to create in one cell
122   // npercell = integer # of particles to create in one cell
123   // basing ntarget on accumulated volume and nprev insures Nme total creations
124   // particle species = random value based on mixture fractions
125   // particle velocity = stream velocity + thermal velocity
126 
127   int *species = particle->mixture[imix]->species;
128   double *cummulative = particle->mixture[imix]->cummulative;
129   double *vstream = particle->mixture[imix]->vstream;
130   double *vscale = particle->mixture[imix]->vscale;
131   int nspecies = particle->mixture[imix]->nspecies;
132   double temp_thermal = particle->mixture[imix]->temp_thermal;
133   double temp_rot = particle->mixture[imix]->temp_rot;
134   double temp_vib = particle->mixture[imix]->temp_vib;
135 
136   double tempscale = 1.0;
137   double sqrttempscale = 1.0;
138 
139   double volsum = 0.0;
140   bigint nprev = 0;
141 
142   double vstream_variable[3];
143 
144   Kokkos::View<int*, DeviceType> d_npercell("npercell", nglocal);
145   auto h_npercell = Kokkos::create_mirror_view(d_npercell);
146 
147   for (int i = 0; i < nglocal; i++) {
148     if (cinfo[i].type != OUTSIDE) continue;
149     lo = cells[i].lo;
150     hi = cells[i].hi;
151     if (region && region->bboxflag && outside_region(dimension,lo,hi))
152       continue;
153 
154     if (dimension == 3) volone = (hi[0]-lo[0]) * (hi[1]-lo[1]) * (hi[2]-lo[2]);
155     else if (domain->axisymmetric)
156       volone = (hi[0]-lo[0]) * (hi[1]*hi[1]-lo[1]*lo[1])*MY_PI;
157     else volone = (hi[0]-lo[0]) * (hi[1]-lo[1]);
158     volsum += volone / cinfo[i].weight;
159 
160     double ntarget = nme * volsum/volme - nprev;
161     auto npercell = static_cast<int> (ntarget);
162     if (random->uniform() < ntarget-npercell) npercell++;
163     auto ncreate = npercell;
164 
165     if (densflag) {
166       auto scale = density_variable(lo,hi);
167       ntarget *= scale;
168       ncreate = static_cast<int> (ntarget);
169       if (random->uniform() < ntarget-ncreate) ncreate++;
170     }
171 
172     if (ncreate < 0) ncreate = 0;
173     h_npercell(i) = ncreate;
174 
175     // increment count without effect of density variation
176     // so that target insertion count is undisturbed
177 
178     nprev += npercell;
179   }
180   Kokkos::deep_copy(d_npercell, h_npercell);
181 
182   int ncands;
183   auto d_cells2cands = offset_scan(d_npercell, ncands);
184   auto h_cells2cands = Kokkos::create_mirror_view(d_cells2cands);
185   Kokkos::deep_copy(h_cells2cands, d_cells2cands);
186 
187   Kokkos::View<int*, DeviceType> d_keep("cand_keep", ncands);
188   Kokkos::View<int*, DeviceType> d_isp("cand_x", ncands);
189   Kokkos::View<double*[3], DeviceType> d_x("cand_x", ncands);
190   Kokkos::View<double*, DeviceType> d_vn("cand_vn", ncands);
191   Kokkos::View<double*, DeviceType> d_vr("cand_vr", ncands);
192   Kokkos::View<double*[2], DeviceType> d_theta("cand_theta", ncands);
193   Kokkos::View<double*, DeviceType> d_erot("cand_erot", ncands);
194   Kokkos::View<double*, DeviceType> d_evib("cand_evib", ncands);
195   Kokkos::View<int*, DeviceType> d_id("cand_id", ncands);
196   Kokkos::View<double*[3], DeviceType> d_v("cand_v", ncands);
197   auto h_keep = Kokkos::create_mirror_view(d_keep);
198   auto h_isp = Kokkos::create_mirror_view(d_isp);
199   auto h_x = Kokkos::create_mirror_view(d_x);
200   auto h_vn = Kokkos::create_mirror_view(d_vn);
201   auto h_vr = Kokkos::create_mirror_view(d_vr);
202   auto h_theta = Kokkos::create_mirror_view(d_theta);
203   auto h_erot = Kokkos::create_mirror_view(d_erot);
204   auto h_evib = Kokkos::create_mirror_view(d_evib);
205   auto h_id = Kokkos::create_mirror_view(d_id);
206   auto h_v = Kokkos::create_mirror_view(d_v);
207 
208   for (int i = 0; i < nglocal; i++) {
209     auto ncreate = h_npercell(i);
210     lo = cells[i].lo;
211     hi = cells[i].hi;
212 
213     for (int m = 0; m < ncreate; m++) {
214       auto cand = h_cells2cands(i) + m;
215       auto rn = random->uniform();
216       int isp = 0;
217       while (cummulative[isp] < rn) isp++;
218       auto ispecies = species[isp];
219 
220       double x[3];
221       x[0] = lo[0] + random->uniform() * (hi[0]-lo[0]);
222       x[1] = lo[1] + random->uniform() * (hi[1]-lo[1]);
223       x[2] = lo[2] + random->uniform() * (hi[2]-lo[2]);
224       if (dimension == 2) x[2] = 0.0;
225 
226       if (region && !region->match(x)) continue;
227       if (speciesflag) {
228         isp = species_variable(x) - 1;
229         if (isp < 0 || isp >= nspecies) continue;
230         ispecies = species[isp];
231       }
232 
233       h_keep(cand) = 1;
234       h_isp(cand) = isp;
235       for (int d = 0; d < 3; ++d) h_x(cand, d) = x[d];
236 
237       if (tempflag) {
238         tempscale = temperature_variable(x);
239         sqrttempscale = sqrt(tempscale);
240       }
241 
242       auto vn = vscale[isp] * sqrttempscale * sqrt(-log(random->uniform()));
243       auto vr = vscale[isp] * sqrttempscale * sqrt(-log(random->uniform()));
244 
245       auto theta1 = MY_2PI * random->uniform();
246       auto theta2 = MY_2PI * random->uniform();
247       h_vn(cand) = vn;
248       h_vr(cand) = vr;
249       h_theta(cand,0) = theta1;
250       h_theta(cand,1) = theta2;
251 
252       //these two functions also use random variables...
253       h_erot(cand) = particle->erot(ispecies,temp_rot*tempscale,random);
254       h_evib(cand) = particle->evib(ispecies,temp_vib*tempscale,random);
255 
256       h_id(cand) = MAXSMALLINT*random->uniform();
257 
258       double v[3];
259       if (velflag) {
260         velocity_variable(x,vstream,vstream_variable);
261         v[0] = vstream_variable[0] + vn*cos(theta1);
262         v[1] = vstream_variable[1] + vr*cos(theta2);
263         v[2] = vstream_variable[2] + vr*sin(theta2);
264       } else {
265         v[0] = vstream[0] + vn*cos(theta1);
266         v[1] = vstream[1] + vr*cos(theta2);
267         v[2] = vstream[2] + vr*sin(theta2);
268       }
269       for (int d = 0; d < 3; ++d) h_v(cand, d) = v[d];
270     }
271   }
272 
273   Kokkos::deep_copy(d_keep, h_keep);
274   Kokkos::deep_copy(d_isp, h_isp);
275   Kokkos::deep_copy(d_x, h_x);
276   Kokkos::deep_copy(d_vn, h_vn);
277   Kokkos::deep_copy(d_vr, h_vr);
278   Kokkos::deep_copy(d_theta, h_theta);
279   Kokkos::deep_copy(d_erot, h_erot);
280   Kokkos::deep_copy(d_evib, h_evib);
281   Kokkos::deep_copy(d_id, h_id);
282   Kokkos::deep_copy(d_v, h_v);
283 
284   int nnew;
285   auto d_cands2new = offset_scan(d_keep, nnew);
286 
287   auto particleKK = dynamic_cast<ParticleKokkos*>(particle);
288   particleKK->grow(nnew);
289   particleKK->sync(Device, PARTICLE_MASK | SPECIES_MASK);
290   auto d_particles = particleKK->k_particles.d_view;
291   Kokkos::View<int*, DeviceType> d_species("species", nspecies);
292   auto h_species = Kokkos::create_mirror_view(d_species);
293   for (int i = 0; i < nspecies; ++i) h_species(i) = species[i];
294   Kokkos::deep_copy(d_species, h_species);
295   auto nlocal_before = particleKK->nlocal;
296 
297   Kokkos::parallel_for(nglocal, SPARTA_LAMBDA(int i) {
298     auto ncreate = d_npercell(i);
299     for (int m = 0; m < ncreate; m++) {
300       auto cand = d_cells2cands(i) + m;
301       if (!d_keep(cand)) continue;
302       auto inew = d_cands2new(cand) + nlocal_before;
303       auto id = d_id(cand);
304       auto ispecies = d_species(d_isp(cand));
305       double x[3],v[3];
306       for (int d = 0; d < 3; ++d) x[d] = d_x(cand, d);
307       for (int d = 0; d < 3; ++d) v[d] = d_v(cand, d);
308       auto erot = d_erot(cand);
309       auto evib = d_evib(cand);
310       ParticleKokkos::add_particle_kokkos(d_particles,inew,id,ispecies,i,x,v,erot,evib);
311     }
312   });
313   particleKK->modify(Device,PARTICLE_MASK);
314   particleKK->sync(Host,PARTICLE_MASK);
315   particleKK->nlocal += nnew;
316 
317   auto h_cands2new = Kokkos::create_mirror_view(d_cands2new);
318   Kokkos::deep_copy(h_cands2new, d_cands2new);
319 
320   for (int i = 0; i < nglocal; i++) {
321     auto ncreate = h_npercell(i);
322     for (int m = 0; m < ncreate; m++) {
323       auto cand = h_cells2cands(i) + m;
324       if (!h_keep(cand)) continue;
325       auto inew = h_cands2new(cand) + nlocal_before;
326       if (nfix_update_custom)
327         modify->update_custom(inew,temp_thermal,temp_rot,temp_vib,vstream);
328     }
329   }
330 
331   delete random;
332 }
333 
334