1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Alan Stagg (Sandia)
17 ------------------------------------------------------------------------- */
18 
19 #include "math.h"
20 #include "stdlib.h"
21 #include "string.h"
22 #include "compute_tvib_grid_kokkos.h"
23 #include "particle_kokkos.h"
24 #include "mixture.h"
25 #include "grid_kokkos.h"
26 #include "update.h"
27 #include "modify.h"
28 #include "comm.h"
29 #include "memory_kokkos.h"
30 #include "error.h"
31 #include "sparta_masks.h"
32 #include "kokkos.h"
33 
34 using namespace SPARTA_NS;
35 
36 // user keywords
37 
38 enum{NONE,COUNT,MASSWT,DOF};
39 
40 /* ---------------------------------------------------------------------- */
41 
ComputeTvibGridKokkos(SPARTA * sparta,int narg,char ** arg)42 ComputeTvibGridKokkos::ComputeTvibGridKokkos(SPARTA *sparta, int narg, char **arg) :
43   ComputeTvibGrid(sparta, narg, arg)
44 {
45   kokkos_flag = 1;
46 
47   if (modeflag == 0) {
48     k_s2t = DAT::tdual_int_1d("compute/tvib/grid:s2t",nspecies);
49     k_t2s = DAT::tdual_int_1d("compute/tvib/grid:t2s",ntally);
50 
51     for (int n = 0; n < nspecies; n++)
52       k_s2t.h_view(n) = s2t[n];
53 
54     for (int n = 0; n < ntally; n++)
55       k_t2s.h_view(n) = t2s[n];
56 
57     k_s2t.modify_host();
58     k_t2s.modify_host();
59 
60     k_s2t.sync_device();
61     k_t2s.sync_device();
62 
63     d_s2t = k_s2t.d_view;
64     d_t2s = k_t2s.d_view;
65   } else {
66     k_t2s_mode = DAT::tdual_int_1d("compute/tvib/grid:t2s_mode",ntally);
67     k_s2t_mode = DAT::tdual_int_2d("compute/tvib/grids2t_mode",nspecies,maxmode);
68 
69     for (int n = 0; n < nspecies; n++)
70       for (int m = 0; m < maxmode; m++)
71         k_s2t_mode.h_view(n,m) = s2t_mode[n][m];
72 
73     for (int n = 0; n < ntally; n++)
74       k_t2s_mode.h_view(n) = t2s_mode[n];
75 
76     k_s2t_mode.modify_host();
77     k_t2s_mode.modify_host();
78 
79     k_s2t_mode.sync_device();
80     k_t2s_mode.sync_device();
81 
82     d_s2t_mode = k_s2t_mode.d_view;
83     d_t2s_mode = k_t2s_mode.d_view;
84   }
85 
86   d_tspecies = DAT::t_float_1d("d_tspecies",nspecies);
87   d_tspecies_mode = DAT::t_float_2d_lr("d_tspecies_mode",nspecies,maxmode);
88 
89   boltz = update->boltz;
90 }
91 
92 /* ---------------------------------------------------------------------- */
93 
~ComputeTvibGridKokkos()94 ComputeTvibGridKokkos::~ComputeTvibGridKokkos()
95 {
96   if (copymode) return;
97 
98   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
99   memoryKK->destroy_kokkos(k_tally,tally);
100   vector_grid = NULL;
101   tally = NULL;
102 }
103 
104 /* ---------------------------------------------------------------------- */
105 
compute_per_grid()106 void ComputeTvibGridKokkos::compute_per_grid()
107 {
108   if (sparta->kokkos->prewrap) {
109     ComputeTvibGrid::compute_per_grid();
110   } else {
111     compute_per_grid_kokkos();
112     k_tally.modify_device();
113     k_tally.sync_host();
114   }
115 }
116 
117 /* ---------------------------------------------------------------------- */
118 
compute_per_grid_kokkos()119 void ComputeTvibGridKokkos::compute_per_grid_kokkos()
120 {
121   invoked_per_grid = update->ntimestep;
122 
123   ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
124   particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK|CUSTOM_MASK);
125   d_particles = particle_kk->k_particles.d_view;
126   d_species = particle_kk->k_species.d_view;
127   d_ewhich = particle_kk->k_ewhich.d_view;
128   k_eiarray = particle_kk->k_eiarray;
129 
130   GridKokkos* grid_kk = (GridKokkos*) grid;
131   d_cellcount = grid_kk->d_cellcount;
132   d_plist = grid_kk->d_plist;
133   grid_kk->sync(Device,CINFO_MASK);
134   d_cinfo = grid_kk->k_cinfo.d_view;
135 
136   d_s2g = particle_kk->k_species2group.view<DeviceType>();
137   int nlocal = particle->nlocal;
138 
139   // zero all accumulators
140 
141   Kokkos::deep_copy(d_tally,0.0);
142 
143   // loop over all particles, skip species not in mixture group
144   // mode = 0: tally vib eng and count for each species
145   // mode >= 1: tally vib level and count for each species and each vib mode
146 
147   need_dup = sparta->kokkos->need_dup<DeviceType>();
148   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
149     need_dup = 0;
150 
151   if (need_dup)
152     dup_tally = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_tally);
153   else
154     ndup_tally = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
155 
156   copymode = 1;
157   if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
158     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeTvibGrid_compute_per_grid>(0,nglocal),*this);
159   else {
160     if (sparta->kokkos->need_atomics)
161       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeTvibGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
162     else
163       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeTvibGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
164   }
165   copymode = 0;
166 
167   if (need_dup) {
168     Kokkos::Experimental::contribute(d_tally, dup_tally);
169     dup_tally = decltype(dup_tally)(); // free duplicated memory
170   }
171 
172   d_particles = t_particle_1d(); // destroy reference to reduce memory use
173   d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
174 }
175 
176 /* ---------------------------------------------------------------------- */
177 
178 template<int NEED_ATOMICS>
179 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeTvibGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const180 void ComputeTvibGridKokkos::operator()(TagComputeTvibGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
181   // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
182 
183   auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
184   auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
185 
186   const int ispecies = d_particles[i].ispecies;
187   if (!d_species[ispecies].vibdof) return;
188   const int igroup = d_s2g(imix,ispecies);
189   if (igroup < 0) return;
190   const int icell = d_particles[i].icell;
191   if (!(d_cinfo[icell].mask & groupbit)) return;
192 
193   if (modeflag == 0) {
194     const int j = d_s2t[ispecies];
195     a_tally(icell,j) += d_particles[i].evib;
196     a_tally(icell,j+1) += 1.0;
197   } else if (modeflag >= 1) {
198     auto &d_vibmode = k_eiarray.d_view[d_ewhich[index_vibmode]].k_view.d_view;
199 
200     // tally only the modes this species has
201 
202     const int nmode = d_species[ispecies].nvibmode;
203     for (int imode = 0; imode < nmode; imode++) {
204       const int j = d_s2t_mode(ispecies,imode);
205       if (nmode > 1) a_tally(icell,j) += d_vibmode(i,imode);
206       else a_tally(icell,j) +=
207              d_particles[i].evib / (boltz*d_species[ispecies].vibtemp[0]);
208       a_tally(icell,j+1) += 1.0;
209     }
210   }
211 }
212 
213 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeTvibGrid_compute_per_grid,const int & icell) const214 void ComputeTvibGridKokkos::operator()(TagComputeTvibGrid_compute_per_grid, const int &icell) const {
215   if (!(d_cinfo[icell].mask & groupbit)) return;
216 
217   const int np = d_cellcount[icell];
218 
219   for (int n = 0; n < np; n++) {
220     const int i = d_plist(icell,n);
221     const int ispecies = d_particles[i].ispecies;
222     if (!d_species[ispecies].vibdof) continue;
223     const int igroup = d_s2g(imix,ispecies);
224     if (igroup < 0) continue;
225 
226     if (modeflag == 0) {
227       const int j = d_s2t[ispecies];
228       d_tally(icell,j) += d_particles[i].evib;
229       d_tally(icell,j+1) += 1.0;
230     } else if (modeflag >= 1) {
231       auto &d_vibmode = k_eiarray.d_view[d_ewhich[index_vibmode]].k_view.d_view;
232 
233       // tally only the modes this species has
234 
235       const int nmode = d_species[ispecies].nvibmode;
236       for (int imode = 0; imode < nmode; imode++) {
237         const int j = d_s2t_mode(ispecies,imode);
238         if (nmode > 1) d_tally(icell,j) += d_vibmode(i,imode);
239         else d_tally(icell,j) +=
240                d_particles[i].evib / (boltz*d_species[ispecies].vibtemp[0]);
241         d_tally(icell,j+1) += 1.0;
242       }
243     }
244   }
245 }
246 
247 /* ----------------------------------------------------------------------
248    query info about internal tally array for this compute
249    index = which column of output (0 for vec, 1 to N for array)
250    return # of tally quantities for this index
251    also return array = ptr to tally array
252    also return cols = ptr to list of columns in tally for this index
253 ------------------------------------------------------------------------- */
254 
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)255 int ComputeTvibGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
256 {
257   d_array = d_tally;
258   return 0;
259 }
260 
261 /* ----------------------------------------------------------------------
262    tally accumulated info to compute final normalized values
263    index = which column of output (0 for vec, 1 to N for array)
264    for etally = NULL:
265      use internal tallied info for single timestep, set nsample = 1
266      if onecell = -1, compute values for all grid cells
267        store results in vector_grid with nstride = 1 (single col of array_grid)
268      if onecell >= 0, compute single value for onecell and return it
269    for etaylly = ptr to caller array:
270      use external tallied info for many timesteps
271      nsample = additional normalization factor used by some values
272      emap = list of etally columns to use, # of columns determined by index
273      store results in caller's vec, spaced by nstride
274    if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
275 ------------------------------------------------------------------------- */
276 
277 void ComputeTvibGridKokkos::
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)278 post_process_grid_kokkos(int index, int nsample,
279                          DAT::t_float_2d_lr d_etally, int *emap,
280                          DAT::t_float_1d_strided d_vec)
281 {
282   index--;
283 
284   int lo = 0;
285   int hi = nglocal;
286 
287   if (!d_etally.data()) {
288     nsample = 1;
289     d_etally = d_tally;
290     emap = map[index];
291     d_vec = d_vector;
292     nstride = 1;
293   }
294 
295   this->d_etally = d_etally;
296   this->d_vec = d_vec;
297   this->nstride = nstride;
298 
299   // compute normalized single Tgroup value for each grid cell
300   // compute Ibar and Tspecies for each species in the requested group
301   // ditto for individual vibrational modes if modeflag = 1 or 2
302   // loop over species/modes in group to compute normalized Tgroup
303 
304   GridKokkos* grid_kk = (GridKokkos*) grid;
305   grid_kk->sync(Device,CINFO_MASK);
306   d_cinfo = grid_kk->k_cinfo.d_view;
307 
308   if (modeflag == 0) {
309     nsp = nmap[index] / 2;
310     evib = emap[0];
311     count = emap[1];
312   } else if (modeflag == 1) {
313     nsp = nmap[index] / maxmode / 2;
314     evib = emap[0];
315     count = emap[1];
316   } else if (modeflag == 2) {
317     nsp = nmap[index] / maxmode / 2;
318     imode = index % maxmode;
319     evib = emap[2*imode];
320     count = emap[2*imode+1];
321   }
322 
323   copymode = 1;
324   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeTvibGrid_post_process_grid>(lo,hi),*this);
325   copymode = 0;
326 }
327 
328 /* ---------------------------------------------------------------------- */
329 
330 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeTvibGrid_post_process_grid,const int & icell) const331 void ComputeTvibGridKokkos::operator()(TagComputeTvibGrid_post_process_grid, const int &icell) const {
332   int evb = evib;
333   int cnt = evib+1;
334 
335   // modeflag = 0, no vib modes exist
336   // nsp = # of species in the group
337   // inputs: 2*nsp tallies
338   // output: Tgroup = weighted sum over all Tsp for species in group
339 
340   if (modeflag == 0) {
341 
342     for (int isp = 0; isp < nsp; ++isp) {
343       const int ispecies = d_t2s[evb];
344       const double theta = d_species[ispecies].vibtemp[0];
345       if (theta == 0.0 || d_etally(icell,cnt) == 0.0) {
346         d_tspecies[isp] = 0.0;
347         evb += 2;
348         cnt = evb+1;
349         continue;
350       }
351       const double ibar = d_etally(icell,evb) / (d_etally(icell,cnt) * boltz * theta);
352       if (ibar == 0.0) {
353         d_tspecies[isp] = 0.0;
354         evb += 2;
355         cnt = evb+1;
356         continue;
357       }
358       d_tspecies[isp] = theta / (log(1.0 + 1.0/ibar));
359       //denom = boltz * etally[icell][count] * ibar * log(1.0 + 1.0/ibar);
360       //tspecies[isp] = etally[icell][evb] / denom;
361       evb += 2;
362       cnt = evb+1;
363     }
364 
365     double numer = 0.0;
366     double denom = 0.0;
367     cnt = count;
368     for (int isp = 0; isp < nsp; isp++) {
369       numer += d_tspecies[isp]*d_etally(icell,cnt);
370       denom += d_etally(icell,cnt);
371       cnt += 2;
372     }
373 
374     if (denom == 0.0) d_vec[icell] = 0.0;
375     else d_vec[icell] = numer/denom;
376 
377   // modeflag = 1, vib modes exist
378   // Tgroup = weighted sum over all Tsp and modes for species in group
379   // nsp = # of species in the group
380   // maxmode = max # of modes for any species (unused values are zero)
381   // inputs: 2*nsp*maxmode tallies
382 
383   } else if (modeflag == 1) {
384     auto &d_vibmode = k_eiarray.d_view[d_ewhich[index_vibmode]].k_view.d_view;
385 
386     for (int isp = 0; isp < nsp; isp++) {
387       const int ispecies = d_t2s_mode[evb-evib];
388       for (int imode = 0; imode < maxmode; imode++) {
389         const double theta = d_species[ispecies].vibtemp[imode];
390         if (theta == 0.0 || d_etally(icell,cnt) == 0.0) {
391           d_tspecies_mode(isp,imode) = 0.0;
392           evb += 2;
393           cnt = evb+1;
394           continue;
395         }
396         const double ibar = d_etally(icell,evb) / d_etally(icell,cnt);
397         if (ibar == 0.0) {
398           d_tspecies_mode(isp,imode) = 0.0;
399           evb += 2;
400           cnt = evb+1;
401           continue;
402         }
403         d_tspecies_mode(isp,imode) = theta / (log(1.0 + 1.0/ibar));
404         //denom = boltz * etally[icell][count] * ibar * log(1.0 + 1.0/ibar);
405         //tspecies_mode[isp][imode] = etally[icell][evib] / denom;
406         evb += 2;
407         cnt = evb+1;
408       }
409     }
410 
411     // loop over species in group and all their modes
412     // to accumulate numerator & denominator
413 
414     double numer = 0.0;
415     double denom = 0.0;
416     cnt = count;
417     for (int isp = 0; isp < nsp; isp++) {
418       for (int imode = 0; imode < maxmode; imode++) {
419         numer += d_tspecies_mode(isp,imode)*d_etally(icell,cnt);
420         denom += d_etally(icell,cnt);
421         cnt += 2;
422       }
423     }
424     if (denom == 0.0) d_vec[icell] = 0.0;
425     else d_vec[icell] = numer/denom;
426 
427   // modeflag = 2, vib modes exist
428   // Tgroup = weighted sum over all Tsp and single mode for species in group
429   // nsp = # of species in the group
430   // imode = single mode correpsonding to caller index
431   // inputs: 2*nsp tallies strided by maxmode
432 
433   } else if (modeflag == 2) {
434     auto &d_vibmode = k_eiarray.d_view[d_ewhich[index_vibmode]].k_view.d_view;
435 
436     for (int isp = 0; isp < nsp; isp++) {
437       const int ispecies = d_t2s_mode[evb-evib];
438       const double theta = d_species[ispecies].vibtemp[imode];
439       if (theta == 0.0 || d_etally(icell,cnt) == 0.0) {
440         d_tspecies_mode(isp,imode) = 0.0;
441         evb += 2*maxmode;
442         cnt = evb+1;
443         continue;
444       }
445       const double ibar = d_etally(icell,evb) / d_etally(icell,cnt);
446       if (ibar == 0.0) {
447         d_tspecies_mode(isp,imode) = 0.0;
448         evb += 2*maxmode;
449         cnt = evb+1;
450         continue;
451       }
452       d_tspecies_mode(isp,imode) = theta / (log(1.0 + 1.0/ibar));
453       //denom = boltz * etally[icell][count] * ibar * log(1.0 + 1.0/ibar);
454       //tspecies_mode[isp][imode] = etally[icell][evib] / denom;
455       evb += 2*maxmode;
456       cnt = evb+1;
457     }
458 
459     // loop over species in group and single mode for each species
460     // to accumulate numerator & denominator
461 
462     double numer = 0.0;
463     double denom = 0.0;
464     cnt = count;
465     for (int isp = 0; isp < nsp; isp++) {
466       numer += d_tspecies_mode(isp,imode)*d_etally(icell,cnt);
467       denom += d_etally(icell,cnt);
468       cnt += 2*maxmode;
469     }
470 
471     if (denom == 0.0) d_vec[icell] = 0.0;
472     else d_vec[icell] = numer/denom;
473   }
474 
475 }
476 
477 /* ----------------------------------------------------------------------
478    reallocate arrays if nglocal has changed
479    called by init() and whenever grid changes
480 ------------------------------------------------------------------------- */
481 
reallocate()482 void ComputeTvibGridKokkos::reallocate()
483 {
484   if (grid->nlocal == nglocal) return;
485 
486   memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
487   memoryKK->destroy_kokkos(k_tally,tally);
488   nglocal = grid->nlocal;
489   memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"tvib/grid:vector_grid");
490   d_vector = k_vector_grid.d_view;
491   memoryKK->create_kokkos(k_tally,tally,nglocal,ntally,"tvib/grid:tally");
492   d_tally = k_tally.d_view;
493 }
494