1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "math.h"
16 #include "stdlib.h"
17 #include "string.h"
18 #include "compute_thermal_grid_kokkos.h"
19 #include "particle_kokkos.h"
20 #include "mixture.h"
21 #include "grid_kokkos.h"
22 #include "update.h"
23 #include "modify.h"
24 #include "comm.h"
25 #include "memory_kokkos.h"
26 #include "error.h"
27 #include "sparta_masks.h"
28 #include "kokkos.h"
29
30 using namespace SPARTA_NS;
31
32 // user keywords
33
34 enum{TEMP,PRESS};
35
36
37
38
39 /* ---------------------------------------------------------------------- */
40
ComputeThermalGridKokkos(SPARTA * sparta,int narg,char ** arg)41 ComputeThermalGridKokkos::ComputeThermalGridKokkos(SPARTA *sparta, int narg, char **arg) :
42 ComputeThermalGrid(sparta, narg, arg)
43 {
44 kokkos_flag = 1;
45 }
46
47 /* ---------------------------------------------------------------------- */
48
~ComputeThermalGridKokkos()49 ComputeThermalGridKokkos::~ComputeThermalGridKokkos()
50 {
51 if (copymode) return;
52
53 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
54 memoryKK->destroy_kokkos(k_tally,tally);
55 vector_grid = NULL;
56 tally = NULL;
57 }
58
59 /* ---------------------------------------------------------------------- */
60
compute_per_grid()61 void ComputeThermalGridKokkos::compute_per_grid()
62 {
63 if (sparta->kokkos->prewrap) {
64 ComputeThermalGrid::compute_per_grid();
65 } else {
66 compute_per_grid_kokkos();
67 k_tally.modify_device();
68 k_tally.sync_host();
69 }
70 }
71
72 /* ---------------------------------------------------------------------- */
73
compute_per_grid_kokkos()74 void ComputeThermalGridKokkos::compute_per_grid_kokkos()
75 {
76 invoked_per_grid = update->ntimestep;
77
78 ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
79 particle_kk->sync(Device,PARTICLE_MASK|SPECIES_MASK);
80 d_particles = particle_kk->k_particles.d_view;
81 d_species = particle_kk->k_species.d_view;
82
83 GridKokkos* grid_kk = (GridKokkos*) grid;
84 d_cellcount = grid_kk->d_cellcount;
85 d_plist = grid_kk->d_plist;
86 grid_kk->sync(Device,CINFO_MASK);
87 d_cinfo = grid_kk->k_cinfo.d_view;
88
89 d_s2g = particle_kk->k_species2group.d_view;
90 int nlocal = particle->nlocal;
91
92 // zero all accumulators
93
94 Kokkos::deep_copy(d_tally,0.0);
95
96 // loop over all particles, skip species not in mixture group
97
98 need_dup = sparta->kokkos->need_dup<DeviceType>();
99 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
100 need_dup = 0;
101
102 if (need_dup)
103 dup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_tally);
104 else
105 ndup_tally = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_tally);
106
107 copymode = 1;
108 if (particle_kk->sorted_kk && sparta->kokkos->need_atomics && !sparta->kokkos->atomic_reduction)
109 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid>(0,nglocal),*this);
110 else {
111 if (sparta->kokkos->need_atomics)
112 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid_atomic<1> >(0,nlocal),*this);
113 else
114 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_compute_per_grid_atomic<0> >(0,nlocal),*this);
115 }
116 DeviceType().fence();
117 copymode = 0;
118
119 if (need_dup) {
120 Kokkos::Experimental::contribute(d_tally, dup_tally);
121 dup_tally = decltype(dup_tally)(); // free duplicated memory
122 }
123
124 d_particles = t_particle_1d(); // destroy reference to reduce memory use
125 d_plist = DAT::t_int_2d(); // destroy reference to reduce memory use
126 }
127
128 /* ---------------------------------------------------------------------- */
129
130 template<int NEED_ATOMICS>
131 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeThermalGrid_compute_per_grid_atomic<NEED_ATOMICS>,const int & i) const132 void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_compute_per_grid_atomic<NEED_ATOMICS>, const int &i) const {
133
134 // The tally array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
135
136 auto v_tally = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_tally),decltype(ndup_tally)>::get(dup_tally,ndup_tally);
137 auto a_tally = v_tally.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
138
139 const int ispecies = d_particles[i].ispecies;
140 const int igroup = d_s2g(imix,ispecies);
141 if (igroup < 0) return;
142
143 const int icell = d_particles[i].icell;
144 if (!(d_cinfo[icell].mask & groupbit)) return;
145
146 const double mass = d_species[ispecies].mass;
147 double *v = d_particles[i].v;
148
149 // 6 tallies per particle: N, Mass, mVx, mVy, mVz, mV^2
150
151 int k = igroup*npergroup;
152
153 a_tally(icell,k++) += 1.0;
154 a_tally(icell,k++) += mass;
155 a_tally(icell,k++) += mass*v[0];
156 a_tally(icell,k++) += mass*v[1];
157 a_tally(icell,k++) += mass*v[2];
158 a_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
159 }
160
161 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeThermalGrid_compute_per_grid,const int & icell) const162 void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_compute_per_grid, const int &icell) const {
163 if (!(d_cinfo[icell].mask & groupbit)) return;
164
165 const int np = d_cellcount[icell];
166
167 for (int n = 0; n < np; n++) {
168
169 const int i = d_plist(icell,n);
170
171 const int ispecies = d_particles[i].ispecies;
172 const int igroup = d_s2g(imix,ispecies);
173 if (igroup < 0) return;
174
175 const int icell = d_particles[i].icell;
176
177 const double mass = d_species[ispecies].mass;
178 double *v = d_particles[i].v;
179
180 // 6 tallies per particle: N, Mass, mVx, mVy, mVz, mV^2
181
182
183
184 int k = igroup*npergroup;
185
186 d_tally(icell,k++) += 1.0;
187 d_tally(icell,k++) += mass;
188 d_tally(icell,k++) += mass*v[0];
189 d_tally(icell,k++) += mass*v[1];
190 d_tally(icell,k++) += mass*v[2];
191 d_tally(icell,k++) += mass * (v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
192 }
193 }
194
195 /* ----------------------------------------------------------------------
196 query info about internal tally array for this compute
197 index = which column of output (0 for vec, 1 to N for array)
198 return # of tally quantities for this index
199 also return array = ptr to tally array
200 also return cols = ptr to list of columns in tally for this index
201 ------------------------------------------------------------------------- */
202
query_tally_grid_kokkos(DAT::t_float_2d_lr & d_array)203 int ComputeThermalGridKokkos::query_tally_grid_kokkos(DAT::t_float_2d_lr &d_array)
204 {
205 d_array = d_tally;
206 return 0;
207 }
208
209 /* ----------------------------------------------------------------------
210 tally accumulated info to compute final normalized values
211 index = which column of output (0 for vec, 1 to N for array)
212 for etally = NULL:
213 use internal tallied info for single timestep, set nsample = 1
214 compute values for all grid cells
215 store results in vector_grid with nstride = 1 (single col of array_grid)
216 for etaylly = ptr to caller array:
217 use external tallied info for many timesteps
218 nsample = additional normalization factor used by some values
219 emap = list of etally columns to use, # of columns determined by index
220 store results in caller's vec, spaced by nstride
221 if norm = 0.0, set result to 0.0 directly so do not divide by 0.0
222 ------------------------------------------------------------------------- */
223
224 void ComputeThermalGridKokkos::
post_process_grid_kokkos(int index,int nsample,DAT::t_float_2d_lr d_etally,int * emap,DAT::t_float_1d_strided d_vec)225 post_process_grid_kokkos(int index, int nsample,
226 DAT::t_float_2d_lr d_etally, int *emap, DAT::t_float_1d_strided d_vec)
227 {
228 index--;
229 int ivalue = index % nvalue;
230
231 int lo = 0;
232 int hi = nglocal;
233 //k = 0;
234
235 if (!d_etally.data()) {
236 nsample = 1;
237 d_etally = d_tally;
238 emap = map[index];
239 d_vec = d_vector;
240 nstride = 1;
241 }
242
243 this->d_etally = d_etally;
244 this->d_vec = d_vec;
245 this->nstride = nstride;
246 this->nsample = nsample;
247
248 // compute normalized final value for each grid cell
249 // Vcm = Sum mv / Sum m
250 // total KE = 0.5 * Sum m(v - Vcm)^2
251 // KE = 0.5 * Sum(i=xyz) [(Sum mVi^2) - M Vcmx^2]
252 // KE = 0.5 * Sum(i=xyz) [(Sum mVi^2) - (Sum mVx)^2 / M]
253 // KE = 0.5 * Sum (mv^2) - [(Sum mVx)^2 + (Sum mVy)^2 + (Sum mVz)^2] / M
254 // thermal temp = (2/(3NkB)) * KE
255 // press = (2/(3V)) * KE
256
257 tflag = 1;
258 if (value[ivalue] == PRESS) tflag = 0;
259
260 if (value[ivalue] == TEMP) prefactor = tprefactor;
261 else if (value[ivalue] == PRESS) prefactor = pprefactor;
262
263 GridKokkos* grid_kk = (GridKokkos*) grid;
264 grid_kk->sync(Device,CINFO_MASK);
265 d_cinfo = grid_kk->k_cinfo.d_view;
266
267 n = emap[0];
268
269 copymode = 1;
270 Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeThermalGrid_post_process_grid>(lo,hi),*this);
271 DeviceType().fence();
272 copymode = 0;
273 }
274
275 /* ---------------------------------------------------------------------- */
276
277 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeThermalGrid_post_process_grid,const int & icell) const278 void ComputeThermalGridKokkos::operator()(TagComputeThermalGrid_post_process_grid, const int &icell) const {
279 const double ncount = d_etally(icell,n);
280 if (ncount == 0.0) d_vec[icell] = 0.0;
281 else {
282 const double mass = d_etally(icell,n+1);
283 const double mvx = d_etally(icell,n+2);
284 const double mvy = d_etally(icell,n+3);
285 const double mvz = d_etally(icell,n+4);
286 const double mvsq = d_etally(icell,n+5);
287 d_vec[icell] = mvsq - (mvx*mvx + mvy*mvy + mvz*mvz)/mass;
288 d_vec[icell] *= prefactor;
289 if (tflag) d_vec[icell] /= ncount;
290 else d_vec[icell] *= d_cinfo[icell].weight / d_cinfo[icell].volume / nsample;
291 }
292 }
293
294 /* ----------------------------------------------------------------------
295 reallocate arrays if nglocal has changed
296 called by init() and whenever grid changes
297 ------------------------------------------------------------------------- */
298
reallocate()299 void ComputeThermalGridKokkos::reallocate()
300 {
301 if (grid->nlocal == nglocal) return;
302
303 memoryKK->destroy_kokkos(k_vector_grid,vector_grid);
304 memoryKK->destroy_kokkos(k_tally,tally);
305 nglocal = grid->nlocal;
306 memoryKK->create_kokkos(k_vector_grid,vector_grid,nglocal,"thermal/grid:vector_grid");
307 d_vector = k_vector_grid.d_view;
308 memoryKK->create_kokkos(k_tally,tally,nglocal,ntotal,"thermal/grid:tally");
309 d_tally = k_tally.d_view;
310 }
311