1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Alan Stagg (Sandia)
17 ------------------------------------------------------------------------- */
18 
19 #include "mpi.h"
20 #include "string.h"
21 #include "compute_count_kokkos.h"
22 #include "update.h"
23 #include "particle.h"
24 #include "mixture.h"
25 #include "comm.h"
26 #include "error.h"
27 #include "memory_kokkos.h"
28 #include "particle_kokkos.h"
29 #include "sparta_masks.h"
30 #include "kokkos.h"
31 
32 using namespace SPARTA_NS;
33 
34 enum{SPECIES,MIXTURE};
35 
36 
37 /* ---------------------------------------------------------------------- */
38 
ComputeCountKokkos(SPARTA * sparta,int narg,char ** arg)39 ComputeCountKokkos::ComputeCountKokkos(SPARTA *sparta, int narg, char **arg) :
40   ComputeCount(sparta, narg, arg)
41 {
42   kokkos_flag = 1;
43 }
44 
45 /* ---------------------------------------------------------------------- */
46 
~ComputeCountKokkos()47 ComputeCountKokkos::~ComputeCountKokkos()
48 {
49   if (copymode) return;
50 
51   memoryKK->destroy_kokkos(k_count, count);
52   count = NULL;
53 }
54 
55 
56 /* ---------------------------------------------------------------------- */
57 
init()58 void ComputeCountKokkos::init()
59 {
60   // insure count vector is long enough for species count
61   if (maxspecies < particle->nspecies) {
62     maxspecies = particle->nspecies;
63     memoryKK->destroy_kokkos(k_count,count);
64     memoryKK->create_kokkos(k_count,count,maxspecies,"compute/count:count");
65     d_count = k_count.d_view;
66   }
67 
68   // check if the group count in any accessed mixtures has changed
69   int warn = 0;
70   int err = 0;
71   for (int i = 0; i < nvalues; i++) {
72     if (spmix[i] == SPECIES) continue;
73     int imix = index[i];
74     int igroup = indexgroup[i];
75     int ngroup = mixgroups[i];
76     if (ngroup != particle->mixture[imix]->ngroup) warn = 1;
77     if (igroup >= particle->mixture[imix]->ngroup) err = 1;
78   }
79 
80   if (err)
81     error->all(FLERR,
82                "Group in mixture used by compute count no longer valid");
83   if (warn && comm->me == 0)
84     error->warning(FLERR,
85                    "Group count in mixture used by compute count has changed");
86 }
87 
88 /* ---------------------------------------------------------------------- */
89 
compute_scalar()90 double ComputeCountKokkos::compute_scalar()
91 {
92   invoked_scalar = update->ntimestep;
93 
94   per_species_tally_kokkos();
95   k_count.modify_device();
96   k_count.sync_host();
97   for (int m=0; m<maxspecies; ++m)
98     count[m] = k_count.h_view(m);
99 
100   bigint one;
101   if (spmix[0] == SPECIES) one = count[index[0]];
102   else {
103     int nspecies = particle->mixture[index[0]]->groupsize[indexgroup[0]];
104     one = 0;
105     for (int m = 0; m < nspecies; m++) {
106       int isp = particle->mixture[index[0]]->groupspecies[indexgroup[0]][m];
107       one += count[isp];
108     }
109   }
110 
111   bigint sum;
112   MPI_Allreduce(&one,&sum,1,MPI_SPARTA_BIGINT,MPI_SUM,world);
113   scalar = sum;
114   return scalar;
115 }
116 
117 /* ---------------------------------------------------------------------- */
118 
compute_vector()119 void ComputeCountKokkos::compute_vector()
120 {
121   int i,m;
122   invoked_scalar = update->ntimestep;
123 
124   per_species_tally_kokkos();
125   k_count.modify_device();
126   k_count.sync_host();
127   for (int m=0; m<maxspecies; ++m)
128     count[m] = k_count.h_view(m);
129 
130   for (i = 0; i < nvalues; i++) {
131     onevec[i] = 0;
132     if (spmix[i] == SPECIES) onevec[i] = count[index[i]];
133     else {
134       int nspecies = particle->mixture[index[i]]->groupsize[indexgroup[i]];
135       for (m = 0; m < nspecies; m++) {
136         int isp = particle->mixture[index[i]]->groupspecies[indexgroup[i]][m];
137         onevec[i] += count[isp];
138       }
139     }
140   }
141 
142   MPI_Allreduce(onevec,sumvec,nvalues,MPI_SPARTA_BIGINT,MPI_SUM,world);
143   for (i = 0; i < nvalues; i++) vector[i] = sumvec[i];
144 }
145 
146 /* ---------------------------------------------------------------------- */
147 
per_species_tally_kokkos()148 void ComputeCountKokkos::per_species_tally_kokkos()
149 {
150   if (lasttally == update->ntimestep) return;
151   lasttally = update->ntimestep;
152 
153   Kokkos::deep_copy(d_count,0);
154 
155   ParticleKokkos* particle_kk = (ParticleKokkos*) particle;
156   particle_kk->sync(Device,PARTICLE_MASK);
157   d_particles = particle_kk->k_particles.d_view;
158 
159   need_dup = sparta->kokkos->need_dup<DeviceType>();
160   if (need_dup)
161     dup_count = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterDuplicated>(d_count);
162   else
163     ndup_count = Kokkos::Experimental::create_scatter_view<typename Kokkos::Experimental::ScatterSum, typename Kokkos::Experimental::ScatterNonDuplicated>(d_count);
164 
165   // loop over all particles
166   int nlocal = particle->nlocal;
167   copymode = 1;
168   if (sparta->kokkos->need_atomics)
169     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeCount_per_species_tally_atomic<1> >(0,nlocal),*this);
170   else
171     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagComputeCount_per_species_tally_atomic<0> >(0,nlocal),*this);
172   DeviceType().fence();
173   copymode = 0;
174 
175   if (need_dup) {
176     Kokkos::Experimental::contribute(d_count, dup_count);
177     dup_count = decltype(dup_count)(); // free duplicated memory
178   }
179 
180   d_particles = t_particle_1d(); // destroy reference to reduce memory use
181 }
182 
183 /* ---------------------------------------------------------------------- */
184 
185 template<int NEED_ATOMICS>
186 KOKKOS_INLINE_FUNCTION
operator ()(TagComputeCount_per_species_tally_atomic<NEED_ATOMICS>,const int & i) const187 void ComputeCountKokkos::operator()(TagComputeCount_per_species_tally_atomic<NEED_ATOMICS>, const int& i) const {
188 
189   // The tally (count) array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
190 
191   auto v_count = ScatterViewHelper<typename NeedDup<NEED_ATOMICS,DeviceType>::value,decltype(dup_count),decltype(ndup_count)>::get(dup_count,ndup_count);
192   auto a_count = v_count.template access<typename AtomicDup<NEED_ATOMICS,DeviceType>::value>();
193 
194   a_count[d_particles[i].ispecies] += 1;
195 }
196