1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "stdio.h"
16 #include "string.h"
17 #include "stdlib.h"
18 #include "ctype.h"
19 #include "kokkos.h"
20 #include "sparta.h"
21 #include "error.h"
22 #include "memory_kokkos.h"
23
24 using namespace SPARTA_NS;
25
26 /* ---------------------------------------------------------------------- */
27
KokkosSPARTA(SPARTA * sparta,int narg,char ** arg)28 KokkosSPARTA::KokkosSPARTA(SPARTA *sparta, int narg, char **arg) : Pointers(sparta)
29 {
30 kokkos_exists = 1;
31 sparta->kokkos = this;
32
33 delete memory;
34 memory = new MemoryKokkos(sparta);
35 memoryKK = (MemoryKokkos*) memory;
36
37 int me = 0;
38 MPI_Comm_rank(world,&me);
39 if (me == 0) error->message(FLERR,"KOKKOS mode is enabled");
40
41 // process any command-line args that invoke Kokkos settings
42
43 ngpus = 0;
44 int device = 0;
45 nthreads = 1;
46 numa = 1;
47
48 int iarg = 0;
49 while (iarg < narg) {
50 if (strcmp(arg[iarg],"d") == 0 || strcmp(arg[iarg],"device") == 0) {
51 if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
52 device = atoi(arg[iarg+1]);
53 iarg += 2;
54
55 } else if (strcmp(arg[iarg],"g") == 0 ||
56 strcmp(arg[iarg],"gpus") == 0) {
57 #ifndef SPARTA_KOKKOS_GPU
58 error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA or HIP");
59 #endif
60 if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
61 ngpus = atoi(arg[iarg+1]);
62
63 int skip_gpu = 9999;
64 if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
65 skip_gpu = atoi(arg[iarg+2]);
66 iarg++;
67 }
68 iarg += 2;
69
70 int set_flag = 0;
71 char *str;
72 if ((str = getenv("SLURM_LOCALID"))) {
73 int local_rank = atoi(str);
74 device = local_rank % ngpus;
75 if (device >= skip_gpu) device++;
76 set_flag = 1;
77 }
78 if ((str = getenv("MPT_LRANK"))) {
79 int local_rank = atoi(str);
80 device = local_rank % ngpus;
81 if (device >= skip_gpu) device++;
82 set_flag = 1;
83 }
84 if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
85 int local_rank = atoi(str);
86 device = local_rank % ngpus;
87 if (device >= skip_gpu) device++;
88 set_flag = 1;
89 }
90 if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
91 int local_rank = atoi(str);
92 device = local_rank % ngpus;
93 if (device >= skip_gpu) device++;
94 set_flag = 1;
95 }
96
97 if (ngpus > 1 && !set_flag)
98 error->all(FLERR,"Could not determine local MPI rank for multiple "
99 "GPUs with Kokkos CUDA or HIP because MPI library not recognized");
100
101 } else if (strcmp(arg[iarg],"t") == 0 ||
102 strcmp(arg[iarg],"threads") == 0) {
103 nthreads = atoi(arg[iarg+1]);
104 iarg += 2;
105
106 } else if (strcmp(arg[iarg],"n") == 0 ||
107 strcmp(arg[iarg],"numa") == 0) {
108 numa = atoi(arg[iarg+1]);
109 iarg += 2;
110
111 } else error->all(FLERR,"Invalid Kokkos command-line args");
112 }
113
114 // initialize Kokkos
115
116 if (me == 0) {
117 if (screen) fprintf(screen," using %d GPU(s) per MPI task\n",ngpus);
118 if (logfile) fprintf(logfile," using %d GPU(s) per MPI task\n",ngpus);
119
120 if (screen) fprintf(screen," using %d thread(s) per MPI task\n",nthreads);
121 if (logfile) fprintf(logfile," using %d thread(s) per MPI task\n",nthreads);
122 }
123
124 #ifdef KOKKOS_ENABLE_CUDA
125 if (ngpus <= 0)
126 error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested");
127 #endif
128
129 #ifndef KOKKOS_ENABLE_SERIAL
130 if (nthreads == 1 && me == 0)
131 error->warning(FLERR,"When using a single thread, the Kokkos Serial backend "
132 "(i.e. Makefile.kokkos_mpi_only) gives better performance "
133 "than the OpenMP backend");
134 #endif
135
136 Kokkos::InitArguments args;
137 args.num_threads = nthreads;
138 args.num_numa = numa;
139 args.device_id = device;
140
141 Kokkos::initialize(args);
142
143 // default settings for package kokkos command
144
145 comm_classic = 0;
146 atomic_reduction = 0;
147 prewrap = 1;
148 auto_sync = 1;
149 gpu_direct_flag = 1;
150
151 need_atomics = 1;
152 if (nthreads == 1 && ngpus == 0)
153 need_atomics = 0;
154
155 collide_retry_flag = 0;
156 collide_extra = 1.1;
157
158 //if (need_atomics == 0) // prevent unnecessary parallel_reduce
159 // atomic_reduction = 1;
160 }
161
162 /* ---------------------------------------------------------------------- */
163
~KokkosSPARTA()164 KokkosSPARTA::~KokkosSPARTA()
165 {
166 // finalize Kokkos
167
168 Kokkos::finalize();
169 }
170
171 /* ----------------------------------------------------------------------
172 invoked by package kokkos command
173 ------------------------------------------------------------------------- */
174
accelerator(int narg,char ** arg)175 void KokkosSPARTA::accelerator(int narg, char **arg)
176 {
177 // defaults
178
179 comm_classic = 0;
180
181 int iarg = 0;
182 while (iarg < narg) {
183 if (strcmp(arg[iarg],"comm") == 0) {
184 if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
185 if (strcmp(arg[iarg+1],"classic") == 0) {
186 comm_classic = 1;
187 } else if (strcmp(arg[iarg+1],"threaded") == 0) {
188 comm_classic = 0;
189 } else error->all(FLERR,"Illegal package kokkos command");
190 iarg += 2;
191 } else if (strcmp(arg[iarg],"reduction") == 0) {
192 if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
193 if (strcmp(arg[iarg+1],"atomic") == 0) {
194 atomic_reduction = 1;
195 } else if (strcmp(arg[iarg+1],"parallel/reduce") == 0) {
196 atomic_reduction = 0;
197 } else error->all(FLERR,"Illegal package kokkos command");
198 iarg += 2;
199 } else if (strcmp(arg[iarg],"collide/retry") == 0) {
200 if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
201 if (strcmp(arg[iarg+1],"yes") == 0) {
202 collide_retry_flag = 1;
203 } else if (strcmp(arg[iarg+1],"no") == 0) {
204 collide_retry_flag = 0;
205 } else error->all(FLERR,"Illegal package kokkos command");
206 iarg += 2;
207 } else if (strcmp(arg[iarg],"collide/extra") == 0) {
208 if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
209 collide_extra = atof(arg[iarg+1]);
210 iarg += 2;
211 } else if (strcmp(arg[iarg],"gpu/direct") == 0) {
212 if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
213 if (strcmp(arg[iarg+1],"yes") == 0) {
214 gpu_direct_flag = 1;
215 } else if (strcmp(arg[iarg+1],"no") == 0) {
216 gpu_direct_flag = 0;
217 } else error->all(FLERR,"Illegal package kokkos command");
218 iarg += 2;
219 } else error->all(FLERR,"Illegal package kokkos command");
220 }
221 }
222