1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "stdio.h"
16 #include "string.h"
17 #include "stdlib.h"
18 #include "ctype.h"
19 #include "kokkos.h"
20 #include "sparta.h"
21 #include "error.h"
22 #include "memory_kokkos.h"
23 
24 using namespace SPARTA_NS;
25 
26 /* ---------------------------------------------------------------------- */
27 
KokkosSPARTA(SPARTA * sparta,int narg,char ** arg)28 KokkosSPARTA::KokkosSPARTA(SPARTA *sparta, int narg, char **arg) : Pointers(sparta)
29 {
30   kokkos_exists = 1;
31   sparta->kokkos = this;
32 
33   delete memory;
34   memory = new MemoryKokkos(sparta);
35   memoryKK = (MemoryKokkos*) memory;
36 
37   int me = 0;
38   MPI_Comm_rank(world,&me);
39   if (me == 0) error->message(FLERR,"KOKKOS mode is enabled");
40 
41   // process any command-line args that invoke Kokkos settings
42 
43   ngpus = 0;
44   int device = 0;
45   nthreads = 1;
46   numa = 1;
47 
48   int iarg = 0;
49   while (iarg < narg) {
50     if (strcmp(arg[iarg],"d") == 0 || strcmp(arg[iarg],"device") == 0) {
51       if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
52       device = atoi(arg[iarg+1]);
53       iarg += 2;
54 
55     } else if (strcmp(arg[iarg],"g") == 0 ||
56                strcmp(arg[iarg],"gpus") == 0) {
57 #ifndef SPARTA_KOKKOS_GPU
58       error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA or HIP");
59 #endif
60       if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
61       ngpus = atoi(arg[iarg+1]);
62 
63       int skip_gpu = 9999;
64       if (iarg+2 < narg && isdigit(arg[iarg+2][0])) {
65         skip_gpu = atoi(arg[iarg+2]);
66         iarg++;
67       }
68       iarg += 2;
69 
70       int set_flag = 0;
71       char *str;
72       if ((str = getenv("SLURM_LOCALID"))) {
73         int local_rank = atoi(str);
74         device = local_rank % ngpus;
75         if (device >= skip_gpu) device++;
76         set_flag = 1;
77       }
78       if ((str = getenv("MPT_LRANK"))) {
79         int local_rank = atoi(str);
80         device = local_rank % ngpus;
81         if (device >= skip_gpu) device++;
82         set_flag = 1;
83       }
84       if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
85         int local_rank = atoi(str);
86         device = local_rank % ngpus;
87         if (device >= skip_gpu) device++;
88         set_flag = 1;
89       }
90       if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
91         int local_rank = atoi(str);
92         device = local_rank % ngpus;
93         if (device >= skip_gpu) device++;
94         set_flag = 1;
95       }
96 
97       if (ngpus > 1 && !set_flag)
98         error->all(FLERR,"Could not determine local MPI rank for multiple "
99                            "GPUs with Kokkos CUDA or HIP because MPI library not recognized");
100 
101     } else if (strcmp(arg[iarg],"t") == 0 ||
102                strcmp(arg[iarg],"threads") == 0) {
103       nthreads = atoi(arg[iarg+1]);
104       iarg += 2;
105 
106     } else if (strcmp(arg[iarg],"n") == 0 ||
107                strcmp(arg[iarg],"numa") == 0) {
108       numa = atoi(arg[iarg+1]);
109       iarg += 2;
110 
111     } else error->all(FLERR,"Invalid Kokkos command-line args");
112   }
113 
114   // initialize Kokkos
115 
116   if (me == 0) {
117     if (screen) fprintf(screen,"  using %d GPU(s) per MPI task\n",ngpus);
118     if (logfile) fprintf(logfile,"  using %d GPU(s) per MPI task\n",ngpus);
119 
120     if (screen) fprintf(screen,"  using %d thread(s) per MPI task\n",nthreads);
121     if (logfile) fprintf(logfile,"  using %d thread(s) per MPI task\n",nthreads);
122   }
123 
124 #ifdef KOKKOS_ENABLE_CUDA
125   if (ngpus <= 0)
126     error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested");
127 #endif
128 
129 #ifndef KOKKOS_ENABLE_SERIAL
130   if (nthreads == 1 && me == 0)
131     error->warning(FLERR,"When using a single thread, the Kokkos Serial backend "
132                          "(i.e. Makefile.kokkos_mpi_only) gives better performance "
133                          "than the OpenMP backend");
134 #endif
135 
136   Kokkos::InitArguments args;
137   args.num_threads = nthreads;
138   args.num_numa = numa;
139   args.device_id = device;
140 
141   Kokkos::initialize(args);
142 
143   // default settings for package kokkos command
144 
145   comm_classic = 0;
146   atomic_reduction = 0;
147   prewrap = 1;
148   auto_sync = 1;
149   gpu_direct_flag = 1;
150 
151   need_atomics = 1;
152   if (nthreads == 1 && ngpus == 0)
153     need_atomics = 0;
154 
155   collide_retry_flag = 0;
156   collide_extra = 1.1;
157 
158   //if (need_atomics == 0) // prevent unnecessary parallel_reduce
159   //  atomic_reduction = 1;
160 }
161 
162 /* ---------------------------------------------------------------------- */
163 
~KokkosSPARTA()164 KokkosSPARTA::~KokkosSPARTA()
165 {
166   // finalize Kokkos
167 
168   Kokkos::finalize();
169 }
170 
171 /* ----------------------------------------------------------------------
172    invoked by package kokkos command
173 ------------------------------------------------------------------------- */
174 
accelerator(int narg,char ** arg)175 void KokkosSPARTA::accelerator(int narg, char **arg)
176 {
177   // defaults
178 
179   comm_classic = 0;
180 
181   int iarg = 0;
182   while (iarg < narg) {
183     if (strcmp(arg[iarg],"comm") == 0) {
184       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
185       if (strcmp(arg[iarg+1],"classic") == 0) {
186         comm_classic = 1;
187       } else if (strcmp(arg[iarg+1],"threaded") == 0) {
188         comm_classic = 0;
189       } else error->all(FLERR,"Illegal package kokkos command");
190       iarg += 2;
191     } else if (strcmp(arg[iarg],"reduction") == 0) {
192       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
193       if (strcmp(arg[iarg+1],"atomic") == 0) {
194         atomic_reduction = 1;
195       } else if (strcmp(arg[iarg+1],"parallel/reduce") == 0) {
196         atomic_reduction = 0;
197       } else error->all(FLERR,"Illegal package kokkos command");
198       iarg += 2;
199     } else if (strcmp(arg[iarg],"collide/retry") == 0) {
200       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
201       if (strcmp(arg[iarg+1],"yes") == 0) {
202         collide_retry_flag = 1;
203       } else if (strcmp(arg[iarg+1],"no") == 0) {
204         collide_retry_flag = 0;
205       } else error->all(FLERR,"Illegal package kokkos command");
206       iarg += 2;
207     } else if (strcmp(arg[iarg],"collide/extra") == 0) {
208       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
209       collide_extra = atof(arg[iarg+1]);
210       iarg += 2;
211     } else if (strcmp(arg[iarg],"gpu/direct") == 0) {
212       if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
213       if (strcmp(arg[iarg+1],"yes") == 0) {
214         gpu_direct_flag = 1;
215       } else if (strcmp(arg[iarg+1],"no") == 0) {
216         gpu_direct_flag = 0;
217       } else error->all(FLERR,"Illegal package kokkos command");
218       iarg += 2;
219     } else error->all(FLERR,"Illegal package kokkos command");
220   }
221 }
222