1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <string>

#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
51
52 using exec_space = Kokkos::DefaultExecutionSpace;
53
// Text attribute codes, passed as the `attr` argument of textcolor().
// Typed constants replace the former object-like macros; names and values
// are unchanged.
constexpr int RESET     = 0;
constexpr int BRIGHT    = 1;
constexpr int DIM       = 2;
constexpr int UNDERLINE = 3;
constexpr int BLINK     = 4;
constexpr int REVERSE   = 7;
constexpr int HIDDEN    = 8;

// Colour indices, passed as the `fg` / `bg` arguments of textcolor(), which
// adds 30 (foreground) or 40 (background) to them.
constexpr int BLACK   = 0;
constexpr int RED     = 1;
constexpr int GREEN   = 2;
constexpr int YELLOW  = 3;
constexpr int BLUE    = 4;
constexpr int MAGENTA = 5;
constexpr int CYAN    = 6;
constexpr int GREY    = 7;
constexpr int WHITE   = 8;
71
/// Writes the terminal control sequence `ESC [ attr ; fg+30 ; bg+40 m` to
/// stdout.
///
/// @param attr text attribute code, printed as given
/// @param fg   foreground colour index; 30 is added before printing
/// @param bg   background colour index; 40 is added before printing
void textcolor(int attr, int fg, int bg) {
  const int foreground_code = fg + 30;
  const int background_code = bg + 40;

  // 0x1B is the escape character that introduces the control sequence.
  printf("%c[%d;%d;%dm", 0x1B, attr, foreground_code, background_code);
}
textcolor_standard()79 void textcolor_standard() { textcolor(RESET, BLACK, WHITE); }
80
81 template <class T, class DEVICE_TYPE>
82 struct ZeroFunctor {
83 using execution_space = DEVICE_TYPE;
84 using type = typename Kokkos::View<T, execution_space>;
85 using h_type = typename Kokkos::View<T, execution_space>::HostMirror;
86 type data;
87 KOKKOS_INLINE_FUNCTION
operator ()ZeroFunctor88 void operator()(int) const { data() = 0; }
89 };
90
91 //---------------------------------------------------
92 //--------------atomic_fetch_add---------------------
93 //---------------------------------------------------
94
95 template <class T, class DEVICE_TYPE>
96 struct AddFunctor {
97 using execution_space = DEVICE_TYPE;
98 using type = Kokkos::View<T, execution_space>;
99 type data;
100
101 KOKKOS_INLINE_FUNCTION
operator ()AddFunctor102 void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
103 };
104
105 template <class T>
AddLoop(int loop)106 T AddLoop(int loop) {
107 struct ZeroFunctor<T, exec_space> f_zero;
108 typename ZeroFunctor<T, exec_space>::type data("Data");
109 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
110 f_zero.data = data;
111 Kokkos::parallel_for(1, f_zero);
112 exec_space().fence();
113
114 struct AddFunctor<T, exec_space> f_add;
115 f_add.data = data;
116 Kokkos::parallel_for(loop, f_add);
117 exec_space().fence();
118
119 Kokkos::deep_copy(h_data, data);
120 T val = h_data();
121 return val;
122 }
123
124 template <class T, class DEVICE_TYPE>
125 struct AddNonAtomicFunctor {
126 using execution_space = DEVICE_TYPE;
127 using type = Kokkos::View<T, execution_space>;
128 type data;
129
130 KOKKOS_INLINE_FUNCTION
operator ()AddNonAtomicFunctor131 void operator()(int) const { data() += (T)1; }
132 };
133
134 template <class T>
AddLoopNonAtomic(int loop)135 T AddLoopNonAtomic(int loop) {
136 struct ZeroFunctor<T, exec_space> f_zero;
137 typename ZeroFunctor<T, exec_space>::type data("Data");
138 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
139
140 f_zero.data = data;
141 Kokkos::parallel_for(1, f_zero);
142 exec_space().fence();
143
144 struct AddNonAtomicFunctor<T, exec_space> f_add;
145 f_add.data = data;
146 Kokkos::parallel_for(loop, f_add);
147 exec_space().fence();
148
149 Kokkos::deep_copy(h_data, data);
150 T val = h_data();
151
152 return val;
153 }
154
/// Serial reference for the add test: increments a heap-allocated counter
/// by 1, `loop` times, in an ordinary sequential loop.
///
/// @param loop number of increments; zero or negative performs none
/// @return     the final counter value
template <class T>
T AddLoopSerial(int loop) {
  // The counter is kept in a one-element heap array.
  T* const counter = new T[1];
  counter[0]       = 0;

  int remaining = loop;
  while (remaining > 0) {
    counter[0] += static_cast<T>(1);
    --remaining;
  }

  const T total = counter[0];
  delete[] counter;
  return total;
}
166
167 template <class T, class DEVICE_TYPE>
168 struct CASFunctor {
169 using execution_space = DEVICE_TYPE;
170 using type = Kokkos::View<T, execution_space>;
171 type data;
172
173 KOKKOS_INLINE_FUNCTION
operator ()CASFunctor174 void operator()(int) const {
175 T old = data();
176 T newval, assumed;
177 do {
178 assumed = old;
179 newval = assumed + (T)1;
180 old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
181 } while (old != assumed);
182 }
183 };
184
185 template <class T>
CASLoop(int loop)186 T CASLoop(int loop) {
187 struct ZeroFunctor<T, exec_space> f_zero;
188 typename ZeroFunctor<T, exec_space>::type data("Data");
189 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
190 f_zero.data = data;
191 Kokkos::parallel_for(1, f_zero);
192 exec_space().fence();
193
194 struct CASFunctor<T, exec_space> f_cas;
195 f_cas.data = data;
196 Kokkos::parallel_for(loop, f_cas);
197 exec_space().fence();
198
199 Kokkos::deep_copy(h_data, data);
200 T val = h_data();
201
202 return val;
203 }
204
205 template <class T, class DEVICE_TYPE>
206 struct CASNonAtomicFunctor {
207 using execution_space = DEVICE_TYPE;
208 using type = Kokkos::View<T, execution_space>;
209 type data;
210
211 KOKKOS_INLINE_FUNCTION
operator ()CASNonAtomicFunctor212 void operator()(int) const {
213 volatile T assumed;
214 volatile T newval;
215 bool fail = 1;
216 do {
217 assumed = data();
218 newval = assumed + (T)1;
219 if (data() == assumed) {
220 data() = newval;
221 fail = 0;
222 }
223 } while (fail);
224 }
225 };
226
227 template <class T>
CASLoopNonAtomic(int loop)228 T CASLoopNonAtomic(int loop) {
229 struct ZeroFunctor<T, exec_space> f_zero;
230 typename ZeroFunctor<T, exec_space>::type data("Data");
231 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
232 f_zero.data = data;
233 Kokkos::parallel_for(1, f_zero);
234 exec_space().fence();
235
236 struct CASNonAtomicFunctor<T, exec_space> f_cas;
237 f_cas.data = data;
238 Kokkos::parallel_for(loop, f_cas);
239 exec_space().fence();
240
241 Kokkos::deep_copy(h_data, data);
242 T val = h_data();
243
244 return val;
245 }
246
/// Serial reference for the compare-and-exchange test: performs `loop`
/// increments of a heap-allocated counter, each one written as an emulated
/// compare-and-exchange attempt (read, compute, re-read, store, compare).
///
/// @param loop number of increments; zero or negative performs none
/// @return     the final counter value
template <class T>
T CASLoopSerial(int loop) {
  // The counter is kept in a one-element heap array.
  T* const cell = new T[1];
  cell[0]       = 0;

  for (int iteration = 0; iteration < loop; ++iteration) {
    bool matched = false;
    while (!matched) {
      const T expected = cell[0];
      const T desired  = expected + static_cast<T>(1);
      const T observed = cell[0];  // value seen right before the store
      cell[0]          = desired;
      matched          = (expected == observed);
    }
  }

  const T result = cell[0];
  delete[] cell;
  return result;
}
268
269 template <class T, class DEVICE_TYPE>
270 struct ExchFunctor {
271 using execution_space = DEVICE_TYPE;
272 using type = Kokkos::View<T, execution_space>;
273 type data, data2;
274
275 KOKKOS_INLINE_FUNCTION
operator ()ExchFunctor276 void operator()(int i) const {
277 T old = Kokkos::atomic_exchange(&data(), (T)i);
278 Kokkos::atomic_fetch_add(&data2(), old);
279 }
280 };
281
282 template <class T>
ExchLoop(int loop)283 T ExchLoop(int loop) {
284 struct ZeroFunctor<T, exec_space> f_zero;
285 typename ZeroFunctor<T, exec_space>::type data("Data");
286 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
287 f_zero.data = data;
288 Kokkos::parallel_for(1, f_zero);
289 exec_space().fence();
290
291 typename ZeroFunctor<T, exec_space>::type data2("Data");
292 typename ZeroFunctor<T, exec_space>::h_type h_data2("HData");
293 f_zero.data = data2;
294 Kokkos::parallel_for(1, f_zero);
295 exec_space().fence();
296
297 struct ExchFunctor<T, exec_space> f_exch;
298 f_exch.data = data;
299 f_exch.data2 = data2;
300 Kokkos::parallel_for(loop, f_exch);
301 exec_space().fence();
302
303 Kokkos::deep_copy(h_data, data);
304 Kokkos::deep_copy(h_data2, data2);
305 T val = h_data() + h_data2();
306
307 return val;
308 }
309
310 template <class T, class DEVICE_TYPE>
311 struct ExchNonAtomicFunctor {
312 using execution_space = DEVICE_TYPE;
313 using type = Kokkos::View<T, execution_space>;
314 type data, data2;
315
316 KOKKOS_INLINE_FUNCTION
operator ()ExchNonAtomicFunctor317 void operator()(int i) const {
318 T old = data();
319 data() = (T)i;
320 data2() += old;
321 }
322 };
323
324 template <class T>
ExchLoopNonAtomic(int loop)325 T ExchLoopNonAtomic(int loop) {
326 struct ZeroFunctor<T, exec_space> f_zero;
327 typename ZeroFunctor<T, exec_space>::type data("Data");
328 typename ZeroFunctor<T, exec_space>::h_type h_data("HData");
329 f_zero.data = data;
330 Kokkos::parallel_for(1, f_zero);
331 exec_space().fence();
332
333 typename ZeroFunctor<T, exec_space>::type data2("Data");
334 typename ZeroFunctor<T, exec_space>::h_type h_data2("HData");
335 f_zero.data = data2;
336 Kokkos::parallel_for(1, f_zero);
337 exec_space().fence();
338
339 struct ExchNonAtomicFunctor<T, exec_space> f_exch;
340 f_exch.data = data;
341 f_exch.data2 = data2;
342 Kokkos::parallel_for(loop, f_exch);
343 exec_space().fence();
344
345 Kokkos::deep_copy(h_data, data);
346 Kokkos::deep_copy(h_data2, data2);
347 T val = h_data() + h_data2();
348
349 return val;
350 }
351
/// Serial reference for the exchange test. For i = 0 .. loop-1 it replaces
/// the content of a heap-allocated slot with i and adds the replaced value
/// to a second heap-allocated accumulator.
///
/// @param loop number of exchanges; zero or negative performs none
/// @return     accumulator plus the final slot content
template <class T>
T ExchLoopSerial(int loop) {
  // Both values are kept in one-element heap arrays.
  T* const slot        = new T[1];
  T* const accumulated = new T[1];
  slot[0]              = 0;
  accumulated[0]       = 0;

  int i = 0;
  while (i < loop) {
    const T previous = slot[0];
    slot[0]          = static_cast<T>(i);
    accumulated[0] += previous;
    ++i;
  }

  const T result = accumulated[0] + slot[0];
  delete[] slot;
  delete[] accumulated;
  return result;
}
369
370 template <class T>
LoopVariant(int loop,int test)371 T LoopVariant(int loop, int test) {
372 switch (test) {
373 case 1: return AddLoop<T>(loop);
374 case 2: return CASLoop<T>(loop);
375 case 3: return ExchLoop<T>(loop);
376 }
377 return 0;
378 }
379
380 template <class T>
LoopVariantSerial(int loop,int test)381 T LoopVariantSerial(int loop, int test) {
382 switch (test) {
383 case 1: return AddLoopSerial<T>(loop);
384 case 2: return CASLoopSerial<T>(loop);
385 case 3: return ExchLoopSerial<T>(loop);
386 }
387 return 0;
388 }
389
390 template <class T>
LoopVariantNonAtomic(int loop,int test)391 T LoopVariantNonAtomic(int loop, int test) {
392 switch (test) {
393 case 1: return AddLoopNonAtomic<T>(loop);
394 case 2: return CASLoopNonAtomic<T>(loop);
395 case 3: return ExchLoopNonAtomic<T>(loop);
396 }
397 return 0;
398 }
399
400 template <class T>
Loop(int loop,int test,const char * type_name)401 void Loop(int loop, int test, const char* type_name) {
402 LoopVariant<T>(loop, test);
403
404 Kokkos::Impl::Timer timer;
405 T res = LoopVariant<T>(loop, test);
406 double time = timer.seconds();
407
408 timer.reset();
409 T resNonAtomic = LoopVariantNonAtomic<T>(loop, test);
410 double timeNonAtomic = timer.seconds();
411
412 timer.reset();
413 T resSerial = LoopVariantSerial<T>(loop, test);
414 double timeSerial = timer.seconds();
415
416 time *= 1e6 / loop;
417 timeNonAtomic *= 1e6 / loop;
418 timeSerial *= 1e6 / loop;
419 // textcolor_standard();
420 bool passed = true;
421 if (resSerial != res) passed = false;
422 // if(!passed) textcolor(RESET,BLACK,YELLOW);
423 printf(
424 "%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e "
425 "%7.4e Size of Type %i)",
426 type_name, test, passed ? "PASSED" : "FAILED", loop, 1.0 * resSerial,
427 1.0 * res, 1.0 * resNonAtomic, timeSerial, time, timeNonAtomic,
428 (int)sizeof(T));
429 // if(!passed) textcolor_standard();
430 printf("\n");
431 }
432
433 template <class T>
Test(int loop,int test,const char * type_name)434 void Test(int loop, int test, const char* type_name) {
435 if (test == -1) {
436 Loop<T>(loop, 1, type_name);
437 Loop<T>(loop, 2, type_name);
438 Loop<T>(loop, 3, type_name);
439
440 } else
441 Loop<T>(loop, test, type_name);
442 }
443
main(int argc,char * argv[])444 int main(int argc, char* argv[]) {
445 int type = -1;
446 int loop = 100000;
447 int test = -1;
448
449 for (int i = 0; i < argc; i++) {
450 if ((strcmp(argv[i], "--test") == 0)) {
451 test = std::stoi(argv[++i]);
452 continue;
453 }
454 if ((strcmp(argv[i], "--type") == 0)) {
455 type = std::stoi(argv[++i]);
456 continue;
457 }
458 if ((strcmp(argv[i], "-l") == 0) || (strcmp(argv[i], "--loop") == 0)) {
459 loop = std::stoi(argv[++i]);
460 continue;
461 }
462 }
463
464 Kokkos::initialize(argc, argv);
465
466 printf("Using %s\n", Kokkos::atomic_query_version());
467 bool all_tests = false;
468 if (type == -1) all_tests = true;
469 while (type < 100) {
470 if (type == 1) {
471 Test<int>(loop, test, "int ");
472 }
473 if (type == 2) {
474 Test<long int>(loop, test, "long int ");
475 }
476 if (type == 3) {
477 Test<long long int>(loop, test, "long long int ");
478 }
479 if (type == 4) {
480 Test<unsigned int>(loop, test, "unsigned int ");
481 }
482 if (type == 5) {
483 Test<unsigned long int>(loop, test, "unsigned long int ");
484 }
485 if (type == 6) {
486 Test<unsigned long long int>(loop, test, "unsigned long long int ");
487 }
488 if (type == 10) {
489 // Test<float>(loop,test,"float ");
490 }
491 if (type == 11) {
492 Test<double>(loop, test, "double ");
493 }
494 if (!all_tests)
495 type = 100;
496 else
497 type++;
498 }
499
500 Kokkos::finalize();
501 }
502