1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // ************************************************************************
40 //@HEADER
41 */
42 
#include "Kokkos_Core.hpp"

#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>

#include <sys/time.h>
49 
50 #define HLINE "-------------------------------------------------------------\n"
51 
52 #if defined(KOKKOS_ENABLE_CUDA)
53 using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
54 using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
55 #else
56 using GUPSHostArray   = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
57 using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
58 #endif
59 
60 using GUPSIndex = int;
61 
// Returns the current time of day, as reported by gettimeofday(), expressed
// in seconds as a double (whole seconds plus the microsecond part scaled by
// 1e-6). Callers subtract two readings to obtain an elapsed interval.
double now() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);

  const double wholeSeconds = static_cast<double>(tv.tv_sec);
  const double microSeconds = static_cast<double>(tv.tv_usec);

  return wholeSeconds + (microSeconds * 1.0e-6);
}
68 
randomize_indices(GUPSHostArray & indices,GUPSDeviceArray & dev_indices,const int64_t dataCount)69 void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
70                        const int64_t dataCount) {
71   for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
72     indices[i] = lrand48() % dataCount;
73   }
74 
75   Kokkos::deep_copy(dev_indices, indices);
76 }
77 
run_gups(GUPSDeviceArray & indices,GUPSDeviceArray & data,const int64_t datum,const bool performAtomics)78 void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
79               const int64_t datum, const bool performAtomics) {
80   if (performAtomics) {
81     Kokkos::parallel_for(
82         "bench-gups-atomic", indices.extent(0),
83         KOKKOS_LAMBDA(const GUPSIndex i) {
84           Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
85         });
86   } else {
87     Kokkos::parallel_for(
88         "bench-gups-non-atomic", indices.extent(0),
89         KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
90   }
91 
92   Kokkos::fence();
93 }
94 
run_benchmark(const GUPSIndex indicesCount,const GUPSIndex dataCount,const int repeats,const bool useAtomics)95 int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
96                   const int repeats, const bool useAtomics) {
97   printf("Reports fastest timing per kernel\n");
98   printf("Creating Views...\n");
99 
100   printf("Memory Sizes:\n");
101   printf("- Elements:      %15" PRIu64 " (%12.4f MB)\n",
102          static_cast<uint64_t>(dataCount),
103          1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
104   printf("- Indices:       %15" PRIu64 " (%12.4f MB)\n",
105          static_cast<uint64_t>(indicesCount),
106          1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
107   printf(" - Atomics:      %15s\n", (useAtomics ? "Yes" : "No"));
108   printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
109 
110   printf(HLINE);
111 
112   GUPSDeviceArray dev_indices("indices", indicesCount);
113   GUPSDeviceArray dev_data("data", dataCount);
114   int64_t datum = -1;
115 
116   GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
117   GUPSHostArray data    = Kokkos::create_mirror_view(dev_data);
118 
119   double gupsTime = 0.0;
120 
121   printf("Initializing Views...\n");
122 
123 #if defined(KOKKOS_HAVE_OPENMP)
124   Kokkos::parallel_for(
125       "init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
126 #else
127   Kokkos::parallel_for(
128       "init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
129 #endif
130       KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
131 
132 #if defined(KOKKOS_HAVE_OPENMP)
133   Kokkos::parallel_for(
134       "init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
135 #else
136   Kokkos::parallel_for(
137       "init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
138 #endif
139       KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
140 
141   Kokkos::deep_copy(dev_data, data);
142   Kokkos::deep_copy(dev_indices, indices);
143   double start;
144 
145   printf("Starting benchmarking...\n");
146 
147   for (GUPSIndex k = 0; k < repeats; ++k) {
148     randomize_indices(indices, dev_indices, data.extent(0));
149 
150     start = now();
151     run_gups(dev_indices, dev_data, datum, useAtomics);
152     gupsTime += now() - start;
153   }
154 
155   Kokkos::deep_copy(indices, dev_indices);
156   Kokkos::deep_copy(data, dev_data);
157 
158   printf(HLINE);
159   printf(
160       "GUP/s Random:      %18.6f\n",
161       (1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
162   printf(HLINE);
163 
164   return 0;
165 }
166 
main(int argc,char * argv[])167 int main(int argc, char* argv[]) {
168   printf(HLINE);
169   printf("Kokkos GUPS Benchmark\n");
170   printf(HLINE);
171 
172   srand48(1010101);
173 
174   Kokkos::initialize(argc, argv);
175 
176   int64_t indices = 8192;
177   int64_t data    = 33554432;
178   int64_t repeats = 10;
179   bool useAtomics = false;
180 
181   for (int i = 1; i < argc; ++i) {
182     if (strcmp(argv[i], "--indices") == 0) {
183       indices = std::atoll(argv[i + 1]);
184       ++i;
185     } else if (strcmp(argv[i], "--data") == 0) {
186       data = std::atoll(argv[i + 1]);
187       ++i;
188     } else if (strcmp(argv[i], "--repeats") == 0) {
189       repeats = std::atoll(argv[i + 1]);
190       ++i;
191     } else if (strcmp(argv[i], "--atomics") == 0) {
192       useAtomics = true;
193     }
194   }
195 
196   const int rc = run_benchmark(indices, data, repeats, useAtomics);
197 
198   Kokkos::finalize();
199 
200   return rc;
201 }
202