1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // ************************************************************************
40 //@HEADER
41 */
42
#include "Kokkos_Core.hpp"
#include <chrono>
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <limits>

#include <sys/time.h>
49
50 #define HLINE "-------------------------------------------------------------\n"
51
52 #if defined(KOKKOS_ENABLE_CUDA)
53 using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
54 using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
55 #else
56 using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
57 using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
58 #endif
59
60 using GUPSIndex = int;
61
// Returns the current reading of a monotonic clock, in seconds.
//
// The value is measured from the clock's own unspecified epoch, so it is only
// meaningful as the difference between two calls. A steady clock is used so
// that wall-clock adjustments made while a measurement is running cannot
// distort (or make negative) the measured interval.
double now() {
  using Seconds = std::chrono::duration<double>;

  const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
  return std::chrono::duration_cast<Seconds>(sinceEpoch).count();
}
68
randomize_indices(GUPSHostArray & indices,GUPSDeviceArray & dev_indices,const int64_t dataCount)69 void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
70 const int64_t dataCount) {
71 for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
72 indices[i] = lrand48() % dataCount;
73 }
74
75 Kokkos::deep_copy(dev_indices, indices);
76 }
77
run_gups(GUPSDeviceArray & indices,GUPSDeviceArray & data,const int64_t datum,const bool performAtomics)78 void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
79 const int64_t datum, const bool performAtomics) {
80 if (performAtomics) {
81 Kokkos::parallel_for(
82 "bench-gups-atomic", indices.extent(0),
83 KOKKOS_LAMBDA(const GUPSIndex i) {
84 Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
85 });
86 } else {
87 Kokkos::parallel_for(
88 "bench-gups-non-atomic", indices.extent(0),
89 KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
90 }
91
92 Kokkos::fence();
93 }
94
run_benchmark(const GUPSIndex indicesCount,const GUPSIndex dataCount,const int repeats,const bool useAtomics)95 int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
96 const int repeats, const bool useAtomics) {
97 printf("Reports fastest timing per kernel\n");
98 printf("Creating Views...\n");
99
100 printf("Memory Sizes:\n");
101 printf("- Elements: %15" PRIu64 " (%12.4f MB)\n",
102 static_cast<uint64_t>(dataCount),
103 1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
104 printf("- Indices: %15" PRIu64 " (%12.4f MB)\n",
105 static_cast<uint64_t>(indicesCount),
106 1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
107 printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No"));
108 printf("Benchmark kernels will be performed for %d iterations.\n", repeats);
109
110 printf(HLINE);
111
112 GUPSDeviceArray dev_indices("indices", indicesCount);
113 GUPSDeviceArray dev_data("data", dataCount);
114 int64_t datum = -1;
115
116 GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
117 GUPSHostArray data = Kokkos::create_mirror_view(dev_data);
118
119 double gupsTime = 0.0;
120
121 printf("Initializing Views...\n");
122
123 #if defined(KOKKOS_HAVE_OPENMP)
124 Kokkos::parallel_for(
125 "init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
126 #else
127 Kokkos::parallel_for(
128 "init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
129 #endif
130 KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });
131
132 #if defined(KOKKOS_HAVE_OPENMP)
133 Kokkos::parallel_for(
134 "init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
135 #else
136 Kokkos::parallel_for(
137 "init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
138 #endif
139 KOKKOS_LAMBDA(const int i) { indices[i] = 0; });
140
141 Kokkos::deep_copy(dev_data, data);
142 Kokkos::deep_copy(dev_indices, indices);
143 double start;
144
145 printf("Starting benchmarking...\n");
146
147 for (GUPSIndex k = 0; k < repeats; ++k) {
148 randomize_indices(indices, dev_indices, data.extent(0));
149
150 start = now();
151 run_gups(dev_indices, dev_data, datum, useAtomics);
152 gupsTime += now() - start;
153 }
154
155 Kokkos::deep_copy(indices, dev_indices);
156 Kokkos::deep_copy(data, dev_data);
157
158 printf(HLINE);
159 printf(
160 "GUP/s Random: %18.6f\n",
161 (1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
162 printf(HLINE);
163
164 return 0;
165 }
166
main(int argc,char * argv[])167 int main(int argc, char* argv[]) {
168 printf(HLINE);
169 printf("Kokkos GUPS Benchmark\n");
170 printf(HLINE);
171
172 srand48(1010101);
173
174 Kokkos::initialize(argc, argv);
175
176 int64_t indices = 8192;
177 int64_t data = 33554432;
178 int64_t repeats = 10;
179 bool useAtomics = false;
180
181 for (int i = 1; i < argc; ++i) {
182 if (strcmp(argv[i], "--indices") == 0) {
183 indices = std::atoll(argv[i + 1]);
184 ++i;
185 } else if (strcmp(argv[i], "--data") == 0) {
186 data = std::atoll(argv[i + 1]);
187 ++i;
188 } else if (strcmp(argv[i], "--repeats") == 0) {
189 repeats = std::atoll(argv[i + 1]);
190 ++i;
191 } else if (strcmp(argv[i], "--atomics") == 0) {
192 useAtomics = true;
193 }
194 }
195
196 const int rc = run_benchmark(indices, data, repeats, useAtomics);
197
198 Kokkos::finalize();
199
200 return rc;
201 }
202