1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>

#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
52
53 using ExecSpace = Kokkos::DefaultExecutionSpace;
54 using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
55
56 using MemoryPool = Kokkos::MemoryPool<ExecSpace>;
57
58 struct TestFunctor {
59 using ptrs_type = Kokkos::View<uintptr_t*, ExecSpace>;
60
61 enum : unsigned { chunk = 32 };
62
63 MemoryPool pool;
64 ptrs_type ptrs;
65 unsigned chunk_span;
66 unsigned fill_stride;
67 unsigned range_iter;
68 unsigned repeat_inner;
69
TestFunctorTestFunctor70 TestFunctor(size_t total_alloc_size, unsigned min_superblock_size,
71 unsigned number_alloc, unsigned arg_stride_alloc,
72 unsigned arg_chunk_span, unsigned arg_repeat)
73 : pool(), ptrs(), chunk_span(0), fill_stride(0), repeat_inner(0) {
74 MemorySpace m;
75
76 const unsigned min_block_size = chunk;
77 const unsigned max_block_size = chunk * arg_chunk_span;
78 pool = MemoryPool(m, total_alloc_size, min_block_size, max_block_size,
79 min_superblock_size);
80
81 ptrs = ptrs_type(Kokkos::view_alloc(m, "ptrs"), number_alloc);
82 fill_stride = arg_stride_alloc;
83 chunk_span = arg_chunk_span;
84 range_iter = fill_stride * number_alloc;
85 repeat_inner = arg_repeat;
86 }
87
88 //----------------------------------------
89
90 using value_type = long;
91
92 //----------------------------------------
93
94 struct TagFill {};
95
96 KOKKOS_INLINE_FUNCTION
operator ()TestFunctor97 void operator()(TagFill, int i, value_type& update) const noexcept {
98 if (0 == i % fill_stride) {
99 const int j = i / fill_stride;
100
101 const unsigned size_alloc = chunk * (1 + (j % chunk_span));
102
103 ptrs(j) = (uintptr_t)pool.allocate(size_alloc);
104
105 if (ptrs(j)) ++update;
106 }
107 }
108
test_fillTestFunctor109 bool test_fill() {
110 using policy = Kokkos::RangePolicy<ExecSpace, TagFill>;
111
112 long result = 0;
113
114 Kokkos::parallel_reduce(policy(0, range_iter), *this, result);
115
116 if (result == long(ptrs.extent(0))) return true;
117 pool.print_state(std::cerr);
118 return false;
119 }
120
121 //----------------------------------------
122
123 struct TagDel {};
124
125 KOKKOS_INLINE_FUNCTION
operator ()TestFunctor126 void operator()(TagDel, int i) const noexcept {
127 if (0 == i % fill_stride) {
128 const int j = i / fill_stride;
129
130 const unsigned size_alloc = chunk * (1 + (j % chunk_span));
131
132 pool.deallocate((void*)ptrs(j), size_alloc);
133 }
134 }
135
test_delTestFunctor136 void test_del() {
137 using policy = Kokkos::RangePolicy<ExecSpace, TagDel>;
138
139 Kokkos::parallel_for(policy(0, range_iter), *this);
140 Kokkos::fence();
141 }
142
143 //----------------------------------------
144
145 struct TagAllocDealloc {};
146
147 KOKKOS_INLINE_FUNCTION
operator ()TestFunctor148 void operator()(TagAllocDealloc, int i, long& update) const noexcept {
149 if (0 == i % fill_stride) {
150 const int j = i / fill_stride;
151
152 if (0 == j % 3) {
153 for (unsigned k = 0; k < repeat_inner; ++k) {
154 const unsigned size_alloc = chunk * (1 + (j % chunk_span));
155
156 pool.deallocate((void*)ptrs(j), size_alloc);
157
158 ptrs(j) = (uintptr_t)pool.allocate(size_alloc);
159
160 if (0 == ptrs(j)) update++;
161 }
162 }
163 }
164 }
165
test_alloc_deallocTestFunctor166 bool test_alloc_dealloc() {
167 using policy = Kokkos::RangePolicy<ExecSpace, TagAllocDealloc>;
168
169 long error_count = 0;
170
171 Kokkos::parallel_reduce(policy(0, range_iter), *this, error_count);
172
173 return 0 == error_count;
174 }
175 };
176
main(int argc,char * argv[])177 int main(int argc, char* argv[]) {
178 static const char help_flag[] = "--help";
179 static const char alloc_size_flag[] = "--alloc_size=";
180 static const char super_size_flag[] = "--super_size=";
181 static const char chunk_span_flag[] = "--chunk_span=";
182 static const char fill_stride_flag[] = "--fill_stride=";
183 static const char fill_level_flag[] = "--fill_level=";
184 static const char repeat_outer_flag[] = "--repeat_outer=";
185 static const char repeat_inner_flag[] = "--repeat_inner=";
186
187 long total_alloc_size = 1000000;
188 int min_superblock_size = 10000;
189 int chunk_span = 5;
190 int fill_stride = 1;
191 int fill_level = 70;
192 int repeat_outer = 1;
193 int repeat_inner = 1;
194
195 int ask_help = 0;
196
197 for (int i = 1; i < argc; i++) {
198 const char* const a = argv[i];
199
200 if (!strncmp(a, help_flag, strlen(help_flag))) ask_help = 1;
201
202 if (!strncmp(a, alloc_size_flag, strlen(alloc_size_flag)))
203 total_alloc_size = atol(a + strlen(alloc_size_flag));
204
205 if (!strncmp(a, super_size_flag, strlen(super_size_flag)))
206 min_superblock_size = std::stoi(a + strlen(super_size_flag));
207
208 if (!strncmp(a, fill_stride_flag, strlen(fill_stride_flag)))
209 fill_stride = std::stoi(a + strlen(fill_stride_flag));
210
211 if (!strncmp(a, fill_level_flag, strlen(fill_level_flag)))
212 fill_level = std::stoi(a + strlen(fill_level_flag));
213
214 if (!strncmp(a, chunk_span_flag, strlen(chunk_span_flag)))
215 chunk_span = std::stoi(a + strlen(chunk_span_flag));
216
217 if (!strncmp(a, repeat_outer_flag, strlen(repeat_outer_flag)))
218 repeat_outer = std::stoi(a + strlen(repeat_outer_flag));
219
220 if (!strncmp(a, repeat_inner_flag, strlen(repeat_inner_flag)))
221 repeat_inner = std::stoi(a + strlen(repeat_inner_flag));
222 }
223
224 int chunk_span_bytes = 0;
225 for (int i = 0; i < chunk_span; ++i) {
226 auto chunk_bytes = TestFunctor::chunk * (1 + i);
227 if (chunk_bytes < 64) chunk_bytes = 64;
228 auto block_bytes_lg2 =
229 Kokkos::Impl::integral_power_of_two_that_contains(chunk_bytes);
230 auto block_bytes = (1 << block_bytes_lg2);
231 chunk_span_bytes += block_bytes;
232 }
233 auto actual_superblock_bytes_lg2 =
234 Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
235 auto actual_superblock_bytes = (1 << actual_superblock_bytes_lg2);
236 auto superblock_mask = actual_superblock_bytes - 1;
237 auto nsuperblocks =
238 (total_alloc_size + superblock_mask) >> actual_superblock_bytes_lg2;
239 auto actual_total_bytes = nsuperblocks * actual_superblock_bytes;
240 auto bytes_wanted = (actual_total_bytes * fill_level) / 100;
241 auto chunk_spans = bytes_wanted / chunk_span_bytes;
242 auto number_alloc = int(chunk_spans * chunk_span);
243
244 if (ask_help) {
245 std::cout << "command line options:"
246 << " " << help_flag << " " << alloc_size_flag << "##"
247 << " " << super_size_flag << "##"
248 << " " << fill_stride_flag << "##"
249 << " " << fill_level_flag << "##"
250 << " " << chunk_span_flag << "##"
251 << " " << repeat_outer_flag << "##"
252 << " " << repeat_inner_flag << "##" << std::endl;
253 return 0;
254 }
255
256 Kokkos::initialize(argc, argv);
257
258 double sum_fill_time = 0;
259 double sum_cycle_time = 0;
260 double sum_both_time = 0;
261 double min_fill_time = std::numeric_limits<double>::max();
262 double min_cycle_time = std::numeric_limits<double>::max();
263 double min_both_time = std::numeric_limits<double>::max();
264 // one alloc in fill, alloc/dealloc pair in repeat_inner
265 for (int i = 0; i < repeat_outer; ++i) {
266 TestFunctor functor(total_alloc_size, min_superblock_size, number_alloc,
267 fill_stride, chunk_span, repeat_inner);
268
269 Kokkos::Impl::Timer timer;
270
271 if (!functor.test_fill()) {
272 Kokkos::abort("fill ");
273 }
274
275 auto t0 = timer.seconds();
276
277 if (!functor.test_alloc_dealloc()) {
278 Kokkos::abort("alloc/dealloc ");
279 }
280
281 auto t1 = timer.seconds();
282 auto this_fill_time = t0;
283 auto this_cycle_time = t1 - t0;
284 auto this_both_time = t1;
285 sum_fill_time += this_fill_time;
286 sum_cycle_time += this_cycle_time;
287 sum_both_time += this_both_time;
288 min_fill_time = std::min(min_fill_time, this_fill_time);
289 min_cycle_time = std::min(min_cycle_time, this_cycle_time);
290 min_both_time = std::min(min_both_time, this_both_time);
291 }
292
293 Kokkos::finalize();
294
295 printf(
296 "\"mempool: alloc super stride level span inner outer number\" %ld %d %d "
297 "%d %d %d %d %d\n",
298 total_alloc_size, min_superblock_size, fill_stride, fill_level,
299 chunk_span, repeat_inner, repeat_outer, number_alloc);
300
301 auto avg_fill_time = sum_fill_time / repeat_outer;
302 auto avg_cycle_time = sum_cycle_time / repeat_outer;
303 auto avg_both_time = sum_both_time / repeat_outer;
304
305 printf("\"mempool: fill time (min, avg)\" %.8f %.8f\n", min_fill_time,
306 avg_fill_time);
307
308 printf("\"mempool: cycle time (min, avg)\" %.8f %.8f\n", min_cycle_time,
309 avg_cycle_time);
310
311 printf("\"mempool: test time (min, avg)\" %.8f %.8f\n", min_both_time,
312 avg_both_time);
313
314 printf("\"mempool: fill ops per second (max, avg)\" %g %g\n",
315 number_alloc / min_fill_time, number_alloc / avg_fill_time);
316
317 printf("\"mempool: cycle ops per second (max, avg)\" %g %g\n",
318 (2 * number_alloc * repeat_inner) / min_cycle_time,
319 (2 * number_alloc * repeat_inner) / avg_cycle_time);
320 }
321