1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>

#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
52 
53 using ExecSpace   = Kokkos::DefaultExecutionSpace;
54 using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
55 
56 using MemoryPool = Kokkos::MemoryPool<ExecSpace>;
57 
58 struct TestFunctor {
59   using ptrs_type = Kokkos::View<uintptr_t*, ExecSpace>;
60 
61   enum : unsigned { chunk = 32 };
62 
63   MemoryPool pool;
64   ptrs_type ptrs;
65   unsigned chunk_span;
66   unsigned fill_stride;
67   unsigned range_iter;
68   unsigned repeat_inner;
69 
TestFunctorTestFunctor70   TestFunctor(size_t total_alloc_size, unsigned min_superblock_size,
71               unsigned number_alloc, unsigned arg_stride_alloc,
72               unsigned arg_chunk_span, unsigned arg_repeat)
73       : pool(), ptrs(), chunk_span(0), fill_stride(0), repeat_inner(0) {
74     MemorySpace m;
75 
76     const unsigned min_block_size = chunk;
77     const unsigned max_block_size = chunk * arg_chunk_span;
78     pool = MemoryPool(m, total_alloc_size, min_block_size, max_block_size,
79                       min_superblock_size);
80 
81     ptrs         = ptrs_type(Kokkos::view_alloc(m, "ptrs"), number_alloc);
82     fill_stride  = arg_stride_alloc;
83     chunk_span   = arg_chunk_span;
84     range_iter   = fill_stride * number_alloc;
85     repeat_inner = arg_repeat;
86   }
87 
88   //----------------------------------------
89 
90   using value_type = long;
91 
92   //----------------------------------------
93 
94   struct TagFill {};
95 
96   KOKKOS_INLINE_FUNCTION
operator ()TestFunctor97   void operator()(TagFill, int i, value_type& update) const noexcept {
98     if (0 == i % fill_stride) {
99       const int j = i / fill_stride;
100 
101       const unsigned size_alloc = chunk * (1 + (j % chunk_span));
102 
103       ptrs(j) = (uintptr_t)pool.allocate(size_alloc);
104 
105       if (ptrs(j)) ++update;
106     }
107   }
108 
test_fillTestFunctor109   bool test_fill() {
110     using policy = Kokkos::RangePolicy<ExecSpace, TagFill>;
111 
112     long result = 0;
113 
114     Kokkos::parallel_reduce(policy(0, range_iter), *this, result);
115 
116     if (result == long(ptrs.extent(0))) return true;
117     pool.print_state(std::cerr);
118     return false;
119   }
120 
121   //----------------------------------------
122 
123   struct TagDel {};
124 
125   KOKKOS_INLINE_FUNCTION
operator ()TestFunctor126   void operator()(TagDel, int i) const noexcept {
127     if (0 == i % fill_stride) {
128       const int j = i / fill_stride;
129 
130       const unsigned size_alloc = chunk * (1 + (j % chunk_span));
131 
132       pool.deallocate((void*)ptrs(j), size_alloc);
133     }
134   }
135 
test_delTestFunctor136   void test_del() {
137     using policy = Kokkos::RangePolicy<ExecSpace, TagDel>;
138 
139     Kokkos::parallel_for(policy(0, range_iter), *this);
140     Kokkos::fence();
141   }
142 
143   //----------------------------------------
144 
145   struct TagAllocDealloc {};
146 
147   KOKKOS_INLINE_FUNCTION
operator ()TestFunctor148   void operator()(TagAllocDealloc, int i, long& update) const noexcept {
149     if (0 == i % fill_stride) {
150       const int j = i / fill_stride;
151 
152       if (0 == j % 3) {
153         for (unsigned k = 0; k < repeat_inner; ++k) {
154           const unsigned size_alloc = chunk * (1 + (j % chunk_span));
155 
156           pool.deallocate((void*)ptrs(j), size_alloc);
157 
158           ptrs(j) = (uintptr_t)pool.allocate(size_alloc);
159 
160           if (0 == ptrs(j)) update++;
161         }
162       }
163     }
164   }
165 
test_alloc_deallocTestFunctor166   bool test_alloc_dealloc() {
167     using policy = Kokkos::RangePolicy<ExecSpace, TagAllocDealloc>;
168 
169     long error_count = 0;
170 
171     Kokkos::parallel_reduce(policy(0, range_iter), *this, error_count);
172 
173     return 0 == error_count;
174   }
175 };
176 
main(int argc,char * argv[])177 int main(int argc, char* argv[]) {
178   static const char help_flag[]         = "--help";
179   static const char alloc_size_flag[]   = "--alloc_size=";
180   static const char super_size_flag[]   = "--super_size=";
181   static const char chunk_span_flag[]   = "--chunk_span=";
182   static const char fill_stride_flag[]  = "--fill_stride=";
183   static const char fill_level_flag[]   = "--fill_level=";
184   static const char repeat_outer_flag[] = "--repeat_outer=";
185   static const char repeat_inner_flag[] = "--repeat_inner=";
186 
187   long total_alloc_size   = 1000000;
188   int min_superblock_size = 10000;
189   int chunk_span          = 5;
190   int fill_stride         = 1;
191   int fill_level          = 70;
192   int repeat_outer        = 1;
193   int repeat_inner        = 1;
194 
195   int ask_help = 0;
196 
197   for (int i = 1; i < argc; i++) {
198     const char* const a = argv[i];
199 
200     if (!strncmp(a, help_flag, strlen(help_flag))) ask_help = 1;
201 
202     if (!strncmp(a, alloc_size_flag, strlen(alloc_size_flag)))
203       total_alloc_size = atol(a + strlen(alloc_size_flag));
204 
205     if (!strncmp(a, super_size_flag, strlen(super_size_flag)))
206       min_superblock_size = std::stoi(a + strlen(super_size_flag));
207 
208     if (!strncmp(a, fill_stride_flag, strlen(fill_stride_flag)))
209       fill_stride = std::stoi(a + strlen(fill_stride_flag));
210 
211     if (!strncmp(a, fill_level_flag, strlen(fill_level_flag)))
212       fill_level = std::stoi(a + strlen(fill_level_flag));
213 
214     if (!strncmp(a, chunk_span_flag, strlen(chunk_span_flag)))
215       chunk_span = std::stoi(a + strlen(chunk_span_flag));
216 
217     if (!strncmp(a, repeat_outer_flag, strlen(repeat_outer_flag)))
218       repeat_outer = std::stoi(a + strlen(repeat_outer_flag));
219 
220     if (!strncmp(a, repeat_inner_flag, strlen(repeat_inner_flag)))
221       repeat_inner = std::stoi(a + strlen(repeat_inner_flag));
222   }
223 
224   int chunk_span_bytes = 0;
225   for (int i = 0; i < chunk_span; ++i) {
226     auto chunk_bytes = TestFunctor::chunk * (1 + i);
227     if (chunk_bytes < 64) chunk_bytes = 64;
228     auto block_bytes_lg2 =
229         Kokkos::Impl::integral_power_of_two_that_contains(chunk_bytes);
230     auto block_bytes = (1 << block_bytes_lg2);
231     chunk_span_bytes += block_bytes;
232   }
233   auto actual_superblock_bytes_lg2 =
234       Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
235   auto actual_superblock_bytes = (1 << actual_superblock_bytes_lg2);
236   auto superblock_mask         = actual_superblock_bytes - 1;
237   auto nsuperblocks =
238       (total_alloc_size + superblock_mask) >> actual_superblock_bytes_lg2;
239   auto actual_total_bytes = nsuperblocks * actual_superblock_bytes;
240   auto bytes_wanted       = (actual_total_bytes * fill_level) / 100;
241   auto chunk_spans        = bytes_wanted / chunk_span_bytes;
242   auto number_alloc       = int(chunk_spans * chunk_span);
243 
244   if (ask_help) {
245     std::cout << "command line options:"
246               << " " << help_flag << " " << alloc_size_flag << "##"
247               << " " << super_size_flag << "##"
248               << " " << fill_stride_flag << "##"
249               << " " << fill_level_flag << "##"
250               << " " << chunk_span_flag << "##"
251               << " " << repeat_outer_flag << "##"
252               << " " << repeat_inner_flag << "##" << std::endl;
253     return 0;
254   }
255 
256   Kokkos::initialize(argc, argv);
257 
258   double sum_fill_time  = 0;
259   double sum_cycle_time = 0;
260   double sum_both_time  = 0;
261   double min_fill_time  = std::numeric_limits<double>::max();
262   double min_cycle_time = std::numeric_limits<double>::max();
263   double min_both_time  = std::numeric_limits<double>::max();
264   // one alloc in fill, alloc/dealloc pair in repeat_inner
265   for (int i = 0; i < repeat_outer; ++i) {
266     TestFunctor functor(total_alloc_size, min_superblock_size, number_alloc,
267                         fill_stride, chunk_span, repeat_inner);
268 
269     Kokkos::Impl::Timer timer;
270 
271     if (!functor.test_fill()) {
272       Kokkos::abort("fill ");
273     }
274 
275     auto t0 = timer.seconds();
276 
277     if (!functor.test_alloc_dealloc()) {
278       Kokkos::abort("alloc/dealloc ");
279     }
280 
281     auto t1              = timer.seconds();
282     auto this_fill_time  = t0;
283     auto this_cycle_time = t1 - t0;
284     auto this_both_time  = t1;
285     sum_fill_time += this_fill_time;
286     sum_cycle_time += this_cycle_time;
287     sum_both_time += this_both_time;
288     min_fill_time  = std::min(min_fill_time, this_fill_time);
289     min_cycle_time = std::min(min_cycle_time, this_cycle_time);
290     min_both_time  = std::min(min_both_time, this_both_time);
291   }
292 
293   Kokkos::finalize();
294 
295   printf(
296       "\"mempool: alloc super stride level span inner outer number\" %ld %d %d "
297       "%d %d %d %d %d\n",
298       total_alloc_size, min_superblock_size, fill_stride, fill_level,
299       chunk_span, repeat_inner, repeat_outer, number_alloc);
300 
301   auto avg_fill_time  = sum_fill_time / repeat_outer;
302   auto avg_cycle_time = sum_cycle_time / repeat_outer;
303   auto avg_both_time  = sum_both_time / repeat_outer;
304 
305   printf("\"mempool: fill time (min, avg)\" %.8f %.8f\n", min_fill_time,
306          avg_fill_time);
307 
308   printf("\"mempool: cycle time (min, avg)\" %.8f %.8f\n", min_cycle_time,
309          avg_cycle_time);
310 
311   printf("\"mempool: test time (min, avg)\" %.8f %.8f\n", min_both_time,
312          avg_both_time);
313 
314   printf("\"mempool: fill ops per second (max, avg)\" %g %g\n",
315          number_alloc / min_fill_time, number_alloc / avg_fill_time);
316 
317   printf("\"mempool: cycle ops per second (max, avg)\" %g %g\n",
318          (2 * number_alloc * repeat_inner) / min_cycle_time,
319          (2 * number_alloc * repeat_inner) / avg_cycle_time);
320 }
321