1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #include <iostream>
46 #include <string>
47
// mfh 06 Jun 2013: This macro doesn't work like one might think it
49 // should. It doesn't take the template parameter DeviceType and
50 // print its actual type name; it just literally prints out
51 // "DeviceType". I've worked around this below without using the
52 // macro, so I'm commenting out the macro to avoid compiler complaints
53 // about an unused macro.
54
55 // #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
56 // #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_IMPL_MACRO_TO_STRING( X )
57
58 //------------------------------------------------------------------------
59
60 namespace Test {
61
62 enum { NUMBER_OF_TRIALS = 5 };
63
64 template <class DeviceType, class LayoutType>
run_test_mdrange(int exp_beg,int exp_end,const char deviceTypeName[],int range_offset=0,int tile_offset=0)65 void run_test_mdrange(int exp_beg, int exp_end, const char deviceTypeName[],
66 int range_offset = 0, int tile_offset = 0)
67 // exp_beg = 6 => 2^6 = 64 is starting range length
68 {
69 #define MDRANGE_PERFORMANCE_OUTPUT_VERBOSE 0
70
71 std::string label_mdrange;
72 label_mdrange.append("\"MDRange< double , ");
73 label_mdrange.append(deviceTypeName);
74 label_mdrange.append(" >\"");
75
76 std::string label_range_col2;
77 label_range_col2.append("\"RangeColTwo< double , ");
78 label_range_col2.append(deviceTypeName);
79 label_range_col2.append(" >\"");
80
81 std::string label_range_col_all;
82 label_range_col_all.append("\"RangeColAll< double , ");
83 label_range_col_all.append(deviceTypeName);
84 label_range_col_all.append(" >\"");
85
86 if (std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
87 std::cout
88 << "--------------------------------------------------------------\n"
89 << "Performance tests for MDRange Layout Right"
90 << "\n--------------------------------------------------------------"
91 << std::endl;
92 } else {
93 std::cout
94 << "--------------------------------------------------------------\n"
95 << "Performance tests for MDRange Layout Left"
96 << "\n--------------------------------------------------------------"
97 << std::endl;
98 }
99
100 for (int i = exp_beg; i < exp_end; ++i) {
101 const int range_length = (1 << i) + range_offset;
102
103 std::cout
104 << "\n--------------------------------------------------------------\n"
105 << "--------------------------------------------------------------\n"
106 << "MDRange Test: range bounds: " << range_length << " , "
107 << range_length << " , " << range_length
108 << "\n--------------------------------------------------------------\n"
109 << "--------------------------------------------------------------\n";
110 // << std::endl;
111
112 int t0_min = 0, t1_min = 0, t2_min = 0;
113 double seconds_min = 0.0;
114
115 // Test 1: The MDRange in full
116 {
117 int t0 = 1, t1 = 1, t2 = 1;
118 int counter = 1;
119 #if !defined(KOKKOS_ENABLE_CUDA)
120 int min_bnd = 8;
121 int tfast = range_length;
122 #else
123 int min_bnd = 2;
124 int tfast = 32;
125 #endif
126 while (tfast >= min_bnd) {
127 int tmid = min_bnd;
128 while (tmid < tfast) {
129 t0 = min_bnd;
130 t1 = tmid;
131 t2 = tfast;
132 int t2_rev = min_bnd;
133 int t1_rev = tmid;
134 int t0_rev = tfast;
135
136 #if defined(KOKKOS_ENABLE_CUDA)
137 // Note: Product of tile sizes must be < 1024 for Cuda
138 if (t0 * t1 * t2 >= 1024) {
139 printf(" Exceeded Cuda tile limits; onto next range set\n\n");
140 break;
141 }
142 #endif
143
144 // Run 1 with tiles LayoutRight style
145 double seconds_1 = 0;
146 {
147 seconds_1 =
148 MultiDimRangePerf3D<DeviceType, double,
149 LayoutType>::test_multi_index(range_length,
150 range_length,
151 range_length,
152 t0, t1, t2);
153 }
154
155 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
156 std::cout << label_mdrange << " , " << t0 << " , " << t1 << " , "
157 << t2 << " , " << seconds_1 << std::endl;
158 #endif
159
160 if (counter == 1) {
161 seconds_min = seconds_1;
162 t0_min = t0;
163 t1_min = t1;
164 t2_min = t2;
165 } else {
166 if (seconds_1 < seconds_min) {
167 seconds_min = seconds_1;
168 t0_min = t0;
169 t1_min = t1;
170 t2_min = t2;
171 }
172 }
173
174 // Run 2 with tiles LayoutLeft style - reverse order of tile dims
175 double seconds_1rev = 0;
176 {
177 seconds_1rev =
178 MultiDimRangePerf3D<DeviceType, double,
179 LayoutType>::test_multi_index(range_length,
180 range_length,
181 range_length,
182 t0_rev,
183 t1_rev,
184 t2_rev);
185 }
186
187 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
188 std::cout << label_mdrange << " , " << t0_rev << " , " << t1_rev
189 << " , " << t2_rev << " , " << seconds_1rev << std::endl;
190 #endif
191
192 if (seconds_1rev < seconds_min) {
193 seconds_min = seconds_1rev;
194 t0_min = t0_rev;
195 t1_min = t1_rev;
196 t2_min = t2_rev;
197 }
198
199 ++counter;
200 tmid <<= 1;
201 } // end inner while
202 tfast >>= 1;
203 } // end outer while
204
205 std::cout
206 << "\n"
207 << "--------------------------------------------------------------\n"
208 << label_mdrange << "\n Min values "
209 << "\n Range length per dim (3D): " << range_length
210 << "\n TileDims: " << t0_min << " , " << t1_min << " , " << t2_min
211 << "\n Min time: " << seconds_min
212 << "\n---------------------------------------------------------------"
213 << std::endl;
214 } // end scope
215
216 #if !defined(KOKKOS_ENABLE_CUDA)
217 double seconds_min_c = 0.0;
218 int t0c_min = 0, t1c_min = 0, t2c_min = 0;
219 int counter = 1;
220 {
221 int min_bnd = 8;
222 // Test 1_c: MDRange with 0 for 'inner' tile dim; this case will utilize
223 // the full span in that direction, should be similar to Collapse<2>
224 if (std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
225 for (unsigned int T0 = min_bnd;
226 T0 < static_cast<unsigned int>(range_length); T0 <<= 1) {
227 for (unsigned int T1 = min_bnd;
228 T1 < static_cast<unsigned int>(range_length); T1 <<= 1) {
229 double seconds_c = 0;
230 {
231 seconds_c = MultiDimRangePerf3D<DeviceType, double, LayoutType>::
232 test_multi_index(range_length, range_length, range_length, T0,
233 T1, 0);
234 }
235
236 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
237 std::cout << " MDRange LR with '0' tile - collapse-like \n"
238 << label_mdrange << " , " << T0 << " , " << T1 << " , "
239 << range_length << " , " << seconds_c << std::endl;
240 #endif
241
242 t2c_min = range_length;
243 if (counter == 1) {
244 seconds_min_c = seconds_c;
245 t0c_min = T0;
246 t1c_min = T1;
247 } else {
248 if (seconds_c < seconds_min_c) {
249 seconds_min_c = seconds_c;
250 t0c_min = T0;
251 t1c_min = T1;
252 }
253 }
254 ++counter;
255 }
256 }
257 } else {
258 for (unsigned int T1 = min_bnd;
259 T1 <= static_cast<unsigned int>(range_length); T1 <<= 1) {
260 for (unsigned int T2 = min_bnd;
261 T2 <= static_cast<unsigned int>(range_length); T2 <<= 1) {
262 double seconds_c = 0;
263 {
264 seconds_c = MultiDimRangePerf3D<DeviceType, double, LayoutType>::
265 test_multi_index(range_length, range_length, range_length, 0,
266 T1, T2);
267 }
268
269 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
270 std::cout << " MDRange LL with '0' tile - collapse-like \n"
271 << label_mdrange << " , " << range_length << " < " << T1
272 << " , " << T2 << " , " << seconds_c << std::endl;
273 #endif
274
275 t0c_min = range_length;
276 if (counter == 1) {
277 seconds_min_c = seconds_c;
278 t1c_min = T1;
279 t2c_min = T2;
280 } else {
281 if (seconds_c < seconds_min_c) {
282 seconds_min_c = seconds_c;
283 t1c_min = T1;
284 t2c_min = T2;
285 }
286 }
287 ++counter;
288 }
289 }
290 }
291
292 std::cout
293 // <<
294 // "--------------------------------------------------------------\n"
295 << label_mdrange << " Collapse<2> style: "
296 << "\n Min values "
297 << "\n Range length per dim (3D): " << range_length
298 << "\n TileDims: " << t0c_min << " , " << t1c_min << " , " << t2c_min
299 << "\n Min time: " << seconds_min_c
300 << "\n---------------------------------------------------------------"
301 << std::endl;
302 } // end scope test 2
303 #endif
304
305 // Test 2: RangePolicy Collapse2 style
306 double seconds_2 = 0;
307 {
308 seconds_2 = RangePolicyCollapseTwo<DeviceType, double, LayoutType>::
309 test_index_collapse_two(range_length, range_length, range_length);
310 }
311 std::cout << label_range_col2 << " , " << range_length << " , " << seconds_2
312 << std::endl;
313
314 // Test 3: RangePolicy Collapse all style - not necessary, always slow
315 /*
316 double seconds_3 = 0;
317 { seconds_3 = RangePolicyCollapseAll< DeviceType , double , LayoutType
318 >::test_collapse_all(range_length,range_length,range_length) ; } std::cout
319 << label_range_col_all
320 << " , " << range_length
321 << " , " << seconds_3
322 << "\n---------------------------------------------------------------"
323 << std::endl ;
324 */
325
326 // Compare fastest times... will never be collapse all so ignore it
327 // seconds_min = tiled MDRange
328 // seconds_min_c = collapse<2>-like MDRange (tiledim = span for fast dim) -
329 // only for non-Cuda, else tile too long seconds_2 = collapse<2>-style
330 // RangePolicy seconds_3 = collapse<3>-style RangePolicy
331
332 #if !defined(KOKKOS_ENABLE_CUDA)
333 if (seconds_min < seconds_min_c) {
334 if (seconds_min < seconds_2) {
335 std::cout
336 << "--------------------------------------------------------------"
337 "\n"
338 << " Fastest run: MDRange tiled\n"
339 << " Time: " << seconds_min
340 << " Difference: " << seconds_2 - seconds_min << " Other times: \n"
341 << " MDrange collapse-like (tiledim = span on fast dim) type: "
342 << seconds_min_c << "\n"
343 << " Collapse2 Range Policy: " << seconds_2 << "\n"
344 << "\n-------------------------------------------------------------"
345 "-"
346 << "\n-------------------------------------------------------------"
347 "-"
348 //<< "\n\n"
349 << std::endl;
350 } else if (seconds_min > seconds_2) {
351 std::cout
352 << " Fastest run: Collapse2 RangePolicy\n"
353 << " Time: " << seconds_2
354 << " Difference: " << seconds_min - seconds_2 << " Other times: \n"
355 << " MDrange Tiled: " << seconds_min << "\n"
356 << " MDrange collapse-like (tiledim = span on fast dim) type: "
357 << seconds_min_c << "\n"
358 << "\n-------------------------------------------------------------"
359 "-"
360 << "\n-------------------------------------------------------------"
361 "-"
362 //<< "\n\n"
363 << std::endl;
364 }
365 } else if (seconds_min > seconds_min_c) {
366 if (seconds_min_c < seconds_2) {
367 std::cout << "---------------------------------------------------------"
368 "-----\n"
369 << " Fastest run: MDRange collapse-like (tiledim = span on "
370 "fast dim) type\n"
371 << " Time: " << seconds_min_c
372 << " Difference: " << seconds_2 - seconds_min_c
373 << " Other times: \n"
374 << " MDrange Tiled: " << seconds_min << "\n"
375 << " Collapse2 Range Policy: " << seconds_2 << "\n"
376 << "\n-------------------------------------------------------"
377 "-------"
378 << "\n-------------------------------------------------------"
379 "-------"
380 //<< "\n\n"
381 << std::endl;
382 } else if (seconds_min_c > seconds_2) {
383 std::cout
384 << " Fastest run: Collapse2 RangePolicy\n"
385 << " Time: " << seconds_2
386 << " Difference: " << seconds_min_c - seconds_2
387 << " Other times: \n"
388 << " MDrange Tiled: " << seconds_min << "\n"
389 << " MDrange collapse-like (tiledim = span on fast dim) type: "
390 << seconds_min_c << "\n"
391 << "\n-------------------------------------------------------------"
392 "-"
393 << "\n-------------------------------------------------------------"
394 "-"
395 //<< "\n\n"
396 << std::endl;
397 }
398 } // end else if
399 #else
400 if (seconds_min < seconds_2) {
401 std::cout
402 << "--------------------------------------------------------------\n"
403 << " Fastest run: MDRange tiled\n"
404 << " Time: " << seconds_min
405 << " Difference: " << seconds_2 - seconds_min << " Other times: \n"
406 << " Collapse2 Range Policy: " << seconds_2 << "\n"
407 << "\n--------------------------------------------------------------"
408 << "\n--------------------------------------------------------------"
409 //<< "\n\n"
410 << std::endl;
411 } else if (seconds_min > seconds_2) {
412 std::cout
413 << " Fastest run: Collapse2 RangePolicy\n"
414 << " Time: " << seconds_2
415 << " Difference: " << seconds_min - seconds_2 << " Other times: \n"
416 << " MDrange Tiled: " << seconds_min << "\n"
417 << "\n--------------------------------------------------------------"
418 << "\n--------------------------------------------------------------"
419 //<< "\n\n"
420 << std::endl;
421 }
422 #endif
423
424 } // end for
425
426 #undef MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
427 }
428
429 } // namespace Test
430