1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #include <iostream>
46 #include <string>
47 
// mfh 06 Jun 2013: This macro doesn't work like one might think it
49 // should.  It doesn't take the template parameter DeviceType and
50 // print its actual type name; it just literally prints out
51 // "DeviceType".  I've worked around this below without using the
52 // macro, so I'm commenting out the macro to avoid compiler complaints
53 // about an unused macro.
54 
55 // #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
56 // #define KOKKOS_MACRO_TO_STRING( X )  KOKKOS_IMPL_MACRO_TO_STRING( X )
57 
58 //------------------------------------------------------------------------
59 
60 namespace Test {
61 
// Trial-count constant for the performance tests in this namespace.
// NOTE(review): not referenced anywhere in this part of the file; presumably
// consumed by the perf-test implementations included alongside it - confirm.
enum { NUMBER_OF_TRIALS = 5 };
63 
64 template <class DeviceType, class LayoutType>
run_test_mdrange(int exp_beg,int exp_end,const char deviceTypeName[],int range_offset=0,int tile_offset=0)65 void run_test_mdrange(int exp_beg, int exp_end, const char deviceTypeName[],
66                       int range_offset = 0, int tile_offset = 0)
67 // exp_beg = 6 => 2^6 = 64 is starting range length
68 {
69 #define MDRANGE_PERFORMANCE_OUTPUT_VERBOSE 0
70 
71   std::string label_mdrange;
72   label_mdrange.append("\"MDRange< double , ");
73   label_mdrange.append(deviceTypeName);
74   label_mdrange.append(" >\"");
75 
76   std::string label_range_col2;
77   label_range_col2.append("\"RangeColTwo< double , ");
78   label_range_col2.append(deviceTypeName);
79   label_range_col2.append(" >\"");
80 
81   std::string label_range_col_all;
82   label_range_col_all.append("\"RangeColAll< double , ");
83   label_range_col_all.append(deviceTypeName);
84   label_range_col_all.append(" >\"");
85 
86   if (std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
87     std::cout
88         << "--------------------------------------------------------------\n"
89         << "Performance tests for MDRange Layout Right"
90         << "\n--------------------------------------------------------------"
91         << std::endl;
92   } else {
93     std::cout
94         << "--------------------------------------------------------------\n"
95         << "Performance tests for MDRange Layout Left"
96         << "\n--------------------------------------------------------------"
97         << std::endl;
98   }
99 
100   for (int i = exp_beg; i < exp_end; ++i) {
101     const int range_length = (1 << i) + range_offset;
102 
103     std::cout
104         << "\n--------------------------------------------------------------\n"
105         << "--------------------------------------------------------------\n"
106         << "MDRange Test:  range bounds: " << range_length << " , "
107         << range_length << " , " << range_length
108         << "\n--------------------------------------------------------------\n"
109         << "--------------------------------------------------------------\n";
110     //      << std::endl;
111 
112     int t0_min = 0, t1_min = 0, t2_min = 0;
113     double seconds_min = 0.0;
114 
115     // Test 1: The MDRange in full
116     {
117       int t0 = 1, t1 = 1, t2 = 1;
118       int counter = 1;
119 #if !defined(KOKKOS_ENABLE_CUDA)
120       int min_bnd = 8;
121       int tfast   = range_length;
122 #else
123       int min_bnd = 2;
124       int tfast   = 32;
125 #endif
126       while (tfast >= min_bnd) {
127         int tmid = min_bnd;
128         while (tmid < tfast) {
129           t0         = min_bnd;
130           t1         = tmid;
131           t2         = tfast;
132           int t2_rev = min_bnd;
133           int t1_rev = tmid;
134           int t0_rev = tfast;
135 
136 #if defined(KOKKOS_ENABLE_CUDA)
137           // Note: Product of tile sizes must be < 1024 for Cuda
138           if (t0 * t1 * t2 >= 1024) {
139             printf("  Exceeded Cuda tile limits; onto next range set\n\n");
140             break;
141           }
142 #endif
143 
144           // Run 1 with tiles LayoutRight style
145           double seconds_1 = 0;
146           {
147             seconds_1 =
148                 MultiDimRangePerf3D<DeviceType, double,
149                                     LayoutType>::test_multi_index(range_length,
150                                                                   range_length,
151                                                                   range_length,
152                                                                   t0, t1, t2);
153           }
154 
155 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
156           std::cout << label_mdrange << " , " << t0 << " , " << t1 << " , "
157                     << t2 << " , " << seconds_1 << std::endl;
158 #endif
159 
160           if (counter == 1) {
161             seconds_min = seconds_1;
162             t0_min      = t0;
163             t1_min      = t1;
164             t2_min      = t2;
165           } else {
166             if (seconds_1 < seconds_min) {
167               seconds_min = seconds_1;
168               t0_min      = t0;
169               t1_min      = t1;
170               t2_min      = t2;
171             }
172           }
173 
174           // Run 2 with tiles LayoutLeft style - reverse order of tile dims
175           double seconds_1rev = 0;
176           {
177             seconds_1rev =
178                 MultiDimRangePerf3D<DeviceType, double,
179                                     LayoutType>::test_multi_index(range_length,
180                                                                   range_length,
181                                                                   range_length,
182                                                                   t0_rev,
183                                                                   t1_rev,
184                                                                   t2_rev);
185           }
186 
187 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
188           std::cout << label_mdrange << " , " << t0_rev << " , " << t1_rev
189                     << " , " << t2_rev << " , " << seconds_1rev << std::endl;
190 #endif
191 
192           if (seconds_1rev < seconds_min) {
193             seconds_min = seconds_1rev;
194             t0_min      = t0_rev;
195             t1_min      = t1_rev;
196             t2_min      = t2_rev;
197           }
198 
199           ++counter;
200           tmid <<= 1;
201         }  // end inner while
202         tfast >>= 1;
203       }  // end outer while
204 
205       std::cout
206           << "\n"
207           << "--------------------------------------------------------------\n"
208           << label_mdrange << "\n Min values "
209           << "\n Range length per dim (3D): " << range_length
210           << "\n TileDims:  " << t0_min << " , " << t1_min << " , " << t2_min
211           << "\n Min time: " << seconds_min
212           << "\n---------------------------------------------------------------"
213           << std::endl;
214     }  // end scope
215 
216 #if !defined(KOKKOS_ENABLE_CUDA)
217     double seconds_min_c = 0.0;
218     int t0c_min = 0, t1c_min = 0, t2c_min = 0;
219     int counter = 1;
220     {
221       int min_bnd = 8;
222       // Test 1_c: MDRange with 0 for 'inner' tile dim; this case will utilize
223       // the full span in that direction, should be similar to Collapse<2>
224       if (std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
225         for (unsigned int T0 = min_bnd;
226              T0 < static_cast<unsigned int>(range_length); T0 <<= 1) {
227           for (unsigned int T1 = min_bnd;
228                T1 < static_cast<unsigned int>(range_length); T1 <<= 1) {
229             double seconds_c = 0;
230             {
231               seconds_c = MultiDimRangePerf3D<DeviceType, double, LayoutType>::
232                   test_multi_index(range_length, range_length, range_length, T0,
233                                    T1, 0);
234             }
235 
236 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
237             std::cout << " MDRange LR with '0' tile - collapse-like \n"
238                       << label_mdrange << " , " << T0 << " , " << T1 << " , "
239                       << range_length << " , " << seconds_c << std::endl;
240 #endif
241 
242             t2c_min = range_length;
243             if (counter == 1) {
244               seconds_min_c = seconds_c;
245               t0c_min       = T0;
246               t1c_min       = T1;
247             } else {
248               if (seconds_c < seconds_min_c) {
249                 seconds_min_c = seconds_c;
250                 t0c_min       = T0;
251                 t1c_min       = T1;
252               }
253             }
254             ++counter;
255           }
256         }
257       } else {
258         for (unsigned int T1 = min_bnd;
259              T1 <= static_cast<unsigned int>(range_length); T1 <<= 1) {
260           for (unsigned int T2 = min_bnd;
261                T2 <= static_cast<unsigned int>(range_length); T2 <<= 1) {
262             double seconds_c = 0;
263             {
264               seconds_c = MultiDimRangePerf3D<DeviceType, double, LayoutType>::
265                   test_multi_index(range_length, range_length, range_length, 0,
266                                    T1, T2);
267             }
268 
269 #if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
270             std::cout << " MDRange LL with '0' tile - collapse-like \n"
271                       << label_mdrange << " , " << range_length << " < " << T1
272                       << " , " << T2 << " , " << seconds_c << std::endl;
273 #endif
274 
275             t0c_min = range_length;
276             if (counter == 1) {
277               seconds_min_c = seconds_c;
278               t1c_min       = T1;
279               t2c_min       = T2;
280             } else {
281               if (seconds_c < seconds_min_c) {
282                 seconds_min_c = seconds_c;
283                 t1c_min       = T1;
284                 t2c_min       = T2;
285               }
286             }
287             ++counter;
288           }
289         }
290       }
291 
292       std::cout
293           //      <<
294           //      "--------------------------------------------------------------\n"
295           << label_mdrange << "  Collapse<2> style: "
296           << "\n Min values "
297           << "\n Range length per dim (3D): " << range_length
298           << "\n TileDims:  " << t0c_min << " , " << t1c_min << " , " << t2c_min
299           << "\n Min time: " << seconds_min_c
300           << "\n---------------------------------------------------------------"
301           << std::endl;
302     }  // end scope test 2
303 #endif
304 
305     // Test 2: RangePolicy Collapse2 style
306     double seconds_2 = 0;
307     {
308       seconds_2 = RangePolicyCollapseTwo<DeviceType, double, LayoutType>::
309           test_index_collapse_two(range_length, range_length, range_length);
310     }
311     std::cout << label_range_col2 << " , " << range_length << " , " << seconds_2
312               << std::endl;
313 
314     // Test 3: RangePolicy Collapse all style - not necessary, always slow
315     /*
316     double seconds_3 = 0;
317     { seconds_3 = RangePolicyCollapseAll< DeviceType , double , LayoutType
318     >::test_collapse_all(range_length,range_length,range_length) ; } std::cout
319     << label_range_col_all
320       << " , " << range_length
321       << " , " << seconds_3
322       << "\n---------------------------------------------------------------"
323       << std::endl ;
324     */
325 
326     // Compare fastest times... will never be collapse all so ignore it
327     // seconds_min = tiled MDRange
328     // seconds_min_c = collapse<2>-like MDRange (tiledim = span for fast dim) -
329     // only for non-Cuda, else tile too long seconds_2 = collapse<2>-style
330     // RangePolicy seconds_3 = collapse<3>-style RangePolicy
331 
332 #if !defined(KOKKOS_ENABLE_CUDA)
333     if (seconds_min < seconds_min_c) {
334       if (seconds_min < seconds_2) {
335         std::cout
336             << "--------------------------------------------------------------"
337                "\n"
338             << " Fastest run: MDRange tiled\n"
339             << " Time: " << seconds_min
340             << " Difference: " << seconds_2 - seconds_min << " Other times: \n"
341             << "   MDrange collapse-like (tiledim = span on fast dim) type: "
342             << seconds_min_c << "\n"
343             << "   Collapse2 Range Policy: " << seconds_2 << "\n"
344             << "\n-------------------------------------------------------------"
345                "-"
346             << "\n-------------------------------------------------------------"
347                "-"
348             //<< "\n\n"
349             << std::endl;
350       } else if (seconds_min > seconds_2) {
351         std::cout
352             << " Fastest run: Collapse2 RangePolicy\n"
353             << " Time: " << seconds_2
354             << " Difference: " << seconds_min - seconds_2 << " Other times: \n"
355             << "   MDrange Tiled: " << seconds_min << "\n"
356             << "   MDrange collapse-like (tiledim = span on fast dim) type: "
357             << seconds_min_c << "\n"
358             << "\n-------------------------------------------------------------"
359                "-"
360             << "\n-------------------------------------------------------------"
361                "-"
362             //<< "\n\n"
363             << std::endl;
364       }
365     } else if (seconds_min > seconds_min_c) {
366       if (seconds_min_c < seconds_2) {
367         std::cout << "---------------------------------------------------------"
368                      "-----\n"
369                   << " Fastest run: MDRange collapse-like (tiledim = span on "
370                      "fast dim) type\n"
371                   << " Time: " << seconds_min_c
372                   << " Difference: " << seconds_2 - seconds_min_c
373                   << " Other times: \n"
374                   << "   MDrange Tiled: " << seconds_min << "\n"
375                   << "   Collapse2 Range Policy: " << seconds_2 << "\n"
376                   << "\n-------------------------------------------------------"
377                      "-------"
378                   << "\n-------------------------------------------------------"
379                      "-------"
380                   //<< "\n\n"
381                   << std::endl;
382       } else if (seconds_min_c > seconds_2) {
383         std::cout
384             << " Fastest run: Collapse2 RangePolicy\n"
385             << " Time: " << seconds_2
386             << " Difference: " << seconds_min_c - seconds_2
387             << " Other times: \n"
388             << "   MDrange Tiled: " << seconds_min << "\n"
389             << "   MDrange collapse-like (tiledim = span on fast dim) type: "
390             << seconds_min_c << "\n"
391             << "\n-------------------------------------------------------------"
392                "-"
393             << "\n-------------------------------------------------------------"
394                "-"
395             //<< "\n\n"
396             << std::endl;
397       }
398     }  // end else if
399 #else
400     if (seconds_min < seconds_2) {
401       std::cout
402           << "--------------------------------------------------------------\n"
403           << " Fastest run: MDRange tiled\n"
404           << " Time: " << seconds_min
405           << " Difference: " << seconds_2 - seconds_min << " Other times: \n"
406           << "   Collapse2 Range Policy: " << seconds_2 << "\n"
407           << "\n--------------------------------------------------------------"
408           << "\n--------------------------------------------------------------"
409           //<< "\n\n"
410           << std::endl;
411     } else if (seconds_min > seconds_2) {
412       std::cout
413           << " Fastest run: Collapse2 RangePolicy\n"
414           << " Time: " << seconds_2
415           << " Difference: " << seconds_min - seconds_2 << " Other times: \n"
416           << "   MDrange Tiled: " << seconds_min << "\n"
417           << "\n--------------------------------------------------------------"
418           << "\n--------------------------------------------------------------"
419           //<< "\n\n"
420           << std::endl;
421     }
422 #endif
423 
424   }  // end for
425 
426 #undef MDRANGE_PERFORMANCE_OUTPUT_VERBOSE
427 }
428 
429 }  // namespace Test
430