1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #include <Kokkos_Core.hpp>
46 #include <gtest/gtest.h>
47 #include <cstdio>
48 #include <PerfTest_Category.hpp>
49 
50 namespace Test {
51 
52 template <class ViewType>
fill_view(ViewType & a,typename ViewType::const_value_type & val,int repeat)53 double fill_view(ViewType& a, typename ViewType::const_value_type& val,
54                  int repeat) {
55   Kokkos::Timer timer;
56   for (int i = 0; i < repeat; i++) {
57     Kokkos::deep_copy(a, val);
58   }
59   Kokkos::fence();
60   return timer.seconds();
61 }
62 
63 template <class Layout>
run_fillview_tests123(int N,int R)64 void run_fillview_tests123(int N, int R) {
65   const int N1 = N;
66   const int N2 = N1 * N1;
67   const int N3 = N2 * N1;
68   const int N4 = N2 * N2;
69   const int N8 = N4 * N4;
70 
71   double time1, time2, time3, time_raw = 100000.0;
72   {
73     Kokkos::View<double*, Layout> a("A1", N8);
74     time1 = fill_view(a, 1.1, R) / R;
75   }
76   {
77     Kokkos::View<double**, Layout> a("A2", N4, N4);
78     time2 = fill_view(a, 1.1, R) / R;
79   }
80   {
81     Kokkos::View<double***, Layout> a("A3", N3, N3, N2);
82     time3 = fill_view(a, 1.1, R) / R;
83   }
84 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
85   {
86     Kokkos::View<double*, Layout> a("A1", N8);
87     double* a_ptr = a.data();
88     Kokkos::Timer timer;
89     for (int r = 0; r < R; r++) {
90       Kokkos::parallel_for(
91           N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
92     }
93     Kokkos::fence();
94     time_raw = timer.seconds() / R;
95   }
96 #endif
97   double size = 1.0 * N8 * 8 / 1024 / 1024;
98   printf("   Raw:   %lf s   %lf MB   %lf GB/s\n", time_raw, size,
99          size / 1024 / time_raw);
100   printf("   Rank1: %lf s   %lf MB   %lf GB/s\n", time1, size,
101          size / 1024 / time1);
102   printf("   Rank2: %lf s   %lf MB   %lf GB/s\n", time2, size,
103          size / 1024 / time2);
104   printf("   Rank3: %lf s   %lf MB   %lf GB/s\n", time3, size,
105          size / 1024 / time3);
106 }
107 
108 template <class Layout>
run_fillview_tests45(int N,int R)109 void run_fillview_tests45(int N, int R) {
110   const int N1 = N;
111   const int N2 = N1 * N1;
112   const int N4 = N2 * N2;
113   const int N8 = N4 * N4;
114 
115   double time4, time5, time_raw = 100000.0;
116   {
117     Kokkos::View<double****, Layout> a("A4", N2, N2, N2, N2);
118     time4 = fill_view(a, 1.1, R) / R;
119   }
120   {
121     Kokkos::View<double*****, Layout> a("A5", N2, N2, N1, N1, N2);
122     time5 = fill_view(a, 1.1, R) / R;
123   }
124 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
125   {
126     Kokkos::View<double*, Layout> a("A1", N8);
127     double* a_ptr = a.data();
128     Kokkos::Timer timer;
129     for (int r = 0; r < R; r++) {
130       Kokkos::parallel_for(
131           N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
132     }
133     Kokkos::fence();
134     time_raw = timer.seconds() / R;
135   }
136 #endif
137   double size = 1.0 * N8 * 8 / 1024 / 1024;
138   printf("   Raw:   %lf s   %lf MB   %lf GB/s\n", time_raw, size,
139          size / 1024 / time_raw);
140   printf("   Rank4: %lf s   %lf MB   %lf GB/s\n", time4, size,
141          size / 1024 / time4);
142   printf("   Rank5: %lf s   %lf MB   %lf GB/s\n", time5, size,
143          size / 1024 / time5);
144 }
145 
146 template <class Layout>
run_fillview_tests6(int N,int R)147 void run_fillview_tests6(int N, int R) {
148   const int N1 = N;
149   const int N2 = N1 * N1;
150   const int N4 = N2 * N2;
151   const int N8 = N4 * N4;
152 
153   double time6, time_raw = 100000.0;
154   {
155     Kokkos::View<double******, Layout> a("A6", N2, N1, N1, N1, N1, N2);
156     time6 = fill_view(a, 1.1, R) / R;
157   }
158 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
159   {
160     Kokkos::View<double*, Layout> a("A1", N8);
161     double* a_ptr = a.data();
162     Kokkos::Timer timer;
163     for (int r = 0; r < R; r++) {
164       Kokkos::parallel_for(
165           N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
166     }
167     Kokkos::fence();
168     time_raw = timer.seconds() / R;
169   }
170 #endif
171   double size = 1.0 * N8 * 8 / 1024 / 1024;
172   printf("   Raw:   %lf s   %lf MB   %lf GB/s\n", time_raw, size,
173          size / 1024 / time_raw);
174   printf("   Rank6: %lf s   %lf MB   %lf GB/s\n", time6, size,
175          size / 1024 / time6);
176 }
177 
178 template <class Layout>
run_fillview_tests7(int N,int R)179 void run_fillview_tests7(int N, int R) {
180   const int N1 = N;
181   const int N2 = N1 * N1;
182   const int N4 = N2 * N2;
183   const int N8 = N4 * N4;
184 
185   double time7, time_raw = 100000.0;
186   {
187     Kokkos::View<double*******, Layout> a("A7", N2, N1, N1, N1, N1, N1, N1);
188     time7 = fill_view(a, 1.1, R) / R;
189   }
190 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
191   {
192     Kokkos::View<double*, Layout> a("A1", N8);
193     double* a_ptr = a.data();
194     Kokkos::Timer timer;
195     for (int r = 0; r < R; r++) {
196       Kokkos::parallel_for(
197           N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
198     }
199     Kokkos::fence();
200     time_raw = timer.seconds() / R;
201   }
202 #endif
203   double size = 1.0 * N8 * 8 / 1024 / 1024;
204   printf("   Raw:   %lf s   %lf MB   %lf GB/s\n", time_raw, size,
205          size / 1024 / time_raw);
206   printf("   Rank7: %lf s   %lf MB   %lf GB/s\n", time7, size,
207          size / 1024 / time7);
208 }
209 
210 template <class Layout>
run_fillview_tests8(int N,int R)211 void run_fillview_tests8(int N, int R) {
212   const int N1 = N;
213   const int N2 = N1 * N1;
214   const int N4 = N2 * N2;
215   const int N8 = N4 * N4;
216 
217   double time8, time_raw = 100000.0;
218   {
219     Kokkos::View<double********, Layout> a("A8", N1, N1, N1, N1, N1, N1, N1,
220                                            N1);
221     time8 = fill_view(a, 1.1, R) / R;
222   }
223 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
224   {
225     Kokkos::View<double*, Layout> a("A1", N8);
226     double* a_ptr = a.data();
227     Kokkos::Timer timer;
228     for (int r = 0; r < R; r++) {
229       Kokkos::parallel_for(
230           N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
231     }
232     Kokkos::fence();
233     time_raw = timer.seconds() / R;
234   }
235 #endif
236   double size = 1.0 * N8 * 8 / 1024 / 1024;
237   printf("   Raw:   %lf s   %lf MB   %lf GB/s\n", time_raw, size,
238          size / 1024 / time_raw);
239   printf("   Rank8: %lf s   %lf MB   %lf GB/s\n", time8, size,
240          size / 1024 / time8);
241 }
242 
243 }  // namespace Test
244