1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #include <Kokkos_Core.hpp>
46 #include <gtest/gtest.h>
47 #include <cstdio>
48 #include <PerfTest_Category.hpp>
49
50 namespace Test {
51
52 template <class ViewType>
fill_view(ViewType & a,typename ViewType::const_value_type & val,int repeat)53 double fill_view(ViewType& a, typename ViewType::const_value_type& val,
54 int repeat) {
55 Kokkos::Timer timer;
56 for (int i = 0; i < repeat; i++) {
57 Kokkos::deep_copy(a, val);
58 }
59 Kokkos::fence();
60 return timer.seconds();
61 }
62
63 template <class Layout>
run_fillview_tests123(int N,int R)64 void run_fillview_tests123(int N, int R) {
65 const int N1 = N;
66 const int N2 = N1 * N1;
67 const int N3 = N2 * N1;
68 const int N4 = N2 * N2;
69 const int N8 = N4 * N4;
70
71 double time1, time2, time3, time_raw = 100000.0;
72 {
73 Kokkos::View<double*, Layout> a("A1", N8);
74 time1 = fill_view(a, 1.1, R) / R;
75 }
76 {
77 Kokkos::View<double**, Layout> a("A2", N4, N4);
78 time2 = fill_view(a, 1.1, R) / R;
79 }
80 {
81 Kokkos::View<double***, Layout> a("A3", N3, N3, N2);
82 time3 = fill_view(a, 1.1, R) / R;
83 }
84 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
85 {
86 Kokkos::View<double*, Layout> a("A1", N8);
87 double* a_ptr = a.data();
88 Kokkos::Timer timer;
89 for (int r = 0; r < R; r++) {
90 Kokkos::parallel_for(
91 N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
92 }
93 Kokkos::fence();
94 time_raw = timer.seconds() / R;
95 }
96 #endif
97 double size = 1.0 * N8 * 8 / 1024 / 1024;
98 printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
99 size / 1024 / time_raw);
100 printf(" Rank1: %lf s %lf MB %lf GB/s\n", time1, size,
101 size / 1024 / time1);
102 printf(" Rank2: %lf s %lf MB %lf GB/s\n", time2, size,
103 size / 1024 / time2);
104 printf(" Rank3: %lf s %lf MB %lf GB/s\n", time3, size,
105 size / 1024 / time3);
106 }
107
108 template <class Layout>
run_fillview_tests45(int N,int R)109 void run_fillview_tests45(int N, int R) {
110 const int N1 = N;
111 const int N2 = N1 * N1;
112 const int N4 = N2 * N2;
113 const int N8 = N4 * N4;
114
115 double time4, time5, time_raw = 100000.0;
116 {
117 Kokkos::View<double****, Layout> a("A4", N2, N2, N2, N2);
118 time4 = fill_view(a, 1.1, R) / R;
119 }
120 {
121 Kokkos::View<double*****, Layout> a("A5", N2, N2, N1, N1, N2);
122 time5 = fill_view(a, 1.1, R) / R;
123 }
124 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
125 {
126 Kokkos::View<double*, Layout> a("A1", N8);
127 double* a_ptr = a.data();
128 Kokkos::Timer timer;
129 for (int r = 0; r < R; r++) {
130 Kokkos::parallel_for(
131 N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
132 }
133 Kokkos::fence();
134 time_raw = timer.seconds() / R;
135 }
136 #endif
137 double size = 1.0 * N8 * 8 / 1024 / 1024;
138 printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
139 size / 1024 / time_raw);
140 printf(" Rank4: %lf s %lf MB %lf GB/s\n", time4, size,
141 size / 1024 / time4);
142 printf(" Rank5: %lf s %lf MB %lf GB/s\n", time5, size,
143 size / 1024 / time5);
144 }
145
146 template <class Layout>
run_fillview_tests6(int N,int R)147 void run_fillview_tests6(int N, int R) {
148 const int N1 = N;
149 const int N2 = N1 * N1;
150 const int N4 = N2 * N2;
151 const int N8 = N4 * N4;
152
153 double time6, time_raw = 100000.0;
154 {
155 Kokkos::View<double******, Layout> a("A6", N2, N1, N1, N1, N1, N2);
156 time6 = fill_view(a, 1.1, R) / R;
157 }
158 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
159 {
160 Kokkos::View<double*, Layout> a("A1", N8);
161 double* a_ptr = a.data();
162 Kokkos::Timer timer;
163 for (int r = 0; r < R; r++) {
164 Kokkos::parallel_for(
165 N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
166 }
167 Kokkos::fence();
168 time_raw = timer.seconds() / R;
169 }
170 #endif
171 double size = 1.0 * N8 * 8 / 1024 / 1024;
172 printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
173 size / 1024 / time_raw);
174 printf(" Rank6: %lf s %lf MB %lf GB/s\n", time6, size,
175 size / 1024 / time6);
176 }
177
178 template <class Layout>
run_fillview_tests7(int N,int R)179 void run_fillview_tests7(int N, int R) {
180 const int N1 = N;
181 const int N2 = N1 * N1;
182 const int N4 = N2 * N2;
183 const int N8 = N4 * N4;
184
185 double time7, time_raw = 100000.0;
186 {
187 Kokkos::View<double*******, Layout> a("A7", N2, N1, N1, N1, N1, N1, N1);
188 time7 = fill_view(a, 1.1, R) / R;
189 }
190 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
191 {
192 Kokkos::View<double*, Layout> a("A1", N8);
193 double* a_ptr = a.data();
194 Kokkos::Timer timer;
195 for (int r = 0; r < R; r++) {
196 Kokkos::parallel_for(
197 N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
198 }
199 Kokkos::fence();
200 time_raw = timer.seconds() / R;
201 }
202 #endif
203 double size = 1.0 * N8 * 8 / 1024 / 1024;
204 printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
205 size / 1024 / time_raw);
206 printf(" Rank7: %lf s %lf MB %lf GB/s\n", time7, size,
207 size / 1024 / time7);
208 }
209
210 template <class Layout>
run_fillview_tests8(int N,int R)211 void run_fillview_tests8(int N, int R) {
212 const int N1 = N;
213 const int N2 = N1 * N1;
214 const int N4 = N2 * N2;
215 const int N8 = N4 * N4;
216
217 double time8, time_raw = 100000.0;
218 {
219 Kokkos::View<double********, Layout> a("A8", N1, N1, N1, N1, N1, N1, N1,
220 N1);
221 time8 = fill_view(a, 1.1, R) / R;
222 }
223 #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
224 {
225 Kokkos::View<double*, Layout> a("A1", N8);
226 double* a_ptr = a.data();
227 Kokkos::Timer timer;
228 for (int r = 0; r < R; r++) {
229 Kokkos::parallel_for(
230 N8, KOKKOS_LAMBDA(const int& i) { a_ptr[i] = 1.1; });
231 }
232 Kokkos::fence();
233 time_raw = timer.seconds() / R;
234 }
235 #endif
236 double size = 1.0 * N8 * 8 / 1024 / 1024;
237 printf(" Raw: %lf s %lf MB %lf GB/s\n", time_raw, size,
238 size / 1024 / time_raw);
239 printf(" Rank8: %lf s %lf MB %lf GB/s\n", time8, size,
240 size / 1024 / time8);
241 }
242
243 } // namespace Test
244