#include <Kokkos_Core.hpp>
#include <gtest/gtest.h>
#include <PerfTest_Category.hpp>
#include <cstdlib>
4
5 namespace Test {
6
7 namespace {
// Generic helper that hands out execution-space instances for the tests.
// This primary template default-constructs the instance, has nothing to
// release, and reports that kernels on separate instances do not overlap.
template <class ExecSpace>
struct SpaceInstance {
  // Returns a value-initialized ExecSpace.
  static ExecSpace create() {
    return ExecSpace();
  }

  // Nothing to clean up for a default-constructed instance.
  static void destroy(ExecSpace& /*space*/) {
  }

  // Always false: the timing assertions guarded by overlap() are skipped.
  static bool overlap() {
    const bool can_overlap = false;
    return can_overlap;
  }
};
14
15 #ifndef KOKKOS_ENABLE_DEBUG
16 #ifdef KOKKOS_ENABLE_CUDA
17 template <>
18 struct SpaceInstance<Kokkos::Cuda> {
createTest::__anon062167350111::SpaceInstance19 static Kokkos::Cuda create() {
20 cudaStream_t stream;
21 cudaStreamCreate(&stream);
22 return Kokkos::Cuda(stream);
23 }
destroyTest::__anon062167350111::SpaceInstance24 static void destroy(Kokkos::Cuda& space) {
25 cudaStream_t stream = space.cuda_stream();
26 cudaStreamDestroy(stream);
27 }
overlapTest::__anon062167350111::SpaceInstance28 static bool overlap() {
29 bool value = true;
30 auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING");
31 if (local_rank_str) {
32 value = (std::stoi(local_rank_str) == 0);
33 }
34 return value;
35 }
36 };
37 #endif
38 #endif
39 } // namespace
40
41 struct FunctorRange {
42 int M, R;
43 Kokkos::View<double**, TEST_EXECSPACE> a;
FunctorRangeTest::FunctorRange44 FunctorRange(int M_, int R_, Kokkos::View<double**, TEST_EXECSPACE> a_)
45 : M(M_), R(R_), a(a_) {}
46 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorRange47 void operator()(const int i) const {
48 for (int r = 0; r < R; r++)
49 for (int j = 0; j < M; j++) {
50 a(i, j) += 1.0;
51 }
52 }
53 };
54
55 struct FunctorMDRange {
56 int M, R;
57 Kokkos::View<double**, TEST_EXECSPACE> a;
FunctorMDRangeTest::FunctorMDRange58 FunctorMDRange(int M_, int R_, Kokkos::View<double**, TEST_EXECSPACE> a_)
59 : M(M_), R(R_), a(a_) {}
60 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorMDRange61 void operator()(const int i, const int) const {
62 for (int j = 0; j < M; j++) a(i, j) += 1.0;
63 }
64 };
65
66 struct FunctorTeam {
67 int M, R;
68 Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a;
FunctorTeamTest::FunctorTeam69 FunctorTeam(int M_, int R_,
70 Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a_)
71 : M(M_), R(R_), a(a_) {}
72 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorTeam73 void operator()(
74 const Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type& team) const {
75 int i = team.league_rank();
76 for (int r = 0; r < R; r++) {
77 Kokkos::parallel_for(Kokkos::TeamThreadRange(team, M),
78 [&](const int j) { a(i, j) += 1.0; });
79 }
80 }
81 };
82
83 struct FunctorRangeReduce {
84 int M, R;
85 Kokkos::View<double**, TEST_EXECSPACE> a;
FunctorRangeReduceTest::FunctorRangeReduce86 FunctorRangeReduce(int M_, int R_, Kokkos::View<double**, TEST_EXECSPACE> a_)
87 : M(M_), R(R_), a(a_) {}
88 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorRangeReduce89 void operator()(const int i, double& tmp) const {
90 for (int r = 0; r < R; r++)
91 for (int j = 0; j < M; j++) {
92 tmp += a(i, j);
93 }
94 }
95 };
96
97 struct FunctorMDRangeReduce {
98 int M, R;
99 Kokkos::View<double**, TEST_EXECSPACE> a;
FunctorMDRangeReduceTest::FunctorMDRangeReduce100 FunctorMDRangeReduce(int M_, int R_,
101 Kokkos::View<double**, TEST_EXECSPACE> a_)
102 : M(M_), R(R_), a(a_) {}
103 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorMDRangeReduce104 void operator()(const int i, const int, double& tmp) const {
105 for (int j = 0; j < M; j++) tmp += a(i, j);
106 }
107 };
108
109 struct FunctorTeamReduce {
110 int M, R;
111 Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a;
FunctorTeamReduceTest::FunctorTeamReduce112 FunctorTeamReduce(
113 int M_, int R_,
114 Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a_)
115 : M(M_), R(R_), a(a_) {}
116 KOKKOS_INLINE_FUNCTION
operator ()Test::FunctorTeamReduce117 void operator()(const Kokkos::TeamPolicy<TEST_EXECSPACE>::member_type& team,
118 double& tmp) const {
119 int i = team.league_rank();
120 for (int r = 0; r < R; r++) {
121 double val;
122 Kokkos::parallel_reduce(
123 Kokkos::TeamThreadRange(team, M),
124 [&](const int j, double& tmp2) { tmp2 += a(i, j); }, val);
125 tmp += val;
126 }
127 }
128 };
129
// Times RangePolicy kernels launched on one execution-space instance against
// the same kernels launched on two instances obtained from SpaceInstance.
//
// Checks, each enforced only when SpaceInstance<TEST_EXECSPACE>::overlap() is
// true:
//  * two parallel_for launches on `space` take more than 1.5x as long as one
//    launch on `space1` plus one on `space2`;
//  * a parallel_reduce into a View timed including the fence takes more than
//    2x as long as the same call timed before the fence;
//  * two reductions on `space1`/`space2` take less than 1.5x as long as two
//    reductions on `space`.
// Independently of overlap(), the reduction results from `space1` and
// `space2` must equal the result obtained on `space`.
//
// The timing checks use ASSERT_GT/ASSERT_LT so that a failure reports the
// measured values; the printf lines are not reached once an ASSERT fails.
TEST(default_exec, overlap_range_policy) {
  int N = 2000;
  int M = 10000;
  int R = 10;

  TEST_EXECSPACE space;
  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();

  Kokkos::View<double**, TEST_EXECSPACE> a("A", N, M);
  FunctorRange f(M, R, a);
  FunctorRangeReduce fr(M, R, a);

  // Builds the HintLightWeight RangePolicy over [0, N) on the given instance.
  const auto light_range = [&](const TEST_EXECSPACE& exec) {
    return Kokkos::Experimental::require(
        Kokkos::RangePolicy<TEST_EXECSPACE>(exec, 0, N),
        Kokkos::Experimental::WorkItemProperty::HintLightWeight);
  };

  // Untimed launches (presumably warm-up): one with a plain policy, then one
  // on each of the two extra instances.
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0",
                       Kokkos::RangePolicy<TEST_EXECSPACE>(0, N),
                       FunctorRange(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1",
                       light_range(space1), f);
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2",
                       light_range(space2), f);
  Kokkos::fence();

  // Untimed pair on `space`; the timer is reset before every measurement.
  Kokkos::Timer timer;
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3",
                       light_range(space), f);
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4",
                       light_range(space), f);
  Kokkos::fence();

  // Measurement 1: one kernel on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5",
                       light_range(space1), FunctorRange(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6",
                       light_range(space2), FunctorRange(M, R, a));
  Kokkos::fence();
  double time_overlap = timer.seconds();

  // Measurement 2: the same two kernels, both on `space`.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7",
                       light_range(space), f);
  Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8",
                       light_range(space), f);
  Kokkos::fence();
  double time_end = timer.seconds();

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_end, 1.5 * time_overlap);
  }
  printf("Time RangePolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
         time_overlap);

  Kokkos::View<double, TEST_EXECSPACE> result("result");
  Kokkos::View<double, TEST_EXECSPACE> result1("result1");
  Kokkos::View<double, TEST_EXECSPACE> result2("result2");
  Kokkos::View<double, Kokkos::HostSpace> h_result("h_result");
  Kokkos::View<double, Kokkos::HostSpace> h_result1("h_result1");
  Kokkos::View<double, Kokkos::HostSpace> h_result2("h_result2");

  const char* const reduce_label =
      "default_exec::overlap_range_policy::kernel_reduce";

  // Reduction timed including the fence; its value is the reference result.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_range(space), fr, result);
  Kokkos::fence();
  double time_fenced = timer.seconds();
  Kokkos::deep_copy(h_result, result);

  // Same reduction, but the time is read before the fence.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_range(space), fr, result);
  double time_not_fenced = timer.seconds();
  Kokkos::fence();
  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
  }

  // Two reductions on `space`.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_range(space), fr, result);
  Kokkos::parallel_reduce(reduce_label, light_range(space), fr, result);
  Kokkos::fence();
  double time_no_overlapped_reduce = timer.seconds();

  // One reduction on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_range(space1), fr, result1);
  Kokkos::parallel_reduce(reduce_label, light_range(space2), fr, result2);
  Kokkos::fence();
  double time_overlapped_reduce = timer.seconds();

  Kokkos::deep_copy(h_result2, result2);
  Kokkos::deep_copy(h_result1, result1);

  ASSERT_EQ(h_result1(), h_result());
  ASSERT_EQ(h_result2(), h_result());

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
  }
  printf("Time RangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
         time_no_overlapped_reduce, time_overlapped_reduce);
  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
}
290
// Times rank-2 MDRangePolicy kernels over {0, 0}..{N, R} launched on one
// execution-space instance against the same kernels launched on two
// instances obtained from SpaceInstance.
//
// Checks, each enforced only when SpaceInstance<TEST_EXECSPACE>::overlap() is
// true:
//  * two parallel_for launches on `space` take more than 1.5x as long as one
//    launch on `space1` plus one on `space2`;
//  * a parallel_reduce into a View timed including the fence takes more than
//    2x as long as the same call timed before the fence;
//  * two reductions on `space1`/`space2` take less than 1.5x as long as two
//    reductions on `space`.
// Independently of overlap(), the reduction results from `space1` and
// `space2` must equal the result obtained on `space`.
//
// Kernel labels carry this test's own name (they previously said
// "overlap_range_policy"). The timing checks use ASSERT_GT/ASSERT_LT so that
// a failure reports the measured values; the printf lines are not reached
// once an ASSERT fails.
TEST(default_exec, overlap_mdrange_policy) {
  int N = 200;
  int M = 10000;
  int R = 10;

  TEST_EXECSPACE space;
  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();

  Kokkos::View<double**, TEST_EXECSPACE> a("A", N, M);
  FunctorMDRange f(M, R, a);
  FunctorMDRangeReduce fr(M, R, a);

  using md_policy_type = Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>;

  // Builds the HintLightWeight MDRangePolicy over {0, 0}..{N, R} on the given
  // instance.
  const auto light_mdrange = [&](const TEST_EXECSPACE& exec) {
    return Kokkos::Experimental::require(
        md_policy_type(exec, {0, 0}, {N, R}),
        Kokkos::Experimental::WorkItemProperty::HintLightWeight);
  };

  // Untimed launches (presumably warm-up): one with a policy that names no
  // instance, then one on each of the two extra instances.
  Kokkos::parallel_for(
      "default_exec::overlap_mdrange_policy::kernel0",
      Kokkos::Experimental::require(
          md_policy_type({0, 0}, {N, R}),
          Kokkos::Experimental::WorkItemProperty::HintLightWeight),
      FunctorMDRange(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel1",
                       light_mdrange(space1), f);
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel2",
                       light_mdrange(space2), f);
  Kokkos::fence();

  // Untimed pair on `space`; the timer is reset before every measurement.
  Kokkos::Timer timer;
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel3",
                       light_mdrange(space), f);
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel4",
                       light_mdrange(space), f);
  Kokkos::fence();

  // Measurement 1: one kernel on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel5",
                       light_mdrange(space1), FunctorMDRange(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel6",
                       light_mdrange(space2), FunctorMDRange(M, R, a));
  Kokkos::fence();
  double time_overlap = timer.seconds();

  // Measurement 2: the same two kernels, both on `space`.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel7",
                       light_mdrange(space), f);
  Kokkos::parallel_for("default_exec::overlap_mdrange_policy::kernel8",
                       light_mdrange(space), f);
  Kokkos::fence();
  double time_end = timer.seconds();

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_end, 1.5 * time_overlap);
  }
  printf("Time MDRangePolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
         time_overlap);

  Kokkos::View<double, TEST_EXECSPACE> result("result");
  Kokkos::View<double, TEST_EXECSPACE> result1("result1");
  Kokkos::View<double, TEST_EXECSPACE> result2("result2");
  Kokkos::View<double, Kokkos::HostSpace> h_result("h_result");
  Kokkos::View<double, Kokkos::HostSpace> h_result1("h_result1");
  Kokkos::View<double, Kokkos::HostSpace> h_result2("h_result2");

  const char* const reduce_label =
      "default_exec::overlap_mdrange_policy::kernel_reduce";

  // Reduction timed including the fence; its value is the reference result.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space), fr, result);
  Kokkos::fence();
  double time_fenced = timer.seconds();
  Kokkos::deep_copy(h_result, result);

  // Same reduction, but the time is read before the fence.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space), fr, result);
  double time_not_fenced = timer.seconds();
  Kokkos::fence();
  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
  }

  // Two reductions on `space`.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space), fr, result);
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space), fr, result);
  Kokkos::fence();
  double time_no_overlapped_reduce = timer.seconds();

  // One reduction on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space1), fr, result1);
  Kokkos::parallel_reduce(reduce_label, light_mdrange(space2), fr, result2);
  Kokkos::fence();
  double time_overlapped_reduce = timer.seconds();

  Kokkos::deep_copy(h_result2, result2);
  Kokkos::deep_copy(h_result1, result1);

  ASSERT_EQ(h_result1(), h_result());
  ASSERT_EQ(h_result2(), h_result());

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
  }
  printf("Time MDRangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
         time_no_overlapped_reduce, time_overlapped_reduce);
  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
}
469
// Times TeamPolicy kernels (league size N, team size Kokkos::AUTO) launched
// on one execution-space instance against the same kernels launched on two
// instances obtained from SpaceInstance.
//
// Checks, each enforced only when SpaceInstance<TEST_EXECSPACE>::overlap() is
// true:
//  * two parallel_for launches on `space` take more than 1.5x as long as one
//    launch on `space1` plus one on `space2`;
//  * a parallel_reduce into a View timed including the fence takes more than
//    2x as long as the same call timed before the fence;
//  * two reductions on `space1`/`space2` take less than 1.5x as long as two
//    reductions on `space`.
// Independently of overlap(), the reduction results from `space1` and
// `space2` must equal the result obtained on `space`.
//
// Kernel labels carry this test's own name (they previously said
// "overlap_range_policy"). The timing checks use ASSERT_GT/ASSERT_LT so that
// a failure reports the measured values; the printf lines are not reached
// once an ASSERT fails.
TEST(default_exec, overlap_team_policy) {
  int N = 20;
  int M = 1000000;
  int R = 10;

  TEST_EXECSPACE space;
  TEST_EXECSPACE space1 = SpaceInstance<TEST_EXECSPACE>::create();
  TEST_EXECSPACE space2 = SpaceInstance<TEST_EXECSPACE>::create();

  Kokkos::View<double**, Kokkos::LayoutRight, TEST_EXECSPACE> a("A", N, M);
  FunctorTeam f(M, R, a);
  FunctorTeamReduce fr(M, R, a);

  // Builds the HintLightWeight TeamPolicy (N teams, automatic team size) on
  // the given instance.
  const auto light_team = [&](const TEST_EXECSPACE& exec) {
    return Kokkos::Experimental::require(
        Kokkos::TeamPolicy<TEST_EXECSPACE>(exec, N, Kokkos::AUTO),
        Kokkos::Experimental::WorkItemProperty::HintLightWeight);
  };

  // Untimed launches (presumably warm-up): one with a policy that names no
  // instance, then one on each of the two extra instances.
  Kokkos::parallel_for(
      "default_exec::overlap_team_policy::kernel0",
      Kokkos::Experimental::require(
          Kokkos::TeamPolicy<TEST_EXECSPACE>(N, Kokkos::AUTO),
          Kokkos::Experimental::WorkItemProperty::HintLightWeight),
      FunctorTeam(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel1",
                       light_team(space1), f);
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel2",
                       light_team(space2), f);
  Kokkos::fence();

  // Untimed pair on `space`; the timer is reset before every measurement.
  Kokkos::Timer timer;
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel3",
                       light_team(space), f);
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel4",
                       light_team(space), f);
  Kokkos::fence();

  // Measurement 1: one kernel on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel5",
                       light_team(space1), FunctorTeam(M, R, a));
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel6",
                       light_team(space2), FunctorTeam(M, R, a));
  Kokkos::fence();
  double time_overlap = timer.seconds();

  // Measurement 2: the same two kernels, both on `space`.
  timer.reset();
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel7",
                       light_team(space), f);
  Kokkos::parallel_for("default_exec::overlap_team_policy::kernel8",
                       light_team(space), f);
  Kokkos::fence();
  double time_end = timer.seconds();

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_end, 1.5 * time_overlap);
  }
  printf("Time TeamPolicy: NonOverlap: %lf Time Overlap: %lf\n", time_end,
         time_overlap);

  Kokkos::View<double, TEST_EXECSPACE> result("result");
  Kokkos::View<double, TEST_EXECSPACE> result1("result1");
  Kokkos::View<double, TEST_EXECSPACE> result2("result2");
  Kokkos::View<double, Kokkos::HostSpace> h_result("h_result");
  Kokkos::View<double, Kokkos::HostSpace> h_result1("h_result1");
  Kokkos::View<double, Kokkos::HostSpace> h_result2("h_result2");

  const char* const reduce_label =
      "default_exec::overlap_team_policy::kernel_reduce";

  // Reduction timed including the fence; its value is the reference result.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_team(space), fr, result);
  Kokkos::fence();
  double time_fenced = timer.seconds();
  Kokkos::deep_copy(h_result, result);

  // Same reduction, but the time is read before the fence.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_team(space), fr, result);
  double time_not_fenced = timer.seconds();
  Kokkos::fence();
  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_GT(time_fenced, 2.0 * time_not_fenced);
  }

  // Two reductions on `space`.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_team(space), fr, result);
  Kokkos::parallel_reduce(reduce_label, light_team(space), fr, result);
  Kokkos::fence();
  double time_no_overlapped_reduce = timer.seconds();

  // One reduction on each of the two extra instances.
  timer.reset();
  Kokkos::parallel_reduce(reduce_label, light_team(space1), fr, result1);
  Kokkos::parallel_reduce(reduce_label, light_team(space2), fr, result2);
  Kokkos::fence();
  double time_overlapped_reduce = timer.seconds();

  Kokkos::deep_copy(h_result2, result2);
  Kokkos::deep_copy(h_result1, result1);

  ASSERT_EQ(h_result1(), h_result());
  ASSERT_EQ(h_result2(), h_result());

  if (SpaceInstance<TEST_EXECSPACE>::overlap()) {
    ASSERT_LT(time_overlapped_reduce, 1.5 * time_no_overlapped_reduce);
  }
  printf("Time TeamPolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",
         time_no_overlapped_reduce, time_overlapped_reduce);
  SpaceInstance<TEST_EXECSPACE>::destroy(space1);
  SpaceInstance<TEST_EXECSPACE>::destroy(space2);
}
632 } // namespace Test
633