1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
46 #define KOKKOS_TEST_SCATTER_VIEW_HPP
47
48 #include <Kokkos_ScatterView.hpp>
49 #include <gtest/gtest.h>
50
51 namespace Test {
52
// Primary template of the ScatterView test functor. It is only declared here;
// the partial specializations that follow in this file (one per reduction
// operation `Op`) provide the definitions.
template <class DeviceType, class Layout, class Duplication, class Contribution,
          class Op, class NumberType>
struct test_scatter_view_impl_cls;
56
57 template <typename DeviceType, typename Layout, typename Duplication,
58 typename Contribution, typename NumberType>
59 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
60 Kokkos::Experimental::ScatterSum,
61 NumberType> {
62 public:
63 using scatter_view_type =
64 Kokkos::Experimental::ScatterView<NumberType * [12], Layout, DeviceType,
65 Kokkos::Experimental::ScatterSum,
66 Duplication, Contribution>;
67
68 using orig_view_type = Kokkos::View<NumberType * [12], Layout, DeviceType>;
69
70 scatter_view_type scatter_view;
71 int scatterSize;
72
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls73 test_scatter_view_impl_cls(const scatter_view_type& view) {
74 scatter_view = view;
75 scatterSize = 0;
76 }
77
initializeTest::test_scatter_view_impl_cls78 void initialize(orig_view_type orig) {
79 auto host_view =
80 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
81 Kokkos::fence();
82 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
83 ++i) {
84 host_view(i, 0) = 0.0;
85 host_view(i, 1) = 0.0;
86 host_view(i, 2) = 0.0;
87 host_view(i, 3) = 0.0;
88 host_view(i, 4) = 0.0;
89 host_view(i, 5) = 0.0;
90 host_view(i, 6) = 0.0;
91 host_view(i, 7) = 0.0;
92 host_view(i, 8) = 0.0;
93 host_view(i, 9) = 0.0;
94 host_view(i, 10) = 0.0;
95 host_view(i, 11) = 0.0;
96 }
97 Kokkos::fence();
98 Kokkos::deep_copy(orig, host_view);
99 }
100
run_parallelTest::test_scatter_view_impl_cls101 void run_parallel(int n) {
102 scatterSize = n;
103 auto policy =
104 Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
105 Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum");
106 }
107
108 KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls109 void operator()(int i) const {
110 auto scatter_access = scatter_view.access();
111 auto scatter_access_atomic =
112 scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
113 for (int j = 0; j < 10; ++j) {
114 auto k = (i + j) % scatterSize;
115 scatter_access(k, 0) += 4;
116 ++scatter_access(k, 1);
117 --scatter_access(k, 2);
118 scatter_access(k, 3)++;
119 scatter_access(k, 4)--;
120 scatter_access(k, 5) -= 5;
121 scatter_access_atomic(k, 6) += 2;
122 scatter_access_atomic(k, 7)++;
123 scatter_access_atomic(k, 8)--;
124 --scatter_access_atomic(k, 9);
125 ++scatter_access_atomic(k, 10);
126 scatter_access(k, 11) -= 3;
127 }
128 }
129
validateResultsTest::test_scatter_view_impl_cls130 void validateResults(orig_view_type orig) {
131 auto host_view =
132 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
133 Kokkos::fence();
134 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
135 ++i) {
136 auto val0 = host_view(i, 0);
137 auto val1 = host_view(i, 1);
138 auto val2 = host_view(i, 2);
139 auto val3 = host_view(i, 3);
140 auto val4 = host_view(i, 4);
141 auto val5 = host_view(i, 5);
142 auto val6 = host_view(i, 6);
143 auto val7 = host_view(i, 7);
144 auto val8 = host_view(i, 8);
145 auto val9 = host_view(i, 9);
146 auto val10 = host_view(i, 10);
147 auto val11 = host_view(i, 11);
148 EXPECT_NEAR(val0, NumberType(80), 1e-14);
149 EXPECT_NEAR(val1, NumberType(20), 1e-14);
150 EXPECT_NEAR(val2, NumberType(-20), 1e-14);
151 EXPECT_NEAR(val3, NumberType(20), 1e-14);
152 EXPECT_NEAR(val4, NumberType(-20), 1e-14);
153 EXPECT_NEAR(val5, NumberType(-100), 1e-14);
154 EXPECT_NEAR(val6, NumberType(40), 1e-14);
155 EXPECT_NEAR(val7, NumberType(20), 1e-14);
156 EXPECT_NEAR(val8, NumberType(-20), 1e-14);
157 EXPECT_NEAR(val9, NumberType(-20), 1e-14);
158 EXPECT_NEAR(val10, NumberType(20), 1e-14);
159 EXPECT_NEAR(val11, NumberType(-60), 1e-14);
160 }
161 }
162 };
163
164 template <typename DeviceType, typename Layout, typename Duplication,
165 typename Contribution, typename NumberType>
166 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
167 Kokkos::Experimental::ScatterProd,
168 NumberType> {
169 public:
170 using scatter_view_type =
171 Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
172 Kokkos::Experimental::ScatterProd,
173 Duplication, Contribution>;
174
175 using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
176
177 scatter_view_type scatter_view;
178 int scatterSize;
179
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls180 test_scatter_view_impl_cls(const scatter_view_type& view) {
181 scatter_view = view;
182 scatterSize = 0;
183 }
184
initializeTest::test_scatter_view_impl_cls185 void initialize(orig_view_type orig) {
186 auto host_view =
187 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
188 Kokkos::fence();
189 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
190 ++i) {
191 host_view(i, 0) = 1.0;
192 host_view(i, 1) = 1.0;
193 host_view(i, 2) = 1.0;
194 }
195 Kokkos::fence();
196 Kokkos::deep_copy(orig, host_view);
197 }
198
run_parallelTest::test_scatter_view_impl_cls199 void run_parallel(int n) {
200 scatterSize = n;
201 auto policy =
202 Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
203 Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
204 }
205
206 KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls207 void operator()(int i) const {
208 auto scatter_access = scatter_view.access();
209 auto scatter_access_atomic =
210 scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
211 for (int j = 0; j < 4; ++j) {
212 auto k = (i + j) % scatterSize;
213 scatter_access(k, 0) *= 4.0;
214 scatter_access_atomic(k, 1) *= 2.0;
215 scatter_access(k, 2) *= 1.0;
216 }
217 }
218
validateResultsTest::test_scatter_view_impl_cls219 void validateResults(orig_view_type orig) {
220 auto host_view =
221 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
222 Kokkos::fence();
223 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
224 ++i) {
225 auto val0 = host_view(i, 0);
226 auto val1 = host_view(i, 1);
227 auto val2 = host_view(i, 2);
228 EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14);
229 EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14);
230 EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
231 }
232 }
233 };
234
235 template <typename DeviceType, typename Layout, typename Duplication,
236 typename Contribution, typename NumberType>
237 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
238 Kokkos::Experimental::ScatterMin,
239 NumberType> {
240 public:
241 using scatter_view_type =
242 Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
243 Kokkos::Experimental::ScatterMin,
244 Duplication, Contribution>;
245
246 using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
247
248 scatter_view_type scatter_view;
249 int scatterSize;
250
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls251 test_scatter_view_impl_cls(const scatter_view_type& view) {
252 scatter_view = view;
253 scatterSize = 0;
254 }
255
initializeTest::test_scatter_view_impl_cls256 void initialize(orig_view_type orig) {
257 auto host_view =
258 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
259 Kokkos::fence();
260 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
261 ++i) {
262 host_view(i, 0) = 999999.0;
263 host_view(i, 1) = 999999.0;
264 host_view(i, 2) = 999999.0;
265 }
266 Kokkos::fence();
267 Kokkos::deep_copy(orig, host_view);
268 }
269
run_parallelTest::test_scatter_view_impl_cls270 void run_parallel(int n) {
271 scatterSize = n;
272 auto policy =
273 Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
274 Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
275 }
276
277 KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls278 void operator()(int i) const {
279 auto scatter_access = scatter_view.access();
280 auto scatter_access_atomic =
281 scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
282 for (int j = 0; j < 4; ++j) {
283 auto k = (i + j) % scatterSize;
284 scatter_access(k, 0).update((NumberType)(j + 1) * 4);
285 scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0);
286 scatter_access(k, 2).update((NumberType)(j + 1) * 1.0);
287 }
288 }
289
validateResultsTest::test_scatter_view_impl_cls290 void validateResults(orig_view_type orig) {
291 auto host_view =
292 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
293 Kokkos::fence();
294 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
295 ++i) {
296 auto val0 = host_view(i, 0);
297 auto val1 = host_view(i, 1);
298 auto val2 = host_view(i, 2);
299 EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14);
300 EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14);
301 EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
302 }
303 }
304 };
305
306 template <typename DeviceType, typename Layout, typename Duplication,
307 typename Contribution, typename NumberType>
308 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
309 Kokkos::Experimental::ScatterMax,
310 NumberType> {
311 public:
312 using scatter_view_type =
313 Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
314 Kokkos::Experimental::ScatterMax,
315 Duplication, Contribution>;
316
317 using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
318
319 scatter_view_type scatter_view;
320 int scatterSize;
321
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls322 test_scatter_view_impl_cls(const scatter_view_type& view) {
323 scatter_view = view;
324 scatterSize = 0;
325 }
326
initializeTest::test_scatter_view_impl_cls327 void initialize(orig_view_type orig) {
328 auto host_view =
329 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
330 Kokkos::fence();
331 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
332 ++i) {
333 host_view(i, 0) = 0.0;
334 host_view(i, 1) = 0.0;
335 host_view(i, 2) = 0.0;
336 }
337 Kokkos::fence();
338 Kokkos::deep_copy(orig, host_view);
339 }
340
run_parallelTest::test_scatter_view_impl_cls341 void run_parallel(int n) {
342 scatterSize = n;
343 Kokkos::RangePolicy<typename DeviceType::execution_space, int> policy(0, n);
344 Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
345 }
346
347 KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls348 void operator()(int i) const {
349 auto scatter_access = scatter_view.access();
350 auto scatter_access_atomic =
351 scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
352 for (int j = 0; j < 4; ++j) {
353 auto k = (i + j) % scatterSize;
354 scatter_access(k, 0).update((NumberType)(j + 1) * 4);
355 scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0);
356 scatter_access(k, 2).update((NumberType)(j + 1) * 1.0);
357 }
358 }
359
validateResultsTest::test_scatter_view_impl_cls360 void validateResults(orig_view_type orig) {
361 auto host_view =
362 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
363 Kokkos::fence();
364 for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
365 ++i) {
366 auto val0 = host_view(i, 0);
367 auto val1 = host_view(i, 1);
368 auto val2 = host_view(i, 2);
369 EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14);
370 EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14);
371 EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14);
372 }
373 }
374 };
375
376 template <typename DeviceType, typename Layout, typename Op,
377 typename NumberType>
378 struct test_default_scatter_view {
379 public:
380 using default_duplication = Kokkos::Impl::Experimental::DefaultDuplication<
381 typename DeviceType::execution_space>;
382 using Duplication = typename default_duplication::type;
383 using Contribution = typename Kokkos::Impl::Experimental::DefaultContribution<
384 typename DeviceType::execution_space, Duplication>::type;
385 using scatter_view_def =
386 typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
387 Contribution, Op,
388 NumberType>::scatter_view_type;
389 using orig_view_def =
390 typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
391 Contribution, Op,
392 NumberType>::orig_view_type;
393
run_testTest::test_default_scatter_view394 void run_test(int n) {
395 // Test creation via create_scatter_view overload 1
396 {
397 orig_view_def original_view("original_view", n);
398 scatter_view_def scatter_view =
399 Kokkos::Experimental::create_scatter_view(Op{}, original_view);
400
401 test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
402 Op, NumberType>
403 scatter_view_test_impl(scatter_view);
404 scatter_view_test_impl.initialize(original_view);
405 scatter_view_test_impl.run_parallel(n);
406
407 Kokkos::Experimental::contribute(original_view, scatter_view);
408 scatter_view.reset_except(original_view);
409
410 scatter_view_test_impl.run_parallel(n);
411
412 Kokkos::Experimental::contribute(original_view, scatter_view);
413 Kokkos::fence();
414
415 scatter_view_test_impl.validateResults(original_view);
416
417 {
418 scatter_view_def persistent_view("persistent", n);
419 auto result_view = persistent_view.subview();
420 contribute(result_view, persistent_view);
421 Kokkos::fence();
422 }
423 }
424 }
425 };
426
427 template <typename DeviceType, typename Layout, typename Duplication,
428 typename Contribution, typename Op, typename NumberType>
429 struct test_scatter_view_config {
430 public:
431 using scatter_view_def =
432 typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
433 Contribution, Op,
434 NumberType>::scatter_view_type;
435 using orig_view_def =
436 typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
437 Contribution, Op,
438 NumberType>::orig_view_type;
439
compile_constructorTest::test_scatter_view_config440 void compile_constructor() {
441 auto sv = scatter_view_def(Kokkos::view_alloc(DeviceType{}, "label"), 10);
442 }
443
run_testTest::test_scatter_view_config444 void run_test(int n) {
445 // test allocation
446 {
447 orig_view_def ov1("ov1", n);
448 scatter_view_def sv1;
449
450 ASSERT_FALSE(sv1.is_allocated());
451
452 sv1 = Kokkos::Experimental::create_scatter_view<Op, Duplication,
453 Contribution>(ov1);
454
455 scatter_view_def sv2(sv1);
456 scatter_view_def sv3("sv3", n);
457
458 ASSERT_TRUE(sv1.is_allocated());
459 ASSERT_TRUE(sv2.is_allocated());
460 ASSERT_TRUE(sv3.is_allocated());
461 }
462
463 // Test creation via create_scatter_view
464 {
465 orig_view_def original_view("original_view", n);
466 scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view<
467 Op, Duplication, Contribution>(original_view);
468
469 test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
470 Op, NumberType>
471 scatter_view_test_impl(scatter_view);
472 scatter_view_test_impl.initialize(original_view);
473 scatter_view_test_impl.run_parallel(n);
474
475 Kokkos::Experimental::contribute(original_view, scatter_view);
476 scatter_view.reset_except(original_view);
477
478 scatter_view_test_impl.run_parallel(n);
479
480 Kokkos::Experimental::contribute(original_view, scatter_view);
481 Kokkos::fence();
482
483 scatter_view_test_impl.validateResults(original_view);
484
485 {
486 scatter_view_def persistent_view("persistent", n);
487 auto result_view = persistent_view.subview();
488 contribute(result_view, persistent_view);
489 Kokkos::fence();
490 }
491 }
492 // Test creation via create_scatter_view overload 2
493 {
494 orig_view_def original_view("original_view", n);
495 scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view(
496 Op{}, Duplication{}, Contribution{}, original_view);
497
498 test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
499 Op, NumberType>
500 scatter_view_test_impl(scatter_view);
501 scatter_view_test_impl.initialize(original_view);
502 scatter_view_test_impl.run_parallel(n);
503
504 Kokkos::Experimental::contribute(original_view, scatter_view);
505 scatter_view.reset_except(original_view);
506
507 scatter_view_test_impl.run_parallel(n);
508
509 Kokkos::Experimental::contribute(original_view, scatter_view);
510 Kokkos::fence();
511
512 scatter_view_test_impl.validateResults(original_view);
513
514 {
515 scatter_view_def persistent_view("persistent", n);
516 auto result_view = persistent_view.subview();
517 contribute(result_view, persistent_view);
518 Kokkos::fence();
519 }
520 }
521 // Test creation via constructor
522 {
523 orig_view_def original_view("original_view", n);
524 scatter_view_def scatter_view(original_view);
525
526 test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
527 Op, NumberType>
528 scatter_view_test_impl(scatter_view);
529 scatter_view_test_impl.initialize(original_view);
530 scatter_view_test_impl.run_parallel(n);
531
532 Kokkos::Experimental::contribute(original_view, scatter_view);
533 scatter_view.reset_except(original_view);
534
535 scatter_view_test_impl.run_parallel(n);
536
537 Kokkos::Experimental::contribute(original_view, scatter_view);
538 Kokkos::fence();
539
540 scatter_view_test_impl.validateResults(original_view);
541
542 {
543 scatter_view_def persistent_view("persistent", n);
544 auto result_view = persistent_view.subview();
545 contribute(result_view, persistent_view);
546 Kokkos::fence();
547 }
548 }
549 }
550 };
551
552 template <typename DeviceType, typename ScatterType, typename NumberType>
553 struct TestDuplicatedScatterView {
TestDuplicatedScatterViewTest::TestDuplicatedScatterView554 TestDuplicatedScatterView(int n) {
555 // ScatterSum test
556 test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
557 Kokkos::Experimental::ScatterDuplicated,
558 Kokkos::Experimental::ScatterNonAtomic,
559 ScatterType, NumberType>
560 test_sv_right_config;
561 test_sv_right_config.run_test(n);
562 test_scatter_view_config<
563 DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated,
564 Kokkos::Experimental::ScatterNonAtomic, ScatterType, NumberType>
565 test_sv_left_config;
566 test_sv_left_config.run_test(n);
567 }
568 };
569
#if defined(KOKKOS_ENABLE_CUDA)
// Disable the duplicated instantiation with CUDA until UniqueToken can
// support it: for Kokkos::Cuda and the two Cuda device types below, the
// constructor ignores its argument and runs nothing.
template <class ScatterType, class NumberType>
struct TestDuplicatedScatterView<Kokkos::Cuda, ScatterType, NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};

template <class ScatterType, class NumberType>
struct TestDuplicatedScatterView<
    Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, ScatterType, NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};

template <class ScatterType, class NumberType>
struct TestDuplicatedScatterView<
    Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, ScatterType,
    NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};
#endif  // KOKKOS_ENABLE_CUDA
589
590 template <typename DeviceType, typename ScatterType,
591 typename NumberType = double>
test_scatter_view(int64_t n)592 void test_scatter_view(int64_t n) {
593 using execution_space = typename DeviceType::execution_space;
594
595 // no atomics or duplication is only sensible if the execution space
596 // is running essentially in serial (doesn't have to be Serial though,
597 // we also test OpenMP with one thread: LAMMPS cares about that)
598 if (execution_space().concurrency() == 1) {
599 test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
600 Kokkos::Experimental::ScatterNonDuplicated,
601 Kokkos::Experimental::ScatterNonAtomic,
602 ScatterType, NumberType>
603 test_sv_config;
604 test_sv_config.run_test(n);
605 }
606 #ifdef KOKKOS_ENABLE_SERIAL
607 if (!std::is_same<DeviceType, Kokkos::Serial>::value) {
608 #endif
609 test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
610 Kokkos::Experimental::ScatterNonDuplicated,
611 Kokkos::Experimental::ScatterAtomic, ScatterType,
612 NumberType>
613 test_sv_config;
614 test_sv_config.run_test(n);
615 #ifdef KOKKOS_ENABLE_SERIAL
616 }
617 #endif
618 // with hundreds of threads we were running out of memory.
619 // limit (n) so that duplication doesn't exceed 4GB
620 constexpr std::size_t maximum_allowed_total_bytes =
621 4ull * 1024ull * 1024ull * 1024ull;
622 std::size_t const maximum_allowed_copy_bytes =
623 maximum_allowed_total_bytes /
624 std::size_t(execution_space().concurrency());
625 constexpr std::size_t bytes_per_value = sizeof(NumberType) * 12;
626 std::size_t const maximum_allowed_copy_values =
627 maximum_allowed_copy_bytes / bytes_per_value;
628 n = std::min(n, int64_t(maximum_allowed_copy_values));
629
630 // if the default is duplicated, this needs to follow the limit
631 {
632 test_default_scatter_view<DeviceType, Kokkos::LayoutRight, ScatterType,
633 NumberType>
634 test_default_sv;
635 test_default_sv.run_test(n);
636 }
637 TestDuplicatedScatterView<DeviceType, ScatterType, NumberType> duptest(n);
638 }
639
// Runs every reduction (Sum for double and unsigned int, Prod, Min, Max) on
// TEST_EXECSPACE, first with a tiny problem and then with a large one.
TEST(TEST_CATEGORY, scatterview) {
  // tests were timing out in DEBUG mode, reduce the amount of work
  // (the Serial execution space also gets the reduced size)
#if defined(KOKKOS_ENABLE_DEBUG)
  const int big_n = 100 * 1000;
#elif defined(KOKKOS_ENABLE_SERIAL)
  const bool is_serial = std::is_same<TEST_EXECSPACE, Kokkos::Serial>::value;
  const int big_n      = is_serial ? 100 * 1000 : 10000 * 1000;
#else
  const int big_n = 10000 * 1000;
#endif

  const int problem_sizes[] = {10, big_n};
  for (const int n : problem_sizes) {
    test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum, double>(
        n);
    test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterSum,
                      unsigned int>(n);
    test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterProd>(n);
    test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMin>(n);
    test_scatter_view<TEST_EXECSPACE, Kokkos::Experimental::ScatterMax>(n);
  }
}
669
TEST(TEST_CATEGORY,scatterview_devicetype)670 TEST(TEST_CATEGORY, scatterview_devicetype) {
671 using device_type =
672 Kokkos::Device<TEST_EXECSPACE, typename TEST_EXECSPACE::memory_space>;
673
674 test_scatter_view<device_type, Kokkos::Experimental::ScatterSum, double>(10);
675 test_scatter_view<device_type, Kokkos::Experimental::ScatterSum,
676 unsigned int>(10);
677 test_scatter_view<device_type, Kokkos::Experimental::ScatterProd>(10);
678 test_scatter_view<device_type, Kokkos::Experimental::ScatterMin>(10);
679 test_scatter_view<device_type, Kokkos::Experimental::ScatterMax>(10);
680
681 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
682 #ifdef KOKKOS_ENABLE_CUDA
683 using device_execution_space = Kokkos::Cuda;
684 using device_memory_space = Kokkos::CudaSpace;
685 using host_accessible_space = Kokkos::CudaUVMSpace;
686 #else
687 using device_execution_space = Kokkos::Experimental::HIP;
688 using device_memory_space = Kokkos::Experimental::HIPSpace;
689 using host_accessible_space = Kokkos::Experimental::HIPHostPinnedSpace;
690 #endif
691 if (std::is_same<TEST_EXECSPACE, device_execution_space>::value) {
692 using device_device_type =
693 Kokkos::Device<device_execution_space, device_memory_space>;
694 test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
695 double>(10);
696 test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
697 unsigned int>(10);
698 test_scatter_view<device_device_type, Kokkos::Experimental::ScatterProd>(
699 10);
700 test_scatter_view<device_device_type, Kokkos::Experimental::ScatterMin>(10);
701 test_scatter_view<device_device_type, Kokkos::Experimental::ScatterMax>(10);
702 using host_device_type =
703 Kokkos::Device<device_execution_space, host_accessible_space>;
704 test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
705 double>(10);
706 test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
707 unsigned int>(10);
708 test_scatter_view<host_device_type, Kokkos::Experimental::ScatterProd>(10);
709 test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMin>(10);
710 test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMax>(10);
711 }
712 #endif
713 }
714
715 } // namespace Test
716
717 #endif // KOKKOS_TEST_SCATTER_VIEW_HPP
718