1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_TEST_SCATTER_VIEW_HPP
46 #define KOKKOS_TEST_SCATTER_VIEW_HPP
47 
48 #include <Kokkos_ScatterView.hpp>
49 #include <gtest/gtest.h>
50 
51 namespace Test {
52 
// Primary template of the per-operation test functor. It is only declared
// here; the partial specializations in this file (ScatterSum, ScatterProd,
// ScatterMin, ScatterMax) supply the actual definitions.
template <class DeviceType, class Layout, class Duplication,
          class Contribution, class Op, class NumberType>
struct test_scatter_view_impl_cls;
56 
57 template <typename DeviceType, typename Layout, typename Duplication,
58           typename Contribution, typename NumberType>
59 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
60                                   Kokkos::Experimental::ScatterSum,
61                                   NumberType> {
62  public:
63   using scatter_view_type =
64       Kokkos::Experimental::ScatterView<NumberType * [12], Layout, DeviceType,
65                                         Kokkos::Experimental::ScatterSum,
66                                         Duplication, Contribution>;
67 
68   using orig_view_type = Kokkos::View<NumberType * [12], Layout, DeviceType>;
69 
70   scatter_view_type scatter_view;
71   int scatterSize;
72 
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls73   test_scatter_view_impl_cls(const scatter_view_type& view) {
74     scatter_view = view;
75     scatterSize  = 0;
76   }
77 
initializeTest::test_scatter_view_impl_cls78   void initialize(orig_view_type orig) {
79     auto host_view =
80         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
81     Kokkos::fence();
82     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
83          ++i) {
84       host_view(i, 0)  = 0.0;
85       host_view(i, 1)  = 0.0;
86       host_view(i, 2)  = 0.0;
87       host_view(i, 3)  = 0.0;
88       host_view(i, 4)  = 0.0;
89       host_view(i, 5)  = 0.0;
90       host_view(i, 6)  = 0.0;
91       host_view(i, 7)  = 0.0;
92       host_view(i, 8)  = 0.0;
93       host_view(i, 9)  = 0.0;
94       host_view(i, 10) = 0.0;
95       host_view(i, 11) = 0.0;
96     }
97     Kokkos::fence();
98     Kokkos::deep_copy(orig, host_view);
99   }
100 
run_parallelTest::test_scatter_view_impl_cls101   void run_parallel(int n) {
102     scatterSize = n;
103     auto policy =
104         Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
105     Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum");
106   }
107 
108   KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls109   void operator()(int i) const {
110     auto scatter_access = scatter_view.access();
111     auto scatter_access_atomic =
112         scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
113     for (int j = 0; j < 10; ++j) {
114       auto k = (i + j) % scatterSize;
115       scatter_access(k, 0) += 4;
116       ++scatter_access(k, 1);
117       --scatter_access(k, 2);
118       scatter_access(k, 3)++;
119       scatter_access(k, 4)--;
120       scatter_access(k, 5) -= 5;
121       scatter_access_atomic(k, 6) += 2;
122       scatter_access_atomic(k, 7)++;
123       scatter_access_atomic(k, 8)--;
124       --scatter_access_atomic(k, 9);
125       ++scatter_access_atomic(k, 10);
126       scatter_access(k, 11) -= 3;
127     }
128   }
129 
validateResultsTest::test_scatter_view_impl_cls130   void validateResults(orig_view_type orig) {
131     auto host_view =
132         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
133     Kokkos::fence();
134     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
135          ++i) {
136       auto val0  = host_view(i, 0);
137       auto val1  = host_view(i, 1);
138       auto val2  = host_view(i, 2);
139       auto val3  = host_view(i, 3);
140       auto val4  = host_view(i, 4);
141       auto val5  = host_view(i, 5);
142       auto val6  = host_view(i, 6);
143       auto val7  = host_view(i, 7);
144       auto val8  = host_view(i, 8);
145       auto val9  = host_view(i, 9);
146       auto val10 = host_view(i, 10);
147       auto val11 = host_view(i, 11);
148       EXPECT_NEAR(val0, NumberType(80), 1e-14);
149       EXPECT_NEAR(val1, NumberType(20), 1e-14);
150       EXPECT_NEAR(val2, NumberType(-20), 1e-14);
151       EXPECT_NEAR(val3, NumberType(20), 1e-14);
152       EXPECT_NEAR(val4, NumberType(-20), 1e-14);
153       EXPECT_NEAR(val5, NumberType(-100), 1e-14);
154       EXPECT_NEAR(val6, NumberType(40), 1e-14);
155       EXPECT_NEAR(val7, NumberType(20), 1e-14);
156       EXPECT_NEAR(val8, NumberType(-20), 1e-14);
157       EXPECT_NEAR(val9, NumberType(-20), 1e-14);
158       EXPECT_NEAR(val10, NumberType(20), 1e-14);
159       EXPECT_NEAR(val11, NumberType(-60), 1e-14);
160     }
161   }
162 };
163 
164 template <typename DeviceType, typename Layout, typename Duplication,
165           typename Contribution, typename NumberType>
166 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
167                                   Kokkos::Experimental::ScatterProd,
168                                   NumberType> {
169  public:
170   using scatter_view_type =
171       Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
172                                         Kokkos::Experimental::ScatterProd,
173                                         Duplication, Contribution>;
174 
175   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
176 
177   scatter_view_type scatter_view;
178   int scatterSize;
179 
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls180   test_scatter_view_impl_cls(const scatter_view_type& view) {
181     scatter_view = view;
182     scatterSize  = 0;
183   }
184 
initializeTest::test_scatter_view_impl_cls185   void initialize(orig_view_type orig) {
186     auto host_view =
187         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
188     Kokkos::fence();
189     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
190          ++i) {
191       host_view(i, 0) = 1.0;
192       host_view(i, 1) = 1.0;
193       host_view(i, 2) = 1.0;
194     }
195     Kokkos::fence();
196     Kokkos::deep_copy(orig, host_view);
197   }
198 
run_parallelTest::test_scatter_view_impl_cls199   void run_parallel(int n) {
200     scatterSize = n;
201     auto policy =
202         Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
203     Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
204   }
205 
206   KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls207   void operator()(int i) const {
208     auto scatter_access = scatter_view.access();
209     auto scatter_access_atomic =
210         scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
211     for (int j = 0; j < 4; ++j) {
212       auto k = (i + j) % scatterSize;
213       scatter_access(k, 0) *= 4.0;
214       scatter_access_atomic(k, 1) *= 2.0;
215       scatter_access(k, 2) *= 1.0;
216     }
217   }
218 
validateResultsTest::test_scatter_view_impl_cls219   void validateResults(orig_view_type orig) {
220     auto host_view =
221         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
222     Kokkos::fence();
223     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
224          ++i) {
225       auto val0 = host_view(i, 0);
226       auto val1 = host_view(i, 1);
227       auto val2 = host_view(i, 2);
228       EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14);
229       EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14);
230       EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
231     }
232   }
233 };
234 
235 template <typename DeviceType, typename Layout, typename Duplication,
236           typename Contribution, typename NumberType>
237 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
238                                   Kokkos::Experimental::ScatterMin,
239                                   NumberType> {
240  public:
241   using scatter_view_type =
242       Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
243                                         Kokkos::Experimental::ScatterMin,
244                                         Duplication, Contribution>;
245 
246   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
247 
248   scatter_view_type scatter_view;
249   int scatterSize;
250 
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls251   test_scatter_view_impl_cls(const scatter_view_type& view) {
252     scatter_view = view;
253     scatterSize  = 0;
254   }
255 
initializeTest::test_scatter_view_impl_cls256   void initialize(orig_view_type orig) {
257     auto host_view =
258         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
259     Kokkos::fence();
260     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
261          ++i) {
262       host_view(i, 0) = 999999.0;
263       host_view(i, 1) = 999999.0;
264       host_view(i, 2) = 999999.0;
265     }
266     Kokkos::fence();
267     Kokkos::deep_copy(orig, host_view);
268   }
269 
run_parallelTest::test_scatter_view_impl_cls270   void run_parallel(int n) {
271     scatterSize = n;
272     auto policy =
273         Kokkos::RangePolicy<typename DeviceType::execution_space, int>(0, n);
274     Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
275   }
276 
277   KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls278   void operator()(int i) const {
279     auto scatter_access = scatter_view.access();
280     auto scatter_access_atomic =
281         scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
282     for (int j = 0; j < 4; ++j) {
283       auto k = (i + j) % scatterSize;
284       scatter_access(k, 0).update((NumberType)(j + 1) * 4);
285       scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0);
286       scatter_access(k, 2).update((NumberType)(j + 1) * 1.0);
287     }
288   }
289 
validateResultsTest::test_scatter_view_impl_cls290   void validateResults(orig_view_type orig) {
291     auto host_view =
292         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
293     Kokkos::fence();
294     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
295          ++i) {
296       auto val0 = host_view(i, 0);
297       auto val1 = host_view(i, 1);
298       auto val2 = host_view(i, 2);
299       EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14);
300       EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14);
301       EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14);
302     }
303   }
304 };
305 
306 template <typename DeviceType, typename Layout, typename Duplication,
307           typename Contribution, typename NumberType>
308 struct test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
309                                   Kokkos::Experimental::ScatterMax,
310                                   NumberType> {
311  public:
312   using scatter_view_type =
313       Kokkos::Experimental::ScatterView<NumberType * [3], Layout, DeviceType,
314                                         Kokkos::Experimental::ScatterMax,
315                                         Duplication, Contribution>;
316 
317   using orig_view_type = Kokkos::View<NumberType * [3], Layout, DeviceType>;
318 
319   scatter_view_type scatter_view;
320   int scatterSize;
321 
test_scatter_view_impl_clsTest::test_scatter_view_impl_cls322   test_scatter_view_impl_cls(const scatter_view_type& view) {
323     scatter_view = view;
324     scatterSize  = 0;
325   }
326 
initializeTest::test_scatter_view_impl_cls327   void initialize(orig_view_type orig) {
328     auto host_view =
329         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
330     Kokkos::fence();
331     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
332          ++i) {
333       host_view(i, 0) = 0.0;
334       host_view(i, 1) = 0.0;
335       host_view(i, 2) = 0.0;
336     }
337     Kokkos::fence();
338     Kokkos::deep_copy(orig, host_view);
339   }
340 
run_parallelTest::test_scatter_view_impl_cls341   void run_parallel(int n) {
342     scatterSize = n;
343     Kokkos::RangePolicy<typename DeviceType::execution_space, int> policy(0, n);
344     Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod");
345   }
346 
347   KOKKOS_INLINE_FUNCTION
operator ()Test::test_scatter_view_impl_cls348   void operator()(int i) const {
349     auto scatter_access = scatter_view.access();
350     auto scatter_access_atomic =
351         scatter_view.template access<Kokkos::Experimental::ScatterAtomic>();
352     for (int j = 0; j < 4; ++j) {
353       auto k = (i + j) % scatterSize;
354       scatter_access(k, 0).update((NumberType)(j + 1) * 4);
355       scatter_access_atomic(k, 1).update((NumberType)(j + 1) * 2.0);
356       scatter_access(k, 2).update((NumberType)(j + 1) * 1.0);
357     }
358   }
359 
validateResultsTest::test_scatter_view_impl_cls360   void validateResults(orig_view_type orig) {
361     auto host_view =
362         Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig);
363     Kokkos::fence();
364     for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0);
365          ++i) {
366       auto val0 = host_view(i, 0);
367       auto val1 = host_view(i, 1);
368       auto val2 = host_view(i, 2);
369       EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14);
370       EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14);
371       EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14);
372     }
373   }
374 };
375 
376 template <typename DeviceType, typename Layout, typename Op,
377           typename NumberType>
378 struct test_default_scatter_view {
379  public:
380   using default_duplication = Kokkos::Impl::Experimental::DefaultDuplication<
381       typename DeviceType::execution_space>;
382   using Duplication  = typename default_duplication::type;
383   using Contribution = typename Kokkos::Impl::Experimental::DefaultContribution<
384       typename DeviceType::execution_space, Duplication>::type;
385   using scatter_view_def =
386       typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
387                                           Contribution, Op,
388                                           NumberType>::scatter_view_type;
389   using orig_view_def =
390       typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
391                                           Contribution, Op,
392                                           NumberType>::orig_view_type;
393 
run_testTest::test_default_scatter_view394   void run_test(int n) {
395     // Test creation via create_scatter_view overload 1
396     {
397       orig_view_def original_view("original_view", n);
398       scatter_view_def scatter_view =
399           Kokkos::Experimental::create_scatter_view(Op{}, original_view);
400 
401       test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
402                                  Op, NumberType>
403           scatter_view_test_impl(scatter_view);
404       scatter_view_test_impl.initialize(original_view);
405       scatter_view_test_impl.run_parallel(n);
406 
407       Kokkos::Experimental::contribute(original_view, scatter_view);
408       scatter_view.reset_except(original_view);
409 
410       scatter_view_test_impl.run_parallel(n);
411 
412       Kokkos::Experimental::contribute(original_view, scatter_view);
413       Kokkos::fence();
414 
415       scatter_view_test_impl.validateResults(original_view);
416 
417       {
418         scatter_view_def persistent_view("persistent", n);
419         auto result_view = persistent_view.subview();
420         contribute(result_view, persistent_view);
421         Kokkos::fence();
422       }
423     }
424   }
425 };
426 
427 template <typename DeviceType, typename Layout, typename Duplication,
428           typename Contribution, typename Op, typename NumberType>
429 struct test_scatter_view_config {
430  public:
431   using scatter_view_def =
432       typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
433                                           Contribution, Op,
434                                           NumberType>::scatter_view_type;
435   using orig_view_def =
436       typename test_scatter_view_impl_cls<DeviceType, Layout, Duplication,
437                                           Contribution, Op,
438                                           NumberType>::orig_view_type;
439 
compile_constructorTest::test_scatter_view_config440   void compile_constructor() {
441     auto sv = scatter_view_def(Kokkos::view_alloc(DeviceType{}, "label"), 10);
442   }
443 
run_testTest::test_scatter_view_config444   void run_test(int n) {
445     // test allocation
446     {
447       orig_view_def ov1("ov1", n);
448       scatter_view_def sv1;
449 
450       ASSERT_FALSE(sv1.is_allocated());
451 
452       sv1 = Kokkos::Experimental::create_scatter_view<Op, Duplication,
453                                                       Contribution>(ov1);
454 
455       scatter_view_def sv2(sv1);
456       scatter_view_def sv3("sv3", n);
457 
458       ASSERT_TRUE(sv1.is_allocated());
459       ASSERT_TRUE(sv2.is_allocated());
460       ASSERT_TRUE(sv3.is_allocated());
461     }
462 
463     // Test creation via create_scatter_view
464     {
465       orig_view_def original_view("original_view", n);
466       scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view<
467           Op, Duplication, Contribution>(original_view);
468 
469       test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
470                                  Op, NumberType>
471           scatter_view_test_impl(scatter_view);
472       scatter_view_test_impl.initialize(original_view);
473       scatter_view_test_impl.run_parallel(n);
474 
475       Kokkos::Experimental::contribute(original_view, scatter_view);
476       scatter_view.reset_except(original_view);
477 
478       scatter_view_test_impl.run_parallel(n);
479 
480       Kokkos::Experimental::contribute(original_view, scatter_view);
481       Kokkos::fence();
482 
483       scatter_view_test_impl.validateResults(original_view);
484 
485       {
486         scatter_view_def persistent_view("persistent", n);
487         auto result_view = persistent_view.subview();
488         contribute(result_view, persistent_view);
489         Kokkos::fence();
490       }
491     }
492     // Test creation via create_scatter_view overload 2
493     {
494       orig_view_def original_view("original_view", n);
495       scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view(
496           Op{}, Duplication{}, Contribution{}, original_view);
497 
498       test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
499                                  Op, NumberType>
500           scatter_view_test_impl(scatter_view);
501       scatter_view_test_impl.initialize(original_view);
502       scatter_view_test_impl.run_parallel(n);
503 
504       Kokkos::Experimental::contribute(original_view, scatter_view);
505       scatter_view.reset_except(original_view);
506 
507       scatter_view_test_impl.run_parallel(n);
508 
509       Kokkos::Experimental::contribute(original_view, scatter_view);
510       Kokkos::fence();
511 
512       scatter_view_test_impl.validateResults(original_view);
513 
514       {
515         scatter_view_def persistent_view("persistent", n);
516         auto result_view = persistent_view.subview();
517         contribute(result_view, persistent_view);
518         Kokkos::fence();
519       }
520     }
521     // Test creation via constructor
522     {
523       orig_view_def original_view("original_view", n);
524       scatter_view_def scatter_view(original_view);
525 
526       test_scatter_view_impl_cls<DeviceType, Layout, Duplication, Contribution,
527                                  Op, NumberType>
528           scatter_view_test_impl(scatter_view);
529       scatter_view_test_impl.initialize(original_view);
530       scatter_view_test_impl.run_parallel(n);
531 
532       Kokkos::Experimental::contribute(original_view, scatter_view);
533       scatter_view.reset_except(original_view);
534 
535       scatter_view_test_impl.run_parallel(n);
536 
537       Kokkos::Experimental::contribute(original_view, scatter_view);
538       Kokkos::fence();
539 
540       scatter_view_test_impl.validateResults(original_view);
541 
542       {
543         scatter_view_def persistent_view("persistent", n);
544         auto result_view = persistent_view.subview();
545         contribute(result_view, persistent_view);
546         Kokkos::fence();
547       }
548     }
549   }
550 };
551 
552 template <typename DeviceType, typename ScatterType, typename NumberType>
553 struct TestDuplicatedScatterView {
TestDuplicatedScatterViewTest::TestDuplicatedScatterView554   TestDuplicatedScatterView(int n) {
555     // ScatterSum test
556     test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
557                              Kokkos::Experimental::ScatterDuplicated,
558                              Kokkos::Experimental::ScatterNonAtomic,
559                              ScatterType, NumberType>
560         test_sv_right_config;
561     test_sv_right_config.run_test(n);
562     test_scatter_view_config<
563         DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated,
564         Kokkos::Experimental::ScatterNonAtomic, ScatterType, NumberType>
565         test_sv_left_config;
566     test_sv_left_config.run_test(n);
567   }
568 };
569 
#ifdef KOKKOS_ENABLE_CUDA
// disable duplicated instantiation with CUDA until
// UniqueToken can support it
//
// Each specialization below replaces the duplicated test with an empty
// constructor for one spelling of a CUDA device type.

// Bare execution space.
template <typename ScatterType, typename NumberType>
struct TestDuplicatedScatterView<Kokkos::Cuda, ScatterType, NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};

// Cuda execution space paired with CudaSpace.
template <typename ScatterType, typename NumberType>
struct TestDuplicatedScatterView<
    Kokkos::Device<Kokkos::Cuda, Kokkos::CudaSpace>, ScatterType, NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};

// Cuda execution space paired with CudaUVMSpace.
template <typename ScatterType, typename NumberType>
struct TestDuplicatedScatterView<
    Kokkos::Device<Kokkos::Cuda, Kokkos::CudaUVMSpace>, ScatterType,
    NumberType> {
  TestDuplicatedScatterView(int /*n*/) {}
};
#endif
589 
590 template <typename DeviceType, typename ScatterType,
591           typename NumberType = double>
test_scatter_view(int64_t n)592 void test_scatter_view(int64_t n) {
593   using execution_space = typename DeviceType::execution_space;
594 
595   // no atomics or duplication is only sensible if the execution space
596   // is running essentially in serial (doesn't have to be Serial though,
597   // we also test OpenMP with one thread: LAMMPS cares about that)
598   if (execution_space().concurrency() == 1) {
599     test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
600                              Kokkos::Experimental::ScatterNonDuplicated,
601                              Kokkos::Experimental::ScatterNonAtomic,
602                              ScatterType, NumberType>
603         test_sv_config;
604     test_sv_config.run_test(n);
605   }
606 #ifdef KOKKOS_ENABLE_SERIAL
607   if (!std::is_same<DeviceType, Kokkos::Serial>::value) {
608 #endif
609     test_scatter_view_config<DeviceType, Kokkos::LayoutRight,
610                              Kokkos::Experimental::ScatterNonDuplicated,
611                              Kokkos::Experimental::ScatterAtomic, ScatterType,
612                              NumberType>
613         test_sv_config;
614     test_sv_config.run_test(n);
615 #ifdef KOKKOS_ENABLE_SERIAL
616   }
617 #endif
618   // with hundreds of threads we were running out of memory.
619   // limit (n) so that duplication doesn't exceed 4GB
620   constexpr std::size_t maximum_allowed_total_bytes =
621       4ull * 1024ull * 1024ull * 1024ull;
622   std::size_t const maximum_allowed_copy_bytes =
623       maximum_allowed_total_bytes /
624       std::size_t(execution_space().concurrency());
625   constexpr std::size_t bytes_per_value = sizeof(NumberType) * 12;
626   std::size_t const maximum_allowed_copy_values =
627       maximum_allowed_copy_bytes / bytes_per_value;
628   n = std::min(n, int64_t(maximum_allowed_copy_values));
629 
630   // if the default is duplicated, this needs to follow the limit
631   {
632     test_default_scatter_view<DeviceType, Kokkos::LayoutRight, ScatterType,
633                               NumberType>
634         test_default_sv;
635     test_default_sv.run_test(n);
636   }
637   TestDuplicatedScatterView<DeviceType, ScatterType, NumberType> duptest(n);
638 }
639 
// Runs every scatter operation on TEST_EXECSPACE, first with a small view
// (10 rows) and then with a large one.
TEST(TEST_CATEGORY, scatterview) {
  namespace KE = Kokkos::Experimental;

  test_scatter_view<TEST_EXECSPACE, KE::ScatterSum, double>(10);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterSum, unsigned int>(10);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterProd>(10);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterMin>(10);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterMax>(10);

  // tests were timing out in DEBUG mode, reduce the amount of work
#if defined(KOKKOS_ENABLE_DEBUG)
  const int big_n = 100 * 1000;
#elif defined(KOKKOS_ENABLE_SERIAL)
  // The Serial backend also gets the reduced size.
  const bool is_serial = std::is_same<TEST_EXECSPACE, Kokkos::Serial>::value;
  const int big_n      = is_serial ? 100 * 1000 : 10000 * 1000;
#else
  const int big_n = 10000 * 1000;
#endif

  test_scatter_view<TEST_EXECSPACE, KE::ScatterSum, double>(big_n);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterSum, unsigned int>(big_n);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterProd>(big_n);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterMin>(big_n);
  test_scatter_view<TEST_EXECSPACE, KE::ScatterMax>(big_n);
}
669 
TEST(TEST_CATEGORY,scatterview_devicetype)670 TEST(TEST_CATEGORY, scatterview_devicetype) {
671   using device_type =
672       Kokkos::Device<TEST_EXECSPACE, typename TEST_EXECSPACE::memory_space>;
673 
674   test_scatter_view<device_type, Kokkos::Experimental::ScatterSum, double>(10);
675   test_scatter_view<device_type, Kokkos::Experimental::ScatterSum,
676                     unsigned int>(10);
677   test_scatter_view<device_type, Kokkos::Experimental::ScatterProd>(10);
678   test_scatter_view<device_type, Kokkos::Experimental::ScatterMin>(10);
679   test_scatter_view<device_type, Kokkos::Experimental::ScatterMax>(10);
680 
681 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
682 #ifdef KOKKOS_ENABLE_CUDA
683   using device_execution_space = Kokkos::Cuda;
684   using device_memory_space    = Kokkos::CudaSpace;
685   using host_accessible_space  = Kokkos::CudaUVMSpace;
686 #else
687   using device_execution_space = Kokkos::Experimental::HIP;
688   using device_memory_space    = Kokkos::Experimental::HIPSpace;
689   using host_accessible_space  = Kokkos::Experimental::HIPHostPinnedSpace;
690 #endif
691   if (std::is_same<TEST_EXECSPACE, device_execution_space>::value) {
692     using device_device_type =
693         Kokkos::Device<device_execution_space, device_memory_space>;
694     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
695                       double>(10);
696     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterSum,
697                       unsigned int>(10);
698     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterProd>(
699         10);
700     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterMin>(10);
701     test_scatter_view<device_device_type, Kokkos::Experimental::ScatterMax>(10);
702     using host_device_type =
703         Kokkos::Device<device_execution_space, host_accessible_space>;
704     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
705                       double>(10);
706     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterSum,
707                       unsigned int>(10);
708     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterProd>(10);
709     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMin>(10);
710     test_scatter_view<host_device_type, Kokkos::Experimental::ScatterMax>(10);
711   }
712 #endif
713 }
714 
715 }  // namespace Test
716 
717 #endif  // KOKKOS_TEST_SCATTER_VIEW_HPP
718