1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #include <TestCuda_Category.hpp>
46 #include <Test_InterOp_Streams.hpp>
47
48 namespace Test {
49 // Test Interoperability with Cuda Streams
TEST(cuda,raw_cuda_streams)50 TEST(cuda, raw_cuda_streams) {
51 cudaStream_t stream;
52 cudaStreamCreate(&stream);
53 Kokkos::InitArguments arguments{-1, -1, -1, false};
54 Kokkos::initialize(arguments);
55 int* p;
56 cudaMalloc(&p, sizeof(int) * 100);
57 using MemorySpace = typename TEST_EXECSPACE::memory_space;
58
59 {
60 TEST_EXECSPACE space0(stream);
61 Kokkos::View<int*, TEST_EXECSPACE> v(p, 100);
62 Kokkos::deep_copy(space0, v, 5);
63 int sum;
64
65 Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range",
66 Kokkos::RangePolicy<TEST_EXECSPACE>(space0, 0, 100),
67 FunctorRange<MemorySpace>(v));
68 Kokkos::parallel_reduce(
69 "Test::cuda::raw_cuda_stream::RangeReduce",
70 Kokkos::RangePolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128, 2>>(
71 space0, 0, 100),
72 FunctorRangeReduce<MemorySpace>(v), sum);
73 space0.fence();
74 ASSERT_EQ(600, sum);
75
76 Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange",
77 Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>(
78 space0, {0, 0}, {10, 10}),
79 FunctorMDRange<MemorySpace>(v));
80 Kokkos::parallel_reduce(
81 "Test::cuda::raw_cuda_stream::MDRangeReduce",
82 Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>,
83 Kokkos::LaunchBounds<128, 2>>(space0, {0, 0},
84 {10, 10}),
85 FunctorMDRangeReduce<MemorySpace>(v), sum);
86 space0.fence();
87 ASSERT_EQ(700, sum);
88
89 Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team",
90 Kokkos::TeamPolicy<TEST_EXECSPACE>(space0, 10, 10),
91 FunctorTeam<MemorySpace, TEST_EXECSPACE>(v));
92 Kokkos::parallel_reduce(
93 "Test::cuda::raw_cuda_stream::Team",
94 Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128, 2>>(
95 space0, 10, 10),
96 FunctorTeamReduce<MemorySpace, TEST_EXECSPACE>(v), sum);
97 space0.fence();
98 ASSERT_EQ(800, sum);
99 }
100 Kokkos::finalize();
101 offset_streams<<<100, 64, 0, stream>>>(p);
102 CUDA_SAFE_CALL(cudaDeviceSynchronize());
103 cudaStreamDestroy(stream);
104
105 int h_p[100];
106 cudaMemcpy(h_p, p, sizeof(int) * 100, cudaMemcpyDefault);
107 CUDA_SAFE_CALL(cudaDeviceSynchronize());
108 int64_t sum = 0;
109 int64_t sum_expect = 0;
110 for (int i = 0; i < 100; i++) {
111 sum += h_p[i];
112 sum_expect += 8 + i;
113 }
114
115 ASSERT_EQ(sum, sum_expect);
116 }
117 } // namespace Test
118