1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #include <TestCuda_Category.hpp>
46 #include <Test_InterOp_Streams.hpp>
47 
48 namespace Test {
49 // Test Interoperability with Cuda Streams
TEST(cuda,raw_cuda_streams)50 TEST(cuda, raw_cuda_streams) {
51   cudaStream_t stream;
52   cudaStreamCreate(&stream);
53   Kokkos::InitArguments arguments{-1, -1, -1, false};
54   Kokkos::initialize(arguments);
55   int* p;
56   cudaMalloc(&p, sizeof(int) * 100);
57   using MemorySpace = typename TEST_EXECSPACE::memory_space;
58 
59   {
60     TEST_EXECSPACE space0(stream);
61     Kokkos::View<int*, TEST_EXECSPACE> v(p, 100);
62     Kokkos::deep_copy(space0, v, 5);
63     int sum;
64 
65     Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range",
66                          Kokkos::RangePolicy<TEST_EXECSPACE>(space0, 0, 100),
67                          FunctorRange<MemorySpace>(v));
68     Kokkos::parallel_reduce(
69         "Test::cuda::raw_cuda_stream::RangeReduce",
70         Kokkos::RangePolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128, 2>>(
71             space0, 0, 100),
72         FunctorRangeReduce<MemorySpace>(v), sum);
73     space0.fence();
74     ASSERT_EQ(600, sum);
75 
76     Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange",
77                          Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>>(
78                              space0, {0, 0}, {10, 10}),
79                          FunctorMDRange<MemorySpace>(v));
80     Kokkos::parallel_reduce(
81         "Test::cuda::raw_cuda_stream::MDRangeReduce",
82         Kokkos::MDRangePolicy<TEST_EXECSPACE, Kokkos::Rank<2>,
83                               Kokkos::LaunchBounds<128, 2>>(space0, {0, 0},
84                                                             {10, 10}),
85         FunctorMDRangeReduce<MemorySpace>(v), sum);
86     space0.fence();
87     ASSERT_EQ(700, sum);
88 
89     Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team",
90                          Kokkos::TeamPolicy<TEST_EXECSPACE>(space0, 10, 10),
91                          FunctorTeam<MemorySpace, TEST_EXECSPACE>(v));
92     Kokkos::parallel_reduce(
93         "Test::cuda::raw_cuda_stream::Team",
94         Kokkos::TeamPolicy<TEST_EXECSPACE, Kokkos::LaunchBounds<128, 2>>(
95             space0, 10, 10),
96         FunctorTeamReduce<MemorySpace, TEST_EXECSPACE>(v), sum);
97     space0.fence();
98     ASSERT_EQ(800, sum);
99   }
100   Kokkos::finalize();
101   offset_streams<<<100, 64, 0, stream>>>(p);
102   CUDA_SAFE_CALL(cudaDeviceSynchronize());
103   cudaStreamDestroy(stream);
104 
105   int h_p[100];
106   cudaMemcpy(h_p, p, sizeof(int) * 100, cudaMemcpyDefault);
107   CUDA_SAFE_CALL(cudaDeviceSynchronize());
108   int64_t sum        = 0;
109   int64_t sum_expect = 0;
110   for (int i = 0; i < 100; i++) {
111     sum += h_p[i];
112     sum_expect += 8 + i;
113   }
114 
115   ASSERT_EQ(sum, sum_expect);
116 }
117 }  // namespace Test
118