1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 3.0
6 // Copyright (2020) National Technology & Engineering
7 // Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44
45 #ifndef KOKKOS_HIP_KERNEL_LAUNCH_HPP
46 #define KOKKOS_HIP_KERNEL_LAUNCH_HPP
47
48 #include <Kokkos_Macros.hpp>
49
50 #if defined(__HIPCC__)
51
52 #include <HIP/Kokkos_HIP_Error.hpp>
53 #include <HIP/Kokkos_HIP_Instance.hpp>
54 #include <Kokkos_HIP_Space.hpp>
55
56 // Must use global variable on the device with HIP-Clang
57 #ifdef __HIP__
58 __device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer
59 [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage /
60 sizeof(unsigned long)];
61 #endif
62
63 namespace Kokkos {
64 namespace Experimental {
65 template <typename T>
kokkos_impl_hip_shared_memory()66 inline __device__ T *kokkos_impl_hip_shared_memory() {
67 HIP_DYNAMIC_SHARED(HIPSpace::size_type, sh);
68 return (T *)sh;
69 }
70 } // namespace Experimental
71 } // namespace Kokkos
72
73 namespace Kokkos {
74 namespace Experimental {
75 namespace Impl {
76
77 template <typename DriverType>
hip_parallel_launch_constant_memory()78 __global__ static void hip_parallel_launch_constant_memory() {
79 const DriverType &driver = *(reinterpret_cast<const DriverType *>(
80 kokkos_impl_hip_constant_memory_buffer));
81 driver();
82 }
83
84 template <typename DriverType, unsigned int maxTperB, unsigned int minBperSM>
__launch_bounds__(maxTperB,minBperSM)85 __global__ __launch_bounds__(
86 maxTperB, minBperSM) static void hip_parallel_launch_constant_memory() {
87 const DriverType &driver = *(reinterpret_cast<const DriverType *>(
88 kokkos_impl_hip_constant_memory_buffer));
89
90 driver->operator()();
91 }
92
93 template <class DriverType>
hip_parallel_launch_local_memory(const DriverType * driver)94 __global__ static void hip_parallel_launch_local_memory(
95 const DriverType *driver) {
96 driver->operator()();
97 }
98
99 template <class DriverType, unsigned int maxTperB, unsigned int minBperSM>
__launch_bounds__(maxTperB,minBperSM)100 __global__ __launch_bounds__(
101 maxTperB,
102 minBperSM) static void hip_parallel_launch_local_memory(const DriverType
103 *driver) {
104 driver->operator()();
105 }
106
107 enum class HIPLaunchMechanism : unsigned {
108 Default = 0,
109 ConstantMemory = 1,
110 GlobalMemory = 2,
111 LocalMemory = 4
112 };
113
operator |(HIPLaunchMechanism p1,HIPLaunchMechanism p2)114 constexpr inline HIPLaunchMechanism operator|(HIPLaunchMechanism p1,
115 HIPLaunchMechanism p2) {
116 return static_cast<HIPLaunchMechanism>(static_cast<unsigned>(p1) |
117 static_cast<unsigned>(p2));
118 }
operator &(HIPLaunchMechanism p1,HIPLaunchMechanism p2)119 constexpr inline HIPLaunchMechanism operator&(HIPLaunchMechanism p1,
120 HIPLaunchMechanism p2) {
121 return static_cast<HIPLaunchMechanism>(static_cast<unsigned>(p1) &
122 static_cast<unsigned>(p2));
123 }
124
125 template <HIPLaunchMechanism l>
126 struct HIPDispatchProperties {
127 HIPLaunchMechanism launch_mechanism = l;
128 };
129
130 template <typename DriverType, typename LaunchBounds,
131 HIPLaunchMechanism LaunchMechanism>
132 struct HIPParallelLaunchKernelFunc;
133
134 template <typename DriverType, unsigned int MaxThreadsPerBlock,
135 unsigned int MinBlocksPerSM>
136 struct HIPParallelLaunchKernelFunc<
137 DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>,
138 HIPLaunchMechanism::LocalMemory> {
get_kernel_funcKokkos::Experimental::Impl::HIPParallelLaunchKernelFunc139 static auto get_kernel_func() {
140 return hip_parallel_launch_local_memory<DriverType, MaxThreadsPerBlock,
141 MinBlocksPerSM>;
142 }
143 };
144
145 template <typename DriverType>
146 struct HIPParallelLaunchKernelFunc<DriverType, Kokkos::LaunchBounds<0, 0>,
147 HIPLaunchMechanism::LocalMemory> {
get_kernel_funcKokkos::Experimental::Impl::HIPParallelLaunchKernelFunc148 static auto get_kernel_func() {
149 return hip_parallel_launch_local_memory<DriverType, 1024, 1>;
150 }
151 };
152
153 template <typename DriverType, typename LaunchBounds,
154 HIPLaunchMechanism LaunchMechanism>
155 struct HIPParallelLaunchKernelInvoker;
156
157 template <typename DriverType, typename LaunchBounds>
158 struct HIPParallelLaunchKernelInvoker<DriverType, LaunchBounds,
159 HIPLaunchMechanism::LocalMemory>
160 : HIPParallelLaunchKernelFunc<DriverType, LaunchBounds,
161 HIPLaunchMechanism::LocalMemory> {
162 using base_t = HIPParallelLaunchKernelFunc<DriverType, LaunchBounds,
163 HIPLaunchMechanism::LocalMemory>;
164
invoke_kernelKokkos::Experimental::Impl::HIPParallelLaunchKernelInvoker165 static void invoke_kernel(DriverType const *driver, dim3 const &grid,
166 dim3 const &block, int shmem,
167 HIPInternal const *hip_instance) {
168 (base_t::get_kernel_func())<<<grid, block, shmem, hip_instance->m_stream>>>(
169 driver);
170 }
171 };
172
173 template <typename DriverType, typename LaunchBounds = Kokkos::LaunchBounds<>,
174 HIPLaunchMechanism LaunchMechanism = HIPLaunchMechanism::LocalMemory>
175 struct HIPParallelLaunch;
176
177 template <typename DriverType, unsigned int MaxThreadsPerBlock,
178 unsigned int MinBlocksPerSM>
179 struct HIPParallelLaunch<
180 DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>,
181 HIPLaunchMechanism::LocalMemory>
182 : HIPParallelLaunchKernelInvoker<
183 DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>,
184 HIPLaunchMechanism::LocalMemory> {
185 using base_t = HIPParallelLaunchKernelInvoker<
186 DriverType, Kokkos::LaunchBounds<MaxThreadsPerBlock, MinBlocksPerSM>,
187 HIPLaunchMechanism::LocalMemory>;
188
HIPParallelLaunchKokkos::Experimental::Impl::HIPParallelLaunch189 HIPParallelLaunch(const DriverType &driver, const dim3 &grid,
190 const dim3 &block, const int shmem,
191 const HIPInternal *hip_instance,
192 const bool /*prefer_shmem*/) {
193 if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) {
194 if (hip_instance->m_maxShmemPerBlock < shmem) {
195 Kokkos::Impl::throw_runtime_exception(
196 "HIPParallelLaunch FAILED: shared memory request is too large");
197 }
198
199 KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE();
200
201 // Invoke the driver function on the device
202 DriverType *d_driver = reinterpret_cast<DriverType *>(
203 hip_instance->get_next_driver(sizeof(DriverType)));
204 std::memcpy((void *)d_driver, (void *)&driver, sizeof(DriverType));
205 base_t::invoke_kernel(d_driver, grid, block, shmem, hip_instance);
206
207 #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK)
208 HIP_SAFE_CALL(hipGetLastError());
209 hip_instance->fence();
210 #endif
211 }
212 }
213
get_hip_func_attributesKokkos::Experimental::Impl::HIPParallelLaunch214 static hipFuncAttributes get_hip_func_attributes() {
215 static hipFuncAttributes attr = []() {
216 hipFuncAttributes attr;
217 HIP_SAFE_CALL(hipFuncGetAttributes(
218 &attr, reinterpret_cast<void const *>(base_t::get_kernel_func())));
219 return attr;
220 }();
221 return attr;
222 }
223 };
224 } // namespace Impl
225 } // namespace Experimental
226 } // namespace Kokkos
227
228 #endif
229
230 #endif
231