/* //@HEADER // ************************************************************************ // // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #ifndef KOKKOS_HIP_KERNEL_LAUNCH_HPP #define KOKKOS_HIP_KERNEL_LAUNCH_HPP #include #if defined(__HIPCC__) #include #include #include // Must use global variable on the device with HIP-Clang #ifdef __HIP__ __device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage / sizeof(unsigned long)]; #endif namespace Kokkos { namespace Experimental { template inline __device__ T *kokkos_impl_hip_shared_memory() { HIP_DYNAMIC_SHARED(HIPSpace::size_type, sh); return (T *)sh; } } // namespace Experimental } // namespace Kokkos namespace Kokkos { namespace Experimental { namespace Impl { template __global__ static void hip_parallel_launch_constant_memory() { const DriverType &driver = *(reinterpret_cast( kokkos_impl_hip_constant_memory_buffer)); driver(); } template __global__ __launch_bounds__( maxTperB, minBperSM) static void hip_parallel_launch_constant_memory() { const DriverType &driver = *(reinterpret_cast( kokkos_impl_hip_constant_memory_buffer)); driver->operator()(); } template __global__ static void hip_parallel_launch_local_memory( const DriverType *driver) { driver->operator()(); } template __global__ __launch_bounds__( maxTperB, minBperSM) static void hip_parallel_launch_local_memory(const DriverType *driver) { driver->operator()(); } enum class HIPLaunchMechanism : unsigned { Default = 0, ConstantMemory = 1, GlobalMemory = 2, LocalMemory = 4 }; constexpr inline HIPLaunchMechanism operator|(HIPLaunchMechanism p1, HIPLaunchMechanism p2) { return static_cast(static_cast(p1) | static_cast(p2)); } constexpr inline HIPLaunchMechanism operator&(HIPLaunchMechanism p1, HIPLaunchMechanism p2) { return static_cast(static_cast(p1) & static_cast(p2)); } template struct HIPDispatchProperties { HIPLaunchMechanism launch_mechanism = l; }; template struct HIPParallelLaunchKernelFunc; template struct HIPParallelLaunchKernelFunc< DriverType, Kokkos::LaunchBounds, HIPLaunchMechanism::LocalMemory> { static auto get_kernel_func() { return hip_parallel_launch_local_memory; } }; template struct HIPParallelLaunchKernelFunc, HIPLaunchMechanism::LocalMemory> { static auto get_kernel_func() { return hip_parallel_launch_local_memory; } }; template struct HIPParallelLaunchKernelInvoker; template struct HIPParallelLaunchKernelInvoker : HIPParallelLaunchKernelFunc { using base_t = HIPParallelLaunchKernelFunc; static void invoke_kernel(DriverType const *driver, dim3 const &grid, dim3 const &block, int shmem, HIPInternal const *hip_instance) { (base_t::get_kernel_func())<<m_stream>>>( driver); } }; template , HIPLaunchMechanism LaunchMechanism = HIPLaunchMechanism::LocalMemory> struct HIPParallelLaunch; template struct HIPParallelLaunch< DriverType, Kokkos::LaunchBounds, HIPLaunchMechanism::LocalMemory> : HIPParallelLaunchKernelInvoker< DriverType, Kokkos::LaunchBounds, HIPLaunchMechanism::LocalMemory> { using base_t = HIPParallelLaunchKernelInvoker< DriverType, Kokkos::LaunchBounds, HIPLaunchMechanism::LocalMemory>; HIPParallelLaunch(const DriverType &driver, const dim3 &grid, const dim3 &block, const int shmem, const HIPInternal *hip_instance, const bool /*prefer_shmem*/) { if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) { if (hip_instance->m_maxShmemPerBlock < shmem) { Kokkos::Impl::throw_runtime_exception( "HIPParallelLaunch FAILED: shared memory request is too large"); } KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE(); // Invoke the driver function on the device DriverType *d_driver = reinterpret_cast( hip_instance->get_next_driver(sizeof(DriverType))); std::memcpy((void *)d_driver, (void *)&driver, sizeof(DriverType)); base_t::invoke_kernel(d_driver, grid, block, shmem, hip_instance); #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) HIP_SAFE_CALL(hipGetLastError()); hip_instance->fence(); #endif } } static hipFuncAttributes get_hip_func_attributes() { static hipFuncAttributes attr = []() { hipFuncAttributes attr; HIP_SAFE_CALL(hipFuncGetAttributes( &attr, reinterpret_cast(base_t::get_kernel_func()))); return attr; }(); return attr; } }; } // namespace Impl } // namespace Experimental } // namespace Kokkos #endif #endif