1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #include <Kokkos_Core.hpp>
46 #ifdef KOKKOS_ENABLE_CUDA
47 #include <Cuda/Kokkos_Cuda_Locks.hpp>
48 #include <Cuda/Kokkos_Cuda_Error.hpp>
49 
50 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
namespace Kokkos {
namespace Impl {
// Device-side lock-array descriptor held in __constant__ memory. This
// definition is only compiled when relocatable device code is enabled.
// It starts out with both array pointers null and a count of zero.
__device__ __constant__ CudaLockArrays g_device_cuda_lock_arrays = {
    nullptr, nullptr, 0};
}  // namespace Impl
}  // namespace Kokkos
57 #endif
58 
59 namespace Kokkos {
60 
61 namespace {
62 
// Zeroes the entries of the device-side `atomic` lock array.
//
// Each thread clears the single entry at its flat global index. Threads
// whose index is CUDA_SPACE_ATOMIC_MASK + 1 or larger exit without writing,
// so the launch may round the thread count up to a whole number of blocks.
__global__ void init_lock_array_kernel_atomic() {
  const unsigned entry = blockIdx.x * blockDim.x + threadIdx.x;
  if (entry >= CUDA_SPACE_ATOMIC_MASK + 1) return;
  Kokkos::Impl::g_device_cuda_lock_arrays.atomic[entry] = 0;
}
69 
// Zeroes the first N entries of the device-side `scratch` lock array.
//
// N: number of entries to clear. Each thread clears the single entry at its
// flat global index; threads at index N or beyond exit without writing.
__global__ void init_lock_array_kernel_threadid(int N) {
  const unsigned entry = blockIdx.x * blockDim.x + threadIdx.x;
  const unsigned count = (unsigned)N;
  if (entry >= count) return;
  Kokkos::Impl::g_device_cuda_lock_arrays.scratch[entry] = 0;
}
76 
77 }  // namespace
78 
79 namespace Impl {
80 
// Host-side lock-array descriptor. Both array pointers start out null and
// the count starts at zero, which marks the arrays as not yet allocated.
CudaLockArrays g_host_cuda_lock_arrays = {nullptr, nullptr, 0};
82 
// Allocates the `atomic` and `scratch` lock arrays on the device, copies the
// host descriptor to the device, and launches the kernels that zero both
// arrays. Returns immediately if the `atomic` array is already allocated.
//
// Every CUDA runtime call is routed through CUDA_SAFE_CALL, and each kernel
// launch is followed by a cudaGetLastError() check so that a rejected launch
// is reported instead of leaving the arrays uninitialized.
void initialize_host_cuda_lock_arrays() {
  if (g_host_cuda_lock_arrays.atomic != nullptr) return;

  // Query once so that the allocation size, the stored count and the launch
  // configuration are all derived from the same value.
  const auto concurrency = Cuda::concurrency();

  CUDA_SAFE_CALL(cudaMalloc(&g_host_cuda_lock_arrays.atomic,
                            sizeof(int) * (CUDA_SPACE_ATOMIC_MASK + 1)));
  CUDA_SAFE_CALL(cudaMalloc(&g_host_cuda_lock_arrays.scratch,
                            sizeof(int) * (concurrency)));
  CUDA_SAFE_CALL(cudaDeviceSynchronize());
  g_host_cuda_lock_arrays.n = concurrency;

  // The kernels below reach the arrays through the device-side descriptor,
  // so it has to be updated before they are launched.
  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();

  // One thread per entry, 256 threads per block, grid size rounded up.
  init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
                                  256>>>();
  // Launch failures are not returned by the <<<>>> syntax itself.
  CUDA_SAFE_CALL(cudaGetLastError());

  init_lock_array_kernel_threadid<<<(concurrency + 255) / 256, 256>>>(
      concurrency);
  CUDA_SAFE_CALL(cudaGetLastError());

  // Wait for both kernels and surface any error raised while they ran.
  CUDA_SAFE_CALL(cudaDeviceSynchronize());
}
98 
// Frees the device `atomic` and `scratch` lock arrays and resets the host
// descriptor to null pointers and a zero count. Returns immediately if the
// `atomic` array is not currently allocated.
//
// The cudaFree return codes are not checked.
void finalize_host_cuda_lock_arrays() {
  auto& locks = g_host_cuda_lock_arrays;
  if (locks.atomic == nullptr) return;

  // Release the device storage first, then clear the host-side record.
  cudaFree(locks.atomic);
  cudaFree(locks.scratch);

  locks.atomic  = nullptr;
  locks.scratch = nullptr;
  locks.n       = 0;

#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
  // Push the now-empty descriptor to the device-side copy as well.
  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
#endif
}
110 
111 }  // namespace Impl
112 
113 }  // namespace Kokkos
114 
115 #else
116 
// Intentionally empty. This is the only definition compiled from this file
// when KOKKOS_ENABLE_CUDA is not defined.
void KOKKOS_CORE_SRC_CUDA_CUDA_LOCKS_PREVENT_LINK_ERROR() {
  return;
}
118 
119 #endif
120