/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_

#include "common.hpp"
#include "warp_reduce.hpp"

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    // Emulates warp/block intrinsics and atomic operations that are not
    // available on older CUDA architectures, falling back to the native
    // instruction wherever the target architecture provides one.
    struct Emulation
    {
        // Block-wide "or" vote: returns non-zero if pred is non-zero for at
        // least one thread of the block. __syncthreads_or() requires compute
        // capability 2.0; below that only a compilation stub is provided.
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
                // just a compilation stub
                return 0;
#else
                return __syncthreads_or(pred);
#endif
        }

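        // A minimal usage sketch (illustrative only; the kernel and its names
        // are not part of OpenCV): every thread tests a predicate and the
        // whole block learns whether any thread matched.
        //
        //   __global__ void anyNegative(const int* data, int n, int* flag)
        //   {
        //       int idx = blockIdx.x * blockDim.x + threadIdx.x;
        //       int pred = (idx < n) && (data[idx] < 0);
        //       if (Emulation::syncthreadsOr(pred) && threadIdx.x == 0)
        //           *flag = 1;
        //   }
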
        // Emulates the warp vote __ballot(): returns a mask with bit i set if
        // predicate is non-zero for lane i. On pre-Fermi devices the mask is
        // assembled in shared memory and combined with a warp reduction;
        // CTA_SIZE must equal the number of threads in the block.
        template<int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }

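        // A minimal usage sketch (illustrative; not an OpenCV API): count the
        // threads of the calling warp whose value exceeds a threshold.
        //
        //   template <int CTA_SIZE>
        //   __device__ int warpCountAbove(float v, float thresh)
        //   {
        //       int mask = Emulation::Ballot<CTA_SIZE>(v > thresh);
        //       return __popc(mask);
        //   }
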
        // Atomic operations on shared memory. Native shared-memory atomics
        // require compute capability 1.2; on older devices they are emulated
        // with a thread-tagging scheme: the top 5 bits of the word hold the
        // id of the last thread that wrote it, and each thread retries until
        // its own tagged write survives. The payload is therefore limited to
        // the low 27 bits (TAG_MASK).
        struct smem
        {
            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

            template<typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                } while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem

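        // A minimal usage sketch (illustrative; not an OpenCV API): bump a
        // per-block counter kept in shared memory. Note that on the emulated
        // (pre-1.2) path the counter value must fit in the low 27 bits.
        //
        //   __shared__ unsigned int s_counter;
        //   ...
        //   unsigned int old = Emulation::smem::atomicAdd(&s_counter, 1U);
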
        // Atomic operations on global memory. Overloads with no native
        // instruction on the target architecture are emulated with an
        // atomicCAS() loop.
        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
            #else
                // Emulate float atomicAdd with a compare-and-swap loop on the
                // integer view of the word.
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(val + __int_as_float(assumed)));
                } while (assumed != old);
                return __int_as_float(old);
            #endif
            }
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                // 64-bit atomicCAS and double precision require compute
                // capability 1.3; there is no emulation path below that.
                CV_UNUSED(address);
                CV_UNUSED(val);
                return 0.0;
            #endif
            }

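            // A minimal usage sketch (illustrative; not an OpenCV API):
            // accumulate per-thread values into a single global float.
            //
            //   __global__ void sumKernel(const float* data, int n, float* result)
            //   {
            //       int idx = blockIdx.x * blockDim.x + threadIdx.x;
            //       if (idx < n)
            //           Emulation::glob::atomicAdd(result, data[idx]);
            //   }
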
            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                // There is no native float atomicMin; emulate it with an
                // atomicCAS() loop on the integer view of the word.
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fminf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                CV_UNUSED(address);
                CV_UNUSED(val);
                return 0.0f;
            #endif
            }
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                CV_UNUSED(address);
                CV_UNUSED(val);
                return 0.0;
            #endif
            }

            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                // As with atomicMin above, float atomicMax is emulated with
                // an atomicCAS() loop.
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                CV_UNUSED(address);
                CV_UNUSED(val);
                return 0.0f;
            #endif
            }
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                CV_UNUSED(address);
                CV_UNUSED(val);
                return 0.0;
            #endif
            }
        }; // struct glob
    }; // struct Emulation
}}} // namespace cv { namespace cuda { namespace device

//! @endcond

#endif /* OPENCV_CUDA_EMULATION_HPP_ */