1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/transform.hpp" 47 #include "opencv2/core/cuda/functional.hpp" 48 #include "opencv2/core/cuda/reduce.hpp" 49 50 namespace cv { namespace cuda { namespace device 51 { 52 /////////////////////////////////// reprojectImageTo3D /////////////////////////////////////////////// 53 54 __constant__ float cq[16]; 55 56 template <typename T, typename D> reprojectImageTo3D(const PtrStepSz<T> disp,PtrStep<D> xyz)57 __global__ void reprojectImageTo3D(const PtrStepSz<T> disp, PtrStep<D> xyz) 58 { 59 const int x = blockIdx.x * blockDim.x + threadIdx.x; 60 const int y = blockIdx.y * blockDim.y + threadIdx.y; 61 62 if (y >= disp.rows || x >= disp.cols) 63 return; 64 65 const float qx = x * cq[ 0] + y * cq[ 1] + cq[ 3]; 66 const float qy = x * cq[ 4] + y * cq[ 5] + cq[ 7]; 67 const float qz = x * cq[ 8] + y * cq[ 9] + cq[11]; 68 const float qw = x * cq[12] + y * cq[13] + cq[15]; 69 70 const T d = disp(y, x); 71 72 const float iW = 1.f / (qw + cq[14] * d); 73 74 D v = VecTraits<D>::all(1.0f); 75 v.x = (qx + cq[2] * d) * iW; 76 v.y = (qy + cq[6] * d) * iW; 77 v.z = (qz + cq[10] * d) * iW; 78 79 xyz(y, x) = v; 80 } 81 82 template <typename T, typename D> reprojectImageTo3D_gpu(const PtrStepSzb disp,PtrStepSzb xyz,const float * q,cudaStream_t stream)83 void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream) 84 { 85 dim3 block(32, 8); 86 dim3 grid(divUp(disp.cols, block.x), divUp(disp.rows, block.y)); 87 88 cudaSafeCall( cudaMemcpyToSymbol(cq, q, 16 * sizeof(float)) ); 89 90 reprojectImageTo3D<T, D><<<grid, block, 0, stream>>>((PtrStepSz<T>)disp, (PtrStepSz<D>)xyz); 91 cudaSafeCall( cudaGetLastError() ); 92 93 if (stream == 0) 94 cudaSafeCall( cudaDeviceSynchronize() ); 95 } 96 97 template void reprojectImageTo3D_gpu<uchar, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 98 template void reprojectImageTo3D_gpu<uchar, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 99 template void reprojectImageTo3D_gpu<short, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 100 template void reprojectImageTo3D_gpu<short, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 101 template void reprojectImageTo3D_gpu<int, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 102 template void reprojectImageTo3D_gpu<int, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 103 template void reprojectImageTo3D_gpu<float, float3>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 104 template void reprojectImageTo3D_gpu<float, float4>(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); 105 106 /////////////////////////////////// drawColorDisp /////////////////////////////////////////////// 107 108 template <typename T> cvtPixel(T d,int ndisp,float S=1,float V=1)109 __device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1) 110 { 111 unsigned int H = ((ndisp-d) * 240)/ndisp; 112 113 unsigned int hi = (H/60) % 6; 114 float f = H/60.f - H/60; 115 float p = V * (1 - S); 116 float q = V * (1 - f * S); 117 float t = V * (1 - (1 - f) * S); 118 119 float3 res; 120 121 if (hi == 0) //R = V, G = t, B = p 122 { 123 res.x = p; 124 res.y = t; 125 res.z = V; 126 } 127 128 if (hi == 1) // R = q, G = V, B = p 129 { 130 res.x = p; 131 res.y = V; 132 res.z = q; 133 } 134 135 if (hi == 2) // R = p, G = V, B = t 136 { 137 res.x = t; 138 res.y = V; 139 res.z = p; 140 } 141 142 if (hi == 3) // R = p, G = q, B = V 143 { 144 res.x = V; 145 res.y = q; 146 res.z = p; 147 } 148 149 if (hi == 4) // R = t, G = p, B = V 150 { 151 res.x = V; 152 res.y = p; 153 res.z = t; 154 } 155 156 if (hi == 5) // R = V, G = p, B = q 157 { 158 res.x = q; 159 res.y = p; 160 res.z = V; 161 } 162 const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f); 163 const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f); 164 const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f); 165 const unsigned int a = 255U; 166 167 return (a << 24) + (r << 16) + (g << 8) + b; 168 } 169 drawColorDisp(uchar * disp,size_t disp_step,uchar * out_image,size_t out_step,int width,int height,int ndisp)170 __global__ void drawColorDisp(uchar* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) 171 { 172 const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; 173 const int y = blockIdx.y * blockDim.y + threadIdx.y; 174 175 if(x < width && y < height) 176 { 177 uchar4 d4 = *(uchar4*)(disp + y * disp_step + x); 178 179 uint4 res; 180 res.x = cvtPixel(d4.x, ndisp); 181 res.y = cvtPixel(d4.y, ndisp); 182 res.z = cvtPixel(d4.z, ndisp); 183 res.w = cvtPixel(d4.w, ndisp); 184 185 uint4* line = (uint4*)(out_image + y * out_step); 186 line[x >> 2] = res; 187 } 188 } 189 drawColorDisp(short * disp,size_t disp_step,uchar * out_image,size_t out_step,int width,int height,int ndisp)190 __global__ void drawColorDisp(short* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) 191 { 192 const int x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; 193 const int y = blockIdx.y * blockDim.y + threadIdx.y; 194 195 if(x < width && y < height) 196 { 197 short2 d2 = *(short2*)(disp + y * disp_step + x); 198 199 uint2 res; 200 res.x = cvtPixel(d2.x, ndisp); 201 res.y = cvtPixel(d2.y, ndisp); 202 203 uint2* line = (uint2*)(out_image + y * out_step); 204 line[x >> 1] = res; 205 } 206 } 207 drawColorDisp(int * disp,size_t disp_step,uchar * out_image,size_t out_step,int width,int height,int ndisp)208 __global__ void drawColorDisp(int* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) 209 { 210 const int x = blockIdx.x * blockDim.x + threadIdx.x; 211 const int y = blockIdx.y * blockDim.y + threadIdx.y; 212 213 if(x < width && y < height) 214 { 215 uint *line = (uint*)(out_image + y * out_step); 216 line[x] = cvtPixel(disp[y*disp_step + x], ndisp); 217 } 218 } 219 drawColorDisp(float * disp,size_t disp_step,uchar * out_image,size_t out_step,int width,int height,int ndisp)220 __global__ void drawColorDisp(float* disp, size_t disp_step, uchar* out_image, size_t out_step, int width, int height, int ndisp) 221 { 222 const int x = blockIdx.x * blockDim.x + threadIdx.x; 223 const int y = blockIdx.y * blockDim.y + threadIdx.y; 224 225 if(x < width && y < height) 226 { 227 uint *line = (uint*)(out_image + y * out_step); 228 line[x] = cvtPixel(disp[y*disp_step + x], ndisp); 229 } 230 } 231 drawColorDisp_gpu(const PtrStepSzb & src,const PtrStepSzb & dst,int ndisp,const cudaStream_t & stream)232 void drawColorDisp_gpu(const PtrStepSzb& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) 233 { 234 dim3 threads(16, 16, 1); 235 dim3 grid(1, 1, 1); 236 grid.x = divUp(src.cols, threads.x << 2); 237 grid.y = divUp(src.rows, threads.y); 238 239 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step, dst.data, dst.step, src.cols, src.rows, ndisp); 240 cudaSafeCall( cudaGetLastError() ); 241 242 if (stream == 0) 243 cudaSafeCall( cudaDeviceSynchronize() ); 244 } 245 drawColorDisp_gpu(const PtrStepSz<short> & src,const PtrStepSzb & dst,int ndisp,const cudaStream_t & stream)246 void drawColorDisp_gpu(const PtrStepSz<short>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) 247 { 248 dim3 threads(32, 8, 1); 249 dim3 grid(1, 1, 1); 250 grid.x = divUp(src.cols, threads.x << 1); 251 grid.y = divUp(src.rows, threads.y); 252 253 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(short), dst.data, dst.step, src.cols, src.rows, ndisp); 254 cudaSafeCall( cudaGetLastError() ); 255 256 if (stream == 0) 257 cudaSafeCall( cudaDeviceSynchronize() ); 258 } 259 drawColorDisp_gpu(const PtrStepSz<int> & src,const PtrStepSzb & dst,int ndisp,const cudaStream_t & stream)260 void drawColorDisp_gpu(const PtrStepSz<int>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) 261 { 262 dim3 threads(32, 8, 1); 263 dim3 grid(1, 1, 1); 264 grid.x = divUp(src.cols, threads.x); 265 grid.y = divUp(src.rows, threads.y); 266 267 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(int), dst.data, dst.step, src.cols, src.rows, ndisp); 268 cudaSafeCall( cudaGetLastError() ); 269 270 if (stream == 0) 271 cudaSafeCall( cudaDeviceSynchronize() ); 272 } 273 drawColorDisp_gpu(const PtrStepSz<float> & src,const PtrStepSzb & dst,int ndisp,const cudaStream_t & stream)274 void drawColorDisp_gpu(const PtrStepSz<float>& src, const PtrStepSzb& dst, int ndisp, const cudaStream_t& stream) 275 { 276 dim3 threads(32, 8, 1); 277 dim3 grid(1, 1, 1); 278 grid.x = divUp(src.cols, threads.x); 279 grid.y = divUp(src.rows, threads.y); 280 281 drawColorDisp<<<grid, threads, 0, stream>>>(src.data, src.step / sizeof(float), dst.data, dst.step, src.cols, src.rows, ndisp); 282 cudaSafeCall( cudaGetLastError() ); 283 284 if (stream == 0) 285 cudaSafeCall( cudaDeviceSynchronize() ); 286 } 287 }}} // namespace cv { namespace cuda { namespace cudev 288 289 290 #endif /* CUDA_DISABLER */ 291