1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 47 namespace cv { namespace cuda { namespace device 48 { 49 namespace optical_flow 50 { 51 #define NEEDLE_MAP_SCALE 16 52 #define NUM_VERTS_PER_ARROW 6 53 NeedleMapAverageKernel(const PtrStepSzf u,const PtrStepf v,PtrStepf u_avg,PtrStepf v_avg)54 __global__ void NeedleMapAverageKernel(const PtrStepSzf u, const PtrStepf v, PtrStepf u_avg, PtrStepf v_avg) 55 { 56 __shared__ float smem[2 * NEEDLE_MAP_SCALE]; 57 58 volatile float* u_col_sum = smem; 59 volatile float* v_col_sum = u_col_sum + NEEDLE_MAP_SCALE; 60 61 const int x = blockIdx.x * NEEDLE_MAP_SCALE + threadIdx.x; 62 const int y = blockIdx.y * NEEDLE_MAP_SCALE; 63 64 u_col_sum[threadIdx.x] = 0; 65 v_col_sum[threadIdx.x] = 0; 66 67 #pragma unroll 68 for(int i = 0; i < NEEDLE_MAP_SCALE; ++i) 69 { 70 u_col_sum[threadIdx.x] += u(::min(y + i, u.rows - 1), x); 71 v_col_sum[threadIdx.x] += v(::min(y + i, u.rows - 1), x); 72 } 73 74 if (threadIdx.x < 8) 75 { 76 // now add the column sums 77 const uint X = threadIdx.x; 78 79 if (X | 0xfe == 0xfe) // bit 0 is 0 80 { 81 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 1]; 82 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 1]; 83 } 84 85 if (X | 0xfe == 0xfc) // bits 0 & 1 == 0 86 { 87 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 2]; 88 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 2]; 89 } 90 91 if (X | 0xf8 == 0xf8) 92 { 93 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 4]; 94 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 4]; 95 } 96 97 if (X == 0) 98 { 99 u_col_sum[threadIdx.x] += u_col_sum[threadIdx.x + 8]; 100 v_col_sum[threadIdx.x] += v_col_sum[threadIdx.x + 8]; 101 } 102 } 103 104 if (threadIdx.x == 0) 105 { 106 const float coeff = 1.0f / (NEEDLE_MAP_SCALE * NEEDLE_MAP_SCALE); 107 108 u_col_sum[0] *= coeff; 109 v_col_sum[0] *= coeff; 110 111 u_avg(blockIdx.y, blockIdx.x) = u_col_sum[0]; 112 v_avg(blockIdx.y, blockIdx.x) = v_col_sum[0]; 113 } 114 } 115 NeedleMapAverage_gpu(PtrStepSzf u,PtrStepSzf v,PtrStepSzf u_avg,PtrStepSzf v_avg)116 void NeedleMapAverage_gpu(PtrStepSzf u, PtrStepSzf v, PtrStepSzf u_avg, PtrStepSzf v_avg) 117 { 118 const dim3 block(NEEDLE_MAP_SCALE); 119 const dim3 grid(u_avg.cols, u_avg.rows); 120 121 NeedleMapAverageKernel<<<grid, block>>>(u, v, u_avg, v_avg); 122 cudaSafeCall( cudaGetLastError() ); 123 124 cudaSafeCall( cudaDeviceSynchronize() ); 125 } 126 NeedleMapVertexKernel(const PtrStepSzf u_avg,const PtrStepf v_avg,float * vertex_data,float * color_data,float max_flow,float xscale,float yscale)127 __global__ void NeedleMapVertexKernel(const PtrStepSzf u_avg, const PtrStepf v_avg, float* vertex_data, float* color_data, float max_flow, float xscale, float yscale) 128 { 129 // test - just draw a triangle at each pixel 130 const int x = blockIdx.x * blockDim.x + threadIdx.x; 131 const int y = blockIdx.y * blockDim.y + threadIdx.y; 132 133 const float arrow_x = x * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f; 134 const float arrow_y = y * NEEDLE_MAP_SCALE + NEEDLE_MAP_SCALE / 2.0f; 135 136 float3 v[NUM_VERTS_PER_ARROW]; 137 138 if (x < u_avg.cols && y < u_avg.rows) 139 { 140 const float u_avg_val = u_avg(y, x); 141 const float v_avg_val = v_avg(y, x); 142 143 const float theta = ::atan2f(v_avg_val, u_avg_val); 144 145 float r = ::sqrtf(v_avg_val * v_avg_val + u_avg_val * u_avg_val); 146 r = fmin(14.0f * (r / max_flow), 14.0f); 147 148 v[0].z = 1.0f; 149 v[1].z = 0.7f; 150 v[2].z = 0.7f; 151 v[3].z = 0.7f; 152 v[4].z = 0.7f; 153 v[5].z = 1.0f; 154 155 v[0].x = arrow_x; 156 v[0].y = arrow_y; 157 v[5].x = arrow_x; 158 v[5].y = arrow_y; 159 160 v[2].x = arrow_x + r * ::cosf(theta); 161 v[2].y = arrow_y + r * ::sinf(theta); 162 v[3].x = v[2].x; 163 v[3].y = v[2].y; 164 165 r = ::fmin(r, 2.5f); 166 167 v[1].x = arrow_x + r * ::cosf(theta - CV_PI_F / 2.0f); 168 v[1].y = arrow_y + r * ::sinf(theta - CV_PI_F / 2.0f); 169 170 v[4].x = arrow_x + r * ::cosf(theta + CV_PI_F / 2.0f); 171 v[4].y = arrow_y + r * ::sinf(theta + CV_PI_F / 2.0f); 172 173 int indx = (y * u_avg.cols + x) * NUM_VERTS_PER_ARROW * 3; 174 175 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 176 vertex_data[indx++] = v[0].x * xscale; 177 vertex_data[indx++] = v[0].y * yscale; 178 vertex_data[indx++] = v[0].z; 179 180 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 181 vertex_data[indx++] = v[1].x * xscale; 182 vertex_data[indx++] = v[1].y * yscale; 183 vertex_data[indx++] = v[1].z; 184 185 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 186 vertex_data[indx++] = v[2].x * xscale; 187 vertex_data[indx++] = v[2].y * yscale; 188 vertex_data[indx++] = v[2].z; 189 190 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 191 vertex_data[indx++] = v[3].x * xscale; 192 vertex_data[indx++] = v[3].y * yscale; 193 vertex_data[indx++] = v[3].z; 194 195 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 196 vertex_data[indx++] = v[4].x * xscale; 197 vertex_data[indx++] = v[4].y * yscale; 198 vertex_data[indx++] = v[4].z; 199 200 color_data[indx] = (theta - CV_PI_F) / CV_PI_F * 180.0f; 201 vertex_data[indx++] = v[5].x * xscale; 202 vertex_data[indx++] = v[5].y * yscale; 203 vertex_data[indx++] = v[5].z; 204 } 205 } 206 CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg,PtrStepSzf v_avg,float * vertex_buffer,float * color_data,float max_flow,float xscale,float yscale)207 void CreateOpticalFlowNeedleMap_gpu(PtrStepSzf u_avg, PtrStepSzf v_avg, float* vertex_buffer, float* color_data, float max_flow, float xscale, float yscale) 208 { 209 const dim3 block(16); 210 const dim3 grid(divUp(u_avg.cols, block.x), divUp(u_avg.rows, block.y)); 211 212 NeedleMapVertexKernel<<<grid, block>>>(u_avg, v_avg, vertex_buffer, color_data, max_flow, xscale, yscale); 213 cudaSafeCall( cudaGetLastError() ); 214 215 cudaSafeCall( cudaDeviceSynchronize() ); 216 } 217 } 218 }}} 219 220 #endif /* CUDA_DISABLER */ 221