1// OpenCL port of the ORB feature detector and descriptor extractor 2// Copyright (C) 2014, Itseez Inc. See the license at http://opencv.org 3// 4// The original code has been contributed by Peter Andreas Entschev, peter@entschev.com 5 6#define LAYERINFO_SIZE 1 7#define LAYERINFO_OFS 0 8#define KEYPOINT_SIZE 3 9#define ORIENTED_KEYPOINT_SIZE 4 10#define KEYPOINT_X 0 11#define KEYPOINT_Y 1 12#define KEYPOINT_Z 2 13#define KEYPOINT_ANGLE 3 14 15///////////////////////////////////////////////////////////// 16 17#ifdef ORB_RESPONSES 18 19__kernel void 20ORB_HarrisResponses(__global const uchar* imgbuf, int imgstep, int imgoffset0, 21 __global const int* layerinfo, __global const int* keypoints, 22 __global float* responses, int nkeypoints ) 23{ 24 int idx = get_global_id(0); 25 if( idx < nkeypoints ) 26 { 27 __global const int* kpt = keypoints + idx*KEYPOINT_SIZE; 28 __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE; 29 __global const uchar* img = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] + 30 (kpt[KEYPOINT_Y] - blockSize/2)*imgstep + (kpt[KEYPOINT_X] - blockSize/2); 31 32 int i, j; 33 int a = 0, b = 0, c = 0; 34 for( i = 0; i < blockSize; i++, img += imgstep-blockSize ) 35 { 36 for( j = 0; j < blockSize; j++, img++ ) 37 { 38 int Ix = (img[1] - img[-1])*2 + img[-imgstep+1] - img[-imgstep-1] + img[imgstep+1] - img[imgstep-1]; 39 int Iy = (img[imgstep] - img[-imgstep])*2 + img[imgstep-1] - img[-imgstep-1] + img[imgstep+1] - img[-imgstep+1]; 40 a += Ix*Ix; 41 b += Iy*Iy; 42 c += Ix*Iy; 43 } 44 } 45 responses[idx] = ((float)a * b - (float)c * c - HARRIS_K * (float)(a + b) * (a + b))*scale_sq_sq; 46 } 47} 48 49#endif 50 51///////////////////////////////////////////////////////////// 52 53#ifdef ORB_ANGLES 54 55#define _DBL_EPSILON 2.2204460492503131e-16f 56#define atan2_p1 (0.9997878412794807f*57.29577951308232f) 57#define atan2_p3 (-0.3258083974640975f*57.29577951308232f) 58#define atan2_p5 (0.1555786518463281f*57.29577951308232f) 59#define atan2_p7 (-0.04432655554792128f*57.29577951308232f) 60 61inline float fastAtan2( float y, float x ) 62{ 63 float ax = fabs(x), ay = fabs(y); 64 float a, c, c2; 65 if( ax >= ay ) 66 { 67 c = ay/(ax + _DBL_EPSILON); 68 c2 = c*c; 69 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 70 } 71 else 72 { 73 c = ax/(ay + _DBL_EPSILON); 74 c2 = c*c; 75 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 76 } 77 if( x < 0 ) 78 a = 180.f - a; 79 if( y < 0 ) 80 a = 360.f - a; 81 return a; 82} 83 84 85__kernel void 86ORB_ICAngle(__global const uchar* imgbuf, int imgstep, int imgoffset0, 87 __global const int* layerinfo, __global const int* keypoints, 88 __global float* responses, const __global int* u_max, 89 int nkeypoints, int half_k ) 90{ 91 int idx = get_global_id(0); 92 if( idx < nkeypoints ) 93 { 94 __global const int* kpt = keypoints + idx*KEYPOINT_SIZE; 95 96 __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE; 97 __global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] + 98 kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X]; 99 100 int u, v, m_01 = 0, m_10 = 0; 101 102 // Treat the center line differently, v=0 103 for( u = -half_k; u <= half_k; u++ ) 104 m_10 += u * center[u]; 105 106 // Go line by line in the circular patch 107 for( v = 1; v <= half_k; v++ ) 108 { 109 // Proceed over the two lines 110 int v_sum = 0; 111 int d = u_max[v]; 112 for( u = -d; u <= d; u++ ) 113 { 114 int val_plus = center[u + v*imgstep], val_minus = center[u - v*imgstep]; 115 v_sum += (val_plus - val_minus); 116 m_10 += u * (val_plus + val_minus); 117 } 118 m_01 += v * v_sum; 119 } 120 121 // we do not use OpenCL's atan2 intrinsic, 122 // because we want to get _exactly_ the same results as the CPU version 123 responses[idx] = fastAtan2((float)m_01, (float)m_10); 124 } 125} 126 127#endif 128 129///////////////////////////////////////////////////////////// 130 131#ifdef ORB_DESCRIPTORS 132 133__kernel void 134ORB_computeDescriptor(__global const uchar* imgbuf, int imgstep, int imgoffset0, 135 __global const int* layerinfo, __global const int* keypoints, 136 __global uchar* _desc, const __global int* pattern, 137 int nkeypoints, int dsize ) 138{ 139 int idx = get_global_id(0); 140 if( idx < nkeypoints ) 141 { 142 int i; 143 __global const int* kpt = keypoints + idx*ORIENTED_KEYPOINT_SIZE; 144 145 __global const int* layer = layerinfo + kpt[KEYPOINT_Z]*LAYERINFO_SIZE; 146 __global const uchar* center = imgbuf + imgoffset0 + layer[LAYERINFO_OFS] + 147 kpt[KEYPOINT_Y]*imgstep + kpt[KEYPOINT_X]; 148 float angle = as_float(kpt[KEYPOINT_ANGLE]); 149 angle *= 0.01745329251994329547f; 150 151 float cosa; 152 float sina = sincos(angle, &cosa); 153 154 __global uchar* desc = _desc + idx*dsize; 155 156 #define GET_VALUE(idx) \ 157 center[mad24(convert_int_rte(pattern[(idx)*2] * sina + pattern[(idx)*2+1] * cosa), imgstep, \ 158 convert_int_rte(pattern[(idx)*2] * cosa - pattern[(idx)*2+1] * sina))] 159 160 for( i = 0; i < dsize; i++ ) 161 { 162 int val; 163 #if WTA_K == 2 164 int t0, t1; 165 166 t0 = GET_VALUE(0); t1 = GET_VALUE(1); 167 val = t0 < t1; 168 169 t0 = GET_VALUE(2); t1 = GET_VALUE(3); 170 val |= (t0 < t1) << 1; 171 172 t0 = GET_VALUE(4); t1 = GET_VALUE(5); 173 val |= (t0 < t1) << 2; 174 175 t0 = GET_VALUE(6); t1 = GET_VALUE(7); 176 val |= (t0 < t1) << 3; 177 178 t0 = GET_VALUE(8); t1 = GET_VALUE(9); 179 val |= (t0 < t1) << 4; 180 181 t0 = GET_VALUE(10); t1 = GET_VALUE(11); 182 val |= (t0 < t1) << 5; 183 184 t0 = GET_VALUE(12); t1 = GET_VALUE(13); 185 val |= (t0 < t1) << 6; 186 187 t0 = GET_VALUE(14); t1 = GET_VALUE(15); 188 val |= (t0 < t1) << 7; 189 190 pattern += 16*2; 191 192 #elif WTA_K == 3 193 int t0, t1, t2; 194 195 t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2); 196 val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0); 197 198 t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5); 199 val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 2; 200 201 t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8); 202 val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4; 203 204 t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11); 205 val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6; 206 207 pattern += 12*2; 208 209 #elif WTA_K == 4 210 int t0, t1, t2, t3, k; 211 int a, b; 212 213 t0 = GET_VALUE(0); t1 = GET_VALUE(1); 214 t2 = GET_VALUE(2); t3 = GET_VALUE(3); 215 a = 0, b = 2; 216 if( t1 > t0 ) t0 = t1, a = 1; 217 if( t3 > t2 ) t2 = t3, b = 3; 218 k = t0 > t2 ? a : b; 219 val = k; 220 221 t0 = GET_VALUE(4); t1 = GET_VALUE(5); 222 t2 = GET_VALUE(6); t3 = GET_VALUE(7); 223 a = 0, b = 2; 224 if( t1 > t0 ) t0 = t1, a = 1; 225 if( t3 > t2 ) t2 = t3, b = 3; 226 k = t0 > t2 ? a : b; 227 val |= k << 2; 228 229 t0 = GET_VALUE(8); t1 = GET_VALUE(9); 230 t2 = GET_VALUE(10); t3 = GET_VALUE(11); 231 a = 0, b = 2; 232 if( t1 > t0 ) t0 = t1, a = 1; 233 if( t3 > t2 ) t2 = t3, b = 3; 234 k = t0 > t2 ? a : b; 235 val |= k << 4; 236 237 t0 = GET_VALUE(12); t1 = GET_VALUE(13); 238 t2 = GET_VALUE(14); t3 = GET_VALUE(15); 239 a = 0, b = 2; 240 if( t1 > t0 ) t0 = t1, a = 1; 241 if( t3 > t2 ) t2 = t3, b = 3; 242 k = t0 > t2 ? a : b; 243 val |= k << 6; 244 245 pattern += 16*2; 246 #else 247 #error "unknown/undefined WTA_K value; should be 2, 3 or 4" 248 #endif 249 desc[i] = (uchar)val; 250 } 251 } 252} 253 254#endif 255