1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4
5
6 #include "precomp.hpp"
7 #include "opencl_kernels_core.hpp"
8 #include "stat.hpp"
9
10 /****************************************************************************************\
11 * norm *
12 \****************************************************************************************/
13
14 namespace cv { namespace hal {
15
16 extern const uchar popCountTable[256] =
17 {
18 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
19 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
20 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
21 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
22 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
23 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
24 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
25 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
26 };
27
28 static const uchar popCountTable2[] =
29 {
30 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
31 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
32 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
33 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
34 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
35 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
36 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
37 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
38 };
39
40 static const uchar popCountTable4[] =
41 {
42 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
43 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
44 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
50 };
51
52
normHamming(const uchar * a,int n,int cellSize)53 int normHamming(const uchar* a, int n, int cellSize)
54 {
55 if( cellSize == 1 )
56 return normHamming(a, n);
57 const uchar* tab = 0;
58 if( cellSize == 2 )
59 tab = popCountTable2;
60 else if( cellSize == 4 )
61 tab = popCountTable4;
62 else
63 return -1;
64 int i = 0;
65 int result = 0;
66 #if CV_SIMD
67 v_uint64 t = vx_setzero_u64();
68 if ( cellSize == 2)
69 {
70 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
71 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
72 {
73 v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
74 t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask));
75 }
76 }
77 else // cellSize == 4
78 {
79 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
80 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
81 {
82 v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
83 v_uint16 a1 = a0 | (a0 >> 2);
84 t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask));
85
86 }
87 }
88 result += (int)v_reduce_sum(t);
89 vx_cleanup();
90 #elif CV_ENABLE_UNROLLED
91 for( ; i <= n - 4; i += 4 )
92 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
93 #endif
94 for( ; i < n; i++ )
95 result += tab[a[i]];
96 return result;
97 }
98
normHamming(const uchar * a,const uchar * b,int n,int cellSize)99 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
100 {
101 if( cellSize == 1 )
102 return normHamming(a, b, n);
103 const uchar* tab = 0;
104 if( cellSize == 2 )
105 tab = popCountTable2;
106 else if( cellSize == 4 )
107 tab = popCountTable4;
108 else
109 return -1;
110 int i = 0;
111 int result = 0;
112 #if CV_SIMD
113 v_uint64 t = vx_setzero_u64();
114 if ( cellSize == 2)
115 {
116 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
117 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
118 {
119 v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
120 t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask));
121 }
122 }
123 else // cellSize == 4
124 {
125 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
126 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
127 {
128 v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
129 v_uint16 ab1 = ab0 | (ab0 >> 2);
130 t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask));
131 }
132 }
133 result += (int)v_reduce_sum(t);
134 vx_cleanup();
135 #elif CV_ENABLE_UNROLLED
136 for( ; i <= n - 4; i += 4 )
137 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
138 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
139 #endif
140 for( ; i < n; i++ )
141 result += tab[a[i] ^ b[i]];
142 return result;
143 }
144
normL2Sqr_(const float * a,const float * b,int n)145 float normL2Sqr_(const float* a, const float* b, int n)
146 {
147 int j = 0; float d = 0.f;
148 #if CV_SIMD
149 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
150 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
151 for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
152 {
153 v_float32 t0 = vx_load(a + j) - vx_load(b + j);
154 v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes);
155 v_d0 = v_muladd(t0, t0, v_d0);
156 v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes);
157 v_d1 = v_muladd(t1, t1, v_d1);
158 v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes);
159 v_d2 = v_muladd(t2, t2, v_d2);
160 v_d3 = v_muladd(t3, t3, v_d3);
161 }
162 d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
163 #endif
164 for( ; j < n; j++ )
165 {
166 float t = a[j] - b[j];
167 d += t*t;
168 }
169 return d;
170 }
171
172
normL1_(const float * a,const float * b,int n)173 float normL1_(const float* a, const float* b, int n)
174 {
175 int j = 0; float d = 0.f;
176 #if CV_SIMD
177 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
178 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
179 for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
180 {
181 v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j));
182 v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes));
183 v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes));
184 v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes));
185 }
186 d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
187 #endif
188 for( ; j < n; j++ )
189 d += std::abs(a[j] - b[j]);
190 return d;
191 }
192
normL1_(const uchar * a,const uchar * b,int n)193 int normL1_(const uchar* a, const uchar* b, int n)
194 {
195 int j = 0, d = 0;
196 #if CV_SIMD
197 for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes)
198 d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) +
199 v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) +
200 v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) +
201 v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes));
202 #endif
203 for( ; j < n; j++ )
204 d += std::abs(a[j] - b[j]);
205 return d;
206 }
207
208 } //cv::hal
209
210 //==================================================================================================
211
212 template<typename T, typename ST> int
normInf_(const T * src,const uchar * mask,ST * _result,int len,int cn)213 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
214 {
215 ST result = *_result;
216 if( !mask )
217 {
218 result = std::max(result, normInf<T, ST>(src, len*cn));
219 }
220 else
221 {
222 for( int i = 0; i < len; i++, src += cn )
223 if( mask[i] )
224 {
225 for( int k = 0; k < cn; k++ )
226 result = std::max(result, ST(cv_abs(src[k])));
227 }
228 }
229 *_result = result;
230 return 0;
231 }
232
233 template<typename T, typename ST> int
normL1_(const T * src,const uchar * mask,ST * _result,int len,int cn)234 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
235 {
236 ST result = *_result;
237 if( !mask )
238 {
239 result += normL1<T, ST>(src, len*cn);
240 }
241 else
242 {
243 for( int i = 0; i < len; i++, src += cn )
244 if( mask[i] )
245 {
246 for( int k = 0; k < cn; k++ )
247 result += cv_abs(src[k]);
248 }
249 }
250 *_result = result;
251 return 0;
252 }
253
254 template<typename T, typename ST> int
normL2_(const T * src,const uchar * mask,ST * _result,int len,int cn)255 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
256 {
257 ST result = *_result;
258 if( !mask )
259 {
260 result += normL2Sqr<T, ST>(src, len*cn);
261 }
262 else
263 {
264 for( int i = 0; i < len; i++, src += cn )
265 if( mask[i] )
266 {
267 for( int k = 0; k < cn; k++ )
268 {
269 T v = src[k];
270 result += (ST)v*v;
271 }
272 }
273 }
274 *_result = result;
275 return 0;
276 }
277
278 template<typename T, typename ST> int
normDiffInf_(const T * src1,const T * src2,const uchar * mask,ST * _result,int len,int cn)279 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
280 {
281 ST result = *_result;
282 if( !mask )
283 {
284 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
285 }
286 else
287 {
288 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
289 if( mask[i] )
290 {
291 for( int k = 0; k < cn; k++ )
292 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
293 }
294 }
295 *_result = result;
296 return 0;
297 }
298
299 template<typename T, typename ST> int
normDiffL1_(const T * src1,const T * src2,const uchar * mask,ST * _result,int len,int cn)300 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
301 {
302 ST result = *_result;
303 if( !mask )
304 {
305 result += normL1<T, ST>(src1, src2, len*cn);
306 }
307 else
308 {
309 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
310 if( mask[i] )
311 {
312 for( int k = 0; k < cn; k++ )
313 result += std::abs(src1[k] - src2[k]);
314 }
315 }
316 *_result = result;
317 return 0;
318 }
319
320 template<typename T, typename ST> int
normDiffL2_(const T * src1,const T * src2,const uchar * mask,ST * _result,int len,int cn)321 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
322 {
323 ST result = *_result;
324 if( !mask )
325 {
326 result += normL2Sqr<T, ST>(src1, src2, len*cn);
327 }
328 else
329 {
330 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
331 if( mask[i] )
332 {
333 for( int k = 0; k < cn; k++ )
334 {
335 ST v = src1[k] - src2[k];
336 result += v*v;
337 }
338 }
339 }
340 *_result = result;
341 return 0;
342 }
343
344 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
345 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
346 { return norm##L##_(src, mask, r, len, cn); } \
347 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
348 const uchar* mask, ntype* r, int len, int cn) \
349 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
350
351 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
352 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
353 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
354 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
355
356 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
357 CV_DEF_NORM_ALL(8s, schar, int, int, int)
358 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
359 CV_DEF_NORM_ALL(16s, short, int, int, double)
360 CV_DEF_NORM_ALL(32s, int, int, double, double)
361 CV_DEF_NORM_ALL(32f, float, float, double, double)
362 CV_DEF_NORM_ALL(64f, double, double, double, double)
363
364
365 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
366 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
367
getNormFunc(int normType,int depth)368 static NormFunc getNormFunc(int normType, int depth)
369 {
370 static NormFunc normTab[3][8] =
371 {
372 {
373 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
374 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
375 },
376 {
377 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
378 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
379 },
380 {
381 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
382 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
383 }
384 };
385
386 return normTab[normType][depth];
387 }
388
getNormDiffFunc(int normType,int depth)389 static NormDiffFunc getNormDiffFunc(int normType, int depth)
390 {
391 static NormDiffFunc normDiffTab[3][8] =
392 {
393 {
394 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
395 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
396 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
397 (NormDiffFunc)normDiffInf_64f, 0
398 },
399 {
400 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
401 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
402 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
403 (NormDiffFunc)normDiffL1_64f, 0
404 },
405 {
406 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
407 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
408 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
409 (NormDiffFunc)normDiffL2_64f, 0
410 }
411 };
412
413 return normDiffTab[normType][depth];
414 }
415
416 #ifdef HAVE_OPENCL
417
ocl_norm(InputArray _src,int normType,InputArray _mask,double & result)418 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
419 {
420 const ocl::Device & d = ocl::Device::getDefault();
421
422 #ifdef __ANDROID__
423 if (d.isNVidia())
424 return false;
425 #endif
426 const int cn = _src.channels();
427 if (cn > 4)
428 return false;
429 int type = _src.type(), depth = CV_MAT_DEPTH(type);
430 bool doubleSupport = d.doubleFPConfig() > 0,
431 haveMask = _mask.kind() != _InputArray::NONE;
432
433 if (depth >= CV_16F)
434 return false; // TODO: support FP16
435
436 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
437 (!doubleSupport && depth == CV_64F))
438 return false;
439
440 UMat src = _src.getUMat();
441
442 if (normType == NORM_INF)
443 {
444 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
445 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
446 return false;
447 }
448 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
449 {
450 Scalar sc;
451 bool unstype = depth == CV_8U || depth == CV_16U;
452
453 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
454 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
455 return false;
456
457 double s = 0.0;
458 for (int i = 0; i < (haveMask ? cn : 1); ++i)
459 s += sc[i];
460
461 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
462 }
463
464 return true;
465 }
466
467 #endif
468
469 #ifdef HAVE_IPP
ipp_norm(Mat & src,int normType,Mat & mask,double & result)470 static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
471 {
472 CV_INSTRUMENT_REGION_IPP();
473
474 #if IPP_VERSION_X100 >= 700
475 size_t total_size = src.total();
476 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
477
478 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
479 && cols > 0 && (size_t)rows*cols == total_size )
480 {
481 if( !mask.empty() )
482 {
483 IppiSize sz = { cols, rows };
484 int type = src.type();
485
486 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
487 ippiMaskNormFuncC1 ippiNorm_C1MR =
488 normType == NORM_INF ?
489 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
490 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
491 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
492 0) :
493 normType == NORM_L1 ?
494 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
495 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
496 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
497 0) :
498 normType == NORM_L2 || normType == NORM_L2SQR ?
499 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
500 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
501 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
502 0) : 0;
503 if( ippiNorm_C1MR )
504 {
505 Ipp64f norm;
506 if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
507 {
508 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
509 return true;
510 }
511 }
512 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
513 ippiMaskNormFuncC3 ippiNorm_C3CMR =
514 normType == NORM_INF ?
515 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
516 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
517 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
518 0) :
519 normType == NORM_L1 ?
520 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
521 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
522 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
523 0) :
524 normType == NORM_L2 || normType == NORM_L2SQR ?
525 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
526 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
527 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
528 0) : 0;
529 if( ippiNorm_C3CMR )
530 {
531 Ipp64f norm1, norm2, norm3;
532 if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
533 CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
534 CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
535 {
536 Ipp64f norm =
537 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
538 normType == NORM_L1 ? norm1 + norm2 + norm3 :
539 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
540 0;
541 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
542 return true;
543 }
544 }
545 }
546 else
547 {
548 IppiSize sz = { cols*src.channels(), rows };
549 int type = src.depth();
550
551 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
552 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
553 ippiNormFuncHint ippiNormHint =
554 normType == NORM_L1 ?
555 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
556 0) :
557 normType == NORM_L2 || normType == NORM_L2SQR ?
558 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
559 0) : 0;
560 ippiNormFuncNoHint ippiNorm =
561 normType == NORM_INF ?
562 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
563 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
564 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
565 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
566 0) :
567 normType == NORM_L1 ?
568 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
569 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
570 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
571 0) :
572 normType == NORM_L2 || normType == NORM_L2SQR ?
573 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
574 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
575 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
576 0) : 0;
577 if( ippiNormHint || ippiNorm )
578 {
579 Ipp64f norm;
580 IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) :
581 CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm);
582 if( ret >= 0 )
583 {
584 result = (normType == NORM_L2SQR) ? norm * norm : norm;
585 return true;
586 }
587 }
588 }
589 }
590 #else
591 CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
592 #endif
593 return false;
594 } // ipp_norm()
595 #endif // HAVE_IPP
596
norm(InputArray _src,int normType,InputArray _mask)597 double norm( InputArray _src, int normType, InputArray _mask )
598 {
599 CV_INSTRUMENT_REGION();
600
601 normType &= NORM_TYPE_MASK;
602 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
603 normType == NORM_L2 || normType == NORM_L2SQR ||
604 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
605
606 #if defined HAVE_OPENCL || defined HAVE_IPP
607 double _result = 0;
608 #endif
609
610 #ifdef HAVE_OPENCL
611 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
612 ocl_norm(_src, normType, _mask, _result),
613 _result)
614 #endif
615
616 Mat src = _src.getMat(), mask = _mask.getMat();
617 CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);
618
619 int depth = src.depth(), cn = src.channels();
620 if( src.isContinuous() && mask.empty() )
621 {
622 size_t len = src.total()*cn;
623 if( len == (size_t)(int)len )
624 {
625 if( depth == CV_32F )
626 {
627 const float* data = src.ptr<float>();
628
629 if( normType == NORM_L2 )
630 {
631 double result = 0;
632 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
633 return std::sqrt(result);
634 }
635 if( normType == NORM_L2SQR )
636 {
637 double result = 0;
638 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
639 return result;
640 }
641 if( normType == NORM_L1 )
642 {
643 double result = 0;
644 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
645 return result;
646 }
647 if( normType == NORM_INF )
648 {
649 float result = 0;
650 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
651 return result;
652 }
653 }
654 if( depth == CV_8U )
655 {
656 const uchar* data = src.ptr<uchar>();
657
658 if( normType == NORM_HAMMING )
659 {
660 return hal::normHamming(data, (int)len);
661 }
662
663 if( normType == NORM_HAMMING2 )
664 {
665 return hal::normHamming(data, (int)len, 2);
666 }
667 }
668 }
669 }
670
671 CV_Assert( mask.empty() || mask.type() == CV_8U );
672
673 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
674 {
675 if( !mask.empty() )
676 {
677 Mat temp;
678 bitwise_and(src, mask, temp);
679 return norm(temp, normType);
680 }
681 int cellSize = normType == NORM_HAMMING ? 1 : 2;
682
683 const Mat* arrays[] = {&src, 0};
684 uchar* ptrs[1] = {};
685 NAryMatIterator it(arrays, ptrs);
686 int total = (int)it.size;
687 int result = 0;
688
689 for( size_t i = 0; i < it.nplanes; i++, ++it )
690 {
691 result += hal::normHamming(ptrs[0], total, cellSize);
692 }
693
694 return result;
695 }
696
697 NormFunc func = getNormFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
698 CV_Assert( func != 0 );
699
700 const Mat* arrays[] = {&src, &mask, 0};
701 uchar* ptrs[2] = {};
702 union
703 {
704 double d;
705 int i;
706 float f;
707 }
708 result;
709 result.d = 0;
710 NAryMatIterator it(arrays, ptrs);
711 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
712
713 if ((normType == NORM_L1 && depth <= CV_16S) ||
714 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
715 {
716 // special case to handle "integer" overflow in accumulator
717 const size_t esz = src.elemSize();
718 const int total = (int)it.size;
719 const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
720 const int blockSize = std::min(total, intSumBlockSize);
721 int isum = 0;
722 int count = 0;
723
724 for (size_t i = 0; i < it.nplanes; i++, ++it)
725 {
726 for (int j = 0; j < total; j += blockSize)
727 {
728 int bsz = std::min(total - j, blockSize);
729 func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
730 count += bsz;
731 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
732 {
733 result.d += isum;
734 isum = 0;
735 count = 0;
736 }
737 ptrs[0] += bsz*esz;
738 if (ptrs[1])
739 ptrs[1] += bsz;
740 }
741 }
742 }
743 else if (depth == CV_16F)
744 {
745 const size_t esz = src.elemSize();
746 const int total = (int)it.size;
747 const int blockSize = std::min(total, divUp(1024, cn));
748 AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
749 float* data0 = fltbuf.data();
750 for (size_t i = 0; i < it.nplanes; i++, ++it)
751 {
752 for (int j = 0; j < total; j += blockSize)
753 {
754 int bsz = std::min(total - j, blockSize);
755 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
756 func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
757 ptrs[0] += bsz*esz;
758 if (ptrs[1])
759 ptrs[1] += bsz;
760 }
761 }
762 }
763 else
764 {
765 // generic implementation
766 for (size_t i = 0; i < it.nplanes; i++, ++it)
767 {
768 func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
769 }
770 }
771
772 if( normType == NORM_INF )
773 {
774 if(depth == CV_64F || depth == CV_16F)
775 return result.d;
776 else if (depth == CV_32F)
777 return result.f;
778 else
779 return result.i;
780 }
781 else if( normType == NORM_L2 )
782 return std::sqrt(result.d);
783
784 return result.d;
785 }
786
787 //==================================================================================================
788
789 #ifdef HAVE_OPENCL
ocl_norm(InputArray _src1,InputArray _src2,int normType,InputArray _mask,double & result)790 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
791 {
792 #ifdef __ANDROID__
793 if (ocl::Device::getDefault().isNVidia())
794 return false;
795 #endif
796
797 Scalar sc1, sc2;
798 int cn = _src1.channels();
799 if (cn > 4)
800 return false;
801 int type = _src1.type(), depth = CV_MAT_DEPTH(type);
802 bool relative = (normType & NORM_RELATIVE) != 0;
803 normType &= ~NORM_RELATIVE;
804 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
805
806 #ifdef __APPLE__
807 if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
808 return false;
809 #endif
810
811 if (normsum)
812 {
813 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
814 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
815 return false;
816 }
817 else
818 {
819 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
820 false, _src2, relative ? &sc2[0] : NULL))
821 return false;
822 cn = 1;
823 }
824
825 double s2 = 0;
826 for (int i = 0; i < cn; ++i)
827 {
828 result += sc1[i];
829 if (relative)
830 s2 += sc2[i];
831 }
832
833 if (normType == NORM_L2)
834 {
835 result = std::sqrt(result);
836 if (relative)
837 s2 = std::sqrt(s2);
838 }
839
840 if (relative)
841 result /= (s2 + DBL_EPSILON);
842
843 return true;
844 } // ocl_norm()
845 #endif // HAVE_OPENCL
846
847 #ifdef HAVE_IPP
ipp_norm(InputArray _src1,InputArray _src2,int normType,InputArray _mask,double & result)848 static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
849 {
850 CV_INSTRUMENT_REGION_IPP();
851
852 #if IPP_VERSION_X100 >= 700
853 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
854
855 if( normType & CV_RELATIVE )
856 {
857 normType &= NORM_TYPE_MASK;
858
859 size_t total_size = src1.total();
860 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
861 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
862 && cols > 0 && (size_t)rows*cols == total_size )
863 {
864 if( !mask.empty() )
865 {
866 IppiSize sz = { cols, rows };
867 int type = src1.type();
868
869 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
870 ippiMaskNormDiffFuncC1 ippiNormRel_C1MR =
871 normType == NORM_INF ?
872 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR :
873 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR :
874 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR :
875 0) :
876 normType == NORM_L1 ?
877 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR :
878 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR :
879 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR :
880 0) :
881 normType == NORM_L2 || normType == NORM_L2SQR ?
882 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR :
883 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR :
884 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR :
885 0) : 0;
886 if( ippiNormRel_C1MR )
887 {
888 Ipp64f norm;
889 if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
890 {
891 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
892 return true;
893 }
894 }
895 }
896 else
897 {
898 IppiSize sz = { cols*src1.channels(), rows };
899 int type = src1.depth();
900
901 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
902 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
903 ippiNormRelFuncHint ippiNormRelHint =
904 normType == NORM_L1 ?
905 (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
906 0) :
907 normType == NORM_L2 || normType == NORM_L2SQR ?
908 (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
909 0) : 0;
910 ippiNormRelFuncNoHint ippiNormRel =
911 normType == NORM_INF ?
912 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
913 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
914 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
915 type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
916 0) :
917 normType == NORM_L1 ?
918 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
919 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
920 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
921 0) :
922 normType == NORM_L2 || normType == NORM_L2SQR ?
923 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
924 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
925 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
926 0) : 0;
927 if( ippiNormRelHint || ippiNormRel )
928 {
929 Ipp64f norm;
930 IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
931 CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
932 if( ret >= 0 )
933 {
934 result = (normType == NORM_L2SQR) ? norm * norm : norm;
935 return true;
936 }
937 }
938 }
939 }
940 return false;
941 }
942
943 normType &= NORM_TYPE_MASK;
944
945 size_t total_size = src1.total();
946 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
947 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
948 && cols > 0 && (size_t)rows*cols == total_size )
949 {
950 if( !mask.empty() )
951 {
952 IppiSize sz = { cols, rows };
953 int type = src1.type();
954
955 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
956 ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR =
957 normType == NORM_INF ?
958 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
959 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
960 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
961 0) :
962 normType == NORM_L1 ?
963 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
964 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
965 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
966 0) :
967 normType == NORM_L2 || normType == NORM_L2SQR ?
968 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
969 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
970 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
971 0) : 0;
972 if( ippiNormDiff_C1MR )
973 {
974 Ipp64f norm;
975 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
976 {
977 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
978 return true;
979 }
980 }
981 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
982 ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR =
983 normType == NORM_INF ?
984 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
985 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
986 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
987 0) :
988 normType == NORM_L1 ?
989 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
990 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
991 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
992 0) :
993 normType == NORM_L2 || normType == NORM_L2SQR ?
994 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
995 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
996 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
997 0) : 0;
998 if (cv::ipp::getIppTopFeatures() & (
999 #if IPP_VERSION_X100 >= 201700
1000 ippCPUID_AVX512F |
1001 #endif
1002 ippCPUID_AVX2)
1003 ) // IPP_DISABLE_NORM_16UC3_mask_small (#11399)
1004 {
1005 if (normType == NORM_L1 && type == CV_16UC3 && sz.width < 16)
1006 return false;
1007 }
1008 if( ippiNormDiff_C3CMR )
1009 {
1010 Ipp64f norm1, norm2, norm3;
1011 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
1012 CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
1013 CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
1014 {
1015 Ipp64f norm =
1016 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
1017 normType == NORM_L1 ? norm1 + norm2 + norm3 :
1018 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
1019 0;
1020 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
1021 return true;
1022 }
1023 }
1024 }
1025 else
1026 {
1027 IppiSize sz = { cols*src1.channels(), rows };
1028 int type = src1.depth();
1029
1030 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
1031 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
1032 ippiNormDiffFuncHint ippiNormDiffHint =
1033 normType == NORM_L1 ?
1034 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
1035 0) :
1036 normType == NORM_L2 || normType == NORM_L2SQR ?
1037 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
1038 0) : 0;
1039 ippiNormDiffFuncNoHint ippiNormDiff =
1040 normType == NORM_INF ?
1041 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
1042 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
1043 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
1044 type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
1045 0) :
1046 normType == NORM_L1 ?
1047 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
1048 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
1049 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
1050 0) :
1051 normType == NORM_L2 || normType == NORM_L2SQR ?
1052 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
1053 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
1054 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
1055 0) : 0;
1056 if( ippiNormDiffHint || ippiNormDiff )
1057 {
1058 Ipp64f norm;
1059 IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
1060 CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
1061 if( ret >= 0 )
1062 {
1063 result = (normType == NORM_L2SQR) ? norm * norm : norm;
1064 return true;
1065 }
1066 }
1067 }
1068 }
1069 #else
1070 CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
1071 #endif
1072 return false;
1073 } // ipp_norm
1074 #endif // HAVE_IPP
1075
1076
norm(InputArray _src1,InputArray _src2,int normType,InputArray _mask)1077 double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
1078 {
1079 CV_INSTRUMENT_REGION();
1080
1081 CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch");
1082 CV_Assert(_src1.sameSize(_src2));
1083
1084 #if defined HAVE_OPENCL || defined HAVE_IPP
1085 double _result = 0;
1086 #endif
1087
1088 #ifdef HAVE_OPENCL
1089 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
1090 ocl_norm(_src1, _src2, normType, _mask, _result),
1091 _result)
1092 #endif
1093
1094 CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);
1095
1096 if( normType & CV_RELATIVE )
1097 {
1098 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1099 }
1100
1101 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1102 int depth = src1.depth(), cn = src1.channels();
1103
1104 normType &= 7;
1105 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
1106 normType == NORM_L2 || normType == NORM_L2SQR ||
1107 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
1108
1109 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1110 {
1111 size_t len = src1.total()*src1.channels();
1112 if( len == (size_t)(int)len )
1113 {
1114 if( src1.depth() == CV_32F )
1115 {
1116 const float* data1 = src1.ptr<float>();
1117 const float* data2 = src2.ptr<float>();
1118
1119 if( normType == NORM_L2 )
1120 {
1121 double result = 0;
1122 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1123 return std::sqrt(result);
1124 }
1125 if( normType == NORM_L2SQR )
1126 {
1127 double result = 0;
1128 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1129 return result;
1130 }
1131 if( normType == NORM_L1 )
1132 {
1133 double result = 0;
1134 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1135 return result;
1136 }
1137 if( normType == NORM_INF )
1138 {
1139 float result = 0;
1140 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
1141 return result;
1142 }
1143 }
1144 }
1145 }
1146
1147 CV_Assert( mask.empty() || mask.type() == CV_8U );
1148
1149 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1150 {
1151 if( !mask.empty() )
1152 {
1153 Mat temp;
1154 bitwise_xor(src1, src2, temp);
1155 bitwise_and(temp, mask, temp);
1156 return norm(temp, normType);
1157 }
1158 int cellSize = normType == NORM_HAMMING ? 1 : 2;
1159
1160 const Mat* arrays[] = {&src1, &src2, 0};
1161 uchar* ptrs[2] = {};
1162 NAryMatIterator it(arrays, ptrs);
1163 int total = (int)it.size;
1164 int result = 0;
1165
1166 for( size_t i = 0; i < it.nplanes; i++, ++it )
1167 {
1168 result += hal::normHamming(ptrs[0], ptrs[1], total, cellSize);
1169 }
1170
1171 return result;
1172 }
1173
1174 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
1175 CV_Assert( func != 0 );
1176
1177 const Mat* arrays[] = {&src1, &src2, &mask, 0};
1178 uchar* ptrs[3] = {};
1179 union
1180 {
1181 double d;
1182 float f;
1183 int i;
1184 unsigned u;
1185 }
1186 result;
1187 result.d = 0;
1188 NAryMatIterator it(arrays, ptrs);
1189 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
1190
1191 if ((normType == NORM_L1 && depth <= CV_16S) ||
1192 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
1193 {
1194 // special case to handle "integer" overflow in accumulator
1195 const size_t esz = src1.elemSize();
1196 const int total = (int)it.size;
1197 const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
1198 const int blockSize = std::min(total, intSumBlockSize);
1199 int isum = 0;
1200 int count = 0;
1201
1202 for (size_t i = 0; i < it.nplanes; i++, ++it)
1203 {
1204 for (int j = 0; j < total; j += blockSize)
1205 {
1206 int bsz = std::min(total - j, blockSize);
1207 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
1208 count += bsz;
1209 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
1210 {
1211 result.d += isum;
1212 isum = 0;
1213 count = 0;
1214 }
1215 ptrs[0] += bsz*esz;
1216 ptrs[1] += bsz*esz;
1217 if (ptrs[2])
1218 ptrs[2] += bsz;
1219 }
1220 }
1221 }
1222 else if (depth == CV_16F)
1223 {
1224 const size_t esz = src1.elemSize();
1225 const int total = (int)it.size;
1226 const int blockSize = std::min(total, divUp(512, cn));
1227 AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
1228 float* data0 = fltbuf.data();
1229 float* data1 = fltbuf.data() + blockSize * cn;
1230 for (size_t i = 0; i < it.nplanes; i++, ++it)
1231 {
1232 for (int j = 0; j < total; j += blockSize)
1233 {
1234 int bsz = std::min(total - j, blockSize);
1235 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
1236 hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
1237 func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
1238 ptrs[0] += bsz*esz;
1239 ptrs[1] += bsz*esz;
1240 if (ptrs[2])
1241 ptrs[2] += bsz;
1242 }
1243 }
1244 }
1245 else
1246 {
1247 // generic implementation
1248 for (size_t i = 0; i < it.nplanes; i++, ++it)
1249 {
1250 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
1251 }
1252 }
1253
1254 if( normType == NORM_INF )
1255 {
1256 if (depth == CV_64F || depth == CV_16F)
1257 return result.d;
1258 else if (depth == CV_32F)
1259 return result.f;
1260 else
1261 return result.u;
1262 }
1263 else if( normType == NORM_L2 )
1264 return std::sqrt(result.d);
1265
1266 return result.d;
1267 }
1268
operator ()(const unsigned char * a,const unsigned char * b,int size) const1269 cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
1270 {
1271 return cv::hal::normHamming(a, b, size);
1272 }
1273
PSNR(InputArray _src1,InputArray _src2,double R)1274 double PSNR(InputArray _src1, InputArray _src2, double R)
1275 {
1276 CV_INSTRUMENT_REGION();
1277
1278 //Input arrays must have depth CV_8U
1279 CV_Assert( _src1.type() == _src2.type() );
1280
1281 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
1282 return 20*log10(R/(diff+DBL_EPSILON));
1283 }
1284
1285
1286 #ifdef HAVE_OPENCL
ocl_normalize(InputArray _src,InputOutputArray _dst,InputArray _mask,int dtype,double scale,double delta)1287 static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
1288 double scale, double delta )
1289 {
1290 UMat src = _src.getUMat();
1291
1292 if( _mask.empty() )
1293 src.convertTo( _dst, dtype, scale, delta );
1294 else if (src.channels() <= 4)
1295 {
1296 const ocl::Device & dev = ocl::Device::getDefault();
1297
1298 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
1299 ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
1300 rowsPerWI = dev.isIntel() ? 4 : 1;
1301
1302 float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
1303 bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
1304 haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
1305 haveDelta = std::fabs(delta) > DBL_EPSILON,
1306 doubleSupport = dev.doubleFPConfig() > 0;
1307
1308 if (!haveScale && !haveDelta && stype == dtype)
1309 {
1310 _src.copyTo(_dst, _mask);
1311 return true;
1312 }
1313 if (haveZeroScale)
1314 {
1315 _dst.setTo(Scalar(delta), _mask);
1316 return true;
1317 }
1318
1319 if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
1320 return false;
1321
1322 char cvt[2][40];
1323 String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
1324 " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
1325 ocl::typeToStr(stype), ocl::typeToStr(dtype),
1326 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
1327 rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
1328 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
1329 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
1330 haveScale ? " -D HAVE_SCALE" : "",
1331 haveDelta ? " -D HAVE_DELTA" : "",
1332 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));
1333
1334 ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
1335 if (k.empty())
1336 return false;
1337
1338 UMat mask = _mask.getUMat(), dst = _dst.getUMat();
1339
1340 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
1341 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
1342 dstarg = ocl::KernelArg::ReadWrite(dst);
1343
1344 if (haveScale)
1345 {
1346 if (haveDelta)
1347 k.args(srcarg, maskarg, dstarg, fscale, fdelta);
1348 else
1349 k.args(srcarg, maskarg, dstarg, fscale);
1350 }
1351 else
1352 {
1353 if (haveDelta)
1354 k.args(srcarg, maskarg, dstarg, fdelta);
1355 else
1356 k.args(srcarg, maskarg, dstarg);
1357 }
1358
1359 size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
1360 return k.run(2, globalsize, NULL, false);
1361 }
1362 else
1363 {
1364 UMat temp;
1365 src.convertTo( temp, dtype, scale, delta );
1366 temp.copyTo( _dst, _mask );
1367 }
1368
1369 return true;
1370 } // ocl_normalize
1371 #endif // HAVE_OPENCL
1372
normalize(InputArray _src,InputOutputArray _dst,double a,double b,int norm_type,int rtype,InputArray _mask)1373 void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
1374 int norm_type, int rtype, InputArray _mask)
1375 {
1376 CV_INSTRUMENT_REGION();
1377
1378 double scale = 1, shift = 0;
1379 int type = _src.type(), depth = CV_MAT_DEPTH(type);
1380
1381 if( rtype < 0 )
1382 rtype = _dst.fixedType() ? _dst.depth() : depth;
1383
1384 if( norm_type == CV_MINMAX )
1385 {
1386 double smin = 0, smax = 0;
1387 double dmin = MIN( a, b ), dmax = MAX( a, b );
1388 minMaxIdx( _src, &smin, &smax, 0, 0, _mask );
1389 scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
1390 if( rtype == CV_32F )
1391 {
1392 scale = (float)scale;
1393 shift = (float)dmin - (float)(smin*scale);
1394 }
1395 else
1396 shift = dmin - smin*scale;
1397 }
1398 else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
1399 {
1400 scale = norm( _src, norm_type, _mask );
1401 scale = scale > DBL_EPSILON ? a/scale : 0.;
1402 shift = 0;
1403 }
1404 else
1405 CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
1406
1407 CV_OCL_RUN(_dst.isUMat(),
1408 ocl_normalize(_src, _dst, _mask, rtype, scale, shift))
1409
1410 Mat src = _src.getMat();
1411 if( _mask.empty() )
1412 src.convertTo( _dst, rtype, scale, shift );
1413 else
1414 {
1415 Mat temp;
1416 src.convertTo( temp, rtype, scale, shift );
1417 temp.copyTo( _dst, _mask );
1418 }
1419 }
1420
1421 } // namespace
1422