1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
8 //
9 //
10 // License Agreement
11 // For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
17 //
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
20 //
21 // * Redistribution's of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
23 //
24 // * Redistribution's in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
27 //
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
30 //
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
41 //
42 //M*/
43
44 /* ////////////////////////////////////////////////////////////////////
45 //
46 // Geometrical transforms on images and matrices: rotation, zoom etc.
47 //
48 // */
49
50 #include "precomp.hpp"
51 #include "opencl_kernels_imgproc.hpp"
52 #include "hal_replacement.hpp"
53 #include <opencv2/core/utils/configuration.private.hpp>
54 #include "opencv2/core/hal/intrin.hpp"
55 #include "opencv2/core/openvx/ovx_defs.hpp"
56 #include "opencv2/core/softfloat.hpp"
57 #include "imgwarp.hpp"
58
59 using namespace cv;
60
61 namespace cv
62 {
63
64 #if defined (HAVE_IPP) && (!IPP_DISABLE_WARPAFFINE || !IPP_DISABLE_WARPPERSPECTIVE || !IPP_DISABLE_REMAP)
65 typedef IppStatus (CV_STDCALL* ippiSetFunc)(const void*, void *, int, IppiSize);
66
67 template <int channels, typename Type>
IPPSetSimple(cv::Scalar value,void * dataPointer,int step,IppiSize & size,ippiSetFunc func)68 bool IPPSetSimple(cv::Scalar value, void *dataPointer, int step, IppiSize &size, ippiSetFunc func)
69 {
70 CV_INSTRUMENT_REGION_IPP();
71
72 Type values[channels];
73 for( int i = 0; i < channels; i++ )
74 values[i] = saturate_cast<Type>(value[i]);
75 return func(values, dataPointer, step, size) >= 0;
76 }
77
// Fills an image ROI with a constant border colour via the IPP ippiSet
// family. Only the layouts IPP provides are dispatched (1, 3 or 4 channels
// of 8u/16u/32f); any other channel/depth combination returns false so the
// caller can fall back to a generic implementation.
static bool IPPSet(const cv::Scalar &value, void *dataPointer, int step, IppiSize &size, int channels, int depth)
{
    CV_INSTRUMENT_REGION_IPP();

    if( channels == 1 )
    {
        // Single-channel: the C1R setters take the (saturated) scalar directly.
        switch( depth )
        {
        case CV_8U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_8u_C1R, saturate_cast<Ipp8u>(value[0]), (Ipp8u *)dataPointer, step, size) >= 0;
        case CV_16U:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_16u_C1R, saturate_cast<Ipp16u>(value[0]), (Ipp16u *)dataPointer, step, size) >= 0;
        case CV_32F:
            return CV_INSTRUMENT_FUN_IPP(ippiSet_32f_C1R, saturate_cast<Ipp32f>(value[0]), (Ipp32f *)dataPointer, step, size) >= 0;
        }
    }
    else
    {
        // Multi-channel: go through IPPSetSimple, which expands the scalar
        // into a per-channel array of the proper element type.
        if( channels == 3 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<3, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C3R);
            case CV_16U:
                return IPPSetSimple<3, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C3R);
            case CV_32F:
                return IPPSetSimple<3, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C3R);
            }
        }
        else if( channels == 4 )
        {
            switch( depth )
            {
            case CV_8U:
                return IPPSetSimple<4, Ipp8u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_8u_C4R);
            case CV_16U:
                return IPPSetSimple<4, Ipp16u>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_16u_C4R);
            case CV_32F:
                return IPPSetSimple<4, Ipp32f>(value, dataPointer, step, size, (ippiSetFunc)ippiSet_32f_C4R);
            }
        }
    }
    // Unsupported channel/depth combination: let the caller handle it.
    return false;
}
123 #endif
124
125 /************** interpolation formulas and tables ***************/
126
// Fixed-point format used by the integer remap kernels: weights are scaled
// by 2^15 so a full set of interpolation weights sums to INTER_REMAP_COEF_SCALE.
const int INTER_REMAP_COEF_BITS=15;
const int INTER_REMAP_COEF_SCALE=1 << INTER_REMAP_COEF_BITS;

// Nearest-neighbour rounding deltas per fractional position (one entry per
// cell of the INTER_TAB_SIZE x INTER_TAB_SIZE sub-pixel grid).
static uchar NNDeltaTab_i[INTER_TAB_SIZE2][2];

// Precomputed interpolation weights, one entry per sub-pixel position:
// *_f tables hold float weights, *_i tables hold the fixed-point weights
// (scaled by INTER_REMAP_COEF_SCALE). Kernel footprints: 2x2 bilinear,
// 4x4 bicubic, 8x8 Lanczos-4.
static float BilinearTab_f[INTER_TAB_SIZE2][2][2];
static short BilinearTab_i[INTER_TAB_SIZE2][2][2];

#if CV_SIMD128
// Bilinear fixed-point weights replicated 4x for the 4-channel SIMD kernel;
// the over-allocated buffer lets alignPtr produce a 16-byte aligned view.
static short BilinearTab_iC4_buf[INTER_TAB_SIZE2+2][2][8];
static short (*BilinearTab_iC4)[2][8] = (short (*)[2][8])alignPtr(BilinearTab_iC4_buf, 16);
#endif

static float BicubicTab_f[INTER_TAB_SIZE2][4][4];
static short BicubicTab_i[INTER_TAB_SIZE2][4][4];

static float Lanczos4Tab_f[INTER_TAB_SIZE2][8][8];
static short Lanczos4Tab_i[INTER_TAB_SIZE2][8][8];
145
// Linear interpolation weights for fractional offset x in [0, 1):
// coeffs[0] weights the left sample, coeffs[1] the right one.
static inline void interpolateLinear( float x, float* coeffs )
{
    coeffs[1] = x;
    coeffs[0] = 1.f - x;
}
151
// Keys' cubic convolution weights (a = -0.75) for fractional offset
// x in [0, 1); produces 4 weights covering samples at -1, 0, +1, +2.
static inline void interpolateCubic( float x, float* coeffs )
{
    const float A = -0.75f;
    const float xp = x + 1;   // distance to the sample at -1
    const float xm = 1 - x;   // distance to the sample at +1

    coeffs[0] = ((A*xp - 5*A)*xp + 8*A)*xp - 4*A;
    coeffs[1] = ((A + 2)*x - (A + 3))*x*x + 1;
    coeffs[2] = ((A + 2)*xm - (A + 3))*xm*xm + 1;
    // Make the four weights sum exactly to one.
    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
161
// Lanczos-4 interpolation weights for fractional offset x in [0, 1);
// produces 8 weights covering samples at -3..+4. The windowed-sinc values
// are evaluated via the angle-addition identities: sin(y0 + k*pi/4) is
// expanded with a small table of cos/sin of k*pi/4, so only one sin/cos
// pair is computed per call.
static inline void interpolateLanczos4( float x, float* coeffs )
{
    static const double s45 = 0.70710678118654752440084436210485; // sin(45 deg)
    static const double cs[][2]=
    {{1, 0}, {-s45, -s45}, {0, 1}, {s45, -s45}, {-1, 0}, {s45, s45}, {0, -1}, {-s45, s45}};
    static const double kPi = 3.1415926535897932384626433832795;  // == CV_PI

    // A (numerically) zero offset degenerates to picking the central sample.
    if( x < FLT_EPSILON )
    {
        for( int k = 0; k < 8; k++ )
            coeffs[k] = 0;
        coeffs[3] = 1;
        return;
    }

    const double y0 = -(x + 3)*kPi*0.25;
    const double s0 = std::sin(y0), c0 = std::cos(y0);
    float sum = 0;
    for( int k = 0; k < 8; k++ )
    {
        double y = -(x + 3 - k)*kPi*0.25;
        coeffs[k] = (float)((cs[k][0]*s0 + cs[k][1]*c0)/(y*y));
        sum += coeffs[k];
    }

    // Normalize so the eight weights sum to one.
    const float inv = 1.f/sum;
    for( int k = 0; k < 8; k++ )
        coeffs[k] *= inv;
}
189
initInterTab1D(int method,float * tab,int tabsz)190 static void initInterTab1D(int method, float* tab, int tabsz)
191 {
192 float scale = 1.f/tabsz;
193 if( method == INTER_LINEAR )
194 {
195 for( int i = 0; i < tabsz; i++, tab += 2 )
196 interpolateLinear( i*scale, tab );
197 }
198 else if( method == INTER_CUBIC )
199 {
200 for( int i = 0; i < tabsz; i++, tab += 4 )
201 interpolateCubic( i*scale, tab );
202 }
203 else if( method == INTER_LANCZOS4 )
204 {
205 for( int i = 0; i < tabsz; i++, tab += 8 )
206 interpolateLanczos4( i*scale, tab );
207 }
208 else
209 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
210 }
211
212
// Lazily builds the 2-D interpolation tables for the given method and
// returns a pointer to either the float table or the fixed-point table.
// The 2-D weights are the outer product of the 1-D kernel weights for the
// x and y fractions; the fixed-point table is additionally renormalized so
// each entry sums exactly to INTER_REMAP_COEF_SCALE. Not thread-safe by
// itself; initAllInterTab2D() below warms all tables at startup.
static const void* initInterTab2D( int method, bool fixpt )
{
    static bool inittab[INTER_MAX+1] = {false};
    float* tab = 0;
    short* itab = 0;
    int ksize = 0;
    if( method == INTER_LINEAR )
        tab = BilinearTab_f[0][0], itab = BilinearTab_i[0][0], ksize=2;
    else if( method == INTER_CUBIC )
        tab = BicubicTab_f[0][0], itab = BicubicTab_i[0][0], ksize=4;
    else if( method == INTER_LANCZOS4 )
        tab = Lanczos4Tab_f[0][0], itab = Lanczos4Tab_i[0][0], ksize=8;
    else
        CV_Error( CV_StsBadArg, "Unknown/unsupported interpolation type" );

    if( !inittab[method] )
    {
        AutoBuffer<float> _tab(8*INTER_TAB_SIZE);
        int i, j, k1, k2;
        initInterTab1D(method, _tab.data(), INTER_TAB_SIZE);
        // Walk all (i, j) sub-pixel positions; tab/itab advance one
        // ksize*ksize entry per position.
        for( i = 0; i < INTER_TAB_SIZE; i++ )
            for( j = 0; j < INTER_TAB_SIZE; j++, tab += ksize*ksize, itab += ksize*ksize )
            {
                int isum = 0;
                // Nearest-neighbour rounding: round up when the fraction is
                // in the lower half of the grid.
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][0] = j < INTER_TAB_SIZE/2;
                NNDeltaTab_i[i*INTER_TAB_SIZE+j][1] = i < INTER_TAB_SIZE/2;

                // 2-D weight = (y weight) x (x weight) outer product.
                for( k1 = 0; k1 < ksize; k1++ )
                {
                    float vy = _tab[i*ksize + k1];
                    for( k2 = 0; k2 < ksize; k2++ )
                    {
                        float v = vy*_tab[j*ksize + k2];
                        tab[k1*ksize + k2] = v;
                        isum += itab[k1*ksize + k2] = saturate_cast<short>(v*INTER_REMAP_COEF_SCALE);
                    }
                }

                // Rounding can make the fixed-point weights sum to slightly
                // more or less than the scale; push the difference into the
                // largest (resp. smallest) of the four central weights so the
                // sum is exact and DC is preserved.
                if( isum != INTER_REMAP_COEF_SCALE )
                {
                    int diff = isum - INTER_REMAP_COEF_SCALE;
                    int ksize2 = ksize/2, Mk1=ksize2, Mk2=ksize2, mk1=ksize2, mk2=ksize2;
                    for( k1 = ksize2; k1 < ksize2+2; k1++ )
                        for( k2 = ksize2; k2 < ksize2+2; k2++ )
                        {
                            if( itab[k1*ksize+k2] < itab[mk1*ksize+mk2] )
                                mk1 = k1, mk2 = k2;
                            else if( itab[k1*ksize+k2] > itab[Mk1*ksize+Mk2] )
                                Mk1 = k1, Mk2 = k2;
                        }
                    if( diff < 0 )
                        itab[Mk1*ksize + Mk2] = (short)(itab[Mk1*ksize + Mk2] - diff);
                    else
                        itab[mk1*ksize + mk2] = (short)(itab[mk1*ksize + mk2] - diff);
                }
            }
        // Rewind the cursors to the start of the tables before returning.
        tab -= INTER_TAB_SIZE2*ksize*ksize;
        itab -= INTER_TAB_SIZE2*ksize*ksize;
#if CV_SIMD128
        // Replicate the bilinear fixed-point weights 4x for the 4-channel
        // SIMD remap kernel.
        if( method == INTER_LINEAR )
        {
            for( i = 0; i < INTER_TAB_SIZE2; i++ )
                for( j = 0; j < 4; j++ )
                {
                    BilinearTab_iC4[i][0][j*2] = BilinearTab_i[i][0][0];
                    BilinearTab_iC4[i][0][j*2+1] = BilinearTab_i[i][0][1];
                    BilinearTab_iC4[i][1][j*2] = BilinearTab_i[i][1][0];
                    BilinearTab_iC4[i][1][j*2+1] = BilinearTab_i[i][1][1];
                }
        }
#endif
        inittab[method] = true;
    }
    return fixpt ? (const void*)itab : (const void*)tab;
}
288
289 #ifndef __MINGW32__
// Warms every interpolation table (float and fixed-point variants) once at
// static-initialization time, so the first warp/remap call does not pay the
// table-building cost and no runtime race on initInterTab2D is possible.
static bool initAllInterTab2D()
{
    return  initInterTab2D( INTER_LINEAR, false ) &&
            initInterTab2D( INTER_LINEAR, true ) &&
            initInterTab2D( INTER_CUBIC, false ) &&
            initInterTab2D( INTER_CUBIC, true ) &&
            initInterTab2D( INTER_LANCZOS4, false ) &&
            initInterTab2D( INTER_LANCZOS4, true );
}

// volatile keeps the otherwise-unused flag (and hence the initializer call)
// from being optimized away.
static volatile bool doInitAllInterTab2D = initAllInterTab2D();
301 #endif
302
// Saturating conversion functor from working type ST to destination type DT;
// used as the final cast step of the floating-point remap/warp kernels.
template<typename ST, typename DT> struct Cast
{
    typedef ST type1;  // working (accumulator) type
    typedef DT rtype;  // result (pixel) type

    DT operator()(ST val) const { return saturate_cast<DT>(val); }
};
310
// Fixed-point variant of Cast: converts a 'bits'-bit fixed-point accumulator
// to the destination type with rounding (DELTA = half an LSB) and saturation.
template<typename ST, typename DT, int bits> struct FixedPtCast
{
    typedef ST type1;  // fixed-point accumulator type
    typedef DT rtype;  // result (pixel) type
    enum { SHIFT = bits, DELTA = 1 << (bits-1) };

    DT operator()(ST val) const { return saturate_cast<DT>((val + DELTA)>>SHIFT); }
};
319
// Clamps x to the half-open range [a, b): values below a become a,
// values at or above b become b-1.
static inline int clip(int x, int a, int b)
{
    if( x < a )
        return a;
    if( x >= b )
        return b - 1;
    return x;
}
324
325 /****************************************************************************************\
326 * General warping (affine, perspective, remap) *
327 \****************************************************************************************/
328
// Nearest-neighbour remap kernel. _xy holds, for every destination pixel,
// the (already rounded) integer source coordinates as interleaved shorts.
// Out-of-range coordinates are resolved according to borderType; with
// BORDER_TRANSPARENT the destination pixel is left untouched.
template<typename T>
static void remapNearest( const Mat& _src, Mat& _dst, const Mat& _xy,
                          int borderType, const Scalar& _borderValue )
{
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const T* S0 = _src.ptr<T>();
    T cval[CV_CN_MAX];
    size_t sstep = _src.step/sizeof(S0[0]);

    // Border colour, replicated over channels (k & 3 repeats the 4 scalar
    // components for images with more than 4 channels).
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    unsigned width1 = ssize.width, height1 = ssize.height;

    // Process continuous data as one long row.
    if( _dst.isContinuous() && _xy.isContinuous() )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);

        if( cn == 1 )
        {
            for(int dx = 0; dx < dsize.width; dx++ )
            {
                int sx = XY[dx*2], sy = XY[dx*2+1];
                // Single unsigned compare covers both "< 0" and ">= size".
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                    D[dx] = S0[sy*sstep + sx];
                else
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        D[dx] = S0[sy*sstep + sx];
                    }
                    else if( borderType == BORDER_CONSTANT )
                        D[dx] = cval[0];
                    else if( borderType != BORDER_TRANSPARENT )
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        D[dx] = S0[sy*sstep + sx];
                    }
                }
            }
        }
        else
        {
            for(int dx = 0; dx < dsize.width; dx++, D += cn )
            {
                int sx = XY[dx*2], sy = XY[dx*2+1];
                const T *S;
                if( (unsigned)sx < width1 && (unsigned)sy < height1 )
                {
                    // Unrolled copies for the common 3- and 4-channel cases.
                    if( cn == 3 )
                    {
                        S = S0 + sy*sstep + sx*3;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2];
                    }
                    else if( cn == 4 )
                    {
                        S = S0 + sy*sstep + sx*4;
                        D[0] = S[0], D[1] = S[1], D[2] = S[2], D[3] = S[3];
                    }
                    else
                    {
                        S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                            D[k] = S[k];
                    }
                }
                else if( borderType != BORDER_TRANSPARENT )
                {
                    if( borderType == BORDER_REPLICATE )
                    {
                        sx = clip(sx, 0, ssize.width);
                        sy = clip(sy, 0, ssize.height);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    else if( borderType == BORDER_CONSTANT )
                        S = &cval[0];
                    else
                    {
                        sx = borderInterpolate(sx, ssize.width, borderType);
                        sy = borderInterpolate(sy, ssize.height, borderType);
                        S = S0 + sy*sstep + sx*cn;
                    }
                    for(int k = 0; k < cn; k++ )
                        D[k] = S[k];
                }
            }
        }
    }
}
429
430
// No-op "vectorized row" functor: reports zero pixels processed so the
// scalar fallback in remapBilinear handles the whole span. Used when no
// SIMD implementation is available for the element type.
struct RemapNoVec
{
    int operator()( const Mat&, void*, const short*, const ushort*,
                    const void*, int ) const { return 0; }
};
436
#if CV_SIMD128

// Pointer-cast helper types with alignment 1, so SIMD gathers may read
// 2/4-byte units from arbitrary (unaligned) source offsets.
typedef unsigned short CV_DECL_ALIGNED(1) unaligned_ushort;
typedef int CV_DECL_ALIGNED(1) unaligned_int;

// SIMD bilinear remap for 8-bit images with 1, 3 or 4 channels.
// Processes an inlier span (whole 2x2 neighbourhood guaranteed inside the
// source) and returns the number of pixels handled; the scalar code
// finishes the remainder. XY holds interleaved (x, y) source coordinates,
// FXY indexes the fixed-point weight tables.
struct RemapVec_8u
{
    int operator()( const Mat& _src, void* _dst, const short* XY,
                    const ushort* FXY, const void* _wtab, int width ) const
    {
        int cn = _src.channels(), x = 0, sstep = (int)_src.step;

        // The (x, y) -> byte-offset trick below packs cn and sstep into one
        // 16-bit dot product, so it only works while sstep fits in 15 bits.
        if( (cn != 1 && cn != 3 && cn != 4) || sstep >= 0x8000 )
            return 0;

        const uchar *S0 = _src.ptr(), *S1 = _src.ptr(1);
        const short* wtab = cn == 1 ? (const short*)_wtab : &BilinearTab_iC4[0][0][0];
        uchar* D = (uchar*)_dst;
        v_int32x4 delta = v_setall_s32(INTER_REMAP_COEF_SCALE / 2); // rounding term
        // Dot-producting (x, y) with (cn, sstep) yields the byte offset
        // y*sstep + x*cn of each pixel in one instruction.
        v_int16x8 xy2ofs = v_reinterpret_as_s16(v_setall_s32(cn + (sstep << 16)));
        int CV_DECL_ALIGNED(16) iofs0[4], iofs1[4];
        // Last address from which a full 8-byte SIMD load is still in-bounds.
        const uchar* src_limit_8bytes = _src.datalimit - v_int16x8::nlanes;
// Loads two adjacent RGB pixels at ptr+offset and interleaves them as
// r0r1 g0g1 b0b1 in a 16-bit vector; falls back to scalar loads near the
// end of the source buffer to avoid reading past datalimit.
#define CV_PICK_AND_PACK_RGB(ptr, offset, result)                          \
        {                                                                  \
            const uchar* const p = ((const uchar*)ptr) + (offset);         \
            if (p <= src_limit_8bytes)                                     \
            {                                                              \
                v_uint8x16 rrggbb, dummy;                                  \
                v_uint16x8 rrggbb8, dummy8;                                \
                v_uint8x16 rgb0 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p), 0, 0, 0));     \
                v_uint8x16 rgb1 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p + 3), 0, 0, 0)); \
                v_zip(rgb0, rgb1, rrggbb, dummy);                          \
                v_expand(rrggbb, rrggbb8, dummy8);                         \
                result = v_reinterpret_as_s16(rrggbb8);                    \
            }                                                              \
            else                                                           \
            {                                                              \
                result = v_int16x8((short)p[0], (short)p[3], /* r0r1 */    \
                                   (short)p[1], (short)p[4], /* g0g1 */    \
                                   (short)p[2], (short)p[5], /* b0b1 */ 0, 0); \
            }                                                              \
        }
// Same as above for two adjacent RGBA pixels (always 8 in-bounds bytes).
#define CV_PICK_AND_PACK_RGBA(ptr, offset, result)                         \
        {                                                                  \
            const uchar* const p = ((const uchar*)ptr) + (offset);         \
            CV_DbgAssert(p <= src_limit_8bytes);                           \
            v_uint8x16 rrggbbaa, dummy;                                    \
            v_uint16x8 rrggbbaa8, dummy8;                                  \
            v_uint8x16 rgba0 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p), 0, 0, 0)); \
            v_uint8x16 rgba1 = v_reinterpret_as_u8(v_int32x4(*(unaligned_int*)(p + v_int32x4::nlanes), 0, 0, 0)); \
            v_zip(rgba0, rgba1, rrggbbaa, dummy);                          \
            v_expand(rrggbbaa, rrggbbaa8, dummy8);                         \
            result = v_reinterpret_as_s16(rrggbbaa8);                      \
        }
// Gathers one horizontal pixel pair (2 bytes) from each of 4 offsets.
#define CV_PICK_AND_PACK4(base,offset)                                     \
        v_uint16x8(*(unaligned_ushort*)(base + offset[0]), *(unaligned_ushort*)(base + offset[1]), \
                   *(unaligned_ushort*)(base + offset[2]), *(unaligned_ushort*)(base + offset[3]), \
                   0, 0, 0, 0)

        if( cn == 1 )
        {
            // 8 pixels per iteration: gather the 2x2 neighbourhoods, then
            // dot-product with the per-pixel weight pairs.
            for( ; x <= width - 8; x += 8 )
            {
                v_int16x8 _xy0 = v_load(XY + x*2);
                v_int16x8 _xy1 = v_load(XY + x*2 + 8);
                v_int32x4 v0, v1, v2, v3, a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2;

                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_int32x4 xy1 = v_dotprod( _xy1, xy2ofs );
                v_store( iofs0, xy0 );
                v_store( iofs1, xy1 );

                v_uint16x8 stub, dummy;
                v_uint16x8 vec16;
                vec16 = CV_PICK_AND_PACK4(S0, iofs0);     // top rows
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v0 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs0);     // bottom rows
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v1 = v_reinterpret_as_s32(stub);

                v_zip(v_load_low((int*)(wtab + FXY[x] * 4)), v_load_low((int*)(wtab + FXY[x + 1] * 4)), a0, a1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 2] * 4)), v_load_low((int*)(wtab + FXY[x + 3] * 4)), b0, b1);
                v_recombine(a0, b0, a2, b2);
                v1 = v_dotprod(v_reinterpret_as_s16(v1), v_reinterpret_as_s16(b2), delta);
                v0 = v_dotprod(v_reinterpret_as_s16(v0), v_reinterpret_as_s16(a2), v1);

                vec16 = CV_PICK_AND_PACK4(S0, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v2 = v_reinterpret_as_s32(stub);
                vec16 = CV_PICK_AND_PACK4(S1, iofs1);
                v_expand(v_reinterpret_as_u8(vec16), stub, dummy);
                v3 = v_reinterpret_as_s32(stub);

                v_zip(v_load_low((int*)(wtab + FXY[x + 4] * 4)), v_load_low((int*)(wtab + FXY[x + 5] * 4)), c0, c1);
                v_zip(v_load_low((int*)(wtab + FXY[x + 6] * 4)), v_load_low((int*)(wtab + FXY[x + 7] * 4)), d0, d1);
                v_recombine(c0, d0, c2, d2);
                v3 = v_dotprod(v_reinterpret_as_s16(v3), v_reinterpret_as_s16(d2), delta);
                v2 = v_dotprod(v_reinterpret_as_s16(v2), v_reinterpret_as_s16(c2), v3);

                // Drop the fixed-point fraction and pack back to 8 bits.
                v0 = v0 >> INTER_REMAP_COEF_BITS;
                v2 = v2 >> INTER_REMAP_COEF_BITS;
                v_pack_u_store(D + x, v_pack(v0, v2));
            }
        }
        else if( cn == 3 )
        {
            // 4 RGB pixels per iteration; width - 5 leaves slack because the
            // pair loads read one pixel beyond the current one.
            for( ; x <= width - 5; x += 4, D += 12 )
            {
                v_int16x8 u0, v0, u1, v1;
                v_int16x8 _xy0 = v_load(XY + x * 2);

                v_int32x4 xy0 = v_dotprod(_xy0, xy2ofs);
                v_store(iofs0, xy0);

                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;
                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);

                CV_PICK_AND_PACK_RGB(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[1], v1);

                v_int32x4 result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                v_int32x4 result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;

                // Rotate to lay the two 3-byte pixels out contiguously.
                result0 = v_rotate_left<1>(result0);
                v_int16x8 result8 = v_pack(result0, result1);
                v_uint8x16 result16 = v_pack_u(result8, result8);
                v_store_low(D, v_rotate_right<1>(result16));


                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGB(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGB(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGB(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGB(S1, iofs0[3], v1);

                result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;

                result0 = v_rotate_left<1>(result0);
                result8 = v_pack(result0, result1);
                result16 = v_pack_u(result8, result8);
                v_store_low(D + 6, v_rotate_right<1>(result16));
            }
        }
        else if( cn == 4 )
        {
            // 4 RGBA pixels per iteration.
            for( ; x <= width - 4; x += 4, D += 16 )
            {
                v_int16x8 _xy0 = v_load(XY + x * 2);
                v_int16x8 u0, v0, u1, v1;

                v_int32x4 xy0 = v_dotprod( _xy0, xy2ofs );
                v_store(iofs0, xy0);
                int offset0 = FXY[x] * 16;
                int offset1 = FXY[x + 1] * 16;
                int offset2 = FXY[x + 2] * 16;
                int offset3 = FXY[x + 3] * 16;

                v_int16x8 w00 = v_load(wtab + offset0);
                v_int16x8 w01 = v_load(wtab + offset0 + 8);
                v_int16x8 w10 = v_load(wtab + offset1);
                v_int16x8 w11 = v_load(wtab + offset1 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[0], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[0], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[1], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[1], v1);

                v_int32x4 result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                v_int32x4 result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;
                v_int16x8 result8 = v_pack(result0, result1);
                v_pack_u_store(D, result8);

                w00 = v_load(wtab + offset2);
                w01 = v_load(wtab + offset2 + 8);
                w10 = v_load(wtab + offset3);
                w11 = v_load(wtab + offset3 + 8);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[2], u0);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[2], v0);
                CV_PICK_AND_PACK_RGBA(S0, iofs0[3], u1);
                CV_PICK_AND_PACK_RGBA(S1, iofs0[3], v1);

                result0 = v_dotprod(u0, w00, v_dotprod(v0, w01, delta)) >> INTER_REMAP_COEF_BITS;
                result1 = v_dotprod(u1, w10, v_dotprod(v1, w11, delta)) >> INTER_REMAP_COEF_BITS;
                result8 = v_pack(result0, result1);
                v_pack_u_store(D + 8, result8);
            }
        }

        return x;
    }
};

#else

typedef RemapNoVec RemapVec_8u;

#endif
646
647
// Bilinear remap kernel. _xy holds the integer top-left source coordinate
// of each destination pixel, _fxy indexes the precomputed 2x2 weight table
// _wtab. Each destination row is processed as alternating runs of "inlier"
// pixels (whole 2x2 neighbourhood inside the source: fast path, optionally
// SIMD-accelerated via VecOp) and border pixels handled per borderType.
template<class CastOp, class VecOp, typename AT>
static void remapBilinear( const Mat& _src, Mat& _dst, const Mat& _xy,
                           const Mat& _fxy, const void* _wtab,
                           int borderType, const Scalar& _borderValue )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;
    VecOp vecOp;

    // Border colour replicated over channels (k & 3 repeats the 4 scalar
    // components for images with more than 4 channels).
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    // A pixel at (sx, sy) is an inlier when sx < width1 and sy < height1
    // (unsigned compare also rejects negatives).
    unsigned width1 = std::max(ssize.width-1, 0), height1 = std::max(ssize.height-1, 0);
    CV_Assert( !ssize.empty() );
#if CV_SIMD128
    // The SIMD RGB path loads one pixel beyond the 2x2 block, so shrink the
    // inlier region by one more column for 8UC3.
    if( _src.type() == CV_8UC3 )
        width1 = std::max(ssize.width-2, 0);
#endif

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);
        int X0 = 0;
        bool prevInlier = false;

        // dx runs one past the row end so the final run is always flushed.
        for(int dx = 0; dx <= dsize.width; dx++ )
        {
            bool curInlier = dx < dsize.width ?
                (unsigned)XY[dx*2] < width1 &&
                (unsigned)XY[dx*2+1] < height1 : !prevInlier;
            if( curInlier == prevInlier )
                continue;

            // A run [dx, X1) with uniform inlier-ness just ended.
            int X1 = dx;
            dx = X0;
            X0 = X1;
            prevInlier = curInlier;

            if( !curInlier )
            {
                // The finished run was an inlier run: let the SIMD functor
                // take what it can, then finish with unrolled scalar code.
                int len = vecOp( _src, D, XY + dx*2, FXY + dx, wtab, X1 - dx );
                D += len*cn;
                dx += len;

                if( cn == 1 )
                {
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx;
                        *D = castOp(WT(S[0]*w[0] + S[1]*w[1] + S[sstep]*w[2] + S[sstep+1]*w[3]));
                    }
                }
                else if( cn == 2 )
                    for( ; dx < X1; dx++, D += 2 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*2;
                        WT t0 = S[0]*w[0] + S[2]*w[1] + S[sstep]*w[2] + S[sstep+2]*w[3];
                        WT t1 = S[1]*w[0] + S[3]*w[1] + S[sstep+1]*w[2] + S[sstep+3]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                    }
                else if( cn == 3 )
                    for( ; dx < X1; dx++, D += 3 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*3;
                        WT t0 = S[0]*w[0] + S[3]*w[1] + S[sstep]*w[2] + S[sstep+3]*w[3];
                        WT t1 = S[1]*w[0] + S[4]*w[1] + S[sstep+1]*w[2] + S[sstep+4]*w[3];
                        WT t2 = S[2]*w[0] + S[5]*w[1] + S[sstep+2]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1); D[2] = castOp(t2);
                    }
                else if( cn == 4 )
                    for( ; dx < X1; dx++, D += 4 )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*4;
                        WT t0 = S[0]*w[0] + S[4]*w[1] + S[sstep]*w[2] + S[sstep+4]*w[3];
                        WT t1 = S[1]*w[0] + S[5]*w[1] + S[sstep+1]*w[2] + S[sstep+5]*w[3];
                        D[0] = castOp(t0); D[1] = castOp(t1);
                        t0 = S[2]*w[0] + S[6]*w[1] + S[sstep+2]*w[2] + S[sstep+6]*w[3];
                        t1 = S[3]*w[0] + S[7]*w[1] + S[sstep+3]*w[2] + S[sstep+7]*w[3];
                        D[2] = castOp(t0); D[3] = castOp(t1);
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        const AT* w = wtab + FXY[dx]*4;
                        const T* S = S0 + sy*sstep + sx*cn;
                        for(int k = 0; k < cn; k++ )
                        {
                            WT t0 = S[k]*w[0] + S[k+cn]*w[1] + S[sstep+k]*w[2] + S[sstep+k+cn]*w[3];
                            D[k] = castOp(t0);
                        }
                    }
            }
            else
            {
                // The finished run was a border run.
                // NOTE(review): the cn != 3 condition looks like it keeps
                // 3-channel BORDER_TRANSPARENT on the per-pixel path below —
                // presumably because the shrunken 8UC3 inlier region would
                // otherwise skip pixels the SIMD path never wrote.
                if( borderType == BORDER_TRANSPARENT && cn != 3 )
                {
                    D += (X1 - dx)*cn;
                    dx = X1;
                    continue;
                }

                if( cn == 1 )
                    for( ; dx < X1; dx++, D++ )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            // Entire 2x2 neighbourhood is outside: pure border colour.
                            D[0] = cval[0];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            T v0, v1, v2, v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0[sy0*sstep + sx0];
                                v1 = S0[sy0*sstep + sx1];
                                v2 = S0[sy1*sstep + sx0];
                                v3 = S0[sy1*sstep + sx1];
                            }
                            else
                            {
                                // borderInterpolate returns -1 for constant
                                // border; substitute cval there.
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx0] : cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0[sy0*sstep + sx1] : cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx0] : cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0[sy1*sstep + sx1] : cval[0];
                            }
                            D[0] = castOp(WT(v0*w[0] + v1*w[1] + v2*w[2] + v3*w[3]));
                        }
                    }
                else
                    for( ; dx < X1; dx++, D += cn )
                    {
                        int sx = XY[dx*2], sy = XY[dx*2+1];
                        if( borderType == BORDER_CONSTANT &&
                            (sx >= ssize.width || sx+1 < 0 ||
                             sy >= ssize.height || sy+1 < 0) )
                        {
                            for(int k = 0; k < cn; k++ )
                                D[k] = cval[k];
                        }
                        else
                        {
                            int sx0, sx1, sy0, sy1;
                            const T *v0, *v1, *v2, *v3;
                            const AT* w = wtab + FXY[dx]*4;
                            if( borderType == BORDER_REPLICATE )
                            {
                                sx0 = clip(sx, 0, ssize.width);
                                sx1 = clip(sx+1, 0, ssize.width);
                                sy0 = clip(sy, 0, ssize.height);
                                sy1 = clip(sy+1, 0, ssize.height);
                                v0 = S0 + sy0*sstep + sx0*cn;
                                v1 = S0 + sy0*sstep + sx1*cn;
                                v2 = S0 + sy1*sstep + sx0*cn;
                                v3 = S0 + sy1*sstep + sx1*cn;
                            }
                            else if( borderType == BORDER_TRANSPARENT &&
                                    ((unsigned)sx >= (unsigned)(ssize.width-1) ||
                                     (unsigned)sy >= (unsigned)(ssize.height-1)))
                                continue;   // leave the destination pixel untouched
                            else
                            {
                                sx0 = borderInterpolate(sx, ssize.width, borderType);
                                sx1 = borderInterpolate(sx+1, ssize.width, borderType);
                                sy0 = borderInterpolate(sy, ssize.height, borderType);
                                sy1 = borderInterpolate(sy+1, ssize.height, borderType);
                                v0 = sx0 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx0*cn : &cval[0];
                                v1 = sx1 >= 0 && sy0 >= 0 ? S0 + sy0*sstep + sx1*cn : &cval[0];
                                v2 = sx0 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx0*cn : &cval[0];
                                v3 = sx1 >= 0 && sy1 >= 0 ? S0 + sy1*sstep + sx1*cn : &cval[0];
                            }
                            for(int k = 0; k < cn; k++ )
                                D[k] = castOp(WT(v0[k]*w[0] + v1[k]*w[1] + v2[k]*w[2] + v3[k]*w[3]));
                        }
                    }
            }
        }
    }
}
857
858
// Bicubic remap kernel (4x4 neighbourhood). ONE is the weight-scale unit
// (1 for float weights, INTER_REMAP_COEF_SCALE for fixed-point), used when
// blending partially-outside neighbourhoods against the border colour.
template<class CastOp, typename AT, int ONE>
static void remapBicubic( const Mat& _src, Mat& _dst, const Mat& _xy,
                          const Mat& _fxy, const void* _wtab,
                          int borderType, const Scalar& _borderValue )
{
    typedef typename CastOp::rtype T;
    typedef typename CastOp::type1 WT;
    Size ssize = _src.size(), dsize = _dst.size();
    const int cn = _src.channels();
    const AT* wtab = (const AT*)_wtab;
    const T* S0 = _src.ptr<T>();
    size_t sstep = _src.step/sizeof(S0[0]);
    T cval[CV_CN_MAX];
    CastOp castOp;

    // Border colour replicated over channels.
    for(int k = 0; k < cn; k++ )
        cval[k] = saturate_cast<T>(_borderValue[k & 3]);

    // Transparent pixels that still need interpolation fall back to
    // reflect-101 for the out-of-range taps.
    int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;

    // Fast path requires the whole 4x4 footprint inside the source.
    unsigned width1 = std::max(ssize.width-3, 0), height1 = std::max(ssize.height-3, 0);

    // Process continuous data as one long row.
    if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
    {
        dsize.width *= dsize.height;
        dsize.height = 1;
    }

    for(int dy = 0; dy < dsize.height; dy++ )
    {
        T* D = _dst.ptr<T>(dy);
        const short* XY = _xy.ptr<short>(dy);
        const ushort* FXY = _fxy.ptr<ushort>(dy);

        for(int dx = 0; dx < dsize.width; dx++, D += cn )
        {
            // (sx, sy) is the top-left of the 4x4 footprint.
            int sx = XY[dx*2]-1, sy = XY[dx*2+1]-1;
            const AT* w = wtab + FXY[dx]*16;
            if( (unsigned)sx < width1 && (unsigned)sy < height1 )
            {
                // Fully-inside fast path: 16-tap weighted sum per channel.
                const T* S = S0 + sy*sstep + sx*cn;
                for(int k = 0; k < cn; k++ )
                {
                    WT sum = S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3];
                    S += sstep;
                    sum += S[0]*w[4] + S[cn]*w[5] + S[cn*2]*w[6] + S[cn*3]*w[7];
                    S += sstep;
                    sum += S[0]*w[8] + S[cn]*w[9] + S[cn*2]*w[10] + S[cn*3]*w[11];
                    S += sstep;
                    sum += S[0]*w[12] + S[cn]*w[13] + S[cn*2]*w[14] + S[cn*3]*w[15];
                    S += 1 - sstep*3;   // back to the first row, next channel
                    D[k] = castOp(sum);
                }
            }
            else
            {
                int x[4], y[4];
                // Transparent: leave the pixel untouched when its centre
                // sample lies outside the source.
                if( borderType == BORDER_TRANSPARENT &&
                    ((unsigned)(sx+1) >= (unsigned)ssize.width ||
                     (unsigned)(sy+1) >= (unsigned)ssize.height) )
                    continue;

                // Constant border and the footprint is fully outside.
                if( borderType1 == BORDER_CONSTANT &&
                    (sx >= ssize.width || sx+4 <= 0 ||
                     sy >= ssize.height || sy+4 <= 0))
                {
                    for(int k = 0; k < cn; k++ )
                        D[k] = cval[k];
                    continue;
                }

                // Resolve each of the 4 rows/columns through the border rule;
                // -1 marks taps that should use the constant border colour.
                for(int i = 0; i < 4; i++ )
                {
                    x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
                    y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
                }

                // Accumulate relative to cval: sum = cval*ONE + sum_i w_i*(S_i - cval),
                // so taps mapped to -1 implicitly contribute the border colour.
                for(int k = 0; k < cn; k++, S0++, w -= 16 )
                {
                    WT cv = cval[k], sum = cv*ONE;
                    for(int i = 0; i < 4; i++, w += 4 )
                    {
                        int yi = y[i];
                        const T* S = S0 + yi*sstep;
                        if( yi < 0 )
                            continue;
                        if( x[0] >= 0 )
                            sum += (S[x[0]] - cv)*w[0];
                        if( x[1] >= 0 )
                            sum += (S[x[1]] - cv)*w[1];
                        if( x[2] >= 0 )
                            sum += (S[x[2]] - cv)*w[2];
                        if( x[3] >= 0 )
                            sum += (S[x[3]] - cv)*w[3];
                    }
                    D[k] = castOp(sum);
                }
                S0 -= cn;   // undo the per-channel S0 advance above
            }
        }
    }
}
961
962
963 template<class CastOp, typename AT, int ONE>
remapLanczos4(const Mat & _src,Mat & _dst,const Mat & _xy,const Mat & _fxy,const void * _wtab,int borderType,const Scalar & _borderValue)964 static void remapLanczos4( const Mat& _src, Mat& _dst, const Mat& _xy,
965 const Mat& _fxy, const void* _wtab,
966 int borderType, const Scalar& _borderValue )
967 {
968 typedef typename CastOp::rtype T;
969 typedef typename CastOp::type1 WT;
970 Size ssize = _src.size(), dsize = _dst.size();
971 const int cn = _src.channels();
972 const AT* wtab = (const AT*)_wtab;
973 const T* S0 = _src.ptr<T>();
974 size_t sstep = _src.step/sizeof(S0[0]);
975 T cval[CV_CN_MAX];
976 CastOp castOp;
977
978 for(int k = 0; k < cn; k++ )
979 cval[k] = saturate_cast<T>(_borderValue[k & 3]);
980
981 int borderType1 = borderType != BORDER_TRANSPARENT ? borderType : BORDER_REFLECT_101;
982
983 unsigned width1 = std::max(ssize.width-7, 0), height1 = std::max(ssize.height-7, 0);
984
985 if( _dst.isContinuous() && _xy.isContinuous() && _fxy.isContinuous() )
986 {
987 dsize.width *= dsize.height;
988 dsize.height = 1;
989 }
990
991 for(int dy = 0; dy < dsize.height; dy++ )
992 {
993 T* D = _dst.ptr<T>(dy);
994 const short* XY = _xy.ptr<short>(dy);
995 const ushort* FXY = _fxy.ptr<ushort>(dy);
996
997 for(int dx = 0; dx < dsize.width; dx++, D += cn )
998 {
999 int sx = XY[dx*2]-3, sy = XY[dx*2+1]-3;
1000 const AT* w = wtab + FXY[dx]*64;
1001 const T* S = S0 + sy*sstep + sx*cn;
1002 if( (unsigned)sx < width1 && (unsigned)sy < height1 )
1003 {
1004 for(int k = 0; k < cn; k++ )
1005 {
1006 WT sum = 0;
1007 for( int r = 0; r < 8; r++, S += sstep, w += 8 )
1008 sum += S[0]*w[0] + S[cn]*w[1] + S[cn*2]*w[2] + S[cn*3]*w[3] +
1009 S[cn*4]*w[4] + S[cn*5]*w[5] + S[cn*6]*w[6] + S[cn*7]*w[7];
1010 w -= 64;
1011 S -= sstep*8 - 1;
1012 D[k] = castOp(sum);
1013 }
1014 }
1015 else
1016 {
1017 int x[8], y[8];
1018 if( borderType == BORDER_TRANSPARENT &&
1019 ((unsigned)(sx+3) >= (unsigned)ssize.width ||
1020 (unsigned)(sy+3) >= (unsigned)ssize.height) )
1021 continue;
1022
1023 if( borderType1 == BORDER_CONSTANT &&
1024 (sx >= ssize.width || sx+8 <= 0 ||
1025 sy >= ssize.height || sy+8 <= 0))
1026 {
1027 for(int k = 0; k < cn; k++ )
1028 D[k] = cval[k];
1029 continue;
1030 }
1031
1032 for(int i = 0; i < 8; i++ )
1033 {
1034 x[i] = borderInterpolate(sx + i, ssize.width, borderType1)*cn;
1035 y[i] = borderInterpolate(sy + i, ssize.height, borderType1);
1036 }
1037
1038 for(int k = 0; k < cn; k++, S0++, w -= 64 )
1039 {
1040 WT cv = cval[k], sum = cv*ONE;
1041 for(int i = 0; i < 8; i++, w += 8 )
1042 {
1043 int yi = y[i];
1044 const T* S1 = S0 + yi*sstep;
1045 if( yi < 0 )
1046 continue;
1047 if( x[0] >= 0 )
1048 sum += (S1[x[0]] - cv)*w[0];
1049 if( x[1] >= 0 )
1050 sum += (S1[x[1]] - cv)*w[1];
1051 if( x[2] >= 0 )
1052 sum += (S1[x[2]] - cv)*w[2];
1053 if( x[3] >= 0 )
1054 sum += (S1[x[3]] - cv)*w[3];
1055 if( x[4] >= 0 )
1056 sum += (S1[x[4]] - cv)*w[4];
1057 if( x[5] >= 0 )
1058 sum += (S1[x[5]] - cv)*w[5];
1059 if( x[6] >= 0 )
1060 sum += (S1[x[6]] - cv)*w[6];
1061 if( x[7] >= 0 )
1062 sum += (S1[x[7]] - cv)*w[7];
1063 }
1064 D[k] = castOp(sum);
1065 }
1066 S0 -= cn;
1067 }
1068 }
1069 }
1070 }
1071
1072
1073 typedef void (*RemapNNFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
1074 int borderType, const Scalar& _borderValue );
1075
1076 typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
1077 const Mat& _fxy, const void* _wtab,
1078 int borderType, const Scalar& _borderValue);
1079
1080 class RemapInvoker :
1081 public ParallelLoopBody
1082 {
1083 public:
RemapInvoker(const Mat & _src,Mat & _dst,const Mat * _m1,const Mat * _m2,int _borderType,const Scalar & _borderValue,int _planar_input,RemapNNFunc _nnfunc,RemapFunc _ifunc,const void * _ctab)1084 RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
1085 const Mat *_m2, int _borderType, const Scalar &_borderValue,
1086 int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
1087 ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
1088 borderType(_borderType), borderValue(_borderValue),
1089 planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
1090 {
1091 }
1092
operator ()(const Range & range) const1093 virtual void operator() (const Range& range) const CV_OVERRIDE
1094 {
1095 int x, y, x1, y1;
1096 const int buf_size = 1 << 14;
1097 int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
1098 int bcols0 = std::min(buf_size/brows0, dst->cols);
1099 brows0 = std::min(buf_size/bcols0, dst->rows);
1100
1101 Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
1102 if( !nnfunc )
1103 _bufa.create(brows0, bcols0, CV_16UC1);
1104
1105 for( y = range.start; y < range.end; y += brows0 )
1106 {
1107 for( x = 0; x < dst->cols; x += bcols0 )
1108 {
1109 int brows = std::min(brows0, range.end - y);
1110 int bcols = std::min(bcols0, dst->cols - x);
1111 Mat dpart(*dst, Rect(x, y, bcols, brows));
1112 Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
1113
1114 if( nnfunc )
1115 {
1116 if( m1->type() == CV_16SC2 && m2->empty() ) // the data is already in the right format
1117 bufxy = (*m1)(Rect(x, y, bcols, brows));
1118 else if( map_depth != CV_32F )
1119 {
1120 for( y1 = 0; y1 < brows; y1++ )
1121 {
1122 short* XY = bufxy.ptr<short>(y1);
1123 const short* sXY = m1->ptr<short>(y+y1) + x*2;
1124 const ushort* sA = m2->ptr<ushort>(y+y1) + x;
1125
1126 for( x1 = 0; x1 < bcols; x1++ )
1127 {
1128 int a = sA[x1] & (INTER_TAB_SIZE2-1);
1129 XY[x1*2] = sXY[x1*2] + NNDeltaTab_i[a][0];
1130 XY[x1*2+1] = sXY[x1*2+1] + NNDeltaTab_i[a][1];
1131 }
1132 }
1133 }
1134 else if( !planar_input )
1135 (*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
1136 else
1137 {
1138 for( y1 = 0; y1 < brows; y1++ )
1139 {
1140 short* XY = bufxy.ptr<short>(y1);
1141 const float* sX = m1->ptr<float>(y+y1) + x;
1142 const float* sY = m2->ptr<float>(y+y1) + x;
1143 x1 = 0;
1144
1145 #if CV_SIMD128
1146 {
1147 int span = v_float32x4::nlanes;
1148 for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1149 {
1150 v_int32x4 ix0 = v_round(v_load(sX + x1));
1151 v_int32x4 iy0 = v_round(v_load(sY + x1));
1152 v_int32x4 ix1 = v_round(v_load(sX + x1 + span));
1153 v_int32x4 iy1 = v_round(v_load(sY + x1 + span));
1154
1155 v_int16x8 dx, dy;
1156 dx = v_pack(ix0, ix1);
1157 dy = v_pack(iy0, iy1);
1158 v_store_interleave(XY + x1 * 2, dx, dy);
1159 }
1160 }
1161 #endif
1162 for( ; x1 < bcols; x1++ )
1163 {
1164 XY[x1*2] = saturate_cast<short>(sX[x1]);
1165 XY[x1*2+1] = saturate_cast<short>(sY[x1]);
1166 }
1167 }
1168 }
1169 nnfunc( *src, dpart, bufxy, borderType, borderValue );
1170 continue;
1171 }
1172
1173 Mat bufa(_bufa, Rect(0, 0, bcols, brows));
1174 for( y1 = 0; y1 < brows; y1++ )
1175 {
1176 short* XY = bufxy.ptr<short>(y1);
1177 ushort* A = bufa.ptr<ushort>(y1);
1178
1179 if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
1180 {
1181 bufxy = (*m1)(Rect(x, y, bcols, brows));
1182
1183 const ushort* sA = m2->ptr<ushort>(y+y1) + x;
1184 x1 = 0;
1185
1186 #if CV_SIMD128
1187 {
1188 v_uint16x8 v_scale = v_setall_u16(INTER_TAB_SIZE2 - 1);
1189 int span = v_uint16x8::nlanes;
1190 for( ; x1 <= bcols - span; x1 += span )
1191 v_store((unsigned short*)(A + x1), v_load(sA + x1) & v_scale);
1192 }
1193 #endif
1194 for( ; x1 < bcols; x1++ )
1195 A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
1196 }
1197 else if( planar_input )
1198 {
1199 const float* sX = m1->ptr<float>(y+y1) + x;
1200 const float* sY = m2->ptr<float>(y+y1) + x;
1201
1202 x1 = 0;
1203 #if CV_SIMD128
1204 {
1205 v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
1206 v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1);
1207 int span = v_float32x4::nlanes;
1208 for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1209 {
1210 v_int32x4 v_sx0 = v_round(v_scale * v_load(sX + x1));
1211 v_int32x4 v_sy0 = v_round(v_scale * v_load(sY + x1));
1212 v_int32x4 v_sx1 = v_round(v_scale * v_load(sX + x1 + span));
1213 v_int32x4 v_sy1 = v_round(v_scale * v_load(sY + x1 + span));
1214 v_uint16x8 v_sx8 = v_reinterpret_as_u16(v_pack(v_sx0 & v_scale2, v_sx1 & v_scale2));
1215 v_uint16x8 v_sy8 = v_reinterpret_as_u16(v_pack(v_sy0 & v_scale2, v_sy1 & v_scale2));
1216 v_uint16x8 v_v = v_shl<INTER_BITS>(v_sy8) | (v_sx8);
1217 v_store(A + x1, v_v);
1218
1219 v_int16x8 v_d0 = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
1220 v_int16x8 v_d1 = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
1221 v_store_interleave(XY + (x1 << 1), v_d0, v_d1);
1222 }
1223 }
1224 #endif
1225 for( ; x1 < bcols; x1++ )
1226 {
1227 int sx = cvRound(sX[x1]*INTER_TAB_SIZE);
1228 int sy = cvRound(sY[x1]*INTER_TAB_SIZE);
1229 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
1230 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
1231 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
1232 A[x1] = (ushort)v;
1233 }
1234 }
1235 else
1236 {
1237 const float* sXY = m1->ptr<float>(y+y1) + x*2;
1238 x1 = 0;
1239
1240 #if CV_SIMD128
1241 {
1242 v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
1243 v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1), v_scale3 = v_setall_s32(INTER_TAB_SIZE);
1244 int span = v_float32x4::nlanes;
1245 for( ; x1 <= bcols - span * 2; x1 += span * 2 )
1246 {
1247 v_float32x4 v_fx, v_fy;
1248 v_load_deinterleave(sXY + (x1 << 1), v_fx, v_fy);
1249 v_int32x4 v_sx0 = v_round(v_fx * v_scale);
1250 v_int32x4 v_sy0 = v_round(v_fy * v_scale);
1251 v_load_deinterleave(sXY + ((x1 + span) << 1), v_fx, v_fy);
1252 v_int32x4 v_sx1 = v_round(v_fx * v_scale);
1253 v_int32x4 v_sy1 = v_round(v_fy * v_scale);
1254 v_int32x4 v_v0 = v_muladd(v_scale3, (v_sy0 & v_scale2), (v_sx0 & v_scale2));
1255 v_int32x4 v_v1 = v_muladd(v_scale3, (v_sy1 & v_scale2), (v_sx1 & v_scale2));
1256 v_uint16x8 v_v8 = v_reinterpret_as_u16(v_pack(v_v0, v_v1));
1257 v_store(A + x1, v_v8);
1258 v_int16x8 v_dx = v_pack(v_shr<INTER_BITS>(v_sx0), v_shr<INTER_BITS>(v_sx1));
1259 v_int16x8 v_dy = v_pack(v_shr<INTER_BITS>(v_sy0), v_shr<INTER_BITS>(v_sy1));
1260 v_store_interleave(XY + (x1 << 1), v_dx, v_dy);
1261 }
1262 }
1263 #endif
1264
1265 for( ; x1 < bcols; x1++ )
1266 {
1267 int sx = cvRound(sXY[x1*2]*INTER_TAB_SIZE);
1268 int sy = cvRound(sXY[x1*2+1]*INTER_TAB_SIZE);
1269 int v = (sy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (sx & (INTER_TAB_SIZE-1));
1270 XY[x1*2] = saturate_cast<short>(sx >> INTER_BITS);
1271 XY[x1*2+1] = saturate_cast<short>(sy >> INTER_BITS);
1272 A[x1] = (ushort)v;
1273 }
1274 }
1275 }
1276 ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
1277 }
1278 }
1279 }
1280
1281 private:
1282 const Mat* src;
1283 Mat* dst;
1284 const Mat *m1, *m2;
1285 int borderType;
1286 Scalar borderValue;
1287 int planar_input;
1288 RemapNNFunc nnfunc;
1289 RemapFunc ifunc;
1290 const void *ctab;
1291 };
1292
1293 #ifdef HAVE_OPENCL
1294
ocl_remap(InputArray _src,OutputArray _dst,InputArray _map1,InputArray _map2,int interpolation,int borderType,const Scalar & borderValue)1295 static bool ocl_remap(InputArray _src, OutputArray _dst, InputArray _map1, InputArray _map2,
1296 int interpolation, int borderType, const Scalar& borderValue)
1297 {
1298 const ocl::Device & dev = ocl::Device::getDefault();
1299 int cn = _src.channels(), type = _src.type(), depth = _src.depth(),
1300 rowsPerWI = dev.isIntel() ? 4 : 1;
1301
1302 if (borderType == BORDER_TRANSPARENT || !(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST)
1303 || _map1.type() == CV_16SC1 || _map2.type() == CV_16SC1)
1304 return false;
1305
1306 UMat src = _src.getUMat(), map1 = _map1.getUMat(), map2 = _map2.getUMat();
1307
1308 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.empty())) ||
1309 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.empty())) )
1310 {
1311 if (map1.type() != CV_16SC2)
1312 std::swap(map1, map2);
1313 }
1314 else
1315 CV_Assert( map1.type() == CV_32FC2 || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
1316
1317 _dst.create(map1.size(), type);
1318 UMat dst = _dst.getUMat();
1319
1320 String kernelName = "remap";
1321 if (map1.type() == CV_32FC2 && map2.empty())
1322 kernelName += "_32FC2";
1323 else if (map1.type() == CV_16SC2)
1324 {
1325 kernelName += "_16SC2";
1326 if (!map2.empty())
1327 kernelName += "_16UC1";
1328 }
1329 else if (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
1330 kernelName += "_2_32FC1";
1331 else
1332 CV_Error(Error::StsBadArg, "Unsupported map types");
1333
1334 static const char * const interMap[] = { "INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC", "INTER_LINEAR", "INTER_LANCZOS" };
1335 static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
1336 "BORDER_REFLECT_101", "BORDER_TRANSPARENT" };
1337 String buildOptions = format("-D %s -D %s -D T=%s -D rowsPerWI=%d",
1338 interMap[interpolation], borderMap[borderType],
1339 ocl::typeToStr(type), rowsPerWI);
1340
1341 if (interpolation != INTER_NEAREST)
1342 {
1343 char cvt[3][40];
1344 int wdepth = std::max(CV_32F, depth);
1345 buildOptions = buildOptions
1346 + format(" -D WT=%s -D convertToT=%s -D convertToWT=%s"
1347 " -D convertToWT2=%s -D WT2=%s",
1348 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
1349 ocl::convertTypeStr(wdepth, depth, cn, cvt[0]),
1350 ocl::convertTypeStr(depth, wdepth, cn, cvt[1]),
1351 ocl::convertTypeStr(CV_32S, wdepth, 2, cvt[2]),
1352 ocl::typeToStr(CV_MAKE_TYPE(wdepth, 2)));
1353 }
1354 int scalarcn = cn == 3 ? 4 : cn;
1355 int sctype = CV_MAKETYPE(depth, scalarcn);
1356 buildOptions += format(" -D T=%s -D T1=%s -D cn=%d -D ST=%s -D depth=%d",
1357 ocl::typeToStr(type), ocl::typeToStr(depth),
1358 cn, ocl::typeToStr(sctype), depth);
1359
1360 ocl::Kernel k(kernelName.c_str(), ocl::imgproc::remap_oclsrc, buildOptions);
1361
1362 Mat scalar(1, 1, sctype, borderValue);
1363 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst),
1364 map1arg = ocl::KernelArg::ReadOnlyNoSize(map1),
1365 scalararg = ocl::KernelArg::Constant((void*)scalar.ptr(), scalar.elemSize());
1366
1367 if (map2.empty())
1368 k.args(srcarg, dstarg, map1arg, scalararg);
1369 else
1370 k.args(srcarg, dstarg, map1arg, ocl::KernelArg::ReadOnlyNoSize(map2), scalararg);
1371
1372 size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
1373 return k.run(2, globalThreads, NULL, false);
1374 }
1375
1376 #if 0
1377 /**
1378 @deprecated with old version of cv::linearPolar
1379 */
1380 static bool ocl_linearPolar(InputArray _src, OutputArray _dst,
1381 Point2f center, double maxRadius, int flags)
1382 {
1383 UMat src_with_border; // don't scope this variable (it holds image data)
1384
1385 UMat mapx, mapy, r, cp_sp;
1386 UMat src = _src.getUMat();
1387 _dst.create(src.size(), src.type());
1388 Size dsize = src.size();
1389 r.create(Size(1, dsize.width), CV_32F);
1390 cp_sp.create(Size(1, dsize.height), CV_32FC2);
1391
1392 mapx.create(dsize, CV_32F);
1393 mapy.create(dsize, CV_32F);
1394 size_t w = dsize.width;
1395 size_t h = dsize.height;
1396 String buildOptions;
1397 unsigned mem_size = 32;
1398 if (flags & CV_WARP_INVERSE_MAP)
1399 {
1400 buildOptions = "-D InverseMap";
1401 }
1402 else
1403 {
1404 buildOptions = format("-D ForwardMap -D MEM_SIZE=%d", mem_size);
1405 }
1406 String retval;
1407 ocl::Program p(ocl::imgproc::linearPolar_oclsrc, buildOptions, retval);
1408 ocl::Kernel k("linearPolar", p);
1409 ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
1410 ocl::KernelArg ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
1411 ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);
1412
1413 if (!(flags & CV_WARP_INVERSE_MAP))
1414 {
1415
1416
1417
1418 ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
1419 float PI2_height = (float) CV_2PI / dsize.height;
1420 float maxRadius_width = (float) maxRadius / dsize.width;
1421 computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, maxRadius_width, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
1422 size_t max_dim = max(h, w);
1423 computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
1424 k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
1425 }
1426 else
1427 {
1428 const int ANGLE_BORDER = 1;
1429
1430 cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
1431 src = src_with_border;
1432 Size ssize = src_with_border.size();
1433 ssize.height -= 2 * ANGLE_BORDER;
1434 float ascale = ssize.height / ((float)CV_2PI);
1435 float pscale = ssize.width / ((float) maxRadius);
1436
1437 k.args(ocl_mapx, ocl_mapy, ascale, pscale, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
1438
1439
1440 }
1441 size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
1442 size_t localThreads[2] = { mem_size , mem_size };
1443 k.run(2, globalThreads, localThreads, false);
1444 remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
1445 return true;
1446 }
1447 static bool ocl_logPolar(InputArray _src, OutputArray _dst,
1448 Point2f center, double M, int flags)
1449 {
1450 if (M <= 0)
1451 CV_Error(CV_StsOutOfRange, "M should be >0");
1452 UMat src_with_border; // don't scope this variable (it holds image data)
1453
1454 UMat mapx, mapy, r, cp_sp;
1455 UMat src = _src.getUMat();
1456 _dst.create(src.size(), src.type());
1457 Size dsize = src.size();
1458 r.create(Size(1, dsize.width), CV_32F);
1459 cp_sp.create(Size(1, dsize.height), CV_32FC2);
1460
1461 mapx.create(dsize, CV_32F);
1462 mapy.create(dsize, CV_32F);
1463 size_t w = dsize.width;
1464 size_t h = dsize.height;
1465 String buildOptions;
1466 unsigned mem_size = 32;
1467 if (flags & CV_WARP_INVERSE_MAP)
1468 {
1469 buildOptions = "-D InverseMap";
1470 }
1471 else
1472 {
1473 buildOptions = format("-D ForwardMap -D MEM_SIZE=%d", mem_size);
1474 }
1475 String retval;
1476 ocl::Program p(ocl::imgproc::logPolar_oclsrc, buildOptions, retval);
1477 //ocl::Program p(ocl::imgproc::my_linearPolar_oclsrc, buildOptions, retval);
1478 //printf("%s\n", retval);
1479 ocl::Kernel k("logPolar", p);
1480 ocl::KernelArg ocl_mapx = ocl::KernelArg::PtrReadWrite(mapx), ocl_mapy = ocl::KernelArg::PtrReadWrite(mapy);
1481 ocl::KernelArg ocl_cp_sp = ocl::KernelArg::PtrReadWrite(cp_sp);
1482 ocl::KernelArg ocl_r = ocl::KernelArg::PtrReadWrite(r);
1483
1484 if (!(flags & CV_WARP_INVERSE_MAP))
1485 {
1486
1487
1488
1489 ocl::Kernel computeAngleRadius_Kernel("computeAngleRadius", p);
1490 float PI2_height = (float) CV_2PI / dsize.height;
1491
1492 computeAngleRadius_Kernel.args(ocl_cp_sp, ocl_r, (float)M, PI2_height, (unsigned)dsize.width, (unsigned)dsize.height);
1493 size_t max_dim = max(h, w);
1494 computeAngleRadius_Kernel.run(1, &max_dim, NULL, false);
1495 k.args(ocl_mapx, ocl_mapy, ocl_cp_sp, ocl_r, center.x, center.y, (unsigned)dsize.width, (unsigned)dsize.height);
1496 }
1497 else
1498 {
1499 const int ANGLE_BORDER = 1;
1500
1501 cv::copyMakeBorder(src, src_with_border, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
1502 src = src_with_border;
1503 Size ssize = src_with_border.size();
1504 ssize.height -= 2 * ANGLE_BORDER;
1505 float ascale = ssize.height / ((float)CV_2PI);
1506
1507
1508 k.args(ocl_mapx, ocl_mapy, ascale, (float)M, center.x, center.y, ANGLE_BORDER, (unsigned)dsize.width, (unsigned)dsize.height);
1509
1510
1511 }
1512 size_t globalThreads[2] = { (size_t)dsize.width , (size_t)dsize.height };
1513 size_t localThreads[2] = { mem_size , mem_size };
1514 k.run(2, globalThreads, localThreads, false);
1515 remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
1516 return true;
1517 }
1518 #endif
1519
1520 #endif
1521
1522 #ifdef HAVE_OPENVX
openvx_remap(Mat src,Mat dst,Mat map1,Mat map2,int interpolation,const Scalar & borderValue)1523 static bool openvx_remap(Mat src, Mat dst, Mat map1, Mat map2, int interpolation, const Scalar& borderValue)
1524 {
1525 vx_interpolation_type_e inter_type;
1526 switch (interpolation)
1527 {
1528 case INTER_LINEAR:
1529 #if VX_VERSION > VX_VERSION_1_0
1530 inter_type = VX_INTERPOLATION_BILINEAR;
1531 #else
1532 inter_type = VX_INTERPOLATION_TYPE_BILINEAR;
1533 #endif
1534 break;
1535 case INTER_NEAREST:
1536 /* NEAREST_NEIGHBOR mode disabled since OpenCV round half to even while OpenVX sample implementation round half up
1537 #if VX_VERSION > VX_VERSION_1_0
1538 inter_type = VX_INTERPOLATION_NEAREST_NEIGHBOR;
1539 #else
1540 inter_type = VX_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
1541 #endif
1542 if (!map1.empty())
1543 for (int y = 0; y < map1.rows; ++y)
1544 {
1545 float* line = map1.ptr<float>(y);
1546 for (int x = 0; x < map1.cols; ++x)
1547 line[x] = cvRound(line[x]);
1548 }
1549 if (!map2.empty())
1550 for (int y = 0; y < map2.rows; ++y)
1551 {
1552 float* line = map2.ptr<float>(y);
1553 for (int x = 0; x < map2.cols; ++x)
1554 line[x] = cvRound(line[x]);
1555 }
1556 break;
1557 */
1558 case INTER_AREA://AREA interpolation mode is unsupported
1559 default:
1560 return false;
1561 }
1562
1563 try
1564 {
1565 ivx::Context ctx = ovx::getOpenVXContext();
1566
1567 Mat a;
1568 if (dst.data != src.data)
1569 a = src;
1570 else
1571 src.copyTo(a);
1572
1573 ivx::Image
1574 ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
1575 ivx::Image::createAddressing(a.cols, a.rows, 1, (vx_int32)(a.step)), a.data),
1576 ib = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
1577 ivx::Image::createAddressing(dst.cols, dst.rows, 1, (vx_int32)(dst.step)), dst.data);
1578
1579 //ATTENTION: VX_CONTEXT_IMMEDIATE_BORDER attribute change could lead to strange issues in multi-threaded environments
1580 //since OpenVX standard says nothing about thread-safety for now
1581 ivx::border_t prevBorder = ctx.immediateBorder();
1582 ctx.setImmediateBorder(VX_BORDER_CONSTANT, (vx_uint8)(borderValue[0]));
1583
1584 ivx::Remap map = ivx::Remap::create(ctx, src.cols, src.rows, dst.cols, dst.rows);
1585 if (map1.empty()) map.setMappings(map2);
1586 else if (map2.empty()) map.setMappings(map1);
1587 else map.setMappings(map1, map2);
1588 ivx::IVX_CHECK_STATUS(vxuRemap(ctx, ia, map, inter_type, ib));
1589 #ifdef VX_VERSION_1_1
1590 ib.swapHandle();
1591 ia.swapHandle();
1592 #endif
1593
1594 ctx.setImmediateBorder(prevBorder);
1595 }
1596 catch (const ivx::RuntimeError & e)
1597 {
1598 CV_Error(CV_StsInternal, e.what());
1599 return false;
1600 }
1601 catch (const ivx::WrapperError & e)
1602 {
1603 CV_Error(CV_StsInternal, e.what());
1604 return false;
1605 }
1606 return true;
1607 }
1608 #endif
1609
1610 #if defined HAVE_IPP && !IPP_DISABLE_REMAP
1611
1612 typedef IppStatus (CV_STDCALL * ippiRemap)(const void * pSrc, IppiSize srcSize, int srcStep, IppiRect srcRoi,
1613 const Ipp32f* pxMap, int xMapStep, const Ipp32f* pyMap, int yMapStep,
1614 void * pDst, int dstStep, IppiSize dstRoiSize, int interpolation);
1615
1616 class IPPRemapInvoker :
1617 public ParallelLoopBody
1618 {
1619 public:
IPPRemapInvoker(Mat & _src,Mat & _dst,Mat & _xmap,Mat & _ymap,ippiRemap _ippFunc,int _ippInterpolation,int _borderType,const Scalar & _borderValue,bool * _ok)1620 IPPRemapInvoker(Mat & _src, Mat & _dst, Mat & _xmap, Mat & _ymap, ippiRemap _ippFunc,
1621 int _ippInterpolation, int _borderType, const Scalar & _borderValue, bool * _ok) :
1622 ParallelLoopBody(), src(_src), dst(_dst), map1(_xmap), map2(_ymap), ippFunc(_ippFunc),
1623 ippInterpolation(_ippInterpolation), borderType(_borderType), borderValue(_borderValue), ok(_ok)
1624 {
1625 *ok = true;
1626 }
1627
operator ()(const Range & range) const1628 virtual void operator() (const Range & range) const
1629 {
1630 IppiRect srcRoiRect = { 0, 0, src.cols, src.rows };
1631 Mat dstRoi = dst.rowRange(range);
1632 IppiSize dstRoiSize = ippiSize(dstRoi.size());
1633 int type = dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1634
1635 if (borderType == BORDER_CONSTANT &&
1636 !IPPSet(borderValue, dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, cn, depth))
1637 {
1638 *ok = false;
1639 return;
1640 }
1641
1642 if (CV_INSTRUMENT_FUN_IPP(ippFunc, src.ptr(), ippiSize(src.size()), (int)src.step, srcRoiRect,
1643 map1.ptr<Ipp32f>(), (int)map1.step, map2.ptr<Ipp32f>(), (int)map2.step,
1644 dstRoi.ptr(), (int)dstRoi.step, dstRoiSize, ippInterpolation) < 0)
1645 *ok = false;
1646 else
1647 {
1648 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1649 }
1650 }
1651
1652 private:
1653 Mat & src, & dst, & map1, & map2;
1654 ippiRemap ippFunc;
1655 int ippInterpolation, borderType;
1656 Scalar borderValue;
1657 bool * ok;
1658 };
1659
1660 #endif
1661
1662 }
1663
remap(InputArray _src,OutputArray _dst,InputArray _map1,InputArray _map2,int interpolation,int borderType,const Scalar & borderValue)1664 void cv::remap( InputArray _src, OutputArray _dst,
1665 InputArray _map1, InputArray _map2,
1666 int interpolation, int borderType, const Scalar& borderValue )
1667 {
1668 CV_INSTRUMENT_REGION();
1669
1670 static RemapNNFunc nn_tab[] =
1671 {
1672 remapNearest<uchar>, remapNearest<schar>, remapNearest<ushort>, remapNearest<short>,
1673 remapNearest<int>, remapNearest<float>, remapNearest<double>, 0
1674 };
1675
1676 static RemapFunc linear_tab[] =
1677 {
1678 remapBilinear<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, RemapVec_8u, short>, 0,
1679 remapBilinear<Cast<float, ushort>, RemapNoVec, float>,
1680 remapBilinear<Cast<float, short>, RemapNoVec, float>, 0,
1681 remapBilinear<Cast<float, float>, RemapNoVec, float>,
1682 remapBilinear<Cast<double, double>, RemapNoVec, float>, 0
1683 };
1684
1685 static RemapFunc cubic_tab[] =
1686 {
1687 remapBicubic<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
1688 remapBicubic<Cast<float, ushort>, float, 1>,
1689 remapBicubic<Cast<float, short>, float, 1>, 0,
1690 remapBicubic<Cast<float, float>, float, 1>,
1691 remapBicubic<Cast<double, double>, float, 1>, 0
1692 };
1693
1694 static RemapFunc lanczos4_tab[] =
1695 {
1696 remapLanczos4<FixedPtCast<int, uchar, INTER_REMAP_COEF_BITS>, short, INTER_REMAP_COEF_SCALE>, 0,
1697 remapLanczos4<Cast<float, ushort>, float, 1>,
1698 remapLanczos4<Cast<float, short>, float, 1>, 0,
1699 remapLanczos4<Cast<float, float>, float, 1>,
1700 remapLanczos4<Cast<double, double>, float, 1>, 0
1701 };
1702
1703 CV_Assert( !_map1.empty() );
1704 CV_Assert( _map2.empty() || (_map2.size() == _map1.size()));
1705
1706 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
1707 ocl_remap(_src, _dst, _map1, _map2, interpolation, borderType, borderValue))
1708
1709 Mat src = _src.getMat(), map1 = _map1.getMat(), map2 = _map2.getMat();
1710 _dst.create( map1.size(), src.type() );
1711 Mat dst = _dst.getMat();
1712
1713
1714 CV_OVX_RUN(
1715 src.type() == CV_8UC1 && dst.type() == CV_8UC1 &&
1716 !ovx::skipSmallImages<VX_KERNEL_REMAP>(src.cols, src.rows) &&
1717 (borderType& ~BORDER_ISOLATED) == BORDER_CONSTANT &&
1718 ((map1.type() == CV_32FC2 && map2.empty() && map1.size == dst.size) ||
1719 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1 && map1.size == dst.size && map2.size == dst.size) ||
1720 (map1.empty() && map2.type() == CV_32FC2 && map2.size == dst.size)) &&
1721 ((borderType & BORDER_ISOLATED) != 0 || !src.isSubmatrix()),
1722 openvx_remap(src, dst, map1, map2, interpolation, borderValue));
1723
1724 CV_Assert( dst.cols < SHRT_MAX && dst.rows < SHRT_MAX && src.cols < SHRT_MAX && src.rows < SHRT_MAX );
1725
1726 if( dst.data == src.data )
1727 src = src.clone();
1728
1729 if( interpolation == INTER_AREA )
1730 interpolation = INTER_LINEAR;
1731
1732 int type = src.type(), depth = CV_MAT_DEPTH(type);
1733
1734 #if defined HAVE_IPP && !IPP_DISABLE_REMAP
1735 CV_IPP_CHECK()
1736 {
1737 if ((interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_NEAREST) &&
1738 map1.type() == CV_32FC1 && map2.type() == CV_32FC1 &&
1739 (borderType == BORDER_CONSTANT || borderType == BORDER_TRANSPARENT))
1740 {
1741 int ippInterpolation =
1742 interpolation == INTER_NEAREST ? IPPI_INTER_NN :
1743 interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : IPPI_INTER_CUBIC;
1744
1745 ippiRemap ippFunc =
1746 type == CV_8UC1 ? (ippiRemap)ippiRemap_8u_C1R :
1747 type == CV_8UC3 ? (ippiRemap)ippiRemap_8u_C3R :
1748 type == CV_8UC4 ? (ippiRemap)ippiRemap_8u_C4R :
1749 type == CV_16UC1 ? (ippiRemap)ippiRemap_16u_C1R :
1750 type == CV_16UC3 ? (ippiRemap)ippiRemap_16u_C3R :
1751 type == CV_16UC4 ? (ippiRemap)ippiRemap_16u_C4R :
1752 type == CV_32FC1 ? (ippiRemap)ippiRemap_32f_C1R :
1753 type == CV_32FC3 ? (ippiRemap)ippiRemap_32f_C3R :
1754 type == CV_32FC4 ? (ippiRemap)ippiRemap_32f_C4R : 0;
1755
1756 if (ippFunc)
1757 {
1758 bool ok;
1759 IPPRemapInvoker invoker(src, dst, map1, map2, ippFunc, ippInterpolation,
1760 borderType, borderValue, &ok);
1761 Range range(0, dst.rows);
1762 parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
1763
1764 if (ok)
1765 {
1766 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1767 return;
1768 }
1769 setIppErrorStatus();
1770 }
1771 }
1772 }
1773 #endif
1774
1775 RemapNNFunc nnfunc = 0;
1776 RemapFunc ifunc = 0;
1777 const void* ctab = 0;
1778 bool fixpt = depth == CV_8U;
1779 bool planar_input = false;
1780
1781 if( interpolation == INTER_NEAREST )
1782 {
1783 nnfunc = nn_tab[depth];
1784 CV_Assert( nnfunc != 0 );
1785 }
1786 else
1787 {
1788 if( interpolation == INTER_LINEAR )
1789 ifunc = linear_tab[depth];
1790 else if( interpolation == INTER_CUBIC ){
1791 ifunc = cubic_tab[depth];
1792 CV_Assert( _src.channels() <= 4 );
1793 }
1794 else if( interpolation == INTER_LANCZOS4 ){
1795 ifunc = lanczos4_tab[depth];
1796 CV_Assert( _src.channels() <= 4 );
1797 }
1798 else
1799 CV_Error( CV_StsBadArg, "Unknown interpolation method" );
1800 CV_Assert( ifunc != 0 );
1801 ctab = initInterTab2D( interpolation, fixpt );
1802 }
1803
1804 const Mat *m1 = &map1, *m2 = &map2;
1805
1806 if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || map2.empty())) ||
1807 (map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || map1.empty())) )
1808 {
1809 if( map1.type() != CV_16SC2 )
1810 std::swap(m1, m2);
1811 }
1812 else
1813 {
1814 CV_Assert( ((map1.type() == CV_32FC2 || map1.type() == CV_16SC2) && map2.empty()) ||
1815 (map1.type() == CV_32FC1 && map2.type() == CV_32FC1) );
1816 planar_input = map1.channels() == 1;
1817 }
1818
1819 RemapInvoker invoker(src, dst, m1, m2,
1820 borderType, borderValue, planar_input, nnfunc, ifunc,
1821 ctab);
1822 parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
1823 }
1824
1825
// Converts remap() coordinate maps between representations:
//   - CV_32FC1 + CV_32FC1 (separate float x / y maps)
//   - CV_32FC2            (interleaved float xy map)
//   - CV_16SC2 [+ CV_16UC1/CV_16SC1]  (fixed-point: integer xy pairs plus an
//     optional table index packing the sub-pixel fractions)
// In the fixed-point form each dst2 entry packs the x fraction in the low
// INTER_BITS bits and the y fraction above it (see the scalar tails below).
// With nninterpolate=true the fractional part is dropped entirely.
void cv::convertMaps( InputArray _map1, InputArray _map2,
                      OutputArray _dstmap1, OutputArray _dstmap2,
                      int dstm1type, bool nninterpolate )
{
    CV_INSTRUMENT_REGION();

    Mat map1 = _map1.getMat(), map2 = _map2.getMat(), dstmap1, dstmap2;
    Size size = map1.size();
    const Mat *m1 = &map1, *m2 = &map2;
    int m1type = m1->type(), m2type = m2->type();

    // Accepted input combinations; note a 16SC2 map may arrive in either slot.
    CV_Assert( (m1type == CV_16SC2 && (nninterpolate || m2type == CV_16UC1 || m2type == CV_16SC1)) ||
               (m2type == CV_16SC2 && (nninterpolate || m1type == CV_16UC1 || m1type == CV_16SC1)) ||
               (m1type == CV_32FC1 && m2type == CV_32FC1) ||
               (m1type == CV_32FC2 && m2->empty()) );

    // Normalize so that the 16SC2 coordinate map (if any) is always m1.
    if( m2type == CV_16SC2 )
    {
        std::swap( m1, m2 );
        std::swap( m1type, m2type );
    }

    // Default output type: convert fixed-point -> float2, anything else -> fixed-point.
    if( dstm1type <= 0 )
        dstm1type = m1type == CV_16SC2 ? CV_32FC2 : CV_16SC2;
    CV_Assert( dstm1type == CV_16SC2 || dstm1type == CV_32FC1 || dstm1type == CV_32FC2 );
    _dstmap1.create( size, dstm1type );
    dstmap1 = _dstmap1.getMat();

    // The secondary map is only produced for interpolating fixed-point or
    // planar-float outputs; otherwise it is released.
    if( !nninterpolate && dstm1type != CV_32FC2 )
    {
        _dstmap2.create( size, dstm1type == CV_16SC2 ? CV_16UC1 : CV_32FC1 );
        dstmap2 = _dstmap2.getMat();
    }
    else
        _dstmap2.release();

    // Trivial cases: same type, or nearest-neighbor 16SC2<->32FC2 where a
    // plain element-wise conversion is sufficient.
    if( m1type == dstm1type || (nninterpolate &&
        ((m1type == CV_16SC2 && dstm1type == CV_32FC2) ||
        (m1type == CV_32FC2 && dstm1type == CV_16SC2))) )
    {
        m1->convertTo( dstmap1, dstmap1.type() );
        if( !dstmap2.empty() && dstmap2.type() == m2->type() )
            m2->copyTo( dstmap2 );
        return;
    }

    // Planar float -> interleaved float is just a channel merge.
    if( m1type == CV_32FC1 && dstm1type == CV_32FC2 )
    {
        Mat vdata[] = { *m1, *m2 };
        merge( vdata, 2, dstmap1 );
        return;
    }

    // Interleaved float -> planar float is just a channel split.
    if( m1type == CV_32FC2 && dstm1type == CV_32FC1 )
    {
        Mat mv[] = { dstmap1, dstmap2 };
        split( *m1, mv );
        return;
    }

    // If every buffer is continuous, process all pixels as a single row.
    if( m1->isContinuous() && (m2->empty() || m2->isContinuous()) &&
        dstmap1.isContinuous() && (dstmap2.empty() || dstmap2.isContinuous()) )
    {
        size.width *= size.height;
        size.height = 1;
    }

#if CV_TRY_SSE4_1
    bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif

    const float scale = 1.f/INTER_TAB_SIZE;
    int x, y;
    for( y = 0; y < size.height; y++ )
    {
        // Row pointers: the same rows reinterpreted as float or as packed
        // 16-bit data, depending on which conversion branch runs below.
        const float* src1f = m1->ptr<float>(y);
        const float* src2f = m2->ptr<float>(y);
        const short* src1 = (const short*)src1f;
        const ushort* src2 = (const ushort*)src2f;

        float* dst1f = dstmap1.ptr<float>(y);
        float* dst2f = dstmap2.ptr<float>(y);
        short* dst1 = (short*)dst1f;
        ushort* dst2 = (ushort*)dst2f;
        x = 0;

        if( m1type == CV_32FC1 && dstm1type == CV_16SC2 )
        {
            // Planar float -> fixed-point 16SC2 (+ optional fraction table).
            if( nninterpolate )
            {
#if CV_TRY_SSE4_1
                if (useSSE4_1)
                    opt_SSE4_1::convertMaps_nninterpolate32f1c16s_SSE41(src1f, src2f, dst1, size.width);
                else
#endif
                {
#if CV_SIMD128
                    {
                        int span = v_int16x8::nlanes;
                        for( ; x <= size.width - span; x += span )
                        {
                            v_int16x8 v_dst[2];
                            // Round 8 floats and pack them into one int16x8 lane.
#define CV_PACK_MAP(X) v_pack(v_round(v_load(X)), v_round(v_load((X)+4)))
                            v_dst[0] = CV_PACK_MAP(src1f + x);
                            v_dst[1] = CV_PACK_MAP(src2f + x);
#undef CV_PACK_MAP
                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);
                        }
                    }
#endif
                    // Scalar tail: round-to-nearest via saturate_cast.
                    for( ; x < size.width; x++ )
                    {
                        dst1[x*2] = saturate_cast<short>(src1f[x]);
                        dst1[x*2+1] = saturate_cast<short>(src2f[x]);
                    }
                }
            }
            else
            {
#if CV_TRY_SSE4_1
                if (useSSE4_1)
                    opt_SSE4_1::convertMaps_32f1c16s_SSE41(src1f, src2f, dst1, dst2, size.width);
                else
#endif
                {
#if CV_SIMD128
                    {
                        v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                        v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
                        v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
                        int span = v_float32x4::nlanes;
                        for( ; x <= size.width - span * 2; x += span * 2 )
                        {
                            // Scale to INTER_TAB_SIZE sub-pixel units, then split
                            // into integer coordinate (>> INTER_BITS) and fraction.
                            v_int32x4 v_ix0 = v_round(v_scale * (v_load(src1f + x)));
                            v_int32x4 v_ix1 = v_round(v_scale * (v_load(src1f + x + span)));
                            v_int32x4 v_iy0 = v_round(v_scale * (v_load(src2f + x)));
                            v_int32x4 v_iy1 = v_round(v_scale * (v_load(src2f + x + span)));

                            v_int16x8 v_dst[2];
                            v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
                            v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);

                            // Fraction index = fy*INTER_TAB_SIZE + fx.
                            v_int32x4 v_dst0 = v_muladd(v_scale3, (v_iy0 & v_mask), (v_ix0 & v_mask));
                            v_int32x4 v_dst1 = v_muladd(v_scale3, (v_iy1 & v_mask), (v_ix1 & v_mask));
                            v_store(dst2 + x, v_pack_u(v_dst0, v_dst1));
                        }
                    }
#endif
                    for( ; x < size.width; x++ )
                    {
                        int ix = saturate_cast<int>(src1f[x]*INTER_TAB_SIZE);
                        int iy = saturate_cast<int>(src2f[x]*INTER_TAB_SIZE);
                        dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
                        dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
                        dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
                    }
                }
            }
        }
        else if( m1type == CV_32FC2 && dstm1type == CV_16SC2 )
        {
            // Interleaved float -> fixed-point 16SC2 (+ optional fraction table).
            if( nninterpolate )
            {
#if CV_SIMD128
                int span = v_float32x4::nlanes;
                {
                    // Interleaved layout lets us treat the row as 2*width floats.
                    for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
                        v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
                                                 v_round(v_load(src1f + x + span))));
                }
#endif
                for( ; x < size.width; x++ )
                {
                    dst1[x*2] = saturate_cast<short>(src1f[x*2]);
                    dst1[x*2+1] = saturate_cast<short>(src1f[x*2+1]);
                }
            }
            else
            {
#if CV_TRY_SSE4_1
                if( useSSE4_1 )
                    opt_SSE4_1::convertMaps_32f2c16s_SSE41(src1f, dst1, dst2, size.width);
                else
#endif
                {
#if CV_SIMD128
                    {
                        v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
                        v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
                        v_int32x4 v_scale3 = v_setall_s32(INTER_TAB_SIZE);
                        int span = v_uint16x8::nlanes;
                        for (; x <= size.width - span; x += span )
                        {
                            // De-interleave xy pairs, then do the same scale/split
                            // as the planar branch above.
                            v_float32x4 v_src0[2], v_src1[2];
                            v_load_deinterleave(src1f + (x << 1), v_src0[0], v_src0[1]);
                            v_load_deinterleave(src1f + (x << 1) + span, v_src1[0], v_src1[1]);
                            v_int32x4 v_ix0 = v_round(v_src0[0] * v_scale);
                            v_int32x4 v_ix1 = v_round(v_src1[0] * v_scale);
                            v_int32x4 v_iy0 = v_round(v_src0[1] * v_scale);
                            v_int32x4 v_iy1 = v_round(v_src1[1] * v_scale);

                            v_int16x8 v_dst[2];
                            v_dst[0] = v_pack(v_shr<INTER_BITS>(v_ix0), v_shr<INTER_BITS>(v_ix1));
                            v_dst[1] = v_pack(v_shr<INTER_BITS>(v_iy0), v_shr<INTER_BITS>(v_iy1));
                            v_store_interleave(dst1 + (x << 1), v_dst[0], v_dst[1]);

                            v_store(dst2 + x, v_pack_u(
                                v_muladd(v_scale3, (v_iy0 & v_mask), (v_ix0 & v_mask)),
                                v_muladd(v_scale3, (v_iy1 & v_mask), (v_ix1 & v_mask))));
                        }
                    }
#endif
                    for( ; x < size.width; x++ )
                    {
                        int ix = saturate_cast<int>(src1f[x*2]*INTER_TAB_SIZE);
                        int iy = saturate_cast<int>(src1f[x*2+1]*INTER_TAB_SIZE);
                        dst1[x*2] = saturate_cast<short>(ix >> INTER_BITS);
                        dst1[x*2+1] = saturate_cast<short>(iy >> INTER_BITS);
                        dst2[x] = (ushort)((iy & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (ix & (INTER_TAB_SIZE-1)));
                    }
                }
            }
        }
        else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
        {
            // Fixed-point -> planar float: coordinate + fraction/INTER_TAB_SIZE.
#if CV_SIMD128
            {
                v_uint16x8 v_mask2 = v_setall_u16(INTER_TAB_SIZE2-1);
                v_uint32x4 v_zero = v_setzero_u32(), v_mask = v_setall_u32(INTER_TAB_SIZE-1);
                v_float32x4 v_scale = v_setall_f32(scale);
                int span = v_float32x4::nlanes;
                for( ; x <= size.width - span * 2; x += span * 2 )
                {
                    // Fractions default to zero when no secondary map is given.
                    v_uint32x4 v_fxy1, v_fxy2;
                    if ( src2 )
                    {
                        v_uint16x8 v_src2 = v_load(src2 + x) & v_mask2;
                        v_expand(v_src2, v_fxy1, v_fxy2);
                    }
                    else
                        v_fxy1 = v_fxy2 = v_zero;

                    v_int16x8 v_src[2];
                    v_int32x4 v_src0[2], v_src1[2];
                    v_load_deinterleave(src1 + (x << 1), v_src[0], v_src[1]);
                    v_expand(v_src[0], v_src0[0], v_src0[1]);
                    v_expand(v_src[1], v_src1[0], v_src1[1]);
                    // x gets the low-bit fraction, y the high-bit fraction.
#define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32((FXY) & v_mask)),\
                                          v_cvt_f32(v_reinterpret_as_s32(X)))
#define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32(v_reinterpret_as_s32((FXY) >> INTER_BITS)),\
                                          v_cvt_f32(v_reinterpret_as_s32(Y)))
                    v_float32x4 v_dst1 = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
                    v_float32x4 v_dst2 = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
                    v_store(dst1f + x, v_dst1);
                    v_store(dst2f + x, v_dst2);

                    v_dst1 = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
                    v_dst2 = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
                    v_store(dst1f + x + span, v_dst1);
                    v_store(dst2f + x + span, v_dst2);
#undef CV_COMPUTE_MAP_X
#undef CV_COMPUTE_MAP_Y
                }
            }
#endif
            for( ; x < size.width; x++ )
            {
                int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
                dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
                dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
            }
        }
        else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
        {
            // Fixed-point -> interleaved float: same math, interleaved store.
#if CV_SIMD128
            {
                v_int16x8 v_mask2 = v_setall_s16(INTER_TAB_SIZE2-1);
                v_int32x4 v_zero = v_setzero_s32(), v_mask = v_setall_s32(INTER_TAB_SIZE-1);
                v_float32x4 v_scale = v_setall_f32(scale);
                int span = v_int16x8::nlanes;
                for( ; x <= size.width - span; x += span )
                {
                    v_int32x4 v_fxy1, v_fxy2;
                    if (src2)
                    {
                        v_int16x8 v_src2 = v_load((short *)src2 + x) & v_mask2;
                        v_expand(v_src2, v_fxy1, v_fxy2);
                    }
                    else
                        v_fxy1 = v_fxy2 = v_zero;

                    v_int16x8 v_src[2];
                    v_int32x4 v_src0[2], v_src1[2];
                    v_float32x4 v_dst[2];
                    v_load_deinterleave(src1 + (x << 1), v_src[0], v_src[1]);
                    v_expand(v_src[0], v_src0[0], v_src0[1]);
                    v_expand(v_src[1], v_src1[0], v_src1[1]);

#define CV_COMPUTE_MAP_X(X, FXY) v_muladd(v_scale, v_cvt_f32((FXY) & v_mask), v_cvt_f32(X))
#define CV_COMPUTE_MAP_Y(Y, FXY) v_muladd(v_scale, v_cvt_f32((FXY) >> INTER_BITS), v_cvt_f32(Y))
                    v_dst[0] = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1);
                    v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[0], v_fxy1);
                    v_store_interleave(dst1f + (x << 1), v_dst[0], v_dst[1]);

                    v_dst[0] = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2);
                    v_dst[1] = CV_COMPUTE_MAP_Y(v_src1[1], v_fxy2);
                    v_store_interleave(dst1f + (x << 1) + span, v_dst[0], v_dst[1]);
#undef CV_COMPUTE_MAP_X
#undef CV_COMPUTE_MAP_Y
                }
            }
#endif
            for( ; x < size.width; x++ )
            {
                int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
                dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
                dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
            }
        }
        else
            CV_Error( CV_StsNotImplemented, "Unsupported combination of input/output matrices" );
    }
}
2150
2151
2152 namespace cv
2153 {
2154
// Parallel body for warpAffine: each worker handles a horizontal stripe of
// the destination, converts the inverse affine transform into a blocked
// fixed-point coordinate map (16SC2 xy + 16U sub-pixel index), and delegates
// the actual resampling to remap() on each block.
class WarpAffineInvoker :
    public ParallelLoopBody
{
public:
    // adelta/bdelta hold precomputed per-column terms M[0]*x and M[3]*x in
    // AB_SCALE fixed-point (filled by hal::warpAffine); M is the 2x3 inverse map.
    WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType,
                      const Scalar &_borderValue, int *_adelta, int *_bdelta, const double *_M) :
        ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation),
        borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta),
        M(_M)
    {
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        // Work in blocks of at most BLOCK_SZ*BLOCK_SZ pixels so XY/A buffers
        // stay small and cache-resident.
        const int BLOCK_SZ = 64;
        AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
        short *XY = __XY.data(), *A = __A.data();
        const int AB_BITS = MAX(10, (int)INTER_BITS);
        const int AB_SCALE = 1 << AB_BITS;
        // Rounding bias: half a pixel for NN, half an interpolation-table step otherwise.
        int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
#if CV_TRY_AVX2
        bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
#endif
#if CV_TRY_SSE4_1
        bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif

        // Choose block shape (wide, half-height) clipped to the destination size.
        int bh0 = std::min(BLOCK_SZ/2, dst.rows);
        int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
        bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, dst.rows);

        for( y = range.start; y < range.end; y += bh0 )
        {
            for( x = 0; x < dst.cols; x += bw0 )
            {
                int bw = std::min( bw0, dst.cols - x);
                int bh = std::min( bh0, range.end - y);

                // Views over the scratch map and the destination block.
                Mat _XY(bh, bw, CV_16SC2, XY);
                Mat dpart(dst, Rect(x, y, bw, bh));

                for( y1 = 0; y1 < bh; y1++ )
                {
                    short* xy = XY + y1*bw*2;
                    // Row-constant part of the transform in AB_SCALE fixed-point;
                    // the per-column part comes from adelta/bdelta.
                    int X0 = saturate_cast<int>((M[1]*(y + y1) + M[2])*AB_SCALE) + round_delta;
                    int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;

                    if( interpolation == INTER_NEAREST )
                    {
                        x1 = 0;
#if CV_TRY_SSE4_1
                        if( useSSE4_1 )
                            opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw);
                        else
#endif
                        {
#if CV_SIMD128
                            {
                                v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
                                int span = v_uint16x8::nlanes;
                                for( ; x1 <= bw - span; x1 += span )
                                {
                                    v_int16x8 v_dst[2];
                                    // (base + delta) >> AB_BITS for 8 columns, packed to int16.
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(shift+v_load(ptr + offset)),\
                                                v_shr<AB_BITS>(shift+v_load(ptr + offset + 4)))
                                    v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0);
                                    v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0);
#undef CV_CONVERT_MAP
                                    v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
                                }
                            }
#endif
                            for( ; x1 < bw; x1++ )
                            {
                                int X = (X0 + adelta[x+x1]) >> AB_BITS;
                                int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
                                xy[x1*2] = saturate_cast<short>(X);
                                xy[x1*2+1] = saturate_cast<short>(Y);
                            }
                        }
                    }
                    else
                    {
                        // Interpolating path: also emit the packed sub-pixel
                        // fraction index into alpha (y fraction in the high
                        // INTER_BITS, x fraction in the low ones).
                        short* alpha = A + y1*bw;
                        x1 = 0;
#if CV_TRY_AVX2
                        if ( useAVX2 )
                            x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
#endif
#if CV_SIMD128
                        {
                            v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
                            v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
                            int span = v_float32x4::nlanes;
                            for( ; x1 <= bw - span * 2; x1 += span * 2 )
                            {
                                // Keep INTER_BITS fractional bits while reducing from AB_BITS.
                                v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v__X0 + v_load(adelta + x + x1));
                                v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v__Y0 + v_load(bdelta + x + x1));
                                v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v__X0 + v_load(adelta + x + x1 + span));
                                v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v__Y0 + v_load(bdelta + x + x1 + span));

                                v_int16x8 v_xy[2];
                                v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
                                v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
                                v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);

                                v_int32x4 v_alpha0 = v_shl<INTER_BITS>(v_Y0 & v_mask) | (v_X0 & v_mask);
                                v_int32x4 v_alpha1 = v_shl<INTER_BITS>(v_Y1 & v_mask) | (v_X1 & v_mask);
                                v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
                            }
                        }
#endif
                        for( ; x1 < bw; x1++ )
                        {
                            int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
                            int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
                            xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
                            xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
                            alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                                        (X & (INTER_TAB_SIZE-1)));
                        }
                    }
                }

                // Resample this block through the generic remap machinery.
                if( interpolation == INTER_NEAREST )
                    remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
                else
                {
                    Mat _matA(bh, bw, CV_16U, A);
                    remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
                }
            }
        }
    }

private:
    Mat src;
    Mat dst;
    int interpolation, borderType;
    Scalar borderValue;
    // Not owned: point into the caller's AutoBuffer / matrix, which must
    // outlive the parallel_for_ invocation.
    int *adelta, *bdelta;
    const double *M;
};
2298
2299
#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
// Signature shared by the ippiWarpAffine / ippiWarpAffineBack variants used below.
typedef IppStatus (CV_STDCALL* ippiWarpAffineBackFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [2][3], int);

// Parallel body that runs one IPP warp-affine call per destination stripe.
// Failure of any stripe is reported through the shared *ok flag so the caller
// can fall back to the generic implementation.
class IPPWarpAffineInvoker :
    public ParallelLoopBody
{
public:
    IPPWarpAffineInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[2][3], int &_interpolation, int _borderType,
                         const Scalar &_borderValue, ippiWarpAffineBackFunc _func, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
        borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
    {
        *ok = true;
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        IppiSize srcsize = { src.cols, src.rows };
        IppiRect srcroi = { 0, 0, src.cols, src.rows };
        // Each worker writes only its own row range of dst.
        IppiRect dstroi = { 0, range.start, dst.cols, range.end - range.start };
        int cnn = src.channels();
        if( borderType == BORDER_CONSTANT )
        {
            // IPP does not fill constant borders itself: pre-fill the stripe.
            IppiSize setSize = { dst.cols, range.end - range.start };
            void *dataPointer = dst.ptr(range.start);
            if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
            {
                *ok = false;
                return;
            }
        }

        // Aug 2013: problem in IPP 7.1, 8.0 : sometimes function return ippStsCoeffErr
        IppStatus status = CV_INSTRUMENT_FUN_IPP(func,( src.ptr(), srcsize, (int)src.step[0], srcroi, dst.ptr(),
                                                (int)dst.step[0], dstroi, coeffs, mode ));
        if( status < 0)
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }
private:
    Mat &src;
    Mat &dst;
    int mode;
    double (&coeffs)[2][3];
    int borderType;
    Scalar borderValue;
    ippiWarpAffineBackFunc func;
    bool *ok;               // shared success flag, set false on any stripe failure
    const IPPWarpAffineInvoker& operator= (const IPPWarpAffineInvoker&);
};
#endif
2354
2355 #ifdef HAVE_OPENCL
2356
2357 enum { OCL_OP_PERSPECTIVE = 1, OCL_OP_AFFINE = 0 };
2358
ocl_warpTransform_cols4(InputArray _src,OutputArray _dst,InputArray _M0,Size dsize,int flags,int borderType,const Scalar & borderValue,int op_type)2359 static bool ocl_warpTransform_cols4(InputArray _src, OutputArray _dst, InputArray _M0,
2360 Size dsize, int flags, int borderType, const Scalar& borderValue,
2361 int op_type)
2362 {
2363 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
2364 const ocl::Device & dev = ocl::Device::getDefault();
2365 int type = _src.type(), dtype = _dst.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2366
2367 int interpolation = flags & INTER_MAX;
2368 if( interpolation == INTER_AREA )
2369 interpolation = INTER_LINEAR;
2370
2371 if ( !dev.isIntel() || !(type == CV_8UC1) ||
2372 !(dtype == CV_8UC1) || !(_dst.cols() % 4 == 0) ||
2373 !(borderType == cv::BORDER_CONSTANT &&
2374 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)))
2375 return false;
2376
2377 const char * const warp_op[2] = { "Affine", "Perspective" };
2378 const char * const interpolationMap[3] = { "nearest", "linear", "cubic" };
2379 ocl::ProgramSource program = ocl::imgproc::warp_transform_oclsrc;
2380 String kernelName = format("warp%s_%s_8u", warp_op[op_type], interpolationMap[interpolation]);
2381
2382 bool is32f = (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
2383 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(is32f ? CV_32F : CV_32S, depth);
2384 int sctype = CV_MAKETYPE(wdepth, cn);
2385
2386 ocl::Kernel k;
2387 String opts = format("-D ST=%s", ocl::typeToStr(sctype));
2388
2389 k.create(kernelName.c_str(), program, opts);
2390 if (k.empty())
2391 return false;
2392
2393 float borderBuf[] = { 0, 0, 0, 0 };
2394 scalarToRawData(borderValue, borderBuf, sctype);
2395
2396 UMat src = _src.getUMat(), M0;
2397 _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
2398 UMat dst = _dst.getUMat();
2399
2400 float M[9] = {0};
2401 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
2402 Mat matM(matRows, 3, CV_32F, M), M1 = _M0.getMat();
2403 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) && M1.rows == matRows && M1.cols == 3 );
2404 M1.convertTo(matM, matM.type());
2405
2406 if( !(flags & WARP_INVERSE_MAP) )
2407 {
2408 if (op_type == OCL_OP_PERSPECTIVE)
2409 invert(matM, matM);
2410 else
2411 {
2412 float D = M[0]*M[4] - M[1]*M[3];
2413 D = D != 0 ? 1.f/D : 0;
2414 float A11 = M[4]*D, A22=M[0]*D;
2415 M[0] = A11; M[1] *= -D;
2416 M[3] *= -D; M[4] = A22;
2417 float b1 = -M[0]*M[2] - M[1]*M[5];
2418 float b2 = -M[3]*M[2] - M[4]*M[5];
2419 M[2] = b1; M[5] = b2;
2420 }
2421 }
2422 matM.convertTo(M0, CV_32F);
2423
2424 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
2425 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
2426
2427 size_t globalThreads[2];
2428 globalThreads[0] = (size_t)(dst.cols / 4);
2429 globalThreads[1] = (size_t)dst.rows;
2430
2431 return k.run(2, globalThreads, NULL, false);
2432 }
2433
ocl_warpTransform(InputArray _src,OutputArray _dst,InputArray _M0,Size dsize,int flags,int borderType,const Scalar & borderValue,int op_type)2434 static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0,
2435 Size dsize, int flags, int borderType, const Scalar& borderValue,
2436 int op_type)
2437 {
2438 CV_Assert(op_type == OCL_OP_AFFINE || op_type == OCL_OP_PERSPECTIVE);
2439 const ocl::Device & dev = ocl::Device::getDefault();
2440
2441 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2442 const bool doubleSupport = dev.doubleFPConfig() > 0;
2443
2444 int interpolation = flags & INTER_MAX;
2445 if( interpolation == INTER_AREA )
2446 interpolation = INTER_LINEAR;
2447 int rowsPerWI = dev.isIntel() && op_type == OCL_OP_AFFINE && interpolation <= INTER_LINEAR ? 4 : 1;
2448
2449 if ( !(borderType == cv::BORDER_CONSTANT &&
2450 (interpolation == cv::INTER_NEAREST || interpolation == cv::INTER_LINEAR || interpolation == cv::INTER_CUBIC)) ||
2451 (!doubleSupport && depth == CV_64F) || cn > 4)
2452 return false;
2453
2454 bool useDouble = depth == CV_64F;
2455
2456 const char * const interpolationMap[3] = { "NEAREST", "LINEAR", "CUBIC" };
2457 ocl::ProgramSource program = op_type == OCL_OP_AFFINE ?
2458 ocl::imgproc::warp_affine_oclsrc : ocl::imgproc::warp_perspective_oclsrc;
2459 const char * const kernelName = op_type == OCL_OP_AFFINE ? "warpAffine" : "warpPerspective";
2460
2461 int scalarcn = cn == 3 ? 4 : cn;
2462 bool is32f = !dev.isAMD() && (interpolation == INTER_CUBIC || interpolation == INTER_LINEAR) && op_type == OCL_OP_AFFINE;
2463 int wdepth = interpolation == INTER_NEAREST ? depth : std::max(is32f ? CV_32F : CV_32S, depth);
2464 int sctype = CV_MAKETYPE(wdepth, scalarcn);
2465
2466 ocl::Kernel k;
2467 String opts;
2468 if (interpolation == INTER_NEAREST)
2469 {
2470 opts = format("-D INTER_NEAREST -D T=%s%s -D CT=%s -D T1=%s -D ST=%s -D cn=%d -D rowsPerWI=%d",
2471 ocl::typeToStr(type),
2472 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2473 useDouble ? "double" : "float",
2474 ocl::typeToStr(CV_MAT_DEPTH(type)),
2475 ocl::typeToStr(sctype), cn, rowsPerWI);
2476 }
2477 else
2478 {
2479 char cvt[2][50];
2480 opts = format("-D INTER_%s -D T=%s -D T1=%s -D ST=%s -D WT=%s -D depth=%d"
2481 " -D convertToWT=%s -D convertToT=%s%s -D CT=%s -D cn=%d -D rowsPerWI=%d",
2482 interpolationMap[interpolation], ocl::typeToStr(type),
2483 ocl::typeToStr(CV_MAT_DEPTH(type)),
2484 ocl::typeToStr(sctype),
2485 ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)), depth,
2486 ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
2487 ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
2488 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2489 useDouble ? "double" : "float",
2490 cn, rowsPerWI);
2491 }
2492
2493 k.create(kernelName, program, opts);
2494 if (k.empty())
2495 return false;
2496
2497 double borderBuf[] = { 0, 0, 0, 0 };
2498 scalarToRawData(borderValue, borderBuf, sctype);
2499
2500 UMat src = _src.getUMat(), M0;
2501 _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
2502 UMat dst = _dst.getUMat();
2503
2504 double M[9] = {0};
2505 int matRows = (op_type == OCL_OP_AFFINE ? 2 : 3);
2506 Mat matM(matRows, 3, CV_64F, M), M1 = _M0.getMat();
2507 CV_Assert( (M1.type() == CV_32F || M1.type() == CV_64F) &&
2508 M1.rows == matRows && M1.cols == 3 );
2509 M1.convertTo(matM, matM.type());
2510
2511 if( !(flags & WARP_INVERSE_MAP) )
2512 {
2513 if (op_type == OCL_OP_PERSPECTIVE)
2514 invert(matM, matM);
2515 else
2516 {
2517 double D = M[0]*M[4] - M[1]*M[3];
2518 D = D != 0 ? 1./D : 0;
2519 double A11 = M[4]*D, A22=M[0]*D;
2520 M[0] = A11; M[1] *= -D;
2521 M[3] *= -D; M[4] = A22;
2522 double b1 = -M[0]*M[2] - M[1]*M[5];
2523 double b2 = -M[3]*M[2] - M[4]*M[5];
2524 M[2] = b1; M[5] = b2;
2525 }
2526 }
2527 matM.convertTo(M0, useDouble ? CV_64F : CV_32F);
2528
2529 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(M0),
2530 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, borderBuf, CV_ELEM_SIZE(sctype)));
2531
2532 size_t globalThreads[2] = { (size_t)dst.cols, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
2533 return k.run(2, globalThreads, NULL, false);
2534 }
2535
2536 #endif
2537
2538 namespace hal {
2539
warpAffine(int src_type,const uchar * src_data,size_t src_step,int src_width,int src_height,uchar * dst_data,size_t dst_step,int dst_width,int dst_height,const double M[6],int interpolation,int borderType,const double borderValue[4])2540 void warpAffine(int src_type,
2541 const uchar * src_data, size_t src_step, int src_width, int src_height,
2542 uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
2543 const double M[6], int interpolation, int borderType, const double borderValue[4])
2544 {
2545 CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
2546
2547 Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
2548 Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
2549
2550 int x;
2551 AutoBuffer<int> _abdelta(dst.cols*2);
2552 int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
2553 const int AB_BITS = MAX(10, (int)INTER_BITS);
2554 const int AB_SCALE = 1 << AB_BITS;
2555
2556 for( x = 0; x < dst.cols; x++ )
2557 {
2558 adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
2559 bdelta[x] = saturate_cast<int>(M[3]*x*AB_SCALE);
2560 }
2561
2562 Range range(0, dst.rows);
2563 WarpAffineInvoker invoker(src, dst, interpolation, borderType,
2564 Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]),
2565 adelta, bdelta, M);
2566 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
2567 }
2568
2569 } // hal::
2570 } // cv::
2571
2572
// Public warpAffine: validates arguments, tries the OpenCL paths, converts
// the forward transform to its inverse (dst->src) form unless
// WARP_INVERSE_MAP is set, optionally dispatches to IPP, and finally falls
// through to the HAL/CPU implementation.
void cv::warpAffine( InputArray _src, OutputArray _dst,
                     InputArray _M0, Size dsize,
                     int flags, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    int interpolation = flags & INTER_MAX;
    // Lanczos4/cubic are only implemented for up to 4 channels.
    CV_Assert( _src.channels() <= 4 || (interpolation != INTER_LANCZOS4 &&
                                        interpolation != INTER_CUBIC) );

    // OpenCL fast paths; CV_OCL_RUN returns from this function on success.
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
               _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
               ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType,
                                       borderValue, OCL_OP_AFFINE))

    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType,
                                 borderValue, OCL_OP_AFFINE))

    Mat src = _src.getMat(), M0 = _M0.getMat();
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    Mat dst = _dst.getMat();
    CV_Assert( src.cols > 0 && src.rows > 0 );
    // In-place call: the warp reads the whole source, so clone it first.
    if( dst.data == src.data )
        src = src.clone();

    double M[6] = {0};
    Mat matM(2, 3, CV_64F, M);      // matM aliases M[] below
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 );
    M0.convertTo(matM, matM.type());

    // The implementation maps dst->src; invert the 2x3 transform in closed
    // form unless the caller already supplied the inverse.
    if( !(flags & WARP_INVERSE_MAP) )
    {
        double D = M[0]*M[4] - M[1]*M[3];
        D = D != 0 ? 1./D : 0;
        double A11 = M[4]*D, A22=M[0]*D;
        M[0] = A11; M[1] *= -D;
        M[3] *= -D; M[4] = A22;
        double b1 = -M[0]*M[2] - M[1]*M[5];
        double b2 = -M[3]*M[2] - M[4]*M[5];
        M[2] = b1; M[5] = b2;
    }

#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPAFFINE
    CV_IPP_CHECK()
    {
        int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        // IPP path: 8U/16U/32F, 1/3/4 channels, NN/linear/cubic,
        // transparent or constant border only.
        if( ( depth == CV_8U || depth == CV_16U || depth == CV_32F ) &&
            ( cn == 1 || cn == 3 || cn == 4 ) &&
            ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) &&
            ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT) )
        {
            // Select the "Back" (inverse-map) variants when the caller passed
            // the inverse transform, the forward variants otherwise.
            ippiWarpAffineBackFunc ippFunc = 0;
            if ((flags & WARP_INVERSE_MAP) != 0)
            {
                ippFunc =
                type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C1R :
                type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C3R :
                type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_8u_C4R :
                type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C1R :
                type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C3R :
                type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_16u_C4R :
                type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C1R :
                type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C3R :
                type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffineBack_32f_C4R :
                0;
            }
            else
            {
                ippFunc =
                type == CV_8UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C1R :
                type == CV_8UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C3R :
                type == CV_8UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_8u_C4R :
                type == CV_16UC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C1R :
                type == CV_16UC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C3R :
                type == CV_16UC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_16u_C4R :
                type == CV_32FC1 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C1R :
                type == CV_32FC3 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C3R :
                type == CV_32FC4 ? (ippiWarpAffineBackFunc)ippiWarpAffine_32f_C4R :
                0;
            }
            int mode =
            interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
            interpolation == INTER_NEAREST ? IPPI_INTER_NN :
            interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC :
            0;
            CV_Assert(mode && ippFunc);

            double coeffs[2][3];
            for( int i = 0; i < 2; i++ )
                for( int j = 0; j < 3; j++ )
                    coeffs[i][j] = matM.at<double>(i, j);

            bool ok;
            Range range(0, dst.rows);
            IPPWarpAffineInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
            if( ok )
            {
                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                return;
            }
            // IPP failed on some stripe: record it and fall through to HAL.
            setIppErrorStatus();
        }
    }
#endif

    hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                    M, interpolation, borderType, borderValue.val);
}
2686
2687
2688 namespace cv
2689 {
2690 #if CV_SIMD128_64F
// Compute one row (bw pixels) of integer source coordinates for the
// INTER_NEAREST perspective warp using 128-bit double-precision universal
// intrinsics (2 doubles per vector, 16 destination pixels per iteration).
//
// For destination column x1 the projective mapping is
//     W = W0 + M[6]*x1,  X = (X0 + M[0]*x1)/W,  Y = (Y0 + M[3]*x1)/W,
// where X0/Y0/W0 already contain the row/block-dependent terms computed by
// the caller. Results are clamped to [INT_MIN, INT_MAX], rounded, saturated
// to short and stored interleaved as (x, y) pairs in `xy`.
void WarpPerspectiveLine_ProcessNN_CV_SIMD(const double *M, short* xy, double X0, double Y0, double W0, int bw)
{
    const v_float64x2 v_M0 = v_setall_f64(M[0]);
    const v_float64x2 v_M3 = v_setall_f64(M[3]);
    const v_float64x2 v_M6 = v_setall_f64(M[6]);
    const v_float64x2 v_intmax = v_setall_f64((double)INT_MAX);
    const v_float64x2 v_intmin = v_setall_f64((double)INT_MIN);
    const v_float64x2 v_2 = v_setall_f64(2.0);
    const v_float64x2 v_zero = v_setzero_f64();
    const v_float64x2 v_1 = v_setall_f64(1.0);

    int x1 = 0;
    v_float64x2 v_X0d = v_setall_f64(X0);
    v_float64x2 v_Y0d = v_setall_f64(Y0);
    v_float64x2 v_W0 = v_setall_f64(W0);
    // Column indices of the two vector lanes; advanced by 2 after each pair.
    v_float64x2 v_x1(0.0, 1.0);

    // Main SIMD loop: 16 pixels per iteration, produced as four groups of
    // four (each group = two 2-lane double vectors rounded and packed into
    // one v_int32x4).
    for (; x1 <= bw - 16; x1 += 16)
    {
        // 0-3
        v_int32x4 v_X0, v_Y0;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            // W == 0 maps to 0 instead of dividing (same as the scalar tail).
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X0 = v_round(v_fX0, v_fX1);
            v_Y0 = v_round(v_fY0, v_fY1);
        }

        // 4-7
        v_int32x4 v_X1, v_Y1;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X1 = v_round(v_fX0, v_fX1);
            v_Y1 = v_round(v_fY0, v_fY1);
        }

        // 8-11
        v_int32x4 v_X2, v_Y2;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X2 = v_round(v_fX0, v_fX1);
            v_Y2 = v_round(v_fY0, v_fY1);
        }

        // 12-15
        v_int32x4 v_X3, v_Y3;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_1 / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X3 = v_round(v_fX0, v_fX1);
            v_Y3 = v_round(v_fY0, v_fY1);
        }

        // convert to 16s (packing saturates 32-bit values into shorts)
        v_X0 = v_reinterpret_as_s32(v_pack(v_X0, v_X1));
        v_X1 = v_reinterpret_as_s32(v_pack(v_X2, v_X3));
        v_Y0 = v_reinterpret_as_s32(v_pack(v_Y0, v_Y1));
        v_Y1 = v_reinterpret_as_s32(v_pack(v_Y2, v_Y3));

        // Store as interleaved (x, y) short pairs: xy[2*i], xy[2*i+1].
        v_store_interleave(xy + x1 * 2, (v_reinterpret_as_s16)(v_X0), (v_reinterpret_as_s16)(v_Y0));
        v_store_interleave(xy + x1 * 2 + 16, (v_reinterpret_as_s16)(v_X1), (v_reinterpret_as_s16)(v_Y1));
    }

    // Scalar tail for the remaining (bw % 16) pixels; mirrors the vector path.
    for( ; x1 < bw; x1++ )
    {
        double W = W0 + M[6]*x1;
        W = W ? 1./W : 0;
        double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
        double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
        int X = saturate_cast<int>(fX);
        int Y = saturate_cast<int>(fY);

        xy[x1*2] = saturate_cast<short>(X);
        xy[x1*2+1] = saturate_cast<short>(Y);
    }
}
2809
// Compute one row (bw pixels) of fixed-point source coordinates for the
// interpolating (non-nearest) perspective warp, using 128-bit
// double-precision universal intrinsics (16 pixels per iteration).
//
// Unlike the NN variant above, coordinates are scaled by INTER_TAB_SIZE
// before rounding: the high bits (value >> INTER_BITS) give the integer
// source position stored in `xy`, while the low INTER_BITS bits of X and Y
// are combined into `alpha` as (Yfrac << INTER_BITS) + Xfrac — the index
// into the interpolation-coefficient table used later by remap().
void WarpPerspectiveLine_Process_CV_SIMD(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
{
    const v_float64x2 v_M0 = v_setall_f64(M[0]);
    const v_float64x2 v_M3 = v_setall_f64(M[3]);
    const v_float64x2 v_M6 = v_setall_f64(M[6]);
    const v_float64x2 v_intmax = v_setall_f64((double)INT_MAX);
    const v_float64x2 v_intmin = v_setall_f64((double)INT_MIN);
    const v_float64x2 v_2 = v_setall_f64(2.0);
    const v_float64x2 v_zero = v_setzero_f64();
    // Reciprocal is pre-scaled by INTER_TAB_SIZE so the rounded result is in
    // fixed point with INTER_BITS fractional bits.
    const v_float64x2 v_its = v_setall_f64((double)INTER_TAB_SIZE);
    const v_int32x4 v_itsi1 = v_setall_s32(INTER_TAB_SIZE - 1);

    int x1 = 0;

    v_float64x2 v_X0d = v_setall_f64(X0);
    v_float64x2 v_Y0d = v_setall_f64(Y0);
    v_float64x2 v_W0 = v_setall_f64(W0);
    // Column indices of the two vector lanes; advanced by 2 after each pair.
    v_float64x2 v_x1(0.0, 1.0);

    // Main SIMD loop: 16 pixels per iteration, four groups of four.
    for (; x1 <= bw - 16; x1 += 16)
    {
        // 0-3
        v_int32x4 v_X0, v_Y0;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            // W == 0 maps to 0 instead of dividing (same as the scalar tail).
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X0 = v_round(v_fX0, v_fX1);
            v_Y0 = v_round(v_fY0, v_fY1);
        }

        // 4-7
        v_int32x4 v_X1, v_Y1;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X1 = v_round(v_fX0, v_fX1);
            v_Y1 = v_round(v_fY0, v_fY1);
        }

        // 8-11
        v_int32x4 v_X2, v_Y2;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X2 = v_round(v_fX0, v_fX1);
            v_Y2 = v_round(v_fY0, v_fY1);
        }

        // 12-15
        v_int32x4 v_X3, v_Y3;
        {
            v_float64x2 v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY0 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_W = v_muladd(v_M6, v_x1, v_W0);
            v_W = v_select(v_W != v_zero, v_its / v_W, v_zero);
            v_float64x2 v_fX1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M0, v_x1, v_X0d) * v_W));
            v_float64x2 v_fY1 = v_max(v_intmin, v_min(v_intmax, v_muladd(v_M3, v_x1, v_Y0d) * v_W));
            v_x1 += v_2;

            v_X3 = v_round(v_fX0, v_fX1);
            v_Y3 = v_round(v_fY0, v_fY1);
        }

        // store alpha: pack the fractional bits of Y (high) and X (low) into
        // one short per pixel.
        v_int32x4 v_alpha0 = ((v_Y0 & v_itsi1) << INTER_BITS) + (v_X0 & v_itsi1);
        v_int32x4 v_alpha1 = ((v_Y1 & v_itsi1) << INTER_BITS) + (v_X1 & v_itsi1);
        v_store((alpha + x1), v_pack(v_alpha0, v_alpha1));

        v_alpha0 = ((v_Y2 & v_itsi1) << INTER_BITS) + (v_X2 & v_itsi1);
        v_alpha1 = ((v_Y3 & v_itsi1) << INTER_BITS) + (v_X3 & v_itsi1);
        v_store((alpha + x1 + 8), v_pack(v_alpha0, v_alpha1));

        // convert to 16s: drop the fractional bits, then saturate-pack.
        v_X0 = v_reinterpret_as_s32(v_pack(v_X0 >> INTER_BITS, v_X1 >> INTER_BITS));
        v_X1 = v_reinterpret_as_s32(v_pack(v_X2 >> INTER_BITS, v_X3 >> INTER_BITS));
        v_Y0 = v_reinterpret_as_s32(v_pack(v_Y0 >> INTER_BITS, v_Y1 >> INTER_BITS));
        v_Y1 = v_reinterpret_as_s32(v_pack(v_Y2 >> INTER_BITS, v_Y3 >> INTER_BITS));

        // Store as interleaved (x, y) short pairs.
        v_store_interleave(xy + x1 * 2, (v_reinterpret_as_s16)(v_X0), (v_reinterpret_as_s16)(v_Y0));
        v_store_interleave(xy + x1 * 2 + 16, (v_reinterpret_as_s16)(v_X1), (v_reinterpret_as_s16)(v_Y1));
    }

    // Scalar tail for the remaining (bw % 16) pixels; mirrors the vector path.
    for( ; x1 < bw; x1++ )
    {
        double W = W0 + M[6]*x1;
        W = W ? INTER_TAB_SIZE/W : 0;
        double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
        double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
        int X = saturate_cast<int>(fX);
        int Y = saturate_cast<int>(fY);

        xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
        xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
        alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                            (X & (INTER_TAB_SIZE-1)));
    }
}
2941 #endif
2942
// Parallel body for the generic (non-IPP, non-OpenCL) perspective warp.
// Each worker handles a horizontal stripe of `dst`, processed in small tiles:
// the inverse coordinate map for a tile is built into stack buffers, then the
// actual pixel fetching/interpolation is delegated to remap().
class WarpPerspectiveInvoker :
    public ParallelLoopBody
{
public:
    // M points to the 3x3 perspective matrix (row-major, 9 doubles) that maps
    // destination coordinates to source coordinates; it must outlive the
    // invoker.
    WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, const double *_M, int _interpolation,
                           int _borderType, const Scalar &_borderValue) :
        ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation),
        borderType(_borderType), borderValue(_borderValue)
    {
#if defined(_MSC_VER) && _MSC_VER == 1800 /* MSVS 2013 */ && CV_AVX
        // Workaround for an MSVS2013/AVX codegen bug that corrupts the last
        // two Scalar channels in the member-init list.
        // details: https://github.com/opencv/opencv/issues/11026
        borderValue.val[2] = _borderValue.val[2];
        borderValue.val[3] = _borderValue.val[3];
#endif
    }

    virtual void operator() (const Range& range) const CV_OVERRIDE
    {
        // Tiles are capped at BLOCK_SZ*BLOCK_SZ cells so XY (coordinates) and
        // A (interpolation-table indices) fit on the stack.
        const int BLOCK_SZ = 32;
        short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
        int x, y, y1, width = dst.cols, height = dst.rows;

        // Pick a tile shape (bw0 x bh0): start wide and shallow, then clamp
        // so bw0*bh0 never exceeds BLOCK_SZ*BLOCK_SZ.
        int bh0 = std::min(BLOCK_SZ/2, height);
        int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
        bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);

#if CV_TRY_SSE4_1
        Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
        if(CV_CPU_HAS_SUPPORT_SSE4_1)
            pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
#endif

        for( y = range.start; y < range.end; y += bh0 )
        {
            for( x = 0; x < width; x += bw0 )
            {
                // Actual tile extents, clipped to the stripe/image borders.
                int bw = std::min( bw0, width - x);
                int bh = std::min( bh0, range.end - y); // height

                Mat _XY(bh, bw, CV_16SC2, XY);
                Mat dpart(dst, Rect(x, y, bw, bh));

                // Fill the per-row inverse map for this tile.
                for( y1 = 0; y1 < bh; y1++ )
                {
                    short* xy = XY + y1*bw*2;
                    // Row-constant parts of the projective mapping at the
                    // tile's left edge; the per-column terms are added below.
                    double X0 = M[0]*x + M[1]*(y + y1) + M[2];
                    double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
                    double W0 = M[6]*x + M[7]*(y + y1) + M[8];

                    if( interpolation == INTER_NEAREST )
                    {
#if CV_TRY_SSE4_1
                        if (pwarp_impl_sse4)
                            pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
                        else
#endif
#if CV_SIMD128_64F
                        WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
#else
                        // Scalar fallback: integer source coordinates only.
                        for( int x1 = 0; x1 < bw; x1++ )
                        {
                            double W = W0 + M[6]*x1;
                            W = W ? 1./W : 0;
                            double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
                            double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
                            int X = saturate_cast<int>(fX);
                            int Y = saturate_cast<int>(fY);

                            xy[x1*2] = saturate_cast<short>(X);
                            xy[x1*2+1] = saturate_cast<short>(Y);
                        }
#endif
                    }
                    else
                    {
                        short* alpha = A + y1*bw;

#if CV_TRY_SSE4_1
                        if (pwarp_impl_sse4)
                            pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
                        else
#endif
#if CV_SIMD128_64F
                        WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
#else
                        // Scalar fallback: fixed-point coordinates plus the
                        // interpolation-table index in alpha.
                        for( int x1 = 0; x1 < bw; x1++ )
                        {
                            double W = W0 + M[6]*x1;
                            W = W ? INTER_TAB_SIZE/W : 0;
                            double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
                            double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
                            int X = saturate_cast<int>(fX);
                            int Y = saturate_cast<int>(fY);

                            xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
                            xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
                            alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
                                                (X & (INTER_TAB_SIZE-1)));
                        }
#endif
                    }
                }

                // Delegate pixel interpolation to remap(); the interpolating
                // path also passes the table-index map A.
                if( interpolation == INTER_NEAREST )
                    remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
                else
                {
                    Mat _matA(bh, bw, CV_16U, A);
                    remap( src, dpart, _XY, _matA, interpolation, borderType, borderValue );
                }
            }
        }
    }

private:
    Mat src;
    Mat dst;
    const double* M;
    int interpolation, borderType;
    Scalar borderValue;
};
3064
3065 #if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
3066 typedef IppStatus (CV_STDCALL* ippiWarpPerspectiveFunc)(const void*, IppiSize, int, IppiRect, void *, int, IppiRect, double [3][3], int);
3067
3068 class IPPWarpPerspectiveInvoker :
3069 public ParallelLoopBody
3070 {
3071 public:
IPPWarpPerspectiveInvoker(Mat & _src,Mat & _dst,double (& _coeffs)[3][3],int & _interpolation,int & _borderType,const Scalar & _borderValue,ippiWarpPerspectiveFunc _func,bool * _ok)3072 IPPWarpPerspectiveInvoker(Mat &_src, Mat &_dst, double (&_coeffs)[3][3], int &_interpolation,
3073 int &_borderType, const Scalar &_borderValue, ippiWarpPerspectiveFunc _func, bool *_ok) :
3074 ParallelLoopBody(), src(_src), dst(_dst), mode(_interpolation), coeffs(_coeffs),
3075 borderType(_borderType), borderValue(_borderValue), func(_func), ok(_ok)
3076 {
3077 *ok = true;
3078 }
3079
operator ()(const Range & range) const3080 virtual void operator() (const Range& range) const CV_OVERRIDE
3081 {
3082 IppiSize srcsize = {src.cols, src.rows};
3083 IppiRect srcroi = {0, 0, src.cols, src.rows};
3084 IppiRect dstroi = {0, range.start, dst.cols, range.end - range.start};
3085 int cnn = src.channels();
3086
3087 if( borderType == BORDER_CONSTANT )
3088 {
3089 IppiSize setSize = {dst.cols, range.end - range.start};
3090 void *dataPointer = dst.ptr(range.start);
3091 if( !IPPSet( borderValue, dataPointer, (int)dst.step[0], setSize, cnn, src.depth() ) )
3092 {
3093 *ok = false;
3094 return;
3095 }
3096 }
3097
3098 IppStatus status = CV_INSTRUMENT_FUN_IPP(func,(src.ptr();, srcsize, (int)src.step[0], srcroi, dst.ptr(), (int)dst.step[0], dstroi, coeffs, mode));
3099 if (status != ippStsNoErr)
3100 *ok = false;
3101 else
3102 {
3103 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
3104 }
3105 }
3106 private:
3107 Mat &src;
3108 Mat &dst;
3109 int mode;
3110 double (&coeffs)[3][3];
3111 int borderType;
3112 const Scalar borderValue;
3113 ippiWarpPerspectiveFunc func;
3114 bool *ok;
3115
3116 const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&);
3117 };
3118 #endif
3119
3120 namespace hal {
3121
warpPerspective(int src_type,const uchar * src_data,size_t src_step,int src_width,int src_height,uchar * dst_data,size_t dst_step,int dst_width,int dst_height,const double M[9],int interpolation,int borderType,const double borderValue[4])3122 void warpPerspective(int src_type,
3123 const uchar * src_data, size_t src_step, int src_width, int src_height,
3124 uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
3125 const double M[9], int interpolation, int borderType, const double borderValue[4])
3126 {
3127 CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue);
3128 Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
3129 Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
3130
3131 Range range(0, dst.rows);
3132 WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]));
3133 parallel_for_(range, invoker, dst.total()/(double)(1<<16));
3134 }
3135
3136 } // hal::
3137 } // cv::
3138
// Applies a 3x3 perspective transform to an image.  Dispatch order:
// OpenCL (two variants) -> IPP (when available and applicable) -> generic
// hal::warpPerspective.  Unless WARP_INVERSE_MAP is set, the matrix is
// inverted first so the workers can map destination -> source coordinates.
void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
                          Size dsize, int flags, int borderType, const Scalar& borderValue )
{
    CV_INSTRUMENT_REGION();

    CV_Assert( _src.total() > 0 );

    // OpenCL fast path for small images (cols/rows fit in short coordinates).
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() &&
               _src.cols() <= SHRT_MAX && _src.rows() <= SHRT_MAX,
               ocl_warpTransform_cols4(_src, _dst, _M0, dsize, flags, borderType, borderValue,
                                       OCL_OP_PERSPECTIVE))

    // Generic OpenCL path.
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_warpTransform(_src, _dst, _M0, dsize, flags, borderType, borderValue,
                                 OCL_OP_PERSPECTIVE))

    Mat src = _src.getMat(), M0 = _M0.getMat();
    // Empty dsize means "same size as the source".
    _dst.create( dsize.empty() ? src.size() : dsize, src.type() );
    Mat dst = _dst.getMat();

    // In-place call: clone the source since the warp reads and writes overlap.
    if( dst.data == src.data )
        src = src.clone();

    double M[9];
    Mat matM(3, 3, CV_64F, M);
    int interpolation = flags & INTER_MAX;
    // INTER_AREA is not implemented for warps; fall back to bilinear.
    if( interpolation == INTER_AREA )
        interpolation = INTER_LINEAR;

    CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
    M0.convertTo(matM, matM.type());

#if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && !IPP_DISABLE_WARPPERSPECTIVE
    CV_IPP_CHECK()
    {
        int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        // IPP handles only these depth/channel/border/interpolation combos.
        if( (depth == CV_8U || depth == CV_16U || depth == CV_32F) &&
           (cn == 1 || cn == 3 || cn == 4) &&
           ( borderType == cv::BORDER_TRANSPARENT || borderType == cv::BORDER_CONSTANT ) &&
           (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC))
        {
            ippiWarpPerspectiveFunc ippFunc = 0;
            // "Back" variants expect the inverse (dst->src) matrix, matching
            // WARP_INVERSE_MAP semantics.
            if ((flags & WARP_INVERSE_MAP) != 0)
            {
                ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C1R :
                type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C3R :
                type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_8u_C4R :
                type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C1R :
                type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C3R :
                type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_16u_C4R :
                type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C1R :
                type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C3R :
                type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspectiveBack_32f_C4R : 0;
            }
            else
            {
                ippFunc = type == CV_8UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C1R :
                type == CV_8UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C3R :
                type == CV_8UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_8u_C4R :
                type == CV_16UC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C1R :
                type == CV_16UC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C3R :
                type == CV_16UC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_16u_C4R :
                type == CV_32FC1 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C1R :
                type == CV_32FC3 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C3R :
                type == CV_32FC4 ? (ippiWarpPerspectiveFunc)ippiWarpPerspective_32f_C4R : 0;
            }
            int mode =
            interpolation == INTER_NEAREST ? IPPI_INTER_NN :
            interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR :
            interpolation == INTER_CUBIC ? IPPI_INTER_CUBIC : 0;
            CV_Assert(mode && ippFunc);

            // IPP takes the matrix as a plain 3x3 double array.
            double coeffs[3][3];
            for( int i = 0; i < 3; i++ )
                for( int j = 0; j < 3; j++ )
                    coeffs[i][j] = matM.at<double>(i, j);

            bool ok;
            Range range(0, dst.rows);
            IPPWarpPerspectiveInvoker invoker(src, dst, coeffs, mode, borderType, borderValue, ippFunc, &ok);
            parallel_for_(range, invoker, dst.total()/(double)(1<<16));
            if( ok )
            {
                CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
                return;
            }
            // IPP failed — record it and fall through to the generic path.
            setIppErrorStatus();
        }
    }
#endif

    // The generic path needs the dst->src mapping; invert unless the caller
    // already supplied the inverse.
    if( !(flags & WARP_INVERSE_MAP) )
        invert(matM, matM);

    hal::warpPerspective(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows,
                         matM.ptr<double>(), interpolation, borderType, borderValue.val);
}
3236
3237
getRotationMatrix2D_(Point2f center,double angle,double scale)3238 cv::Matx23d cv::getRotationMatrix2D_(Point2f center, double angle, double scale)
3239 {
3240 CV_INSTRUMENT_REGION();
3241
3242 angle *= CV_PI/180;
3243 double alpha = std::cos(angle)*scale;
3244 double beta = std::sin(angle)*scale;
3245
3246 Matx23d M(
3247 alpha, beta, (1-alpha)*center.x - beta*center.y,
3248 -beta, alpha, beta*center.x + (1-alpha)*center.y
3249 );
3250 return M;
3251 }
3252
3253 /* Calculates coefficients of perspective transformation
3254 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4):
3255 *
3256 * c00*xi + c01*yi + c02
3257 * ui = ---------------------
3258 * c20*xi + c21*yi + c22
3259 *
3260 * c10*xi + c11*yi + c12
3261 * vi = ---------------------
3262 * c20*xi + c21*yi + c22
3263 *
3264 * Coefficients are calculated by solving linear system:
3265 * / x0 y0 1 0 0 0 -x0*u0 -y0*u0 \ /c00\ /u0\
3266 * | x1 y1 1 0 0 0 -x1*u1 -y1*u1 | |c01| |u1|
3267 * | x2 y2 1 0 0 0 -x2*u2 -y2*u2 | |c02| |u2|
3268 * | x3 y3 1 0 0 0 -x3*u3 -y3*u3 |.|c10|=|u3|,
3269 * | 0 0 0 x0 y0 1 -x0*v0 -y0*v0 | |c11| |v0|
3270 * | 0 0 0 x1 y1 1 -x1*v1 -y1*v1 | |c12| |v1|
3271 * | 0 0 0 x2 y2 1 -x2*v2 -y2*v2 | |c20| |v2|
3272 * \ 0 0 0 x3 y3 1 -x3*v3 -y3*v3 / \c21/ \v3/
3273 *
3274 * where:
3275 * cij - matrix coefficients, c22 = 1
3276 */
getPerspectiveTransform(const Point2f src[],const Point2f dst[],int solveMethod)3277 cv::Mat cv::getPerspectiveTransform(const Point2f src[], const Point2f dst[], int solveMethod)
3278 {
3279 CV_INSTRUMENT_REGION();
3280
3281 Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.ptr());
3282 double a[8][8], b[8];
3283 Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b);
3284
3285 for( int i = 0; i < 4; ++i )
3286 {
3287 a[i][0] = a[i+4][3] = src[i].x;
3288 a[i][1] = a[i+4][4] = src[i].y;
3289 a[i][2] = a[i+4][5] = 1;
3290 a[i][3] = a[i][4] = a[i][5] =
3291 a[i+4][0] = a[i+4][1] = a[i+4][2] = 0;
3292 a[i][6] = -src[i].x*dst[i].x;
3293 a[i][7] = -src[i].y*dst[i].x;
3294 a[i+4][6] = -src[i].x*dst[i].y;
3295 a[i+4][7] = -src[i].y*dst[i].y;
3296 b[i] = dst[i].x;
3297 b[i+4] = dst[i].y;
3298 }
3299
3300 solve(A, B, X, solveMethod);
3301 M.ptr<double>()[8] = 1.;
3302
3303 return M;
3304 }
3305
3306 /* Calculates coefficients of affine transformation
3307 * which maps (xi,yi) to (ui,vi), (i=1,2,3):
3308 *
3309 * ui = c00*xi + c01*yi + c02
3310 *
3311 * vi = c10*xi + c11*yi + c12
3312 *
3313 * Coefficients are calculated by solving linear system:
3314 * / x0 y0 1 0 0 0 \ /c00\ /u0\
3315 * | x1 y1 1 0 0 0 | |c01| |u1|
3316 * | x2 y2 1 0 0 0 | |c02| |u2|
3317 * | 0 0 0 x0 y0 1 | |c10| |v0|
3318 * | 0 0 0 x1 y1 1 | |c11| |v1|
3319 * \ 0 0 0 x2 y2 1 / |c12| |v2|
3320 *
3321 * where:
3322 * cij - matrix coefficients
3323 */
3324
getAffineTransform(const Point2f src[],const Point2f dst[])3325 cv::Mat cv::getAffineTransform( const Point2f src[], const Point2f dst[] )
3326 {
3327 Mat M(2, 3, CV_64F), X(6, 1, CV_64F, M.ptr());
3328 double a[6*6], b[6];
3329 Mat A(6, 6, CV_64F, a), B(6, 1, CV_64F, b);
3330
3331 for( int i = 0; i < 3; i++ )
3332 {
3333 int j = i*12;
3334 int k = i*12+6;
3335 a[j] = a[k+3] = src[i].x;
3336 a[j+1] = a[k+4] = src[i].y;
3337 a[j+2] = a[k+5] = 1;
3338 a[j+3] = a[j+4] = a[j+5] = 0;
3339 a[k] = a[k+1] = a[k+2] = 0;
3340 b[i*2] = dst[i].x;
3341 b[i*2+1] = dst[i].y;
3342 }
3343
3344 solve( A, B, X );
3345 return M;
3346 }
3347
// Inverts a 2x3 affine transform [A | t] analytically:
//   inv = [A^-1 | -A^-1 * t]
// using OpenCV's software floating-point types, which give bit-exact results
// across platforms.  A singular linear part produces the all-zero transform
// instead of dividing by zero.
void cv::invertAffineTransform(InputArray _matM, OutputArray __iM)
{
    Mat matM = _matM.getMat();
    CV_Assert(matM.rows == 2 && matM.cols == 3);
    __iM.create(2, 3, matM.type());
    Mat _iM = __iM.getMat();

    if( matM.type() == CV_32F )
    {
        const softfloat* M = matM.ptr<softfloat>();
        softfloat* iM = _iM.ptr<softfloat>();
        // Row strides measured in elements, not bytes.
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        // Determinant of the 2x2 linear part; zero -> zero transform.
        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        // Adjugate / D gives the inverse of the linear part.
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        // Inverse translation: -A^-1 * t.
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else if( matM.type() == CV_64F )
    {
        // Same computation, carried out entirely in softdouble.
        const softdouble* M = matM.ptr<softdouble>();
        softdouble* iM = _iM.ptr<softdouble>();
        int step = (int)(matM.step/sizeof(M[0])), istep = (int)(_iM.step/sizeof(iM[0]));

        softdouble D = M[0]*M[step+1] - M[1]*M[step];
        D = D != 0. ? softdouble(1.)/D : softdouble(0.);
        softdouble A11 = M[step+1]*D, A22 = M[0]*D, A12 = -M[1]*D, A21 = -M[step]*D;
        softdouble b1 = -A11*M[2] - A12*M[step+2];
        softdouble b2 = -A21*M[2] - A22*M[step+2];

        iM[0] = A11; iM[1] = A12; iM[2] = b1;
        iM[istep] = A21; iM[istep+1] = A22; iM[istep+2] = b2;
    }
    else
        CV_Error( CV_StsUnsupportedFormat, "" );
}
3388
getPerspectiveTransform(InputArray _src,InputArray _dst,int solveMethod)3389 cv::Mat cv::getPerspectiveTransform(InputArray _src, InputArray _dst, int solveMethod)
3390 {
3391 Mat src = _src.getMat(), dst = _dst.getMat();
3392 CV_Assert(src.checkVector(2, CV_32F) == 4 && dst.checkVector(2, CV_32F) == 4);
3393 return getPerspectiveTransform((const Point2f*)src.data, (const Point2f*)dst.data, solveMethod);
3394 }
3395
getAffineTransform(InputArray _src,InputArray _dst)3396 cv::Mat cv::getAffineTransform(InputArray _src, InputArray _dst)
3397 {
3398 Mat src = _src.getMat(), dst = _dst.getMat();
3399 CV_Assert(src.checkVector(2, CV_32F) == 3 && dst.checkVector(2, CV_32F) == 3);
3400 return getAffineTransform((const Point2f*)src.data, (const Point2f*)dst.data);
3401 }
3402
3403 CV_IMPL void
cvWarpAffine(const CvArr * srcarr,CvArr * dstarr,const CvMat * marr,int flags,CvScalar fillval)3404 cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3405 int flags, CvScalar fillval )
3406 {
3407 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3408 cv::Mat matrix = cv::cvarrToMat(marr);
3409 CV_Assert( src.type() == dst.type() );
3410 cv::warpAffine( src, dst, matrix, dst.size(), flags,
3411 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3412 fillval );
3413 }
3414
3415 CV_IMPL void
cvWarpPerspective(const CvArr * srcarr,CvArr * dstarr,const CvMat * marr,int flags,CvScalar fillval)3416 cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, const CvMat* marr,
3417 int flags, CvScalar fillval )
3418 {
3419 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3420 cv::Mat matrix = cv::cvarrToMat(marr);
3421 CV_Assert( src.type() == dst.type() );
3422 cv::warpPerspective( src, dst, matrix, dst.size(), flags,
3423 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3424 fillval );
3425 }
3426
3427 CV_IMPL void
cvRemap(const CvArr * srcarr,CvArr * dstarr,const CvArr * _mapx,const CvArr * _mapy,int flags,CvScalar fillval)3428 cvRemap( const CvArr* srcarr, CvArr* dstarr,
3429 const CvArr* _mapx, const CvArr* _mapy,
3430 int flags, CvScalar fillval )
3431 {
3432 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;
3433 cv::Mat mapx = cv::cvarrToMat(_mapx), mapy = cv::cvarrToMat(_mapy);
3434 CV_Assert( src.type() == dst.type() && dst.size() == mapx.size() );
3435 cv::remap( src, dst, mapx, mapy, flags & cv::INTER_MAX,
3436 (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT,
3437 fillval );
3438 CV_Assert( dst0.data == dst.data );
3439 }
3440
3441
3442 CV_IMPL CvMat*
cv2DRotationMatrix(CvPoint2D32f center,double angle,double scale,CvMat * matrix)3443 cv2DRotationMatrix( CvPoint2D32f center, double angle,
3444 double scale, CvMat* matrix )
3445 {
3446 cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale);
3447 CV_Assert( M.size() == M0.size() );
3448 M.convertTo(M0, M0.type());
3449 return matrix;
3450 }
3451
3452
3453 CV_IMPL CvMat*
cvGetPerspectiveTransform(const CvPoint2D32f * src,const CvPoint2D32f * dst,CvMat * matrix)3454 cvGetPerspectiveTransform( const CvPoint2D32f* src,
3455 const CvPoint2D32f* dst,
3456 CvMat* matrix )
3457 {
3458 cv::Mat M0 = cv::cvarrToMat(matrix),
3459 M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3460 CV_Assert( M.size() == M0.size() );
3461 M.convertTo(M0, M0.type());
3462 return matrix;
3463 }
3464
3465
3466 CV_IMPL CvMat*
cvGetAffineTransform(const CvPoint2D32f * src,const CvPoint2D32f * dst,CvMat * matrix)3467 cvGetAffineTransform( const CvPoint2D32f* src,
3468 const CvPoint2D32f* dst,
3469 CvMat* matrix )
3470 {
3471 cv::Mat M0 = cv::cvarrToMat(matrix),
3472 M = cv::getAffineTransform((const cv::Point2f*)src, (const cv::Point2f*)dst);
3473 CV_Assert( M.size() == M0.size() );
3474 M.convertTo(M0, M0.type());
3475 return matrix;
3476 }
3477
3478
3479 CV_IMPL void
cvConvertMaps(const CvArr * arr1,const CvArr * arr2,CvArr * dstarr1,CvArr * dstarr2)3480 cvConvertMaps( const CvArr* arr1, const CvArr* arr2, CvArr* dstarr1, CvArr* dstarr2 )
3481 {
3482 cv::Mat map1 = cv::cvarrToMat(arr1), map2;
3483 cv::Mat dstmap1 = cv::cvarrToMat(dstarr1), dstmap2;
3484
3485 if( arr2 )
3486 map2 = cv::cvarrToMat(arr2);
3487 if( dstarr2 )
3488 {
3489 dstmap2 = cv::cvarrToMat(dstarr2);
3490 if( dstmap2.type() == CV_16SC1 )
3491 dstmap2 = cv::Mat(dstmap2.size(), CV_16UC1, dstmap2.ptr(), dstmap2.step);
3492 }
3493
3494 cv::convertMaps( map1, map2, dstmap1, dstmap2, dstmap1.type(), false );
3495 }
3496
3497 /****************************************************************************************
3498 PkLab.net 2018 based on cv::linearPolar from OpenCV by J.L. Blanco, Apr 2009
3499 ****************************************************************************************/
// Remaps an image between Cartesian and (semilog-)polar representations
// about `center` with maximum radius `maxRadius`.
//   - Forward (no CV_WARP_INVERSE_MAP): dst(rho, phi) = src at the Cartesian
//     point reached by angle phi and radius rho (linear or log-scaled).
//   - Inverse (CV_WARP_INVERSE_MAP): dst(x, y) = src at (rho, phi) computed
//     from the Cartesian offset to `center`.
// Both directions build explicit float maps and delegate to remap().
void cv::warpPolar(InputArray _src, OutputArray _dst, Size dsize,
                   Point2f center, double maxRadius, int flags)
{
    // if dest size is empty given than calculate using proportional setting
    // thus we calculate needed angles to keep same area as bounding circle
    if ((dsize.width <= 0) && (dsize.height <= 0))
    {
        dsize.width = cvRound(maxRadius);
        dsize.height = cvRound(maxRadius * CV_PI);
    }
    else if (dsize.height <= 0)
    {
        dsize.height = cvRound(dsize.width * CV_PI);
    }

    Mat mapx, mapy;
    mapx.create(dsize, CV_32F);
    mapy.create(dsize, CV_32F);
    bool semiLog = (flags & WARP_POLAR_LOG) != 0;

    if (!(flags & CV_WARP_INVERSE_MAP))
    {
        CV_Assert(!dsize.empty());
        // Angle step per destination row: full turn over dsize.height rows.
        double Kangle = CV_2PI / dsize.height;
        int phi, rho;

        // precalculate scaled rho (one radius value per destination column)
        Mat rhos = Mat(1, dsize.width, CV_32F);
        float* bufRhos = (float*)(rhos.data);
        if (semiLog)
        {
            // Log scale: column rho maps to exp(rho*Kmag) - 1 so that the
            // last column reaches approximately maxRadius.
            double Kmag = std::log(maxRadius) / dsize.width;
            for (rho = 0; rho < dsize.width; rho++)
                bufRhos[rho] = (float)(std::exp(rho * Kmag) - 1.0);

        }
        else
        {
            // Linear scale: columns sample [0, maxRadius) uniformly.
            double Kmag = maxRadius / dsize.width;
            for (rho = 0; rho < dsize.width; rho++)
                bufRhos[rho] = (float)(rho * Kmag);
        }

        // Each destination row is a ray at angle phi*Kangle from `center`.
        for (phi = 0; phi < dsize.height; phi++)
        {
            double KKy = Kangle * phi;
            double cp = std::cos(KKy);
            double sp = std::sin(KKy);
            float* mx = (float*)(mapx.data + phi*mapx.step);
            float* my = (float*)(mapy.data + phi*mapy.step);

            for (rho = 0; rho < dsize.width; rho++)
            {
                double x = bufRhos[rho] * cp + center.x;
                double y = bufRhos[rho] * sp + center.y;

                mx[rho] = (float)x;
                my[rho] = (float)y;
            }
        }
        remap(_src, _dst, mapx, mapy, flags & cv::INTER_MAX, (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    }
    else
    {
        // Inverse map: pad the polar source by one wrapped row on each side
        // so interpolation across the 0/2*pi seam reads the right pixels.
        const int ANGLE_BORDER = 1;
        cv::copyMakeBorder(_src, _dst, ANGLE_BORDER, ANGLE_BORDER, 0, 0, BORDER_WRAP);
        Mat src = _dst.getMat();
        Size ssize = _dst.size();
        ssize.height -= 2 * ANGLE_BORDER;   // logical (unpadded) source height
        CV_Assert(!ssize.empty());
        const double Kangle = CV_2PI / ssize.height;
        double Kmag;
        if (semiLog)
            Kmag = std::log(maxRadius) / ssize.width;
        else
            Kmag = maxRadius / ssize.width;

        int x, y;
        Mat bufx, bufy, bufp, bufa;

        // One row of Cartesian offsets, magnitudes and angles, reused per y.
        bufx = Mat(1, dsize.width, CV_32F);
        bufy = Mat(1, dsize.width, CV_32F);
        bufp = Mat(1, dsize.width, CV_32F);
        bufa = Mat(1, dsize.width, CV_32F);

        for (x = 0; x < dsize.width; x++)
            bufx.at<float>(0, x) = (float)x - center.x;

        for (y = 0; y < dsize.height; y++)
        {
            float* mx = (float*)(mapx.data + y*mapx.step);
            float* my = (float*)(mapy.data + y*mapy.step);

            for (x = 0; x < dsize.width; x++)
                bufy.at<float>(0, x) = (float)y - center.y;

            // Convert the (dx, dy) offsets into (magnitude, angle) pairs.
            cartToPolar(bufx, bufy, bufp, bufa, 0);

            if (semiLog)
            {
                // Invert the log scaling: rho_polar = log(1 + magnitude)/Kmag.
                bufp += 1.f;
                log(bufp, bufp);
            }

            for (x = 0; x < dsize.width; x++)
            {
                double rho = bufp.at<float>(0, x) / Kmag;
                double phi = bufa.at<float>(0, x) / Kangle;
                mx[x] = (float)rho;
                // Shift by ANGLE_BORDER to account for the wrapped padding.
                my[x] = (float)phi + ANGLE_BORDER;
            }
        }
        remap(src, _dst, mapx, mapy, flags & cv::INTER_MAX,
            (flags & CV_WARP_FILL_OUTLIERS) ? cv::BORDER_CONSTANT : cv::BORDER_TRANSPARENT);
    }
}
3616
linearPolar(InputArray _src,OutputArray _dst,Point2f center,double maxRadius,int flags)3617 void cv::linearPolar( InputArray _src, OutputArray _dst,
3618 Point2f center, double maxRadius, int flags )
3619 {
3620 warpPolar(_src, _dst, _src.size(), center, maxRadius, flags & ~WARP_POLAR_LOG);
3621 }
3622
logPolar(InputArray _src,OutputArray _dst,Point2f center,double maxRadius,int flags)3623 void cv::logPolar( InputArray _src, OutputArray _dst,
3624 Point2f center, double maxRadius, int flags )
3625 {
3626 Size ssize = _src.size();
3627 double M = maxRadius > 0 ? std::exp(ssize.width / maxRadius) : 1;
3628 warpPolar(_src, _dst, ssize, center, M, flags | WARP_POLAR_LOG);
3629 }
3630
3631 CV_IMPL
cvLinearPolar(const CvArr * srcarr,CvArr * dstarr,CvPoint2D32f center,double maxRadius,int flags)3632 void cvLinearPolar( const CvArr* srcarr, CvArr* dstarr,
3633 CvPoint2D32f center, double maxRadius, int flags )
3634 {
3635 Mat src = cvarrToMat(srcarr);
3636 Mat dst = cvarrToMat(dstarr);
3637
3638 CV_Assert(src.size == dst.size);
3639 CV_Assert(src.type() == dst.type());
3640
3641 cv::linearPolar(src, dst, center, maxRadius, flags);
3642 }
3643
3644 CV_IMPL
cvLogPolar(const CvArr * srcarr,CvArr * dstarr,CvPoint2D32f center,double M,int flags)3645 void cvLogPolar( const CvArr* srcarr, CvArr* dstarr,
3646 CvPoint2D32f center, double M, int flags )
3647 {
3648 Mat src = cvarrToMat(srcarr);
3649 Mat dst = cvarrToMat(dstarr);
3650
3651 CV_Assert(src.size == dst.size);
3652 CV_Assert(src.type() == dst.type());
3653
3654 cv::logPolar(src, dst, center, M, flags);
3655 }
3656
3657 /* End of file. */
3658