1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42 
43 #include "precomp.hpp"
44 #include "opencv2/core/hal/intrin.hpp"
45 
46 namespace cv {
47 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
48 // forward declarations
49 Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
50                                                          const double* _ir, Matx33d &_matTilt,
51                                                          double _u0, double _v0, double _fx, double _fy,
52                                                          double _k1, double _k2, double _p1, double _p2,
53                                                          double _k3, double _k4, double _k5, double _k6,
54                                                          double _s1, double _s2, double _s3, double _s4);
55 
56 
57 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
58 namespace
59 {
// Parallel body that fills a row range of the undistort/rectify remap tables.
// For each destination pixel (j, i) it:
//   1. applies the 3x3 matrix `ir` (inverse rectification * inverse new camera
//      matrix) to get a homogeneous ray: x = j*ir[0]+i*ir[1]+ir[2], etc.;
//   2. distorts the normalized point with the rational radial model (k1..k6),
//      the tangential terms (p1, p2) and the thin-prism terms (s1..s4);
//   3. applies the 3x3 tilted-sensor matrix `matTilt` and de-homogenizes;
//   4. maps back to source-pixel coordinates with (fx, fy, u0, v0) and stores
//      the result in map1/map2 in the layout selected by m1type:
//      CV_16SC2 (fixed-point coords + interpolation-table index), CV_32FC1
//      (two separate float planes) or CV_32FC2 (interleaved float pairs).
class initUndistortRectifyMapComputer : public ParallelLoopBody
{
public:
    // Caches all parameters; map1/map2 and matTilt are held by REFERENCE, so
    // the caller must keep them alive while the body runs.
    initUndistortRectifyMapComputer(
        Size _size, Mat &_map1, Mat &_map2, int _m1type,
        const double* _ir, Matx33d &_matTilt,
        double _u0, double _v0, double _fx, double _fy,
        double _k1, double _k2, double _p1, double _p2,
        double _k3, double _k4, double _k5, double _k6,
        double _s1, double _s2, double _s3, double _s4)
      : size(_size),
        map1(_map1),
        map2(_map2),
        m1type(_m1type),
        ir(_ir),
        matTilt(_matTilt),
        u0(_u0),
        v0(_v0),
        fx(_fx),
        fy(_fy),
        k1(_k1),
        k2(_k2),
        p1(_p1),
        p2(_p2),
        k3(_k3),
        k4(_k4),
        k5(_k5),
        k6(_k6),
        s1(_s1),
        s2(_s2),
        s3(_s3),
        s4(_s4) {
#if CV_SIMD_64F
        // Precompute per-lane column offsets (i * ir[0|3|6]) so the vector
        // loop can form the homogeneous coords of 2*nlanes consecutive pixels
        // by broadcasting the row base and adding these tables.
        for (int i = 0; i < 2 * v_float64::nlanes; ++i)
        {
            s_x[i] = ir[0] * i;
            s_y[i] = ir[3] * i;
            s_w[i] = ir[6] * i;
        }
#endif
    }

    // Processes rows [range.start, range.end); rows are independent, so any
    // partition of the row range is safe for parallel execution.
    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        CV_INSTRUMENT_REGION();

        const int begin = range.start;
        const int end = range.end;

        for( int i = begin; i < end; i++ )
        {
            float* m1f = map1.ptr<float>(i);
            float* m2f = map2.empty() ? 0 : map2.ptr<float>(i);
            // Same storage viewed as fixed-point for the CV_16SC2 layout.
            short* m1 = (short*)m1f;
            ushort* m2 = (ushort*)m2f;
            // Homogeneous coords at the start of row i; column j contributes
            // j*ir[0], j*ir[3], j*ir[6] (added incrementally in the loops).
            double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8];

            int j = 0;

            // Validate that the output layouts required by m1type are present.
            if (m1type == CV_16SC2)
                CV_Assert(m1 != NULL && m2 != NULL);
            else if (m1type == CV_32FC1)
                CV_Assert(m1f != NULL && m2f != NULL);
            else
                CV_Assert(m1 != NULL);

#if CV_SIMD_64F
            // Vector path: two v_float64 batches (2*nlanes pixels) per
            // iteration.  m_0..m_3 are deliberately recycled as scratch
            // registers between stages — statement order matters.
            const v_float64 v_one = vx_setall_f64(1.0);
            for (; j <= size.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes, _x += 2*v_float64::nlanes * ir[0], _y += 2*v_float64::nlanes * ir[3], _w += 2*v_float64::nlanes * ir[6])
            {
                // Normalized coordinates: (x, y) = (_x, _y)/_w per lane.
                v_float64 m_0, m_1, m_2, m_3;
                m_2 = v_one / (vx_setall_f64(_w) + vx_load(s_w));
                m_3 = v_one / (vx_setall_f64(_w) + vx_load(s_w + v_float64::nlanes));
                m_0 = vx_setall_f64(_x); m_1 = vx_setall_f64(_y);
                v_float64 x_0 = (m_0 + vx_load(s_x)) * m_2;
                v_float64 x_1 = (m_0 + vx_load(s_x + v_float64::nlanes)) * m_3;
                v_float64 y_0 = (m_1 + vx_load(s_y)) * m_2;
                v_float64 y_1 = (m_1 + vx_load(s_y + v_float64::nlanes)) * m_3;

                // x^2, y^2 and the squared radius r^2 = x^2 + y^2.
                v_float64 xd_0 = x_0 * x_0;
                v_float64 yd_0 = y_0 * y_0;
                v_float64 xd_1 = x_1 * x_1;
                v_float64 yd_1 = y_1 * y_1;

                v_float64 r2_0 = xd_0 + yd_0;
                v_float64 r2_1 = xd_1 + yd_1;

                // Rational radial factor kr = (1 + k1 r^2 + k2 r^4 + k3 r^6)
                //                           / (1 + k4 r^2 + k5 r^4 + k6 r^6),
                // evaluated by Horner's scheme; result in m_0 / m_1.
                m_1 = vx_setall_f64(k3);
                m_2 = vx_setall_f64(k2);
                m_3 = vx_setall_f64(k1);
                m_0 = v_muladd(v_muladd(v_muladd(m_1, r2_0, m_2), r2_0, m_3), r2_0, v_one);
                m_1 = v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one);
                m_3 = vx_setall_f64(k6);
                m_2 = vx_setall_f64(k5);
                m_0 /= v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(k4)), r2_0, v_one);
                m_1 /= v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(k4)), r2_1, v_one);

                // Tangential helper terms: xd = r^2 + 2x^2, yd = r^2 + 2y^2,
                // and m_2/m_3 = 2xy.
                m_3 = vx_setall_f64(2.0);
                xd_0 = v_muladd(m_3, xd_0, r2_0);
                yd_0 = v_muladd(m_3, yd_0, r2_0);
                xd_1 = v_muladd(m_3, xd_1, r2_1);
                yd_1 = v_muladd(m_3, yd_1, r2_1);
                m_2 = x_0 * y_0 * m_3;
                m_3 = x_1 * y_1 * m_3;

                // Apply the radial factor: (x, y) *= kr.
                x_0 *= m_0; y_0 *= m_0; x_1 *= m_1; y_1 *= m_1;

                // Tangential distortion:
                //   xd = x*kr + p2*(r^2 + 2x^2) + p1*2xy
                //   yd = y*kr + p1*(r^2 + 2y^2) + p2*2xy
                m_0 = vx_setall_f64(p1);
                m_1 = vx_setall_f64(p2);
                xd_0 = v_muladd(xd_0, m_1, x_0);
                yd_0 = v_muladd(yd_0, m_0, y_0);
                xd_1 = v_muladd(xd_1, m_1, x_1);
                yd_1 = v_muladd(yd_1, m_0, y_1);

                xd_0 = v_muladd(m_0, m_2, xd_0);
                yd_0 = v_muladd(m_1, m_2, yd_0);
                xd_1 = v_muladd(m_0, m_3, xd_1);
                yd_1 = v_muladd(m_1, m_3, yd_1);

                // Thin-prism distortion: xd += s1 r^2 + s2 r^4,
                //                        yd += s3 r^2 + s4 r^4.
                m_0 = r2_0 * r2_0;
                m_1 = r2_1 * r2_1;
                m_2 = vx_setall_f64(s2);
                m_3 = vx_setall_f64(s1);
                xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0));
                xd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, xd_1));
                m_2 = vx_setall_f64(s4);
                m_3 = vx_setall_f64(s3);
                yd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, yd_0));
                yd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, yd_1));

                // Tilted-sensor projection: (x, y, w)' = matTilt * (xd, yd, 1).
                m_0 = vx_setall_f64(matTilt.val[0]);
                m_1 = vx_setall_f64(matTilt.val[1]);
                m_2 = vx_setall_f64(matTilt.val[2]);
                x_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                x_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[3]);
                m_1 = vx_setall_f64(matTilt.val[4]);
                m_2 = vx_setall_f64(matTilt.val[5]);
                y_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                y_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[6]);
                m_1 = vx_setall_f64(matTilt.val[7]);
                m_2 = vx_setall_f64(matTilt.val[8]);
                r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                // De-homogenize with a divide-by-zero guard: w == 0 -> use 1
                // (mirrors the scalar `vecTilt(2) ? 1./vecTilt(2) : 1`).
                m_0 = vx_setzero_f64();
                r2_0 = v_select(r2_0 == m_0, v_one, v_one / r2_0);
                r2_1 = v_select(r2_1 == m_0, v_one, v_one / r2_1);

                // Back to pixel coordinates: u = fx*x/w + u0, v = fy*y/w + v0.
                m_0 = vx_setall_f64(fx);
                m_1 = vx_setall_f64(u0);
                m_2 = vx_setall_f64(fy);
                m_3 = vx_setall_f64(v0);
                x_0 = v_muladd(m_0 * r2_0, x_0, m_1);
                y_0 = v_muladd(m_2 * r2_0, y_0, m_3);
                x_1 = v_muladd(m_0 * r2_1, x_1, m_1);
                y_1 = v_muladd(m_2 * r2_1, y_1, m_3);

                if (m1type == CV_32FC1)
                {
                    // Two separate float planes: u in map1, v in map2.
                    v_store(&m1f[j], v_cvt_f32(x_0, x_1));
                    v_store(&m2f[j], v_cvt_f32(y_0, y_1));
                }
                else if (m1type == CV_32FC2)
                {
                    // Interleave (u, v) pairs into map1.
                    v_float32 mf0, mf1;
                    v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1);
                    v_store(&m1f[j * 2], mf0);
                    v_store(&m1f[j * 2 + v_float32::nlanes], mf1);
                }
                else // m1type == CV_16SC2
                {
                    // Fixed-point layout: integer coords (scaled by
                    // INTER_TAB_SIZE, shifted by INTER_BITS) in map1; the
                    // fractional interpolation-table index in map2.
                    m_0 = vx_setall_f64(INTER_TAB_SIZE);
                    x_0 *= m_0; x_1 *= m_0; y_0 *= m_0; y_1 *= m_0;

                    v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1);
                    v_int32 iu = v_round(x_0, x_1);
                    v_int32 iv = v_round(y_0, y_1);

                    v_pack_u_store(&m2[j], (iu & mask) + (iv & mask) * vx_setall_s32(INTER_TAB_SIZE));
                    v_int32 out0, out1;
                    v_zip(iu >> INTER_BITS, iv >> INTER_BITS, out0, out1);
                    v_store(&m1[j * 2], v_pack(out0, out1));
                }
            }

            vx_cleanup();  // reset SIMD state before the scalar tail
#endif
            // Scalar tail (and full loop when CV_SIMD_64F is off): the
            // reference implementation the vector path above must match.
            for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] )
            {
                double w = 1./_w, x = _x*w, y = _y*w;
                double x2 = x*x, y2 = y*y;
                double r2 = x2 + y2, _2xy = 2*x*y;
                // Rational radial factor.
                double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2);
                // Radial + tangential + thin-prism distortion.
                double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2);
                double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2);
                // Tilted-sensor projection with divide-by-zero guard.
                Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1);
                double invProj = vecTilt(2) ? 1./vecTilt(2) : 1;
                double u = fx*invProj*vecTilt(0) + u0;
                double v = fy*invProj*vecTilt(1) + v0;
                if( m1type == CV_16SC2 )
                {
                    int iu = saturate_cast<int>(u*INTER_TAB_SIZE);
                    int iv = saturate_cast<int>(v*INTER_TAB_SIZE);
                    m1[j*2] = (short)(iu >> INTER_BITS);
                    m1[j*2+1] = (short)(iv >> INTER_BITS);
                    m2[j] = (ushort)((iv & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (iu & (INTER_TAB_SIZE-1)));
                }
                else if( m1type == CV_32FC1 )
                {
                    m1f[j] = (float)u;
                    m2f[j] = (float)v;
                }
                else
                {
                    m1f[j*2] = (float)u;
                    m1f[j*2+1] = (float)v;
                }
            }
        }
    }

private:
    Size size;          // full map size (width drives the inner loop)
    Mat &map1;          // primary output map (coords; layout per m1type)
    Mat &map2;          // secondary output map (may be empty for CV_32FC2)
    int m1type;         // CV_16SC2, CV_32FC1 or CV_32FC2
    const double* ir;   // 3x3 inverse rectification*camera matrix, row-major
    Matx33d &matTilt;   // tilted-sensor-model matrix
    double u0;          // principal point x of the source camera matrix
    double v0;          // principal point y
    double fx;          // focal length x
    double fy;          // focal length y
    double k1;          // radial distortion, numerator r^2 term
    double k2;          // radial, numerator r^4 term
    double p1;          // tangential distortion
    double p2;          // tangential distortion
    double k3;          // radial, numerator r^6 term
    double k4;          // radial, denominator r^2 term
    double k5;          // radial, denominator r^4 term
    double k6;          // radial, denominator r^6 term
    double s1;          // thin-prism, x r^2 term
    double s2;          // thin-prism, x r^4 term
    double s3;          // thin-prism, y r^2 term
    double s4;          // thin-prism, y r^4 term
#if CV_SIMD_64F
    // Per-lane column offsets precomputed in the constructor (i * ir[0|3|6]).
    double s_x[2*v_float64::nlanes];
    double s_y[2*v_float64::nlanes];
    double s_w[2*v_float64::nlanes];
#endif
};
311 }
312 
getInitUndistortRectifyMapComputer(Size _size,Mat & _map1,Mat & _map2,int _m1type,const double * _ir,Matx33d & _matTilt,double _u0,double _v0,double _fx,double _fy,double _k1,double _k2,double _p1,double _p2,double _k3,double _k4,double _k5,double _k6,double _s1,double _s2,double _s3,double _s4)313 Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
314                                                          const double* _ir, Matx33d &_matTilt,
315                                                          double _u0, double _v0, double _fx, double _fy,
316                                                          double _k1, double _k2, double _p1, double _p2,
317                                                          double _k3, double _k4, double _k5, double _k6,
318                                                          double _s1, double _s2, double _s3, double _s4)
319 {
320     CV_INSTRUMENT_REGION();
321 
322     return Ptr<initUndistortRectifyMapComputer>(new initUndistortRectifyMapComputer(_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, _fy,
323                                                                                     _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4));
324 }
325 
326 #endif
327 CV_CPU_OPTIMIZATION_NAMESPACE_END
328 }
329 /*  End of file  */
330