/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
42
43 #include "precomp.hpp"
44 #include "opencv2/core/hal/intrin.hpp"
45
46 namespace cv {
47 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations

// Factory for the parallel worker that fills the undistort/rectify remap
// tables; defined below (only when CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY is
// not set, i.e. in the TU that provides this CPU-dispatched implementation).
Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
                                                         const double* _ir, Matx33d &_matTilt,
                                                         double _u0, double _v0, double _fx, double _fy,
                                                         double _k1, double _k2, double _p1, double _p2,
                                                         double _k3, double _k4, double _k5, double _k6,
                                                         double _s1, double _s2, double _s3, double _s4);
55
56
57 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
namespace
{
// Row-parallel body used to build remap() tables for undistortion/rectification.
// For every destination pixel (j, i) it:
//   1. applies the 3x3 homography `ir` (inverse of newCameraMatrix * R) to get
//      normalized coordinates (x, y) = (X/W, Y/W);
//   2. distorts them with the rational radial model (k1..k3 numerator,
//      k4..k6 denominator), tangential terms (p1, p2) and thin-prism terms
//      (s1..s4), then applies the 3x3 tilt matrix `matTilt`;
//   3. projects with the source intrinsics (fx, fy, u0, v0) and stores the
//      resulting source coordinates into map1/map2 in CV_16SC2, CV_32FC1 or
//      CV_32FC2 layout (selected by m1type).
class initUndistortRectifyMapComputer : public ParallelLoopBody
{
public:
    initUndistortRectifyMapComputer(
        Size _size, Mat &_map1, Mat &_map2, int _m1type,
        const double* _ir, Matx33d &_matTilt,
        double _u0, double _v0, double _fx, double _fy,
        double _k1, double _k2, double _p1, double _p2,
        double _k3, double _k4, double _k5, double _k6,
        double _s1, double _s2, double _s3, double _s4)
      : size(_size),
        map1(_map1),
        map2(_map2),
        m1type(_m1type),
        ir(_ir),
        matTilt(_matTilt),
        u0(_u0),
        v0(_v0),
        fx(_fx),
        fy(_fy),
        k1(_k1),
        k2(_k2),
        p1(_p1),
        p2(_p2),
        k3(_k3),
        k4(_k4),
        k5(_k5),
        k6(_k6),
        s1(_s1),
        s2(_s2),
        s3(_s3),
        s4(_s4) {
#if CV_SIMD_64F
        // Per-lane horizontal offsets: lane i of a SIMD batch corresponds to
        // column (j + i), whose homography terms grow linearly by ir[0]/ir[3]/ir[6].
        for (int i = 0; i < 2 * v_float64::nlanes; ++i)
        {
            s_x[i] = ir[0] * i;
            s_y[i] = ir[3] * i;
            s_w[i] = ir[6] * i;
        }
#endif
    }

    // Processes destination rows [range.start, range.end).
    void operator()( const cv::Range& range ) const CV_OVERRIDE
    {
        CV_INSTRUMENT_REGION();

        const int begin = range.start;
        const int end = range.end;

        for( int i = begin; i < end; i++ )
        {
            float* m1f = map1.ptr<float>(i);
            float* m2f = map2.empty() ? 0 : map2.ptr<float>(i);
            short* m1 = (short*)m1f;   // CV_16SC2 view of the same row
            ushort* m2 = (ushort*)m2f; // interpolation-table indices (CV_16SC2 mode)
            // Homogeneous coordinates of (0, i) under `ir`; advanced per column below.
            double _x = i*ir[1] + ir[2], _y = i*ir[4] + ir[5], _w = i*ir[7] + ir[8];

            int j = 0;

            if (m1type == CV_16SC2)
                CV_Assert(m1 != NULL && m2 != NULL);
            else if (m1type == CV_32FC1)
                CV_Assert(m1f != NULL && m2f != NULL);
            else
                CV_Assert(m1 != NULL);

#if CV_SIMD_64F
            // Vectorized path: 2*nlanes columns per iteration, split into two
            // half-batches (suffix _0 and _1). The temporaries m_0..m_3 are
            // deliberately reused for different quantities; the comments below
            // track what they hold at each step. Do not reorder statements.
            const v_float64 v_one = vx_setall_f64(1.0);
            for (; j <= size.width - 2*v_float64::nlanes; j += 2*v_float64::nlanes, _x += 2*v_float64::nlanes * ir[0], _y += 2*v_float64::nlanes * ir[3], _w += 2*v_float64::nlanes * ir[6])
            {
                v_float64 m_0, m_1, m_2, m_3;
                // m_2, m_3 = 1/W for each half-batch.
                m_2 = v_one / (vx_setall_f64(_w) + vx_load(s_w));
                m_3 = v_one / (vx_setall_f64(_w) + vx_load(s_w + v_float64::nlanes));
                m_0 = vx_setall_f64(_x); m_1 = vx_setall_f64(_y);
                // Normalized pinhole coordinates x = X/W, y = Y/W.
                v_float64 x_0 = (m_0 + vx_load(s_x)) * m_2;
                v_float64 x_1 = (m_0 + vx_load(s_x + v_float64::nlanes)) * m_3;
                v_float64 y_0 = (m_1 + vx_load(s_y)) * m_2;
                v_float64 y_1 = (m_1 + vx_load(s_y + v_float64::nlanes)) * m_3;

                // xd/yd temporarily hold x^2 and y^2.
                v_float64 xd_0 = x_0 * x_0;
                v_float64 yd_0 = y_0 * y_0;
                v_float64 xd_1 = x_1 * x_1;
                v_float64 yd_1 = y_1 * y_1;

                v_float64 r2_0 = xd_0 + yd_0;
                v_float64 r2_1 = xd_1 + yd_1;

                // Rational radial factor:
                // kr = (1 + k1 r^2 + k2 r^4 + k3 r^6) / (1 + k4 r^2 + k5 r^4 + k6 r^6)
                // kept in m_0 (batch 0) and m_1 (batch 1).
                m_1 = vx_setall_f64(k3);
                m_2 = vx_setall_f64(k2);
                m_3 = vx_setall_f64(k1);
                m_0 = v_muladd(v_muladd(v_muladd(m_1, r2_0, m_2), r2_0, m_3), r2_0, v_one);
                m_1 = v_muladd(v_muladd(v_muladd(m_1, r2_1, m_2), r2_1, m_3), r2_1, v_one);
                m_3 = vx_setall_f64(k6);
                m_2 = vx_setall_f64(k5);
                m_0 /= v_muladd(v_muladd(v_muladd(m_3, r2_0, m_2), r2_0, vx_setall_f64(k4)), r2_0, v_one);
                m_1 /= v_muladd(v_muladd(v_muladd(m_3, r2_1, m_2), r2_1, vx_setall_f64(k4)), r2_1, v_one);

                // Tangential building blocks: xd = r^2 + 2x^2, yd = r^2 + 2y^2,
                // and m_2/m_3 = 2xy for each half-batch.
                m_3 = vx_setall_f64(2.0);
                xd_0 = v_muladd(m_3, xd_0, r2_0);
                yd_0 = v_muladd(m_3, yd_0, r2_0);
                xd_1 = v_muladd(m_3, xd_1, r2_1);
                yd_1 = v_muladd(m_3, yd_1, r2_1);
                m_2 = x_0 * y_0 * m_3;
                m_3 = x_1 * y_1 * m_3;

                // Apply the radial factor: x *= kr, y *= kr.
                x_0 *= m_0; y_0 *= m_0; x_1 *= m_1; y_1 *= m_1;

                // Tangential terms:
                // xd = x*kr + p2*(r^2 + 2x^2), yd = y*kr + p1*(r^2 + 2y^2) ...
                m_0 = vx_setall_f64(p1);
                m_1 = vx_setall_f64(p2);
                xd_0 = v_muladd(xd_0, m_1, x_0);
                yd_0 = v_muladd(yd_0, m_0, y_0);
                xd_1 = v_muladd(xd_1, m_1, x_1);
                yd_1 = v_muladd(yd_1, m_0, y_1);

                // ... + p1*2xy (x) and + p2*2xy (y).
                xd_0 = v_muladd(m_0, m_2, xd_0);
                yd_0 = v_muladd(m_1, m_2, yd_0);
                xd_1 = v_muladd(m_0, m_3, xd_1);
                yd_1 = v_muladd(m_1, m_3, yd_1);

                // Thin-prism terms: + s1 r^2 + s2 r^4 (x), + s3 r^2 + s4 r^4 (y);
                // m_0/m_1 hold r^4 here.
                m_0 = r2_0 * r2_0;
                m_1 = r2_1 * r2_1;
                m_2 = vx_setall_f64(s2);
                m_3 = vx_setall_f64(s1);
                xd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, xd_0));
                xd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, xd_1));
                m_2 = vx_setall_f64(s4);
                m_3 = vx_setall_f64(s3);
                yd_0 = v_muladd(m_3, r2_0, v_muladd(m_2, m_0, yd_0));
                yd_1 = v_muladd(m_3, r2_1, v_muladd(m_2, m_1, yd_1));

                // Apply the 3x3 tilt matrix to (xd, yd, 1); r2_* is reused
                // for the third (projective) component.
                m_0 = vx_setall_f64(matTilt.val[0]);
                m_1 = vx_setall_f64(matTilt.val[1]);
                m_2 = vx_setall_f64(matTilt.val[2]);
                x_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                x_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[3]);
                m_1 = vx_setall_f64(matTilt.val[4]);
                m_2 = vx_setall_f64(matTilt.val[5]);
                y_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                y_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                m_0 = vx_setall_f64(matTilt.val[6]);
                m_1 = vx_setall_f64(matTilt.val[7]);
                m_2 = vx_setall_f64(matTilt.val[8]);
                r2_0 = v_muladd(m_0, xd_0, v_muladd(m_1, yd_0, m_2));
                r2_1 = v_muladd(m_0, xd_1, v_muladd(m_1, yd_1, m_2));
                // Inverse projective scale; 1 when the component is exactly 0
                // (matches the scalar tail's `invProj` fallback).
                m_0 = vx_setzero_f64();
                r2_0 = v_select(r2_0 == m_0, v_one, v_one / r2_0);
                r2_1 = v_select(r2_1 == m_0, v_one, v_one / r2_1);

                // Final projection: u = fx * x/proj + u0, v = fy * y/proj + v0.
                m_0 = vx_setall_f64(fx);
                m_1 = vx_setall_f64(u0);
                m_2 = vx_setall_f64(fy);
                m_3 = vx_setall_f64(v0);
                x_0 = v_muladd(m_0 * r2_0, x_0, m_1);
                y_0 = v_muladd(m_2 * r2_0, y_0, m_3);
                x_1 = v_muladd(m_0 * r2_1, x_1, m_1);
                y_1 = v_muladd(m_2 * r2_1, y_1, m_3);

                if (m1type == CV_32FC1)
                {
                    // Two planar float maps: u in map1, v in map2.
                    v_store(&m1f[j], v_cvt_f32(x_0, x_1));
                    v_store(&m2f[j], v_cvt_f32(y_0, y_1));
                }
                else if (m1type == CV_32FC2)
                {
                    // Interleaved (u, v) pairs in map1.
                    v_float32 mf0, mf1;
                    v_zip(v_cvt_f32(x_0, x_1), v_cvt_f32(y_0, y_1), mf0, mf1);
                    v_store(&m1f[j * 2], mf0);
                    v_store(&m1f[j * 2 + v_float32::nlanes], mf1);
                }
                else // m1type == CV_16SC2
                {
                    // Fixed-point layout: integer (u, v) in map1, and the
                    // INTER_TAB_SIZE x INTER_TAB_SIZE fractional-bin index in map2.
                    m_0 = vx_setall_f64(INTER_TAB_SIZE);
                    x_0 *= m_0; x_1 *= m_0; y_0 *= m_0; y_1 *= m_0;

                    v_int32 mask = vx_setall_s32(INTER_TAB_SIZE - 1);
                    v_int32 iu = v_round(x_0, x_1);
                    v_int32 iv = v_round(y_0, y_1);

                    v_pack_u_store(&m2[j], (iu & mask) + (iv & mask) * vx_setall_s32(INTER_TAB_SIZE));
                    v_int32 out0, out1;
                    v_zip(iu >> INTER_BITS, iv >> INTER_BITS, out0, out1);
                    v_store(&m1[j * 2], v_pack(out0, out1));
                }
            }

            vx_cleanup();
#endif
            // Scalar tail (and the whole row when SIMD is unavailable);
            // mirrors the vector path above step for step.
            for( ; j < size.width; j++, _x += ir[0], _y += ir[3], _w += ir[6] )
            {
                double w = 1./_w, x = _x*w, y = _y*w;
                double x2 = x*x, y2 = y*y;
                double r2 = x2 + y2, _2xy = 2*x*y;
                // Rational radial factor.
                double kr = (1 + ((k3*r2 + k2)*r2 + k1)*r2)/(1 + ((k6*r2 + k5)*r2 + k4)*r2);
                // Radial + tangential + thin-prism distortion.
                double xd = (x*kr + p1*_2xy + p2*(r2 + 2*x2) + s1*r2+s2*r2*r2);
                double yd = (y*kr + p1*(r2 + 2*y2) + p2*_2xy + s3*r2+s4*r2*r2);
                // Tilted-sensor transform and projective normalization.
                Vec3d vecTilt = matTilt*cv::Vec3d(xd, yd, 1);
                double invProj = vecTilt(2) ? 1./vecTilt(2) : 1;
                double u = fx*invProj*vecTilt(0) + u0;
                double v = fy*invProj*vecTilt(1) + v0;
                if( m1type == CV_16SC2 )
                {
                    int iu = saturate_cast<int>(u*INTER_TAB_SIZE);
                    int iv = saturate_cast<int>(v*INTER_TAB_SIZE);
                    m1[j*2] = (short)(iu >> INTER_BITS);
                    m1[j*2+1] = (short)(iv >> INTER_BITS);
                    m2[j] = (ushort)((iv & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + (iu & (INTER_TAB_SIZE-1)));
                }
                else if( m1type == CV_32FC1 )
                {
                    m1f[j] = (float)u;
                    m2f[j] = (float)v;
                }
                else
                {
                    m1f[j*2] = (float)u;
                    m1f[j*2+1] = (float)v;
                }
            }
        }
    }

private:
    Size size;        // destination map size
    Mat &map1;        // primary output map (layout depends on m1type)
    Mat &map2;        // secondary output map (may be empty for CV_32FC2)
    int m1type;       // CV_16SC2, CV_32FC1 or CV_32FC2
    const double* ir; // 3x3 inverse homography, row-major, 9 elements
    Matx33d &matTilt; // tilted-sensor distortion matrix
    double u0;        // principal point (source intrinsics)
    double v0;
    double fx;        // focal lengths (source intrinsics)
    double fy;
    double k1;        // radial distortion, numerator k1..k3
    double k2;
    double p1;        // tangential distortion
    double p2;
    double k3;
    double k4;        // radial distortion, denominator k4..k6
    double k5;
    double k6;
    double s1;        // thin-prism distortion
    double s2;
    double s3;
    double s4;
#if CV_SIMD_64F
    // Precomputed per-lane increments of the homography terms (see ctor).
    double s_x[2*v_float64::nlanes];
    double s_y[2*v_float64::nlanes];
    double s_w[2*v_float64::nlanes];
#endif
};
}
312
getInitUndistortRectifyMapComputer(Size _size,Mat & _map1,Mat & _map2,int _m1type,const double * _ir,Matx33d & _matTilt,double _u0,double _v0,double _fx,double _fy,double _k1,double _k2,double _p1,double _p2,double _k3,double _k4,double _k5,double _k6,double _s1,double _s2,double _s3,double _s4)313 Ptr<ParallelLoopBody> getInitUndistortRectifyMapComputer(Size _size, Mat &_map1, Mat &_map2, int _m1type,
314 const double* _ir, Matx33d &_matTilt,
315 double _u0, double _v0, double _fx, double _fy,
316 double _k1, double _k2, double _p1, double _p2,
317 double _k3, double _k4, double _k5, double _k6,
318 double _s1, double _s2, double _s3, double _s4)
319 {
320 CV_INSTRUMENT_REGION();
321
322 return Ptr<initUndistortRectifyMapComputer>(new initUndistortRectifyMapComputer(_size, _map1, _map2, _m1type, _ir, _matTilt, _u0, _v0, _fx, _fy,
323 _k1, _k2, _p1, _p2, _k3, _k4, _k5, _k6, _s1, _s2, _s3, _s4));
324 }
325
326 #endif
327 CV_CPU_OPTIMIZATION_NAMESPACE_END
328 }
329 /* End of file */
330