1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
4 
5 
6 #include "precomp.hpp"
7 #include "convert.hpp"
8 
9 namespace cv {
10 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
11 
12 BinaryFunc getCvtScaleAbsFunc(int depth);
13 BinaryFunc getConvertScaleFunc(int sdepth, int ddepth);
14 
15 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
16 
17 /****************************************************************************************\
18 *                                convertScale[Abs]                                       *
19 \****************************************************************************************/
20 
21 template<typename _Ts, typename _Td> inline void
cvtabs_32f(const _Ts * src,size_t sstep,_Td * dst,size_t dstep,Size size,float a,float b)22 cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
23             Size size, float a, float b )
24 {
25 #if CV_SIMD
26     v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
27     const int VECSZ = v_float32::nlanes*2;
28 #endif
29     sstep /= sizeof(src[0]);
30     dstep /= sizeof(dst[0]);
31 
32     for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
33     {
34         int j = 0;
35 #if CV_SIMD
36         for( ; j < size.width; j += VECSZ )
37         {
38             if( j > size.width - VECSZ )
39             {
40                 if( j == 0 || src == (_Ts*)dst )
41                     break;
42                 j = size.width - VECSZ;
43             }
44             v_float32 v0, v1;
45             vx_load_pair_as(src + j, v0, v1);
46             v0 = v_fma(v0, va, vb);
47             v1 = v_fma(v1, va, vb);
48             v_store_pair_as(dst + j, v_abs(v0), v_abs(v1));
49         }
50 #endif
51         for( ; j < size.width; j++ )
52             dst[j] = saturate_cast<_Td>(std::abs(src[j]*a + b));
53     }
54 }
55 
56 // variant for conversions 16f <-> ... w/o unrolling
57 template<typename _Ts, typename _Td> inline void
cvtabs1_32f(const _Ts * src,size_t sstep,_Td * dst,size_t dstep,Size size,float a,float b)58 cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
59              Size size, float a, float b )
60 {
61 #if CV_SIMD
62     v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
63     const int VECSZ = v_float32::nlanes*2;
64 #endif
65     sstep /= sizeof(src[0]);
66     dstep /= sizeof(dst[0]);
67 
68     for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
69     {
70         int j = 0;
71 #if CV_SIMD
72         for( ; j < size.width; j += VECSZ )
73         {
74             if( j > size.width - VECSZ )
75             {
76                 if( j == 0 || src == (_Ts*)dst )
77                     break;
78                 j = size.width - VECSZ;
79             }
80             v_float32 v0;
81             vx_load_as(src + j, v0);
82             v0 = v_fma(v0, va, vb);
83             v_store_as(dst + j, v_abs(v0));
84         }
85 #endif
86         for( ; j < size.width; j++ )
87             dst[j] = saturate_cast<_Td>(src[j]*a + b);
88     }
89 }
90 
91 template<typename _Ts, typename _Td> inline void
cvt_32f(const _Ts * src,size_t sstep,_Td * dst,size_t dstep,Size size,float a,float b)92 cvt_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
93          Size size, float a, float b )
94 {
95 #if CV_SIMD
96     v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
97     const int VECSZ = v_float32::nlanes*2;
98 #endif
99     sstep /= sizeof(src[0]);
100     dstep /= sizeof(dst[0]);
101 
102     for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
103     {
104         int j = 0;
105 #if CV_SIMD
106         for( ; j < size.width; j += VECSZ )
107         {
108             if( j > size.width - VECSZ )
109             {
110                 if( j == 0 || src == (_Ts*)dst )
111                     break;
112                 j = size.width - VECSZ;
113             }
114             v_float32 v0, v1;
115             vx_load_pair_as(src + j, v0, v1);
116             v0 = v_fma(v0, va, vb);
117             v1 = v_fma(v1, va, vb);
118             v_store_pair_as(dst + j, v0, v1);
119         }
120 #endif
121         for( ; j < size.width; j++ )
122             dst[j] = saturate_cast<_Td>(src[j]*a + b);
123     }
124 }
125 
126 // variant for conversions 16f <-> ... w/o unrolling
127 template<typename _Ts, typename _Td> inline void
cvt1_32f(const _Ts * src,size_t sstep,_Td * dst,size_t dstep,Size size,float a,float b)128 cvt1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
129           Size size, float a, float b )
130 {
131 #if CV_SIMD
132     v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b);
133     const int VECSZ = v_float32::nlanes;
134 #endif
135     sstep /= sizeof(src[0]);
136     dstep /= sizeof(dst[0]);
137 
138     for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
139     {
140         int j = 0;
141 #if CV_SIMD
142         for( ; j < size.width; j += VECSZ )
143         {
144             if( j > size.width - VECSZ )
145             {
146                 if( j == 0 || src == (_Ts*)dst )
147                     break;
148                 j = size.width - VECSZ;
149             }
150             v_float32 v0;
151             vx_load_as(src + j, v0);
152             v0 = v_fma(v0, va, vb);
153             v_store_as(dst + j, v0);
154         }
155 #endif
156         for( ; j < size.width; j++ )
157             dst[j] = saturate_cast<_Td>(src[j]*a + b);
158     }
159 }
160 
161 
162 template<typename _Ts, typename _Td> inline void
cvt_64f(const _Ts * src,size_t sstep,_Td * dst,size_t dstep,Size size,double a,double b)163 cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep,
164          Size size, double a, double b )
165 {
166 #if CV_SIMD_64F
167     v_float64 va = vx_setall_f64(a), vb = vx_setall_f64(b);
168     const int VECSZ = v_float64::nlanes*2;
169 #endif
170     sstep /= sizeof(src[0]);
171     dstep /= sizeof(dst[0]);
172 
173     for( int i = 0; i < size.height; i++, src += sstep, dst += dstep )
174     {
175         int j = 0;
176 #if CV_SIMD_64F
177         for( ; j < size.width; j += VECSZ )
178         {
179             if( j > size.width - VECSZ )
180             {
181                 if( j == 0 || src == (_Ts*)dst )
182                     break;
183                 j = size.width - VECSZ;
184             }
185             v_float64 v0, v1;
186             vx_load_pair_as(src + j, v0, v1);
187             v0 = v_fma(v0, va, vb);
188             v1 = v_fma(v1, va, vb);
189             v_store_pair_as(dst + j, v0, v1);
190         }
191 #endif
192         for( ; j < size.width; j++ )
193             dst[j] = saturate_cast<_Td>(src[j]*a + b);
194     }
195 }
196 
197 //==================================================================================================
198 
// Defines a static function cvtScaleAbs<suffix> that takes untyped byte
// pointers, reinterprets them as stype / dtype rows and forwards to `cvt`.
// The second source pointer and its step are accepted but unused.
// scale_ is read as an array of two doubles {scale, shift}, each cast to wtype.
#define DEF_CVT_SCALE_ABS_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScaleAbs##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                                 uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const double* params = (const double*)scale_; \
    const wtype alpha = (wtype)params[0]; \
    const wtype beta = (wtype)params[1]; \
    cvt((const stype*)src_, sstep, (dtype*)dst_, dstep, size, alpha, beta); \
}
208 
209 
// Defines a static function cvtScale<suffix> that takes untyped byte pointers,
// views them as stype (source) and dtype (destination) rows, and calls `cvt`.
// The second source pointer and its step are accepted but unused.
// scale_ is read as an array of two doubles; element 0 and element 1 are cast
// to wtype and passed as the last two arguments of `cvt`.
#define DEF_CVT_SCALE_FUNC(suffix, cvt, stype, dtype, wtype) \
static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \
                              uchar* dst_, size_t dstep, Size size, void* scale_) \
{ \
    const stype* typedSrc = (const stype*)src_; \
    dtype* typedDst = (dtype*)dst_; \
    const double* coeffs = (const double*)scale_; \
    cvt(typedSrc, sstep, typedDst, dstep, size, \
        (wtype)coeffs[0], (wtype)coeffs[1]); \
}
219 
220 DEF_CVT_SCALE_ABS_FUNC(8u,    cvtabs_32f, uchar,  uchar, float)
221 DEF_CVT_SCALE_ABS_FUNC(8s8u,  cvtabs_32f, schar,  uchar, float)
222 DEF_CVT_SCALE_ABS_FUNC(16u8u, cvtabs_32f, ushort, uchar, float)
223 DEF_CVT_SCALE_ABS_FUNC(16s8u, cvtabs_32f, short,  uchar, float)
224 DEF_CVT_SCALE_ABS_FUNC(32s8u, cvtabs_32f, int,    uchar, float)
225 DEF_CVT_SCALE_ABS_FUNC(32f8u, cvtabs_32f, float,  uchar, float)
226 DEF_CVT_SCALE_ABS_FUNC(64f8u, cvtabs_32f, double, uchar, float)
227 
228 DEF_CVT_SCALE_FUNC(8u,     cvt_32f, uchar,  uchar, float)
229 DEF_CVT_SCALE_FUNC(8s8u,   cvt_32f, schar,  uchar, float)
230 DEF_CVT_SCALE_FUNC(16u8u,  cvt_32f, ushort, uchar, float)
231 DEF_CVT_SCALE_FUNC(16s8u,  cvt_32f, short,  uchar, float)
232 DEF_CVT_SCALE_FUNC(32s8u,  cvt_32f, int,    uchar, float)
233 DEF_CVT_SCALE_FUNC(32f8u,  cvt_32f, float,  uchar, float)
234 DEF_CVT_SCALE_FUNC(64f8u,  cvt_32f, double, uchar, float)
235 DEF_CVT_SCALE_FUNC(16f8u,  cvt_32f, float16_t, uchar, float)
236 
237 DEF_CVT_SCALE_FUNC(8u8s,   cvt_32f, uchar,  schar, float)
238 DEF_CVT_SCALE_FUNC(8s,     cvt_32f, schar,  schar, float)
239 DEF_CVT_SCALE_FUNC(16u8s,  cvt_32f, ushort, schar, float)
240 DEF_CVT_SCALE_FUNC(16s8s,  cvt_32f, short,  schar, float)
241 DEF_CVT_SCALE_FUNC(32s8s,  cvt_32f, int,    schar, float)
242 DEF_CVT_SCALE_FUNC(32f8s,  cvt_32f, float,  schar, float)
243 DEF_CVT_SCALE_FUNC(64f8s,  cvt_32f, double, schar, float)
244 DEF_CVT_SCALE_FUNC(16f8s,  cvt_32f, float16_t, schar, float)
245 
246 DEF_CVT_SCALE_FUNC(8u16u,  cvt_32f, uchar,  ushort, float)
247 DEF_CVT_SCALE_FUNC(8s16u,  cvt_32f, schar,  ushort, float)
248 DEF_CVT_SCALE_FUNC(16u,    cvt_32f, ushort, ushort, float)
249 DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short,  ushort, float)
250 DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int,    ushort, float)
251 DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float,  ushort, float)
252 DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
253 DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
254 
255 DEF_CVT_SCALE_FUNC(8u16s,  cvt_32f, uchar,  short, float)
256 DEF_CVT_SCALE_FUNC(8s16s,  cvt_32f, schar,  short, float)
257 DEF_CVT_SCALE_FUNC(16u16s, cvt_32f, ushort, short, float)
258 DEF_CVT_SCALE_FUNC(16s,    cvt_32f, short,  short, float)
259 DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int,    short, float)
260 DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float,  short, float)
261 DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
262 DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
263 
264 DEF_CVT_SCALE_FUNC(8u32s,  cvt_32f, uchar,  int, float)
265 DEF_CVT_SCALE_FUNC(8s32s,  cvt_32f, schar,  int, float)
266 DEF_CVT_SCALE_FUNC(16u32s, cvt_32f, ushort, int, float)
267 DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short,  int, float)
268 DEF_CVT_SCALE_FUNC(32s,    cvt_64f, int,    int, double)
269 DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float,  int, float)
270 DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
271 DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
272 
273 DEF_CVT_SCALE_FUNC(8u32f,  cvt_32f, uchar,  float, float)
274 DEF_CVT_SCALE_FUNC(8s32f,  cvt_32f, schar,  float, float)
275 DEF_CVT_SCALE_FUNC(16u32f, cvt_32f, ushort, float, float)
276 DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short,  float, float)
277 DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int,    float, float)
278 DEF_CVT_SCALE_FUNC(32f,    cvt_32f, float,  float, float)
279 DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
280 DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
281 
282 DEF_CVT_SCALE_FUNC(8u64f,  cvt_64f, uchar,  double, double)
283 DEF_CVT_SCALE_FUNC(8s64f,  cvt_64f, schar,  double, double)
284 DEF_CVT_SCALE_FUNC(16u64f, cvt_64f, ushort, double, double)
285 DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short,  double, double)
286 DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int,    double, double)
287 DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float,  double, double)
288 DEF_CVT_SCALE_FUNC(64f,    cvt_64f, double, double, double)
289 DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
290 
291 DEF_CVT_SCALE_FUNC(8u16f,  cvt1_32f, uchar,  float16_t, float)
292 DEF_CVT_SCALE_FUNC(8s16f,  cvt1_32f, schar,  float16_t, float)
293 DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float)
294 DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short,  float16_t, float)
295 DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int,    float16_t, float)
296 DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float,  float16_t, float)
297 DEF_CVT_SCALE_FUNC(64f16f, cvt_64f,  double, float16_t, double)
298 DEF_CVT_SCALE_FUNC(16f,    cvt1_32f, float16_t, float16_t, float)
299 
getCvtScaleAbsFunc(int depth)300 BinaryFunc getCvtScaleAbsFunc(int depth)
301 {
302     static BinaryFunc cvtScaleAbsTab[] =
303     {
304         (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u,
305         (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u,
306         (BinaryFunc)cvtScaleAbs64f8u, 0
307     };
308 
309     return cvtScaleAbsTab[depth];
310 }
311 
getConvertScaleFunc(int sdepth,int ddepth)312 BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
313 {
314     static BinaryFunc cvtScaleTab[][8] =
315     {
316         {
317             (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
318             (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
319             (BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u
320         },
321         {
322             (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
323             (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
324             (BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s
325         },
326         {
327             (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
328             (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
329             (BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u
330         },
331         {
332             (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
333             (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
334             (BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s
335         },
336         {
337             (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
338             (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
339             (BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s
340         },
341         {
342             (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
343             (BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
344             (BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f
345         },
346         {
347             (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
348             (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
349             (BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f
350         },
351         {
352             (BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
353             (BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f,
354             (BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f
355         },
356     };
357 
358     return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
359 }
360 
361 #endif
362 
363 CV_CPU_OPTIMIZATION_NAMESPACE_END
364 } // namespace
365