1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                          License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
17 // Third party copyrights are property of their respective owners.
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 //   * Redistribution's of source code must retain the above copyright notice,
23 //     this list of conditions and the following disclaimer.
24 //
25 //   * Redistribution's in binary form must reproduce the above copyright notice,
26 //     this list of conditions and the following disclaimer in the documentation
27 //     and/or other materials provided with the distribution.
28 //
29 //   * The name of the copyright holders may not be used to endorse or promote products
30 //     derived from this software without specific prior written permission.
31 //
32 // This software is provided by the copyright holders and contributors "as is" and
33 // any express or implied warranties, including, but not limited to, the implied
34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
35 // In no event shall the Intel Corporation or contributors be liable for any direct,
36 // indirect, incidental, special, exemplary, or consequential damages
37 // (including, but not limited to, procurement of substitute goods or services;
38 // loss of use, data, or profits; or business interruption) however caused
39 // and on any theory of liability, whether in contract, strict liability,
40 // or tort (including negligence or otherwise) arising in any way out of
41 // the use of this software, even if advised of the possibility of such damage.
42 //
43 //M*/
44 
45 #ifndef OPENCV_CORE_FAST_MATH_HPP
46 #define OPENCV_CORE_FAST_MATH_HPP
47 
48 #include "opencv2/core/cvdef.h"
49 
50 //! @addtogroup core_utils
51 //! @{
52 
53 /****************************************************************************************\
54 *                                      fast math                                         *
55 \****************************************************************************************/
56 
57 #ifdef __cplusplus
58 #  include <cmath>
59 #else
60 #  ifdef __BORLANDC__
61 #    include <fastmath.h>
62 #  else
63 #    include <math.h>
64 #  endif
65 #endif
66 
67 #if defined(__CUDACC__)
68   // nothing, intrinsics/asm code is not supported
69 #else
70   #if ((defined _MSC_VER && defined _M_X64) \
71       || (defined __GNUC__ && defined __x86_64__ && defined __SSE2__)) \
72       && !defined(OPENCV_SKIP_INCLUDE_EMMINTRIN_H)
73     #include <emmintrin.h>
74   #endif
75 
76   #if defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8 \
77       && !defined(OPENCV_SKIP_INCLUDE_ALTIVEC_H)
78     #include <altivec.h>
79     #undef vector
80     #undef bool
81     #undef pixel
82   #endif
83 
  /* Select an architecture-specific body for cvRound().
     CV_INLINE_ROUND_DBL(value) and CV_INLINE_ROUND_FLT(value), as defined in this
     header, expand to a complete statement sequence that ends with a `return`,
     so they are used as the entire body of the corresponding cvRound() overload. */
  #if defined(CV_INLINE_ROUND_FLT)
    // user-specified version
    // CV_INLINE_ROUND_DBL should be defined too
  #elif defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__
    // 1. general scheme
    // Declares the int result `res` and a float scratch operand `temp`, runs the
    // supplied asm string (conversion into `temp`, then a move of `temp` into `res`)
    // and returns `res`. CV_UNUSED(temp) is applied to the scratch variable, which
    // is only referenced from the asm operand list.
    #define ARM_ROUND(_value, _asm_string) \
        int res; \
        float temp; \
        CV_UNUSED(temp); \
        __asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
        return res
    // 2. version for double
    // The two variants differ only in how the double operand is spelled in the asm
    // template: `%[value]` for clang, `%P[value]` for other GNU-compatible compilers.
    #ifdef __clang__
        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
    #else
        #define CV_INLINE_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
    #endif
    // 3. version for float
    #define CV_INLINE_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
  #elif defined __PPC64__ && defined __GNUC__ && defined _ARCH_PWR8
    // P8 and newer machines can convert fp32/64 to int quickly.
    // The input is cast to double before being handed to the asm, and the macro
    // already ends with `return out;`, so the `;` written at the call site is just
    // an extra empty statement.
    #define CV_INLINE_ROUND_DBL(value) \
        int out; \
        double temp; \
        __asm__( "fctiw %[temp],%[in]\n\tmfvsrwz %[out],%[temp]\n\t" : [out] "=r" (out), [temp] "=d" (temp) : [in] "d" ((double)(value)) : ); \
        return out;

    // FP32 also works with FP64 routine above
    #define CV_INLINE_ROUND_FLT(value) CV_INLINE_ROUND_DBL(value)
  #endif
114 
  /* Optional architecture-specific body for cvIsInf(). When defined, the macro is
     used as the whole function body and therefore contains the `return`. */
  #ifdef CV_INLINE_ISINF_FLT
    // user-specified version
    // CV_INLINE_ISINF_DBL should be defined too
  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
    // Only taken when scalar_test_data_class is itself a macro (see the defined() test).
    // NOTE(review): 0x30 is presumably the data-class mask for +/-infinity - confirm
    // against the compiler's documentation of scalar_test_data_class.
    #define CV_INLINE_ISINF_DBL(value) return scalar_test_data_class(value, 0x30);
    // The float variant reuses the double macro with the same argument.
    #define CV_INLINE_ISINF_FLT(value) CV_INLINE_ISINF_DBL(value)
  #endif
122 
  /* Optional architecture-specific body for cvIsNaN(). When defined, the macro is
     used as the whole function body and therefore contains the `return`. */
  #ifdef CV_INLINE_ISNAN_FLT
    // user-specified version
    // CV_INLINE_ISNAN_DBL should be defined too
  #elif defined __PPC64__ && defined _ARCH_PWR9 && defined(scalar_test_data_class)
    // Only taken when scalar_test_data_class is itself a macro (see the defined() test).
    // NOTE(review): 0x40 is presumably the data-class mask for NaN - confirm against
    // the compiler's documentation of scalar_test_data_class.
    #define CV_INLINE_ISNAN_DBL(value) return scalar_test_data_class(value, 0x40);
    // The float variant reuses the double macro with the same argument.
    #define CV_INLINE_ISNAN_FLT(value) CV_INLINE_ISNAN_DBL(value)
  #endif
130 
  /* Default OPENCV_USE_FASTMATH_BUILTINS to 1 on x86-64 / i686, 32-bit ARM and PPC64,
     unless the user has already defined it (to any value, including 0). */
  #if !defined(OPENCV_USE_FASTMATH_BUILTINS) \
    && ( \
        defined(__x86_64__) || defined(__i686__) \
        || defined(__arm__) \
        || defined(__PPC64__) \
    )
    /* Use builtin C math functions when available. Dedicated hardware is available to
       round and convert FP values. */
    #define OPENCV_USE_FASTMATH_BUILTINS 1
  #endif
141 
  /* Enable builtin math functions if possible, desired, and available.
     Note, not all math functions inline equally. E.g lrint will not inline
     without the -fno-math-errno option.

     Each CV_INLINE_IS{NAN,INF}_{DBL,FLT} macro is only defined here when it has not
     been provided earlier (by the user or by an architecture-specific section), and
     each expands to a full `return ...;` statement used as the body of the matching
     cvIsNaN() / cvIsInf() overload. */
  #if defined(CV_ICC)
    // nothing
  #elif defined(OPENCV_USE_FASTMATH_BUILTINS) && OPENCV_USE_FASTMATH_BUILTINS
    #if defined(__clang__)
      // Marker tested by cvFloor()/cvCeil() to pick their builtin-based branch.
      #define CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS
      // clang: the same builtin name is used for both double and float, and every
      // definition is additionally guarded by __has_builtin.
      #if !defined(CV_INLINE_ISNAN_DBL) && __has_builtin(__builtin_isnan)
        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
      #endif
      #if !defined(CV_INLINE_ISNAN_FLT) && __has_builtin(__builtin_isnan)
        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnan(value);
      #endif
      #if !defined(CV_INLINE_ISINF_DBL) && __has_builtin(__builtin_isinf)
        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
      #endif
      #if !defined(CV_INLINE_ISINF_FLT) && __has_builtin(__builtin_isinf)
        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinf(value);
      #endif
    #elif defined(__GNUC__)
      // Marker tested by cvFloor()/cvCeil() to pick their builtin-based branch.
      #define CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS
      // GCC: the float overloads use the `f`-suffixed builtins.
      #if !defined(CV_INLINE_ISNAN_DBL)
        #define CV_INLINE_ISNAN_DBL(value) return __builtin_isnan(value);
      #endif
      #if !defined(CV_INLINE_ISNAN_FLT)
        #define CV_INLINE_ISNAN_FLT(value) return __builtin_isnanf(value);
      #endif
      #if !defined(CV_INLINE_ISINF_DBL)
        #define CV_INLINE_ISINF_DBL(value) return __builtin_isinf(value);
      #endif
      #if !defined(CV_INLINE_ISINF_FLT)
        #define CV_INLINE_ISINF_FLT(value) return __builtin_isinff(value);
      #endif
    #elif defined(_MSC_VER)
      // MSVC: plain isnan()/isinf() are used; no CV__FASTMATH_ENABLE_* marker is
      // defined in this branch.
      #if !defined(CV_INLINE_ISNAN_DBL)
        #define CV_INLINE_ISNAN_DBL(value) return isnan(value);
      #endif
      #if !defined(CV_INLINE_ISNAN_FLT)
        #define CV_INLINE_ISNAN_FLT(value) return isnan(value);
      #endif
      #if !defined(CV_INLINE_ISINF_DBL)
        #define CV_INLINE_ISINF_DBL(value) return isinf(value);
      #endif
      #if !defined(CV_INLINE_ISINF_FLT)
        #define CV_INLINE_ISINF_FLT(value) return isinf(value);
      #endif
    #endif
  #endif
191 
192 #endif // defined(__CUDACC__)
193 
194 /** @brief Rounds floating-point number to the nearest integer
195 
196  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
197  result is not defined.
198  */
199 CV_INLINE int
cvRound(double value)200 cvRound( double value )
201 {
202 #if defined CV_INLINE_ROUND_DBL
203     CV_INLINE_ROUND_DBL(value);
204 #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
205     && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
206     && !defined(__CUDACC__)
207     __m128d t = _mm_set_sd( value );
208     return _mm_cvtsd_si32(t);
209 #elif defined _MSC_VER && defined _M_IX86
210     int t;
211     __asm
212     {
213         fld value;
214         fistp t;
215     }
216     return t;
217 #elif defined CV_ICC || defined __GNUC__
218     return (int)(lrint(value));
219 #else
220     /* it's ok if round does not comply with IEEE754 standard;
221        the tests should allow +/-1 difference when the tested functions use round */
222     return (int)(value + (value >= 0 ? 0.5 : -0.5));
223 #endif
224 }
225 
226 
227 /** @brief Rounds floating-point number to the nearest integer not larger than the original.
228 
229  The function computes an integer i such that:
230  \f[i \le \texttt{value} < i+1\f]
231  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
232  result is not defined.
233  */
cvFloor(double value)234 CV_INLINE int cvFloor( double value )
235 {
236 #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
237     && ( \
238         defined(__PPC64__) \
239     )
240     return __builtin_floor(value);
241 #else
242     int i = (int)value;
243     return i - (i > value);
244 #endif
245 }
246 
247 /** @brief Rounds floating-point number to the nearest integer not smaller than the original.
248 
249  The function computes an integer i such that:
250  \f[i \le \texttt{value} < i+1\f]
251  @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
252  result is not defined.
253  */
cvCeil(double value)254 CV_INLINE int cvCeil( double value )
255 {
256 #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
257     && ( \
258         defined(__PPC64__) \
259     )
260     return __builtin_ceil(value);
261 #else
262     int i = (int)value;
263     return i + (i < value);
264 #endif
265 }
266 
267 /** @brief Determines if the argument is Not A Number.
268 
269  @param value The input floating-point value
270 
271  The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
272  otherwise. */
cvIsNaN(double value)273 CV_INLINE int cvIsNaN( double value )
274 {
275 #if defined CV_INLINE_ISNAN_DBL
276     CV_INLINE_ISNAN_DBL(value);
277 #else
278     Cv64suf ieee754;
279     ieee754.f = value;
280     return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
281            ((unsigned)ieee754.u != 0) > 0x7ff00000;
282 #endif
283 }
284 
285 /** @brief Determines if the argument is Infinity.
286 
287  @param value The input floating-point value
288 
289  The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
290  and 0 otherwise. */
cvIsInf(double value)291 CV_INLINE int cvIsInf( double value )
292 {
293 #if defined CV_INLINE_ISINF_DBL
294     CV_INLINE_ISINF_DBL(value);
295 #elif defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__PPC64__)
296     Cv64suf ieee754;
297     ieee754.f = value;
298     return (ieee754.u & 0x7fffffff00000000) ==
299                         0x7ff0000000000000;
300 #else
301     Cv64suf ieee754;
302     ieee754.f = value;
303     return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
304             (unsigned)ieee754.u == 0;
305 #endif
306 }
307 
308 #ifdef __cplusplus
309 
310 /** @overload */
cvRound(float value)311 CV_INLINE int cvRound(float value)
312 {
313 #if defined CV_INLINE_ROUND_FLT
314     CV_INLINE_ROUND_FLT(value);
315 #elif ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
316     && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) \
317     && !defined(__CUDACC__)
318     __m128 t = _mm_set_ss( value );
319     return _mm_cvtss_si32(t);
320 #elif defined _MSC_VER && defined _M_IX86
321     int t;
322     __asm
323     {
324         fld value;
325         fistp t;
326     }
327     return t;
328 #elif defined CV_ICC || defined __GNUC__
329     return (int)(lrintf(value));
330 #else
331     /* it's ok if round does not comply with IEEE754 standard;
332      the tests should allow +/-1 difference when the tested functions use round */
333     return (int)(value + (value >= 0 ? 0.5f : -0.5f));
334 #endif
335 }
336 
337 /** @overload */
cvRound(int value)338 CV_INLINE int cvRound( int value )
339 {
340     return value;
341 }
342 
343 /** @overload */
cvFloor(float value)344 CV_INLINE int cvFloor( float value )
345 {
346 #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
347     && ( \
348         defined(__PPC64__) \
349     )
350     return __builtin_floorf(value);
351 #else
352     int i = (int)value;
353     return i - (i > value);
354 #endif
355 }
356 
357 /** @overload */
cvFloor(int value)358 CV_INLINE int cvFloor( int value )
359 {
360     return value;
361 }
362 
363 /** @overload */
cvCeil(float value)364 CV_INLINE int cvCeil( float value )
365 {
366 #if (defined CV__FASTMATH_ENABLE_GCC_MATH_BUILTINS || defined CV__FASTMATH_ENABLE_CLANG_MATH_BUILTINS) \
367     && ( \
368         defined(__PPC64__) \
369     )
370     return __builtin_ceilf(value);
371 #else
372     int i = (int)value;
373     return i + (i < value);
374 #endif
375 }
376 
377 /** @overload */
cvCeil(int value)378 CV_INLINE int cvCeil( int value )
379 {
380     return value;
381 }
382 
383 /** @overload */
cvIsNaN(float value)384 CV_INLINE int cvIsNaN( float value )
385 {
386 #if defined CV_INLINE_ISNAN_FLT
387     CV_INLINE_ISNAN_FLT(value);
388 #else
389     Cv32suf ieee754;
390     ieee754.f = value;
391     return (ieee754.u & 0x7fffffff) > 0x7f800000;
392 #endif
393 }
394 
395 /** @overload */
cvIsInf(float value)396 CV_INLINE int cvIsInf( float value )
397 {
398 #if defined CV_INLINE_ISINF_FLT
399     CV_INLINE_ISINF_FLT(value);
400 #else
401     Cv32suf ieee754;
402     ieee754.f = value;
403     return (ieee754.u & 0x7fffffff) == 0x7f800000;
404 #endif
405 }
406 
407 #endif // __cplusplus
408 
409 //! @} core_utils
410 
411 #endif
412