1 /*
2  * By downloading, copying, installing or using the software you agree to this license.
3  * If you do not agree to this license, do not download, install,
4  * copy or use the software.
5  *
6  *
7  *                           License Agreement
8  *                For Open Source Computer Vision Library
9  *                        (3-clause BSD License)
10  *
11  * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12  * Third party copyrights are property of their respective owners.
13  *
14  * Redistribution and use in source and binary forms, with or without modification,
15  * are permitted provided that the following conditions are met:
16  *
17  *   * Redistributions of source code must retain the above copyright notice,
18  *     this list of conditions and the following disclaimer.
19  *
20  *   * Redistributions in binary form must reproduce the above copyright notice,
21  *     this list of conditions and the following disclaimer in the documentation
22  *     and/or other materials provided with the distribution.
23  *
24  *   * Neither the names of the copyright holders nor the names of the contributors
25  *     may be used to endorse or promote products derived from this software
26  *     without specific prior written permission.
27  *
28  * This software is provided by the copyright holders and contributors "as is" and
29  * any express or implied warranties, including, but not limited to, the implied
30  * warranties of merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall copyright holders or contributors be liable for any direct,
32  * indirect, incidental, special, exemplary, or consequential damages
33  * (including, but not limited to, procurement of substitute goods or services;
34  * loss of use, data, or profits; or business interruption) however caused
35  * and on any theory of liability, whether in contract, strict liability,
36  * or tort (including negligence or otherwise) arising in any way out of
37  * the use of this software, even if advised of the possibility of such damage.
38  */
39 
40 #ifndef CAROTENE_SATURATE_CAST_HPP
41 #define CAROTENE_SATURATE_CAST_HPP
42 
43 #include <algorithm>
44 #include <climits>
45 #include <cmath>
46 
47 #if defined _MSC_VER && defined _M_ARM
48 # include <intrin.h>
49 #endif
50 
51 #include <carotene/definitions.hpp>
52 #include <carotene/types.hpp>
53 
54 namespace CAROTENE_NS { namespace internal {
55 
56 #if defined _MSC_VER && defined _M_ARM
57 
// Converts a double to a 32-bit signed integer using hand-encoded instructions.
//
// The function is declared `naked`, so its body consists solely of the
// instruction halfwords emitted below, including the explicit return (`bx lr`).
// It is declared as returning void, but the CAROTENE_ROUND_* macros in this file
// call it through a cast to `s32 (*)(f64)`; the value it produces is whatever
// the emitted `vmov r0, s0` leaves in r0.
// NOTE(review): this presumably relies on the calling convention passing `d`
// in d0 and returning integers in r0 - confirm against the MSVC ARM ABI.
// NOTE(review): `vcvtr` presumably rounds according to the current FPSCR
// rounding mode - confirm against the ARM architecture reference.
__declspec(naked) static void vcvtr_s32_f64_imp(f64 d)
{
    // The parameter is consumed only by the emitted instructions; silence
    // the unused-parameter warning.
    (void)d;
    __emit(0xEEBD);  // vcvtr.s32.f64 s0, d0
    __emit(0x0B40);
    __emit(0xEE10);  // vmov r0, s0
    __emit(0x0A10);
    __emit(0x4770);  // bx lr
}
67 
// MSVC/ARM rounding macros. Each expands to a complete `return` statement
// (trailing semicolon included), so it is meant to be used as the body of a
// function returning s32. The naked helper is declared void, hence the call
// through a cast to `s32 (*)(f64)`.
// The argument is parenthesized so that the f64 cast applies to the whole
// argument expression rather than only to its first operand.
# define CAROTENE_ROUND_FLT(x) return ((s32 (*)(f64))vcvtr_s32_f64_imp)((f64)(x));
# define CAROTENE_ROUND_DBL(x) return ((s32 (*)(f64))vcvtr_s32_f64_imp)(x);
70 
71 #elif defined CV_ICC || defined __GNUC__
72 
73 # if defined(__VFP_FP__) && !defined(__SOFTFP__) && !(defined _DEBUG || defined DEBUG) && !defined(__CUDACC__)
// Rounds an f32 to s32 with the VFP `ftosis` instruction.
// Expands to a brace-enclosed block that ends in `return`, so it can only be
// used inside a function returning s32 (round(f32) uses it as its whole body).
// The asm writes into the float member of the union and the result is read
// back through the integer member of the same union.
// NOTE(review): `ftosis` presumably converts using the current FPSCR rounding
// mode - confirm against the ARM VFP documentation.
// NOTE(review): the temporary is named `result`; an argument expression that
// itself mentions `result` would be captured by it.
#  define CAROTENE_ROUND_FLT(value) {                              \
    union { f32 f; s32 i; } result; \
    asm ("ftosis  %0, %1 \n" : "=w" (result.f) : "w" (value) ); \
    return result.i; }
// Rounds an f64 to s32 with the VFP `ftosid` instruction.
// Expands to a brace-enclosed block that ends in `return`, so it can only be
// used inside a function returning s32 (round(f64) uses it as its whole body).
// The asm writes into the float member of the union and the result is read
// back through the integer member of the same union.
// NOTE(review): `%P1` is presumably the GCC ARM operand modifier that prints
// the double-precision register name for the input - confirm.
// NOTE(review): `ftosid` presumably converts using the current FPSCR rounding
// mode - confirm against the ARM VFP documentation.
#  define CAROTENE_ROUND_DBL(value) {                      \
    union {f32 f; s32 i;} __tegra_result; \
    asm (                                               \
        "ftosid  %0, %P1\n"                             \
        : "=w" (__tegra_result.f)                       \
        : "w" (value)                                   \
    );                                                  \
    return __tegra_result.i;                            \
    }
87 # else
// Generic GCC/ICC fallback: round with the C library using the current
// floating-point rounding mode. Each macro expands to a complete `return`
// statement (trailing semicolon included), so it is meant to be used as the
// body of a function returning s32.
// The macro parameter is now the name used in the expansion; previously the
// FLT variant ignored its argument and referenced a caller variable that
// merely happened to be called `value`.
#  define CAROTENE_ROUND_FLT(value) return (s32)lrintf(value);
#  define CAROTENE_ROUND_DBL(value) return (s32)lrint(value);
90 # endif
91 
92 #endif
93 
round(f32 value)94 inline s32 round(f32 value)
95 {
96 #ifdef CAROTENE_ROUND_FLT
97     CAROTENE_ROUND_FLT(value)
98 #else
99     s32 intpart = (s32)(value);
100     f32 fractpart = value - intpart;
101     if ((fractpart != 0.5 && fractpart != -0.5) || ((intpart % 2) != 0))
102         return (s32)(value + (value >= 0 ? 0.5 : -0.5));
103     else
104         return intpart;
105 #endif
106 }
107 
round(f64 value)108 inline s32 round(f64 value)
109 {
110 #ifdef CAROTENE_ROUND_DBL
111     CAROTENE_ROUND_DBL(value)
112 #else
113     s32 intpart = (s32)(value);
114     f64 fractpart = value - intpart;
115     if ((fractpart != 0.5 && fractpart != -0.5) || ((intpart % 2) != 0))
116         return (s32)(value + (value >= 0 ? 0.5 : -0.5));
117     else
118         return intpart;
119 #endif
120 }
121 /////////////// saturate_cast (used in image & signal processing) ///////////////////
122 
saturate_cast(u8 v)123 template<typename _Tp> inline _Tp saturate_cast(u8 v)    { return _Tp(v); }
saturate_cast(s8 v)124 template<typename _Tp> inline _Tp saturate_cast(s8 v)    { return _Tp(v); }
saturate_cast(u16 v)125 template<typename _Tp> inline _Tp saturate_cast(u16 v)   { return _Tp(v); }
saturate_cast(s16 v)126 template<typename _Tp> inline _Tp saturate_cast(s16 v)   { return _Tp(v); }
saturate_cast(u32 v)127 template<typename _Tp> inline _Tp saturate_cast(u32 v)   { return _Tp(v); }
saturate_cast(s32 v)128 template<typename _Tp> inline _Tp saturate_cast(s32 v)   { return _Tp(v); }
saturate_cast(s64 v)129 template<typename _Tp> inline _Tp saturate_cast(s64 v)   { return _Tp(v); }
saturate_cast(u64 v)130 template<typename _Tp> inline _Tp saturate_cast(u64 v)   { return _Tp(v); }
saturate_cast(f32 v)131 template<typename _Tp> inline _Tp saturate_cast(f32 v)   { return _Tp(v); }
saturate_cast(f64 v)132 template<typename _Tp> inline _Tp saturate_cast(f64 v)   { return _Tp(v); }
133 
saturate_cast(s8 v)134 template<> inline u8 saturate_cast<u8>(s8 v)      { return (u8)std::max((s32)v, 0); }
saturate_cast(u16 v)135 template<> inline u8 saturate_cast<u8>(u16 v)     { return (u8)std::min((u32)v, (u32)UCHAR_MAX); }
saturate_cast(s32 v)136 template<> inline u8 saturate_cast<u8>(s32 v)     { return (u8)((u32)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
saturate_cast(s16 v)137 template<> inline u8 saturate_cast<u8>(s16 v)     { return saturate_cast<u8>((s32)v); }
saturate_cast(u32 v)138 template<> inline u8 saturate_cast<u8>(u32 v)     { return (u8)std::min(v, (u32)UCHAR_MAX); }
saturate_cast(s64 v)139 template<> inline u8 saturate_cast<u8>(s64 v)     { return (u8)((u64)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
saturate_cast(u64 v)140 template<> inline u8 saturate_cast<u8>(u64 v)     { return (u8)std::min(v, (u64)UCHAR_MAX); }
saturate_cast(f32 v)141 template<> inline u8 saturate_cast<u8>(f32 v)     { return saturate_cast<u8>(round(v)); }
saturate_cast(f64 v)142 template<> inline u8 saturate_cast<u8>(f64 v)     { return saturate_cast<u8>(round(v)); }
143 
saturate_cast(u8 v)144 template<> inline s8 saturate_cast<s8>(u8 v)      { return (s8)std::min((s32)v, SCHAR_MAX); }
saturate_cast(u16 v)145 template<> inline s8 saturate_cast<s8>(u16 v)     { return (s8)std::min((u32)v, (u32)SCHAR_MAX); }
saturate_cast(s32 v)146 template<> inline s8 saturate_cast<s8>(s32 v)     { return (s8)((u32)(v-SCHAR_MIN) <= (u32)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
saturate_cast(s16 v)147 template<> inline s8 saturate_cast<s8>(s16 v)     { return saturate_cast<s8>((s32)v); }
saturate_cast(u32 v)148 template<> inline s8 saturate_cast<s8>(u32 v)     { return (s8)std::min(v, (u32)SCHAR_MAX); }
saturate_cast(s64 v)149 template<> inline s8 saturate_cast<s8>(s64 v)     { return (s8)((u64)(v-SCHAR_MIN) <= (u64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
saturate_cast(u64 v)150 template<> inline s8 saturate_cast<s8>(u64 v)     { return (s8)std::min(v, (u64)SCHAR_MAX); }
saturate_cast(f32 v)151 template<> inline s8 saturate_cast<s8>(f32 v)     { return saturate_cast<s8>(round(v)); }
saturate_cast(f64 v)152 template<> inline s8 saturate_cast<s8>(f64 v)     { return saturate_cast<s8>(round(v)); }
153 
saturate_cast(s8 v)154 template<> inline u16 saturate_cast<u16>(s8 v)    { return (u16)std::max((s32)v, 0); }
saturate_cast(s16 v)155 template<> inline u16 saturate_cast<u16>(s16 v)   { return (u16)std::max((s32)v, 0); }
saturate_cast(s32 v)156 template<> inline u16 saturate_cast<u16>(s32 v)   { return (u16)((u32)v <= (u32)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
saturate_cast(u32 v)157 template<> inline u16 saturate_cast<u16>(u32 v)   { return (u16)std::min(v, (u32)USHRT_MAX); }
saturate_cast(s64 v)158 template<> inline u16 saturate_cast<u16>(s64 v)   { return (u16)((u64)v <= (u64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
saturate_cast(u64 v)159 template<> inline u16 saturate_cast<u16>(u64 v)   { return (u16)std::min(v, (u64)USHRT_MAX); }
saturate_cast(f32 v)160 template<> inline u16 saturate_cast<u16>(f32 v)   { return saturate_cast<u16>(round(v)); }
saturate_cast(f64 v)161 template<> inline u16 saturate_cast<u16>(f64 v)   { return saturate_cast<u16>(round(v)); }
162 
saturate_cast(u16 v)163 template<> inline s16 saturate_cast<s16>(u16 v)   { return (s16)std::min((s32)v, SHRT_MAX); }
saturate_cast(s32 v)164 template<> inline s16 saturate_cast<s16>(s32 v)   { return (s16)((u32)(v - SHRT_MIN) <= (u32)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
saturate_cast(u32 v)165 template<> inline s16 saturate_cast<s16>(u32 v)   { return (s16)std::min(v, (u32)SHRT_MAX); }
saturate_cast(s64 v)166 template<> inline s16 saturate_cast<s16>(s64 v)   { return (s16)((u64)(v - SHRT_MIN) <= (u64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
saturate_cast(u64 v)167 template<> inline s16 saturate_cast<s16>(u64 v)   { return (s16)std::min(v, (u64)SHRT_MAX); }
saturate_cast(f32 v)168 template<> inline s16 saturate_cast<s16>(f32 v)   { return saturate_cast<s16>(round(v)); }
saturate_cast(f64 v)169 template<> inline s16 saturate_cast<s16>(f64 v)   { return saturate_cast<s16>(round(v)); }
170 
saturate_cast(s8 v)171 template<> inline u32 saturate_cast<u32>(s8 v)    { return (u32)std::max(v, (s8)0); }
saturate_cast(s16 v)172 template<> inline u32 saturate_cast<u32>(s16 v)   { return (u32)std::max(v, (s16)0); }
saturate_cast(s32 v)173 template<> inline u32 saturate_cast<u32>(s32 v)   { return (u32)std::max(v, (s32)0); }
saturate_cast(s64 v)174 template<> inline u32 saturate_cast<u32>(s64 v)   { return (u32)((u64)v <= (u64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
saturate_cast(u64 v)175 template<> inline u32 saturate_cast<u32>(u64 v)   { return (u32)std::min(v, (u64)UINT_MAX); }
176 //OpenCV like f32/f64 -> u32 conversion
177 //we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
saturate_cast(f32 v)178 template<> inline u32 saturate_cast<u32>(f32 v)   { return round(v); }
saturate_cast(f64 v)179 template<> inline u32 saturate_cast<u32>(f64 v)   { return round(v); }
180 //Negative clipping implementation
181 //template<> inline u32 saturate_cast<u32>(f32 v)   { return saturate_cast<u32>(round(v)); }
182 //template<> inline u32 saturate_cast<u32>(f64 v)   { return saturate_cast<u32>(round(v)); }
183 
saturate_cast(u32 v)184 template<> inline s32 saturate_cast<s32>(u32 v)   { return (s32)std::min(v, (u32)INT_MAX); }
saturate_cast(s64 v)185 template<> inline s32 saturate_cast<s32>(s64 v)   { return (s32)((u64)(v - INT_MIN) <= (u64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); }
saturate_cast(u64 v)186 template<> inline s32 saturate_cast<s32>(u64 v)   { return (s32)std::min(v, (u64)INT_MAX); }
saturate_cast(f32 v)187 template<> inline s32 saturate_cast<s32>(f32 v)   { return round(v); }
saturate_cast(f64 v)188 template<> inline s32 saturate_cast<s32>(f64 v)   { return round(v); }
189 
saturate_cast(s8 v)190 template<> inline u64 saturate_cast<u64>(s8 v)    { return (u64)std::max(v, (s8)0); }
saturate_cast(s16 v)191 template<> inline u64 saturate_cast<u64>(s16 v)   { return (u64)std::max(v, (s16)0); }
saturate_cast(s32 v)192 template<> inline u64 saturate_cast<u64>(s32 v)   { return (u64)std::max(v, (s32)0); }
saturate_cast(s64 v)193 template<> inline u64 saturate_cast<u64>(s64 v)   { return (u64)std::max(v, (s64)0); }
194 
saturate_cast(u64 v)195 template<> inline s64 saturate_cast<s64>(u64 v)   { return (s64)std::min(v, (u64)LLONG_MAX); }
196 
197 } }
198 
199 #endif
200