1 /*=========================================================================
2  *
3  *  Copyright Insight Software Consortium
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at
8  *
9  *         http://www.apache.org/licenses/LICENSE-2.0.txt
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  *=========================================================================*/
18 /*=========================================================================
19  *
20  *  Portions of this file are subject to the VTK Toolkit Version 3 copyright.
21  *
22  *  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
23  *
24  *  For complete copyright, license and disclaimer of warranty information
25  *  please refer to the NOTICE file at the top of the ITK source tree.
26  *
27  *=========================================================================*/
28 #ifndef itkMathDetail_h
29 #define itkMathDetail_h
30 
31 #include "itkIntTypes.h"
32 #include "itkNumericTraits.h"
33 
34 #include <cfenv>
35 
36 #if defined( ITK_HAVE_EMMINTRIN_H ) && !defined( ITK_WRAPPING_PARSER )
37 #include <emmintrin.h> // sse 2 intrinsics
38 #endif /* ITK_HAVE_EMMINTRIN_H && ! ITK_WRAPPING_PARSER */
39 
40 // assume no SSE2:
41 #define USE_SSE2_64IMPL 0
42 #define USE_SSE2_32IMPL 0
43 
44 // For apple assume sse2 is on for all intel builds, check for 64 and 32
45 // bit versions
46 #if defined(__APPLE__) && defined( __SSE2__ ) && !defined( ITK_WRAPPING_PARSER )
47 
48 #  if defined( __i386__ )
49 #    undef  USE_SSE2_32IMPL
50 #    define USE_SSE2_32IMPL 1
51 #  endif
52 
53 #  if defined(  __x86_64 )
54 //   Turn on the 64 bits implementation
55 #    undef  USE_SSE2_64IMPL
56 #    define USE_SSE2_64IMPL 1
57 //   Turn on also the 32 bits implementation
58 //   since it is available in 64 bits versions.
59 #    undef  USE_SSE2_32IMPL
60 #    define USE_SSE2_32IMPL 1
61 #  endif
62 
63 #else
64 
65 // For non-apple (no universal binary possible) just use the
66 // try-compile set ITK_COMPILER_SUPPORTS_SSE2_32 and
67 // ITK_COMPILER_SUPPORTS_SSE2_64 to set values:
68 
69 #  if defined(ITK_COMPILER_SUPPORTS_SSE2_32) && !defined( ITK_WRAPPING_PARSER )
70 #    undef  USE_SSE2_32IMPL
71 #    define USE_SSE2_32IMPL 1
72 #  endif
73 #  if defined(ITK_COMPILER_SUPPORTS_SSE2_64) && !defined( ITK_WRAPPING_PARSER )
74 #    undef  USE_SSE2_64IMPL
75 #    define USE_SSE2_64IMPL 1
76 #  endif
77 
78 #endif
79 
80 
81 // Turn on 32-bit and 64-bit asm impl when using GCC on x86 platform with the
82 // following exception:
83 //   GCCXML
84 #if defined( __GNUC__ ) && ( !defined( ITK_WRAPPING_PARSER ) ) &&  ( defined( __i386__ ) || defined( __i386 ) \
85   || defined( __x86_64__ ) || defined( __x86_64 ) )
86 #define GCC_USE_ASM_32IMPL 1
87 #define GCC_USE_ASM_64IMPL 1
88 #else
89 #define GCC_USE_ASM_32IMPL 0
90 #define GCC_USE_ASM_64IMPL 0
91 #endif
92 // Turn on 32-bit and 64-bit asm impl when using msvc on 32 bits windows
93 #if defined( VCL_VC ) && ( !defined( ITK_WRAPPING_PARSER ) ) && !defined( _WIN64 )
94 #define VC_USE_ASM_32IMPL 1
95 #define VC_USE_ASM_64IMPL 1
96 #else
97 #define VC_USE_ASM_32IMPL 0
98 #define VC_USE_ASM_64IMPL 0
99 #endif
100 
101 namespace itk
102 {
103 namespace Math
104 {
105 namespace Detail
106 {
107 // The functions defined in this namespace are not meant to be used directly
108 // and thus do not adhere to the standard backward-compatibility
109 // policy of ITK, as any Detail namespace should be considered private.
110 // Please use the functions from the itk::Math namespace instead
111 
112 ////////////////////////////////////////
113 // Base versions
114 
115 CLANG_PRAGMA_PUSH
116 CLANG_SUPPRESS_Wfloat_equal
117 
118 template< typename TReturn, typename TInput >
RoundHalfIntegerToEven_base(TInput x)119 inline TReturn RoundHalfIntegerToEven_base(TInput x)
120 {
121   if ( NumericTraits< TInput >::IsNonnegative(x) )
122     {
123     x += static_cast< TInput >( 0.5 );
124     }
125   else
126     {
127     x -= static_cast< TInput >( 0.5 );
128     }
129 
130   const auto r = static_cast< TReturn >( x );
131   return ( x != static_cast< TInput >( r ) ) ? r : static_cast< TReturn >( 2 * ( r / 2 ) );
132 }
133 
134 template< typename TReturn, typename TInput >
RoundHalfIntegerUp_base(TInput x)135 inline TReturn RoundHalfIntegerUp_base(TInput x)
136 {
137   x += static_cast< TInput >( 0.5 );
138   const auto r = static_cast< TReturn >( x );
139   return ( NumericTraits< TInput >::IsNonnegative(x) ) ?
140          r :
141          ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) );
142 }
143 
144 template< typename TReturn, typename TInput >
Floor_base(TInput x)145 inline TReturn Floor_base(TInput x)
146 {
147   const auto r = static_cast< TReturn >( x );
148 
149   return ( NumericTraits< TInput >::IsNonnegative(x) ) ?
150          r :
151          ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) );
152 }
153 
154 template< typename TReturn, typename TInput >
Ceil_base(TInput x)155 inline TReturn Ceil_base(TInput x)
156 {
157   const auto r = static_cast< TReturn >( x );
158 
159   return ( NumericTraits< TInput >::IsNegative(x) ) ?
160          r :
161          ( x == static_cast< TInput >( r ) ? r : r + static_cast< TReturn >( 1 ) );
162 }
163 
164 CLANG_PRAGMA_POP
165 
166 ////////////////////////////////////////
167 // 32 bits versions
168 
169 #if USE_SSE2_32IMPL // sse2 implementation
170 
RoundHalfIntegerToEven_32(double x)171 inline int32_t RoundHalfIntegerToEven_32(double x)
172 {
173   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
174   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
175   #endif
176   return _mm_cvtsd_si32( _mm_set_sd(x) );
177 }
178 
RoundHalfIntegerToEven_32(float x)179 inline int32_t RoundHalfIntegerToEven_32(float x)
180 {
181   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
182   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
183   #endif
184   return _mm_cvtss_si32( _mm_set_ss(x) );
185 }
186 
187 #elif GCC_USE_ASM_32IMPL // gcc asm implementation
188 
189 inline int32_t RoundHalfIntegerToEven_32(double x)
190 {
191   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
192   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
193   #endif
194   int32_t r;
195   __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" );
196   return r;
197 }
198 
199 inline int32_t RoundHalfIntegerToEven_32(float x)
200 {
201   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
202   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
203   #endif
204   int32_t r;
205   __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" );
206   return r;
207 }
208 
209 #elif VC_USE_ASM_32IMPL // msvc asm implementation
210 
211 inline int32_t RoundHalfIntegerToEven_32(double x)
212 {
213   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
214   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
215   #endif
216   int32_t r;
217   __asm
218     {
219     fld x
220     fistp r
221     }
222   return r;
223 }
224 
225 inline int32_t RoundHalfIntegerToEven_32(float x)
226 {
227   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
228   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
229   #endif
230   int32_t r;
231   __asm
232     {
233     fld x
234     fistp r
235     }
236   return r;
237 }
238 
239 #else // Base implementation
240 
241 inline int32_t RoundHalfIntegerToEven_32(double x) { return RoundHalfIntegerToEven_base< int32_t, double >(x); }
242 inline int32_t RoundHalfIntegerToEven_32(float x) { return RoundHalfIntegerToEven_base< int32_t, float >(x); }
243 
244 #endif
245 
246 #if USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL
247 
RoundHalfIntegerUp_32(double x)248 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerToEven_32(2 * x + 0.5) >> 1; }
RoundHalfIntegerUp_32(float x)249 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerToEven_32(2 * x + 0.5f) >> 1; }
250 
Floor_32(double x)251 inline int32_t Floor_32(double x) { return RoundHalfIntegerToEven_32(2 * x - 0.5) >> 1; }
Floor_32(float x)252 inline int32_t Floor_32(float x) { return RoundHalfIntegerToEven_32(2 * x - 0.5f) >> 1; }
253 
Ceil_32(double x)254 inline int32_t Ceil_32(double x) { return -( RoundHalfIntegerToEven_32(-0.5 - 2 * x) >> 1 ); }
Ceil_32(float x)255 inline int32_t Ceil_32(float x) { return -( RoundHalfIntegerToEven_32(-0.5f - 2 * x) >> 1 ); }
256 
257 #else // Base implementation
258 
RoundHalfIntegerUp_32(double x)259 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerUp_base< int32_t, double >(x); }
RoundHalfIntegerUp_32(float x)260 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerUp_base< int32_t, float >(x); }
261 
Floor_32(double x)262 inline int32_t Floor_32(double x) { return Floor_base< int32_t, double >(x); }
Floor_32(float x)263 inline int32_t Floor_32(float x) { return Floor_base< int32_t, float >(x); }
264 
Ceil_32(double x)265 inline int32_t Ceil_32(double x) { return Ceil_base< int32_t, double >(x); }
Ceil_32(float x)266 inline int32_t Ceil_32(float x) { return Ceil_base< int32_t, float >(x); }
267 
268 #endif // USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL
269 
270 ////////////////////////////////////////
271 // 64 bits versions
272 
273 #if USE_SSE2_64IMPL // sse2 implementation
274 
RoundHalfIntegerToEven_64(double x)275 inline int64_t RoundHalfIntegerToEven_64(double x)
276 {
277   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
278   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
279   #endif
280   return _mm_cvtsd_si64( _mm_set_sd(x) );
281 }
282 
RoundHalfIntegerToEven_64(float x)283 inline int64_t RoundHalfIntegerToEven_64(float x)
284 {
285   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
286   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
287   #endif
288   return _mm_cvtss_si64( _mm_set_ss(x) );
289 }
290 
291 #elif GCC_USE_ASM_64IMPL // gcc asm implementation
292 
RoundHalfIntegerToEven_64(double x)293 inline int64_t RoundHalfIntegerToEven_64(double x)
294 {
295   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
296   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
297   #endif
298   int64_t r;
299   __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" );
300   return r;
301 }
302 
RoundHalfIntegerToEven_64(float x)303 inline int64_t RoundHalfIntegerToEven_64(float x)
304 {
305   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
306   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
307   #endif
308   int64_t r;
309   __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" );
310   return r;
311 }
312 
313 #elif VC_USE_ASM_64IMPL // msvc asm implementation
314 
RoundHalfIntegerToEven_64(double x)315 inline int64_t RoundHalfIntegerToEven_64(double x)
316 {
317   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
318   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
319   #endif
320   int64_t r;
321   __asm
322     {
323     fld x
324     fistp r
325     }
326   return r;
327 }
328 
RoundHalfIntegerToEven_64(float x)329 inline int64_t RoundHalfIntegerToEven_64(float x)
330 {
331   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )
332   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
333   #endif
334   int64_t r;
335   __asm
336     {
337     fld x
338     fistp r
339     }
340   return r;
341 }
342 
343 #else // Base implementation
344 
RoundHalfIntegerToEven_64(double x)345 inline int64_t RoundHalfIntegerToEven_64(double x) { return RoundHalfIntegerToEven_base< int64_t, double >(x); }
RoundHalfIntegerToEven_64(float x)346 inline int64_t RoundHalfIntegerToEven_64(float x) { return RoundHalfIntegerToEven_base< int64_t, float >(x); }
347 
348 #endif
349 
350 #if USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL
351 
RoundHalfIntegerUp_64(double x)352 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerToEven_64(2 * x + 0.5) >> 1; }
RoundHalfIntegerUp_64(float x)353 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerToEven_64(2 * x + 0.5f) >> 1; }
354 
Floor_64(double x)355 inline int64_t Floor_64(double x) { return RoundHalfIntegerToEven_64(2 * x - 0.5) >> 1; }
Floor_64(float x)356 inline int64_t Floor_64(float x) { return RoundHalfIntegerToEven_64(2 * x - 0.5f) >> 1; }
357 
Ceil_64(double x)358 inline int64_t Ceil_64(double x) { return -( RoundHalfIntegerToEven_64(-0.5 - 2 * x) >> 1 ); }
Ceil_64(float x)359 inline int64_t Ceil_64(float x) { return -( RoundHalfIntegerToEven_64(-0.5f - 2 * x) >> 1 ); }
360 
361 #else // Base implementation
362 
RoundHalfIntegerUp_64(double x)363 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerUp_base< int64_t, double >(x); }
RoundHalfIntegerUp_64(float x)364 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerUp_base< int64_t, float >(x); }
365 
Floor_64(double x)366 inline int64_t Floor_64(double x) { return Floor_base< int64_t, double >(x); }
Floor_64(float x)367 inline int64_t Floor_64(float x) { return Floor_base< int64_t, float >(x); }
368 
Ceil_64(double x)369 inline int64_t Ceil_64(double x) { return Ceil_base< int64_t, double >(x); }
Ceil_64(float x)370 inline int64_t Ceil_64(float x) { return Ceil_base< int64_t, float >(x); }
371 
372 #endif // USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL
373 
374 template <typename T>
375 struct FloatIEEETraits;
376 
377 template <>
378 struct FloatIEEETraits<float>
379 {
380   using IntType = int32_t;
381   using UIntType = uint32_t;
382 };
383 
384 template <>
385 struct FloatIEEETraits<double>
386 {
387   using IntType = int64_t;
388   using UIntType = uint64_t;
389 };
390 
391 template <typename T>
392 union FloatIEEE
393 {
394   using FloatType = T;
395   using IntType = typename FloatIEEETraits<T>::IntType;
396   using UIntType = typename FloatIEEETraits<T>::UIntType;
397 
398   FloatType asFloat;
399   IntType asInt;
400   UIntType asUInt;
401 
402   FloatIEEE(FloatType f): asFloat(f) {}
403   FloatIEEE(IntType i): asInt(i) {}
404   bool Sign() const
405     {
406     return (asUInt >> (sizeof(asUInt)*8-1)) != 0;
407     }
408   IntType AsULP() const
409     {
410     return this->Sign()? IntType(~(~UIntType(0) >> 1) - asUInt) : asInt;
411     }
412 };
413 
414 } // end namespace Detail
415 } // end namespace Math
416 
417 // move to itkConceptChecking?
418 namespace Concept
419 {
420 template< typename T >
421 struct FloatOrDouble;
422 template< >
423 struct FloatOrDouble< float > {};
424 template< >
425 struct FloatOrDouble< double > {};
426 } // end namespace Concept
427 } // end namespace itk
428 
429 #undef USE_SSE2_32IMPL
430 #undef GCC_USE_ASM_32IMPL
431 #undef VC_USE_ASM_32IMPL
432 
433 #undef USE_SSE2_64IMPL
434 #undef GCC_USE_ASM_64IMPL
435 #undef VC_USE_ASM_64IMPL
436 
437 #endif // end of itkMathDetail.h
438