1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "../platform.h" 7 #include "constants.h" 8 // std 9 #include <algorithm> // std::min()/std::max() on Windows 10 #include <cmath> 11 12 #ifdef _WIN32 13 #include <intrin.h> 14 #if (__MSV_VER <= 1700) 15 namespace std { isinf(const float x)16 __forceinline bool isinf(const float x) 17 { 18 return !_finite(x); 19 } isnan(const float x)20 __forceinline bool isnan(const float x) 21 { 22 return _isnan(x); 23 } isfinite(const float x)24 __forceinline bool isfinite(const float x) 25 { 26 return _finite(x); 27 } 28 } // namespace std 29 #endif 30 #else 31 #if !defined(__ARM_NEON) 32 #include <emmintrin.h> 33 #include <xmmintrin.h> 34 #endif 35 #endif 36 37 #if !defined(_WIN32) && defined(__ARM_NEON) 38 #include "arm/emulation.h" 39 #endif 40 41 namespace rkcommon { 42 namespace math { 43 44 using std::cos; 45 using std::sin; 46 using std::tan; 47 48 using std::max; 49 using std::min; 50 51 using std::fmod; 52 sign(const float x)53 __forceinline float sign(const float x) 54 { 55 return x < 0 ? -1.0f : 1.0f; 56 } 57 rcp(const float x)58 __forceinline float rcp(const float x) 59 { 60 const __m128 a = _mm_set_ss(x); 61 const __m128 r = _mm_rcp_ss(a); 62 return _mm_cvtss_f32( 63 _mm_mul_ss(r, _mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a)))); 64 } 65 rcp_safe(float f)66 __forceinline float rcp_safe(float f) 67 { 68 return rcp(std::abs(f) < flt_min ? (f >= 0.f ? flt_min : -flt_min) : f); 69 } 70 rsqrt(const float x)71 __forceinline float rsqrt(const float x) 72 { 73 const __m128 a = _mm_set_ss(x); 74 const __m128 r = _mm_rsqrt_ss(a); 75 const __m128 c = 76 _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), 77 _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), 78 _mm_mul_ss(r, r))); 79 return _mm_cvtss_f32(c); 80 } 81 82 template <typename T> 83 __forceinline T clamp(const T &x, 84 const T &lower = T(zero), 85 const T &upper = T(one)) 86 { 87 return max(min(x, upper), lower); 88 } 89 90 template <typename T> deg2rad(const T & x)91 __forceinline T deg2rad(const T &x) 92 { 93 return x * T(1.745329251994329576923690768489e-2); 94 } 95 madd(const float a,const float b,const float c)96 __forceinline float madd(const float a, const float b, const float c) 97 { 98 return a * b + c; 99 } 100 101 template <typename T> lerp(const float factor,const T & a,const T & b)102 inline T lerp(const float factor, const T &a, const T &b) 103 { 104 return (1.f - factor) * a + factor * b; 105 } 106 107 template <typename T> divRoundUp(T a,T b)108 inline T divRoundUp(T a, T b) 109 { 110 return (a + b - 1) / b; 111 } 112 113 } // namespace math 114 } // namespace rkcommon 115