1 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 2 /** 3 * Contains FPU related code. 4 * \file IceFPU.h 5 * \author Pierre Terdiman 6 * \date April, 4, 2000 7 */ 8 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 10 /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 // Include Guard 12 #ifndef __ICEFPU_H__ 13 #define __ICEFPU_H__ 14 15 #define SIGN_BITMASK 0x80000000 16 17 //! Integer representation of a floating-point value. 18 #define IR(x) ((udword&)(x)) 19 20 //! Signed integer representation of a floating-point value. 21 #define SIR(x) ((sdword&)(x)) 22 23 //! Absolute integer representation of a floating-point value 24 #define AIR(x) (IR(x)&0x7fffffff) 25 26 //! Floating-point representation of an integer value. 27 #define FR(x) ((float&)(x)) 28 29 //! Integer-based comparison of a floating point value. 30 //! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context. 31 #define IS_NEGATIVE_FLOAT(x) (IR(x)&0x80000000) 32 33 //! Fast fabs for floating-point values. It just clears the sign bit. 34 //! Don't use it blindy, it can be faster or slower than the FPU comparison, depends on the context. FastFabs(float x)35 inline_ float FastFabs(float x) 36 { 37 udword FloatBits = IR(x)&0x7fffffff; 38 return FR(FloatBits); 39 } 40 41 //! Fast square root for floating-point values. FastSqrt(float square)42 inline_ float FastSqrt(float square) 43 { 44 #if defined(_MSC_VER) && !defined(_WIN64) 45 float retval; 46 47 __asm { 48 mov eax, square 49 sub eax, 0x3F800000 50 sar eax, 1 51 add eax, 0x3F800000 52 mov [retval], eax 53 } 54 return retval; 55 #else 56 return sqrtf(square); 57 #endif 58 } 59 60 //! Saturates positive to zero. fsat(float f)61 inline_ float fsat(float f) 62 { 63 udword y = (udword&)f & ~((sdword&)f >>31); 64 return (float&)y; 65 } 66 67 //! Computes 1.0f / sqrtf(x). frsqrt(float f)68 inline_ float frsqrt(float f) 69 { 70 float x = f * 0.5f; 71 udword y = 0x5f3759df - ((udword&)f >> 1); 72 // Iteration... 73 (float&)y = (float&)y * ( 1.5f - ( x * (float&)y * (float&)y ) ); 74 // Result 75 return (float&)y; 76 } 77 78 //! Computes 1.0f / sqrtf(x). Comes from NVIDIA. InvSqrt(const float & x)79 inline_ float InvSqrt(const float& x) 80 { 81 udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1; 82 float y = *(float*)&tmp; 83 return y * (1.47f - 0.47f * x * y * y); 84 } 85 86 //! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above. 87 //! See http://www.magic-software.com/3DGEDInvSqrt.html RSqrt(float number)88 inline_ float RSqrt(float number) 89 { 90 long i; 91 float x2, y; 92 const float threehalfs = 1.5f; 93 94 x2 = number * 0.5f; 95 y = number; 96 i = * (long *) &y; 97 i = 0x5f3759df - (i >> 1); 98 y = * (float *) &i; 99 y = y * (threehalfs - (x2 * y * y)); 100 101 return y; 102 } 103 104 //! TO BE DOCUMENTED fsqrt(float f)105 inline_ float fsqrt(float f) 106 { 107 udword y = ( ( (sdword&)f - 0x3f800000 ) >> 1 ) + 0x3f800000; 108 // Iteration...? 109 // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f; 110 // Result 111 return (float&)y; 112 } 113 114 //! Returns the float ranged espilon value. fepsilon(float f)115 inline_ float fepsilon(float f) 116 { 117 udword b = (udword&)f & 0xff800000; 118 udword a = b | 0x00000001; 119 (float&)a -= (float&)b; 120 // Result 121 return (float&)a; 122 } 123 124 //! Is the float valid ? IsNAN(float value)125 inline_ bool IsNAN(float value) { return (IR(value)&0x7f800000) == 0x7f800000; } IsIndeterminate(float value)126 inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; } IsPlusInf(float value)127 inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; } IsMinusInf(float value)128 inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; } 129 IsValidFloat(float value)130 inline_ bool IsValidFloat(float value) 131 { 132 if(IsNAN(value)) return false; 133 if(IsIndeterminate(value)) return false; 134 if(IsPlusInf(value)) return false; 135 if(IsMinusInf(value)) return false; 136 return true; 137 } 138 139 #define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x)); 140 141 /* 142 //! FPU precision setting function. 143 inline_ void SetFPU() 144 { 145 // This function evaluates whether the floating-point 146 // control word is set to single precision/round to nearest/ 147 // exceptions disabled. If these conditions don't hold, the 148 // function changes the control word to set them and returns 149 // TRUE, putting the old control word value in the passback 150 // location pointed to by pwOldCW. 151 { 152 uword wTemp, wSave; 153 154 __asm fstcw wSave 155 if (wSave & 0x300 || // Not single mode 156 0x3f != (wSave & 0x3f) || // Exceptions enabled 157 wSave & 0xC00) // Not round to nearest mode 158 { 159 __asm 160 { 161 mov ax, wSave 162 and ax, not 300h ;; single mode 163 or ax, 3fh ;; disable all exceptions 164 and ax, not 0xC00 ;; round to nearest mode 165 mov wTemp, ax 166 fldcw wTemp 167 } 168 } 169 } 170 } 171 */ 172 //! This function computes the slowest possible floating-point value (you can also directly use FLT_EPSILON) ComputeFloatEpsilon()173 inline_ float ComputeFloatEpsilon() 174 { 175 float f = 1.0f; 176 ((udword&)f)^=1; 177 return f - 1.0f; // You can check it's the same as FLT_EPSILON 178 } 179 180 inline_ bool IsFloatZero(float x, float epsilon=1e-6f) 181 { 182 return x*x < epsilon; 183 } 184 185 #define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0 186 #define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0 187 #define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0 188 #define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0 189 190 #define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1 191 #define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1 192 #define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1 193 #define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1 194 195 #define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2 196 #define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2 197 #define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2 198 #define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2 199 200 #define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3 201 #define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3 202 #define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3 203 #define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3 204 205 #define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4 206 #define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4 207 #define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4 208 #define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4 209 210 #define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5 211 #define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5 212 #define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5 213 #define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5 214 215 #define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6 216 #define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6 217 #define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6 218 #define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6 219 220 #define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7 221 #define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7 222 #define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7 223 #define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7 224 225 //! A global function to find MAX(a,b) using FCOMI/FCMOV FCMax2(float a,float b)226 inline_ float FCMax2(float a, float b) 227 { 228 #if defined(_MSC_VER) && !defined(_WIN64) 229 float Res; 230 _asm fld [a] 231 _asm fld [b] 232 FCOMI_ST1 233 FCMOVB_ST1 234 _asm fstp [Res] 235 _asm fcomp 236 return Res; 237 #else 238 return a < b ? b : a; 239 #endif 240 } 241 242 //! A global function to find MIN(a,b) using FCOMI/FCMOV FCMin2(float a,float b)243 inline_ float FCMin2(float a, float b) 244 { 245 #if defined(_MSC_VER) && !defined(_WIN64) 246 float Res; 247 _asm fld [a] 248 _asm fld [b] 249 FCOMI_ST1 250 FCMOVNB_ST1 251 _asm fstp [Res] 252 _asm fcomp 253 return Res; 254 #else 255 return a < b ? a : b; 256 #endif 257 } 258 259 //! A global function to find MAX(a,b,c) using FCOMI/FCMOV FCMax3(float a,float b,float c)260 inline_ float FCMax3(float a, float b, float c) 261 { 262 #if defined(_MSC_VER) && !defined(_WIN64) 263 float Res; 264 _asm fld [a] 265 _asm fld [b] 266 _asm fld [c] 267 FCOMI_ST1 268 FCMOVB_ST1 269 FCOMI_ST2 270 FCMOVB_ST2 271 _asm fstp [Res] 272 _asm fcompp 273 return Res; 274 #else 275 return a < b ? (b < c ? c : b) : (a < c ? c : a); 276 #endif 277 } 278 279 //! A global function to find MIN(a,b,c) using FCOMI/FCMOV FCMin3(float a,float b,float c)280 inline_ float FCMin3(float a, float b, float c) 281 { 282 #if defined(_MSC_VER) && !defined(_WIN64) 283 float Res; 284 _asm fld [a] 285 _asm fld [b] 286 _asm fld [c] 287 FCOMI_ST1 288 FCMOVNB_ST1 289 FCOMI_ST2 290 FCMOVNB_ST2 291 _asm fstp [Res] 292 _asm fcompp 293 return Res; 294 #else 295 return a < b ? (a < c ? a : c) : (b < c ? b : c); 296 #endif 297 } 298 ConvertToSortable(float f)299 inline_ int ConvertToSortable(float f) 300 { 301 int& Fi = (int&)f; 302 int Fmask = (Fi>>31); 303 Fi ^= Fmask; 304 Fmask &= ~(1<<31); 305 Fi -= Fmask; 306 return Fi; 307 } 308 309 enum FPUMode 310 { 311 FPU_FLOOR = 0, 312 FPU_CEIL = 1, 313 FPU_BEST = 2, 314 315 FPU_FORCE_DWORD = 0x7fffffff 316 }; 317 318 FUNCTION ICECORE_API FPUMode GetFPUMode(); 319 FUNCTION ICECORE_API void SaveFPU(); 320 FUNCTION ICECORE_API void RestoreFPU(); 321 FUNCTION ICECORE_API void SetFPUFloorMode(); 322 FUNCTION ICECORE_API void SetFPUCeilMode(); 323 FUNCTION ICECORE_API void SetFPUBestMode(); 324 325 FUNCTION ICECORE_API void SetFPUPrecision24(); 326 FUNCTION ICECORE_API void SetFPUPrecision53(); 327 FUNCTION ICECORE_API void SetFPUPrecision64(); 328 FUNCTION ICECORE_API void SetFPURoundingChop(); 329 FUNCTION ICECORE_API void SetFPURoundingUp(); 330 FUNCTION ICECORE_API void SetFPURoundingDown(); 331 FUNCTION ICECORE_API void SetFPURoundingNear(); 332 333 FUNCTION ICECORE_API int intChop(const float& f); 334 FUNCTION ICECORE_API int intFloor(const float& f); 335 FUNCTION ICECORE_API int intCeil(const float& f); 336 337 #endif // __ICEFPU_H__ 338