1 // Copyright (C) 2002-2012 Nikolaus Gebhardt
2 // This file is part of the "Irrlicht Engine".
3 // For conditions of distribution and use, see copyright notice in irrlicht.h
4 
5 #ifndef __IRR_MATH_H_INCLUDED__
6 #define __IRR_MATH_H_INCLUDED__
7 
8 #include "IrrCompileConfig.h"
9 #include "irrTypes.h"
10 #include <math.h>
11 #include <float.h>
12 #include <stdlib.h> // for abs() etc.
13 #include <limits.h> // For INT_MAX / UINT_MAX
14 
15 #if defined(_IRR_SOLARIS_PLATFORM_) || defined(__BORLANDC__) || defined (__BCPLUSPLUS__) || defined (_WIN32_WCE)
16 	#define sqrtf(X) (irr::f32)sqrt((irr::f64)(X))
17 	#define sinf(X) (irr::f32)sin((irr::f64)(X))
18 	#define cosf(X) (irr::f32)cos((irr::f64)(X))
19 	#define asinf(X) (irr::f32)asin((irr::f64)(X))
20 	#define acosf(X) (irr::f32)acos((irr::f64)(X))
21 	#define atan2f(X,Y) (irr::f32)atan2((irr::f64)(X),(irr::f64)(Y))
22 	#define ceilf(X) (irr::f32)ceil((irr::f64)(X))
23 	#define floorf(X) (irr::f32)floor((irr::f64)(X))
24 	#define powf(X,Y) (irr::f32)pow((irr::f64)(X),(irr::f64)(Y))
25 	#define fmodf(X,Y) (irr::f32)fmod((irr::f64)(X),(irr::f64)(Y))
26 	#define fabsf(X) (irr::f32)fabs((irr::f64)(X))
27 	#define logf(X) (irr::f32)log((irr::f64)(X))
28 #endif
29 
30 #ifndef FLT_MAX
31 #define FLT_MAX 3.402823466E+38F
32 #endif
33 
34 #ifndef FLT_MIN
35 #define FLT_MIN 1.17549435e-38F
36 #endif
37 
38 namespace irr
39 {
40 namespace core
41 {
42 
43 	//! Rounding error constant often used when comparing f32 values.
44 
45 	const s32 ROUNDING_ERROR_S32 = 0;
46 #ifdef __IRR_HAS_S64
47 	const s64 ROUNDING_ERROR_S64 = 0;
48 #endif
49 	const f32 ROUNDING_ERROR_f32 = 0.000001f;
50 	const f64 ROUNDING_ERROR_f64 = 0.00000001;
51 
52 #ifdef PI // make sure we don't collide with a define
53 #undef PI
54 #endif
55 	//! Constant for PI.
56 	const f32 PI		= 3.14159265359f;
57 
58 	//! Constant for reciprocal of PI.
59 	const f32 RECIPROCAL_PI	= 1.0f/PI;
60 
61 	//! Constant for half of PI.
62 	const f32 HALF_PI	= PI/2.0f;
63 
64 #ifdef PI64 // make sure we don't collide with a define
65 #undef PI64
66 #endif
67 	//! Constant for 64bit PI.
68 	const f64 PI64		= 3.1415926535897932384626433832795028841971693993751;
69 
70 	//! Constant for 64bit reciprocal of PI.
71 	const f64 RECIPROCAL_PI64 = 1.0/PI64;
72 
73 	//! 32bit Constant for converting from degrees to radians
74 	const f32 DEGTORAD = PI / 180.0f;
75 
76 	//! 32bit constant for converting from radians to degrees (formally known as GRAD_PI)
77 	const f32 RADTODEG   = 180.0f / PI;
78 
79 	//! 64bit constant for converting from degrees to radians (formally known as GRAD_PI2)
80 	const f64 DEGTORAD64 = PI64 / 180.0;
81 
82 	//! 64bit constant for converting from radians to degrees
83 	const f64 RADTODEG64 = 180.0 / PI64;
84 
85 	//! Utility function to convert a radian value to degrees
86 	/** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
87 	\param radians	The radians value to convert to degrees.
88 	*/
radToDeg(f32 radians)89 	inline f32 radToDeg(f32 radians)
90 	{
91 		return RADTODEG * radians;
92 	}
93 
94 	//! Utility function to convert a radian value to degrees
95 	/** Provided as it can be clearer to write radToDeg(X) than RADTODEG * X
96 	\param radians	The radians value to convert to degrees.
97 	*/
radToDeg(f64 radians)98 	inline f64 radToDeg(f64 radians)
99 	{
100 		return RADTODEG64 * radians;
101 	}
102 
103 	//! Utility function to convert a degrees value to radians
104 	/** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
105 	\param degrees	The degrees value to convert to radians.
106 	*/
degToRad(f32 degrees)107 	inline f32 degToRad(f32 degrees)
108 	{
109 		return DEGTORAD * degrees;
110 	}
111 
112 	//! Utility function to convert a degrees value to radians
113 	/** Provided as it can be clearer to write degToRad(X) than DEGTORAD * X
114 	\param degrees	The degrees value to convert to radians.
115 	*/
degToRad(f64 degrees)116 	inline f64 degToRad(f64 degrees)
117 	{
118 		return DEGTORAD64 * degrees;
119 	}
120 
121 	//! returns minimum of two values. Own implementation to get rid of the STL (VS6 problems)
122 	template<class T>
min_(const T & a,const T & b)123 	inline const T& min_(const T& a, const T& b)
124 	{
125 		return a < b ? a : b;
126 	}
127 
128 	//! returns minimum of three values. Own implementation to get rid of the STL (VS6 problems)
129 	template<class T>
min_(const T & a,const T & b,const T & c)130 	inline const T& min_(const T& a, const T& b, const T& c)
131 	{
132 		return a < b ? min_(a, c) : min_(b, c);
133 	}
134 
135 	//! returns maximum of two values. Own implementation to get rid of the STL (VS6 problems)
136 	template<class T>
max_(const T & a,const T & b)137 	inline const T& max_(const T& a, const T& b)
138 	{
139 		return a < b ? b : a;
140 	}
141 
142 	//! returns maximum of three values. Own implementation to get rid of the STL (VS6 problems)
143 	template<class T>
max_(const T & a,const T & b,const T & c)144 	inline const T& max_(const T& a, const T& b, const T& c)
145 	{
146 		return a < b ? max_(b, c) : max_(a, c);
147 	}
148 
149 	//! returns abs of two values. Own implementation to get rid of STL (VS6 problems)
150 	template<class T>
abs_(const T & a)151 	inline T abs_(const T& a)
152 	{
153 		return a < (T)0 ? -a : a;
154 	}
155 
156 	//! returns linear interpolation of a and b with ratio t
157 	//! \return: a if t==0, b if t==1, and the linear interpolation else
158 	template<class T>
lerp(const T & a,const T & b,const f32 t)159 	inline T lerp(const T& a, const T& b, const f32 t)
160 	{
161 		return (T)(a*(1.f-t)) + (b*t);
162 	}
163 
164 	//! clamps a value between low and high
165 	template <class T>
clamp(const T & value,const T & low,const T & high)166 	inline const T clamp (const T& value, const T& low, const T& high)
167 	{
168 		return min_ (max_(value,low), high);
169 	}
170 
171 	//! swaps the content of the passed parameters
172 	// Note: We use the same trick as boost and use two template arguments to
173 	// avoid ambiguity when swapping objects of an Irrlicht type that has not
174 	// it's own swap overload. Otherwise we get conflicts with some compilers
175 	// in combination with stl.
176 	template <class T1, class T2>
swap(T1 & a,T2 & b)177 	inline void swap(T1& a, T2& b)
178 	{
179 		T1 c(a);
180 		a = b;
181 		b = c;
182 	}
183 
184 	//! returns if a equals b, taking possible rounding errors into account
185 	inline bool equals(const f64 a, const f64 b, const f64 tolerance = ROUNDING_ERROR_f64)
186 	{
187 		return (a + tolerance >= b) && (a - tolerance <= b);
188 	}
189 
190 	//! returns if a equals b, taking possible rounding errors into account
191 	inline bool equals(const f32 a, const f32 b, const f32 tolerance = ROUNDING_ERROR_f32)
192 	{
193 		return (a + tolerance >= b) && (a - tolerance <= b);
194 	}
195 
196 	//! We compare the difference in ULP's (spacing between floating-point numbers, aka ULP=1 means there exists no float between).
197 	//\result true when numbers have a ULP <= maxUlpDiff AND have the same sign.
equalsByUlp(f32 a,f32 b,int maxUlpDiff)198 	inline bool equalsByUlp(f32 a, f32 b, int maxUlpDiff)
199 	{
200 		// Based on the ideas and code from Bruce Dawson on
201 		// http://www.altdevblogaday.com/2012/02/22/comparing-floating-point-numbers-2012-edition/
202 		// When floats are interpreted as integers the two nearest possible float numbers differ just
203 		// by one integer number. Also works the other way round, an integer of 1 interpreted as float
204 		// is for example the smallest possible float number.
205 		union Float_t
206 		{
207 			Float_t(float f1 = 0.0f) : f(f1) {}
208 			// Portable sign-extraction
209 			bool sign() const { return (i >> 31) != 0; }
210 
211 			int i;
212 			float f;
213 		};
214 
215 		Float_t fa(a);
216 		Float_t fb(b);
217 
218 		// Different signs, we could maybe get difference to 0, but so close to 0 using epsilons is better.
219 		if ( fa.sign() != fb.sign() )
220 		{
221 			// Check for equality to make sure +0==-0
222 			if (fa.i == fb.i)
223 				return true;
224 			return false;
225 		}
226 
227 		// Find the difference in ULPs.
228 		int ulpsDiff = abs_(fa.i- fb.i);
229 		if (ulpsDiff <= maxUlpDiff)
230 			return true;
231 
232 		return false;
233 	}
234 
235 #if 0
236 	//! returns if a equals b, not using any rounding tolerance
237 	inline bool equals(const s32 a, const s32 b)
238 	{
239 		return (a == b);
240 	}
241 
242 	//! returns if a equals b, not using any rounding tolerance
243 	inline bool equals(const u32 a, const u32 b)
244 	{
245 		return (a == b);
246 	}
247 #endif
248 	//! returns if a equals b, taking an explicit rounding tolerance into account
249 	inline bool equals(const s32 a, const s32 b, const s32 tolerance = ROUNDING_ERROR_S32)
250 	{
251 		return (a + tolerance >= b) && (a - tolerance <= b);
252 	}
253 
254 	//! returns if a equals b, taking an explicit rounding tolerance into account
255 	inline bool equals(const u32 a, const u32 b, const s32 tolerance = ROUNDING_ERROR_S32)
256 	{
257 		return (a + tolerance >= b) && (a - tolerance <= b);
258 	}
259 
260 #ifdef __IRR_HAS_S64
261 	//! returns if a equals b, taking an explicit rounding tolerance into account
262 	inline bool equals(const s64 a, const s64 b, const s64 tolerance = ROUNDING_ERROR_S64)
263 	{
264 		return (a + tolerance >= b) && (a - tolerance <= b);
265 	}
266 #endif
267 
268 	//! returns if a equals zero, taking rounding errors into account
269 	inline bool iszero(const f64 a, const f64 tolerance = ROUNDING_ERROR_f64)
270 	{
271 		return fabs(a) <= tolerance;
272 	}
273 
274 	//! returns if a equals zero, taking rounding errors into account
275 	inline bool iszero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
276 	{
277 		return fabsf(a) <= tolerance;
278 	}
279 
280 	//! returns if a equals not zero, taking rounding errors into account
281 	inline bool isnotzero(const f32 a, const f32 tolerance = ROUNDING_ERROR_f32)
282 	{
283 		return fabsf(a) > tolerance;
284 	}
285 
286 	//! returns if a equals zero, taking rounding errors into account
287 	inline bool iszero(const s32 a, const s32 tolerance = 0)
288 	{
289 		return ( a & 0x7ffffff ) <= tolerance;
290 	}
291 
292 	//! returns if a equals zero, taking rounding errors into account
293 	inline bool iszero(const u32 a, const u32 tolerance = 0)
294 	{
295 		return a <= tolerance;
296 	}
297 
298 #ifdef __IRR_HAS_S64
299 	//! returns if a equals zero, taking rounding errors into account
300 	inline bool iszero(const s64 a, const s64 tolerance = 0)
301 	{
302 		return abs_(a) > tolerance;
303 	}
304 #endif
305 
s32_min(s32 a,s32 b)306 	inline s32 s32_min(s32 a, s32 b)
307 	{
308 		const s32 mask = (a - b) >> 31;
309 		return (a & mask) | (b & ~mask);
310 	}
311 
s32_max(s32 a,s32 b)312 	inline s32 s32_max(s32 a, s32 b)
313 	{
314 		const s32 mask = (a - b) >> 31;
315 		return (b & mask) | (a & ~mask);
316 	}
317 
s32_clamp(s32 value,s32 low,s32 high)318 	inline s32 s32_clamp (s32 value, s32 low, s32 high)
319 	{
320 		return s32_min(s32_max(value,low), high);
321 	}
322 
323 	/*
324 		float IEEE-754 bit represenation
325 
326 		0      0x00000000
327 		1.0    0x3f800000
328 		0.5    0x3f000000
329 		3      0x40400000
330 		+inf   0x7f800000
331 		-inf   0xff800000
332 		+NaN   0x7fc00000 or 0x7ff00000
333 		in general: number = (sign ? -1:1) * 2^(exponent) * 1.(mantissa bits)
334 	*/
335 
336 	typedef union { u32 u; s32 s; f32 f; } inttofloat;
337 
338 	#define F32_AS_S32(f)		(*((s32 *) &(f)))
339 	#define F32_AS_U32(f)		(*((u32 *) &(f)))
340 	#define F32_AS_U32_POINTER(f)	( ((u32 *) &(f)))
341 
342 	#define F32_VALUE_0		0x00000000
343 	#define F32_VALUE_1		0x3f800000
344 	#define F32_SIGN_BIT		0x80000000U
345 	#define F32_EXPON_MANTISSA	0x7FFFFFFFU
346 
347 	//! code is taken from IceFPU
348 	//! Integer representation of a floating-point value.
349 #ifdef IRRLICHT_FAST_MATH
350 	#define IR(x)                           ((u32&)(x))
351 #else
IR(f32 x)352 	inline u32 IR(f32 x) {inttofloat tmp; tmp.f=x; return tmp.u;}
353 #endif
354 
355 	//! Absolute integer representation of a floating-point value
356 	#define AIR(x)				(IR(x)&0x7fffffff)
357 
358 	//! Floating-point representation of an integer value.
359 #ifdef IRRLICHT_FAST_MATH
360 	#define FR(x)                           ((f32&)(x))
361 #else
FR(u32 x)362 	inline f32 FR(u32 x) {inttofloat tmp; tmp.u=x; return tmp.f;}
FR(s32 x)363 	inline f32 FR(s32 x) {inttofloat tmp; tmp.s=x; return tmp.f;}
364 #endif
365 
366 	//! integer representation of 1.0
367 	#define IEEE_1_0			0x3f800000
368 	//! integer representation of 255.0
369 	#define IEEE_255_0			0x437f0000
370 
371 #ifdef IRRLICHT_FAST_MATH
372 	#define	F32_LOWER_0(f)		(F32_AS_U32(f) >  F32_SIGN_BIT)
373 	#define	F32_LOWER_EQUAL_0(f)	(F32_AS_S32(f) <= F32_VALUE_0)
374 	#define	F32_GREATER_0(f)	(F32_AS_S32(f) >  F32_VALUE_0)
375 	#define	F32_GREATER_EQUAL_0(f)	(F32_AS_U32(f) <= F32_SIGN_BIT)
376 	#define	F32_EQUAL_1(f)		(F32_AS_U32(f) == F32_VALUE_1)
377 	#define	F32_EQUAL_0(f)		( (F32_AS_U32(f) & F32_EXPON_MANTISSA ) == F32_VALUE_0)
378 
379 	// only same sign
380 	#define	F32_A_GREATER_B(a,b)	(F32_AS_S32((a)) > F32_AS_S32((b)))
381 
382 #else
383 
384 	#define	F32_LOWER_0(n)		((n) <  0.0f)
385 	#define	F32_LOWER_EQUAL_0(n)	((n) <= 0.0f)
386 	#define	F32_GREATER_0(n)	((n) >  0.0f)
387 	#define	F32_GREATER_EQUAL_0(n)	((n) >= 0.0f)
388 	#define	F32_EQUAL_1(n)		((n) == 1.0f)
389 	#define	F32_EQUAL_0(n)		((n) == 0.0f)
390 	#define	F32_A_GREATER_B(a,b)	((a) > (b))
391 #endif
392 
393 
394 #ifndef REALINLINE
395 	#ifdef _MSC_VER
396 		#define REALINLINE __forceinline
397 	#else
398 		#define REALINLINE inline
399 	#endif
400 #endif
401 
402 #if defined(__BORLANDC__) || defined (__BCPLUSPLUS__)
403 
404 	// 8-bit bools in borland builder
405 
406 	//! conditional set based on mask and arithmetic shift
if_c_a_else_b(const c8 condition,const u32 a,const u32 b)407 	REALINLINE u32 if_c_a_else_b ( const c8 condition, const u32 a, const u32 b )
408 	{
409 		return ( ( -condition >> 7 ) & ( a ^ b ) ) ^ b;
410 	}
411 
412 	//! conditional set based on mask and arithmetic shift
if_c_a_else_0(const c8 condition,const u32 a)413 	REALINLINE u32 if_c_a_else_0 ( const c8 condition, const u32 a )
414 	{
415 		return ( -condition >> 31 ) & a;
416 	}
417 #else
418 
419 	//! conditional set based on mask and arithmetic shift
if_c_a_else_b(const s32 condition,const u32 a,const u32 b)420 	REALINLINE u32 if_c_a_else_b ( const s32 condition, const u32 a, const u32 b )
421 	{
422 		return ( ( -condition >> 31 ) & ( a ^ b ) ) ^ b;
423 	}
424 
425 	//! conditional set based on mask and arithmetic shift
if_c_a_else_b(const s16 condition,const u16 a,const u16 b)426 	REALINLINE u16 if_c_a_else_b ( const s16 condition, const u16 a, const u16 b )
427 	{
428 		return ( ( -condition >> 15 ) & ( a ^ b ) ) ^ b;
429 	}
430 
431 	//! conditional set based on mask and arithmetic shift
if_c_a_else_0(const s32 condition,const u32 a)432 	REALINLINE u32 if_c_a_else_0 ( const s32 condition, const u32 a )
433 	{
434 		return ( -condition >> 31 ) & a;
435 	}
436 #endif
437 
438 	/*
439 		if (condition) state |= m; else state &= ~m;
440 	*/
setbit_cond(u32 & state,s32 condition,u32 mask)441 	REALINLINE void setbit_cond ( u32 &state, s32 condition, u32 mask )
442 	{
443 		// 0, or any postive to mask
444 		//s32 conmask = -condition >> 31;
445 		state ^= ( ( -condition >> 31 ) ^ state ) & mask;
446 	}
447 
round_(f32 x)448 	inline f32 round_( f32 x )
449 	{
450 		return floorf( x + 0.5f );
451 	}
452 
clearFPUException()453 	REALINLINE void clearFPUException ()
454 	{
455 #ifdef IRRLICHT_FAST_MATH
456 		return;
457 #ifdef feclearexcept
458 		feclearexcept(FE_ALL_EXCEPT);
459 #elif defined(_MSC_VER)
460 		__asm fnclex;
461 #elif defined(__GNUC__) && defined(__x86__)
462 		__asm__ __volatile__ ("fclex \n\t");
463 #else
464 #  warn clearFPUException not supported.
465 #endif
466 #endif
467 	}
468 
469 	// calculate: sqrt ( x )
squareroot(const f32 f)470 	REALINLINE f32 squareroot(const f32 f)
471 	{
472 		return sqrtf(f);
473 	}
474 
475 	// calculate: sqrt ( x )
squareroot(const f64 f)476 	REALINLINE f64 squareroot(const f64 f)
477 	{
478 		return sqrt(f);
479 	}
480 
481 	// calculate: sqrt ( x )
squareroot(const s32 f)482 	REALINLINE s32 squareroot(const s32 f)
483 	{
484 		return static_cast<s32>(squareroot(static_cast<f32>(f)));
485 	}
486 
487 #ifdef __IRR_HAS_S64
488 	// calculate: sqrt ( x )
squareroot(const s64 f)489 	REALINLINE s64 squareroot(const s64 f)
490 	{
491 		return static_cast<s64>(squareroot(static_cast<f64>(f)));
492 	}
493 #endif
494 
495 	// calculate: 1 / sqrt ( x )
reciprocal_squareroot(const f64 x)496 	REALINLINE f64 reciprocal_squareroot(const f64 x)
497 	{
498 		return 1.0 / sqrt(x);
499 	}
500 
501 	// calculate: 1 / sqrtf ( x )
reciprocal_squareroot(const f32 f)502 	REALINLINE f32 reciprocal_squareroot(const f32 f)
503 	{
504 #if defined ( IRRLICHT_FAST_MATH )
505 	#if defined(_MSC_VER)
506 		// SSE reciprocal square root estimate, accurate to 12 significant
507 		// bits of the mantissa
508 		f32 recsqrt;
509 		__asm rsqrtss xmm0, f           // xmm0 = rsqrtss(f)
510 		__asm movss recsqrt, xmm0       // return xmm0
511 		return recsqrt;
512 
513 /*
514 		// comes from Nvidia
515 		u32 tmp = (u32(IEEE_1_0 << 1) + IEEE_1_0 - *(u32*)&x) >> 1;
516 		f32 y = *(f32*)&tmp;
517 		return y * (1.47f - 0.47f * x * y * y);
518 */
519 	#else
520 		return 1.f / sqrtf(f);
521 	#endif
522 #else // no fast math
523 		return 1.f / sqrtf(f);
524 #endif
525 	}
526 
527 	// calculate: 1 / sqrtf( x )
reciprocal_squareroot(const s32 x)528 	REALINLINE s32 reciprocal_squareroot(const s32 x)
529 	{
530 		return static_cast<s32>(reciprocal_squareroot(static_cast<f32>(x)));
531 	}
532 
533 	// calculate: 1 / x
reciprocal(const f32 f)534 	REALINLINE f32 reciprocal( const f32 f )
535 	{
536 #if defined (IRRLICHT_FAST_MATH)
537 
538 		// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
539 		// bi ts of the mantissa
540 		// One Newtown-Raphson Iteration:
541 		// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
542 		f32 rec;
543 		__asm rcpss xmm0, f               // xmm0 = rcpss(f)
544 		__asm movss xmm1, f               // xmm1 = f
545 		__asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)
546 		__asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)
547 		__asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)
548 		__asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)
549 										  //        - f * rcpss(f) * rcpss(f)
550 		__asm movss rec, xmm0             // return xmm0
551 		return rec;
552 
553 
554 		//! i do not divide through 0.. (fpu expection)
555 		// instead set f to a high value to get a return value near zero..
556 		// -1000000000000.f.. is use minus to stay negative..
557 		// must test's here (plane.normal dot anything ) checks on <= 0.f
558 		//u32 x = (-(AIR(f) != 0 ) >> 31 ) & ( IR(f) ^ 0xd368d4a5 ) ^ 0xd368d4a5;
559 		//return 1.f / FR ( x );
560 
561 #else // no fast math
562 		return 1.f / f;
563 #endif
564 	}
565 
566 	// calculate: 1 / x
reciprocal(const f64 f)567 	REALINLINE f64 reciprocal ( const f64 f )
568 	{
569 		return 1.0 / f;
570 	}
571 
572 
573 	// calculate: 1 / x, low precision allowed
reciprocal_approxim(const f32 f)574 	REALINLINE f32 reciprocal_approxim ( const f32 f )
575 	{
576 #if defined( IRRLICHT_FAST_MATH)
577 
578 		// SSE Newton-Raphson reciprocal estimate, accurate to 23 significant
579 		// bi ts of the mantissa
580 		// One Newtown-Raphson Iteration:
581 		// f(i+1) = 2 * rcpss(f) - f * rcpss(f) * rcpss(f)
582 		f32 rec;
583 		__asm rcpss xmm0, f               // xmm0 = rcpss(f)
584 		__asm movss xmm1, f               // xmm1 = f
585 		__asm mulss xmm1, xmm0            // xmm1 = f * rcpss(f)
586 		__asm mulss xmm1, xmm0            // xmm2 = f * rcpss(f) * rcpss(f)
587 		__asm addss xmm0, xmm0            // xmm0 = 2 * rcpss(f)
588 		__asm subss xmm0, xmm1            // xmm0 = 2 * rcpss(f)
589 										  //        - f * rcpss(f) * rcpss(f)
590 		__asm movss rec, xmm0             // return xmm0
591 		return rec;
592 
593 
594 /*
595 		// SSE reciprocal estimate, accurate to 12 significant bits of
596 		f32 rec;
597 		__asm rcpss xmm0, f             // xmm0 = rcpss(f)
598 		__asm movss rec , xmm0          // return xmm0
599 		return rec;
600 */
601 /*
602 		register u32 x = 0x7F000000 - IR ( p );
603 		const f32 r = FR ( x );
604 		return r * (2.0f - p * r);
605 */
606 #else // no fast math
607 		return 1.f / f;
608 #endif
609 	}
610 
611 
floor32(f32 x)612 	REALINLINE s32 floor32(f32 x)
613 	{
614 #ifdef IRRLICHT_FAST_MATH
615 		const f32 h = 0.5f;
616 
617 		s32 t;
618 
619 #if defined(_MSC_VER)
620 		__asm
621 		{
622 			fld	x
623 			fsub	h
624 			fistp	t
625 		}
626 #elif defined(__GNUC__)
627 		__asm__ __volatile__ (
628 			"fsub %2 \n\t"
629 			"fistpl %0"
630 			: "=m" (t)
631 			: "t" (x), "f" (h)
632 			: "st"
633 			);
634 #else
635 #  warn IRRLICHT_FAST_MATH not supported.
636 		return (s32) floorf ( x );
637 #endif
638 		return t;
639 #else // no fast math
640 		return (s32) floorf ( x );
641 #endif
642 	}
643 
644 
ceil32(f32 x)645 	REALINLINE s32 ceil32 ( f32 x )
646 	{
647 #ifdef IRRLICHT_FAST_MATH
648 		const f32 h = 0.5f;
649 
650 		s32 t;
651 
652 #if defined(_MSC_VER)
653 		__asm
654 		{
655 			fld	x
656 			fadd	h
657 			fistp	t
658 		}
659 #elif defined(__GNUC__)
660 		__asm__ __volatile__ (
661 			"fadd %2 \n\t"
662 			"fistpl %0 \n\t"
663 			: "=m"(t)
664 			: "t"(x), "f"(h)
665 			: "st"
666 			);
667 #else
668 #  warn IRRLICHT_FAST_MATH not supported.
669 		return (s32) ceilf ( x );
670 #endif
671 		return t;
672 #else // not fast math
673 		return (s32) ceilf ( x );
674 #endif
675 	}
676 
677 
678 
round32(f32 x)679 	REALINLINE s32 round32(f32 x)
680 	{
681 #if defined(IRRLICHT_FAST_MATH)
682 		s32 t;
683 
684 #if defined(_MSC_VER)
685 		__asm
686 		{
687 			fld   x
688 			fistp t
689 		}
690 #elif defined(__GNUC__)
691 		__asm__ __volatile__ (
692 			"fistpl %0 \n\t"
693 			: "=m"(t)
694 			: "t"(x)
695 			: "st"
696 			);
697 #else
698 #  warn IRRLICHT_FAST_MATH not supported.
699 		return (s32) round_(x);
700 #endif
701 		return t;
702 #else // no fast math
703 		return (s32) round_(x);
704 #endif
705 	}
706 
f32_max3(const f32 a,const f32 b,const f32 c)707 	inline f32 f32_max3(const f32 a, const f32 b, const f32 c)
708 	{
709 		return a > b ? (a > c ? a : c) : (b > c ? b : c);
710 	}
711 
f32_min3(const f32 a,const f32 b,const f32 c)712 	inline f32 f32_min3(const f32 a, const f32 b, const f32 c)
713 	{
714 		return a < b ? (a < c ? a : c) : (b < c ? b : c);
715 	}
716 
fract(f32 x)717 	inline f32 fract ( f32 x )
718 	{
719 		return x - floorf ( x );
720 	}
721 
722 } // end namespace core
723 } // end namespace irr
724 
725 #ifndef IRRLICHT_FAST_MATH
726 	using irr::core::IR;
727 	using irr::core::FR;
728 #endif
729 
730 #endif
731 
732