1 /*
2  * fpu/mathlib.h - Floating-point math support library
3  *
4  * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS)
5  *
6  * Inspired by Christian Bauer's Basilisk II
7  *
8  * This file is part of the ARAnyM project which builds a new and powerful
9  * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
10  *
11  * MC68881/68040 fpu emulation
12  *
13  * Original UAE FPU, copyright 1996 Herman ten Brugge
14  * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
15  * New framework, copyright 2000-2001 Gwenole Beauchesne
16  * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
17  *
18  * ARAnyM is free software; you can redistribute it and/or modify
19  * it under the terms of the GNU General Public License as published by
20  * the Free Software Foundation; either version 2 of the License, or
21  * (at your option) any later version.
22  *
23  * ARAnyM is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26  * GNU General Public License for more details.
27  *
28  * You should have received a copy of the GNU General Public License
29  * along with ARAnyM; if not, write to the Free Software
30  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
31  */
32 
33 #ifndef FPU_MATHLIB_H
34 #define FPU_MATHLIB_H
35 
36 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
37 #undef	PUBLIC
38 #define PUBLIC	extern
39 
40 #undef	PRIVATE
41 #define PRIVATE	static
42 
43 #undef	FFPU
44 #define FFPU	/**/
45 
46 #undef	FPU
47 #define	FPU		fpu.
48 
49 // Define the following macro if branches are expensive. If so,
50 // integer-based isnan() and isinf() functions are implemented.
51 // TODO: move to Makefile.in
52 #define BRANCHES_ARE_EXPENSIVE 1
53 
54 // Use ISO C99 extended-precision math functions (glibc 2.1+)
55 #define FPU_USE_ISO_C99 1
56 
57 #if defined(FPU_USE_ISO_C99)
58 // NOTE: no prior <math.h> shall be included at this point
59 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
60 #define __USE_ISOC9X 1 // for glibc 2.1.X
61 #include <math.h>
62 #else
63 #include <cmath>
64 using namespace std;
65 #endif
66 
67 /* -------------------------------------------------------------------------- */
68 /* --- Floating-point register types                                      --- */
69 /* -------------------------------------------------------------------------- */
70 
71 // Single : S 8*E 23*F
72 #define FP_SINGLE_EXP_MAX		0xff
73 #define FP_SINGLE_EXP_BIAS		0x7f
74 
75 // Double : S 11*E 52*F
76 #define FP_DOUBLE_EXP_MAX		0x7ff
77 #define FP_DOUBLE_EXP_BIAS		0x3ff
78 
79 // Extended : S 15*E 64*F
80 #define FP_EXTENDED_EXP_MAX		0x7fff
81 #define FP_EXTENDED_EXP_BIAS	0x3fff
82 
83 // Zeroes						: E = 0 & F = 0
84 // Infinities					: E = MAX & F = 0
85 // Not-A-Number					: E = MAX & F # 0
86 
87 /* -------------------------------------------------------------------------- */
88 /* --- Floating-point type shapes (IEEE-compliant)                        --- */
89 /* -------------------------------------------------------------------------- */
90 
91 // Taken from glibc 2.2.x: ieee754.h
92 
93 // IEEE-754 float format
94 union fpu_single_shape {
95 
96 	fpu_single value;
97 
98 	/* This is the IEEE 754 single-precision format.  */
99 	struct {
100 #ifdef WORDS_BIGENDIAN
101 		unsigned int negative:1;
102 		unsigned int exponent:8;
103 		unsigned int mantissa:23;
104 #else
105 		unsigned int mantissa:23;
106 		unsigned int exponent:8;
107 		unsigned int negative:1;
108 #endif
109 	} ieee;
110 
111 	/* This format makes it easier to see if a NaN is a signalling NaN.  */
112 	struct {
113 #ifdef WORDS_BIGENDIAN
114 		unsigned int negative:1;
115 		unsigned int exponent:8;
116 		unsigned int quiet_nan:1;
117 		unsigned int mantissa:22;
118 #else
119 		unsigned int mantissa:22;
120 		unsigned int quiet_nan:1;
121 		unsigned int exponent:8;
122 		unsigned int negative:1;
123 #endif
124 	} ieee_nan;
125 };
126 
127 // IEEE-754 double format
128 union fpu_double_shape {
129 	fpu_double value;
130 
131 	/* This is the IEEE 754 double-precision format.  */
132 	struct {
133 #ifdef WORDS_BIGENDIAN
134 		unsigned int negative:1;
135 		unsigned int exponent:11;
136 		/* Together these comprise the mantissa.  */
137 		unsigned int mantissa0:20;
138 		unsigned int mantissa1:32;
139 #else
140 #	if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
141 		unsigned int mantissa0:20;
142 		unsigned int exponent:11;
143 		unsigned int negative:1;
144 		unsigned int mantissa1:32;
145 #	else
146 		/* Together these comprise the mantissa.  */
147 		unsigned int mantissa1:32;
148 		unsigned int mantissa0:20;
149 		unsigned int exponent:11;
150 		unsigned int negative:1;
151 #	endif
152 #endif
153 	} ieee;
154 
155 	/* This format makes it easier to see if a NaN is a signalling NaN.  */
156 	struct {
157 #ifdef WORDS_BIGENDIAN
158 		unsigned int negative:1;
159 		unsigned int exponent:11;
160 		unsigned int quiet_nan:1;
161 		/* Together these comprise the mantissa.  */
162 		unsigned int mantissa0:19;
163 		unsigned int mantissa1:32;
164 #else
165 #	if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
166 		unsigned int mantissa0:19;
167 		unsigned int quiet_nan:1;
168 		unsigned int exponent:11;
169 		unsigned int negative:1;
170 		unsigned int mantissa1:32;
171 #	else
172 		/* Together these comprise the mantissa.  */
173 		unsigned int mantissa1:32;
174 		unsigned int mantissa0:19;
175 		unsigned int quiet_nan:1;
176 		unsigned int exponent:11;
177 		unsigned int negative:1;
178 #	endif
179 #endif
180 	} ieee_nan;
181 
182 	/* This format is used to extract the sign_exponent and mantissa parts only */
183 	struct {
184 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
185 		unsigned int msw:32;
186 		unsigned int lsw:32;
187 #else
188 		unsigned int lsw:32;
189 		unsigned int msw:32;
190 #endif
191 	} parts;
192 };
193 
194 #ifdef USE_LONG_DOUBLE
195 // IEEE-854 long double format
196 union fpu_extended_shape {
197 	fpu_extended value;
198 
199 	/* This is the IEEE 854 double-extended-precision format.  */
200 	struct {
201 #ifdef WORDS_BIGENDIAN
202 		unsigned int negative:1;
203 		unsigned int exponent:15;
204 		unsigned int empty:16;
205 		unsigned int mantissa0:32;
206 		unsigned int mantissa1:32;
207 #else
208 #	if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
209 		unsigned int exponent:15;
210 		unsigned int negative:1;
211 		unsigned int empty:16;
212 		unsigned int mantissa0:32;
213 		unsigned int mantissa1:32;
214 #	else
215 		unsigned int mantissa1:32;
216 		unsigned int mantissa0:32;
217 		unsigned int exponent:15;
218 		unsigned int negative:1;
219 		unsigned int empty:16;
220 #	endif
221 #endif
222 	} ieee;
223 
224 	/* This is for NaNs in the IEEE 854 double-extended-precision format.  */
225 	struct {
226 #ifdef WORDS_BIGENDIAN
227 		unsigned int negative:1;
228 		unsigned int exponent:15;
229 		unsigned int empty:16;
230 		unsigned int one:1;
231 		unsigned int quiet_nan:1;
232 		unsigned int mantissa0:30;
233 		unsigned int mantissa1:32;
234 #else
235 #	if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
236 		unsigned int exponent:15;
237 		unsigned int negative:1;
238 		unsigned int empty:16;
239 		unsigned int mantissa0:30;
240 		unsigned int quiet_nan:1;
241 		unsigned int one:1;
242 		unsigned int mantissa1:32;
243 #	else
244 		unsigned int mantissa1:32;
245 		unsigned int mantissa0:30;
246 		unsigned int quiet_nan:1;
247 		unsigned int one:1;
248 		unsigned int exponent:15;
249 		unsigned int negative:1;
250 		unsigned int empty:16;
251 #	endif
252 #endif
253 	} ieee_nan;
254 
255 	/* This format is used to extract the sign_exponent and mantissa parts only */
256 	struct {
257 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
258 		unsigned int sign_exponent:16;
259 		unsigned int empty:16;
260 		unsigned int msw:32;
261 		unsigned int lsw:32;
262 #else
263 		unsigned int lsw:32;
264 		unsigned int msw:32;
265 		unsigned int sign_exponent:16;
266 		unsigned int empty:16;
267 #endif
268 	} parts;
269 };
270 #endif
271 
272 #ifdef USE_QUAD_DOUBLE
273 // IEEE-854 quad double format
274 union fpu_extended_shape {
275 	fpu_extended value;
276 
277 	/* This is the IEEE 854 quad-precision format.  */
278 	struct {
279 #ifdef WORDS_BIGENDIAN
280 		unsigned int negative:1;
281 		unsigned int exponent:15;
282 		unsigned int mantissa0:16;
283 		unsigned int mantissa1:32;
284 		unsigned int mantissa2:32;
285 		unsigned int mantissa3:32;
286 #else
287 		unsigned int mantissa3:32;
288 		unsigned int mantissa2:32;
289 		unsigned int mantissa1:32;
290 		unsigned int mantissa0:16;
291 		unsigned int exponent:15;
292 		unsigned int negative:1;
293 #endif
294 	} ieee;
295 
296 	/* This is for NaNs in the IEEE 854 quad-precision format.  */
297 	struct {
298 #ifdef WORDS_BIGENDIAN
299 		unsigned int negative:1;
300 		unsigned int exponent:15;
301 		unsigned int quiet_nan:1;
302 		unsigned int mantissa0:15;
303 		unsigned int mantissa1:32;
304 		unsigned int mantissa2:32;
305 		unsigned int mantissa3:32;
306 #else
307 		unsigned int mantissa3:32;
308 		unsigned int mantissa2:32;
309 		unsigned int mantissa1:32;
310 		unsigned int mantissa0:15;
311 		unsigned int quiet_nan:1;
312 		unsigned int exponent:15;
313 		unsigned int negative:1;
314 #endif
315 	} ieee_nan;
316 
317 	/* This format is used to extract the sign_exponent and mantissa parts only */
318 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
319 	struct {
320 		uae_u64 msw;
321 		uae_u64 lsw;
322 	} parts64;
323 	struct {
324 		uae_u32 w0;
325 		uae_u32 w1;
326 		uae_u32 w2;
327 		uae_u32 w3;
328 	} parts32;
329 #else
330 	struct {
331 		uae_u64 lsw;
332 		uae_u64 msw;
333 	} parts64;
334 	struct {
335 		uae_u32 w3;
336 		uae_u32 w2;
337 		uae_u32 w1;
338 		uae_u32 w0;
339 	} parts32;
340 #endif
341 };
342 #endif
343 
344 // Declare a shape of the requested FP type
345 #define fp_declare_init_shape(psvar, ftype) \
346 	fpu_ ## ftype ## _shape psvar
347 
348 /* -------------------------------------------------------------------------- */
349 /* --- Extra Math Functions                                               --- */
350 /* --- (most of them had to be defined before including <fpu/flags.h>)    --- */
351 /* -------------------------------------------------------------------------- */
352 
353 #undef isnan
354 #if 0 && defined(HAVE_ISNANL)
355 # define isnan(x) isnanl((x))
356 #else
357 # define isnan(x) fp_do_isnan((x))
358 #endif
359 
fp_do_isnan(fpu_register const & r)360 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
361 {
362 #ifdef BRANCHES_ARE_EXPENSIVE
363 #if !defined(USE_LONG_DOUBLE)
364 	fp_declare_init_shape(sxp, double);
365 	sxp.value = r;
366 	uae_s32 hx = sxp.parts.msw;
367 	uae_s32 lx = sxp.parts.lsw;
368 	hx &= 0x7fffffff;
369 	hx |= (uae_u32)(lx | (-lx)) >> 31;
370 	hx = 0x7ff00000 - hx;
371 	return (int)(((uae_u32)hx) >> 31);
372 #elif defined(USE_QUAD_DOUBLE)
373 	fp_declare_init_shape(sxp, extended);
374 	sxp.value = r;
375 	uae_s64 hx = sxp.parts64.msw;
376 	uae_s64 lx = sxp.parts64.lsw;
377 	hx &= 0x7fffffffffffffffLL;
378 	hx |= (uae_u64)(lx | (-lx)) >> 63;
379 	hx = 0x7fff000000000000LL - hx;
380 	return (int)((uae_u64)hx >> 63);
381 #else
382 	fp_declare_init_shape(sxp, extended);
383 	sxp.value = r;
384 	uae_s32 se = sxp.parts.sign_exponent;
385 	uae_s32 hx = sxp.parts.msw;
386 	uae_s32 lx = sxp.parts.lsw;
387 	se = (se & 0x7fff) << 1;
388 	lx |= hx & 0x7fffffff;
389 	se |= (uae_u32)(lx | (-lx)) >> 31;
390 	se = 0xfffe - se;
391 	return (int)(((uae_u32)(se)) >> 31);
392 #endif
393 #else
394 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
395 	fp_declare_init_shape(sxp, extended);
396 	sxp.value = r;
397 	return	(sxp.ieee.exponent == FP_EXTENDED_EXP_MAX)
398 #else
399 	fp_declare_init_shape(sxp, double);
400 	sxp.value = r;
401 	return	(sxp.ieee.exponent == FP_DOUBLE_EXP_MAX)
402 #endif
403 		&& (sxp.ieee.mantissa0 & 0x7fffffff) != 0
404 		&& sxp.ieee.mantissa1 != 0
405 #ifdef USE_QUAD_DOUBLE
406 		&& sxp.ieee.mantissa2 != 0
407 		&& sxp.ieee.mantissa3 != 0
408 #endif
409 		;
410 #endif
411 }
412 
413 #undef isinf
414 #if 0 && defined(HAVE_ISINFL)
415 # define isinf(x) isinfl((x))
416 #else
417 # define isinf(x) fp_do_isinf((x))
418 #endif
419 
fp_do_isinf(fpu_register const & r)420 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
421 {
422 #ifdef BRANCHES_ARE_EXPENSIVE
423 #if !defined(USE_LONG_DOUBLE)
424 	fp_declare_init_shape(sxp, double);
425 	sxp.value = r;
426 	uae_s32 hx = sxp.parts.msw;
427 	uae_s32 lx = sxp.parts.lsw;
428 	lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
429 	lx |= -lx;
430 	return ~(lx >> 31) & (hx >> 30);
431 #elif defined(USE_QUAD_DOUBLE)
432 	fp_declare_init_shape(sxp, extended);
433 	sxp.value = r;
434 	uae_s64 hx = sxp.parts64.msw;
435 	uae_s64 lx = sxp.parts64.lsw;
436 	lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437 	lx |= -lx;
438 	return ~(lx >> 63) & (hx >> 62);
439 #else
440 	fp_declare_init_shape(sxp, extended);
441 	sxp.value = r;
442 	/* NOTE: This function should work for both m68k and native INFs. */
443 #if 0
444 	uae_s32 se = sxp.parts.sign_exponent;
445 	uae_s32 hx = sxp.parts.msw;
446 	uae_s32 lx = sxp.parts.lsw;
447 	/* This additional ^ 0x80000000 is necessary because in Intel's
448 	   internal representation of the implicit one is explicit.
449 	   NOTE: anyway, this is equivalent to & 0x7fffffff in that case.  */
450 #ifdef CPU_i386
451 	lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
452 #else
453 	lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
454 #endif
455 	lx |= -lx;
456 	se &= 0x8000;
457 	return ~(lx >> 31) & (1 - (se >> 14));
458 #else
459 	return sxp.ieee.exponent == FP_EXTENDED_EXP_MAX
460 		&& (sxp.ieee.mantissa0 & 0x7fffffff) == 0
461 		&& sxp.ieee.mantissa1 == 0;
462 #endif
463 #endif
464 #else
465 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
466 	fp_declare_init_shape(sxp, extended);
467 	sxp.value = r;
468 	return	(sxp.ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
469 #else
470 	fp_declare_init_shape(sxp, double);
471 	sxp.value = r;
472 	return	(sxp.ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
473 #endif
474 		&& (sxp.ieee.mantissa0 & 0x7fffffff) == 0
475 		&& sxp.ieee.mantissa1 == 0
476 #ifdef USE_QUAD_DOUBLE
477 		&& sxp.ieee.mantissa2 == 0
478 		&& sxp.ieee.mantissa3 == 0
479 #endif
480 		;
481 #endif
482 }
483 
484 #undef isneg
485 #define isneg(x) fp_do_isneg((x))
486 
fp_do_isneg(fpu_register const & r)487 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
488 {
489 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
490 	fp_declare_init_shape(sxp, extended);
491 #else
492 	fp_declare_init_shape(sxp, double);
493 #endif
494 	sxp.value = r;
495 	return sxp.ieee.negative;
496 }
497 
498 #undef iszero
499 #define iszero(x) fp_do_iszero((x))
500 
fp_do_iszero(fpu_register const & r)501 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
502 {
503 	// TODO: BRANCHES_ARE_EXPENSIVE
504 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
505 	fp_declare_init_shape(sxp, extended);
506 #else
507 	fp_declare_init_shape(sxp, double);
508 #endif
509 	sxp.value = r;
510 	return	(sxp.ieee.exponent == 0)
511 		&&	(sxp.ieee.mantissa0 == 0)
512 		&&	(sxp.ieee.mantissa1 == 0)
513 #ifdef USE_QUAD_DOUBLE
514 		&&	(sxp.ieee.mantissa2 == 0)
515 		&&	(sxp.ieee.mantissa3 == 0)
516 #endif
517 		;
518 }
519 
get_dest_flags(fpu_register const & r)520 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
521 {
522 	fl_dest.negative	= isneg(r);
523 	fl_dest.zero		= iszero(r);
524 	fl_dest.infinity	= isinf(r);
525 	fl_dest.nan			= isnan(r);
526 	fl_dest.in_range	= !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
527 }
528 
get_source_flags(fpu_register const & r)529 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
530 {
531 	fl_source.negative	= isneg(r);
532 	fl_source.zero		= iszero(r);
533 	fl_source.infinity	= isinf(r);
534 	fl_source.nan		= isnan(r);
535 	fl_source.in_range	= !fl_source.zero && !fl_source.infinity && !fl_source.nan;
536 }
537 
make_nan(fpu_register & r,bool negative)538 PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative)
539 {
540 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
541 	fp_declare_init_shape(sxp, extended);
542 	sxp.ieee.exponent	= FP_EXTENDED_EXP_MAX;
543 	sxp.ieee.empty		= 0;
544 	sxp.ieee.mantissa0	= 0xffffffff;
545 #else
546 	fp_declare_init_shape(sxp, double);
547 	sxp.ieee.exponent	= FP_DOUBLE_EXP_MAX;
548 	sxp.ieee.mantissa0	= 0xfffff;
549 #endif
550 	sxp.ieee.mantissa1	= 0xffffffff;
551 #ifdef USE_QUAD_DOUBLE
552 	sxp.ieee.mantissa2	= 0xffffffff;
553 	sxp.ieee.mantissa3	= 0xffffffff;
554 #endif
555 	sxp.ieee.negative   = negative;
556 	r = sxp.value;
557 }
558 
make_zero(fpu_register & r,bool negative)559 PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative)
560 {
561 #if 1
562 	r = negative ? -0.0 : +0.0;
563 #else
564 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
565 	fp_declare_init_shape(sxp, extended);
566 	sxp.ieee.empty      = 0;
567 #else
568 	fp_declare_init_shape(sxp, double);
569 #endif
570 	sxp.ieee.negative	= negative;
571 	sxp.ieee.exponent	= 0;
572 	sxp.ieee.mantissa0	= 0;
573 	sxp.ieee.mantissa1	= 0;
574 #ifdef USE_QUAD_DOUBLE
575 	sxp.ieee.mantissa2	= 0;
576 	sxp.ieee.mantissa3	= 0;
577 #endif
578 	r = sxp.value;
579 #endif
580 }
581 
make_inf(fpu_register & r,bool negative)582 PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative)
583 {
584 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
585 	fp_declare_init_shape(sxp, extended);
586 	sxp.ieee.exponent	= FP_EXTENDED_EXP_MAX;
587 	sxp.ieee.mantissa0	= 0x80000000;
588 	sxp.ieee.empty		= 0;
589 #else
590 	fp_declare_init_shape(sxp, double);
591 	sxp.ieee.exponent	= FP_DOUBLE_EXP_MAX;
592 	sxp.ieee.mantissa0	= 0;
593 #endif
594 	sxp.ieee.negative	= negative;
595 	sxp.ieee.mantissa1	= 0;
596 #ifdef USE_QUAD_DOUBLE
597 	sxp.ieee.mantissa2 = 0;
598 	sxp.ieee.mantissa3 = 0;
599 #endif
600 	r = sxp.value;
601 }
602 
fast_fgetexp(fpu_register const & r)603 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
604 {
605 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
606 	fp_declare_init_shape(sxp, extended);
607 	sxp.value = r;
608 	return ((int) sxp.ieee.exponent - FP_EXTENDED_EXP_BIAS);
609 #else
610 	fp_declare_init_shape(sxp, double);
611 	sxp.value = r;
612 	return ((int) sxp.ieee.exponent - FP_DOUBLE_EXP_BIAS);
613 #endif
614 }
615 
616 // Normalize to range 1..2
fast_remove_exponent(fpu_register & r)617 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
618 {
619 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
620 	fp_declare_init_shape(sxp, extended);
621 	sxp.value = r;
622 	sxp.ieee.exponent = FP_EXTENDED_EXP_BIAS;
623 #else
624 	fp_declare_init_shape(sxp, double);
625 	sxp.value = r;
626 	sxp.ieee.exponent = FP_DOUBLE_EXP_BIAS;
627 #endif
628 	r = sxp.value;
629 }
630 
631 // The sign of the quotient is the exclusive-OR of the sign bits
632 // of the source and destination operands.
get_quotient_sign(fpu_register const & ra,fpu_register const & rb)633 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
634 {
635 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
636 	fp_declare_init_shape(sap, extended);
637 	fp_declare_init_shape(sbp, extended);
638 #else
639 	fp_declare_init_shape(sap, double);
640 	fp_declare_init_shape(sbp, double);
641 #endif
642 	sap.value = ra;
643 	sbp.value = rb;
644 	return ((sap.ieee.negative ^ sbp.ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
645 }
646 
647 /* -------------------------------------------------------------------------- */
648 /* --- Math functions                                                     --- */
649 /* -------------------------------------------------------------------------- */
650 
651 #if defined(FPU_USE_ISO_C99) && (defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE))
652 # ifdef HAVE_LOGL
653 #  define fp_log	logl
654 # endif
655 # ifdef HAVE_LOG1PL
656 #  define fp_log1p	log1pl
657 # endif
658 # ifdef HAVE_EXPM1L
659 #  define fp_expm1	expm1l
660 # endif
661 # ifdef HAVE_LOG10L
662 #  define fp_log10	log10l
663 # endif
664 # ifdef HAVE_LOG2L
665 #  define fp_log2	log2l
666 # endif
667 # ifdef HAVE_EXPL
668 #  define fp_exp	expl
669 # endif
670 # ifdef HAVE_POWL
671 #  define fp_pow	powl
672 # endif
673 # if defined(HAVE_EXP10L)
674 #  define fp_pow10	exp10l
675 # elif defined(HAVE_POW10L)
676 #  define fp_pow10	pow10l
677 # else
678 #  define fp_pow10(x)	fp_pow(LD(10.0), x)
679 # endif
680 # if defined(HAVE_EXP2L)
681 #  define fp_pow2	exp2l
682 # elif defined(HAVE_POW2L)
683 #  define fp_pow2	pow2l
684 # else
685 #  define fp_pow2(x)	fp_pow(LD(2.0), x)
686 # endif
687 # ifdef HAVE_FABSL
688 #  define fp_fabs	fabsl
689 # endif
690 # ifdef HAVE_SQRTL
691 #  define fp_sqrt	sqrtl
692 # endif
693 # ifdef HAVE_SINL
694 #  define fp_sin	sinl
695 # endif
696 # ifdef HAVE_COSL
697 #  define fp_cos	cosl
698 # endif
699 # ifdef HAVE_TANL
700 #  define fp_tan	tanl
701 # endif
702 # ifdef HAVE_SINHL
703 #  define fp_sinh	sinhl
704 # endif
705 # ifdef HAVE_COSHL
706 #  define fp_cosh	coshl
707 # endif
708 # ifdef HAVE_TANHL
709 #  define fp_tanh	tanhl
710 # endif
711 # ifdef HAVE_ASINL
712 #  define fp_asin	asinl
713 # endif
714 # ifdef HAVE_ACOSL
715 #  define fp_acos	acosl
716 # endif
717 # ifdef HAVE_ATANL
718 #  define fp_atan	atanl
719 # endif
720 # ifdef HAVE_ASINHL
721 #  define fp_asinh	asinhl
722 # endif
723 # ifdef HAVE_ACOSHL
724 #  define fp_acosh	acoshl
725 # endif
726 # ifdef HAVE_ATANHL
727 #  define fp_atanh	atanhl
728 # endif
729 # ifdef HAVE_FLOORL
730 #  define fp_floor	floorl
731 # endif
732 # ifdef HAVE_CEILL
733 #  define fp_ceil	ceill
734 # endif
735 #endif
736 
737 #ifndef fp_log
738 # define fp_log		log
739 #endif
740 #ifndef fp_log1p
741 # define fp_log1p	log1p
742 #endif
743 #ifndef fp_expm1
744 # define fp_expm1	expm1
745 #endif
746 #ifndef fp_log10
747 # define fp_log10	log10
748 #endif
749 #ifndef fp_log2
750 # define fp_log2	log2
751 #endif
752 #ifndef fp_exp
753 # define fp_exp		exp
754 #endif
755 #ifndef fp_pow
756 # define fp_pow		pow
757 #endif
758 #ifndef fp_pow10
759 # if !defined(__FreeBSD__)
760 #  define fp_pow10	pow10
761 # else
762 #  define fp_pow10(x)	pow(10,x)
763 # endif
764 #endif
765 #ifndef fp_pow2
766 # ifdef HAVE_POW2
767 #   define fp_pow2	pow2
768 # else
769 #   define fp_pow2	exp2
770 # endif
771 #endif
772 #ifndef fp_fabs
773 # define fp_fabs	fabs
774 #endif
775 #ifndef fp_sqrt
776 # define fp_sqrt	sqrt
777 #endif
778 #ifndef fp_sin
779 # define fp_sin		sin
780 #endif
781 #ifndef fp_cos
782 # define fp_cos		cos
783 #endif
784 #ifndef fp_tan
785 # define fp_tan		tan
786 #endif
787 #ifndef fp_sinh
788 # define fp_sinh	sinh
789 #endif
790 #ifndef fp_cosh
791 # define fp_cosh	cosh
792 #endif
793 #ifndef fp_tanh
794 # define fp_tanh	tanh
795 #endif
796 #ifndef fp_asin
797 # define fp_asin	asin
798 #endif
799 #ifndef fp_acos
800 # define fp_acos	acos
801 #endif
802 #ifndef fp_atan
803 # define fp_atan	atan
804 #endif
805 #ifndef fp_asinh
806 # define fp_asinh	asinh
807 #endif
808 #ifndef fp_acosh
809 # define fp_acosh	acosh
810 #endif
811 #ifndef fp_atanh
812 # define fp_atanh	atanh
813 #endif
814 #ifndef fp_floor
815 # define fp_floor	floor
816 #endif
817 #ifndef fp_ceil
818 # define fp_ceil	ceil
819 #endif
820 
821 #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
822 // Assembly optimized support functions. Taken from glibc 2.2.2
823 
824 #undef fp_log
825 #define fp_log fp_do_log
826 
fp_do_log(fpu_extended x)827 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
828 {
829 	fpu_extended value;
830 	__asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
831 	return value;
832 }
833 
834 #undef fp_log10
835 #define fp_log10 fp_do_log10
836 
fp_do_log10(fpu_extended x)837 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
838 {
839 	fpu_extended value;
840 	__asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
841 	return value;
842 }
843 
844 #if !defined(HAVE_EXPL)
845 #undef fp_exp
846 #define fp_exp fp_do_exp
847 
fp_do_exp(fpu_extended x)848 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
849 {
850 	fpu_extended value, exponent;
851 	if (isinf(x))
852 	{
853 		if(isneg(x))
854 			return 0.;
855 		else
856 			return x;
857 	}
858 	__asm__ __volatile__("fldl2e                    # e^x = 2^(x * log2(e))\n\t"
859 				 "fmul      %%st(1)         # x * log2(e)\n\t"
860 				 "fst       %%st(1)\n\t"
861 				 "frndint                   # int(x * log2(e))\n\t"
862 				 "fxch\n\t"
863 				 "fsub      %%st(1)         # fract(x * log2(e))\n\t"
864 				 "f2xm1                     # 2^(fract(x * log2(e))) - 1\n\t"
865 				 : "=t" (value), "=u" (exponent) : "0" (x));
866 	value += 1.0;
867 	__asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
868 	return value;
869 }
870 #endif
871 
872 #if !defined(HAVE_EXP10L) && !defined(HAVE_POW10L)
873 #undef fp_pow
874 #define fp_pow fp_do_pow
875 
876 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
877 #endif
878 
879 #undef fp_fabs
880 #define fp_fabs fp_do_fabs
881 
fp_do_fabs(fpu_extended x)882 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
883 {
884 	fpu_extended value;
885 	__asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
886 	return value;
887 }
888 
889 #undef fp_sqrt
890 #define fp_sqrt fp_do_sqrt
891 
fp_do_sqrt(fpu_extended x)892 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
893 {
894 	fpu_extended value;
895 	__asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
896 	return value;
897 }
898 
899 #ifndef ACCURATE_SIN_COS_TAN
900 #undef fp_sin
901 #define fp_sin fp_do_sin
902 
fp_do_sin(fpu_extended x)903 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
904 {
905 	fpu_extended value;
906 	__asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
907 	return value;
908 }
909 
910 #undef fp_cos
911 #define fp_cos fp_do_cos
912 
fp_do_cos(fpu_extended x)913 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
914 {
915 	fpu_extended value;
916 	__asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
917 	return value;
918 }
919 
920 #undef fp_tan
921 #define fp_tan fp_do_tan
922 
fp_do_tan(fpu_extended x)923 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
924 {
925 	fpu_extended value, value2;
926 	__asm__ __volatile__("fptan" : "=t" (value2), "=u" (value) : "0" (x));
927 	return value;
928 }
929 #endif /* ACCURATE_SIN_COS_TAN */
930 
931 #ifndef HAVE_EXPM1L
932 #undef fp_expm1
933 #define fp_expm1 fp_do_expm1
934 
935 // Returns: exp(X) - 1.0
fp_do_expm1(fpu_extended x)936 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
937 {
938 	fpu_extended value, exponent, temp, temp2;
939 	if (isinf(x))
940 	{
941 		if(isneg(x))
942 			return -1.;
943 		else
944 			return x;
945 	}
946 	__asm__ __volatile__("fldl2e                    # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
947 				 "fmul      %%st(1)         # x * log2(e)\n\t"
948 				 "fst       %%st(1)\n\t"
949 				 "frndint                   # int(x * log2(e))\n\t"
950 				 "fxch\n\t"
951 				 "fsub      %%st(1)         # fract(x * log2(e))\n\t"
952 				 "f2xm1                     # 2^(fract(x * log2(e))) - 1\n\t"
953 				 "fscale                    # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
954 				 : "=t" (value), "=u" (exponent) : "0" (x));
955 	__asm__ __volatile__("fld1    \n\t"
956                          "fscale  \n\t"
957                          : "=t" (temp), "=u" (temp2) : "0" (exponent));
958 	temp -= 1.0;
959 	return temp + value ? temp + value : x;
960 }
961 #endif
962 
963 #undef fp_sgn1
964 #define fp_sgn1 fp_do_sgn1
965 
fp_do_sgn1(fpu_extended x)966 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
967 {
968 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
969 	fp_declare_init_shape(sxp, extended);
970 	sxp.value = x;
971 	sxp.ieee_nan.exponent	= FP_EXTENDED_EXP_MAX>>1;
972 	sxp.ieee_nan.one		= 1;
973 #else
974 	fp_declare_init_shape(sxp, double);
975 	sxp.value = x;
976 	sxp.ieee_nan.exponent  = FP_DOUBLE_EXP_MAX>>1;
977 #endif
978 	sxp.ieee_nan.quiet_nan	= 0;
979 	sxp.ieee_nan.mantissa0	= 0;
980 	sxp.ieee_nan.mantissa1	= 0;
981 	x = sxp.value;
982 	return x;
983 }
984 
985 #ifndef HAVE_SINHL
986 #undef fp_sinh
987 #define fp_sinh fp_do_sinh
988 
fp_do_sinh(fpu_extended x)989 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
990 {
991 	if (isinf(x)) return x;
992 	fpu_extended exm1 = fp_expm1(fp_fabs(x));
993 	return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
994 }
995 #endif
996 
997 #ifndef HAVE_COSHL
998 #undef fp_cosh
999 #define fp_cosh fp_do_cosh
1000 
fp_do_cosh(fpu_extended x)1001 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
1002 {
1003 	fpu_extended ex = fp_exp(x);
1004 	return 0.5 * (ex + 1.0 / ex);
1005 }
1006 #endif
1007 
1008 #ifndef HAVE_TANHL
1009 #undef fp_tanh
1010 #define fp_tanh fp_do_tanh
1011 
fp_do_tanh(fpu_extended x)1012 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
1013 {
1014 	fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
1015 	return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
1016 }
1017 #endif
1018 
1019 #undef fp_atan2
1020 #define fp_atan2 fp_do_atan2
1021 
fp_do_atan2(fpu_extended y,fpu_extended x)1022 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
1023 {
1024 	fpu_extended value;
1025 	__asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
1026 	return value;
1027 }
1028 
1029 #ifndef HAVE_ASINL
1030 #undef fp_asin
1031 #define fp_asin fp_do_asin
1032 
fp_do_asin(fpu_extended x)1033 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
1034 {
1035 	return fp_atan2(x, fp_sqrt(1.0 - x * x));
1036 }
1037 #endif
1038 
1039 #ifndef HAVE_ACOSL
1040 #undef fp_acos
1041 #define fp_acos fp_do_acos
1042 
fp_do_acos(fpu_extended x)1043 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
1044 {
1045 	return fp_atan2(fp_sqrt(1.0 - x * x), x);
1046 }
1047 #endif
1048 
1049 #undef fp_atan
1050 #define fp_atan fp_do_atan
1051 
fp_do_atan(fpu_extended x)1052 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
1053 {
1054 	fpu_extended value;
1055 	__asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
1056 	return value;
1057 }
1058 
1059 #ifndef HAVE_LOG1PL
1060 #undef fp_log1p
1061 #define fp_log1p fp_do_log1p
1062 
1063 // Returns: ln(1.0 + X)
1064 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
1065 #endif
1066 
1067 #ifndef HAVE_ASINHL
1068 #undef fp_asinh
1069 #define fp_asinh fp_do_asinh
1070 
fp_do_asinh(fpu_extended x)1071 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
1072 {
1073 	fpu_extended y = fp_fabs(x);
1074 	return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
1075 }
1076 #endif
1077 
1078 #ifndef HAVE_ACOSHL
1079 #undef fp_acosh
1080 #define fp_acosh fp_do_acosh
1081 
fp_do_acosh(fpu_extended x)1082 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
1083 {
1084 	return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
1085 }
1086 #endif
1087 
1088 #ifndef HAVE_ATANHL
1089 #undef fp_atanh
1090 #define fp_atanh fp_do_atanh
1091 
fp_do_atanh(fpu_extended x)1092 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
1093 {
1094 	fpu_extended y = fp_fabs(x);
1095 	return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
1096 }
1097 #endif
1098 
1099 
1100 /*
1101  * LLVM 2.9 crashes on first definition,
1102  * clang with LLVM 3.x crashes on 2nd definition... sigh
1103  */
1104 #if defined(__clang__) || !defined(__llvm__)
1105 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode)						\
1106 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended __x)	\
1107 { \
1108 	register long double __value; \
1109 	register int __ignore; \
1110 	volatile unsigned short __cw; \
1111 	volatile unsigned short __cwtmp; \
1112 	__asm __volatile ("fnstcw %3\n\t" \
1113 					  "movzwl %3, %1\n\t" \
1114 					  "andl $0xf3ff, %1\n\t" \
1115 					  "orl %5, %1\n\t" \
1116 					  "movw %w1, %2\n\t" \
1117 					  "fldcw %2\n\t" \
1118 					  "frndint\n\t" \
1119 					  "fldcw %3" \
1120 					  : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \
1121 					  "=m" (__cw) \
1122 					  : "0" (__x), "i"(rounding_mode)); \
1123 	return __value; \
1124 }
1125 #else
1126 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode)						\
1127 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x)	\
1128 {																				\
1129 	volatile unsigned short cw;													\
1130 	__asm__ __volatile__("fnstcw %0" : "=m" (cw));										\
1131 	volatile unsigned short cw_temp = (cw & 0xf3ff) | (rounding_mode);			\
1132 	__asm__ __volatile__("fldcw %0" : : "m" (cw_temp));									\
1133 	fpu_extended value;															\
1134 	__asm__ __volatile__("frndint" : "=t" (value) : "0" (x));							\
1135 	__asm__ __volatile__("fldcw %0" : : "m" (cw));										\
1136 	return value;																\
1137 }
1138 #endif
1139 
1140 #undef fp_round_to_minus_infinity
1141 #ifdef HAVE_FLOORL
1142 #define fp_round_to_minus_infinity floorl
1143 #else
1144 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1145 DEFINE_ROUND_FUNC(minus_infinity, CW_RC_DOWN)
1146 #endif
1147 
1148 #undef fp_round_to_plus_infinity
1149 #ifdef HAVE_CEILL
1150 #define fp_round_to_plus_infinity ceill
1151 #else
1152 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1153 DEFINE_ROUND_FUNC(plus_infinity, CW_RC_UP)
1154 #endif
1155 
1156 #undef fp_round_to_zero
1157 #ifdef HAVE_TRUNCL
1158 #define fp_round_to_zero truncl
1159 #else
1160 #define fp_round_to_zero fp_do_round_to_zero
1161 DEFINE_ROUND_FUNC(zero, CW_RC_ZERO)
1162 #endif
1163 
1164 #undef fp_round_to_nearest
1165 #ifdef HAVE_ROUNDL
1166 #define fp_round_to_nearest roundl
1167 #else
1168 #define fp_round_to_nearest fp_do_round_to_nearest
1169 DEFINE_ROUND_FUNC(nearest, CW_RC_NEAR)
1170 #endif
1171 
1172 #undef fp_ceil
1173 #define fp_ceil fp_do_round_to_plus_infinity
1174 
1175 #undef fp_floor
1176 #define fp_floor fp_do_round_to_minus_infinity
1177 
1178 
1179 #endif /* USE_X87_ASSEMBLY */
1180 
1181 #ifndef fp_round_to_minus_infinity
1182 #define fp_round_to_minus_infinity(x) fp_floor(x)
1183 #endif
1184 
1185 #ifndef fp_round_to_plus_infinity
1186 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1187 #endif
1188 
1189 #ifndef fp_round_to_zero
1190 #define fp_round_to_zero(x) ((int)(x))
1191 #endif
1192 
1193 #ifndef fp_round_to_nearest
1194 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1195 #endif
1196 
1197 #endif /* FPU_MATHLIB_H */
1198