1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS)
5 *
6 * Inspired by Christian Bauer's Basilisk II
7 *
8 * This file is part of the ARAnyM project which builds a new and powerful
9 * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
10 *
11 * MC68881/68040 fpu emulation
12 *
13 * Original UAE FPU, copyright 1996 Herman ten Brugge
14 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
15 * New framework, copyright 2000-2001 Gwenole Beauchesne
16 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
17 *
18 * ARAnyM is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * ARAnyM is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with ARAnyM; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 */
32
33 #ifndef FPU_MATHLIB_H
34 #define FPU_MATHLIB_H
35
36 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
37 #undef PUBLIC
38 #define PUBLIC extern
39
40 #undef PRIVATE
41 #define PRIVATE static
42
43 #undef FFPU
44 #define FFPU /**/
45
46 #undef FPU
47 #define FPU fpu.
48
49 // Define the following macro if branches are expensive. If so,
50 // integer-based isnan() and isinf() functions are implemented.
51 // TODO: move to Makefile.in
52 #define BRANCHES_ARE_EXPENSIVE 1
53
54 // Use ISO C99 extended-precision math functions (glibc 2.1+)
55 #define FPU_USE_ISO_C99 1
56
57 #if defined(FPU_USE_ISO_C99)
58 // NOTE: no prior <math.h> shall be included at this point
59 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
60 #define __USE_ISOC9X 1 // for glibc 2.1.X
61 #include <math.h>
62 #else
63 #include <cmath>
64 using namespace std;
65 #endif
66
67 /* -------------------------------------------------------------------------- */
68 /* --- Floating-point register types --- */
69 /* -------------------------------------------------------------------------- */
70
71 // Single : S 8*E 23*F
72 #define FP_SINGLE_EXP_MAX 0xff
73 #define FP_SINGLE_EXP_BIAS 0x7f
74
75 // Double : S 11*E 52*F
76 #define FP_DOUBLE_EXP_MAX 0x7ff
77 #define FP_DOUBLE_EXP_BIAS 0x3ff
78
79 // Extended : S 15*E 64*F
80 #define FP_EXTENDED_EXP_MAX 0x7fff
81 #define FP_EXTENDED_EXP_BIAS 0x3fff
82
83 // Zeroes : E = 0 & F = 0
84 // Infinities : E = MAX & F = 0
85 // Not-A-Number : E = MAX & F # 0
86
87 /* -------------------------------------------------------------------------- */
88 /* --- Floating-point type shapes (IEEE-compliant) --- */
89 /* -------------------------------------------------------------------------- */
90
91 // Taken from glibc 2.2.x: ieee754.h
92
93 // IEEE-754 float format
94 union fpu_single_shape {
95
96 fpu_single value;
97
98 /* This is the IEEE 754 single-precision format. */
99 struct {
100 #ifdef WORDS_BIGENDIAN
101 unsigned int negative:1;
102 unsigned int exponent:8;
103 unsigned int mantissa:23;
104 #else
105 unsigned int mantissa:23;
106 unsigned int exponent:8;
107 unsigned int negative:1;
108 #endif
109 } ieee;
110
111 /* This format makes it easier to see if a NaN is a signalling NaN. */
112 struct {
113 #ifdef WORDS_BIGENDIAN
114 unsigned int negative:1;
115 unsigned int exponent:8;
116 unsigned int quiet_nan:1;
117 unsigned int mantissa:22;
118 #else
119 unsigned int mantissa:22;
120 unsigned int quiet_nan:1;
121 unsigned int exponent:8;
122 unsigned int negative:1;
123 #endif
124 } ieee_nan;
125 };
126
127 // IEEE-754 double format
128 union fpu_double_shape {
129 fpu_double value;
130
131 /* This is the IEEE 754 double-precision format. */
132 struct {
133 #ifdef WORDS_BIGENDIAN
134 unsigned int negative:1;
135 unsigned int exponent:11;
136 /* Together these comprise the mantissa. */
137 unsigned int mantissa0:20;
138 unsigned int mantissa1:32;
139 #else
140 # if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
141 unsigned int mantissa0:20;
142 unsigned int exponent:11;
143 unsigned int negative:1;
144 unsigned int mantissa1:32;
145 # else
146 /* Together these comprise the mantissa. */
147 unsigned int mantissa1:32;
148 unsigned int mantissa0:20;
149 unsigned int exponent:11;
150 unsigned int negative:1;
151 # endif
152 #endif
153 } ieee;
154
155 /* This format makes it easier to see if a NaN is a signalling NaN. */
156 struct {
157 #ifdef WORDS_BIGENDIAN
158 unsigned int negative:1;
159 unsigned int exponent:11;
160 unsigned int quiet_nan:1;
161 /* Together these comprise the mantissa. */
162 unsigned int mantissa0:19;
163 unsigned int mantissa1:32;
164 #else
165 # if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
166 unsigned int mantissa0:19;
167 unsigned int quiet_nan:1;
168 unsigned int exponent:11;
169 unsigned int negative:1;
170 unsigned int mantissa1:32;
171 # else
172 /* Together these comprise the mantissa. */
173 unsigned int mantissa1:32;
174 unsigned int mantissa0:19;
175 unsigned int quiet_nan:1;
176 unsigned int exponent:11;
177 unsigned int negative:1;
178 # endif
179 #endif
180 } ieee_nan;
181
182 /* This format is used to extract the sign_exponent and mantissa parts only */
183 struct {
184 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
185 unsigned int msw:32;
186 unsigned int lsw:32;
187 #else
188 unsigned int lsw:32;
189 unsigned int msw:32;
190 #endif
191 } parts;
192 };
193
194 #ifdef USE_LONG_DOUBLE
195 // IEEE-854 long double format
196 union fpu_extended_shape {
197 fpu_extended value;
198
199 /* This is the IEEE 854 double-extended-precision format. */
200 struct {
201 #ifdef WORDS_BIGENDIAN
202 unsigned int negative:1;
203 unsigned int exponent:15;
204 unsigned int empty:16;
205 unsigned int mantissa0:32;
206 unsigned int mantissa1:32;
207 #else
208 # if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
209 unsigned int exponent:15;
210 unsigned int negative:1;
211 unsigned int empty:16;
212 unsigned int mantissa0:32;
213 unsigned int mantissa1:32;
214 # else
215 unsigned int mantissa1:32;
216 unsigned int mantissa0:32;
217 unsigned int exponent:15;
218 unsigned int negative:1;
219 unsigned int empty:16;
220 # endif
221 #endif
222 } ieee;
223
224 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
225 struct {
226 #ifdef WORDS_BIGENDIAN
227 unsigned int negative:1;
228 unsigned int exponent:15;
229 unsigned int empty:16;
230 unsigned int one:1;
231 unsigned int quiet_nan:1;
232 unsigned int mantissa0:30;
233 unsigned int mantissa1:32;
234 #else
235 # if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
236 unsigned int exponent:15;
237 unsigned int negative:1;
238 unsigned int empty:16;
239 unsigned int mantissa0:30;
240 unsigned int quiet_nan:1;
241 unsigned int one:1;
242 unsigned int mantissa1:32;
243 # else
244 unsigned int mantissa1:32;
245 unsigned int mantissa0:30;
246 unsigned int quiet_nan:1;
247 unsigned int one:1;
248 unsigned int exponent:15;
249 unsigned int negative:1;
250 unsigned int empty:16;
251 # endif
252 #endif
253 } ieee_nan;
254
255 /* This format is used to extract the sign_exponent and mantissa parts only */
256 struct {
257 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
258 unsigned int sign_exponent:16;
259 unsigned int empty:16;
260 unsigned int msw:32;
261 unsigned int lsw:32;
262 #else
263 unsigned int lsw:32;
264 unsigned int msw:32;
265 unsigned int sign_exponent:16;
266 unsigned int empty:16;
267 #endif
268 } parts;
269 };
270 #endif
271
272 #ifdef USE_QUAD_DOUBLE
273 // IEEE-854 quad double format
274 union fpu_extended_shape {
275 fpu_extended value;
276
277 /* This is the IEEE 854 quad-precision format. */
278 struct {
279 #ifdef WORDS_BIGENDIAN
280 unsigned int negative:1;
281 unsigned int exponent:15;
282 unsigned int mantissa0:16;
283 unsigned int mantissa1:32;
284 unsigned int mantissa2:32;
285 unsigned int mantissa3:32;
286 #else
287 unsigned int mantissa3:32;
288 unsigned int mantissa2:32;
289 unsigned int mantissa1:32;
290 unsigned int mantissa0:16;
291 unsigned int exponent:15;
292 unsigned int negative:1;
293 #endif
294 } ieee;
295
296 /* This is for NaNs in the IEEE 854 quad-precision format. */
297 struct {
298 #ifdef WORDS_BIGENDIAN
299 unsigned int negative:1;
300 unsigned int exponent:15;
301 unsigned int quiet_nan:1;
302 unsigned int mantissa0:15;
303 unsigned int mantissa1:32;
304 unsigned int mantissa2:32;
305 unsigned int mantissa3:32;
306 #else
307 unsigned int mantissa3:32;
308 unsigned int mantissa2:32;
309 unsigned int mantissa1:32;
310 unsigned int mantissa0:15;
311 unsigned int quiet_nan:1;
312 unsigned int exponent:15;
313 unsigned int negative:1;
314 #endif
315 } ieee_nan;
316
317 /* This format is used to extract the sign_exponent and mantissa parts only */
318 #if defined(HOST_FLOAT_WORDS_BIG_ENDIAN) && HOST_FLOAT_WORDS_BIG_ENDIAN
319 struct {
320 uae_u64 msw;
321 uae_u64 lsw;
322 } parts64;
323 struct {
324 uae_u32 w0;
325 uae_u32 w1;
326 uae_u32 w2;
327 uae_u32 w3;
328 } parts32;
329 #else
330 struct {
331 uae_u64 lsw;
332 uae_u64 msw;
333 } parts64;
334 struct {
335 uae_u32 w3;
336 uae_u32 w2;
337 uae_u32 w1;
338 uae_u32 w0;
339 } parts32;
340 #endif
341 };
342 #endif
343
344 // Declare a shape of the requested FP type
345 #define fp_declare_init_shape(psvar, ftype) \
346 fpu_ ## ftype ## _shape psvar
347
348 /* -------------------------------------------------------------------------- */
349 /* --- Extra Math Functions --- */
350 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
351 /* -------------------------------------------------------------------------- */
352
353 #undef isnan
354 #if 0 && defined(HAVE_ISNANL)
355 # define isnan(x) isnanl((x))
356 #else
357 # define isnan(x) fp_do_isnan((x))
358 #endif
359
fp_do_isnan(fpu_register const & r)360 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
361 {
362 #ifdef BRANCHES_ARE_EXPENSIVE
363 #if !defined(USE_LONG_DOUBLE)
364 fp_declare_init_shape(sxp, double);
365 sxp.value = r;
366 uae_s32 hx = sxp.parts.msw;
367 uae_s32 lx = sxp.parts.lsw;
368 hx &= 0x7fffffff;
369 hx |= (uae_u32)(lx | (-lx)) >> 31;
370 hx = 0x7ff00000 - hx;
371 return (int)(((uae_u32)hx) >> 31);
372 #elif defined(USE_QUAD_DOUBLE)
373 fp_declare_init_shape(sxp, extended);
374 sxp.value = r;
375 uae_s64 hx = sxp.parts64.msw;
376 uae_s64 lx = sxp.parts64.lsw;
377 hx &= 0x7fffffffffffffffLL;
378 hx |= (uae_u64)(lx | (-lx)) >> 63;
379 hx = 0x7fff000000000000LL - hx;
380 return (int)((uae_u64)hx >> 63);
381 #else
382 fp_declare_init_shape(sxp, extended);
383 sxp.value = r;
384 uae_s32 se = sxp.parts.sign_exponent;
385 uae_s32 hx = sxp.parts.msw;
386 uae_s32 lx = sxp.parts.lsw;
387 se = (se & 0x7fff) << 1;
388 lx |= hx & 0x7fffffff;
389 se |= (uae_u32)(lx | (-lx)) >> 31;
390 se = 0xfffe - se;
391 return (int)(((uae_u32)(se)) >> 31);
392 #endif
393 #else
394 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
395 fp_declare_init_shape(sxp, extended);
396 sxp.value = r;
397 return (sxp.ieee.exponent == FP_EXTENDED_EXP_MAX)
398 #else
399 fp_declare_init_shape(sxp, double);
400 sxp.value = r;
401 return (sxp.ieee.exponent == FP_DOUBLE_EXP_MAX)
402 #endif
403 && (sxp.ieee.mantissa0 & 0x7fffffff) != 0
404 && sxp.ieee.mantissa1 != 0
405 #ifdef USE_QUAD_DOUBLE
406 && sxp.ieee.mantissa2 != 0
407 && sxp.ieee.mantissa3 != 0
408 #endif
409 ;
410 #endif
411 }
412
413 #undef isinf
414 #if 0 && defined(HAVE_ISINFL)
415 # define isinf(x) isinfl((x))
416 #else
417 # define isinf(x) fp_do_isinf((x))
418 #endif
419
fp_do_isinf(fpu_register const & r)420 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
421 {
422 #ifdef BRANCHES_ARE_EXPENSIVE
423 #if !defined(USE_LONG_DOUBLE)
424 fp_declare_init_shape(sxp, double);
425 sxp.value = r;
426 uae_s32 hx = sxp.parts.msw;
427 uae_s32 lx = sxp.parts.lsw;
428 lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
429 lx |= -lx;
430 return ~(lx >> 31) & (hx >> 30);
431 #elif defined(USE_QUAD_DOUBLE)
432 fp_declare_init_shape(sxp, extended);
433 sxp.value = r;
434 uae_s64 hx = sxp.parts64.msw;
435 uae_s64 lx = sxp.parts64.lsw;
436 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437 lx |= -lx;
438 return ~(lx >> 63) & (hx >> 62);
439 #else
440 fp_declare_init_shape(sxp, extended);
441 sxp.value = r;
442 /* NOTE: This function should work for both m68k and native INFs. */
443 #if 0
444 uae_s32 se = sxp.parts.sign_exponent;
445 uae_s32 hx = sxp.parts.msw;
446 uae_s32 lx = sxp.parts.lsw;
447 /* This additional ^ 0x80000000 is necessary because in Intel's
448 internal representation of the implicit one is explicit.
449 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
450 #ifdef CPU_i386
451 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
452 #else
453 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
454 #endif
455 lx |= -lx;
456 se &= 0x8000;
457 return ~(lx >> 31) & (1 - (se >> 14));
458 #else
459 return sxp.ieee.exponent == FP_EXTENDED_EXP_MAX
460 && (sxp.ieee.mantissa0 & 0x7fffffff) == 0
461 && sxp.ieee.mantissa1 == 0;
462 #endif
463 #endif
464 #else
465 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
466 fp_declare_init_shape(sxp, extended);
467 sxp.value = r;
468 return (sxp.ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
469 #else
470 fp_declare_init_shape(sxp, double);
471 sxp.value = r;
472 return (sxp.ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
473 #endif
474 && (sxp.ieee.mantissa0 & 0x7fffffff) == 0
475 && sxp.ieee.mantissa1 == 0
476 #ifdef USE_QUAD_DOUBLE
477 && sxp.ieee.mantissa2 == 0
478 && sxp.ieee.mantissa3 == 0
479 #endif
480 ;
481 #endif
482 }
483
484 #undef isneg
485 #define isneg(x) fp_do_isneg((x))
486
fp_do_isneg(fpu_register const & r)487 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
488 {
489 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
490 fp_declare_init_shape(sxp, extended);
491 #else
492 fp_declare_init_shape(sxp, double);
493 #endif
494 sxp.value = r;
495 return sxp.ieee.negative;
496 }
497
498 #undef iszero
499 #define iszero(x) fp_do_iszero((x))
500
fp_do_iszero(fpu_register const & r)501 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
502 {
503 // TODO: BRANCHES_ARE_EXPENSIVE
504 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
505 fp_declare_init_shape(sxp, extended);
506 #else
507 fp_declare_init_shape(sxp, double);
508 #endif
509 sxp.value = r;
510 return (sxp.ieee.exponent == 0)
511 && (sxp.ieee.mantissa0 == 0)
512 && (sxp.ieee.mantissa1 == 0)
513 #ifdef USE_QUAD_DOUBLE
514 && (sxp.ieee.mantissa2 == 0)
515 && (sxp.ieee.mantissa3 == 0)
516 #endif
517 ;
518 }
519
get_dest_flags(fpu_register const & r)520 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
521 {
522 fl_dest.negative = isneg(r);
523 fl_dest.zero = iszero(r);
524 fl_dest.infinity = isinf(r);
525 fl_dest.nan = isnan(r);
526 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
527 }
528
get_source_flags(fpu_register const & r)529 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
530 {
531 fl_source.negative = isneg(r);
532 fl_source.zero = iszero(r);
533 fl_source.infinity = isinf(r);
534 fl_source.nan = isnan(r);
535 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
536 }
537
make_nan(fpu_register & r,bool negative)538 PRIVATE inline void FFPU make_nan(fpu_register & r, bool negative)
539 {
540 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
541 fp_declare_init_shape(sxp, extended);
542 sxp.ieee.exponent = FP_EXTENDED_EXP_MAX;
543 sxp.ieee.empty = 0;
544 sxp.ieee.mantissa0 = 0xffffffff;
545 #else
546 fp_declare_init_shape(sxp, double);
547 sxp.ieee.exponent = FP_DOUBLE_EXP_MAX;
548 sxp.ieee.mantissa0 = 0xfffff;
549 #endif
550 sxp.ieee.mantissa1 = 0xffffffff;
551 #ifdef USE_QUAD_DOUBLE
552 sxp.ieee.mantissa2 = 0xffffffff;
553 sxp.ieee.mantissa3 = 0xffffffff;
554 #endif
555 sxp.ieee.negative = negative;
556 r = sxp.value;
557 }
558
make_zero(fpu_register & r,bool negative)559 PRIVATE inline void FFPU make_zero(fpu_register & r, bool negative)
560 {
561 #if 1
562 r = negative ? -0.0 : +0.0;
563 #else
564 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
565 fp_declare_init_shape(sxp, extended);
566 sxp.ieee.empty = 0;
567 #else
568 fp_declare_init_shape(sxp, double);
569 #endif
570 sxp.ieee.negative = negative;
571 sxp.ieee.exponent = 0;
572 sxp.ieee.mantissa0 = 0;
573 sxp.ieee.mantissa1 = 0;
574 #ifdef USE_QUAD_DOUBLE
575 sxp.ieee.mantissa2 = 0;
576 sxp.ieee.mantissa3 = 0;
577 #endif
578 r = sxp.value;
579 #endif
580 }
581
make_inf(fpu_register & r,bool negative)582 PRIVATE inline void FFPU make_inf(fpu_register & r, bool negative)
583 {
584 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
585 fp_declare_init_shape(sxp, extended);
586 sxp.ieee.exponent = FP_EXTENDED_EXP_MAX;
587 sxp.ieee.mantissa0 = 0x80000000;
588 sxp.ieee.empty = 0;
589 #else
590 fp_declare_init_shape(sxp, double);
591 sxp.ieee.exponent = FP_DOUBLE_EXP_MAX;
592 sxp.ieee.mantissa0 = 0;
593 #endif
594 sxp.ieee.negative = negative;
595 sxp.ieee.mantissa1 = 0;
596 #ifdef USE_QUAD_DOUBLE
597 sxp.ieee.mantissa2 = 0;
598 sxp.ieee.mantissa3 = 0;
599 #endif
600 r = sxp.value;
601 }
602
fast_fgetexp(fpu_register const & r)603 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
604 {
605 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
606 fp_declare_init_shape(sxp, extended);
607 sxp.value = r;
608 return ((int) sxp.ieee.exponent - FP_EXTENDED_EXP_BIAS);
609 #else
610 fp_declare_init_shape(sxp, double);
611 sxp.value = r;
612 return ((int) sxp.ieee.exponent - FP_DOUBLE_EXP_BIAS);
613 #endif
614 }
615
616 // Normalize to range 1..2
fast_remove_exponent(fpu_register & r)617 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
618 {
619 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
620 fp_declare_init_shape(sxp, extended);
621 sxp.value = r;
622 sxp.ieee.exponent = FP_EXTENDED_EXP_BIAS;
623 #else
624 fp_declare_init_shape(sxp, double);
625 sxp.value = r;
626 sxp.ieee.exponent = FP_DOUBLE_EXP_BIAS;
627 #endif
628 r = sxp.value;
629 }
630
631 // The sign of the quotient is the exclusive-OR of the sign bits
632 // of the source and destination operands.
get_quotient_sign(fpu_register const & ra,fpu_register const & rb)633 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
634 {
635 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
636 fp_declare_init_shape(sap, extended);
637 fp_declare_init_shape(sbp, extended);
638 #else
639 fp_declare_init_shape(sap, double);
640 fp_declare_init_shape(sbp, double);
641 #endif
642 sap.value = ra;
643 sbp.value = rb;
644 return ((sap.ieee.negative ^ sbp.ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
645 }
646
647 /* -------------------------------------------------------------------------- */
648 /* --- Math functions --- */
649 /* -------------------------------------------------------------------------- */
650
651 #if defined(FPU_USE_ISO_C99) && (defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE))
652 # ifdef HAVE_LOGL
653 # define fp_log logl
654 # endif
655 # ifdef HAVE_LOG1PL
656 # define fp_log1p log1pl
657 # endif
658 # ifdef HAVE_EXPM1L
659 # define fp_expm1 expm1l
660 # endif
661 # ifdef HAVE_LOG10L
662 # define fp_log10 log10l
663 # endif
664 # ifdef HAVE_LOG2L
665 # define fp_log2 log2l
666 # endif
667 # ifdef HAVE_EXPL
668 # define fp_exp expl
669 # endif
670 # ifdef HAVE_POWL
671 # define fp_pow powl
672 # endif
673 # if defined(HAVE_EXP10L)
674 # define fp_pow10 exp10l
675 # elif defined(HAVE_POW10L)
676 # define fp_pow10 pow10l
677 # else
678 # define fp_pow10(x) fp_pow(LD(10.0), x)
679 # endif
680 # if defined(HAVE_EXP2L)
681 # define fp_pow2 exp2l
682 # elif defined(HAVE_POW2L)
683 # define fp_pow2 pow2l
684 # else
685 # define fp_pow2(x) fp_pow(LD(2.0), x)
686 # endif
687 # ifdef HAVE_FABSL
688 # define fp_fabs fabsl
689 # endif
690 # ifdef HAVE_SQRTL
691 # define fp_sqrt sqrtl
692 # endif
693 # ifdef HAVE_SINL
694 # define fp_sin sinl
695 # endif
696 # ifdef HAVE_COSL
697 # define fp_cos cosl
698 # endif
699 # ifdef HAVE_TANL
700 # define fp_tan tanl
701 # endif
702 # ifdef HAVE_SINHL
703 # define fp_sinh sinhl
704 # endif
705 # ifdef HAVE_COSHL
706 # define fp_cosh coshl
707 # endif
708 # ifdef HAVE_TANHL
709 # define fp_tanh tanhl
710 # endif
711 # ifdef HAVE_ASINL
712 # define fp_asin asinl
713 # endif
714 # ifdef HAVE_ACOSL
715 # define fp_acos acosl
716 # endif
717 # ifdef HAVE_ATANL
718 # define fp_atan atanl
719 # endif
720 # ifdef HAVE_ASINHL
721 # define fp_asinh asinhl
722 # endif
723 # ifdef HAVE_ACOSHL
724 # define fp_acosh acoshl
725 # endif
726 # ifdef HAVE_ATANHL
727 # define fp_atanh atanhl
728 # endif
729 # ifdef HAVE_FLOORL
730 # define fp_floor floorl
731 # endif
732 # ifdef HAVE_CEILL
733 # define fp_ceil ceill
734 # endif
735 #endif
736
737 #ifndef fp_log
738 # define fp_log log
739 #endif
740 #ifndef fp_log1p
741 # define fp_log1p log1p
742 #endif
743 #ifndef fp_expm1
744 # define fp_expm1 expm1
745 #endif
746 #ifndef fp_log10
747 # define fp_log10 log10
748 #endif
749 #ifndef fp_log2
750 # define fp_log2 log2
751 #endif
752 #ifndef fp_exp
753 # define fp_exp exp
754 #endif
755 #ifndef fp_pow
756 # define fp_pow pow
757 #endif
758 #ifndef fp_pow10
759 # if !defined(__FreeBSD__)
760 # define fp_pow10 pow10
761 # else
762 # define fp_pow10(x) pow(10,x)
763 # endif
764 #endif
765 #ifndef fp_pow2
766 # ifdef HAVE_POW2
767 # define fp_pow2 pow2
768 # else
769 # define fp_pow2 exp2
770 # endif
771 #endif
772 #ifndef fp_fabs
773 # define fp_fabs fabs
774 #endif
775 #ifndef fp_sqrt
776 # define fp_sqrt sqrt
777 #endif
778 #ifndef fp_sin
779 # define fp_sin sin
780 #endif
781 #ifndef fp_cos
782 # define fp_cos cos
783 #endif
784 #ifndef fp_tan
785 # define fp_tan tan
786 #endif
787 #ifndef fp_sinh
788 # define fp_sinh sinh
789 #endif
790 #ifndef fp_cosh
791 # define fp_cosh cosh
792 #endif
793 #ifndef fp_tanh
794 # define fp_tanh tanh
795 #endif
796 #ifndef fp_asin
797 # define fp_asin asin
798 #endif
799 #ifndef fp_acos
800 # define fp_acos acos
801 #endif
802 #ifndef fp_atan
803 # define fp_atan atan
804 #endif
805 #ifndef fp_asinh
806 # define fp_asinh asinh
807 #endif
808 #ifndef fp_acosh
809 # define fp_acosh acosh
810 #endif
811 #ifndef fp_atanh
812 # define fp_atanh atanh
813 #endif
814 #ifndef fp_floor
815 # define fp_floor floor
816 #endif
817 #ifndef fp_ceil
818 # define fp_ceil ceil
819 #endif
820
821 #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
822 // Assembly optimized support functions. Taken from glibc 2.2.2
823
824 #undef fp_log
825 #define fp_log fp_do_log
826
fp_do_log(fpu_extended x)827 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
828 {
829 fpu_extended value;
830 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
831 return value;
832 }
833
834 #undef fp_log10
835 #define fp_log10 fp_do_log10
836
fp_do_log10(fpu_extended x)837 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
838 {
839 fpu_extended value;
840 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
841 return value;
842 }
843
844 #if !defined(HAVE_EXPL)
845 #undef fp_exp
846 #define fp_exp fp_do_exp
847
fp_do_exp(fpu_extended x)848 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
849 {
850 fpu_extended value, exponent;
851 if (isinf(x))
852 {
853 if(isneg(x))
854 return 0.;
855 else
856 return x;
857 }
858 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
859 "fmul %%st(1) # x * log2(e)\n\t"
860 "fst %%st(1)\n\t"
861 "frndint # int(x * log2(e))\n\t"
862 "fxch\n\t"
863 "fsub %%st(1) # fract(x * log2(e))\n\t"
864 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
865 : "=t" (value), "=u" (exponent) : "0" (x));
866 value += 1.0;
867 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
868 return value;
869 }
870 #endif
871
872 #if !defined(HAVE_EXP10L) && !defined(HAVE_POW10L)
873 #undef fp_pow
874 #define fp_pow fp_do_pow
875
876 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
877 #endif
878
879 #undef fp_fabs
880 #define fp_fabs fp_do_fabs
881
fp_do_fabs(fpu_extended x)882 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
883 {
884 fpu_extended value;
885 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
886 return value;
887 }
888
889 #undef fp_sqrt
890 #define fp_sqrt fp_do_sqrt
891
fp_do_sqrt(fpu_extended x)892 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
893 {
894 fpu_extended value;
895 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
896 return value;
897 }
898
899 #ifndef ACCURATE_SIN_COS_TAN
900 #undef fp_sin
901 #define fp_sin fp_do_sin
902
fp_do_sin(fpu_extended x)903 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
904 {
905 fpu_extended value;
906 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
907 return value;
908 }
909
910 #undef fp_cos
911 #define fp_cos fp_do_cos
912
fp_do_cos(fpu_extended x)913 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
914 {
915 fpu_extended value;
916 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
917 return value;
918 }
919
920 #undef fp_tan
921 #define fp_tan fp_do_tan
922
fp_do_tan(fpu_extended x)923 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
924 {
925 fpu_extended value, value2;
926 __asm__ __volatile__("fptan" : "=t" (value2), "=u" (value) : "0" (x));
927 return value;
928 }
929 #endif /* ACCURATE_SIN_COS_TAN */
930
931 #ifndef HAVE_EXPM1L
932 #undef fp_expm1
933 #define fp_expm1 fp_do_expm1
934
935 // Returns: exp(X) - 1.0
fp_do_expm1(fpu_extended x)936 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
937 {
938 fpu_extended value, exponent, temp, temp2;
939 if (isinf(x))
940 {
941 if(isneg(x))
942 return -1.;
943 else
944 return x;
945 }
946 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
947 "fmul %%st(1) # x * log2(e)\n\t"
948 "fst %%st(1)\n\t"
949 "frndint # int(x * log2(e))\n\t"
950 "fxch\n\t"
951 "fsub %%st(1) # fract(x * log2(e))\n\t"
952 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
953 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
954 : "=t" (value), "=u" (exponent) : "0" (x));
955 __asm__ __volatile__("fld1 \n\t"
956 "fscale \n\t"
957 : "=t" (temp), "=u" (temp2) : "0" (exponent));
958 temp -= 1.0;
959 return temp + value ? temp + value : x;
960 }
961 #endif
962
963 #undef fp_sgn1
964 #define fp_sgn1 fp_do_sgn1
965
fp_do_sgn1(fpu_extended x)966 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
967 {
968 #if defined(USE_LONG_DOUBLE) || defined(USE_QUAD_DOUBLE)
969 fp_declare_init_shape(sxp, extended);
970 sxp.value = x;
971 sxp.ieee_nan.exponent = FP_EXTENDED_EXP_MAX>>1;
972 sxp.ieee_nan.one = 1;
973 #else
974 fp_declare_init_shape(sxp, double);
975 sxp.value = x;
976 sxp.ieee_nan.exponent = FP_DOUBLE_EXP_MAX>>1;
977 #endif
978 sxp.ieee_nan.quiet_nan = 0;
979 sxp.ieee_nan.mantissa0 = 0;
980 sxp.ieee_nan.mantissa1 = 0;
981 x = sxp.value;
982 return x;
983 }
984
985 #ifndef HAVE_SINHL
986 #undef fp_sinh
987 #define fp_sinh fp_do_sinh
988
fp_do_sinh(fpu_extended x)989 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
990 {
991 if (isinf(x)) return x;
992 fpu_extended exm1 = fp_expm1(fp_fabs(x));
993 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
994 }
995 #endif
996
997 #ifndef HAVE_COSHL
998 #undef fp_cosh
999 #define fp_cosh fp_do_cosh
1000
fp_do_cosh(fpu_extended x)1001 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
1002 {
1003 fpu_extended ex = fp_exp(x);
1004 return 0.5 * (ex + 1.0 / ex);
1005 }
1006 #endif
1007
1008 #ifndef HAVE_TANHL
1009 #undef fp_tanh
1010 #define fp_tanh fp_do_tanh
1011
fp_do_tanh(fpu_extended x)1012 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
1013 {
1014 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
1015 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
1016 }
1017 #endif
1018
1019 #undef fp_atan2
1020 #define fp_atan2 fp_do_atan2
1021
fp_do_atan2(fpu_extended y,fpu_extended x)1022 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
1023 {
1024 fpu_extended value;
1025 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
1026 return value;
1027 }
1028
1029 #ifndef HAVE_ASINL
1030 #undef fp_asin
1031 #define fp_asin fp_do_asin
1032
fp_do_asin(fpu_extended x)1033 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
1034 {
1035 return fp_atan2(x, fp_sqrt(1.0 - x * x));
1036 }
1037 #endif
1038
1039 #ifndef HAVE_ACOSL
1040 #undef fp_acos
1041 #define fp_acos fp_do_acos
1042
fp_do_acos(fpu_extended x)1043 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
1044 {
1045 return fp_atan2(fp_sqrt(1.0 - x * x), x);
1046 }
1047 #endif
1048
1049 #undef fp_atan
1050 #define fp_atan fp_do_atan
1051
fp_do_atan(fpu_extended x)1052 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
1053 {
1054 fpu_extended value;
1055 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
1056 return value;
1057 }
1058
1059 #ifndef HAVE_LOG1PL
1060 #undef fp_log1p
1061 #define fp_log1p fp_do_log1p
1062
1063 // Returns: ln(1.0 + X)
1064 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
1065 #endif
1066
1067 #ifndef HAVE_ASINHL
1068 #undef fp_asinh
1069 #define fp_asinh fp_do_asinh
1070
fp_do_asinh(fpu_extended x)1071 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
1072 {
1073 fpu_extended y = fp_fabs(x);
1074 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
1075 }
1076 #endif
1077
1078 #ifndef HAVE_ACOSHL
1079 #undef fp_acosh
1080 #define fp_acosh fp_do_acosh
1081
fp_do_acosh(fpu_extended x)1082 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
1083 {
1084 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
1085 }
1086 #endif
1087
1088 #ifndef HAVE_ATANHL
1089 #undef fp_atanh
1090 #define fp_atanh fp_do_atanh
1091
fp_do_atanh(fpu_extended x)1092 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
1093 {
1094 fpu_extended y = fp_fabs(x);
1095 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
1096 }
1097 #endif
1098
1099
1100 /*
1101 * LLVM 2.9 crashes on first definition,
1102 * clang with LLVM 3.x crashes on 2nd definition... sigh
1103 */
1104 #if defined(__clang__) || !defined(__llvm__)
1105 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1106 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended __x) \
1107 { \
1108 register long double __value; \
1109 register int __ignore; \
1110 volatile unsigned short __cw; \
1111 volatile unsigned short __cwtmp; \
1112 __asm __volatile ("fnstcw %3\n\t" \
1113 "movzwl %3, %1\n\t" \
1114 "andl $0xf3ff, %1\n\t" \
1115 "orl %5, %1\n\t" \
1116 "movw %w1, %2\n\t" \
1117 "fldcw %2\n\t" \
1118 "frndint\n\t" \
1119 "fldcw %3" \
1120 : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \
1121 "=m" (__cw) \
1122 : "0" (__x), "i"(rounding_mode)); \
1123 return __value; \
1124 }
1125 #else
1126 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1127 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1128 { \
1129 volatile unsigned short cw; \
1130 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1131 volatile unsigned short cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1132 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1133 fpu_extended value; \
1134 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1135 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1136 return value; \
1137 }
1138 #endif
1139
1140 #undef fp_round_to_minus_infinity
1141 #ifdef HAVE_FLOORL
1142 #define fp_round_to_minus_infinity floorl
1143 #else
1144 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1145 DEFINE_ROUND_FUNC(minus_infinity, CW_RC_DOWN)
1146 #endif
1147
1148 #undef fp_round_to_plus_infinity
1149 #ifdef HAVE_CEILL
1150 #define fp_round_to_plus_infinity ceill
1151 #else
1152 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1153 DEFINE_ROUND_FUNC(plus_infinity, CW_RC_UP)
1154 #endif
1155
1156 #undef fp_round_to_zero
1157 #ifdef HAVE_TRUNCL
1158 #define fp_round_to_zero truncl
1159 #else
1160 #define fp_round_to_zero fp_do_round_to_zero
1161 DEFINE_ROUND_FUNC(zero, CW_RC_ZERO)
1162 #endif
1163
1164 #undef fp_round_to_nearest
1165 #ifdef HAVE_ROUNDL
1166 #define fp_round_to_nearest roundl
1167 #else
1168 #define fp_round_to_nearest fp_do_round_to_nearest
1169 DEFINE_ROUND_FUNC(nearest, CW_RC_NEAR)
1170 #endif
1171
1172 #undef fp_ceil
1173 #define fp_ceil fp_do_round_to_plus_infinity
1174
1175 #undef fp_floor
1176 #define fp_floor fp_do_round_to_minus_infinity
1177
1178
1179 #endif /* USE_X87_ASSEMBLY */
1180
1181 #ifndef fp_round_to_minus_infinity
1182 #define fp_round_to_minus_infinity(x) fp_floor(x)
1183 #endif
1184
1185 #ifndef fp_round_to_plus_infinity
1186 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1187 #endif
1188
1189 #ifndef fp_round_to_zero
1190 #define fp_round_to_zero(x) ((int)(x))
1191 #endif
1192
1193 #ifndef fp_round_to_nearest
1194 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1195 #endif
1196
1197 #endif /* FPU_MATHLIB_H */
1198