1 /* OpenCL built-in library: SLEEF C fallback using libm / compiler builtins
2
3 Copyright (c) 2017 Michal Babej / Tampere University of Technology
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #define _GNU_SOURCE
25 #include <float.h>
26 #include <limits.h>
27 #include <math.h>
28 #include <stdint.h>
29
30 #include "sleef.h"
31 #include "sleef_cl.h"
32 #include "rename.h"
33
34 //##################################################################
35
36 static int64_t
doubleToRawLongBits(double d)37 doubleToRawLongBits (double d)
38 {
39 union
40 {
41 double f;
42 int64_t i;
43 } tmp;
44 tmp.f = d;
45 return tmp.i;
46 }
47
48 static double
longBitsToDouble(int64_t i)49 longBitsToDouble (int64_t i)
50 {
51 union
52 {
53 double f;
54 int64_t i;
55 } tmp;
56 tmp.i = i;
57 return tmp.f;
58 }
59
60 static double
fabsk(double x)61 fabsk (double x)
62 {
63 return longBitsToDouble (0x7fffffffffffffffLL & doubleToRawLongBits (x));
64 }
65
66 static double
mulsign(double x,double y)67 mulsign (double x, double y)
68 {
69 return longBitsToDouble (doubleToRawLongBits (x)
70 ^ (doubleToRawLongBits (y) & (1LL << 63)));
71 }
72
73 //##################################################################
74
75 #define INFINITYf ((float)INFINITY)
76 #define NANf ((float)NAN)
77
78 static int32_t
floatToRawIntBits(float d)79 floatToRawIntBits (float d)
80 {
81 union
82 {
83 float f;
84 int32_t i;
85 } tmp;
86 tmp.f = d;
87 return tmp.i;
88 }
89
90 static float
intBitsToFloat(int32_t i)91 intBitsToFloat (int32_t i)
92 {
93 union
94 {
95 float f;
96 int32_t i;
97 } tmp;
98 tmp.i = i;
99 return tmp.f;
100 }
101
102 static float
fabsfk(float x)103 fabsfk (float x)
104 {
105 return intBitsToFloat (0x7fffffffL & floatToRawIntBits (x));
106 }
107
108 static float
mulsignf(float x,float y)109 mulsignf (float x, float y)
110 {
111 return intBitsToFloat (floatToRawIntBits (x)
112 ^ (floatToRawIntBits (y) & (1 << 31)));
113 }
114
115 //##################################################################
116
117
118 double
xsin(double x)119 xsin (double x)
120 {
121 return __builtin_sin (x);
122 }
123 double
xcos(double x)124 xcos (double x)
125 {
126 return __builtin_cos (x);
127 }
128 double
xtan(double x)129 xtan (double x)
130 {
131 return __builtin_tan (x);
132 }
133 double
xasin(double x)134 xasin (double x)
135 {
136 return __builtin_asin (x);
137 }
138 double
xacos(double x)139 xacos (double x)
140 {
141 return __builtin_acos (x);
142 }
143 double
xatan(double x)144 xatan (double x)
145 {
146 return __builtin_atan (x);
147 }
148 double
xatan2(double x,double y)149 xatan2 (double x, double y)
150 {
151 return __builtin_atan2 (x, y);
152 }
153 double
xlog(double x)154 xlog (double x)
155 {
156 return __builtin_log (x);
157 }
158 double
xcbrt(double x)159 xcbrt (double x)
160 {
161 return __builtin_cbrt (x);
162 }
163
164 double
xsin_u1(double x)165 xsin_u1 (double x)
166 {
167 return __builtin_sin (x);
168 }
169 double
xcos_u1(double x)170 xcos_u1 (double x)
171 {
172 return __builtin_cos (x);
173 }
174 double
xtan_u1(double x)175 xtan_u1 (double x)
176 {
177 return __builtin_tan (x);
178 }
179 double
xasin_u1(double x)180 xasin_u1 (double x)
181 {
182 return __builtin_asin (x);
183 }
184 double
xacos_u1(double x)185 xacos_u1 (double x)
186 {
187 return __builtin_acos (x);
188 }
189 double
xatan_u1(double x)190 xatan_u1 (double x)
191 {
192 return __builtin_atan (x);
193 }
194 double
xatan2_u1(double x,double y)195 xatan2_u1 (double x, double y)
196 {
197 return __builtin_atan2 (x, y);
198 }
199 double
xlog_u1(double x)200 xlog_u1 (double x)
201 {
202 return __builtin_log (x);
203 }
204 double
xcbrt_u1(double x)205 xcbrt_u1 (double x)
206 {
207 return __builtin_cbrt (x);
208 }
209
210 double
xexp(double x)211 xexp (double x)
212 {
213 return __builtin_exp (x);
214 }
215
216 double
xpow(double x,double y)217 xpow (double x, double y)
218 {
219 return __builtin_pow (x, y);
220 }
221
222 double
xpown(double x,int y)223 xpown (double x, int y)
224 {
225 return __builtin_pow (x, (double)y);
226 }
227
228 double
xpowr(double x,double y)229 xpowr (double x, double y)
230 {
231 if (x < 0.0)
232 return NAN;
233 if (isnan(y))
234 return y;
235 double res = __builtin_pow (x, y);
236 return res;
237 }
238
239 double
xsinh(double x)240 xsinh (double x)
241 {
242 return __builtin_sinh (x);
243 }
244
245 double
xcosh(double x)246 xcosh (double x)
247 {
248 return __builtin_cosh (x);
249 }
250 double
xtanh(double x)251 xtanh (double x)
252 {
253 return __builtin_tanh (x);
254 }
255
256 double
xasinh(double x)257 xasinh (double x)
258 {
259 return __builtin_asinh (x);
260 }
261 double
xacosh(double x)262 xacosh (double x)
263 {
264 return __builtin_acosh (x);
265 }
266 double
xatanh(double x)267 xatanh (double x)
268 {
269 return __builtin_atanh (x);
270 }
271
272 double
xexp2(double x)273 xexp2 (double x)
274 {
275 return __builtin_exp2 (x);
276 }
277 double
xexp10(double x)278 xexp10 (double x)
279 {
280 return exp10 (x);
281 }
282
283 double
xexpm1(double x)284 xexpm1 (double x)
285 {
286 return __builtin_expm1 (x);
287 }
288
289 double
xlog10(double x)290 xlog10 (double x)
291 {
292 return __builtin_log10 (x);
293 }
294 double
xlog1p(double x)295 xlog1p (double x)
296 {
297 return __builtin_log1p (x);
298 }
299
300 double
xsinpi_u05(double x)301 xsinpi_u05 (double x)
302 {
303 return __builtin_sin (x * (double)M_PI);
304 }
305 double
xcospi_u05(double x)306 xcospi_u05 (double x)
307 {
308 return __builtin_cos (x * (double)M_PI);
309 }
310
311 Sleef_double2
xsincos(double x)312 xsincos (double x)
313 {
314 Sleef_double2 tmp;
315 sincos (x, &tmp.x, &tmp.y);
316 return tmp;
317 }
318
319 Sleef_double2
xsincos_u1(double x)320 xsincos_u1 (double x)
321 {
322 return xsincos (x);
323 }
324
325 double
xldexp(double x,int k)326 xldexp (double x, int k)
327 {
328 return __builtin_ldexp (x, k);
329 }
330
331 int
xilogb(double x)332 xilogb (double x)
333 {
334 return __builtin_ilogb (x);
335 }
336
337 double
xfma(double x,double y,double z)338 xfma (double x, double y, double z)
339 {
340 return __builtin_fma (x, y, z);
341 }
342
343 double
xsqrt_u05(double x)344 xsqrt_u05 (double x)
345 {
346 return __builtin_sqrt (x);
347 }
348
349 double
xsqrt_u35(double x)350 xsqrt_u35 (double x)
351 {
352 return __builtin_sqrt (x);
353 }
354
355 double
xhypot_u05(double x,double y)356 xhypot_u05 (double x, double y)
357 {
358 return __builtin_hypot (x, y);
359 }
360
361 double
xhypot_u35(double x,double y)362 xhypot_u35 (double x, double y)
363 {
364 return __builtin_hypot (x, y);
365 }
366
367 double
xfabs(double x)368 xfabs (double x)
369 {
370 return __builtin_fabs (x);
371 }
372
373 double
xcopysign(double x,double y)374 xcopysign (double x, double y)
375 {
376 return __builtin_copysign (x, y);
377 }
378 double
xfmax(double x,double y)379 xfmax (double x, double y)
380 {
381 return __builtin_fmax (x, y);
382 }
383 double
xfmin(double x,double y)384 xfmin (double x, double y)
385 {
386 return __builtin_fmin (x, y);
387 }
388
389 double
xfdim(double x,double y)390 xfdim (double x, double y)
391 {
392 return __builtin_fdim (x, y);
393 }
394 double
xtrunc(double x)395 xtrunc (double x)
396 {
397 return __builtin_trunc (x);
398 }
399 double
xfloor(double x)400 xfloor (double x)
401 {
402 return __builtin_floor (x);
403 }
404
405 double
xceil(double x)406 xceil (double x)
407 {
408 return __builtin_ceil (x);
409 }
410 double
xround(double x)411 xround (double x)
412 {
413 return __builtin_round (x);
414 }
415 double
xrint(double x)416 xrint (double x)
417 {
418 return __builtin_rint (x);
419 }
420
421 double
xnextafter(double x,double y)422 xnextafter (double x, double y)
423 {
424 return __builtin_nextafter (x, y);
425 }
426
427 double
xfrfrexp(double x)428 xfrfrexp (double x)
429 {
430 union
431 {
432 double f;
433 uint64_t u;
434 } cx;
435
436 if (__builtin_isnan (x))
437 return x;
438
439 if (fabsk (x) < DBL_MIN)
440 x *= (1ULL << 63);
441
442 cx.f = x;
443 cx.u &= ~0x7ff0000000000000ULL;
444 cx.u |= 0x3fe0000000000000ULL;
445
446 if (__builtin_isinf (x))
447 cx.f = mulsign (INFINITY, x);
448 if (x == 0)
449 cx.f = x;
450
451 return cx.f;
452 }
453
454 int
xexpfrexp(double x)455 xexpfrexp (double x)
456 {
457 union
458 {
459 double f;
460 uint64_t u;
461 } cx;
462
463 int ret = 0;
464
465 if (fabsk (x) < DBL_MIN)
466 {
467 x *= (1ULL << 63);
468 ret = -63;
469 }
470
471 cx.f = x;
472 ret += (int32_t) (((cx.u >> 52) & 0x7ff)) - 0x3fe;
473
474 if (x == 0 || __builtin_isnan (x) || __builtin_isinf (x))
475 ret = 0;
476
477 return ret;
478 }
479
480 double
xfmod(double x,double y)481 xfmod (double x, double y)
482 {
483 return __builtin_fmod (x, y);
484 }
485
486 Sleef_double2
xmodf(double x)487 xmodf (double x)
488 {
489 Sleef_double2 res;
490 double tmp;
491 res.x = __builtin_modf (x, &tmp);
492 res.y = tmp;
493 return res;
494 }
495
496 double
xlgamma_u1(double x)497 xlgamma_u1 (double x)
498 {
499 return __builtin_lgamma (x);
500 }
501 Sleef_double2
xlgamma_r_u1(double x)502 xlgamma_r_u1 (double x)
503 {
504 Sleef_double2 ret;
505 int sign;
506 ret.x = lgamma_r (x, &sign);
507 ret.y = (sign > 0 ? 1.0 : -1.0);
508 return ret;
509 }
510 double
xtgamma_u1(double x)511 xtgamma_u1 (double x)
512 {
513 return __builtin_tgamma (x);
514 }
515 double
xerf_u1(double x)516 xerf_u1 (double x)
517 {
518 return __builtin_erf (x);
519 }
520 double
xerfc_u15(double x)521 xerfc_u15 (double x)
522 {
523 return __builtin_erfc (x);
524 }
525
526 // *********************************************************************
527 // *********************************************************************
528 // *********************************************************************
529 // *********************************************************************
530
531 float
xsinf(float x)532 xsinf (float x)
533 {
534 return __builtin_sinf (x);
535 }
536 float
xcosf(float x)537 xcosf (float x)
538 {
539 return __builtin_cosf (x);
540 }
541 float
xtanf(float x)542 xtanf (float x)
543 {
544 return __builtin_tanf (x);
545 }
546 float
xasinf(float x)547 xasinf (float x)
548 {
549 return __builtin_asinf (x);
550 }
551 float
xacosf(float x)552 xacosf (float x)
553 {
554 return __builtin_acosf (x);
555 }
556 float
xatanf(float x)557 xatanf (float x)
558 {
559 return __builtin_atanf (x);
560 }
561 float
xatan2f(float x,float y)562 xatan2f (float x, float y)
563 {
564 return __builtin_atan2f (x, y);
565 }
566 float
xlogf(float x)567 xlogf (float x)
568 {
569 return __builtin_logf (x);
570 }
571 float
xcbrtf(float x)572 xcbrtf (float x)
573 {
574 return __builtin_cbrtf (x);
575 }
576
577 float
xsinf_u1(float x)578 xsinf_u1 (float x)
579 {
580 return __builtin_sinf (x);
581 }
582 float
xcosf_u1(float x)583 xcosf_u1 (float x)
584 {
585 return __builtin_cosf (x);
586 }
587 float
xtanf_u1(float x)588 xtanf_u1 (float x)
589 {
590 return __builtin_tanf (x);
591 }
592 float
xasinf_u1(float x)593 xasinf_u1 (float x)
594 {
595 return __builtin_asinf (x);
596 }
597 float
xacosf_u1(float x)598 xacosf_u1 (float x)
599 {
600 return __builtin_acosf (x);
601 }
602 float
xatanf_u1(float x)603 xatanf_u1 (float x)
604 {
605 return __builtin_atanf (x);
606 }
607 float
xatan2f_u1(float x,float y)608 xatan2f_u1 (float x, float y)
609 {
610 return __builtin_atan2f (x, y);
611 }
612 float
xlogf_u1(float x)613 xlogf_u1 (float x)
614 {
615 return __builtin_logf (x);
616 }
617 float
xcbrtf_u1(float x)618 xcbrtf_u1 (float x)
619 {
620 return __builtin_cbrtf (x);
621 }
622
623 float
xexpf(float x)624 xexpf (float x)
625 {
626 return __builtin_expf (x);
627 }
628
629 float
xpowf(float x,float y)630 xpowf (float x, float y)
631 {
632
633 return (float) __builtin_pow ((double)x, (double)y);
634 }
635
636 float
xpownf(float x,int y)637 xpownf (float x, int y)
638 {
639 return (float) __builtin_pow ((double)x, (double)y);
640 }
641
642 float
xpowrf(float x,float y)643 xpowrf (float x, float y)
644 {
645 if (x < 0.0f)
646 return NAN;
647 float res = (float) __builtin_pow ((double)x, (double)y);
648 return res;
649 }
650
651 float
xsinhf(float x)652 xsinhf (float x)
653 {
654 return __builtin_sinhf (x);
655 }
656 float
xcoshf(float x)657 xcoshf (float x)
658 {
659 return __builtin_coshf (x);
660 }
661 float
xtanhf(float x)662 xtanhf (float x)
663 {
664 return __builtin_tanhf (x);
665 }
666
667 float
xasinhf(float x)668 xasinhf (float x)
669 {
670 return __builtin_asinhf (x);
671 }
672 float
xacoshf(float x)673 xacoshf (float x)
674 {
675 return __builtin_acoshf (x);
676 }
677 float
xatanhf(float x)678 xatanhf (float x)
679 {
680 return __builtin_atanhf (x);
681 }
682
683 float
xexp2f(float x)684 xexp2f (float x)
685 {
686 return __builtin_exp2f (x);
687 }
688
689 float
xexp10f(float x)690 xexp10f (float x)
691 {
692 return exp10f (x);
693 }
694
695 float
xexpm1f(float x)696 xexpm1f (float x)
697 {
698 return __builtin_expm1f (x);
699 }
700
701 float
xlog10f(float x)702 xlog10f (float x)
703 {
704 return __builtin_log10f (x);
705 }
706 float
xlog1pf(float x)707 xlog1pf (float x)
708 {
709 return __builtin_log1pf (x);
710 }
711
712 float
xsinpif_u05(float x)713 xsinpif_u05 (float x)
714 {
715 return __builtin_sinf (x * (float)M_PI);
716 }
717 float
xcospif_u05(float x)718 xcospif_u05 (float x)
719 {
720 return __builtin_cosf (x * (float)M_PI);
721 }
722
723 Sleef_float2
xsincosf(float x)724 xsincosf (float x)
725 {
726 Sleef_float2 tmp;
727 sincosf (x, &tmp.x, &tmp.y);
728 return tmp;
729 }
730
731 Sleef_float2
xsincosf_u1(float x)732 xsincosf_u1 (float x)
733 {
734 return xsincosf (x);
735 }
736
737 float
xsqrtf_u05(float x)738 xsqrtf_u05 (float x)
739 {
740 return __builtin_sqrtf (x);
741 }
742
743 float
xsqrtf_u35(float x)744 xsqrtf_u35 (float x)
745 {
746 return __builtin_sqrtf (x);
747 }
748
749 float
xhypotf_u05(float x,float y)750 xhypotf_u05 (float x, float y)
751 {
752 return __builtin_hypotf (x, y);
753 }
754
755 float
xhypotf_u35(float x,float y)756 xhypotf_u35 (float x, float y)
757 {
758 return __builtin_hypotf (x, y);
759 }
760
761 float
xldexpf(float x,int k)762 xldexpf (float x, int k)
763 {
764 return __builtin_ldexpf (x, k);
765 }
766
767 int
xilogbf(float x)768 xilogbf (float x)
769 {
770 return __builtin_ilogbf (x);
771 }
772
773 float
xfmaf(float x,float y,float z)774 xfmaf (float x, float y, float z)
775 {
776 return __builtin_fmaf (x, y, z);
777 }
778
779 float
xfabsf(float x)780 xfabsf (float x)
781 {
782 return __builtin_fabsf (x);
783 }
784
785 float
xcopysignf(float x,float y)786 xcopysignf (float x, float y)
787 {
788 return __builtin_copysignf (x, y);
789 }
790 float
xfmaxf(float x,float y)791 xfmaxf (float x, float y)
792 {
793 return __builtin_fmaxf (x, y);
794 }
795 float
xfminf(float x,float y)796 xfminf (float x, float y)
797 {
798 return __builtin_fminf (x, y);
799 }
800
801 float
xfdimf(float x,float y)802 xfdimf (float x, float y)
803 {
804 return __builtin_fdimf (x, y);
805 }
806 float
xtruncf(float x)807 xtruncf (float x)
808 {
809 return __builtin_truncf (x);
810 }
811 float
xfloorf(float x)812 xfloorf (float x)
813 {
814 return __builtin_floorf (x);
815 }
816
817 float
xceilf(float x)818 xceilf (float x)
819 {
820 return __builtin_ceilf (x);
821 }
822 float
xroundf(float x)823 xroundf (float x)
824 {
825 return __builtin_roundf (x);
826 }
827 float
xrintf(float x)828 xrintf (float x)
829 {
830 return __builtin_rintf (x);
831 }
832
833 float
xnextafterf(float x,float y)834 xnextafterf (float x, float y)
835 {
836 return __builtin_nextafterf (x, y);
837 }
838
839
840 float
xfrfrexpf(float x)841 xfrfrexpf (float x)
842 {
843 union
844 {
845 float f;
846 int32_t u;
847 } cx;
848
849 if (__builtin_isnan (x))
850 return x;
851
852 if (fabsfk (x) < FLT_MIN)
853 x *= (1 << 30);
854
855 cx.f = x;
856 cx.u &= ~0x7f800000U;
857 cx.u |= 0x3f000000U;
858
859 if (__builtin_isinf (x))
860 cx.f = mulsignf (INFINITYf, x);
861 if (x == 0)
862 cx.f = x;
863
864 return cx.f;
865 }
866
867 int
xexpfrexpf(float x)868 xexpfrexpf (float x)
869 {
870 union
871 {
872 float f;
873 uint32_t u;
874 } cx;
875
876 int ret = 0;
877
878 if (fabsfk (x) < FLT_MIN)
879 {
880 x *= (1 << 30);
881 ret = -30;
882 }
883
884 cx.f = x;
885 ret += (int32_t) (((cx.u >> 23) & 0xff)) - 0x7e;
886
887 if (x == 0 || __builtin_isnan (x) || __builtin_isinf (x))
888 ret = 0;
889
890 return ret;
891 }
892
893 float
xfmodf(float x,float y)894 xfmodf (float x, float y)
895 {
896 return __builtin_fmodf (x, y);
897 }
898
899 Sleef_float2
xmodff(float x)900 xmodff (float x)
901 {
902 Sleef_float2 res;
903 float tmp;
904 res.x = __builtin_modff (x, &tmp);
905 res.y = tmp;
906 return res;
907 }
908
909 float
xlgammaf_u1(float x)910 xlgammaf_u1 (float x)
911 {
912 return __builtin_lgammaf (x);
913 }
914 Sleef_float2
xlgamma_rf_u1(float x)915 xlgamma_rf_u1 (float x)
916 {
917 Sleef_float2 ret;
918 int sign;
919 ret.x = lgammaf_r (x, &sign);
920 ret.y = (sign > 0 ? 1.0f : -1.0f);
921 return ret;
922 }
923
924 float
xtgammaf_u1(float x)925 xtgammaf_u1 (float x)
926 {
927 return __builtin_tgammaf (x);
928 }
929 float
xerff_u1(float x)930 xerff_u1 (float x)
931 {
932 return __builtin_erff (x);
933 }
934 float
xerfcf_u15(float x)935 xerfcf_u15 (float x)
936 {
937 return __builtin_erfcf (x);
938 }
939