1 /* OpenCL built-in library: SLEEF C fallback using libm / compiler builtins
2 
3    Copyright (c) 2017 Michal Babej / Tampere University of Technology
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #define _GNU_SOURCE
25 #include <float.h>
26 #include <limits.h>
27 #include <math.h>
28 #include <stdint.h>
29 
30 #include "sleef.h"
31 #include "sleef_cl.h"
32 #include "rename.h"
33 
34 //##################################################################
35 
36 static int64_t
doubleToRawLongBits(double d)37 doubleToRawLongBits (double d)
38 {
39   union
40   {
41     double f;
42     int64_t i;
43   } tmp;
44   tmp.f = d;
45   return tmp.i;
46 }
47 
48 static double
longBitsToDouble(int64_t i)49 longBitsToDouble (int64_t i)
50 {
51   union
52   {
53     double f;
54     int64_t i;
55   } tmp;
56   tmp.i = i;
57   return tmp.f;
58 }
59 
60 static double
fabsk(double x)61 fabsk (double x)
62 {
63   return longBitsToDouble (0x7fffffffffffffffLL & doubleToRawLongBits (x));
64 }
65 
66 static double
mulsign(double x,double y)67 mulsign (double x, double y)
68 {
69   return longBitsToDouble (doubleToRawLongBits (x)
70                            ^ (doubleToRawLongBits (y) & (1LL << 63)));
71 }
72 
73 //##################################################################
74 
75 #define INFINITYf ((float)INFINITY)
76 #define NANf ((float)NAN)
77 
78 static int32_t
floatToRawIntBits(float d)79 floatToRawIntBits (float d)
80 {
81   union
82   {
83     float f;
84     int32_t i;
85   } tmp;
86   tmp.f = d;
87   return tmp.i;
88 }
89 
90 static float
intBitsToFloat(int32_t i)91 intBitsToFloat (int32_t i)
92 {
93   union
94   {
95     float f;
96     int32_t i;
97   } tmp;
98   tmp.i = i;
99   return tmp.f;
100 }
101 
102 static float
fabsfk(float x)103 fabsfk (float x)
104 {
105   return intBitsToFloat (0x7fffffffL & floatToRawIntBits (x));
106 }
107 
108 static float
mulsignf(float x,float y)109 mulsignf (float x, float y)
110 {
111   return intBitsToFloat (floatToRawIntBits (x)
112                          ^ (floatToRawIntBits (y) & (1 << 31)));
113 }
114 
115 //##################################################################
116 
117 
118 double
xsin(double x)119 xsin (double x)
120 {
121   return __builtin_sin (x);
122 }
123 double
xcos(double x)124 xcos (double x)
125 {
126   return __builtin_cos (x);
127 }
128 double
xtan(double x)129 xtan (double x)
130 {
131   return __builtin_tan (x);
132 }
133 double
xasin(double x)134 xasin (double x)
135 {
136   return __builtin_asin (x);
137 }
138 double
xacos(double x)139 xacos (double x)
140 {
141   return __builtin_acos (x);
142 }
143 double
xatan(double x)144 xatan (double x)
145 {
146   return __builtin_atan (x);
147 }
148 double
xatan2(double x,double y)149 xatan2 (double x, double y)
150 {
151   return __builtin_atan2 (x, y);
152 }
153 double
xlog(double x)154 xlog (double x)
155 {
156   return __builtin_log (x);
157 }
158 double
xcbrt(double x)159 xcbrt (double x)
160 {
161   return __builtin_cbrt (x);
162 }
163 
164 double
xsin_u1(double x)165 xsin_u1 (double x)
166 {
167   return __builtin_sin (x);
168 }
169 double
xcos_u1(double x)170 xcos_u1 (double x)
171 {
172   return __builtin_cos (x);
173 }
174 double
xtan_u1(double x)175 xtan_u1 (double x)
176 {
177   return __builtin_tan (x);
178 }
179 double
xasin_u1(double x)180 xasin_u1 (double x)
181 {
182   return __builtin_asin (x);
183 }
184 double
xacos_u1(double x)185 xacos_u1 (double x)
186 {
187   return __builtin_acos (x);
188 }
189 double
xatan_u1(double x)190 xatan_u1 (double x)
191 {
192   return __builtin_atan (x);
193 }
194 double
xatan2_u1(double x,double y)195 xatan2_u1 (double x, double y)
196 {
197   return __builtin_atan2 (x, y);
198 }
199 double
xlog_u1(double x)200 xlog_u1 (double x)
201 {
202   return __builtin_log (x);
203 }
204 double
xcbrt_u1(double x)205 xcbrt_u1 (double x)
206 {
207   return __builtin_cbrt (x);
208 }
209 
210 double
xexp(double x)211 xexp (double x)
212 {
213   return __builtin_exp (x);
214 }
215 
216 double
xpow(double x,double y)217 xpow (double x, double y)
218 {
219   return __builtin_pow (x, y);
220 }
221 
222 double
xpown(double x,int y)223 xpown (double x, int y)
224 {
225   return __builtin_pow (x, (double)y);
226 }
227 
228 double
xpowr(double x,double y)229 xpowr (double x, double y)
230 {
231   if (x < 0.0)
232     return NAN;
233   if (isnan(y))
234     return y;
235   double res = __builtin_pow (x, y);
236   return res;
237 }
238 
239 double
xsinh(double x)240 xsinh (double x)
241 {
242   return __builtin_sinh (x);
243 }
244 
245 double
xcosh(double x)246 xcosh (double x)
247 {
248   return __builtin_cosh (x);
249 }
250 double
xtanh(double x)251 xtanh (double x)
252 {
253   return __builtin_tanh (x);
254 }
255 
256 double
xasinh(double x)257 xasinh (double x)
258 {
259   return __builtin_asinh (x);
260 }
261 double
xacosh(double x)262 xacosh (double x)
263 {
264   return __builtin_acosh (x);
265 }
266 double
xatanh(double x)267 xatanh (double x)
268 {
269   return __builtin_atanh (x);
270 }
271 
272 double
xexp2(double x)273 xexp2 (double x)
274 {
275   return __builtin_exp2 (x);
276 }
277 double
xexp10(double x)278 xexp10 (double x)
279 {
280   return exp10 (x);
281 }
282 
283 double
xexpm1(double x)284 xexpm1 (double x)
285 {
286   return __builtin_expm1 (x);
287 }
288 
289 double
xlog10(double x)290 xlog10 (double x)
291 {
292   return __builtin_log10 (x);
293 }
294 double
xlog1p(double x)295 xlog1p (double x)
296 {
297   return __builtin_log1p (x);
298 }
299 
300 double
xsinpi_u05(double x)301 xsinpi_u05 (double x)
302 {
303   return __builtin_sin (x * (double)M_PI);
304 }
305 double
xcospi_u05(double x)306 xcospi_u05 (double x)
307 {
308   return __builtin_cos (x * (double)M_PI);
309 }
310 
311 Sleef_double2
xsincos(double x)312 xsincos (double x)
313 {
314   Sleef_double2 tmp;
315   sincos (x, &tmp.x, &tmp.y);
316   return tmp;
317 }
318 
319 Sleef_double2
xsincos_u1(double x)320 xsincos_u1 (double x)
321 {
322   return xsincos (x);
323 }
324 
325 double
xldexp(double x,int k)326 xldexp (double x, int k)
327 {
328   return __builtin_ldexp (x, k);
329 }
330 
331 int
xilogb(double x)332 xilogb (double x)
333 {
334   return __builtin_ilogb (x);
335 }
336 
337 double
xfma(double x,double y,double z)338 xfma (double x, double y, double z)
339 {
340   return __builtin_fma (x, y, z);
341 }
342 
343 double
xsqrt_u05(double x)344 xsqrt_u05 (double x)
345 {
346   return __builtin_sqrt (x);
347 }
348 
349 double
xsqrt_u35(double x)350 xsqrt_u35 (double x)
351 {
352   return __builtin_sqrt (x);
353 }
354 
355 double
xhypot_u05(double x,double y)356 xhypot_u05 (double x, double y)
357 {
358   return __builtin_hypot (x, y);
359 }
360 
361 double
xhypot_u35(double x,double y)362 xhypot_u35 (double x, double y)
363 {
364   return __builtin_hypot (x, y);
365 }
366 
367 double
xfabs(double x)368 xfabs (double x)
369 {
370   return __builtin_fabs (x);
371 }
372 
373 double
xcopysign(double x,double y)374 xcopysign (double x, double y)
375 {
376   return __builtin_copysign (x, y);
377 }
378 double
xfmax(double x,double y)379 xfmax (double x, double y)
380 {
381   return __builtin_fmax (x, y);
382 }
383 double
xfmin(double x,double y)384 xfmin (double x, double y)
385 {
386   return __builtin_fmin (x, y);
387 }
388 
389 double
xfdim(double x,double y)390 xfdim (double x, double y)
391 {
392   return __builtin_fdim (x, y);
393 }
394 double
xtrunc(double x)395 xtrunc (double x)
396 {
397   return __builtin_trunc (x);
398 }
399 double
xfloor(double x)400 xfloor (double x)
401 {
402   return __builtin_floor (x);
403 }
404 
405 double
xceil(double x)406 xceil (double x)
407 {
408   return __builtin_ceil (x);
409 }
410 double
xround(double x)411 xround (double x)
412 {
413   return __builtin_round (x);
414 }
415 double
xrint(double x)416 xrint (double x)
417 {
418   return __builtin_rint (x);
419 }
420 
421 double
xnextafter(double x,double y)422 xnextafter (double x, double y)
423 {
424   return __builtin_nextafter (x, y);
425 }
426 
427 double
xfrfrexp(double x)428 xfrfrexp (double x)
429 {
430   union
431   {
432     double f;
433     uint64_t u;
434   } cx;
435 
436   if (__builtin_isnan (x))
437     return x;
438 
439   if (fabsk (x) < DBL_MIN)
440     x *= (1ULL << 63);
441 
442   cx.f = x;
443   cx.u &= ~0x7ff0000000000000ULL;
444   cx.u |= 0x3fe0000000000000ULL;
445 
446   if (__builtin_isinf (x))
447     cx.f = mulsign (INFINITY, x);
448   if (x == 0)
449     cx.f = x;
450 
451   return cx.f;
452 }
453 
454 int
xexpfrexp(double x)455 xexpfrexp (double x)
456 {
457   union
458   {
459     double f;
460     uint64_t u;
461   } cx;
462 
463   int ret = 0;
464 
465   if (fabsk (x) < DBL_MIN)
466     {
467       x *= (1ULL << 63);
468       ret = -63;
469     }
470 
471   cx.f = x;
472   ret += (int32_t) (((cx.u >> 52) & 0x7ff)) - 0x3fe;
473 
474   if (x == 0 || __builtin_isnan (x) || __builtin_isinf (x))
475     ret = 0;
476 
477   return ret;
478 }
479 
480 double
xfmod(double x,double y)481 xfmod (double x, double y)
482 {
483   return __builtin_fmod (x, y);
484 }
485 
486 Sleef_double2
xmodf(double x)487 xmodf (double x)
488 {
489   Sleef_double2 res;
490   double tmp;
491   res.x = __builtin_modf (x, &tmp);
492   res.y = tmp;
493   return res;
494 }
495 
496 double
xlgamma_u1(double x)497 xlgamma_u1 (double x)
498 {
499   return __builtin_lgamma (x);
500 }
501 Sleef_double2
xlgamma_r_u1(double x)502 xlgamma_r_u1 (double x)
503 {
504   Sleef_double2 ret;
505   int sign;
506   ret.x = lgamma_r (x, &sign);
507   ret.y = (sign > 0 ? 1.0 : -1.0);
508   return ret;
509 }
510 double
xtgamma_u1(double x)511 xtgamma_u1 (double x)
512 {
513   return __builtin_tgamma (x);
514 }
515 double
xerf_u1(double x)516 xerf_u1 (double x)
517 {
518   return __builtin_erf (x);
519 }
520 double
xerfc_u15(double x)521 xerfc_u15 (double x)
522 {
523   return __builtin_erfc (x);
524 }
525 
526 // *********************************************************************
527 // *********************************************************************
528 // *********************************************************************
529 // *********************************************************************
530 
531 float
xsinf(float x)532 xsinf (float x)
533 {
534   return __builtin_sinf (x);
535 }
536 float
xcosf(float x)537 xcosf (float x)
538 {
539   return __builtin_cosf (x);
540 }
541 float
xtanf(float x)542 xtanf (float x)
543 {
544   return __builtin_tanf (x);
545 }
546 float
xasinf(float x)547 xasinf (float x)
548 {
549   return __builtin_asinf (x);
550 }
551 float
xacosf(float x)552 xacosf (float x)
553 {
554   return __builtin_acosf (x);
555 }
556 float
xatanf(float x)557 xatanf (float x)
558 {
559   return __builtin_atanf (x);
560 }
561 float
xatan2f(float x,float y)562 xatan2f (float x, float y)
563 {
564   return __builtin_atan2f (x, y);
565 }
566 float
xlogf(float x)567 xlogf (float x)
568 {
569   return __builtin_logf (x);
570 }
571 float
xcbrtf(float x)572 xcbrtf (float x)
573 {
574   return __builtin_cbrtf (x);
575 }
576 
577 float
xsinf_u1(float x)578 xsinf_u1 (float x)
579 {
580   return __builtin_sinf (x);
581 }
582 float
xcosf_u1(float x)583 xcosf_u1 (float x)
584 {
585   return __builtin_cosf (x);
586 }
587 float
xtanf_u1(float x)588 xtanf_u1 (float x)
589 {
590   return __builtin_tanf (x);
591 }
592 float
xasinf_u1(float x)593 xasinf_u1 (float x)
594 {
595   return __builtin_asinf (x);
596 }
597 float
xacosf_u1(float x)598 xacosf_u1 (float x)
599 {
600   return __builtin_acosf (x);
601 }
602 float
xatanf_u1(float x)603 xatanf_u1 (float x)
604 {
605   return __builtin_atanf (x);
606 }
607 float
xatan2f_u1(float x,float y)608 xatan2f_u1 (float x, float y)
609 {
610   return __builtin_atan2f (x, y);
611 }
612 float
xlogf_u1(float x)613 xlogf_u1 (float x)
614 {
615   return __builtin_logf (x);
616 }
617 float
xcbrtf_u1(float x)618 xcbrtf_u1 (float x)
619 {
620   return __builtin_cbrtf (x);
621 }
622 
623 float
xexpf(float x)624 xexpf (float x)
625 {
626   return __builtin_expf (x);
627 }
628 
629 float
xpowf(float x,float y)630 xpowf (float x, float y)
631 {
632 
633   return (float) __builtin_pow ((double)x, (double)y);
634 }
635 
636 float
xpownf(float x,int y)637 xpownf (float x, int y)
638 {
639   return (float) __builtin_pow ((double)x, (double)y);
640 }
641 
642 float
xpowrf(float x,float y)643 xpowrf (float x, float y)
644 {
645   if (x < 0.0f)
646     return NAN;
647   float res = (float) __builtin_pow ((double)x, (double)y);
648   return res;
649 }
650 
651 float
xsinhf(float x)652 xsinhf (float x)
653 {
654   return __builtin_sinhf (x);
655 }
656 float
xcoshf(float x)657 xcoshf (float x)
658 {
659   return __builtin_coshf (x);
660 }
661 float
xtanhf(float x)662 xtanhf (float x)
663 {
664   return __builtin_tanhf (x);
665 }
666 
667 float
xasinhf(float x)668 xasinhf (float x)
669 {
670   return __builtin_asinhf (x);
671 }
672 float
xacoshf(float x)673 xacoshf (float x)
674 {
675   return __builtin_acoshf (x);
676 }
677 float
xatanhf(float x)678 xatanhf (float x)
679 {
680   return __builtin_atanhf (x);
681 }
682 
683 float
xexp2f(float x)684 xexp2f (float x)
685 {
686   return __builtin_exp2f (x);
687 }
688 
689 float
xexp10f(float x)690 xexp10f (float x)
691 {
692   return exp10f (x);
693 }
694 
695 float
xexpm1f(float x)696 xexpm1f (float x)
697 {
698   return __builtin_expm1f (x);
699 }
700 
701 float
xlog10f(float x)702 xlog10f (float x)
703 {
704   return __builtin_log10f (x);
705 }
706 float
xlog1pf(float x)707 xlog1pf (float x)
708 {
709   return __builtin_log1pf (x);
710 }
711 
712 float
xsinpif_u05(float x)713 xsinpif_u05 (float x)
714 {
715   return __builtin_sinf (x * (float)M_PI);
716 }
717 float
xcospif_u05(float x)718 xcospif_u05 (float x)
719 {
720   return __builtin_cosf (x * (float)M_PI);
721 }
722 
723 Sleef_float2
xsincosf(float x)724 xsincosf (float x)
725 {
726   Sleef_float2 tmp;
727   sincosf (x, &tmp.x, &tmp.y);
728   return tmp;
729 }
730 
731 Sleef_float2
xsincosf_u1(float x)732 xsincosf_u1 (float x)
733 {
734   return xsincosf (x);
735 }
736 
737 float
xsqrtf_u05(float x)738 xsqrtf_u05 (float x)
739 {
740   return __builtin_sqrtf (x);
741 }
742 
743 float
xsqrtf_u35(float x)744 xsqrtf_u35 (float x)
745 {
746   return __builtin_sqrtf (x);
747 }
748 
749 float
xhypotf_u05(float x,float y)750 xhypotf_u05 (float x, float y)
751 {
752   return __builtin_hypotf (x, y);
753 }
754 
755 float
xhypotf_u35(float x,float y)756 xhypotf_u35 (float x, float y)
757 {
758   return __builtin_hypotf (x, y);
759 }
760 
761 float
xldexpf(float x,int k)762 xldexpf (float x, int k)
763 {
764   return __builtin_ldexpf (x, k);
765 }
766 
767 int
xilogbf(float x)768 xilogbf (float x)
769 {
770   return __builtin_ilogbf (x);
771 }
772 
773 float
xfmaf(float x,float y,float z)774 xfmaf (float x, float y, float z)
775 {
776   return __builtin_fmaf (x, y, z);
777 }
778 
779 float
xfabsf(float x)780 xfabsf (float x)
781 {
782   return __builtin_fabsf (x);
783 }
784 
785 float
xcopysignf(float x,float y)786 xcopysignf (float x, float y)
787 {
788   return __builtin_copysignf (x, y);
789 }
790 float
xfmaxf(float x,float y)791 xfmaxf (float x, float y)
792 {
793   return __builtin_fmaxf (x, y);
794 }
795 float
xfminf(float x,float y)796 xfminf (float x, float y)
797 {
798   return __builtin_fminf (x, y);
799 }
800 
801 float
xfdimf(float x,float y)802 xfdimf (float x, float y)
803 {
804   return __builtin_fdimf (x, y);
805 }
806 float
xtruncf(float x)807 xtruncf (float x)
808 {
809   return __builtin_truncf (x);
810 }
811 float
xfloorf(float x)812 xfloorf (float x)
813 {
814   return __builtin_floorf (x);
815 }
816 
817 float
xceilf(float x)818 xceilf (float x)
819 {
820   return __builtin_ceilf (x);
821 }
822 float
xroundf(float x)823 xroundf (float x)
824 {
825   return __builtin_roundf (x);
826 }
827 float
xrintf(float x)828 xrintf (float x)
829 {
830   return __builtin_rintf (x);
831 }
832 
833 float
xnextafterf(float x,float y)834 xnextafterf (float x, float y)
835 {
836   return __builtin_nextafterf (x, y);
837 }
838 
839 
840 float
xfrfrexpf(float x)841 xfrfrexpf (float x)
842 {
843   union
844   {
845     float f;
846     int32_t u;
847   } cx;
848 
849   if (__builtin_isnan (x))
850     return x;
851 
852   if (fabsfk (x) < FLT_MIN)
853     x *= (1 << 30);
854 
855   cx.f = x;
856   cx.u &= ~0x7f800000U;
857   cx.u |= 0x3f000000U;
858 
859   if (__builtin_isinf (x))
860     cx.f = mulsignf (INFINITYf, x);
861   if (x == 0)
862     cx.f = x;
863 
864   return cx.f;
865 }
866 
867 int
xexpfrexpf(float x)868 xexpfrexpf (float x)
869 {
870   union
871   {
872     float f;
873     uint32_t u;
874   } cx;
875 
876   int ret = 0;
877 
878   if (fabsfk (x) < FLT_MIN)
879     {
880       x *= (1 << 30);
881       ret = -30;
882     }
883 
884   cx.f = x;
885   ret += (int32_t) (((cx.u >> 23) & 0xff)) - 0x7e;
886 
887   if (x == 0 || __builtin_isnan (x) || __builtin_isinf (x))
888     ret = 0;
889 
890   return ret;
891 }
892 
893 float
xfmodf(float x,float y)894 xfmodf (float x, float y)
895 {
896   return __builtin_fmodf (x, y);
897 }
898 
899 Sleef_float2
xmodff(float x)900 xmodff (float x)
901 {
902   Sleef_float2 res;
903   float tmp;
904   res.x = __builtin_modff (x, &tmp);
905   res.y = tmp;
906   return res;
907 }
908 
909 float
xlgammaf_u1(float x)910 xlgammaf_u1 (float x)
911 {
912   return __builtin_lgammaf (x);
913 }
914 Sleef_float2
xlgamma_rf_u1(float x)915 xlgamma_rf_u1 (float x)
916 {
917   Sleef_float2 ret;
918   int sign;
919   ret.x = lgammaf_r (x, &sign);
920   ret.y = (sign > 0 ? 1.0f : -1.0f);
921   return ret;
922 }
923 
924 float
xtgammaf_u1(float x)925 xtgammaf_u1 (float x)
926 {
927   return __builtin_tgammaf (x);
928 }
929 float
xerff_u1(float x)930 xerff_u1 (float x)
931 {
932   return __builtin_erff (x);
933 }
934 float
xerfcf_u15(float x)935 xerfcf_u15 (float x)
936 {
937   return __builtin_erfcf (x);
938 }
939