xref: /reactos/sdk/lib/crt/math/libm_sse2/coshf.c (revision 9e8ed3f8)
14afb647cSTimo Kreuzer 
24afb647cSTimo Kreuzer /*******************************************************************************
34afb647cSTimo Kreuzer MIT License
44afb647cSTimo Kreuzer -----------
54afb647cSTimo Kreuzer 
64afb647cSTimo Kreuzer Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
74afb647cSTimo Kreuzer 
84afb647cSTimo Kreuzer Permission is hereby granted, free of charge, to any person obtaining a copy
94afb647cSTimo Kreuzer of this Software and associated documentaon files (the "Software"), to deal
104afb647cSTimo Kreuzer in the Software without restriction, including without limitation the rights
114afb647cSTimo Kreuzer to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
124afb647cSTimo Kreuzer copies of the Software, and to permit persons to whom the Software is
134afb647cSTimo Kreuzer furnished to do so, subject to the following conditions:
144afb647cSTimo Kreuzer 
154afb647cSTimo Kreuzer The above copyright notice and this permission notice shall be included in
164afb647cSTimo Kreuzer all copies or substantial portions of the Software.
174afb647cSTimo Kreuzer 
184afb647cSTimo Kreuzer THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
194afb647cSTimo Kreuzer IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
204afb647cSTimo Kreuzer FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
214afb647cSTimo Kreuzer AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
224afb647cSTimo Kreuzer LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
234afb647cSTimo Kreuzer OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
244afb647cSTimo Kreuzer THE SOFTWARE.
254afb647cSTimo Kreuzer *******************************************************************************/
264afb647cSTimo Kreuzer 
274afb647cSTimo Kreuzer #include "libm.h"
284afb647cSTimo Kreuzer #include "libm_util.h"
294afb647cSTimo Kreuzer 
304afb647cSTimo Kreuzer #define USE_SPLITEXP
314afb647cSTimo Kreuzer #define USE_SCALEDOUBLE_1
324afb647cSTimo Kreuzer #define USE_SCALEDOUBLE_2
334afb647cSTimo Kreuzer #define USE_INFINITYF_WITH_FLAGS
344afb647cSTimo Kreuzer #define USE_VALF_WITH_FLAGS
354afb647cSTimo Kreuzer #define USE_HANDLE_ERRORF
364afb647cSTimo Kreuzer #include "libm_inlines.h"
374afb647cSTimo Kreuzer #undef USE_SPLITEXP
384afb647cSTimo Kreuzer #undef USE_SCALEDOUBLE_1
394afb647cSTimo Kreuzer #undef USE_SCALEDOUBLE_2
404afb647cSTimo Kreuzer #undef USE_INFINITYF_WITH_FLAGS
414afb647cSTimo Kreuzer #undef USE_VALF_WITH_FLAGS
424afb647cSTimo Kreuzer #undef USE_HANDLE_ERRORF
434afb647cSTimo Kreuzer 
44*9e8ed3f8STimo Kreuzer #ifdef _MSC_VER
454afb647cSTimo Kreuzer // Disable "C4163: not available as intrinsic function" warning that older
464afb647cSTimo Kreuzer // compilers may issue here.
474afb647cSTimo Kreuzer #pragma warning(disable:4163)
484afb647cSTimo Kreuzer #pragma function(coshf)
49*9e8ed3f8STimo Kreuzer #endif
504afb647cSTimo Kreuzer 
coshf(float fx)514afb647cSTimo Kreuzer float coshf(float fx)
524afb647cSTimo Kreuzer {
534afb647cSTimo Kreuzer   /*
544afb647cSTimo Kreuzer     After dealing with special cases the computation is split into
554afb647cSTimo Kreuzer     regions as follows:
564afb647cSTimo Kreuzer 
574afb647cSTimo Kreuzer     abs(x) >= max_cosh_arg:
584afb647cSTimo Kreuzer     cosh(x) = sign(x)*Inf
594afb647cSTimo Kreuzer 
604afb647cSTimo Kreuzer     abs(x) >= small_threshold:
614afb647cSTimo Kreuzer     cosh(x) = sign(x)*exp(abs(x))/2 computed using the
624afb647cSTimo Kreuzer     splitexp and scaleDouble functions as for exp_amd().
634afb647cSTimo Kreuzer 
644afb647cSTimo Kreuzer     abs(x) < small_threshold:
654afb647cSTimo Kreuzer     compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
664afb647cSTimo Kreuzer     cosh(x) is then sign(x)*z.                             */
674afb647cSTimo Kreuzer 
684afb647cSTimo Kreuzer   static const double
694afb647cSTimo Kreuzer     /* The max argument of coshf, but stored as a double */
704afb647cSTimo Kreuzer     max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
714afb647cSTimo Kreuzer     thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
724afb647cSTimo Kreuzer     log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
734afb647cSTimo Kreuzer     log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
744afb647cSTimo Kreuzer 
754afb647cSTimo Kreuzer     small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
764afb647cSTimo Kreuzer //    small_threshold = 20.0;
774afb647cSTimo Kreuzer   /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
784afb647cSTimo Kreuzer 
794afb647cSTimo Kreuzer   /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
804afb647cSTimo Kreuzer 
814afb647cSTimo Kreuzer   static const double sinh_lead[   37] = {
824afb647cSTimo Kreuzer     0.00000000000000000000e+00,  /* 0x0000000000000000 */
834afb647cSTimo Kreuzer     1.17520119364380137839e+00,  /* 0x3ff2cd9fc44eb982 */
844afb647cSTimo Kreuzer     3.62686040784701857476e+00,  /* 0x400d03cf63b6e19f */
854afb647cSTimo Kreuzer     1.00178749274099008204e+01,  /* 0x40240926e70949ad */
864afb647cSTimo Kreuzer     2.72899171971277496596e+01,  /* 0x403b4a3803703630 */
874afb647cSTimo Kreuzer     7.42032105777887522891e+01,  /* 0x40528d0166f07374 */
884afb647cSTimo Kreuzer     2.01713157370279219549e+02,  /* 0x406936d22f67c805 */
894afb647cSTimo Kreuzer     5.48316123273246489589e+02,  /* 0x408122876ba380c9 */
904afb647cSTimo Kreuzer     1.49047882578955000099e+03,  /* 0x409749ea514eca65 */
914afb647cSTimo Kreuzer     4.05154190208278987484e+03,  /* 0x40afa7157430966f */
924afb647cSTimo Kreuzer     1.10132328747033916443e+04,  /* 0x40c5829dced69991 */
934afb647cSTimo Kreuzer     2.99370708492480553105e+04,  /* 0x40dd3c4488cb48d6 */
944afb647cSTimo Kreuzer     8.13773957064298447222e+04,  /* 0x40f3de1654d043f0 */
954afb647cSTimo Kreuzer     2.21206696003330085659e+05,  /* 0x410b00b5916a31a5 */
964afb647cSTimo Kreuzer     6.01302142081972560845e+05,  /* 0x412259ac48bef7e3 */
974afb647cSTimo Kreuzer     1.63450868623590236530e+06,  /* 0x4138f0ccafad27f6 */
984afb647cSTimo Kreuzer     4.44305526025387924165e+06,  /* 0x4150f2ebd0a7ffe3 */
994afb647cSTimo Kreuzer     1.20774763767876271158e+07,  /* 0x416709348c0ea4ed */
1004afb647cSTimo Kreuzer     3.28299845686652474105e+07,  /* 0x417f4f22091940bb */
1014afb647cSTimo Kreuzer     8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
1024afb647cSTimo Kreuzer     2.42582597704895108938e+08,  /* 0x41aceb088b68e803 */
1034afb647cSTimo Kreuzer     6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
1044afb647cSTimo Kreuzer     1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
1054afb647cSTimo Kreuzer     4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
1064afb647cSTimo Kreuzer     1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
1074afb647cSTimo Kreuzer     3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
1084afb647cSTimo Kreuzer     9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
1094afb647cSTimo Kreuzer     2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
1104afb647cSTimo Kreuzer     7.23128532145737548828e+11,  /* 0x42650bba3796379a */
1114afb647cSTimo Kreuzer     1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
1124afb647cSTimo Kreuzer     5.34323729076223046875e+12,  /* 0x429370470aec28ec */
1134afb647cSTimo Kreuzer     1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
1144afb647cSTimo Kreuzer     3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
1154afb647cSTimo Kreuzer     1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
1164afb647cSTimo Kreuzer     2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
1174afb647cSTimo Kreuzer     7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
1184afb647cSTimo Kreuzer     2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
1194afb647cSTimo Kreuzer 
1204afb647cSTimo Kreuzer   static const double cosh_lead[   37] = {
1214afb647cSTimo Kreuzer     1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
1224afb647cSTimo Kreuzer     1.54308063481524371241e+00,  /* 0x3ff8b07551d9f550 */
1234afb647cSTimo Kreuzer     3.76219569108363138810e+00,  /* 0x400e18fa0df2d9bc */
1244afb647cSTimo Kreuzer     1.00676619957777653269e+01,  /* 0x402422a497d6185e */
1254afb647cSTimo Kreuzer     2.73082328360164865444e+01,  /* 0x403b4ee858de3e80 */
1264afb647cSTimo Kreuzer     7.42099485247878334349e+01,  /* 0x40528d6fcbeff3a9 */
1274afb647cSTimo Kreuzer     2.01715636122455890700e+02,  /* 0x406936e67db9b919 */
1284afb647cSTimo Kreuzer     5.48317035155212010977e+02,  /* 0x4081228949ba3a8b */
1294afb647cSTimo Kreuzer     1.49047916125217807348e+03,  /* 0x409749eaa93f4e76 */
1304afb647cSTimo Kreuzer     4.05154202549259389343e+03,  /* 0x40afa715845d8894 */
1314afb647cSTimo Kreuzer     1.10132329201033226127e+04,  /* 0x40c5829dd053712d */
1324afb647cSTimo Kreuzer     2.99370708659497577173e+04,  /* 0x40dd3c4489115627 */
1334afb647cSTimo Kreuzer     8.13773957125740562333e+04,  /* 0x40f3de1654d6b543 */
1344afb647cSTimo Kreuzer     2.21206696005590405548e+05,  /* 0x410b00b5916b6105 */
1354afb647cSTimo Kreuzer     6.01302142082804115489e+05,  /* 0x412259ac48bf13ca */
1364afb647cSTimo Kreuzer     1.63450868623620807193e+06,  /* 0x4138f0ccafad2d17 */
1374afb647cSTimo Kreuzer     4.44305526025399193168e+06,  /* 0x4150f2ebd0a8005c */
1384afb647cSTimo Kreuzer     1.20774763767876680940e+07,  /* 0x416709348c0ea503 */
1394afb647cSTimo Kreuzer     3.28299845686652623117e+07,  /* 0x417f4f22091940bf */
1404afb647cSTimo Kreuzer     8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
1414afb647cSTimo Kreuzer     2.42582597704895138741e+08,  /* 0x41aceb088b68e804 */
1424afb647cSTimo Kreuzer     6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
1434afb647cSTimo Kreuzer     1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
1444afb647cSTimo Kreuzer     4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
1454afb647cSTimo Kreuzer     1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
1464afb647cSTimo Kreuzer     3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
1474afb647cSTimo Kreuzer     9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
1484afb647cSTimo Kreuzer     2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
1494afb647cSTimo Kreuzer     7.23128532145737548828e+11,  /* 0x42650bba3796379a */
1504afb647cSTimo Kreuzer     1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
1514afb647cSTimo Kreuzer     5.34323729076223046875e+12,  /* 0x429370470aec28ec */
1524afb647cSTimo Kreuzer     1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
1534afb647cSTimo Kreuzer     3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
1544afb647cSTimo Kreuzer     1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
1554afb647cSTimo Kreuzer     2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
1564afb647cSTimo Kreuzer     7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
1574afb647cSTimo Kreuzer     2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
1584afb647cSTimo Kreuzer 
159*9e8ed3f8STimo Kreuzer   unsigned long long ux, aux, xneg;
1604afb647cSTimo Kreuzer   unsigned int uhx;
1614afb647cSTimo Kreuzer   double x = fx, y, z, z1, z2;
1624afb647cSTimo Kreuzer   int m;
1634afb647cSTimo Kreuzer 
1644afb647cSTimo Kreuzer   /* Special cases */
1654afb647cSTimo Kreuzer 
1664afb647cSTimo Kreuzer   GET_BITS_DP64(x, ux);
1674afb647cSTimo Kreuzer   aux = ux & ~SIGNBIT_DP64;
1684afb647cSTimo Kreuzer   if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */
1694afb647cSTimo Kreuzer     {
1704afb647cSTimo Kreuzer       if (aux == 0) return (float)1.0; /* with no inexact */
1714afb647cSTimo Kreuzer       if (LAMBDA_DP64 + x  > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */
1724afb647cSTimo Kreuzer     }
1734afb647cSTimo Kreuzer   else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
174*9e8ed3f8STimo Kreuzer     {
1754afb647cSTimo Kreuzer       if (aux > PINFBITPATT_DP64) /* x is NaN */
1764afb647cSTimo Kreuzer       {
1774afb647cSTimo Kreuzer         GET_BITS_SP32(fx, uhx);
1784afb647cSTimo Kreuzer         return _handle_errorf("coshf",OP_COSH,uhx|0x00400000,_DOMAIN, 0,
1794afb647cSTimo Kreuzer                         EDOM, fx, 0.0, 1);
1804afb647cSTimo Kreuzer       }
1814afb647cSTimo Kreuzer       else     /* x is infinity */
1824afb647cSTimo Kreuzer         return infinityf_with_flags(0);
183*9e8ed3f8STimo Kreuzer     }
1844afb647cSTimo Kreuzer   xneg = (aux != ux);
1854afb647cSTimo Kreuzer 
1864afb647cSTimo Kreuzer   y = x;
1874afb647cSTimo Kreuzer   if (xneg) y = -x;
1884afb647cSTimo Kreuzer 
1894afb647cSTimo Kreuzer   if (y >= max_cosh_arg)
1904afb647cSTimo Kreuzer     /* Return +infinity with overflow flag. */
1914afb647cSTimo Kreuzer          return _handle_errorf("coshf",OP_COSH,PINFBITPATT_SP32,_OVERFLOW,
1924afb647cSTimo Kreuzer                         AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, fx, 0.0, 1);
1934afb647cSTimo Kreuzer //    z = infinity_with_flags(AMD_F_OVERFLOW);
1944afb647cSTimo Kreuzer   else if (y >= small_threshold)
1954afb647cSTimo Kreuzer     {
1964afb647cSTimo Kreuzer       /* In this range y is large enough so that
1974afb647cSTimo Kreuzer          the negative exponential is negligible,
1984afb647cSTimo Kreuzer          so cosh(y) is approximated by sign(x)*exp(y)/2. The
1994afb647cSTimo Kreuzer          code below is an inlined version of that from
2004afb647cSTimo Kreuzer          exp() with two changes (it operates on
2014afb647cSTimo Kreuzer          y instead of x, and the division by 2 is
2024afb647cSTimo Kreuzer          done by reducing m by 1). */
2034afb647cSTimo Kreuzer 
2044afb647cSTimo Kreuzer       splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
2054afb647cSTimo Kreuzer                log2_by_32_tail, &m, &z1, &z2);
2064afb647cSTimo Kreuzer       m -= 1;
2074afb647cSTimo Kreuzer 
2084afb647cSTimo Kreuzer       /* scaleDouble_1 is always safe because the argument x was
2094afb647cSTimo Kreuzer          float, rather than double */
2104afb647cSTimo Kreuzer       z = scaleDouble_1((z1+z2),m);
2114afb647cSTimo Kreuzer     }
2124afb647cSTimo Kreuzer   else
2134afb647cSTimo Kreuzer     {
2144afb647cSTimo Kreuzer       /* In this range we find the integer part y0 of y
2154afb647cSTimo Kreuzer          and the increment dy = y - y0. We then compute
2164afb647cSTimo Kreuzer 
2174afb647cSTimo Kreuzer          z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
2184afb647cSTimo Kreuzer          z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
2194afb647cSTimo Kreuzer 
2204afb647cSTimo Kreuzer          where sinh(y0) and cosh(y0) are tabulated above. */
2214afb647cSTimo Kreuzer 
2224afb647cSTimo Kreuzer       int ind;
2234afb647cSTimo Kreuzer       double dy, dy2, sdy, cdy;
2244afb647cSTimo Kreuzer 
2254afb647cSTimo Kreuzer       ind = (int)y;
2264afb647cSTimo Kreuzer       dy = y - ind;
2274afb647cSTimo Kreuzer 
2284afb647cSTimo Kreuzer       dy2 = dy*dy;
2294afb647cSTimo Kreuzer 
2304afb647cSTimo Kreuzer       sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
2314afb647cSTimo Kreuzer 			 (0.833333333333329931873097e-2 +
2324afb647cSTimo Kreuzer 			  (0.198412698413242405162014e-3 +
2334afb647cSTimo Kreuzer 			   (0.275573191913636406057211e-5 +
2344afb647cSTimo Kreuzer 			    (0.250521176994133472333666e-7 +
2354afb647cSTimo Kreuzer 			     (0.160576793121939886190847e-9 +
2364afb647cSTimo Kreuzer 			      0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
2374afb647cSTimo Kreuzer 
2384afb647cSTimo Kreuzer       cdy = 1 + dy2*(0.500000000000000005911074e0 +
2394afb647cSTimo Kreuzer 		     (0.416666666666660876512776e-1 +
2404afb647cSTimo Kreuzer 		      (0.138888888889814854814536e-2 +
2414afb647cSTimo Kreuzer 		       (0.248015872460622433115785e-4 +
2424afb647cSTimo Kreuzer 			(0.275573350756016588011357e-6 +
2434afb647cSTimo Kreuzer 			 (0.208744349831471353536305e-8 +
2444afb647cSTimo Kreuzer 			  0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
2454afb647cSTimo Kreuzer 
2464afb647cSTimo Kreuzer       z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy;
2474afb647cSTimo Kreuzer     }
2484afb647cSTimo Kreuzer 
2494afb647cSTimo Kreuzer //  if (xneg) z = - z;
2504afb647cSTimo Kreuzer   return (float)z;
2514afb647cSTimo Kreuzer }
252