14afb647cSTimo Kreuzer
24afb647cSTimo Kreuzer /*******************************************************************************
34afb647cSTimo Kreuzer MIT License
44afb647cSTimo Kreuzer -----------
54afb647cSTimo Kreuzer
64afb647cSTimo Kreuzer Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
74afb647cSTimo Kreuzer
84afb647cSTimo Kreuzer Permission is hereby granted, free of charge, to any person obtaining a copy
94afb647cSTimo Kreuzer of this Software and associated documentaon files (the "Software"), to deal
104afb647cSTimo Kreuzer in the Software without restriction, including without limitation the rights
114afb647cSTimo Kreuzer to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
124afb647cSTimo Kreuzer copies of the Software, and to permit persons to whom the Software is
134afb647cSTimo Kreuzer furnished to do so, subject to the following conditions:
144afb647cSTimo Kreuzer
154afb647cSTimo Kreuzer The above copyright notice and this permission notice shall be included in
164afb647cSTimo Kreuzer all copies or substantial portions of the Software.
174afb647cSTimo Kreuzer
184afb647cSTimo Kreuzer THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
194afb647cSTimo Kreuzer IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
204afb647cSTimo Kreuzer FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
214afb647cSTimo Kreuzer AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
224afb647cSTimo Kreuzer LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
234afb647cSTimo Kreuzer OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
244afb647cSTimo Kreuzer THE SOFTWARE.
254afb647cSTimo Kreuzer *******************************************************************************/
264afb647cSTimo Kreuzer
274afb647cSTimo Kreuzer #include "libm.h"
284afb647cSTimo Kreuzer #include "libm_util.h"
294afb647cSTimo Kreuzer
304afb647cSTimo Kreuzer #define USE_SPLITEXP
314afb647cSTimo Kreuzer #define USE_SCALEDOUBLE_1
324afb647cSTimo Kreuzer #define USE_SCALEDOUBLE_2
334afb647cSTimo Kreuzer #define USE_INFINITYF_WITH_FLAGS
344afb647cSTimo Kreuzer #define USE_VALF_WITH_FLAGS
354afb647cSTimo Kreuzer #define USE_HANDLE_ERRORF
364afb647cSTimo Kreuzer #include "libm_inlines.h"
374afb647cSTimo Kreuzer #undef USE_SPLITEXP
384afb647cSTimo Kreuzer #undef USE_SCALEDOUBLE_1
394afb647cSTimo Kreuzer #undef USE_SCALEDOUBLE_2
404afb647cSTimo Kreuzer #undef USE_INFINITYF_WITH_FLAGS
414afb647cSTimo Kreuzer #undef USE_VALF_WITH_FLAGS
424afb647cSTimo Kreuzer #undef USE_HANDLE_ERRORF
434afb647cSTimo Kreuzer
44*9e8ed3f8STimo Kreuzer #ifdef _MSC_VER
454afb647cSTimo Kreuzer // Disable "C4163: not available as intrinsic function" warning that older
464afb647cSTimo Kreuzer // compilers may issue here.
474afb647cSTimo Kreuzer #pragma warning(disable:4163)
484afb647cSTimo Kreuzer #pragma function(coshf)
49*9e8ed3f8STimo Kreuzer #endif
504afb647cSTimo Kreuzer
coshf(float fx)514afb647cSTimo Kreuzer float coshf(float fx)
524afb647cSTimo Kreuzer {
534afb647cSTimo Kreuzer /*
544afb647cSTimo Kreuzer After dealing with special cases the computation is split into
554afb647cSTimo Kreuzer regions as follows:
564afb647cSTimo Kreuzer
574afb647cSTimo Kreuzer abs(x) >= max_cosh_arg:
584afb647cSTimo Kreuzer cosh(x) = sign(x)*Inf
594afb647cSTimo Kreuzer
604afb647cSTimo Kreuzer abs(x) >= small_threshold:
614afb647cSTimo Kreuzer cosh(x) = sign(x)*exp(abs(x))/2 computed using the
624afb647cSTimo Kreuzer splitexp and scaleDouble functions as for exp_amd().
634afb647cSTimo Kreuzer
644afb647cSTimo Kreuzer abs(x) < small_threshold:
654afb647cSTimo Kreuzer compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
664afb647cSTimo Kreuzer cosh(x) is then sign(x)*z. */
674afb647cSTimo Kreuzer
684afb647cSTimo Kreuzer static const double
694afb647cSTimo Kreuzer /* The max argument of coshf, but stored as a double */
704afb647cSTimo Kreuzer max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
714afb647cSTimo Kreuzer thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
724afb647cSTimo Kreuzer log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
734afb647cSTimo Kreuzer log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
744afb647cSTimo Kreuzer
754afb647cSTimo Kreuzer small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
764afb647cSTimo Kreuzer // small_threshold = 20.0;
774afb647cSTimo Kreuzer /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
784afb647cSTimo Kreuzer
794afb647cSTimo Kreuzer /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
804afb647cSTimo Kreuzer
814afb647cSTimo Kreuzer static const double sinh_lead[ 37] = {
824afb647cSTimo Kreuzer 0.00000000000000000000e+00, /* 0x0000000000000000 */
834afb647cSTimo Kreuzer 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */
844afb647cSTimo Kreuzer 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */
854afb647cSTimo Kreuzer 1.00178749274099008204e+01, /* 0x40240926e70949ad */
864afb647cSTimo Kreuzer 2.72899171971277496596e+01, /* 0x403b4a3803703630 */
874afb647cSTimo Kreuzer 7.42032105777887522891e+01, /* 0x40528d0166f07374 */
884afb647cSTimo Kreuzer 2.01713157370279219549e+02, /* 0x406936d22f67c805 */
894afb647cSTimo Kreuzer 5.48316123273246489589e+02, /* 0x408122876ba380c9 */
904afb647cSTimo Kreuzer 1.49047882578955000099e+03, /* 0x409749ea514eca65 */
914afb647cSTimo Kreuzer 4.05154190208278987484e+03, /* 0x40afa7157430966f */
924afb647cSTimo Kreuzer 1.10132328747033916443e+04, /* 0x40c5829dced69991 */
934afb647cSTimo Kreuzer 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */
944afb647cSTimo Kreuzer 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */
954afb647cSTimo Kreuzer 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */
964afb647cSTimo Kreuzer 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */
974afb647cSTimo Kreuzer 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */
984afb647cSTimo Kreuzer 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */
994afb647cSTimo Kreuzer 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */
1004afb647cSTimo Kreuzer 3.28299845686652474105e+07, /* 0x417f4f22091940bb */
1014afb647cSTimo Kreuzer 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
1024afb647cSTimo Kreuzer 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */
1034afb647cSTimo Kreuzer 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
1044afb647cSTimo Kreuzer 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
1054afb647cSTimo Kreuzer 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
1064afb647cSTimo Kreuzer 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
1074afb647cSTimo Kreuzer 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
1084afb647cSTimo Kreuzer 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
1094afb647cSTimo Kreuzer 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
1104afb647cSTimo Kreuzer 7.23128532145737548828e+11, /* 0x42650bba3796379a */
1114afb647cSTimo Kreuzer 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
1124afb647cSTimo Kreuzer 5.34323729076223046875e+12, /* 0x429370470aec28ec */
1134afb647cSTimo Kreuzer 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
1144afb647cSTimo Kreuzer 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
1154afb647cSTimo Kreuzer 1.07321789892958031250e+14, /* 0x42d866f34a725782 */
1164afb647cSTimo Kreuzer 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
1174afb647cSTimo Kreuzer 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
1184afb647cSTimo Kreuzer 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
1194afb647cSTimo Kreuzer
1204afb647cSTimo Kreuzer static const double cosh_lead[ 37] = {
1214afb647cSTimo Kreuzer 1.00000000000000000000e+00, /* 0x3ff0000000000000 */
1224afb647cSTimo Kreuzer 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */
1234afb647cSTimo Kreuzer 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */
1244afb647cSTimo Kreuzer 1.00676619957777653269e+01, /* 0x402422a497d6185e */
1254afb647cSTimo Kreuzer 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */
1264afb647cSTimo Kreuzer 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */
1274afb647cSTimo Kreuzer 2.01715636122455890700e+02, /* 0x406936e67db9b919 */
1284afb647cSTimo Kreuzer 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */
1294afb647cSTimo Kreuzer 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */
1304afb647cSTimo Kreuzer 4.05154202549259389343e+03, /* 0x40afa715845d8894 */
1314afb647cSTimo Kreuzer 1.10132329201033226127e+04, /* 0x40c5829dd053712d */
1324afb647cSTimo Kreuzer 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */
1334afb647cSTimo Kreuzer 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */
1344afb647cSTimo Kreuzer 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */
1354afb647cSTimo Kreuzer 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */
1364afb647cSTimo Kreuzer 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */
1374afb647cSTimo Kreuzer 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */
1384afb647cSTimo Kreuzer 1.20774763767876680940e+07, /* 0x416709348c0ea503 */
1394afb647cSTimo Kreuzer 3.28299845686652623117e+07, /* 0x417f4f22091940bf */
1404afb647cSTimo Kreuzer 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
1414afb647cSTimo Kreuzer 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */
1424afb647cSTimo Kreuzer 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
1434afb647cSTimo Kreuzer 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
1444afb647cSTimo Kreuzer 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
1454afb647cSTimo Kreuzer 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
1464afb647cSTimo Kreuzer 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
1474afb647cSTimo Kreuzer 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
1484afb647cSTimo Kreuzer 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
1494afb647cSTimo Kreuzer 7.23128532145737548828e+11, /* 0x42650bba3796379a */
1504afb647cSTimo Kreuzer 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
1514afb647cSTimo Kreuzer 5.34323729076223046875e+12, /* 0x429370470aec28ec */
1524afb647cSTimo Kreuzer 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
1534afb647cSTimo Kreuzer 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
1544afb647cSTimo Kreuzer 1.07321789892958031250e+14, /* 0x42d866f34a725782 */
1554afb647cSTimo Kreuzer 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
1564afb647cSTimo Kreuzer 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
1574afb647cSTimo Kreuzer 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
1584afb647cSTimo Kreuzer
159*9e8ed3f8STimo Kreuzer unsigned long long ux, aux, xneg;
1604afb647cSTimo Kreuzer unsigned int uhx;
1614afb647cSTimo Kreuzer double x = fx, y, z, z1, z2;
1624afb647cSTimo Kreuzer int m;
1634afb647cSTimo Kreuzer
1644afb647cSTimo Kreuzer /* Special cases */
1654afb647cSTimo Kreuzer
1664afb647cSTimo Kreuzer GET_BITS_DP64(x, ux);
1674afb647cSTimo Kreuzer aux = ux & ~SIGNBIT_DP64;
1684afb647cSTimo Kreuzer if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */
1694afb647cSTimo Kreuzer {
1704afb647cSTimo Kreuzer if (aux == 0) return (float)1.0; /* with no inexact */
1714afb647cSTimo Kreuzer if (LAMBDA_DP64 + x > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */
1724afb647cSTimo Kreuzer }
1734afb647cSTimo Kreuzer else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
174*9e8ed3f8STimo Kreuzer {
1754afb647cSTimo Kreuzer if (aux > PINFBITPATT_DP64) /* x is NaN */
1764afb647cSTimo Kreuzer {
1774afb647cSTimo Kreuzer GET_BITS_SP32(fx, uhx);
1784afb647cSTimo Kreuzer return _handle_errorf("coshf",OP_COSH,uhx|0x00400000,_DOMAIN, 0,
1794afb647cSTimo Kreuzer EDOM, fx, 0.0, 1);
1804afb647cSTimo Kreuzer }
1814afb647cSTimo Kreuzer else /* x is infinity */
1824afb647cSTimo Kreuzer return infinityf_with_flags(0);
183*9e8ed3f8STimo Kreuzer }
1844afb647cSTimo Kreuzer xneg = (aux != ux);
1854afb647cSTimo Kreuzer
1864afb647cSTimo Kreuzer y = x;
1874afb647cSTimo Kreuzer if (xneg) y = -x;
1884afb647cSTimo Kreuzer
1894afb647cSTimo Kreuzer if (y >= max_cosh_arg)
1904afb647cSTimo Kreuzer /* Return +infinity with overflow flag. */
1914afb647cSTimo Kreuzer return _handle_errorf("coshf",OP_COSH,PINFBITPATT_SP32,_OVERFLOW,
1924afb647cSTimo Kreuzer AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, fx, 0.0, 1);
1934afb647cSTimo Kreuzer // z = infinity_with_flags(AMD_F_OVERFLOW);
1944afb647cSTimo Kreuzer else if (y >= small_threshold)
1954afb647cSTimo Kreuzer {
1964afb647cSTimo Kreuzer /* In this range y is large enough so that
1974afb647cSTimo Kreuzer the negative exponential is negligible,
1984afb647cSTimo Kreuzer so cosh(y) is approximated by sign(x)*exp(y)/2. The
1994afb647cSTimo Kreuzer code below is an inlined version of that from
2004afb647cSTimo Kreuzer exp() with two changes (it operates on
2014afb647cSTimo Kreuzer y instead of x, and the division by 2 is
2024afb647cSTimo Kreuzer done by reducing m by 1). */
2034afb647cSTimo Kreuzer
2044afb647cSTimo Kreuzer splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
2054afb647cSTimo Kreuzer log2_by_32_tail, &m, &z1, &z2);
2064afb647cSTimo Kreuzer m -= 1;
2074afb647cSTimo Kreuzer
2084afb647cSTimo Kreuzer /* scaleDouble_1 is always safe because the argument x was
2094afb647cSTimo Kreuzer float, rather than double */
2104afb647cSTimo Kreuzer z = scaleDouble_1((z1+z2),m);
2114afb647cSTimo Kreuzer }
2124afb647cSTimo Kreuzer else
2134afb647cSTimo Kreuzer {
2144afb647cSTimo Kreuzer /* In this range we find the integer part y0 of y
2154afb647cSTimo Kreuzer and the increment dy = y - y0. We then compute
2164afb647cSTimo Kreuzer
2174afb647cSTimo Kreuzer z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
2184afb647cSTimo Kreuzer z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
2194afb647cSTimo Kreuzer
2204afb647cSTimo Kreuzer where sinh(y0) and cosh(y0) are tabulated above. */
2214afb647cSTimo Kreuzer
2224afb647cSTimo Kreuzer int ind;
2234afb647cSTimo Kreuzer double dy, dy2, sdy, cdy;
2244afb647cSTimo Kreuzer
2254afb647cSTimo Kreuzer ind = (int)y;
2264afb647cSTimo Kreuzer dy = y - ind;
2274afb647cSTimo Kreuzer
2284afb647cSTimo Kreuzer dy2 = dy*dy;
2294afb647cSTimo Kreuzer
2304afb647cSTimo Kreuzer sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
2314afb647cSTimo Kreuzer (0.833333333333329931873097e-2 +
2324afb647cSTimo Kreuzer (0.198412698413242405162014e-3 +
2334afb647cSTimo Kreuzer (0.275573191913636406057211e-5 +
2344afb647cSTimo Kreuzer (0.250521176994133472333666e-7 +
2354afb647cSTimo Kreuzer (0.160576793121939886190847e-9 +
2364afb647cSTimo Kreuzer 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
2374afb647cSTimo Kreuzer
2384afb647cSTimo Kreuzer cdy = 1 + dy2*(0.500000000000000005911074e0 +
2394afb647cSTimo Kreuzer (0.416666666666660876512776e-1 +
2404afb647cSTimo Kreuzer (0.138888888889814854814536e-2 +
2414afb647cSTimo Kreuzer (0.248015872460622433115785e-4 +
2424afb647cSTimo Kreuzer (0.275573350756016588011357e-6 +
2434afb647cSTimo Kreuzer (0.208744349831471353536305e-8 +
2444afb647cSTimo Kreuzer 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
2454afb647cSTimo Kreuzer
2464afb647cSTimo Kreuzer z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy;
2474afb647cSTimo Kreuzer }
2484afb647cSTimo Kreuzer
2494afb647cSTimo Kreuzer // if (xneg) z = - z;
2504afb647cSTimo Kreuzer return (float)z;
2514afb647cSTimo Kreuzer }
252