1 2 /******************************************************************************* 3 MIT License 4 ----------- 5 6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy 9 of this Software and associated documentaon files (the "Software"), to deal 10 in the Software without restriction, including without limitation the rights 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 copies of the Software, and to permit persons to whom the Software is 13 furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 THE SOFTWARE. 25 *******************************************************************************/ 26 27 #include "libm.h" 28 #include "libm_util.h" 29 30 #define USE_SPLITEXP 31 #define USE_SCALEDOUBLE_1 32 #define USE_SCALEDOUBLE_2 33 #define USE_INFINITYF_WITH_FLAGS 34 #define USE_VALF_WITH_FLAGS 35 #define USE_HANDLE_ERRORF 36 #include "libm_inlines.h" 37 #undef USE_SPLITEXP 38 #undef USE_SCALEDOUBLE_1 39 #undef USE_SCALEDOUBLE_2 40 #undef USE_INFINITYF_WITH_FLAGS 41 #undef USE_VALF_WITH_FLAGS 42 #undef USE_HANDLE_ERRORF 43 44 #ifdef _MSC_VER 45 // Disable "C4163: not available as intrinsic function" warning that older 46 // compilers may issue here. 47 #pragma warning(disable:4163) 48 #pragma function(coshf) 49 #endif 50 51 float coshf(float fx) 52 { 53 /* 54 After dealing with special cases the computation is split into 55 regions as follows: 56 57 abs(x) >= max_cosh_arg: 58 cosh(x) = sign(x)*Inf 59 60 abs(x) >= small_threshold: 61 cosh(x) = sign(x)*exp(abs(x))/2 computed using the 62 splitexp and scaleDouble functions as for exp_amd(). 63 64 abs(x) < small_threshold: 65 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 66 cosh(x) is then sign(x)*z. */ 67 68 static const double 69 /* The max argument of coshf, but stored as a double */ 70 max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ 71 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 72 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 73 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 74 75 small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 76 // small_threshold = 20.0; 77 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 78 79 /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ 80 81 static const double sinh_lead[ 37] = { 82 0.00000000000000000000e+00, /* 0x0000000000000000 */ 83 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ 84 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ 85 1.00178749274099008204e+01, /* 0x40240926e70949ad */ 86 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ 87 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ 88 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ 89 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ 90 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ 91 4.05154190208278987484e+03, /* 0x40afa7157430966f */ 92 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ 93 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ 94 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ 95 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ 96 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ 97 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ 98 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ 99 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ 100 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ 101 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 102 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ 103 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 104 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 105 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 106 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 107 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 108 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 109 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 110 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 111 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 112 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 113 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 114 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 115 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 116 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 117 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 118 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 119 120 static const double cosh_lead[ 37] = { 121 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 122 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ 123 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ 124 1.00676619957777653269e+01, /* 0x402422a497d6185e */ 125 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ 126 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ 127 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ 128 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ 129 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ 130 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ 131 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ 132 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ 133 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ 134 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ 135 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ 136 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ 137 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ 138 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ 139 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ 140 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 141 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ 142 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 143 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 144 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 145 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 146 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 147 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 148 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 149 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 150 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 151 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 152 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 153 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 154 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 155 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 156 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 157 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 158 159 unsigned long long ux, aux, xneg; 160 unsigned int uhx; 161 double x = fx, y, z, z1, z2; 162 int m; 163 164 /* Special cases */ 165 166 GET_BITS_DP64(x, ux); 167 aux = ux & ~SIGNBIT_DP64; 168 if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */ 169 { 170 if (aux == 0) return (float)1.0; /* with no inexact */ 171 if (LAMBDA_DP64 + x > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */ 172 } 173 else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ 174 { 175 if (aux > PINFBITPATT_DP64) /* x is NaN */ 176 { 177 GET_BITS_SP32(fx, uhx); 178 return _handle_errorf("coshf",OP_COSH,uhx|0x00400000,_DOMAIN, 0, 179 EDOM, fx, 0.0, 1); 180 } 181 else /* x is infinity */ 182 return infinityf_with_flags(0); 183 } 184 xneg = (aux != ux); 185 186 y = x; 187 if (xneg) y = -x; 188 189 if (y >= max_cosh_arg) 190 /* Return +infinity with overflow flag. */ 191 return _handle_errorf("coshf",OP_COSH,PINFBITPATT_SP32,_OVERFLOW, 192 AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, fx, 0.0, 1); 193 // z = infinity_with_flags(AMD_F_OVERFLOW); 194 else if (y >= small_threshold) 195 { 196 /* In this range y is large enough so that 197 the negative exponential is negligible, 198 so cosh(y) is approximated by sign(x)*exp(y)/2. The 199 code below is an inlined version of that from 200 exp() with two changes (it operates on 201 y instead of x, and the division by 2 is 202 done by reducing m by 1). */ 203 204 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 205 log2_by_32_tail, &m, &z1, &z2); 206 m -= 1; 207 208 /* scaleDouble_1 is always safe because the argument x was 209 float, rather than double */ 210 z = scaleDouble_1((z1+z2),m); 211 } 212 else 213 { 214 /* In this range we find the integer part y0 of y 215 and the increment dy = y - y0. We then compute 216 217 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 218 z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) 219 220 where sinh(y0) and cosh(y0) are tabulated above. */ 221 222 int ind; 223 double dy, dy2, sdy, cdy; 224 225 ind = (int)y; 226 dy = y - ind; 227 228 dy2 = dy*dy; 229 230 sdy = dy + dy*dy2*(0.166666666666666667013899e0 + 231 (0.833333333333329931873097e-2 + 232 (0.198412698413242405162014e-3 + 233 (0.275573191913636406057211e-5 + 234 (0.250521176994133472333666e-7 + 235 (0.160576793121939886190847e-9 + 236 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 237 238 cdy = 1 + dy2*(0.500000000000000005911074e0 + 239 (0.416666666666660876512776e-1 + 240 (0.138888888889814854814536e-2 + 241 (0.248015872460622433115785e-4 + 242 (0.275573350756016588011357e-6 + 243 (0.208744349831471353536305e-8 + 244 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 245 246 z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy; 247 } 248 249 // if (xneg) z = - z; 250 return (float)z; 251 } 252