1 2 /******************************************************************************* 3 MIT License 4 ----------- 5 6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy 9 of this Software and associated documentaon files (the "Software"), to deal 10 in the Software without restriction, including without limitation the rights 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 copies of the Software, and to permit persons to whom the Software is 13 furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 THE SOFTWARE. 25 *******************************************************************************/ 26 27 #include "libm.h" 28 #include "libm_util.h" 29 30 #define USE_SPLITEXP 31 #define USE_SCALEDOUBLE_1 32 #define USE_INFINITY_WITH_FLAGS 33 #define USE_VALF_WITH_FLAGS 34 #define USE_HANDLE_ERRORF 35 #include "libm_inlines.h" 36 #undef USE_SPLITEXP 37 #undef USE_SCALEDOUBLE_1 38 #undef USE_INFINITY_WITH_FLAGS 39 #undef USE_VALF_WITH_FLAGS 40 #undef USE_HANDLE_ERRORF 41 42 #include "libm_errno.h" 43 44 #ifdef _MSC_VER 45 // Disable "C4163: not available as intrinsic function" warning that older 46 // compilers may issue here. 47 #pragma warning(disable:4163) 48 #pragma function(sinhf) 49 #endif 50 51 float sinhf(float fx) 52 { 53 /* 54 After dealing with special cases the computation is split into 55 regions as follows: 56 57 abs(x) >= max_sinh_arg: 58 sinh(x) = sign(x)*Inf 59 60 abs(x) >= small_threshold: 61 sinh(x) = sign(x)*exp(abs(x))/2 computed using the 62 splitexp and scaleDouble functions as for exp_amd(). 63 64 abs(x) < small_threshold: 65 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 66 sinh(x) is then sign(x)*z. */ 67 68 static const double 69 /* The max argument of sinhf, but stored as a double */ 70 max_sinh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ 71 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 72 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 73 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 74 small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 75 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 76 77 /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ 78 79 static const double sinh_lead[37] = { 80 0.00000000000000000000e+00, /* 0x0000000000000000 */ 81 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ 82 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ 83 1.00178749274099008204e+01, /* 0x40240926e70949ad */ 84 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ 85 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ 86 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ 87 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ 88 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ 89 4.05154190208278987484e+03, /* 0x40afa7157430966f */ 90 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ 91 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ 92 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ 93 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ 94 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ 95 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ 96 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ 97 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ 98 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ 99 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 100 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ 101 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 102 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 103 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 104 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 105 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 106 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 107 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 108 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 109 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 110 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 111 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 112 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 113 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 114 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 115 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 116 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 117 118 static const double cosh_lead[37] = { 119 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 120 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ 121 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ 122 1.00676619957777653269e+01, /* 0x402422a497d6185e */ 123 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ 124 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ 125 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ 126 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ 127 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ 128 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ 129 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ 130 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ 131 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ 132 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ 133 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ 134 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ 135 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ 136 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ 137 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ 138 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ 139 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ 140 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ 141 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ 142 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ 143 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ 144 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ 145 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ 146 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ 147 7.23128532145737548828e+11, /* 0x42650bba3796379a */ 148 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ 149 5.34323729076223046875e+12, /* 0x429370470aec28ec */ 150 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ 151 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ 152 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ 153 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ 154 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ 155 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ 156 157 unsigned long long ux, aux, xneg; 158 double x = fx, y, z, z1, z2; 159 int m; 160 161 /* Special cases */ 162 163 GET_BITS_DP64(x, ux); 164 aux = ux & ~SIGNBIT_DP64; 165 if (aux < 0x3f10000000000000) /* |x| small enough that sinh(x) = x */ 166 { 167 if (aux == 0) 168 /* with no inexact */ 169 return fx; 170 else 171 return valf_with_flags(fx, AMD_F_INEXACT); 172 } 173 else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */ 174 { 175 if (aux > 0x7ff0000000000000) 176 { 177 /* x is NaN */ 178 unsigned int uhx; 179 GET_BITS_SP32(fx, uhx); 180 return _handle_errorf("sinhf", OP_SINH, uhx|0x00400000, _DOMAIN, 181 0, EDOM, fx, 0.0F, 1); 182 } 183 else 184 return fx + fx; 185 } 186 187 xneg = (aux != ux); 188 189 y = x; 190 if (xneg) y = -x; 191 192 if (y >= max_sinh_arg) 193 { 194 /* Return infinity with overflow flag. */ 195 if (xneg) 196 return _handle_errorf("sinhf", OP_SINH, NINFBITPATT_SP32, _OVERFLOW, 197 AMD_F_OVERFLOW, ERANGE, fx, 0.0F, 1); 198 else 199 return _handle_errorf("sinhf", OP_SINH, PINFBITPATT_SP32, _OVERFLOW, 200 AMD_F_OVERFLOW, ERANGE, fx, 0.0F, 1); 201 } 202 else if (y >= small_threshold) 203 { 204 /* In this range y is large enough so that 205 the negative exponential is negligible, 206 so sinh(y) is approximated by sign(x)*exp(y)/2. The 207 code below is an inlined version of that from 208 exp() with two changes (it operates on 209 y instead of x, and the division by 2 is 210 done by reducing m by 1). */ 211 212 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 213 log2_by_32_tail, &m, &z1, &z2); 214 m -= 1; 215 /* scaleDouble_1 is always safe because the argument x was 216 float, rather than double */ 217 z = scaleDouble_1((z1+z2),m); 218 } 219 else 220 { 221 /* In this range we find the integer part y0 of y 222 and the increment dy = y - y0. We then compute 223 224 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 225 226 where sinh(y0) and cosh(y0) are tabulated above. */ 227 228 int ind; 229 double dy, dy2, sdy, cdy; 230 231 ind = (int)y; 232 dy = y - ind; 233 234 dy2 = dy*dy; 235 236 sdy = dy + dy*dy2*(0.166666666666666667013899e0 + 237 (0.833333333333329931873097e-2 + 238 (0.198412698413242405162014e-3 + 239 (0.275573191913636406057211e-5 + 240 (0.250521176994133472333666e-7 + 241 (0.160576793121939886190847e-9 + 242 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 243 244 cdy = 1 + dy2*(0.500000000000000005911074e0 + 245 (0.416666666666660876512776e-1 + 246 (0.138888888889814854814536e-2 + 247 (0.248015872460622433115785e-4 + 248 (0.275573350756016588011357e-6 + 249 (0.208744349831471353536305e-8 + 250 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 251 252 z = sinh_lead[ind]*cdy + cosh_lead[ind]*sdy; 253 } 254 255 if (xneg) z = - z; 256 return (float)z; 257 } 258