1 2 /******************************************************************************* 3 MIT License 4 ----------- 5 6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy 9 of this Software and associated documentaon files (the "Software"), to deal 10 in the Software without restriction, including without limitation the rights 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 copies of the Software, and to permit persons to whom the Software is 13 furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 THE SOFTWARE. 25 *******************************************************************************/ 26 27 #include "libm.h" 28 #include "libm_util.h" 29 30 #define USE_SPLITEXP 31 #define USE_SCALEDOUBLE_1 32 #define USE_SCALEDOUBLE_2 33 #define USE_INFINITY_WITH_FLAGS 34 #define USE_VAL_WITH_FLAGS 35 #define USE_HANDLE_ERROR 36 #include "libm_inlines.h" 37 #undef USE_SPLITEXP 38 #undef USE_SCALEDOUBLE_1 39 #undef USE_SCALEDOUBLE_2 40 #undef USE_INFINITY_WITH_FLAGS 41 #undef USE_VAL_WITH_FLAGS 42 #undef USE_HANDLE_ERROR 43 44 #ifdef _MSC_VER 45 #pragma function(cosh) 46 #endif 47 48 double cosh(double x) 49 { 50 /* 51 Derived from sinh subroutine 52 53 After dealing with special cases the computation is split into 54 regions as follows: 55 56 abs(x) >= max_cosh_arg: 57 cosh(x) = sign(x)*Inf 58 59 abs(x) >= small_threshold: 60 cosh(x) = sign(x)*exp(abs(x))/2 computed using the 61 splitexp and scaleDouble functions as for exp_amd(). 62 63 abs(x) < small_threshold: 64 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) 65 cosh(x) is then sign(x)*z. */ 66 67 static const double 68 max_cosh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */ 69 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 70 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 71 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 72 // small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; 73 small_threshold = 20.0; 74 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ 75 76 /* Lead and tail tabulated values of sinh(i) and cosh(i) 77 for i = 0,...,36. The lead part has 26 leading bits. */ 78 79 static const double sinh_lead[ 37] = { 80 0.00000000000000000000e+00, /* 0x0000000000000000 */ 81 1.17520117759704589844e+00, /* 0x3ff2cd9fc0000000 */ 82 3.62686038017272949219e+00, /* 0x400d03cf60000000 */ 83 1.00178747177124023438e+01, /* 0x40240926e0000000 */ 84 2.72899169921875000000e+01, /* 0x403b4a3800000000 */ 85 7.42032089233398437500e+01, /* 0x40528d0160000000 */ 86 2.01713153839111328125e+02, /* 0x406936d228000000 */ 87 5.48316116333007812500e+02, /* 0x4081228768000000 */ 88 1.49047882080078125000e+03, /* 0x409749ea50000000 */ 89 4.05154187011718750000e+03, /* 0x40afa71570000000 */ 90 1.10132326660156250000e+04, /* 0x40c5829dc8000000 */ 91 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ 92 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ 93 2.21206695312500000000e+05, /* 0x410b00b590000000 */ 94 6.01302140625000000000e+05, /* 0x412259ac48000000 */ 95 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ 96 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ 97 1.20774762500000000000e+07, /* 0x4167093488000000 */ 98 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ 99 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ 100 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ 101 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ 102 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ 103 4.87240166400000000000e+09, /* 0x41f226af30000000 */ 104 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ 105 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ 106 9.78648043520000000000e+10, /* 0x4236c93268000000 */ 107 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ 108 7.23128516608000000000e+11, /* 0x42650bba30000000 */ 109 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ 110 5.34323724288000000000e+12, /* 0x4293704708000000 */ 111 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ 112 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ 113 1.07321789251584000000e+14, /* 0x42d866f348000000 */ 114 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ 115 7.93006722514944000000e+14, /* 0x430689e220000000 */ 116 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ 117 118 static const double sinh_tail[ 37] = { 119 0.00000000000000000000e+00, /* 0x0000000000000000 */ 120 1.60467555584448807892e-08, /* 0x3e513ae6096a0092 */ 121 2.76742892754807136947e-08, /* 0x3e5db70cfb79a640 */ 122 2.09697499555224576530e-07, /* 0x3e8c2526b66dc067 */ 123 2.04940252448908240062e-07, /* 0x3e8b81b18647f380 */ 124 1.65444891522700935932e-06, /* 0x3ebbc1cdd1e1eb08 */ 125 3.53116789999998198721e-06, /* 0x3ecd9f201534fb09 */ 126 6.94023870987375490695e-06, /* 0x3edd1c064a4e9954 */ 127 4.98876893611587449271e-06, /* 0x3ed4eca65d06ea74 */ 128 3.19656024605152215752e-05, /* 0x3f00c259bcc0ecc5 */ 129 2.08687768377236501204e-04, /* 0x3f2b5a6647cf9016 */ 130 4.84668088325403796299e-05, /* 0x3f09691adefb0870 */ 131 1.17517985422733832468e-03, /* 0x3f53410fc29cde38 */ 132 6.90830086959560562415e-04, /* 0x3f46a31a50b6fb3c */ 133 1.45697262451506548420e-03, /* 0x3f57defc71805c40 */ 134 2.99859023684906737806e-02, /* 0x3f9eb49fd80e0bab */ 135 1.02538800507941396667e-02, /* 0x3f84fffc7bcd5920 */ 136 1.26787628407699110022e-01, /* 0x3fc03a93b6c63435 */ 137 6.86652479544033744752e-02, /* 0x3fb1940bb255fd1c */ 138 4.81593627621056619148e-01, /* 0x3fded26e14260b50 */ 139 1.70489513795397629181e+00, /* 0x3ffb47401fc9f2a2 */ 140 1.12416073482258713767e+01, /* 0x40267bb3f55634f1 */ 141 7.06579578070110514432e+00, /* 0x401c435ff8194ddc */ 142 5.91244512999659974639e+01, /* 0x404d8fee052ba63a */ 143 1.68921736147050694399e+02, /* 0x40651d7edccde3f6 */ 144 2.60692936262073658327e+02, /* 0x40704b1644557d1a */ 145 3.62419382134885609048e+02, /* 0x4076a6b5ca0a9dc4 */ 146 4.07689930834187271103e+03, /* 0x40afd9cc72249aba */ 147 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ 148 2.53720210371943067003e+04, /* 0x40d8c70158ac6363 */ 149 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ 150 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ 151 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ 152 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ 153 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ 154 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ 155 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ 156 157 static const double cosh_lead[ 37] = { 158 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ 159 1.54308062791824340820e+00, /* 0x3ff8b07550000000 */ 160 3.76219564676284790039e+00, /* 0x400e18fa08000000 */ 161 1.00676617622375488281e+01, /* 0x402422a490000000 */ 162 2.73082327842712402344e+01, /* 0x403b4ee858000000 */ 163 7.42099475860595703125e+01, /* 0x40528d6fc8000000 */ 164 2.01715633392333984375e+02, /* 0x406936e678000000 */ 165 5.48317031860351562500e+02, /* 0x4081228948000000 */ 166 1.49047915649414062500e+03, /* 0x409749eaa8000000 */ 167 4.05154199218750000000e+03, /* 0x40afa71580000000 */ 168 1.10132329101562500000e+04, /* 0x40c5829dd0000000 */ 169 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ 170 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ 171 2.21206695312500000000e+05, /* 0x410b00b590000000 */ 172 6.01302140625000000000e+05, /* 0x412259ac48000000 */ 173 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ 174 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ 175 1.20774762500000000000e+07, /* 0x4167093488000000 */ 176 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ 177 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ 178 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ 179 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ 180 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ 181 4.87240166400000000000e+09, /* 0x41f226af30000000 */ 182 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ 183 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ 184 9.78648043520000000000e+10, /* 0x4236c93268000000 */ 185 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ 186 7.23128516608000000000e+11, /* 0x42650bba30000000 */ 187 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ 188 5.34323724288000000000e+12, /* 0x4293704708000000 */ 189 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ 190 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ 191 1.07321789251584000000e+14, /* 0x42d866f348000000 */ 192 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ 193 7.93006722514944000000e+14, /* 0x430689e220000000 */ 194 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ 195 196 static const double cosh_tail[ 37] = { 197 0.00000000000000000000e+00, /* 0x0000000000000000 */ 198 6.89700037027478056904e-09, /* 0x3e3d9f5504c2bd28 */ 199 4.43207835591715833630e-08, /* 0x3e67cb66f0a4c9fd */ 200 2.33540217013828929694e-07, /* 0x3e8f58617928e588 */ 201 5.17452463948269748331e-08, /* 0x3e6bc7d000c38d48 */ 202 9.38728274131605919153e-07, /* 0x3eaf7f9d4e329998 */ 203 2.73012191010840495544e-06, /* 0x3ec6e6e464885269 */ 204 3.29486051438996307950e-06, /* 0x3ecba3a8b946c154 */ 205 4.75803746362771416375e-06, /* 0x3ed3f4e76110d5a4 */ 206 3.33050940471947692369e-05, /* 0x3f017622515a3e2b */ 207 9.94707313972136215365e-06, /* 0x3ee4dc4b528af3d0 */ 208 6.51685096227860253398e-05, /* 0x3f11156278615e10 */ 209 1.18132406658066663359e-03, /* 0x3f535ad50ed821f5 */ 210 6.93090416366541877541e-04, /* 0x3f46b61055f2935c */ 211 1.45780415323416845386e-03, /* 0x3f57e2794a601240 */ 212 2.99862082708111758744e-02, /* 0x3f9eb4b45f6aadd3 */ 213 1.02539925859688602072e-02, /* 0x3f85000b967b3698 */ 214 1.26787669807076286421e-01, /* 0x3fc03a940fadc092 */ 215 6.86652631843830962843e-02, /* 0x3fb1940bf3bf874c */ 216 4.81593633223853068159e-01, /* 0x3fded26e1a2a2110 */ 217 1.70489514001513020602e+00, /* 0x3ffb4740205796d6 */ 218 1.12416073489841270572e+01, /* 0x40267bb3f55cb85d */ 219 7.06579578098005001152e+00, /* 0x401c435ff81e18ac */ 220 5.91244513000686140458e+01, /* 0x404d8fee052bdea4 */ 221 1.68921736147088438429e+02, /* 0x40651d7edccde926 */ 222 2.60692936262087528121e+02, /* 0x40704b1644557e0e */ 223 3.62419382134890611269e+02, /* 0x4076a6b5ca0a9e1c */ 224 4.07689930834187453002e+03, /* 0x40afd9cc72249abe */ 225 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ 226 2.53720210371943103382e+04, /* 0x40d8c70158ac6364 */ 227 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ 228 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ 229 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ 230 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ 231 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ 232 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ 233 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ 234 235 unsigned long long ux, aux, xneg; 236 double y, z, z1, z2; 237 int m; 238 239 /* Special cases */ 240 241 GET_BITS_DP64(x, ux); 242 aux = ux & ~SIGNBIT_DP64; 243 if (aux < 0x3e30000000000000) /* |x| small enough that cosh(x) = 1 */ 244 { 245 if (aux == 0) 246 /* with no inexact */ 247 return 1.0; 248 else 249 return val_with_flags(1.0, AMD_F_INEXACT); 250 } 251 else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ 252 { 253 if (aux > PINFBITPATT_DP64) /* x is NaN */ 254 return _handle_error("cosh", OP_COSH, ux|0x0008000000000000,_DOMAIN, 255 0,EDOM, x, 0.0, 1); 256 else /* x is infinity */ 257 return infinity_with_flags(0); 258 } 259 260 xneg = (aux != ux); 261 262 y = x; 263 if (xneg) y = -x; 264 265 if (y >= max_cosh_arg) 266 { 267 return _handle_error("cosh", OP_COSH, PINFBITPATT_DP64,_OVERFLOW, 268 AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, x, 0.0, 1); 269 270 // z = infinity_with_flags(AMD_F_OVERFLOW); 271 } 272 else if (y >= small_threshold) 273 { 274 /* In this range y is large enough so that 275 the negative exponential is negligible, 276 so cosh(y) is approximated by sign(x)*exp(y)/2. The 277 code below is an inlined version of that from 278 exp() with two changes (it operates on 279 y instead of x, and the division by 2 is 280 done by reducing m by 1). */ 281 282 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 283 log2_by_32_tail, &m, &z1, &z2); 284 m -= 1; 285 286 if (m >= EMIN_DP64 && m <= EMAX_DP64) 287 z = scaleDouble_1((z1+z2),m); 288 else 289 z = scaleDouble_2((z1+z2),m); 290 } 291 else 292 { 293 /* In this range we find the integer part y0 of y 294 and the increment dy = y - y0. We then compute 295 296 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) 297 z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) 298 299 where sinh(y0) and cosh(y0) are tabulated above. */ 300 301 int ind; 302 double dy, dy2, sdy, cdy; 303 304 ind = (int)y; 305 dy = y - ind; 306 307 dy2 = dy*dy; 308 sdy = dy*dy2*(0.166666666666666667013899e0 + 309 (0.833333333333329931873097e-2 + 310 (0.198412698413242405162014e-3 + 311 (0.275573191913636406057211e-5 + 312 (0.250521176994133472333666e-7 + 313 (0.160576793121939886190847e-9 + 314 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 315 316 cdy = dy2*(0.500000000000000005911074e0 + 317 (0.416666666666660876512776e-1 + 318 (0.138888888889814854814536e-2 + 319 (0.248015872460622433115785e-4 + 320 (0.275573350756016588011357e-6 + 321 (0.208744349831471353536305e-8 + 322 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); 323 324 /* At this point sinh(dy) is approximated by dy + sdy, and cosh(dy) is approximated by 1 + cdy. 325 Shift some significant bits from dy to cdy. */ 326 #if 0 327 double sdy1,sdy2; 328 GET_BITS_DP64(dy, ux); 329 ux &= 0xfffffffff8000000; 330 PUT_BITS_DP64(ux, sdy1); // sdy1 is upper 53-27=26 significant bits of dy. 331 sdy2 = sdy + (dy - sdy1); // sdy2 is sdy + lower bits of dy 332 333 z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy2) 334 + sinh_tail[ind]*sdy1) + cosh_tail[ind]) 335 + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy2) 336 + sinh_lead[ind]*sdy1) + cosh_lead[ind]; 337 #else 338 z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy) 339 + sinh_tail[ind]*dy) + cosh_tail[ind]) 340 + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy) 341 + sinh_lead[ind]*dy) + cosh_lead[ind]; 342 #endif 343 } 344 345 return z; 346 } 347