1
2 /*******************************************************************************
3 MIT License
4 -----------
5
6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this Software and associated documentaon files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 *******************************************************************************/
26
27 #include "libm.h"
28 #include "libm_util.h"
29
30 #define USE_SPLITEXP
31 #define USE_SCALEDOUBLE_1
32 #define USE_SCALEDOUBLE_2
33 #define USE_INFINITYF_WITH_FLAGS
34 #define USE_VALF_WITH_FLAGS
35 #define USE_HANDLE_ERRORF
36 #include "libm_inlines.h"
37 #undef USE_SPLITEXP
38 #undef USE_SCALEDOUBLE_1
39 #undef USE_SCALEDOUBLE_2
40 #undef USE_INFINITYF_WITH_FLAGS
41 #undef USE_VALF_WITH_FLAGS
42 #undef USE_HANDLE_ERRORF
43
44 #ifdef _MSC_VER
45 // Disable "C4163: not available as intrinsic function" warning that older
46 // compilers may issue here.
47 #pragma warning(disable:4163)
48 #pragma function(coshf)
49 #endif
50
coshf(float fx)51 float coshf(float fx)
52 {
53 /*
54 After dealing with special cases the computation is split into
55 regions as follows:
56
57 abs(x) >= max_cosh_arg:
58 cosh(x) = sign(x)*Inf
59
60 abs(x) >= small_threshold:
61 cosh(x) = sign(x)*exp(abs(x))/2 computed using the
62 splitexp and scaleDouble functions as for exp_amd().
63
64 abs(x) < small_threshold:
65 compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
66 cosh(x) is then sign(x)*z. */
67
68 static const double
69 /* The max argument of coshf, but stored as a double */
70 max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
71 thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
72 log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
73 log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
74
75 small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
76 // small_threshold = 20.0;
77 /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
78
79 /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
80
81 static const double sinh_lead[ 37] = {
82 0.00000000000000000000e+00, /* 0x0000000000000000 */
83 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */
84 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */
85 1.00178749274099008204e+01, /* 0x40240926e70949ad */
86 2.72899171971277496596e+01, /* 0x403b4a3803703630 */
87 7.42032105777887522891e+01, /* 0x40528d0166f07374 */
88 2.01713157370279219549e+02, /* 0x406936d22f67c805 */
89 5.48316123273246489589e+02, /* 0x408122876ba380c9 */
90 1.49047882578955000099e+03, /* 0x409749ea514eca65 */
91 4.05154190208278987484e+03, /* 0x40afa7157430966f */
92 1.10132328747033916443e+04, /* 0x40c5829dced69991 */
93 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */
94 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */
95 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */
96 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */
97 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */
98 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */
99 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */
100 3.28299845686652474105e+07, /* 0x417f4f22091940bb */
101 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
102 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */
103 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
104 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
105 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
106 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
107 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
108 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
109 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
110 7.23128532145737548828e+11, /* 0x42650bba3796379a */
111 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
112 5.34323729076223046875e+12, /* 0x429370470aec28ec */
113 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
114 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
115 1.07321789892958031250e+14, /* 0x42d866f34a725782 */
116 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
117 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
118 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
119
120 static const double cosh_lead[ 37] = {
121 1.00000000000000000000e+00, /* 0x3ff0000000000000 */
122 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */
123 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */
124 1.00676619957777653269e+01, /* 0x402422a497d6185e */
125 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */
126 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */
127 2.01715636122455890700e+02, /* 0x406936e67db9b919 */
128 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */
129 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */
130 4.05154202549259389343e+03, /* 0x40afa715845d8894 */
131 1.10132329201033226127e+04, /* 0x40c5829dd053712d */
132 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */
133 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */
134 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */
135 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */
136 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */
137 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */
138 1.20774763767876680940e+07, /* 0x416709348c0ea503 */
139 3.28299845686652623117e+07, /* 0x417f4f22091940bf */
140 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
141 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */
142 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
143 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
144 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
145 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
146 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
147 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
148 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
149 7.23128532145737548828e+11, /* 0x42650bba3796379a */
150 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
151 5.34323729076223046875e+12, /* 0x429370470aec28ec */
152 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
153 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
154 1.07321789892958031250e+14, /* 0x42d866f34a725782 */
155 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
156 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
157 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
158
159 unsigned long long ux, aux, xneg;
160 unsigned int uhx;
161 double x = fx, y, z, z1, z2;
162 int m;
163
164 /* Special cases */
165
166 GET_BITS_DP64(x, ux);
167 aux = ux & ~SIGNBIT_DP64;
168 if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */
169 {
170 if (aux == 0) return (float)1.0; /* with no inexact */
171 if (LAMBDA_DP64 + x > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */
172 }
173 else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
174 {
175 if (aux > PINFBITPATT_DP64) /* x is NaN */
176 {
177 GET_BITS_SP32(fx, uhx);
178 return _handle_errorf("coshf",OP_COSH,uhx|0x00400000,_DOMAIN, 0,
179 EDOM, fx, 0.0, 1);
180 }
181 else /* x is infinity */
182 return infinityf_with_flags(0);
183 }
184 xneg = (aux != ux);
185
186 y = x;
187 if (xneg) y = -x;
188
189 if (y >= max_cosh_arg)
190 /* Return +infinity with overflow flag. */
191 return _handle_errorf("coshf",OP_COSH,PINFBITPATT_SP32,_OVERFLOW,
192 AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, fx, 0.0, 1);
193 // z = infinity_with_flags(AMD_F_OVERFLOW);
194 else if (y >= small_threshold)
195 {
196 /* In this range y is large enough so that
197 the negative exponential is negligible,
198 so cosh(y) is approximated by sign(x)*exp(y)/2. The
199 code below is an inlined version of that from
200 exp() with two changes (it operates on
201 y instead of x, and the division by 2 is
202 done by reducing m by 1). */
203
204 splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
205 log2_by_32_tail, &m, &z1, &z2);
206 m -= 1;
207
208 /* scaleDouble_1 is always safe because the argument x was
209 float, rather than double */
210 z = scaleDouble_1((z1+z2),m);
211 }
212 else
213 {
214 /* In this range we find the integer part y0 of y
215 and the increment dy = y - y0. We then compute
216
217 z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
218 z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
219
220 where sinh(y0) and cosh(y0) are tabulated above. */
221
222 int ind;
223 double dy, dy2, sdy, cdy;
224
225 ind = (int)y;
226 dy = y - ind;
227
228 dy2 = dy*dy;
229
230 sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
231 (0.833333333333329931873097e-2 +
232 (0.198412698413242405162014e-3 +
233 (0.275573191913636406057211e-5 +
234 (0.250521176994133472333666e-7 +
235 (0.160576793121939886190847e-9 +
236 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
237
238 cdy = 1 + dy2*(0.500000000000000005911074e0 +
239 (0.416666666666660876512776e-1 +
240 (0.138888888889814854814536e-2 +
241 (0.248015872460622433115785e-4 +
242 (0.275573350756016588011357e-6 +
243 (0.208744349831471353536305e-8 +
244 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
245
246 z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy;
247 }
248
249 // if (xneg) z = - z;
250 return (float)z;
251 }
252