xref: /reactos/sdk/lib/crt/math/libm_sse2/coshf.c (revision 84344399)
1 
2 /*******************************************************************************
3 MIT License
4 -----------
5 
6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this Software and associated documentaon files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 *******************************************************************************/
26 
27 #include "libm.h"
28 #include "libm_util.h"
29 
30 #define USE_SPLITEXP
31 #define USE_SCALEDOUBLE_1
32 #define USE_SCALEDOUBLE_2
33 #define USE_INFINITYF_WITH_FLAGS
34 #define USE_VALF_WITH_FLAGS
35 #define USE_HANDLE_ERRORF
36 #include "libm_inlines.h"
37 #undef USE_SPLITEXP
38 #undef USE_SCALEDOUBLE_1
39 #undef USE_SCALEDOUBLE_2
40 #undef USE_INFINITYF_WITH_FLAGS
41 #undef USE_VALF_WITH_FLAGS
42 #undef USE_HANDLE_ERRORF
43 
44 #ifdef _MSC_VER
45 // Disable "C4163: not available as intrinsic function" warning that older
46 // compilers may issue here.
47 #pragma warning(disable:4163)
48 #pragma function(coshf)
49 #endif
50 
51 float coshf(float fx)
52 {
53   /*
54     After dealing with special cases the computation is split into
55     regions as follows:
56 
57     abs(x) >= max_cosh_arg:
58     cosh(x) = sign(x)*Inf
59 
60     abs(x) >= small_threshold:
61     cosh(x) = sign(x)*exp(abs(x))/2 computed using the
62     splitexp and scaleDouble functions as for exp_amd().
63 
64     abs(x) < small_threshold:
65     compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
66     cosh(x) is then sign(x)*z.                             */
67 
68   static const double
69     /* The max argument of coshf, but stored as a double */
70     max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
71     thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
72     log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
73     log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
74 
75     small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
76 //    small_threshold = 20.0;
77   /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
78 
79   /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
80 
81   static const double sinh_lead[   37] = {
82     0.00000000000000000000e+00,  /* 0x0000000000000000 */
83     1.17520119364380137839e+00,  /* 0x3ff2cd9fc44eb982 */
84     3.62686040784701857476e+00,  /* 0x400d03cf63b6e19f */
85     1.00178749274099008204e+01,  /* 0x40240926e70949ad */
86     2.72899171971277496596e+01,  /* 0x403b4a3803703630 */
87     7.42032105777887522891e+01,  /* 0x40528d0166f07374 */
88     2.01713157370279219549e+02,  /* 0x406936d22f67c805 */
89     5.48316123273246489589e+02,  /* 0x408122876ba380c9 */
90     1.49047882578955000099e+03,  /* 0x409749ea514eca65 */
91     4.05154190208278987484e+03,  /* 0x40afa7157430966f */
92     1.10132328747033916443e+04,  /* 0x40c5829dced69991 */
93     2.99370708492480553105e+04,  /* 0x40dd3c4488cb48d6 */
94     8.13773957064298447222e+04,  /* 0x40f3de1654d043f0 */
95     2.21206696003330085659e+05,  /* 0x410b00b5916a31a5 */
96     6.01302142081972560845e+05,  /* 0x412259ac48bef7e3 */
97     1.63450868623590236530e+06,  /* 0x4138f0ccafad27f6 */
98     4.44305526025387924165e+06,  /* 0x4150f2ebd0a7ffe3 */
99     1.20774763767876271158e+07,  /* 0x416709348c0ea4ed */
100     3.28299845686652474105e+07,  /* 0x417f4f22091940bb */
101     8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
102     2.42582597704895108938e+08,  /* 0x41aceb088b68e803 */
103     6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
104     1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
105     4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
106     1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
107     3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
108     9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
109     2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
110     7.23128532145737548828e+11,  /* 0x42650bba3796379a */
111     1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
112     5.34323729076223046875e+12,  /* 0x429370470aec28ec */
113     1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
114     3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
115     1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
116     2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
117     7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
118     2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
119 
120   static const double cosh_lead[   37] = {
121     1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
122     1.54308063481524371241e+00,  /* 0x3ff8b07551d9f550 */
123     3.76219569108363138810e+00,  /* 0x400e18fa0df2d9bc */
124     1.00676619957777653269e+01,  /* 0x402422a497d6185e */
125     2.73082328360164865444e+01,  /* 0x403b4ee858de3e80 */
126     7.42099485247878334349e+01,  /* 0x40528d6fcbeff3a9 */
127     2.01715636122455890700e+02,  /* 0x406936e67db9b919 */
128     5.48317035155212010977e+02,  /* 0x4081228949ba3a8b */
129     1.49047916125217807348e+03,  /* 0x409749eaa93f4e76 */
130     4.05154202549259389343e+03,  /* 0x40afa715845d8894 */
131     1.10132329201033226127e+04,  /* 0x40c5829dd053712d */
132     2.99370708659497577173e+04,  /* 0x40dd3c4489115627 */
133     8.13773957125740562333e+04,  /* 0x40f3de1654d6b543 */
134     2.21206696005590405548e+05,  /* 0x410b00b5916b6105 */
135     6.01302142082804115489e+05,  /* 0x412259ac48bf13ca */
136     1.63450868623620807193e+06,  /* 0x4138f0ccafad2d17 */
137     4.44305526025399193168e+06,  /* 0x4150f2ebd0a8005c */
138     1.20774763767876680940e+07,  /* 0x416709348c0ea503 */
139     3.28299845686652623117e+07,  /* 0x417f4f22091940bf */
140     8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
141     2.42582597704895138741e+08,  /* 0x41aceb088b68e804 */
142     6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
143     1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
144     4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
145     1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
146     3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
147     9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
148     2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
149     7.23128532145737548828e+11,  /* 0x42650bba3796379a */
150     1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
151     5.34323729076223046875e+12,  /* 0x429370470aec28ec */
152     1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
153     3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
154     1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
155     2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
156     7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
157     2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
158 
159   unsigned long long ux, aux, xneg;
160   unsigned int uhx;
161   double x = fx, y, z, z1, z2;
162   int m;
163 
164   /* Special cases */
165 
166   GET_BITS_DP64(x, ux);
167   aux = ux & ~SIGNBIT_DP64;
168   if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */
169     {
170       if (aux == 0) return (float)1.0; /* with no inexact */
171       if (LAMBDA_DP64 + x  > 1.0) return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */
172     }
173   else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
174     {
175       if (aux > PINFBITPATT_DP64) /* x is NaN */
176       {
177         GET_BITS_SP32(fx, uhx);
178         return _handle_errorf("coshf",OP_COSH,uhx|0x00400000,_DOMAIN, 0,
179                         EDOM, fx, 0.0, 1);
180       }
181       else     /* x is infinity */
182         return infinityf_with_flags(0);
183     }
184   xneg = (aux != ux);
185 
186   y = x;
187   if (xneg) y = -x;
188 
189   if (y >= max_cosh_arg)
190     /* Return +infinity with overflow flag. */
191          return _handle_errorf("coshf",OP_COSH,PINFBITPATT_SP32,_OVERFLOW,
192                         AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, fx, 0.0, 1);
193 //    z = infinity_with_flags(AMD_F_OVERFLOW);
194   else if (y >= small_threshold)
195     {
196       /* In this range y is large enough so that
197          the negative exponential is negligible,
198          so cosh(y) is approximated by sign(x)*exp(y)/2. The
199          code below is an inlined version of that from
200          exp() with two changes (it operates on
201          y instead of x, and the division by 2 is
202          done by reducing m by 1). */
203 
204       splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
205                log2_by_32_tail, &m, &z1, &z2);
206       m -= 1;
207 
208       /* scaleDouble_1 is always safe because the argument x was
209          float, rather than double */
210       z = scaleDouble_1((z1+z2),m);
211     }
212   else
213     {
214       /* In this range we find the integer part y0 of y
215          and the increment dy = y - y0. We then compute
216 
217          z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
218          z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
219 
220          where sinh(y0) and cosh(y0) are tabulated above. */
221 
222       int ind;
223       double dy, dy2, sdy, cdy;
224 
225       ind = (int)y;
226       dy = y - ind;
227 
228       dy2 = dy*dy;
229 
230       sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
231 			 (0.833333333333329931873097e-2 +
232 			  (0.198412698413242405162014e-3 +
233 			   (0.275573191913636406057211e-5 +
234 			    (0.250521176994133472333666e-7 +
235 			     (0.160576793121939886190847e-9 +
236 			      0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
237 
238       cdy = 1 + dy2*(0.500000000000000005911074e0 +
239 		     (0.416666666666660876512776e-1 +
240 		      (0.138888888889814854814536e-2 +
241 		       (0.248015872460622433115785e-4 +
242 			(0.275573350756016588011357e-6 +
243 			 (0.208744349831471353536305e-8 +
244 			  0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
245 
246       z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy;
247     }
248 
249 //  if (xneg) z = - z;
250   return (float)z;
251 }
252