xref: /reactos/sdk/lib/crt/math/libm_sse2/cosh.c (revision d326ca1f)
1 
2 /*******************************************************************************
3 MIT License
4 -----------
5 
6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this Software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 *******************************************************************************/
26 
27 #include "libm.h"
28 #include "libm_util.h"
29 
30 #define USE_SPLITEXP
31 #define USE_SCALEDOUBLE_1
32 #define USE_SCALEDOUBLE_2
33 #define USE_INFINITY_WITH_FLAGS
34 #define USE_VAL_WITH_FLAGS
35 #define USE_HANDLE_ERROR
36 #include "libm_inlines.h"
37 #undef USE_SPLITEXP
38 #undef USE_SCALEDOUBLE_1
39 #undef USE_SCALEDOUBLE_2
40 #undef USE_INFINITY_WITH_FLAGS
41 #undef USE_VAL_WITH_FLAGS
42 #undef USE_HANDLE_ERROR
43 
44 #ifdef _MSC_VER
45 #pragma function(cosh)
46 #endif
47 
48 double cosh(double x)
49 {
50   /*
51     Derived from sinh subroutine
52 
53     After dealing with special cases the computation is split into
54     regions as follows:
55 
56     abs(x) >= max_cosh_arg:
57     cosh(x) = sign(x)*Inf
58 
59     abs(x) >= small_threshold:
60     cosh(x) = sign(x)*exp(abs(x))/2 computed using the
61     splitexp and scaleDouble functions as for exp_amd().
62 
63     abs(x) < small_threshold:
64     compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
65     cosh(x) is then sign(x)*z.                             */
66 
67   static const double
68     max_cosh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */
69     thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
70     log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
71     log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
72 //    small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
73     small_threshold = 20.0;
74   /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
75 
76   /* Lead and tail tabulated values of sinh(i) and cosh(i)
77      for i = 0,...,36. The lead part has 26 leading bits. */
78 
79   static const double sinh_lead[   37] = {
80     0.00000000000000000000e+00,  /* 0x0000000000000000 */
81     1.17520117759704589844e+00,  /* 0x3ff2cd9fc0000000 */
82     3.62686038017272949219e+00,  /* 0x400d03cf60000000 */
83     1.00178747177124023438e+01,  /* 0x40240926e0000000 */
84     2.72899169921875000000e+01,  /* 0x403b4a3800000000 */
85     7.42032089233398437500e+01,  /* 0x40528d0160000000 */
86     2.01713153839111328125e+02,  /* 0x406936d228000000 */
87     5.48316116333007812500e+02,  /* 0x4081228768000000 */
88     1.49047882080078125000e+03,  /* 0x409749ea50000000 */
89     4.05154187011718750000e+03,  /* 0x40afa71570000000 */
90     1.10132326660156250000e+04,  /* 0x40c5829dc8000000 */
91     2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
92     8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
93     2.21206695312500000000e+05,  /* 0x410b00b590000000 */
94     6.01302140625000000000e+05,  /* 0x412259ac48000000 */
95     1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
96     4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
97     1.20774762500000000000e+07,  /* 0x4167093488000000 */
98     3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
99     8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
100     2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
101     6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
102     1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
103     4.87240166400000000000e+09,  /* 0x41f226af30000000 */
104     1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
105     3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
106     9.78648043520000000000e+10,  /* 0x4236c93268000000 */
107     2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
108     7.23128516608000000000e+11,  /* 0x42650bba30000000 */
109     1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
110     5.34323724288000000000e+12,  /* 0x4293704708000000 */
111     1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
112     3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
113     1.07321789251584000000e+14,  /* 0x42d866f348000000 */
114     2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
115     7.93006722514944000000e+14,  /* 0x430689e220000000 */
116     2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
117 
118   static const double sinh_tail[   37] = {
119     0.00000000000000000000e+00,  /* 0x0000000000000000 */
120     1.60467555584448807892e-08,  /* 0x3e513ae6096a0092 */
121     2.76742892754807136947e-08,  /* 0x3e5db70cfb79a640 */
122     2.09697499555224576530e-07,  /* 0x3e8c2526b66dc067 */
123     2.04940252448908240062e-07,  /* 0x3e8b81b18647f380 */
124     1.65444891522700935932e-06,  /* 0x3ebbc1cdd1e1eb08 */
125     3.53116789999998198721e-06,  /* 0x3ecd9f201534fb09 */
126     6.94023870987375490695e-06,  /* 0x3edd1c064a4e9954 */
127     4.98876893611587449271e-06,  /* 0x3ed4eca65d06ea74 */
128     3.19656024605152215752e-05,  /* 0x3f00c259bcc0ecc5 */
129     2.08687768377236501204e-04,  /* 0x3f2b5a6647cf9016 */
130     4.84668088325403796299e-05,  /* 0x3f09691adefb0870 */
131     1.17517985422733832468e-03,  /* 0x3f53410fc29cde38 */
132     6.90830086959560562415e-04,  /* 0x3f46a31a50b6fb3c */
133     1.45697262451506548420e-03,  /* 0x3f57defc71805c40 */
134     2.99859023684906737806e-02,  /* 0x3f9eb49fd80e0bab */
135     1.02538800507941396667e-02,  /* 0x3f84fffc7bcd5920 */
136     1.26787628407699110022e-01,  /* 0x3fc03a93b6c63435 */
137     6.86652479544033744752e-02,  /* 0x3fb1940bb255fd1c */
138     4.81593627621056619148e-01,  /* 0x3fded26e14260b50 */
139     1.70489513795397629181e+00,  /* 0x3ffb47401fc9f2a2 */
140     1.12416073482258713767e+01,  /* 0x40267bb3f55634f1 */
141     7.06579578070110514432e+00,  /* 0x401c435ff8194ddc */
142     5.91244512999659974639e+01,  /* 0x404d8fee052ba63a */
143     1.68921736147050694399e+02,  /* 0x40651d7edccde3f6 */
144     2.60692936262073658327e+02,  /* 0x40704b1644557d1a */
145     3.62419382134885609048e+02,  /* 0x4076a6b5ca0a9dc4 */
146     4.07689930834187271103e+03,  /* 0x40afd9cc72249aba */
147     1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
148     2.53720210371943067003e+04,  /* 0x40d8c70158ac6363 */
149     4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
150     1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
151     5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
152     6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
153     7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
154     3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
155     7.63580561355670914054e+06}; /* 0x415d20d76744835c */
156 
157   static const double cosh_lead[   37] = {
158     1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
159     1.54308062791824340820e+00,  /* 0x3ff8b07550000000 */
160     3.76219564676284790039e+00,  /* 0x400e18fa08000000 */
161     1.00676617622375488281e+01,  /* 0x402422a490000000 */
162     2.73082327842712402344e+01,  /* 0x403b4ee858000000 */
163     7.42099475860595703125e+01,  /* 0x40528d6fc8000000 */
164     2.01715633392333984375e+02,  /* 0x406936e678000000 */
165     5.48317031860351562500e+02,  /* 0x4081228948000000 */
166     1.49047915649414062500e+03,  /* 0x409749eaa8000000 */
167     4.05154199218750000000e+03,  /* 0x40afa71580000000 */
168     1.10132329101562500000e+04,  /* 0x40c5829dd0000000 */
169     2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
170     8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
171     2.21206695312500000000e+05,  /* 0x410b00b590000000 */
172     6.01302140625000000000e+05,  /* 0x412259ac48000000 */
173     1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
174     4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
175     1.20774762500000000000e+07,  /* 0x4167093488000000 */
176     3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
177     8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
178     2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
179     6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
180     1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
181     4.87240166400000000000e+09,  /* 0x41f226af30000000 */
182     1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
183     3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
184     9.78648043520000000000e+10,  /* 0x4236c93268000000 */
185     2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
186     7.23128516608000000000e+11,  /* 0x42650bba30000000 */
187     1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
188     5.34323724288000000000e+12,  /* 0x4293704708000000 */
189     1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
190     3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
191     1.07321789251584000000e+14,  /* 0x42d866f348000000 */
192     2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
193     7.93006722514944000000e+14,  /* 0x430689e220000000 */
194     2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
195 
196   static const double cosh_tail[   37] = {
197     0.00000000000000000000e+00,  /* 0x0000000000000000 */
198     6.89700037027478056904e-09,  /* 0x3e3d9f5504c2bd28 */
199     4.43207835591715833630e-08,  /* 0x3e67cb66f0a4c9fd */
200     2.33540217013828929694e-07,  /* 0x3e8f58617928e588 */
201     5.17452463948269748331e-08,  /* 0x3e6bc7d000c38d48 */
202     9.38728274131605919153e-07,  /* 0x3eaf7f9d4e329998 */
203     2.73012191010840495544e-06,  /* 0x3ec6e6e464885269 */
204     3.29486051438996307950e-06,  /* 0x3ecba3a8b946c154 */
205     4.75803746362771416375e-06,  /* 0x3ed3f4e76110d5a4 */
206     3.33050940471947692369e-05,  /* 0x3f017622515a3e2b */
207     9.94707313972136215365e-06,  /* 0x3ee4dc4b528af3d0 */
208     6.51685096227860253398e-05,  /* 0x3f11156278615e10 */
209     1.18132406658066663359e-03,  /* 0x3f535ad50ed821f5 */
210     6.93090416366541877541e-04,  /* 0x3f46b61055f2935c */
211     1.45780415323416845386e-03,  /* 0x3f57e2794a601240 */
212     2.99862082708111758744e-02,  /* 0x3f9eb4b45f6aadd3 */
213     1.02539925859688602072e-02,  /* 0x3f85000b967b3698 */
214     1.26787669807076286421e-01,  /* 0x3fc03a940fadc092 */
215     6.86652631843830962843e-02,  /* 0x3fb1940bf3bf874c */
216     4.81593633223853068159e-01,  /* 0x3fded26e1a2a2110 */
217     1.70489514001513020602e+00,  /* 0x3ffb4740205796d6 */
218     1.12416073489841270572e+01,  /* 0x40267bb3f55cb85d */
219     7.06579578098005001152e+00,  /* 0x401c435ff81e18ac */
220     5.91244513000686140458e+01,  /* 0x404d8fee052bdea4 */
221     1.68921736147088438429e+02,  /* 0x40651d7edccde926 */
222     2.60692936262087528121e+02,  /* 0x40704b1644557e0e */
223     3.62419382134890611269e+02,  /* 0x4076a6b5ca0a9e1c */
224     4.07689930834187453002e+03,  /* 0x40afd9cc72249abe */
225     1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
226     2.53720210371943103382e+04,  /* 0x40d8c70158ac6364 */
227     4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
228     1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
229     5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
230     6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
231     7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
232     3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
233     7.63580561355670914054e+06}; /* 0x415d20d76744835c */
234 
235   unsigned long long ux, aux, xneg;
236   double y, z, z1, z2;
237   int m;
238 
239   /* Special cases */
240 
241   GET_BITS_DP64(x, ux);
242   aux = ux & ~SIGNBIT_DP64;
243   if (aux < 0x3e30000000000000) /* |x| small enough that cosh(x) = 1 */
244   {
245       if (aux == 0)
246         /* with no inexact */
247         return 1.0;
248       else
249         return val_with_flags(1.0, AMD_F_INEXACT);
250   }
251   else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */
252   {
253       if (aux > PINFBITPATT_DP64) /* x is NaN */
254         return _handle_error("cosh", OP_COSH, ux|0x0008000000000000,_DOMAIN,
255                             0,EDOM, x, 0.0, 1);
256       else     /* x is infinity */
257         return infinity_with_flags(0);
258   }
259 
260   xneg = (aux != ux);
261 
262   y = x;
263   if (xneg) y = -x;
264 
265   if (y >= max_cosh_arg)
266       {
267              return _handle_error("cosh", OP_COSH, PINFBITPATT_DP64,_OVERFLOW,
268                         AMD_F_INEXACT|AMD_F_OVERFLOW,ERANGE, x, 0.0, 1);
269 
270 //    z = infinity_with_flags(AMD_F_OVERFLOW);
271       }
272   else if (y >= small_threshold)
273     {
274       /* In this range y is large enough so that
275          the negative exponential is negligible,
276          so cosh(y) is approximated by sign(x)*exp(y)/2. The
277          code below is an inlined version of that from
278          exp() with two changes (it operates on
279          y instead of x, and the division by 2 is
280          done by reducing m by 1). */
281 
282       splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
283                log2_by_32_tail, &m, &z1, &z2);
284       m -= 1;
285 
286       if (m >= EMIN_DP64 && m <= EMAX_DP64)
287         z = scaleDouble_1((z1+z2),m);
288       else
289         z = scaleDouble_2((z1+z2),m);
290     }
291   else
292     {
293       /* In this range we find the integer part y0 of y
294          and the increment dy = y - y0. We then compute
295 
296          z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
297          z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
298 
299          where sinh(y0) and cosh(y0) are tabulated above. */
300 
301       int ind;
302       double dy, dy2, sdy, cdy;
303 
304       ind = (int)y;
305       dy = y - ind;
306 
307       dy2 = dy*dy;
308       sdy = dy*dy2*(0.166666666666666667013899e0 +
309                     (0.833333333333329931873097e-2 +
310                      (0.198412698413242405162014e-3 +
311                       (0.275573191913636406057211e-5 +
312                        (0.250521176994133472333666e-7 +
313                         (0.160576793121939886190847e-9 +
314                          0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
315 
316       cdy = dy2*(0.500000000000000005911074e0 +
317                  (0.416666666666660876512776e-1 +
318                   (0.138888888889814854814536e-2 +
319                    (0.248015872460622433115785e-4 +
320                     (0.275573350756016588011357e-6 +
321                      (0.208744349831471353536305e-8 +
322                       0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
323 
324       /* At this point sinh(dy) is approximated by dy + sdy, and cosh(dy) is approximated by 1 + cdy.
325 	 Shift some significant bits from dy to cdy. */
326 #if 0
327     double  sdy1,sdy2;
328       GET_BITS_DP64(dy, ux);
329       ux &= 0xfffffffff8000000;
330       PUT_BITS_DP64(ux, sdy1);    // sdy1 is  upper 53-27=26 significant bits of dy.
331       sdy2 = sdy + (dy - sdy1);   // sdy2 is  sdy + lower bits of dy
332 
333       z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy2)
334 	       + sinh_tail[ind]*sdy1) + cosh_tail[ind])
335 	     + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy2)
336 	   + sinh_lead[ind]*sdy1) + cosh_lead[ind];
337 #else
338       z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy)
339 	       + sinh_tail[ind]*dy) + cosh_tail[ind])
340 	     + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy)
341 	   + sinh_lead[ind]*dy) + cosh_lead[ind];
342 #endif
343     }
344 
345   return z;
346 }
347