1*81418a27Smrg /*
2*81418a27Smrg * ====================================================
3*81418a27Smrg * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4*81418a27Smrg *
5*81418a27Smrg * Developed at SunPro, a Sun Microsystems, Inc. business.
6*81418a27Smrg * Permission to use, copy, modify, and distribute this
7*81418a27Smrg * software is freely granted, provided that this notice
8*81418a27Smrg * is preserved.
9*81418a27Smrg * ====================================================
10*81418a27Smrg */
11*81418a27Smrg
12*81418a27Smrg /*
13*81418a27Smrg Long double expansions are
14*81418a27Smrg Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
15*81418a27Smrg and are incorporated herein by permission of the author. The author
16*81418a27Smrg reserves the right to distribute this material elsewhere under different
17*81418a27Smrg copying permissions. These modifications are distributed here under
18*81418a27Smrg the following terms:
19*81418a27Smrg
20*81418a27Smrg This library is free software; you can redistribute it and/or
21*81418a27Smrg modify it under the terms of the GNU Lesser General Public
22*81418a27Smrg License as published by the Free Software Foundation; either
23*81418a27Smrg version 2.1 of the License, or (at your option) any later version.
24*81418a27Smrg
25*81418a27Smrg This library is distributed in the hope that it will be useful,
26*81418a27Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
27*81418a27Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
28*81418a27Smrg Lesser General Public License for more details.
29*81418a27Smrg
30*81418a27Smrg You should have received a copy of the GNU Lesser General Public
31*81418a27Smrg License along with this library; if not, see
32*81418a27Smrg <http://www.gnu.org/licenses/>. */
33*81418a27Smrg
34*81418a27Smrg /* acosq(x)
35*81418a27Smrg * Method :
36*81418a27Smrg * acos(x) = pi/2 - asin(x)
37*81418a27Smrg * acos(-x) = pi/2 + asin(x)
 * For |x| < 0.46875
 *      acos(x) = pi/2 - asin(x)
 * Between 0.46875 and .5 the approximation is
 *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
42*81418a27Smrg * Between .5 and .625 the approximation is
43*81418a27Smrg * acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
44*81418a27Smrg * For x > 0.625,
45*81418a27Smrg * acos(x) = 2 asin(sqrt((1-x)/2))
46*81418a27Smrg * computed with an extended precision square root in the leading term.
47*81418a27Smrg * For x < -0.625
48*81418a27Smrg * acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
49*81418a27Smrg *
50*81418a27Smrg * Special cases:
51*81418a27Smrg * if x is NaN, return x itself;
52*81418a27Smrg * if |x|>1, return NaN with invalid signal.
53*81418a27Smrg *
54*81418a27Smrg * Functions needed: sqrtq.
55*81418a27Smrg */
56*81418a27Smrg
57*81418a27Smrg #include "quadmath-imp.h"
58*81418a27Smrg
/* Unity, and pi/2 held as a leading value plus a small trailing term;
   the code always uses the two halves together.  */
static const __float128
  one     = 1,
  pio2_hi = 1.5707963267948966192313216916397514420986Q,
  pio2_lo = 4.3359050650618905123985220130216759843812E-35Q;

/* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
   -0.0625 <= x <= 0.0625
   peak relative error 3.3e-35

   rS is the numerator (degree 10).  sS is the denominator; its leading
   coefficient is 1.000000000000000000000000000000000000000E0 and is
   therefore not stored.  */
static const __float128
  rS0  =  5.619049346208901520945464704848780243887E0Q,
  rS1  = -4.460504162777731472539175700169871920352E1Q,
  rS2  =  1.317669505315409261479577040530751477488E2Q,
  rS3  = -1.626532582423661989632442410808596009227E2Q,
  rS4  =  3.144806644195158614904369445440583873264E1Q,
  rS5  =  9.806674443470740708765165604769099559553E1Q,
  rS6  = -5.708468492052010816555762842394927806920E1Q,
  rS7  = -1.396540499232262112248553357962639431922E1Q,
  rS8  =  1.126243289311910363001762058295832610344E1Q,
  rS9  =  4.956179821329901954211277873774472383512E-1Q,
  rS10 = -3.313227657082367169241333738391762525780E-1Q;

static const __float128
  sS0 = -4.645814742084009935700221277307007679325E0Q,
  sS1 =  3.879074822457694323970438316317961918430E1Q,
  sS2 = -1.221986588013474694623973554726201001066E2Q,
  sS3 =  1.658821150347718105012079876756201905822E2Q,
  sS4 = -4.804379630977558197953176474426239748977E1Q,
  sS5 = -1.004296417397316948114344573811562952793E2Q,
  sS6 =  7.530281592861320234941101403870010111138E1Q,
  sS7 =  1.270735595411673647119592092304357226607E1Q,
  sS8 = -1.815144839646376500705105967064792930282E1Q,
  sS9 = -7.821597334910963922204235247786840828217E-2Q;

/* acos(0.5625) and pi - acos(0.5625); the latter is the base value used
   for negative arguments.  */
static const __float128
  acosr5625    = 9.7338991014954640492751132535550279812151E-1Q,
  pimacosr5625 = 2.1682027434402468335351320579240000860757E0Q;

/* acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
   -0.0625 <= x <= 0.0625
   peak relative error 2.1e-35

   P is the numerator (degree 10).  Q is the denominator; its leading
   coefficient is 1.000000000000000000000000000000000000000E0 and is
   therefore not stored.  */
static const __float128
  P0  =  2.177690192235413635229046633751390484892E0Q,
  P1  = -2.848698225706605746657192566166142909573E1Q,
  P2  =  1.040076477655245590871244795403659880304E2Q,
  P3  = -1.400087608918906358323551402881238180553E2Q,
  P4  =  2.221047917671449176051896400503615543757E1Q,
  P5  =  9.643714856395587663736110523917499638702E1Q,
  P6  = -5.158406639829833829027457284942389079196E1Q,
  P7  = -1.578651828337585944715290382181219741813E1Q,
  P8  =  1.093632715903802870546857764647931045906E1Q,
  P9  =  5.448925479898460003048760932274085300103E-1Q,
  P10 = -3.315886001095605268470690485170092986337E-1Q;

static const __float128
  Q0 = -1.958219113487162405143608843774587557016E0Q,
  Q1 =  2.614577866876185080678907676023269360520E1Q,
  Q2 = -9.990858606464150981009763389881793660938E1Q,
  Q3 =  1.443958741356995763628660823395334281596E2Q,
  Q4 = -3.206441012484232867657763518369723873129E1Q,
  Q5 = -1.048560885341833443564920145642588991492E2Q,
  Q6 =  6.745883931909770880159915641984874746358E1Q,
  Q7 =  1.806809656342804436118449982647641392951E1Q,
  Q8 = -1.770150690652438294290020775359580915464E1Q,
  Q9 = -5.659156469628629327045433069052560211164E-1Q;

/* acos(0.4375) and pi - acos(0.4375); the latter is the base value used
   for negative arguments.  */
static const __float128
  acosr4375    = 1.1179797320499710475919903296900511518755E0Q,
  pimacosr4375 = 2.0236129215398221908706530535894517323217E0Q;

/* asin(x) = x + x^3 pS(x^2) / qS(x^2)
   0 <= x <= 0.5
   peak relative error 1.9e-35

   pS is the numerator (degree 9).  qS is the denominator; its leading
   coefficient is 1.000000000000000000000000000000000000000E0 and is
   therefore not stored.  */
static const __float128
  pS0 = -8.358099012470680544198472400254596543711E2Q,
  pS1 =  3.674973957689619490312782828051860366493E3Q,
  pS2 = -6.730729094812979665807581609853656623219E3Q,
  pS3 =  6.643843795209060298375552684423454077633E3Q,
  pS4 = -3.817341990928606692235481812252049415993E3Q,
  pS5 =  1.284635388402653715636722822195716476156E3Q,
  pS6 = -2.410736125231549204856567737329112037867E2Q,
  pS7 =  2.219191969382402856557594215833622156220E1Q,
  pS8 = -7.249056260830627156600112195061001036533E-1Q,
  pS9 =  1.055923570937755300061509030361395604448E-3Q;

static const __float128
  qS0 = -5.014859407482408326519083440151745519205E3Q,
  qS1 =  2.430653047950480068881028451580393430537E4Q,
  qS2 = -4.997904737193653607449250593976069726962E4Q,
  qS3 =  5.675712336110456923807959930107347511086E4Q,
  qS4 = -3.881523118339661268482937768522572588022E4Q,
  qS5 =  1.634202194895541569749717032234510811216E4Q,
  qS6 = -4.151452662440709301601820849901296953752E3Q,
  qS7 =  5.956050864057192019085175976175695342168E2Q,
  qS8 = -4.175375777334867025769346564600396877176E1Q;
149*81418a27Smrg
150*81418a27Smrg __float128
acosq(__float128 x)151*81418a27Smrg acosq (__float128 x)
152*81418a27Smrg {
153*81418a27Smrg __float128 z, r, w, p, q, s, t, f2;
154*81418a27Smrg int32_t ix, sign;
155*81418a27Smrg ieee854_float128 u;
156*81418a27Smrg
157*81418a27Smrg u.value = x;
158*81418a27Smrg sign = u.words32.w0;
159*81418a27Smrg ix = sign & 0x7fffffff;
160*81418a27Smrg u.words32.w0 = ix; /* |x| */
161*81418a27Smrg if (ix >= 0x3fff0000) /* |x| >= 1 */
162*81418a27Smrg {
163*81418a27Smrg if (ix == 0x3fff0000
164*81418a27Smrg && (u.words32.w1 | u.words32.w2 | u.words32.w3) == 0)
165*81418a27Smrg { /* |x| == 1 */
166*81418a27Smrg if ((sign & 0x80000000) == 0)
167*81418a27Smrg return 0.0; /* acos(1) = 0 */
168*81418a27Smrg else
169*81418a27Smrg return (2.0 * pio2_hi) + (2.0 * pio2_lo); /* acos(-1)= pi */
170*81418a27Smrg }
171*81418a27Smrg return (x - x) / (x - x); /* acos(|x| > 1) is NaN */
172*81418a27Smrg }
173*81418a27Smrg else if (ix < 0x3ffe0000) /* |x| < 0.5 */
174*81418a27Smrg {
175*81418a27Smrg if (ix < 0x3f8e0000) /* |x| < 2**-113 */
176*81418a27Smrg return pio2_hi + pio2_lo;
177*81418a27Smrg if (ix < 0x3ffde000) /* |x| < .4375 */
178*81418a27Smrg {
179*81418a27Smrg /* Arcsine of x. */
180*81418a27Smrg z = x * x;
181*81418a27Smrg p = (((((((((pS9 * z
182*81418a27Smrg + pS8) * z
183*81418a27Smrg + pS7) * z
184*81418a27Smrg + pS6) * z
185*81418a27Smrg + pS5) * z
186*81418a27Smrg + pS4) * z
187*81418a27Smrg + pS3) * z
188*81418a27Smrg + pS2) * z
189*81418a27Smrg + pS1) * z
190*81418a27Smrg + pS0) * z;
191*81418a27Smrg q = (((((((( z
192*81418a27Smrg + qS8) * z
193*81418a27Smrg + qS7) * z
194*81418a27Smrg + qS6) * z
195*81418a27Smrg + qS5) * z
196*81418a27Smrg + qS4) * z
197*81418a27Smrg + qS3) * z
198*81418a27Smrg + qS2) * z
199*81418a27Smrg + qS1) * z
200*81418a27Smrg + qS0;
201*81418a27Smrg r = x + x * p / q;
202*81418a27Smrg z = pio2_hi - (r - pio2_lo);
203*81418a27Smrg return z;
204*81418a27Smrg }
205*81418a27Smrg /* .4375 <= |x| < .5 */
206*81418a27Smrg t = u.value - 0.4375Q;
207*81418a27Smrg p = ((((((((((P10 * t
208*81418a27Smrg + P9) * t
209*81418a27Smrg + P8) * t
210*81418a27Smrg + P7) * t
211*81418a27Smrg + P6) * t
212*81418a27Smrg + P5) * t
213*81418a27Smrg + P4) * t
214*81418a27Smrg + P3) * t
215*81418a27Smrg + P2) * t
216*81418a27Smrg + P1) * t
217*81418a27Smrg + P0) * t;
218*81418a27Smrg
219*81418a27Smrg q = (((((((((t
220*81418a27Smrg + Q9) * t
221*81418a27Smrg + Q8) * t
222*81418a27Smrg + Q7) * t
223*81418a27Smrg + Q6) * t
224*81418a27Smrg + Q5) * t
225*81418a27Smrg + Q4) * t
226*81418a27Smrg + Q3) * t
227*81418a27Smrg + Q2) * t
228*81418a27Smrg + Q1) * t
229*81418a27Smrg + Q0;
230*81418a27Smrg r = p / q;
231*81418a27Smrg if (sign & 0x80000000)
232*81418a27Smrg r = pimacosr4375 - r;
233*81418a27Smrg else
234*81418a27Smrg r = acosr4375 + r;
235*81418a27Smrg return r;
236*81418a27Smrg }
237*81418a27Smrg else if (ix < 0x3ffe4000) /* |x| < 0.625 */
238*81418a27Smrg {
239*81418a27Smrg t = u.value - 0.5625Q;
240*81418a27Smrg p = ((((((((((rS10 * t
241*81418a27Smrg + rS9) * t
242*81418a27Smrg + rS8) * t
243*81418a27Smrg + rS7) * t
244*81418a27Smrg + rS6) * t
245*81418a27Smrg + rS5) * t
246*81418a27Smrg + rS4) * t
247*81418a27Smrg + rS3) * t
248*81418a27Smrg + rS2) * t
249*81418a27Smrg + rS1) * t
250*81418a27Smrg + rS0) * t;
251*81418a27Smrg
252*81418a27Smrg q = (((((((((t
253*81418a27Smrg + sS9) * t
254*81418a27Smrg + sS8) * t
255*81418a27Smrg + sS7) * t
256*81418a27Smrg + sS6) * t
257*81418a27Smrg + sS5) * t
258*81418a27Smrg + sS4) * t
259*81418a27Smrg + sS3) * t
260*81418a27Smrg + sS2) * t
261*81418a27Smrg + sS1) * t
262*81418a27Smrg + sS0;
263*81418a27Smrg if (sign & 0x80000000)
264*81418a27Smrg r = pimacosr5625 - p / q;
265*81418a27Smrg else
266*81418a27Smrg r = acosr5625 + p / q;
267*81418a27Smrg return r;
268*81418a27Smrg }
269*81418a27Smrg else
270*81418a27Smrg { /* |x| >= .625 */
271*81418a27Smrg z = (one - u.value) * 0.5;
272*81418a27Smrg s = sqrtq (z);
273*81418a27Smrg /* Compute an extended precision square root from
274*81418a27Smrg the Newton iteration s -> 0.5 * (s + z / s).
275*81418a27Smrg The change w from s to the improved value is
276*81418a27Smrg w = 0.5 * (s + z / s) - s = (s^2 + z)/2s - s = (z - s^2)/2s.
277*81418a27Smrg Express s = f1 + f2 where f1 * f1 is exactly representable.
278*81418a27Smrg w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
279*81418a27Smrg s + w has extended precision. */
280*81418a27Smrg u.value = s;
281*81418a27Smrg u.words32.w2 = 0;
282*81418a27Smrg u.words32.w3 = 0;
283*81418a27Smrg f2 = s - u.value;
284*81418a27Smrg w = z - u.value * u.value;
285*81418a27Smrg w = w - 2.0 * u.value * f2;
286*81418a27Smrg w = w - f2 * f2;
287*81418a27Smrg w = w / (2.0 * s);
288*81418a27Smrg /* Arcsine of s. */
289*81418a27Smrg p = (((((((((pS9 * z
290*81418a27Smrg + pS8) * z
291*81418a27Smrg + pS7) * z
292*81418a27Smrg + pS6) * z
293*81418a27Smrg + pS5) * z
294*81418a27Smrg + pS4) * z
295*81418a27Smrg + pS3) * z
296*81418a27Smrg + pS2) * z
297*81418a27Smrg + pS1) * z
298*81418a27Smrg + pS0) * z;
299*81418a27Smrg q = (((((((( z
300*81418a27Smrg + qS8) * z
301*81418a27Smrg + qS7) * z
302*81418a27Smrg + qS6) * z
303*81418a27Smrg + qS5) * z
304*81418a27Smrg + qS4) * z
305*81418a27Smrg + qS3) * z
306*81418a27Smrg + qS2) * z
307*81418a27Smrg + qS1) * z
308*81418a27Smrg + qS0;
309*81418a27Smrg r = s + (w + s * p / q);
310*81418a27Smrg
311*81418a27Smrg if (sign & 0x80000000)
312*81418a27Smrg w = pio2_hi + (pio2_lo - r);
313*81418a27Smrg else
314*81418a27Smrg w = r;
315*81418a27Smrg return 2.0 * w;
316*81418a27Smrg }
317*81418a27Smrg }
318