1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /*
13    Long double expansions are
14    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
15    and are incorporated herein by permission of the author.  The author
16    reserves the right to distribute this material elsewhere under different
17    copying permissions.  These modifications are distributed here under
18    the following terms:
19 
20     This library is free software; you can redistribute it and/or
21     modify it under the terms of the GNU Lesser General Public
22     License as published by the Free Software Foundation; either
23     version 2.1 of the License, or (at your option) any later version.
24 
25     This library is distributed in the hope that it will be useful,
26     but WITHOUT ANY WARRANTY; without even the implied warranty of
27     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28     Lesser General Public License for more details.
29 
30     You should have received a copy of the GNU Lesser General Public
31     License along with this library; if not, see
32     <http://www.gnu.org/licenses/>.  */
33 
34 /* acosq(x)
35  * Method :
36  *      acos(x)  = pi/2 - asin(x)
37  *      acos(-x) = pi/2 + asin(x)
38  * For |x| <= 0.375
39  *      acos(x) = pi/2 - asin(x)
40  * Between .375 and .5 the approximation is
41  *      acos(0.4375 + x) = acos(0.4375) + x P(x) / Q(x)
42  * Between .5 and .625 the approximation is
43  *      acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
44  * For x > 0.625,
45  *      acos(x) = 2 asin(sqrt((1-x)/2))
46  *      computed with an extended precision square root in the leading term.
47  * For x < -0.625
48  *      acos(x) = pi - 2 asin(sqrt((1-|x|)/2))
49  *
50  * Special cases:
51  *      if x is NaN, return x itself;
52  *      if |x|>1, return NaN with invalid signal.
53  *
54  * Functions needed: sqrtq.
55  */
56 
57 #include "quadmath-imp.h"
58 
59 static const __float128
60   one = 1,
61   pio2_hi = 1.5707963267948966192313216916397514420986Q,
62   pio2_lo = 4.3359050650618905123985220130216759843812E-35Q,
63 
64   /* acos(0.5625 + x) = acos(0.5625) + x rS(x) / sS(x)
65      -0.0625 <= x <= 0.0625
66      peak relative error 3.3e-35  */
67 
68   rS0 =  5.619049346208901520945464704848780243887E0Q,
69   rS1 = -4.460504162777731472539175700169871920352E1Q,
70   rS2 =  1.317669505315409261479577040530751477488E2Q,
71   rS3 = -1.626532582423661989632442410808596009227E2Q,
72   rS4 =  3.144806644195158614904369445440583873264E1Q,
73   rS5 =  9.806674443470740708765165604769099559553E1Q,
74   rS6 = -5.708468492052010816555762842394927806920E1Q,
75   rS7 = -1.396540499232262112248553357962639431922E1Q,
76   rS8 =  1.126243289311910363001762058295832610344E1Q,
77   rS9 =  4.956179821329901954211277873774472383512E-1Q,
78   rS10 = -3.313227657082367169241333738391762525780E-1Q,
79 
80   sS0 = -4.645814742084009935700221277307007679325E0Q,
81   sS1 =  3.879074822457694323970438316317961918430E1Q,
82   sS2 = -1.221986588013474694623973554726201001066E2Q,
83   sS3 =  1.658821150347718105012079876756201905822E2Q,
84   sS4 = -4.804379630977558197953176474426239748977E1Q,
85   sS5 = -1.004296417397316948114344573811562952793E2Q,
86   sS6 =  7.530281592861320234941101403870010111138E1Q,
87   sS7 =  1.270735595411673647119592092304357226607E1Q,
88   sS8 = -1.815144839646376500705105967064792930282E1Q,
89   sS9 = -7.821597334910963922204235247786840828217E-2Q,
90   /* 1.000000000000000000000000000000000000000E0 */
91 
92   acosr5625 = 9.7338991014954640492751132535550279812151E-1Q,
93   pimacosr5625 = 2.1682027434402468335351320579240000860757E0Q,
94 
95   /* acos(0.4375 + x) = acos(0.4375) + x rS(x) / sS(x)
96      -0.0625 <= x <= 0.0625
97      peak relative error 2.1e-35  */
98 
99   P0 =  2.177690192235413635229046633751390484892E0Q,
100   P1 = -2.848698225706605746657192566166142909573E1Q,
101   P2 =  1.040076477655245590871244795403659880304E2Q,
102   P3 = -1.400087608918906358323551402881238180553E2Q,
103   P4 =  2.221047917671449176051896400503615543757E1Q,
104   P5 =  9.643714856395587663736110523917499638702E1Q,
105   P6 = -5.158406639829833829027457284942389079196E1Q,
106   P7 = -1.578651828337585944715290382181219741813E1Q,
107   P8 =  1.093632715903802870546857764647931045906E1Q,
108   P9 =  5.448925479898460003048760932274085300103E-1Q,
109   P10 = -3.315886001095605268470690485170092986337E-1Q,
110   Q0 = -1.958219113487162405143608843774587557016E0Q,
111   Q1 =  2.614577866876185080678907676023269360520E1Q,
112   Q2 = -9.990858606464150981009763389881793660938E1Q,
113   Q3 =  1.443958741356995763628660823395334281596E2Q,
114   Q4 = -3.206441012484232867657763518369723873129E1Q,
115   Q5 = -1.048560885341833443564920145642588991492E2Q,
116   Q6 =  6.745883931909770880159915641984874746358E1Q,
117   Q7 =  1.806809656342804436118449982647641392951E1Q,
118   Q8 = -1.770150690652438294290020775359580915464E1Q,
119   Q9 = -5.659156469628629327045433069052560211164E-1Q,
120   /* 1.000000000000000000000000000000000000000E0 */
121 
122   acosr4375 = 1.1179797320499710475919903296900511518755E0Q,
123   pimacosr4375 = 2.0236129215398221908706530535894517323217E0Q,
124 
125   /* asin(x) = x + x^3 pS(x^2) / qS(x^2)
126      0 <= x <= 0.5
127      peak relative error 1.9e-35  */
128   pS0 = -8.358099012470680544198472400254596543711E2Q,
129   pS1 =  3.674973957689619490312782828051860366493E3Q,
130   pS2 = -6.730729094812979665807581609853656623219E3Q,
131   pS3 =  6.643843795209060298375552684423454077633E3Q,
132   pS4 = -3.817341990928606692235481812252049415993E3Q,
133   pS5 =  1.284635388402653715636722822195716476156E3Q,
134   pS6 = -2.410736125231549204856567737329112037867E2Q,
135   pS7 =  2.219191969382402856557594215833622156220E1Q,
136   pS8 = -7.249056260830627156600112195061001036533E-1Q,
137   pS9 =  1.055923570937755300061509030361395604448E-3Q,
138 
139   qS0 = -5.014859407482408326519083440151745519205E3Q,
140   qS1 =  2.430653047950480068881028451580393430537E4Q,
141   qS2 = -4.997904737193653607449250593976069726962E4Q,
142   qS3 =  5.675712336110456923807959930107347511086E4Q,
143   qS4 = -3.881523118339661268482937768522572588022E4Q,
144   qS5 =  1.634202194895541569749717032234510811216E4Q,
145   qS6 = -4.151452662440709301601820849901296953752E3Q,
146   qS7 =  5.956050864057192019085175976175695342168E2Q,
147   qS8 = -4.175375777334867025769346564600396877176E1Q;
148   /* 1.000000000000000000000000000000000000000E0 */
149 
150 __float128
acosq(__float128 x)151 acosq (__float128 x)
152 {
153   __float128 z, r, w, p, q, s, t, f2;
154   int32_t ix, sign;
155   ieee854_float128 u;
156 
157   u.value = x;
158   sign = u.words32.w0;
159   ix = sign & 0x7fffffff;
160   u.words32.w0 = ix;		/* |x| */
161   if (ix >= 0x3fff0000)		/* |x| >= 1 */
162     {
163       if (ix == 0x3fff0000
164 	  && (u.words32.w1 | u.words32.w2 | u.words32.w3) == 0)
165 	{			/* |x| == 1 */
166 	  if ((sign & 0x80000000) == 0)
167 	    return 0.0;		/* acos(1) = 0  */
168 	  else
169 	    return (2.0 * pio2_hi) + (2.0 * pio2_lo);	/* acos(-1)= pi */
170 	}
171       return (x - x) / (x - x);	/* acos(|x| > 1) is NaN */
172     }
173   else if (ix < 0x3ffe0000)	/* |x| < 0.5 */
174     {
175       if (ix < 0x3f8e0000)	/* |x| < 2**-113 */
176 	return pio2_hi + pio2_lo;
177       if (ix < 0x3ffde000)	/* |x| < .4375 */
178 	{
179 	  /* Arcsine of x.  */
180 	  z = x * x;
181 	  p = (((((((((pS9 * z
182 		       + pS8) * z
183 		      + pS7) * z
184 		     + pS6) * z
185 		    + pS5) * z
186 		   + pS4) * z
187 		  + pS3) * z
188 		 + pS2) * z
189 		+ pS1) * z
190 	       + pS0) * z;
191 	  q = (((((((( z
192 		       + qS8) * z
193 		     + qS7) * z
194 		    + qS6) * z
195 		   + qS5) * z
196 		  + qS4) * z
197 		 + qS3) * z
198 		+ qS2) * z
199 	       + qS1) * z
200 	    + qS0;
201 	  r = x + x * p / q;
202 	  z = pio2_hi - (r - pio2_lo);
203 	  return z;
204 	}
205       /* .4375 <= |x| < .5 */
206       t = u.value - 0.4375Q;
207       p = ((((((((((P10 * t
208 		    + P9) * t
209 		   + P8) * t
210 		  + P7) * t
211 		 + P6) * t
212 		+ P5) * t
213 	       + P4) * t
214 	      + P3) * t
215 	     + P2) * t
216 	    + P1) * t
217 	   + P0) * t;
218 
219       q = (((((((((t
220 		   + Q9) * t
221 		  + Q8) * t
222 		 + Q7) * t
223 		+ Q6) * t
224 	       + Q5) * t
225 	      + Q4) * t
226 	     + Q3) * t
227 	    + Q2) * t
228 	   + Q1) * t
229 	+ Q0;
230       r = p / q;
231       if (sign & 0x80000000)
232 	r = pimacosr4375 - r;
233       else
234 	r = acosr4375 + r;
235       return r;
236     }
237   else if (ix < 0x3ffe4000)	/* |x| < 0.625 */
238     {
239       t = u.value - 0.5625Q;
240       p = ((((((((((rS10 * t
241 		    + rS9) * t
242 		   + rS8) * t
243 		  + rS7) * t
244 		 + rS6) * t
245 		+ rS5) * t
246 	       + rS4) * t
247 	      + rS3) * t
248 	     + rS2) * t
249 	    + rS1) * t
250 	   + rS0) * t;
251 
252       q = (((((((((t
253 		   + sS9) * t
254 		  + sS8) * t
255 		 + sS7) * t
256 		+ sS6) * t
257 	       + sS5) * t
258 	      + sS4) * t
259 	     + sS3) * t
260 	    + sS2) * t
261 	   + sS1) * t
262 	+ sS0;
263       if (sign & 0x80000000)
264 	r = pimacosr5625 - p / q;
265       else
266 	r = acosr5625 + p / q;
267       return r;
268     }
269   else
270     {				/* |x| >= .625 */
271       z = (one - u.value) * 0.5;
272       s = sqrtq (z);
273       /* Compute an extended precision square root from
274 	 the Newton iteration  s -> 0.5 * (s + z / s).
275 	 The change w from s to the improved value is
276 	    w = 0.5 * (s + z / s) - s  = (s^2 + z)/2s - s = (z - s^2)/2s.
277 	  Express s = f1 + f2 where f1 * f1 is exactly representable.
278 	  w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
279 	  s + w has extended precision.  */
280       u.value = s;
281       u.words32.w2 = 0;
282       u.words32.w3 = 0;
283       f2 = s - u.value;
284       w = z - u.value * u.value;
285       w = w - 2.0 * u.value * f2;
286       w = w - f2 * f2;
287       w = w / (2.0 * s);
288       /* Arcsine of s.  */
289       p = (((((((((pS9 * z
290 		   + pS8) * z
291 		  + pS7) * z
292 		 + pS6) * z
293 		+ pS5) * z
294 	       + pS4) * z
295 	      + pS3) * z
296 	     + pS2) * z
297 	    + pS1) * z
298 	   + pS0) * z;
299       q = (((((((( z
300 		   + qS8) * z
301 		 + qS7) * z
302 		+ qS6) * z
303 	       + qS5) * z
304 	      + qS4) * z
305 	     + qS3) * z
306 	    + qS2) * z
307 	   + qS1) * z
308 	+ qS0;
309       r = s + (w + s * p / q);
310 
311       if (sign & 0x80000000)
312 	w = pio2_hi + (pio2_lo - r);
313       else
314 	w = r;
315       return 2.0 * w;
316     }
317 }
318