1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /* Expansions and modifications for 128-bit long double are
13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14    and are incorporated herein by permission of the author.  The author
15    reserves the right to distribute this material elsewhere under different
16    copying permissions.  These modifications are distributed here under
17    the following terms:
18 
19     This library is free software; you can redistribute it and/or
20     modify it under the terms of the GNU Lesser General Public
21     License as published by the Free Software Foundation; either
22     version 2.1 of the License, or (at your option) any later version.
23 
24     This library is distributed in the hope that it will be useful,
25     but WITHOUT ANY WARRANTY; without even the implied warranty of
26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27     Lesser General Public License for more details.
28 
29     You should have received a copy of the GNU Lesser General Public
30     License along with this library; if not, write to the Free Software
31     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
32 
33 /* powq(x,y) return x**y
34  *
35  *		      n
36  * Method:  Let x =  2   * (1+f)
37  *	1. Compute and return log2(x) in two pieces:
38  *		log2(x) = w1 + w2,
39  *	   where w1 has 113-53 = 60 bit trailing zeros.
40  *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41  *	   arithmetic, where |y'|<=0.5.
42  *	3. Return x**y = 2**n*exp(y'*log2)
43  *
44  * Special cases:
45  *	1.  (anything) ** 0  is 1
46  *	2.  (anything) ** 1  is itself
47  *	3.  (anything) ** NAN is NAN
48  *	4.  NAN ** (anything except 0) is NAN
49  *	5.  +-(|x| > 1) **  +INF is +INF
50  *	6.  +-(|x| > 1) **  -INF is +0
51  *	7.  +-(|x| < 1) **  +INF is +0
52  *	8.  +-(|x| < 1) **  -INF is +INF
53  *	9.  +-1         ** +-INF is NAN
54  *	10. +0 ** (+anything except 0, NAN)               is +0
55  *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56  *	12. +0 ** (-anything except 0, NAN)               is +INF
57  *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58  *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59  *	15. +INF ** (+anything except 0,NAN) is +INF
60  *	16. +INF ** (-anything except 0,NAN) is +0
61  *	17. -INF ** (anything)  = -0 ** (-anything)
62  *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63  *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64  *
65  */
66 
67 #include "quadmath-imp.h"
68 
69 static const __float128 bp[] = {
70   1.0Q,
71   1.5Q,
72 };
73 
74 /* log_2(1.5) */
75 static const __float128 dp_h[] = {
76   0.0,
77   5.8496250072115607565592654282227158546448E-1Q
78 };
79 
80 /* Low part of log_2(1.5) */
81 static const __float128 dp_l[] = {
82   0.0,
83   1.0579781240112554492329533686862998106046E-16Q
84 };
85 
86 static const __float128 zero = 0.0Q,
87   one = 1.0Q,
88   two = 2.0Q,
89   two113 = 1.0384593717069655257060992658440192E34Q,
90   huge = 1.0e3000Q,
91   tiny = 1.0e-3000Q;
92 
93 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
94    z = (x-1)/(x+1)
95    1 <= x <= 1.25
96    Peak relative error 2.3e-37 */
97 static const __float128 LN[] =
98 {
99  -3.0779177200290054398792536829702930623200E1Q,
100   6.5135778082209159921251824580292116201640E1Q,
101  -4.6312921812152436921591152809994014413540E1Q,
102   1.2510208195629420304615674658258363295208E1Q,
103  -9.9266909031921425609179910128531667336670E-1Q
104 };
105 static const __float128 LD[] =
106 {
107  -5.129862866715009066465422805058933131960E1Q,
108   1.452015077564081884387441590064272782044E2Q,
109  -1.524043275549860505277434040464085593165E2Q,
110   7.236063513651544224319663428634139768808E1Q,
111  -1.494198912340228235853027849917095580053E1Q
112   /* 1.0E0 */
113 };
114 
115 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
116    0 <= x <= 0.5
117    Peak relative error 5.7e-38  */
118 static const __float128 PN[] =
119 {
120   5.081801691915377692446852383385968225675E8Q,
121   9.360895299872484512023336636427675327355E6Q,
122   4.213701282274196030811629773097579432957E4Q,
123   5.201006511142748908655720086041570288182E1Q,
124   9.088368420359444263703202925095675982530E-3Q,
125 };
126 static const __float128 PD[] =
127 {
128   3.049081015149226615468111430031590411682E9Q,
129   1.069833887183886839966085436512368982758E8Q,
130   8.259257717868875207333991924545445705394E5Q,
131   1.872583833284143212651746812884298360922E3Q,
132   /* 1.0E0 */
133 };
134 
135 static const __float128
136   /* ln 2 */
137   lg2 = 6.9314718055994530941723212145817656807550E-1Q,
138   lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
139   lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
140   ovt = 8.0085662595372944372e-0017Q,
141   /* 2/(3*log(2)) */
142   cp = 9.6179669392597560490661645400126142495110E-1Q,
143   cp_h = 9.6179669392597555432899980587535537779331E-1Q,
144   cp_l = 5.0577616648125906047157785230014751039424E-17Q;
145 
146 __float128
powq(__float128 x,__float128 y)147 powq (__float128 x, __float128 y)
148 {
149   __float128 z, ax, z_h, z_l, p_h, p_l;
150   __float128 y1, t1, t2, r, s, sgn, t, u, v, w;
151   __float128 s2, s_h, s_l, t_h, t_l, ay;
152   int32_t i, j, k, yisint, n;
153   uint32_t ix, iy;
154   int32_t hx, hy;
155   ieee854_float128 o, p, q;
156 
157   p.value = x;
158   hx = p.words32.w0;
159   ix = hx & 0x7fffffff;
160 
161   q.value = y;
162   hy = q.words32.w0;
163   iy = hy & 0x7fffffff;
164 
165 
166   /* y==zero: x**0 = 1 */
167   if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
168     return one;
169 
170   /* 1.0**y = 1; -1.0**+-Inf = 1 */
171   if (x == one)
172     return one;
173   if (x == -1.0Q && iy == 0x7fff0000
174       && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
175     return one;
176 
177   /* +-NaN return x+y */
178   if ((ix > 0x7fff0000)
179       || ((ix == 0x7fff0000)
180 	  && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
181       || (iy > 0x7fff0000)
182       || ((iy == 0x7fff0000)
183 	  && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
184     return x + y;
185 
186   /* determine if y is an odd int when x < 0
187    * yisint = 0       ... y is not an integer
188    * yisint = 1       ... y is an odd int
189    * yisint = 2       ... y is an even int
190    */
191   yisint = 0;
192   if (hx < 0)
193     {
194       if (iy >= 0x40700000)	/* 2^113 */
195 	yisint = 2;		/* even integer y */
196       else if (iy >= 0x3fff0000)	/* 1.0 */
197 	{
198 	  if (floorq (y) == y)
199 	    {
200 	      z = 0.5 * y;
201 	      if (floorq (z) == z)
202 		yisint = 2;
203 	      else
204 		yisint = 1;
205 	    }
206 	}
207     }
208 
209   /* special value of y */
210   if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
211     {
212       if (iy == 0x7fff0000)	/* y is +-inf */
213 	{
214 	  if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
215 	      == 0)
216 	    return y - y;	/* +-1**inf is NaN */
217 	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
218 	    return (hy >= 0) ? y : zero;
219 	  else			/* (|x|<1)**-,+inf = inf,0 */
220 	    return (hy < 0) ? -y : zero;
221 	}
222       if (iy == 0x3fff0000)
223 	{			/* y is  +-1 */
224 	  if (hy < 0)
225 	    return one / x;
226 	  else
227 	    return x;
228 	}
229       if (hy == 0x40000000)
230 	return x * x;		/* y is  2 */
231       if (hy == 0x3ffe0000)
232 	{			/* y is  0.5 */
233 	  if (hx >= 0)		/* x >= +0 */
234 	    return sqrtq (x);
235 	}
236     }
237 
238   ax = fabsq (x);
239   /* special value of x */
240   if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
241     {
242       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
243 	{
244 	  z = ax;		/*x is +-0,+-inf,+-1 */
245 	  if (hy < 0)
246 	    z = one / z;	/* z = (1/|x|) */
247 	  if (hx < 0)
248 	    {
249 	      if (((ix - 0x3fff0000) | yisint) == 0)
250 		{
251 		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
252 		}
253 	      else if (yisint == 1)
254 		z = -z;		/* (x<0)**odd = -(|x|**odd) */
255 	    }
256 	  return z;
257 	}
258     }
259 
260   /* (x<0)**(non-int) is NaN */
261   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
262     return (x - x) / (x - x);
263 
264   /* sgn (sign of result -ve**odd) = -1 else = 1 */
265   sgn = one;
266   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
267     sgn = -one;				/* (-ve)**(odd int) */
268 
269   /* |y| is huge.
270      2^-16495 = 1/2 of smallest representable value.
271      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
272   if (iy > 0x401d654b)
273     {
274       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
275       if (iy > 0x407d654b)
276 	{
277 	  if (ix <= 0x3ffeffff)
278 	    return (hy < 0) ? huge * huge : tiny * tiny;
279 	  if (ix >= 0x3fff0000)
280 	    return (hy > 0) ? huge * huge : tiny * tiny;
281 	}
282       /* over/underflow if x is not close to one */
283       if (ix < 0x3ffeffff)
284 	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
285       if (ix > 0x3fff0000)
286 	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
287     }
288 
289   ay = y > 0 ? y : -y;
290   if (ay < 0x1p-128)
291     y = y < 0 ? -0x1p-128 : 0x1p-128;
292 
293   n = 0;
294   /* take care subnormal number */
295   if (ix < 0x00010000)
296     {
297       ax *= two113;
298       n -= 113;
299       o.value = ax;
300       ix = o.words32.w0;
301     }
302   n += ((ix) >> 16) - 0x3fff;
303   j = ix & 0x0000ffff;
304   /* determine interval */
305   ix = j | 0x3fff0000;		/* normalize ix */
306   if (j <= 0x3988)
307     k = 0;			/* |x|<sqrt(3/2) */
308   else if (j < 0xbb67)
309     k = 1;			/* |x|<sqrt(3)   */
310   else
311     {
312       k = 0;
313       n += 1;
314       ix -= 0x00010000;
315     }
316 
317   o.value = ax;
318   o.words32.w0 = ix;
319   ax = o.value;
320 
321   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
322   u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
323   v = one / (ax + bp[k]);
324   s = u * v;
325   s_h = s;
326 
327   o.value = s_h;
328   o.words32.w3 = 0;
329   o.words32.w2 &= 0xf8000000;
330   s_h = o.value;
331   /* t_h=ax+bp[k] High */
332   t_h = ax + bp[k];
333   o.value = t_h;
334   o.words32.w3 = 0;
335   o.words32.w2 &= 0xf8000000;
336   t_h = o.value;
337   t_l = ax - (t_h - bp[k]);
338   s_l = v * ((u - s_h * t_h) - s_h * t_l);
339   /* compute log(ax) */
340   s2 = s * s;
341   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
342   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
343   r = s2 * s2 * u / v;
344   r += s_l * (s_h + s);
345   s2 = s_h * s_h;
346   t_h = 3.0 + s2 + r;
347   o.value = t_h;
348   o.words32.w3 = 0;
349   o.words32.w2 &= 0xf8000000;
350   t_h = o.value;
351   t_l = r - ((t_h - 3.0) - s2);
352   /* u+v = s*(1+...) */
353   u = s_h * t_h;
354   v = s_l * t_h + t_l * s;
355   /* 2/(3log2)*(s+...) */
356   p_h = u + v;
357   o.value = p_h;
358   o.words32.w3 = 0;
359   o.words32.w2 &= 0xf8000000;
360   p_h = o.value;
361   p_l = v - (p_h - u);
362   z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
363   z_l = cp_l * p_h + p_l * cp + dp_l[k];
364   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
365   t = (__float128) n;
366   t1 = (((z_h + z_l) + dp_h[k]) + t);
367   o.value = t1;
368   o.words32.w3 = 0;
369   o.words32.w2 &= 0xf8000000;
370   t1 = o.value;
371   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
372 
373   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
374   y1 = y;
375   o.value = y1;
376   o.words32.w3 = 0;
377   o.words32.w2 &= 0xf8000000;
378   y1 = o.value;
379   p_l = (y - y1) * t1 + y * t2;
380   p_h = y1 * t1;
381   z = p_l + p_h;
382   o.value = z;
383   j = o.words32.w0;
384   if (j >= 0x400d0000) /* z >= 16384 */
385     {
386       /* if z > 16384 */
387       if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
388 	return sgn * huge * huge;	/* overflow */
389       else
390 	{
391 	  if (p_l + ovt > z - p_h)
392 	    return sgn * huge * huge;	/* overflow */
393 	}
394     }
395   else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
396     {
397       /* z < -16495 */
398       if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
399 	  != 0)
400 	return sgn * tiny * tiny;	/* underflow */
401       else
402 	{
403 	  if (p_l <= z - p_h)
404 	    return sgn * tiny * tiny;	/* underflow */
405 	}
406     }
407   /* compute 2**(p_h+p_l) */
408   i = j & 0x7fffffff;
409   k = (i >> 16) - 0x3fff;
410   n = 0;
411   if (i > 0x3ffe0000)
412     {				/* if |z| > 0.5, set n = [z+0.5] */
413       n = floorq (z + 0.5Q);
414       t = n;
415       p_h -= t;
416     }
417   t = p_l + p_h;
418   o.value = t;
419   o.words32.w3 = 0;
420   o.words32.w2 &= 0xf8000000;
421   t = o.value;
422   u = t * lg2_h;
423   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
424   z = u + v;
425   w = v - (z - u);
426   /*  exp(z) */
427   t = z * z;
428   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
429   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
430   t1 = z - t * u / v;
431   r = (z * t1) / (t1 - two) - (w + z * w);
432   z = one - (r - z);
433   o.value = z;
434   j = o.words32.w0;
435   j += (n << 16);
436   if ((j >> 16) <= 0)
437     {
438       z = scalbnq (z, n);	/* subnormal output */
439       __float128 force_underflow = z * z;
440       math_force_eval (force_underflow);
441     }
442   else
443     {
444       o.words32.w0 = j;
445       z = o.value;
446     }
447   return sgn * z;
448 }
449