1 /*
2  * ====================================================
3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4  *
5  * Developed at SunPro, a Sun Microsystems, Inc. business.
6  * Permission to use, copy, modify, and distribute this
7  * software is freely granted, provided that this notice
8  * is preserved.
9  * ====================================================
10  */
11 
12 /* Expansions and modifications for 128-bit long double are
13    Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14    and are incorporated herein by permission of the author.  The author
15    reserves the right to distribute this material elsewhere under different
16    copying permissions.  These modifications are distributed here under
17    the following terms:
18 
19     This library is free software; you can redistribute it and/or
20     modify it under the terms of the GNU Lesser General Public
21     License as published by the Free Software Foundation; either
22     version 2.1 of the License, or (at your option) any later version.
23 
24     This library is distributed in the hope that it will be useful,
25     but WITHOUT ANY WARRANTY; without even the implied warranty of
26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27     Lesser General Public License for more details.
28 
29     You should have received a copy of the GNU Lesser General Public
30     License along with this library; if not, see
31     <http://www.gnu.org/licenses/>.  */
32 
33 /* powq(x,y) return x**y
34  *
35  *		      n
36  * Method:  Let x =  2   * (1+f)
37  *	1. Compute and return log2(x) in two pieces:
38  *		log2(x) = w1 + w2,
39  *	   where w1 has 113-53 = 60 bit trailing zeros.
40  *	2. Perform y*log2(x) = n+y' by simulating muti-precision
41  *	   arithmetic, where |y'|<=0.5.
42  *	3. Return x**y = 2**n*exp(y'*log2)
43  *
44  * Special cases:
45  *	1.  (anything) ** 0  is 1
46  *	2.  (anything) ** 1  is itself
47  *	3.  (anything) ** NAN is NAN
48  *	4.  NAN ** (anything except 0) is NAN
49  *	5.  +-(|x| > 1) **  +INF is +INF
50  *	6.  +-(|x| > 1) **  -INF is +0
51  *	7.  +-(|x| < 1) **  +INF is +0
52  *	8.  +-(|x| < 1) **  -INF is +INF
53  *	9.  +-1         ** +-INF is NAN
54  *	10. +0 ** (+anything except 0, NAN)               is +0
55  *	11. -0 ** (+anything except 0, NAN, odd integer)  is +0
56  *	12. +0 ** (-anything except 0, NAN)               is +INF
57  *	13. -0 ** (-anything except 0, NAN, odd integer)  is +INF
58  *	14. -0 ** (odd integer) = -( +0 ** (odd integer) )
59  *	15. +INF ** (+anything except 0,NAN) is +INF
60  *	16. +INF ** (-anything except 0,NAN) is +0
61  *	17. -INF ** (anything)  = -0 ** (-anything)
62  *	18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
63  *	19. (-anything except 0 and inf) ** (non-integer) is NAN
64  *
65  */
66 
67 #include "quadmath-imp.h"
68 
69 static const __float128 bp[] = {
70   1,
71   1.5Q,
72 };
73 
74 /* log_2(1.5) */
75 static const __float128 dp_h[] = {
76   0.0,
77   5.8496250072115607565592654282227158546448E-1Q
78 };
79 
80 /* Low part of log_2(1.5) */
81 static const __float128 dp_l[] = {
82   0.0,
83   1.0579781240112554492329533686862998106046E-16Q
84 };
85 
86 static const __float128 zero = 0,
87   one = 1,
88   two = 2,
89   two113 = 1.0384593717069655257060992658440192E34Q,
90   huge = 1.0e3000Q,
91   tiny = 1.0e-3000Q;
92 
93 /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2))
94    z = (x-1)/(x+1)
95    1 <= x <= 1.25
96    Peak relative error 2.3e-37 */
97 static const __float128 LN[] =
98 {
99  -3.0779177200290054398792536829702930623200E1Q,
100   6.5135778082209159921251824580292116201640E1Q,
101  -4.6312921812152436921591152809994014413540E1Q,
102   1.2510208195629420304615674658258363295208E1Q,
103  -9.9266909031921425609179910128531667336670E-1Q
104 };
105 static const __float128 LD[] =
106 {
107  -5.129862866715009066465422805058933131960E1Q,
108   1.452015077564081884387441590064272782044E2Q,
109  -1.524043275549860505277434040464085593165E2Q,
110   7.236063513651544224319663428634139768808E1Q,
111  -1.494198912340228235853027849917095580053E1Q
112   /* 1.0E0 */
113 };
114 
115 /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2)))
116    0 <= x <= 0.5
117    Peak relative error 5.7e-38  */
118 static const __float128 PN[] =
119 {
120   5.081801691915377692446852383385968225675E8Q,
121   9.360895299872484512023336636427675327355E6Q,
122   4.213701282274196030811629773097579432957E4Q,
123   5.201006511142748908655720086041570288182E1Q,
124   9.088368420359444263703202925095675982530E-3Q,
125 };
126 static const __float128 PD[] =
127 {
128   3.049081015149226615468111430031590411682E9Q,
129   1.069833887183886839966085436512368982758E8Q,
130   8.259257717868875207333991924545445705394E5Q,
131   1.872583833284143212651746812884298360922E3Q,
132   /* 1.0E0 */
133 };
134 
135 static const __float128
136   /* ln 2 */
137   lg2 = 6.9314718055994530941723212145817656807550E-1Q,
138   lg2_h = 6.9314718055994528622676398299518041312695E-1Q,
139   lg2_l = 2.3190468138462996154948554638754786504121E-17Q,
140   ovt = 8.0085662595372944372e-0017Q,
141   /* 2/(3*log(2)) */
142   cp = 9.6179669392597560490661645400126142495110E-1Q,
143   cp_h = 9.6179669392597555432899980587535537779331E-1Q,
144   cp_l = 5.0577616648125906047157785230014751039424E-17Q;
145 
146 __float128
powq(__float128 x,__float128 y)147 powq (__float128 x, __float128 y)
148 {
149   __float128 z, ax, z_h, z_l, p_h, p_l;
150   __float128 y1, t1, t2, r, s, sgn, t, u, v, w;
151   __float128 s2, s_h, s_l, t_h, t_l, ay;
152   int32_t i, j, k, yisint, n;
153   uint32_t ix, iy;
154   int32_t hx, hy;
155   ieee854_float128 o, p, q;
156 
157   p.value = x;
158   hx = p.words32.w0;
159   ix = hx & 0x7fffffff;
160 
161   q.value = y;
162   hy = q.words32.w0;
163   iy = hy & 0x7fffffff;
164 
165 
166   /* y==zero: x**0 = 1 */
167   if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0
168       && !issignalingq (x))
169     return one;
170 
171   /* 1.0**y = 1; -1.0**+-Inf = 1 */
172   if (x == one && !issignalingq (y))
173     return one;
174   if (x == -1 && iy == 0x7fff0000
175       && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
176     return one;
177 
178   /* +-NaN return x+y */
179   if ((ix > 0x7fff0000)
180       || ((ix == 0x7fff0000)
181 	  && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0))
182       || (iy > 0x7fff0000)
183       || ((iy == 0x7fff0000)
184 	  && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0)))
185     return x + y;
186 
187   /* determine if y is an odd int when x < 0
188    * yisint = 0       ... y is not an integer
189    * yisint = 1       ... y is an odd int
190    * yisint = 2       ... y is an even int
191    */
192   yisint = 0;
193   if (hx < 0)
194     {
195       if (iy >= 0x40700000)	/* 2^113 */
196 	yisint = 2;		/* even integer y */
197       else if (iy >= 0x3fff0000)	/* 1.0 */
198 	{
199 	  if (floorq (y) == y)
200 	    {
201 	      z = 0.5 * y;
202 	      if (floorq (z) == z)
203 		yisint = 2;
204 	      else
205 		yisint = 1;
206 	    }
207 	}
208     }
209 
210   /* special value of y */
211   if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0)
212     {
213       if (iy == 0x7fff0000)	/* y is +-inf */
214 	{
215 	  if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3)
216 	      == 0)
217 	    return y - y;	/* +-1**inf is NaN */
218 	  else if (ix >= 0x3fff0000)	/* (|x|>1)**+-inf = inf,0 */
219 	    return (hy >= 0) ? y : zero;
220 	  else			/* (|x|<1)**-,+inf = inf,0 */
221 	    return (hy < 0) ? -y : zero;
222 	}
223       if (iy == 0x3fff0000)
224 	{			/* y is  +-1 */
225 	  if (hy < 0)
226 	    return one / x;
227 	  else
228 	    return x;
229 	}
230       if (hy == 0x40000000)
231 	return x * x;		/* y is  2 */
232       if (hy == 0x3ffe0000)
233 	{			/* y is  0.5 */
234 	  if (hx >= 0)		/* x >= +0 */
235 	    return sqrtq (x);
236 	}
237     }
238 
239   ax = fabsq (x);
240   /* special value of x */
241   if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0)
242     {
243       if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000)
244 	{
245 	  z = ax;		/*x is +-0,+-inf,+-1 */
246 	  if (hy < 0)
247 	    z = one / z;	/* z = (1/|x|) */
248 	  if (hx < 0)
249 	    {
250 	      if (((ix - 0x3fff0000) | yisint) == 0)
251 		{
252 		  z = (z - z) / (z - z);	/* (-1)**non-int is NaN */
253 		}
254 	      else if (yisint == 1)
255 		z = -z;		/* (x<0)**odd = -(|x|**odd) */
256 	    }
257 	  return z;
258 	}
259     }
260 
261   /* (x<0)**(non-int) is NaN */
262   if (((((uint32_t) hx >> 31) - 1) | yisint) == 0)
263     return (x - x) / (x - x);
264 
265   /* sgn (sign of result -ve**odd) = -1 else = 1 */
266   sgn = one;
267   if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0)
268     sgn = -one;			/* (-ve)**(odd int) */
269 
270   /* |y| is huge.
271      2^-16495 = 1/2 of smallest representable value.
272      If (1 - 1/131072)^y underflows, y > 1.4986e9 */
273   if (iy > 0x401d654b)
274     {
275       /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */
276       if (iy > 0x407d654b)
277 	{
278 	  if (ix <= 0x3ffeffff)
279 	    return (hy < 0) ? huge * huge : tiny * tiny;
280 	  if (ix >= 0x3fff0000)
281 	    return (hy > 0) ? huge * huge : tiny * tiny;
282 	}
283       /* over/underflow if x is not close to one */
284       if (ix < 0x3ffeffff)
285 	return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny;
286       if (ix > 0x3fff0000)
287 	return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny;
288     }
289 
290   ay = y > 0 ? y : -y;
291   if (ay < 0x1p-128)
292     y = y < 0 ? -0x1p-128 : 0x1p-128;
293 
294   n = 0;
295   /* take care subnormal number */
296   if (ix < 0x00010000)
297     {
298       ax *= two113;
299       n -= 113;
300       o.value = ax;
301       ix = o.words32.w0;
302     }
303   n += ((ix) >> 16) - 0x3fff;
304   j = ix & 0x0000ffff;
305   /* determine interval */
306   ix = j | 0x3fff0000;		/* normalize ix */
307   if (j <= 0x3988)
308     k = 0;			/* |x|<sqrt(3/2) */
309   else if (j < 0xbb67)
310     k = 1;			/* |x|<sqrt(3)   */
311   else
312     {
313       k = 0;
314       n += 1;
315       ix -= 0x00010000;
316     }
317 
318   o.value = ax;
319   o.words32.w0 = ix;
320   ax = o.value;
321 
322   /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
323   u = ax - bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
324   v = one / (ax + bp[k]);
325   s = u * v;
326   s_h = s;
327 
328   o.value = s_h;
329   o.words32.w3 = 0;
330   o.words32.w2 &= 0xf8000000;
331   s_h = o.value;
332   /* t_h=ax+bp[k] High */
333   t_h = ax + bp[k];
334   o.value = t_h;
335   o.words32.w3 = 0;
336   o.words32.w2 &= 0xf8000000;
337   t_h = o.value;
338   t_l = ax - (t_h - bp[k]);
339   s_l = v * ((u - s_h * t_h) - s_h * t_l);
340   /* compute log(ax) */
341   s2 = s * s;
342   u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4])));
343   v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2))));
344   r = s2 * s2 * u / v;
345   r += s_l * (s_h + s);
346   s2 = s_h * s_h;
347   t_h = 3.0 + s2 + r;
348   o.value = t_h;
349   o.words32.w3 = 0;
350   o.words32.w2 &= 0xf8000000;
351   t_h = o.value;
352   t_l = r - ((t_h - 3.0) - s2);
353   /* u+v = s*(1+...) */
354   u = s_h * t_h;
355   v = s_l * t_h + t_l * s;
356   /* 2/(3log2)*(s+...) */
357   p_h = u + v;
358   o.value = p_h;
359   o.words32.w3 = 0;
360   o.words32.w2 &= 0xf8000000;
361   p_h = o.value;
362   p_l = v - (p_h - u);
363   z_h = cp_h * p_h;		/* cp_h+cp_l = 2/(3*log2) */
364   z_l = cp_l * p_h + p_l * cp + dp_l[k];
365   /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
366   t = (__float128) n;
367   t1 = (((z_h + z_l) + dp_h[k]) + t);
368   o.value = t1;
369   o.words32.w3 = 0;
370   o.words32.w2 &= 0xf8000000;
371   t1 = o.value;
372   t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
373 
374   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
375   y1 = y;
376   o.value = y1;
377   o.words32.w3 = 0;
378   o.words32.w2 &= 0xf8000000;
379   y1 = o.value;
380   p_l = (y - y1) * t1 + y * t2;
381   p_h = y1 * t1;
382   z = p_l + p_h;
383   o.value = z;
384   j = o.words32.w0;
385   if (j >= 0x400d0000) /* z >= 16384 */
386     {
387       /* if z > 16384 */
388       if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0)
389 	return sgn * huge * huge;	/* overflow */
390       else
391 	{
392 	  if (p_l + ovt > z - p_h)
393 	    return sgn * huge * huge;	/* overflow */
394 	}
395     }
396   else if ((j & 0x7fffffff) >= 0x400d01b9)	/* z <= -16495 */
397     {
398       /* z < -16495 */
399       if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3)
400 	  != 0)
401 	return sgn * tiny * tiny;	/* underflow */
402       else
403 	{
404 	  if (p_l <= z - p_h)
405 	    return sgn * tiny * tiny;	/* underflow */
406 	}
407     }
408   /* compute 2**(p_h+p_l) */
409   i = j & 0x7fffffff;
410   k = (i >> 16) - 0x3fff;
411   n = 0;
412   if (i > 0x3ffe0000)
413     {				/* if |z| > 0.5, set n = [z+0.5] */
414       n = floorq (z + 0.5Q);
415       t = n;
416       p_h -= t;
417     }
418   t = p_l + p_h;
419   o.value = t;
420   o.words32.w3 = 0;
421   o.words32.w2 &= 0xf8000000;
422   t = o.value;
423   u = t * lg2_h;
424   v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
425   z = u + v;
426   w = v - (z - u);
427   /*  exp(z) */
428   t = z * z;
429   u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4])));
430   v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t)));
431   t1 = z - t * u / v;
432   r = (z * t1) / (t1 - two) - (w + z * w);
433   z = one - (r - z);
434   o.value = z;
435   j = o.words32.w0;
436   j += (n << 16);
437   if ((j >> 16) <= 0)
438     {
439       z = scalbnq (z, n);	/* subnormal output */
440       __float128 force_underflow = z * z;
441       math_force_eval (force_underflow);
442     }
443   else
444     {
445       o.words32.w0 = j;
446       z = o.value;
447     }
448   return sgn * z;
449 }
450