1 /* Compute x^2 + y^2 - 1, without large cancellation error.
2    Copyright (C) 2012 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18 
19 #include "quadmath-imp.h"
20 #include <stdlib.h>
21 
22 /* Calculate X + Y exactly and store the result in *HI + *LO.  It is
23    given that |X| >= |Y| and the values are small enough that no
24    overflow occurs.  */
25 
26 static inline void
add_split(__float128 * hi,__float128 * lo,__float128 x,__float128 y)27 add_split (__float128 *hi, __float128 *lo, __float128 x, __float128 y)
28 {
29   /* Apply Dekker's algorithm.  */
30   *hi = x + y;
31   *lo = (x - *hi) + y;
32 }
33 
34 /* Calculate X * Y exactly and store the result in *HI + *LO.  It is
35    given that the values are small enough that no overflow occurs and
36    large enough (or zero) that no underflow occurs.  */
37 
38 static inline void
mul_split(__float128 * hi,__float128 * lo,__float128 x,__float128 y)39 mul_split (__float128 *hi, __float128 *lo, __float128 x, __float128 y)
40 {
41   /* Fast built-in fused multiply-add.  */
42   *hi = x * y;
43   *lo = fmaq (x, y, -*hi);
44 }
45 
46 /* Compare absolute values of floating-point values pointed to by P
47    and Q for qsort.  */
48 
49 static int
compare(const void * p,const void * q)50 compare (const void *p, const void *q)
51 {
52   __float128 pld = fabsq (*(const __float128 *) p);
53   __float128 qld = fabsq (*(const __float128 *) q);
54   if (pld < qld)
55     return -1;
56   else if (pld == qld)
57     return 0;
58   else
59     return 1;
60 }
61 
62 /* Return X^2 + Y^2 - 1, computed without large cancellation error.
63    It is given that 1 > X >= Y >= epsilon / 2, and that either X >=
64    0.75 or Y >= 0.5.  */
65 
66 __float128
__quadmath_x2y2m1q(__float128 x,__float128 y)67 __quadmath_x2y2m1q (__float128 x, __float128 y)
68 {
69   __float128 vals[4];
70   size_t i;
71 
72   /* FIXME:  SET_RESTORE_ROUNDL (FE_TONEAREST);  */
73   mul_split (&vals[1], &vals[0], x, x);
74   mul_split (&vals[3], &vals[2], y, y);
75   if (x >= 0.75Q)
76     vals[1] -= 1.0Q;
77   else
78     {
79       vals[1] -= 0.5Q;
80       vals[3] -= 0.5Q;
81     }
82   qsort (vals, 4, sizeof (__float128), compare);
83   /* Add up the values so that each element of VALS has absolute value
84      at most equal to the last set bit of the next nonzero
85      element.  */
86   for (i = 0; i <= 2; i++)
87     {
88       add_split (&vals[i + 1], &vals[i], vals[i + 1], vals[i]);
89       qsort (vals + i + 1, 3 - i, sizeof (__float128), compare);
90     }
91   /* Now any error from this addition will be small.  */
92   return vals[3] + vals[2] + vals[1] + vals[0];
93 }
94