1 /* Copyright (C) 2008-2018 Free Software Foundation, Inc.
2    Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3 		on behalf of Synopsys Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20 
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 <http://www.gnu.org/licenses/>.  */
25 
/* We use a polynomial similar to a Tchebycheff polynomial to get an
   initial seed, and then use a Newton-Raphson iteration step to get an
   approximate result.
   If this result can't be rounded to the exact result with confidence, we
   round to the value between the two closest representable values, and
   test if the correctly rounded value is above or below this value.
32 
   Because of the Newton-Raphson iteration step, an error in the seed at X
   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
   or a polynomial that is close to optimal according to the maximum norm
   on the error of the seed value; we want one that is close to optimal
   according to the maximum norm on the error of the result, i.e. we
   want the maxima of the polynomial to increase linearly.
   Given an interval [X0,X2) over which to approximate,
   with X1 := (X0+X2)/2,  D := X1-X0, F := 1/D, and S := D/X1 we have,
   like for Tchebycheff polynomials:
42    P(0) := 1
43    but then we have:
44    P(1) := X + S*D
45    P(2) := 2 * X^2 + S*D * X - D^2
46    Then again:
47    P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
48  */
/* Standard headers for sqrt/log, printf and memset.  */
#include <math.h>
#include <stdio.h>
#include <string.h>
/* Most negative error returned by err () so far.  It starts at an
   arbitrary large value (42) so that the first call replaces it.  */
static long double merr = 42.;

/* Return the error of the reciprocal approximation at X for the linear
   seed A0 + (X - 1) * A1.

   The seed is refined by one Newton-Raphson step and the result is
   compared against 1 / X evaluated in long double.  Algebraically the
   refined value equals 1/X - X * (seed - 1/X)^2, so apart from rounding
   the error is never positive; hence only the lower bound is checked
   and tracked.

   Side effects: a diagnostic is printed on stdout when the error is at
   or below -2^-30, and the file-scope minimum MERR is lowered when a new
   minimum is found.  The long double error is converted to double on
   return.  */
double
err (long double a0, long double a1, long double x)
{
  long double seed = a0 + (x-1)*a1;

  /* One Newton-Raphson step for the reciprocal: y' = 2*y - y*x*y.  */
  long double approx = 2. * seed - seed * x * seed;
  /* Not named `true': that is a keyword in C23 and a macro as soon as
     <stdbool.h> is included.  */
  long double exact = 1./x;
  long double diff = approx - exact;

  if (diff <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
            (double)x, (double)approx, (double)exact);
  if (merr > diff)
    merr = diff;
  return diff;
}
68 
/* Pack the low 12 bits of -I1 (slope field) and the 20-bit value I0
   (constant field) into one 32-bit table word.  The arithmetic is done
   in unsigned so that shifting into bit 31 is well defined even where
   long is only 32 bits wide.  I1 must be in [-4095, 0] and I0 in
   [0, 0xfffff].  */
static unsigned
pack_fields (long i1, long i0)
{
  return ((unsigned) -i1 << 20) | (unsigned) i0;
}

/* Print the seed table on stdout, one "\t.long 0x..." line per entry.

   The interval [1,2) is walked in 64 steps of 1/64.  For each
   sub-interval with midpoint X1:
     - T[] is filled with the degree-4 Taylor coefficients of 1/X
       around X1;
     - P[n][k], the coefficient of the k-th power in the polynomial P(n)
       described at the top of this file, is used to fold the degree 4,
       3 and 2 terms of T into the lower coefficients, after which only
       T[0] and T[1] are used;
     - the remaining slope and constant term are quantized and packed
       into one 32-bit word (see pack_fields).

   Returns 0.  */
int
main (void)
{
  long double T[5]; /* Taylor polynomial */
  long double P[5][5] = { { 0 } };
  int i, j;
  long double X0, X1, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2;
  unsigned io;

  /* Entries that do not depend on the sub-interval: P(0) = 1, the
     leading coefficient 2^(n-1) of each P(n), and the constant term of
     P(2).  */
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;

  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      long double T0, Ti1;

      X1 = X0 + inc * 0.5;
      S = D / X1;

      /* T[i] = (-1)^i / X1^(i+1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif

      /* S-dependent entries of P(1) and P(2), then P(3) and P(4) from
         the recurrence P(n+1) = 2 * X * P(n) - D^2 * P(n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif

      /* Subtract the multiple of P(i) that matches the degree-i
         coefficient of T, highest degree first.  T[i] itself is left
         untouched; only the lower coefficients are adjusted.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];

          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* Disabled experiment that scales T[2] by 1024 into i2 and folds a
         remainder into T[0] and T[1].
         NOTE(review): `i' in the next statement is presumably meant to
         be `i2' -- confirm before enabling.  */
      i2 = T[2]*1024;
      long double a = (T[2]-i/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif

      /* Quantize slope and constant term.  The effective slope Ti1 is
         the whole packed word divided by -2^32, so it depends on the
         constant field i0 as well, and i0 in turn depends on Ti1; four
         rounds are run so that the two fields can settle.  */
      for (i = 0, i0 = 0; i < 4; i++)
        {
          /* Slope scaled by 2^12, compensated for the contribution of
             i0, rounded (the value is negative, hence - 0.5), and
             reduced to 12 bits of magnitude.  */
          i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
          i1 = - (-i1 & 0x0fff);
          Ti1 = pack_fields (i1, i0) /-(long double)(1LL<<32LL);
          /* Constant term: correct for the slope quantization through
             P(1), then re-centre from X1 to 1.  */
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 1024 * 1024 + 0.5;
          i0 &= 0xfffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = pack_fields (i1, i0);
#if 0
      {
        /* A1 and A0 are the slope and constant term decoded back from
           the packed word; Xm0 and Xm1 are additional abscissae at
           which the error is sampled.  */
        long double X2 = X0 + inc;
        long double A1 = io/-65536./65536.;
        long double A0 = (io << 12)/65536./65536.;
        long double Xm0 = 1./sqrt (-A1);
        long double Xm1 = 0.5+0.5*-A0/A1;

        printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
        printf ("%.12f %.12f %.12f\n",
                err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
        printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
        printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
      }
#endif
      /* io is unsigned, matching the %x conversion.  */
      printf ("\t.long 0x%x\n", io);
   }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}
162