1 /* mathfuncs.c: standalone log(), sqrt(), pow() functions not requiring libm
2 Copyright (C) 1996-2017 Paul Sheer
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307, USA.
18 */
19
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include "mad.h"
23
24 /* this file is to avoid having to link with the whole libm when all
25 we need is a pow() function. But its more an interesting exercise. */
26
27 /* to force use of softcoded pow even on linux-x86's: */
28 /* #define USE_OWN_POW */
29
30 #define floating_p_error(x) float_error(__FILE__, __LINE__)
31
float_error(char * file,int line)32 static void float_error (char *file, int line)
33 {
34 fprintf (stderr, "%s:%d: floating point error\n", file, line);
35 abort ();
36 }
37
38 #if !defined(__i386__) || !defined(__GNUC__) || !defined(__linux__) || defined(USE_OWN_POW)
39
40 /*
41 Libc pow() using an FPU is timed to be
42 23 times faster than this soft-coded
43 pow function. In the test below.
44 486DX4-100
45 gcc-2.7.2 -O3 -fomit-frame-pointer
46 libc-5.4.17 -O2
47 */
48
49 /*
50 Principles:
51
52 x^y :
53
54 taylor(x^y)
55 =
56 ... + y^5*z^5/120 + y^4*z^4/24 + y^3*z^3/6 + y^2*z^2/2 + y*z + 1,
57 where z = log_e(x)
58
59 log_e(q + x) :
60
61 taylor(log_e(q + x)) =
62 log_e(q) + ... + x^5/(5*q^5)-x^4/(4*q^4) + x^3/(3*q^3)-x^2/(2*q^2) + x/q
63 */
64
65 /* results are this accurate */
66 #define F_ACCURACY 1e-15
67
68 /* e^4, e, e^0.25 */
69 #define VITAMIN_E4 54.598150033144239078
70 #define VITAMIN_E 2.7182818284590452353
71 #define VITAMIN_E25 1.2840254166877414840
72
73 #define my_fabs(t) ((t) >= 0 ? (t) : -(t))
my_log(double x)74
75 double my_log (double x)
76 {
77 int i = 1, j;
78 double t, q = 1, ans = 0;
79
80 if (x <= 0)
81 floating_p_error(0);
82
83 /* find an initial approximation */
84 if (x > 1.0) {
85 do {
86 ans += 4;
87 } while ((q *= VITAMIN_E4) < x);
88 do {
89 ans -= 1;
90 } while ((q /= VITAMIN_E) > x);
91 while ((q *= VITAMIN_E25) < x)
92 ans += 0.25;
93 q /= VITAMIN_E25;
94 } else if (x < 1.0) {
95 do {
96 ans -= 4;
97 } while ((q /= VITAMIN_E4) > x);
98 do {
99 ans += 1;
100 } while ((q *= VITAMIN_E) < x);
101 do {
102 ans -= 0.25;
103 } while ((q /= VITAMIN_E25) > x);
104 } else
105 return 0.0;
106
107 x -= q;
108
109 /* taylor series */
110 do {
111 t = 1;
112 for (j = 0; j < i; j++)
113 t *= -x / q;
114 t /= (double) i++;
115 ans -= t;
116 if (i > 200) /* shouldn't happen */
117 floating_p_error(0);
118 } while (my_fabs (t * ans) > F_ACCURACY);
119
120 return ans;
121 }
122
my_sqrt(double x)123
124 double my_sqrt (double x)
125 {
126 double last_ans, ans = 2;
127
128 if (x < 0.0)
129 floating_p_error(0);
130 if (x == 0.0)
131 return 0.0;
132
133 do {
134 last_ans = ans;
135 ans = (ans + x / ans) / 2;
136 } while (my_fabs ((ans - last_ans) / ans) > F_ACCURACY);
137
138 return ans;
139 }
140
my_pow(double x,double y)141
142 double my_pow (double x, double y)
143 {
144 double z, ans = 1, ans2 = 1, t;
145 long i, j, inv = 0;
146 unsigned long max, negative = 0;
147
148 if (y == 0.0)
149 return 1.0;
150 if (x == 0.0) {
151 if (y < 0.0) {
152 floating_p_error(0);
153 } else
154 return 0.0;
155 }
156 if (y == 1.0)
157 return x;
158 if (y < 0.0) {
159 y = -y;
160 inv = 1;
161 }
162 z = my_log (x);
163
164 max = (unsigned long) -1;
165 if ((double) y > (double) max / 4) {
166 if (inv)
167 return 0.0;
168 else
169 floating_p_error(0);
170 }
171
172 if (x < 0.0) {
173 negative = ((long) y);
174 if (y != (double) negative)
175 floating_p_error(0);
176 negative &= 1;
177 x = -x;
178 }
179
180 y *= 2;
181 j = y;
182 y -= (double) j;
183 y /= 2;
184
185 ans2 = x;
186
187 /* calc to the nearest 0.5 of a power, */
188 if (j % 2)
189 ans = my_sqrt (x);
190
191 /* multiply it up in squares */
192 while ((j >>= 1)) {
193 if (j & 1)
194 ans *= ans2;
195 ans2 *= ans2;
196 }
197
198 j = 1;
199 ans2 = 1;
200 /* taylor series for the remaining */
201 do {
202 t = 1;
203 for (i = 1; i <= j; i++)
204 t *= y * z / i;
205 ans2 += t;
206 j++;
207 if (j > 200)
208 floating_p_error(0); /* shouldn't happen */
209 } while (my_fabs (t / (ans * ans2)) > F_ACCURACY);
210
211 if (negative)
212 ans = -ans;
213
214 if (inv)
215 return 1.0 / (ans * ans2);
216 else
217 return ans * ans2;
218 }
219
220
221 #else /* defined(__i386__) && defined(__GNUC__) */
222
223 /* The following routines are from libc-5.4.17
224 written by Hongjiu Lu, but are changed a lot here */
225
my_log(double x)226
227 double my_log (double x)
228 {
229 if (x <= 0.0)
230 floating_p_error(0);
231
232 __asm__ __volatile__ ("fldln2\n\t"
233 "fxch %%st(1)\n\t"
234 "fyl2x"
235 :"=t" (x):"0" (x));
236 return x;
237 }
my_sqrt(double x)238
239 double my_sqrt (double x)
240 {
241 const double zero = 0.0;
242
243 if (x >= zero) {
244 if (x != zero) {
245 __asm__ __volatile__ ("fsqrt"
246 :"=t" (x):"0" (x));
247 }
248 return x;
249 }
250 floating_p_error(0);
251 return 0;
252 }
my_pow(double x,double y)253
254 double my_pow (double x, double y)
255 {
256 int negative;
257 __volatile__ unsigned short cw, saved_cw;
258
259 if (y == 0.0)
260 return 1.0;
261 if (x == 0.0) {
262 if (y < 0.0)
263 floating_p_error(0);
264 else
265 return 0.0;
266 }
267 if (y == 1.0)
268 return x;
269
270 if (x < 0.0) {
271 long tmp; /* should be long long */
272 tmp = (long) y; /* should be (long long) */
273 negative = tmp & 1;
274 if (y != (double) tmp)
275 floating_p_error(0);
276 x = -x;
277 } else {
278 negative = 0;
279 }
280
281 __asm__ __volatile__ ("fnstcw %0":"=m" (cw):);
282 saved_cw = cw;
283
284 cw &= 0xf3ff;
285 cw |= 0x003f;
286
287 __asm__ __volatile__ ("fldcw %0"::"m" (cw));
288
289 __asm__ __volatile__ ("fyl2x;fstl %2;frndint;fstl %%st(2);fsubrp;f2xm1;"
290 "fld1;faddp;fscale"
291 :"=t" (x):"0" (x), "u" (y));
292
293 __asm__ __volatile__ ("fldcw %0"::"m" (saved_cw));
294
295 return (negative) ? -x : x;
296 }
297
298 #endif /* defined(__i386__) && defined(__GNUC__) */
299
300 /* #define TEST_MY_POW */
301
302 #ifdef TEST_MY_POW
main(void)303
304 void main (void)
305 {
306 int i = 0;
307 double p, q, h, g;
308 for (q = -10; q < 10; q += 0.23) {
309 for (p = 0.1; p < 10; p += 0.23) {
310 i++;
311 g = pow (p, q);
312 h = my_pow (p, q);
313 if (g != 0) {
314 if (my_fabs (h - g) / g > 1e-13)
315 abort ();
316 } else {
317 if (h != 0)
318 abort ();
319 }
320 }
321 }
322 printf ("Done %d.\n", i);
323 }
324
325 #endif /* TEST_MY_POW */
326
327