1 /* mathfuncs.c: standalone log(), sqrt(), pow() functions not requiring libm
2    Copyright (C) 1996-2017 Paul Sheer
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17    02111-1307, USA.
18  */
19 
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include "mad.h"
23 
24 /* this file is to avoid having to link with the whole libm when all
25    we need is a pow() function. But its more an interesting exercise. */
26 
27 /* to force use of softcoded pow even on linux-x86's: */
28 /* #define USE_OWN_POW */
29 
30 #define floating_p_error(x) float_error(__FILE__, __LINE__)
31 
float_error(char * file,int line)32 static void float_error (char *file, int line)
33 {
34     fprintf (stderr, "%s:%d: floating point error\n", file, line);
35     abort ();
36 }
37 
38 #if !defined(__i386__) || !defined(__GNUC__) || !defined(__linux__) || defined(USE_OWN_POW)
39 
40 /*
41    Libc pow() using an FPU is timed to be
42    23 times faster than this soft-coded
43    pow function. In the test below.
44    486DX4-100
45    gcc-2.7.2 -O3 -fomit-frame-pointer
46    libc-5.4.17 -O2
47 */
48 
49 /*
50    Principles:
51 
52    x^y :
53 
54    taylor(x^y)
55  =
56       ... + y^5*z^5/120 + y^4*z^4/24 + y^3*z^3/6 + y^2*z^2/2 + y*z + 1,
57          where z = log_e(x)
58 
59    log_e(q + x) :
60 
61    taylor(log_e(q + x)) =
62       log_e(q) + ... + x^5/(5*q^5)-x^4/(4*q^4) + x^3/(3*q^3)-x^2/(2*q^2) + x/q
63  */
64 
65 /* results are this accurate */
66 #define F_ACCURACY 1e-15
67 
68 /* e^4, e, e^0.25 */
69 #define VITAMIN_E4   54.598150033144239078
70 #define VITAMIN_E    2.7182818284590452353
71 #define VITAMIN_E25  1.2840254166877414840
72 
73 #define my_fabs(t) ((t) >= 0 ? (t) : -(t))
my_log(double x)74 
75 double my_log (double x)
76 {
77     int i = 1, j;
78     double t, q = 1, ans = 0;
79 
80     if (x <= 0)
81 	floating_p_error(0);
82 
83 /* find an initial approximation */
84     if (x > 1.0) {
85 	do {
86 	    ans += 4;
87 	} while ((q *= VITAMIN_E4) < x);
88 	do {
89 	    ans -= 1;
90 	} while ((q /= VITAMIN_E) > x);
91 	while ((q *= VITAMIN_E25) < x)
92 	    ans += 0.25;
93 	q /= VITAMIN_E25;
94     } else if (x < 1.0) {
95 	do {
96 	    ans -= 4;
97 	} while ((q /= VITAMIN_E4) > x);
98 	do {
99 	    ans += 1;
100 	} while ((q *= VITAMIN_E) < x);
101 	do {
102 	    ans -= 0.25;
103 	} while ((q /= VITAMIN_E25) > x);
104     } else
105 	return 0.0;
106 
107     x -= q;
108 
109 /* taylor series */
110     do {
111 	t = 1;
112 	for (j = 0; j < i; j++)
113 	    t *= -x / q;
114 	t /= (double) i++;
115 	ans -= t;
116 	if (i > 200)	/* shouldn't happen */
117 	    floating_p_error(0);
118     } while (my_fabs (t * ans) > F_ACCURACY);
119 
120     return ans;
121 }
122 
my_sqrt(double x)123 
124 double my_sqrt (double x)
125 {
126     double last_ans, ans = 2;
127 
128     if (x < 0.0)
129 	floating_p_error(0);
130     if (x == 0.0)
131 	return 0.0;
132 
133     do {
134 	last_ans = ans;
135 	ans = (ans + x / ans) / 2;
136     } while (my_fabs ((ans - last_ans) / ans) > F_ACCURACY);
137 
138     return ans;
139 }
140 
my_pow(double x,double y)141 
142 double my_pow (double x, double y)
143 {
144     double z, ans = 1, ans2 = 1, t;
145     long i, j, inv = 0;
146     unsigned long max, negative = 0;
147 
148     if (y == 0.0)
149 	return 1.0;
150     if (x == 0.0) {
151 	if (y < 0.0) {
152 	    floating_p_error(0);
153 	} else
154 	    return 0.0;
155     }
156     if (y == 1.0)
157 	return x;
158     if (y < 0.0) {
159 	y = -y;
160 	inv = 1;
161     }
162     z = my_log (x);
163 
164     max = (unsigned long) -1;
165     if ((double) y > (double) max / 4) {
166 	if (inv)
167 	    return 0.0;
168 	else
169 	    floating_p_error(0);
170     }
171 
172     if (x < 0.0) {
173 	negative = ((long) y);
174 	if (y != (double) negative)
175 	    floating_p_error(0);
176 	negative &= 1;
177 	x = -x;
178     }
179 
180     y *= 2;
181     j = y;
182     y -= (double) j;
183     y /= 2;
184 
185     ans2 = x;
186 
187 /* calc to the nearest 0.5 of a power, */
188     if (j % 2)
189 	ans = my_sqrt (x);
190 
191 /* multiply it up in squares */
192     while ((j >>= 1)) {
193 	if (j & 1)
194 	    ans *= ans2;
195 	ans2 *= ans2;
196     }
197 
198     j = 1;
199     ans2 = 1;
200 /* taylor series for the remaining */
201     do {
202 	t = 1;
203 	for (i = 1; i <= j; i++)
204 	    t *= y * z / i;
205 	ans2 += t;
206 	j++;
207 	if (j > 200)
208 	    floating_p_error(0);	/* shouldn't happen */
209     } while (my_fabs (t / (ans * ans2)) > F_ACCURACY);
210 
211     if (negative)
212 	ans = -ans;
213 
214     if (inv)
215 	return 1.0 / (ans * ans2);
216     else
217 	return ans * ans2;
218 }
219 
220 
221 #else				/* defined(__i386__) && defined(__GNUC__) */
222 
223 /* The following routines are from libc-5.4.17
224    written by Hongjiu Lu, but are changed a lot here */
225 
my_log(double x)226 
227 double my_log (double x)
228 {
229     if (x <= 0.0)
230 	floating_p_error(0);
231 
232     __asm__ __volatile__ ("fldln2\n\t"
233 			  "fxch %%st(1)\n\t"
234 			  "fyl2x"
235 			  :"=t" (x):"0" (x));
236     return x;
237 }
my_sqrt(double x)238 
239 double my_sqrt (double x)
240 {
241     const double zero = 0.0;
242 
243     if (x >= zero) {
244 	if (x != zero) {
245 	    __asm__ __volatile__ ("fsqrt"
246 				  :"=t" (x):"0" (x));
247 	}
248 	return x;
249     }
250     floating_p_error(0);
251     return 0;
252 }
my_pow(double x,double y)253 
254 double my_pow (double x, double y)
255 {
256     int negative;
257     __volatile__ unsigned short cw, saved_cw;
258 
259     if (y == 0.0)
260 	return 1.0;
261     if (x == 0.0) {
262 	if (y < 0.0)
263 	    floating_p_error(0);
264 	else
265 	    return 0.0;
266     }
267     if (y == 1.0)
268 	return x;
269 
270     if (x < 0.0) {
271 	long tmp;	/* should be long long */
272 	tmp = (long) y;	/* should be (long long) */
273 	negative = tmp & 1;
274 	if (y != (double) tmp)
275 	    floating_p_error(0);
276 	x = -x;
277     } else {
278 	negative = 0;
279     }
280 
281     __asm__ __volatile__ ("fnstcw %0":"=m" (cw):);
282     saved_cw = cw;
283 
284     cw &= 0xf3ff;
285     cw |= 0x003f;
286 
287     __asm__ __volatile__ ("fldcw %0"::"m" (cw));
288 
289     __asm__ __volatile__ ("fyl2x;fstl %2;frndint;fstl %%st(2);fsubrp;f2xm1;"
290 			  "fld1;faddp;fscale"
291 			  :"=t" (x):"0" (x), "u" (y));
292 
293     __asm__ __volatile__ ("fldcw %0"::"m" (saved_cw));
294 
295     return (negative) ? -x : x;
296 }
297 
298 #endif				/* defined(__i386__) && defined(__GNUC__) */
299 
300 /* #define TEST_MY_POW */
301 
302 #ifdef TEST_MY_POW
main(void)303 
304 void main (void)
305 {
306     int i = 0;
307     double p, q, h, g;
308     for (q = -10; q < 10; q += 0.23) {
309 	for (p = 0.1; p < 10; p += 0.23) {
310 	    i++;
311 	    g = pow (p, q);
312 	    h = my_pow (p, q);
313 	    if (g != 0) {
314 		if (my_fabs (h - g) / g > 1e-13)
315 		    abort ();
316 	    } else {
317 		if (h != 0)
318 		    abort ();
319 	    }
320 	}
321     }
322     printf ("Done %d.\n", i);
323 }
324 
325 #endif				/* TEST_MY_POW */
326 
327