1 /*-------------------------------------------------------------------------
2    expf.c - Computes e**x of a 32-bit float as outlined in [1]
3 
4    Copyright (C) 2001, 2002, Jesus Calvino-Fraga, jesusc@ieee.org
5 
6    This library is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by the
8    Free Software Foundation; either version 2, or (at your option) any
9    later version.
10 
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this library; see the file COPYING. If not, write to the
18    Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
19    MA 02110-1301, USA.
20 
21    As a special exception, if you link this library with other files,
22    some of which are compiled with SDCC, to produce an executable,
23    this library does not by itself cause the resulting executable to
24    be covered by the GNU General Public License. This exception does
25    not however invalidate any other reasons why the executable file
26    might be covered by the GNU General Public License.
27 -------------------------------------------------------------------------*/
28 
29 /* [1] William James Cody and W.  M.  Waite.  _Software manual for the
30    elementary functions_, Englewood Cliffs, N.J.:Prentice-Hall, 1980. */
31 
32 /* Version 1.0 - Initial release */
33 
34 #define __SDCC_MATH_LIB
35 #include <math.h>
36 #include <errno.h>
37 #include <stdbool.h>
38 
39 #ifdef MATH_ASM_MCS51
40 
41 #define __SDCC_FLOAT_LIB
42 #include <float.h>
43 
44 // TODO: share with other temps
45 static __bit sign_bit;
46 static __data unsigned char expf_y[4];
47 static __data unsigned char n;
48 
expf(float x)49 float expf(float x)
50 {
51 	x;
52 	__asm
53 	mov	c, acc.7
54 	mov	_sign_bit, c	// remember sign
55 	clr	acc.7		// and make input positive
56 	mov	r0, a
57 	mov	c, b.7
58 	rlc	a		// extract exponent
59 	add	a, #153
60 	jc	expf_not_zero
61 	// input is a very small number, so e^x is 1.000000
62 	mov	dptr, #0
63 	mov	b, #0x80
64 	mov	a, #0x3F
65 	ljmp	expf_exit
66 expf_not_zero:
67 	// TODO: check exponent for very small values, and return zero
68 	mov	_n, #0
69 	mov	a, dpl
70 	add	a, #0xE8	// is x >= 0.69314718
71 	mov	a, dph
72 	addc	a, #0x8D
73 	mov	a, b
74 	addc	a, #0xCE
75 	mov	a, r0
76 	addc	a, #0xC0
77 	mov	a, r0
78 	jnc	expf_no_range_reduction
79 expf_range_reduction:
80 	mov	(_expf_y + 0), dpl	// keep copy of x in "exp_y"
81 	mov	(_expf_y + 1), dph
82 	mov	(_expf_y + 2), b
83 	mov	(_expf_y + 3), a
84 	mov     r0, #0x3B
85 	push    ar0
86 	mov     r0, #0xAA
87 	push    ar0
88 	mov     r0, #0xB8
89 	push    ar0
90 	mov     r0, #0x3F
91 	push    ar0
92 	lcall	___fsmul	// x * 1.442695041 = x / ln(2)
93 	dec	sp
94 	dec	sp
95 	dec	sp
96 	dec	sp
97 	lcall	___fs2uchar	// n = int(x * 1.442695041)
98 	mov	a, dpl
99 	mov	_n, a
100 	add	a, #128
101 	jnc	expf_range_ok
102 	lcall	fs_return_inf	// exponent overflow
103 	ljmp	expf_exit
104 expf_range_ok:
105 	mov     r0,#0x00
106 	mov     r1,#0x80
107 	mov     r2,#0x31
108 	mov     r3,#0xBF
109 	lcall	expf_scale_and_add
110 	mov	(_expf_y + 0), dpl
111 	mov	(_expf_y + 1), dph
112 	mov	(_expf_y + 2), b
113 	mov	(_expf_y + 3), a
114 	mov     r0,#0x83
115 	mov     r1,#0x80
116 	mov     r2,#0x5E
117 	mov     r3,#0x39
118 	lcall	expf_scale_and_add
119 expf_no_range_reduction:
120 
121 
122 // Compute e^x using the cordic algorithm.  This works over an
123 // input range of 0 to 0.69314712.  Can be extended to work from
124 // 0 to 1.0 if the results are normalized, but over the range
125 // we use, the result is always from 1.0 to 1.99999988 (fixed
126 // exponent of 127)
127 
128 expf_cordic_begin:
129 	mov	c, b.7
130 	rlc	a		// extract exponent to acc
131 	setb	b.7
132 	mov	r1, dpl		// mantissa to r4/r3/r2/r1
133 	mov	r2, dph
134 	mov	r3, b
135 	mov	r4, #0
136 
137 	// first, align the input into a 32 bit long
138 	cjne	a, #121, exp_lshift
139 	sjmp	exp_noshift
140 exp_lshift:
141 	jc	exp_rshift
142 	// exp_a is greater than 121, so left shift
143 	add	a, #135
144 	lcall	fs_lshift_a
145 	sjmp	exp_noshift
146 exp_rshift:
147 	// exp_a is less than 121, so right shift
148 	cpl	a
149 	add	a, #122
150 	lcall	fs_rshift_a
151 exp_noshift:				// r4/r3/r2/r1 = x
152 	clr	a
153 	mov	(_expf_y + 0), a	// y = 1.0;
154 	mov	(_expf_y + 1), a
155 	mov	(_expf_y + 2), a
156 	mov	(_expf_y + 3), #0x20
157 	mov	dptr, #__fs_natural_log_table
158 	mov	r0, a			// r0 = i
159 exp_cordic_loop:
160 	clr	a
161 	movc	a, @a+dptr
162 	mov	b, a
163 	inc	dptr
164 	clr	a
165 	movc	a, @a+dptr		// r7/r6/r5/b = table[i]
166 	mov	r5, a
167 	inc	dptr
168 	clr	a
169 	movc	a, @a+dptr
170 	mov	r6, a
171 	inc	dptr
172 	clr	a
173 	movc	a, @a+dptr
174 	mov	r7, a
175 	inc	dptr
176 	clr	c
177 	mov	a, r1
178 	subb	a, b			// compare x to table[i]
179 	mov	a, r2
180 	subb	a, r5
181 	mov	a, r3
182 	subb	a, r6
183 	mov	a, r4
184 	subb	a, r7
185 	jc	exp_cordic_skip		// if table[i] < x
186 	clr	c
187 	mov	a, r1
188 	subb	a, b
189 	mov	r1, a			// x -= table[i]
190 	mov	a, r2
191 	subb	a, r5
192 	mov	r2, a
193 	mov	a, r3
194 	subb	a, r6
195 	mov	r3, a
196 	mov	a, r4
197 	subb	a, r7
198 	mov	r4, a
199 	mov	b,  (_expf_y + 0)
200 	mov	r5, (_expf_y + 1)	// r7/r6/r5/b = y >> i
201 	mov	r6, (_expf_y + 2)
202 	mov	r7, (_expf_y + 3)
203 	mov	a, r0
204 	lcall	__fs_cordic_rshift_r765_unsigned
205 	mov	a, (_expf_y + 0)
206 	add	a, b
207 	mov	(_expf_y + 0), a
208 	mov	a, (_expf_y + 1)
209 	addc	a, r5
210 	mov	(_expf_y + 1), a	// y += (y >> i)
211 	mov	a, (_expf_y + 2)
212 	addc	a, r6
213 	mov	(_expf_y + 2), a
214 	mov	a, (_expf_y + 3)
215 	addc	a, r7
216 	mov	(_expf_y + 3), a
217 exp_cordic_skip:
218 	inc	r0
219 	cjne	r0, #27, exp_cordic_loop
220 	mov	r4, (_expf_y + 3)
221 	mov	r3, (_expf_y + 2)
222 	mov	r2, (_expf_y + 1)
223 	mov	r1, (_expf_y + 0)
224 	mov	exp_a, #121
225 	lcall	fs_normalize_a		// end of cordic
226 
227 	mov	a, #127
228 	add	a, _n			// ldexpf(x, n)
229 	mov	exp_a, a
230 	lcall	fs_round_and_return
231 
232 	jnb	_sign_bit, expf_done
233 	push	dpl
234 	push	dph
235 	push	b
236 	push	acc
237 	mov	dptr, #0
238 	mov	b, #0x80
239 	mov	a, #0x3F
240 	lcall	___fsdiv		// 1.0 / x
241 	dec	sp
242 	dec	sp
243 	dec	sp
244 	dec	sp
245 expf_done:
246 	clr	acc.7		// Result is always positive!
247 expf_exit:
248 	__endasm;
249 #pragma less_pedantic
250 }
251 
dummy1(void)252 static void dummy1(void) __naked
253 {
254 	__asm
255 	.globl	fs_lshift_a
256 expf_scale_and_add:
257 	push    ar0
258 	push    ar1
259 	push    ar2
260 	push    ar3
261 	mov	dpl, _n
262 	lcall	___uchar2fs	// turn n into float
263 	lcall	___fsmul	// n * scale_factor
264 	dec	sp
265 	dec	sp
266 	dec	sp
267 	dec	sp
268 	push	dpl
269 	push	dph
270 	push	b
271 	push	acc
272 	mov	dpl, (_expf_y + 0)
273 	mov	dph, (_expf_y + 1)
274 	mov	b,   (_expf_y + 2)
275 	mov	a,   (_expf_y + 3)
276 	lcall	___fsadd	// x += (n * scale_factor)
277 	dec	sp
278 	dec	sp
279 	dec	sp
280 	dec	sp
281 	ret
282 	__endasm;
283 }
284 
dummy(void)285 static void dummy(void) __naked
286 {
287 	__asm
288 	.globl	fs_lshift_a
289 fs_lshift_a:
290 	jz	fs_lshift_done
291 	push	ar0
292 	mov	r0, a
293 fs_lshift_loop:
294 	clr	c
295 	mov	a, r1
296 	rlc	a
297 	mov	r1, a
298 	mov	a, r2
299 	rlc	a
300 	mov	r2, a
301 	mov	a, r3
302 	rlc	a
303 	mov	r3, a
304 	mov	a, r4
305 	rlc	a
306 	mov	r4, a
307 	djnz	r0, fs_lshift_loop
308 	pop	ar0
309 fs_lshift_done:
310 	ret
311 	__endasm;
312 }
313 
314 #else // not MATH_ASM_MCS51
315 
316 #define P0      0.2499999995E+0
317 #define P1      0.4160288626E-2
318 #define Q0      0.5000000000E+0
319 #define Q1      0.4998717877E-1
320 
321 #define P(z) ((P1*z)+P0)
322 #define Q(z) ((Q1*z)+Q0)
323 
324 #define C1       0.693359375
325 #define C2      -2.1219444005469058277e-4
326 
327 #define BIGX    88.72283911  /* ln(HUGE_VALF) */
328 #define EXPEPS  1.0E-7       /* exp(1.0E-7)=0.0000001 */
329 #define K1      1.4426950409 /* 1/ln(2) */
330 
expf(float x)331 float expf(float x) _FLOAT_FUNC_REENTRANT
332 {
333     int n;
334     float xn, g, r, z, y;
335     bool sign;
336 
337     if(x>=0.0)
338         { y=x;  sign=0; }
339     else
340         { y=-x; sign=1; }
341 
342     if(y<EXPEPS) return 1.0;
343 
344     if(y>BIGX)
345     {
346         if(sign)
347         {
348             errno=ERANGE;
349             return HUGE_VALF
350             ;
351         }
352         else
353         {
354             return 0.0;
355         }
356     }
357 
358     z=y*K1;
359     n=z;
360 
361     if(n<0) --n;
362     if(z-n>=0.5) ++n;
363     xn=n;
364     g=((y-xn*C1))-xn*C2;
365     z=g*g;
366     r=P(z)*g;
367     r=0.5+(r/(Q(z)-r));
368 
369     n++;
370     z=ldexpf(r, n);
371     if(sign)
372         return 1.0/z;
373     else
374         return z;
375 }
376 
377 #endif
378