1 /*-------------------------------------------------------------------------
2 expf.c - Computes e**x of a 32-bit float as outlined in [1]
3
4 Copyright (C) 2001, 2002, Jesus Calvino-Fraga, jesusc@ieee.org
5
6 This library is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this library; see the file COPYING. If not, write to the
18 Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
19 MA 02110-1301, USA.
20
21 As a special exception, if you link this library with other files,
22 some of which are compiled with SDCC, to produce an executable,
23 this library does not by itself cause the resulting executable to
24 be covered by the GNU General Public License. This exception does
25 not however invalidate any other reasons why the executable file
26 might be covered by the GNU General Public License.
27 -------------------------------------------------------------------------*/
28
29 /* [1] William James Cody and W. M. Waite. _Software manual for the
30 elementary functions_, Englewood Cliffs, N.J.:Prentice-Hall, 1980. */
31
32 /* Version 1.0 - Initial release */
33
34 #define __SDCC_MATH_LIB
35 #include <math.h>
36 #include <errno.h>
37 #include <stdbool.h>
38
39 #ifdef MATH_ASM_MCS51
40
41 #define __SDCC_FLOAT_LIB
42 #include <float.h>
43
44 // TODO: share with other temps
45 static __bit sign_bit;
46 static __data unsigned char expf_y[4];
47 static __data unsigned char n;
48
expf(float x)49 float expf(float x)
50 {
51 x;
52 __asm
53 mov c, acc.7
54 mov _sign_bit, c // remember sign
55 clr acc.7 // and make input positive
56 mov r0, a
57 mov c, b.7
58 rlc a // extract exponent
59 add a, #153
60 jc expf_not_zero
61 // input is a very small number, so e^x is 1.000000
62 mov dptr, #0
63 mov b, #0x80
64 mov a, #0x3F
65 ljmp expf_exit
66 expf_not_zero:
67 // TODO: check exponent for very small values, and return zero
68 mov _n, #0
69 mov a, dpl
70 add a, #0xE8 // is x >= 0.69314718
71 mov a, dph
72 addc a, #0x8D
73 mov a, b
74 addc a, #0xCE
75 mov a, r0
76 addc a, #0xC0
77 mov a, r0
78 jnc expf_no_range_reduction
79 expf_range_reduction:
80 mov (_expf_y + 0), dpl // keep copy of x in "exp_y"
81 mov (_expf_y + 1), dph
82 mov (_expf_y + 2), b
83 mov (_expf_y + 3), a
84 mov r0, #0x3B
85 push ar0
86 mov r0, #0xAA
87 push ar0
88 mov r0, #0xB8
89 push ar0
90 mov r0, #0x3F
91 push ar0
92 lcall ___fsmul // x * 1.442695041 = x / ln(2)
93 dec sp
94 dec sp
95 dec sp
96 dec sp
97 lcall ___fs2uchar // n = int(x * 1.442695041)
98 mov a, dpl
99 mov _n, a
100 add a, #128
101 jnc expf_range_ok
102 lcall fs_return_inf // exponent overflow
103 ljmp expf_exit
104 expf_range_ok:
105 mov r0,#0x00
106 mov r1,#0x80
107 mov r2,#0x31
108 mov r3,#0xBF
109 lcall expf_scale_and_add
110 mov (_expf_y + 0), dpl
111 mov (_expf_y + 1), dph
112 mov (_expf_y + 2), b
113 mov (_expf_y + 3), a
114 mov r0,#0x83
115 mov r1,#0x80
116 mov r2,#0x5E
117 mov r3,#0x39
118 lcall expf_scale_and_add
119 expf_no_range_reduction:
120
121
122 // Compute e^x using the cordic algorithm. This works over an
123 // input range of 0 to 0.69314712. Can be extended to work from
124 // 0 to 1.0 if the results are normalized, but over the range
125 // we use, the result is always from 1.0 to 1.99999988 (fixed
126 // exponent of 127)
127
128 expf_cordic_begin:
129 mov c, b.7
130 rlc a // extract exponent to acc
131 setb b.7
132 mov r1, dpl // mantissa to r4/r3/r2/r1
133 mov r2, dph
134 mov r3, b
135 mov r4, #0
136
137 // first, align the input into a 32 bit long
138 cjne a, #121, exp_lshift
139 sjmp exp_noshift
140 exp_lshift:
141 jc exp_rshift
142 // exp_a is greater than 121, so left shift
143 add a, #135
144 lcall fs_lshift_a
145 sjmp exp_noshift
146 exp_rshift:
147 // exp_a is less than 121, so right shift
148 cpl a
149 add a, #122
150 lcall fs_rshift_a
151 exp_noshift: // r4/r3/r2/r1 = x
152 clr a
153 mov (_expf_y + 0), a // y = 1.0;
154 mov (_expf_y + 1), a
155 mov (_expf_y + 2), a
156 mov (_expf_y + 3), #0x20
157 mov dptr, #__fs_natural_log_table
158 mov r0, a // r0 = i
159 exp_cordic_loop:
160 clr a
161 movc a, @a+dptr
162 mov b, a
163 inc dptr
164 clr a
165 movc a, @a+dptr // r7/r6/r5/b = table[i]
166 mov r5, a
167 inc dptr
168 clr a
169 movc a, @a+dptr
170 mov r6, a
171 inc dptr
172 clr a
173 movc a, @a+dptr
174 mov r7, a
175 inc dptr
176 clr c
177 mov a, r1
178 subb a, b // compare x to table[i]
179 mov a, r2
180 subb a, r5
181 mov a, r3
182 subb a, r6
183 mov a, r4
184 subb a, r7
185 jc exp_cordic_skip // if table[i] < x
186 clr c
187 mov a, r1
188 subb a, b
189 mov r1, a // x -= table[i]
190 mov a, r2
191 subb a, r5
192 mov r2, a
193 mov a, r3
194 subb a, r6
195 mov r3, a
196 mov a, r4
197 subb a, r7
198 mov r4, a
199 mov b, (_expf_y + 0)
200 mov r5, (_expf_y + 1) // r7/r6/r5/b = y >> i
201 mov r6, (_expf_y + 2)
202 mov r7, (_expf_y + 3)
203 mov a, r0
204 lcall __fs_cordic_rshift_r765_unsigned
205 mov a, (_expf_y + 0)
206 add a, b
207 mov (_expf_y + 0), a
208 mov a, (_expf_y + 1)
209 addc a, r5
210 mov (_expf_y + 1), a // y += (y >> i)
211 mov a, (_expf_y + 2)
212 addc a, r6
213 mov (_expf_y + 2), a
214 mov a, (_expf_y + 3)
215 addc a, r7
216 mov (_expf_y + 3), a
217 exp_cordic_skip:
218 inc r0
219 cjne r0, #27, exp_cordic_loop
220 mov r4, (_expf_y + 3)
221 mov r3, (_expf_y + 2)
222 mov r2, (_expf_y + 1)
223 mov r1, (_expf_y + 0)
224 mov exp_a, #121
225 lcall fs_normalize_a // end of cordic
226
227 mov a, #127
228 add a, _n // ldexpf(x, n)
229 mov exp_a, a
230 lcall fs_round_and_return
231
232 jnb _sign_bit, expf_done
233 push dpl
234 push dph
235 push b
236 push acc
237 mov dptr, #0
238 mov b, #0x80
239 mov a, #0x3F
240 lcall ___fsdiv // 1.0 / x
241 dec sp
242 dec sp
243 dec sp
244 dec sp
245 expf_done:
246 clr acc.7 // Result is always positive!
247 expf_exit:
248 __endasm;
249 #pragma less_pedantic
250 }
251
dummy1(void)252 static void dummy1(void) __naked
253 {
254 __asm
255 .globl fs_lshift_a
256 expf_scale_and_add:
257 push ar0
258 push ar1
259 push ar2
260 push ar3
261 mov dpl, _n
262 lcall ___uchar2fs // turn n into float
263 lcall ___fsmul // n * scale_factor
264 dec sp
265 dec sp
266 dec sp
267 dec sp
268 push dpl
269 push dph
270 push b
271 push acc
272 mov dpl, (_expf_y + 0)
273 mov dph, (_expf_y + 1)
274 mov b, (_expf_y + 2)
275 mov a, (_expf_y + 3)
276 lcall ___fsadd // x += (n * scale_factor)
277 dec sp
278 dec sp
279 dec sp
280 dec sp
281 ret
282 __endasm;
283 }
284
dummy(void)285 static void dummy(void) __naked
286 {
287 __asm
288 .globl fs_lshift_a
289 fs_lshift_a:
290 jz fs_lshift_done
291 push ar0
292 mov r0, a
293 fs_lshift_loop:
294 clr c
295 mov a, r1
296 rlc a
297 mov r1, a
298 mov a, r2
299 rlc a
300 mov r2, a
301 mov a, r3
302 rlc a
303 mov r3, a
304 mov a, r4
305 rlc a
306 mov r4, a
307 djnz r0, fs_lshift_loop
308 pop ar0
309 fs_lshift_done:
310 ret
311 __endasm;
312 }
313
314 #else // not MATH_ASM_MCS51
315
316 #define P0 0.2499999995E+0
317 #define P1 0.4160288626E-2
318 #define Q0 0.5000000000E+0
319 #define Q1 0.4998717877E-1
320
321 #define P(z) ((P1*z)+P0)
322 #define Q(z) ((Q1*z)+Q0)
323
324 #define C1 0.693359375
325 #define C2 -2.1219444005469058277e-4
326
327 #define BIGX 88.72283911 /* ln(HUGE_VALF) */
328 #define EXPEPS 1.0E-7 /* exp(1.0E-7)=0.0000001 */
329 #define K1 1.4426950409 /* 1/ln(2) */
330
expf(float x)331 float expf(float x) _FLOAT_FUNC_REENTRANT
332 {
333 int n;
334 float xn, g, r, z, y;
335 bool sign;
336
337 if(x>=0.0)
338 { y=x; sign=0; }
339 else
340 { y=-x; sign=1; }
341
342 if(y<EXPEPS) return 1.0;
343
344 if(y>BIGX)
345 {
346 if(sign)
347 {
348 errno=ERANGE;
349 return HUGE_VALF
350 ;
351 }
352 else
353 {
354 return 0.0;
355 }
356 }
357
358 z=y*K1;
359 n=z;
360
361 if(n<0) --n;
362 if(z-n>=0.5) ++n;
363 xn=n;
364 g=((y-xn*C1))-xn*C2;
365 z=g*g;
366 r=P(z)*g;
367 r=0.5+(r/(Q(z)-r));
368
369 n++;
370 z=ldexpf(r, n);
371 if(sign)
372 return 1.0/z;
373 else
374 return z;
375 }
376
377 #endif
378