xref: /reactos/sdk/lib/crt/math/i386/pow_asm.s (revision 2aca4b27)
1/* ix87 specific implementation of pow function.
2   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004, 2005, 2007
3   Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library; if not, write to the Free
19   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20   02111-1307 USA.  */
21
22/* ReactOS modifications */
23#include <asm.inc>
24
25#define ALIGNARG(log2) log2
26#define ASM_TYPE_DIRECTIVE(name,typearg)
27#define ASM_SIZE_DIRECTIVE(name)
28#define cfi_adjust_cfa_offset(x)
29
30PUBLIC _pow
31
32.data
33ASSUME cs:nothing
34
35	.align ALIGNARG(4)
36	ASM_TYPE_DIRECTIVE(infinity,@object)
37
38inf_zero:
39infinity:
40	.byte 0, 0, 0, 0, 0, 0, HEX(f0), HEX(7f)
41	ASM_SIZE_DIRECTIVE(infinity)
42	ASM_TYPE_DIRECTIVE(zero,@object)
43zero:
44	.double 0.0
45	ASM_SIZE_DIRECTIVE(zero)
46	ASM_TYPE_DIRECTIVE(minf_mzero,@object)
47
48minf_mzero:
49minfinity:
50	.byte 0, 0, 0, 0, 0, 0, HEX(f0), HEX(ff)
51
52mzero:
53	.byte 0, 0, 0, 0, 0, 0, 0, HEX(80)
54	ASM_SIZE_DIRECTIVE(minf_mzero)
55	ASM_TYPE_DIRECTIVE(one,@object)
56
57one:
58	.double 1.0
59	ASM_SIZE_DIRECTIVE(one)
60	ASM_TYPE_DIRECTIVE(limit,@object)
61
62limit:
63	.double 0.29
64	ASM_SIZE_DIRECTIVE(limit)
65	ASM_TYPE_DIRECTIVE(p63,@object)
66
67p63:
68	.byte 0, 0, 0, 0, 0, 0, HEX(e0), HEX(43)
69	ASM_SIZE_DIRECTIVE(p63)
70
71#ifdef PIC
72#define MO(op) op##@GOTOFF(%ecx)
73#define MOX(op,x,f) op##@GOTOFF(%ecx,x,f)
74#else
75#define MO(op) op
76#define MOX(op,x,f) op[x*f]
77#endif
78
79.code
80_pow:
81	fld qword ptr [esp + 12]	// y
82	fxam
83
84#ifdef	PIC
85	LOAD_PIC_REG (cx)
86#endif
87
88	fnstsw ax
89	mov dl, ah
90	and ah, HEX(045)
91	cmp	ah, HEX(040)	// is y == 0 ?
92	je	L11
93
94	cmp ah, 5	// is y == �inf ?
95	je	L12
96
97	cmp ah, 1	// is y == NaN ?
98	je	L30
99
100	fld qword ptr [esp + 4]	// x : y
101
102	sub esp, 8
103	cfi_adjust_cfa_offset (8)
104
105	fxam
106	fnstsw ax
107	mov dh, ah
108	and ah, HEX(45)
109	cmp ah, HEX(040)
110	je	L20		// x is �0
111
112	cmp ah, 5
113	je	L15		// x is �inf
114
115	fxch st(1)			// y : x
116
117	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
118	fld	st		// y : y : x
119	fabs			// |y| : y : x
120	fcomp qword ptr ds:MO(p63)		// y : x
121	fnstsw ax
122	sahf
123	jnc	L2
124
125	/* First see whether `y' is a natural number.  In this case we
126	   can use a more precise algorithm.  */
127	fld	st		// y : y : x
128	fistp qword ptr [esp]		// y : x
129	fild qword ptr [esp]		// int(y) : y : x
130	fucomp st(1)		// y : x
131	fnstsw ax
132	sahf
133	jne	L2
134
135	/* OK, we have an integer value for y.  */
136	pop eax
137	cfi_adjust_cfa_offset (-4)
138	pop	edx
139	cfi_adjust_cfa_offset (-4)
140	or edx, 0
141	fstp st		// x
142	jns	L4		// y >= 0, jump
143	fdivr qword ptr MO(one)		// 1/x		(now referred to as x)
144	neg eax
145	adc edx, 0
146	neg edx
147L4:	fld qword ptr MO(one)		// 1 : x
148	fxch st(1)
149
150L6:	shrd eax, edx, 1
151	jnc	L5
152	fxch st(1)
153	fmul st, st(1)		// x : ST*x
154	fxch st(1)
155L5:	fmul st, st	// x*x : ST*x
156	shr edx, 1
157	mov ecx, eax
158	or ecx, edx
159	jnz	L6
160	fstp st		// ST*x
161	ret
162
163	/* y is �NAN */
164L30:
165	fld qword ptr [esp + 4]		// x : y
166	fld qword ptr MO(one)		// 1.0 : x : y
167	fucomp st(1)		// x : y
168	fnstsw ax
169	sahf
170	je	L31
171	fxch st(1)			// y : x
172L31:fstp st(1)
173	ret
174
175	cfi_adjust_cfa_offset (8)
176	.align ALIGNARG(4)
177L2:	/* y is a real number.  */
178	fxch st(1)			// x : y
179	fld qword ptr MO(one)		// 1.0 : x : y
180	fld qword ptr MO(limit)	// 0.29 : 1.0 : x : y
181	fld	st(2)		// x : 0.29 : 1.0 : x : y
182	fsub st, st(2)		// x-1 : 0.29 : 1.0 : x : y
183	fabs			// |x-1| : 0.29 : 1.0 : x : y
184	fucompp			// 1.0 : x : y
185	fnstsw ax
186	fxch st(1)			// x : 1.0 : y
187	sahf
188	ja	L7
189	fsub st, st(1)		// x-1 : 1.0 : y
190	fyl2xp1			// log2(x) : y
191	jmp	L8
192
193L7:	fyl2x			// log2(x) : y
194L8:	fmul st, st(1)		// y*log2(x) : y
195	fst st(1)		// y*log2(x) : y*log2(x)
196	frndint			// int(y*log2(x)) : y*log2(x)
197	fsub st(1), st	// int(y*log2(x)) : fract(y*log2(x))
198	fxch			// fract(y*log2(x)) : int(y*log2(x))
199	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
200	fadd qword ptr MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
201	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
202	add esp, 8
203	cfi_adjust_cfa_offset (-8)
204	fstp st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
205	ret
206
207
208	// pow(x,�0) = 1
209	.align ALIGNARG(4)
210L11:fstp st(0)		// pop y
211	fld qword ptr MO(one)
212	ret
213
214	// y == �inf
215	.align ALIGNARG(4)
216L12:	fstp st(0)		// pop y
217	fld qword ptr MO(one)		// 1
218	fld qword ptr [esp + 4]		// x : 1
219	fabs			// abs(x) : 1
220	fucompp			// < 1, == 1, or > 1
221	fnstsw ax
222	and ah, HEX(45)
223	cmp ah, HEX(45)
224	je	L13		// jump if x is NaN
225
226	cmp ah, HEX(40)
227	je	L14		// jump if |x| == 1
228
229	shl ah, 1
230	xor dl, ah
231	and edx, 2
232	fld qword ptr MOX(inf_zero, edx, 4)
233	ret
234
235	.align ALIGNARG(4)
236L14:fld qword ptr MO(one)
237	ret
238
239	.align ALIGNARG(4)
240L13:fld qword ptr [esp + 4]		// load x == NaN
241	ret
242
243	cfi_adjust_cfa_offset (8)
244	.align ALIGNARG(4)
245	// x is �inf
246L15:	fstp st(0)		// y
247	test dh, 2
248	jz	L16		// jump if x == +inf
249
250	// We must find out whether y is an odd integer.
251	fld	st		// y : y
252	fistp qword ptr [esp]		// y
253	fild qword ptr [esp]		// int(y) : y
254	fucompp			// <empty>
255	fnstsw ax
256	sahf
257	jne	L17
258
259	// OK, the value is an integer, but is the number of bits small
260	// enough so that all are coming from the mantissa?
261	pop eax
262	cfi_adjust_cfa_offset (-4)
263	pop edx
264	cfi_adjust_cfa_offset (-4)
265	and al, 1
266	jz	L18		// jump if not odd
267	mov eax, edx
268	or edx, edx
269	jns	L155
270	neg eax
271L155:
272	cmp eax, HEX(000200000)
273	ja	L18		// does not fit in mantissa bits
274	// It's an odd integer.
275	shr edx, 31
276	fld qword ptr MOX(minf_mzero, edx, 8)
277	ret
278
279	cfi_adjust_cfa_offset (8)
280	.align ALIGNARG(4)
281L16:fcomp qword ptr ds:MO(zero)
282	add esp, 8
283	cfi_adjust_cfa_offset (-8)
284	fnstsw ax
285	shr eax, 5
286	and eax, 8
287	fld qword ptr MOX(inf_zero, eax, 1)
288	ret
289
290	cfi_adjust_cfa_offset (8)
291	.align ALIGNARG(4)
292L17:	shl edx, 30	// sign bit for y in right position
293	add esp, 8
294	cfi_adjust_cfa_offset (-8)
295L18:	shr edx, 31
296	fld qword ptr MOX(inf_zero, edx, 8)
297	ret
298
299	cfi_adjust_cfa_offset (8)
300	.align ALIGNARG(4)
301	// x is �0
302L20:	fstp st(0)		// y
303	test dl, 2
304	jz	L21		// y > 0
305
306	// x is �0 and y is < 0.  We must find out whether y is an odd integer.
307	test dh, 2
308	jz	L25
309
310	fld st		// y : y
311	fistp qword ptr [esp]		// y
312	fild qword ptr [esp]		// int(y) : y
313	fucompp			// <empty>
314	fnstsw ax
315	sahf
316	jne	L26
317
318	// OK, the value is an integer, but is the number of bits small
319	// enough so that all are coming from the mantissa?
320	pop eax
321	cfi_adjust_cfa_offset (-4)
322	pop edx
323	cfi_adjust_cfa_offset (-4)
324	and al, 1
325	jz	L27		// jump if not odd
326	cmp edx, HEX(0ffe00000)
327	jbe	L27		// does not fit in mantissa bits
328	// It's an odd integer.
329	// Raise divide-by-zero exception and get minus infinity value.
330	fld qword ptr MO(one)
331	fdiv qword ptr MO(zero)
332	fchs
333	ret
334
335	cfi_adjust_cfa_offset (8)
336L25:	fstp st(0)
337L26:	add esp, 8
338	cfi_adjust_cfa_offset (-8)
339L27:	// Raise divide-by-zero exception and get infinity value.
340	fld qword ptr MO(one)
341	fdiv qword ptr MO(zero)
342	ret
343
344	cfi_adjust_cfa_offset (8)
345	.align ALIGNARG(4)
346	// x is �0 and y is > 0.  We must find out whether y is an odd integer.
347L21:test dh, 2
348	jz	L22
349
350	fld st		// y : y
351	fistp qword ptr [esp]		// y
352	fild qword ptr [esp]		// int(y) : y
353	fucompp			// <empty>
354	fnstsw ax
355	sahf
356	jne	L23
357
358	// OK, the value is an integer, but is the number of bits small
359	// enough so that all are coming from the mantissa?
360	pop eax
361	cfi_adjust_cfa_offset (-4)
362	pop edx
363	cfi_adjust_cfa_offset (-4)
364	and al, 1
365	jz	L24		// jump if not odd
366	cmp edx, HEX(0ffe00000)
367	jae	L24		// does not fit in mantissa bits
368	// It's an odd integer.
369	fld qword ptr MO(mzero)
370	ret
371
372	cfi_adjust_cfa_offset (8)
373L22:	fstp st(0)
374L23:	add esp, 8	// Don't use 2 x pop
375	cfi_adjust_cfa_offset (-8)
376L24:	fld qword ptr MO(zero)
377	ret
378
379END
380
381
382