xref: /openbsd/lib/libm/arch/amd64/s_log1pf.S (revision 09467b48)
1/*	$OpenBSD: s_log1pf.S,v 1.6 2018/07/03 22:43:34 mortimer Exp $ */
2/*
3 * Written by J.T. Conklin <jtc@NetBSD.org>.
4 * Public domain.
5 */
6
7/*
8 * Modified by Lex Wennmacher <wennmach@NetBSD.org>
9 * Still public domain.
10 */
11
12#include <machine/asm.h>
13
14#include "abi.h"
15
16/*
17 * The log1pf() function is provided to compute an accurate value of
18 * log(1 + x), even for tiny values of x. The i387 FPU provides the
19 * fyl2xp1 instruction for this purpose. However, the range of this
20 * instruction is limited to:
21 * 		-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
22 *                         -0.292893 <= x <= 0.414214
23 * at least on older processor versions.
24 *
25 * log1pf() is implemented by testing the range of the argument.
26 * If it is appropriate for fyl2xp1, this instruction is used.
27 * Else, we compute log1pf(x) = ln(2)*ld(1 + x) the traditional way
28 * (using fyl2x).
29 *
30 * The range testing costs speed, but as the rationale for the very
31 * existence of this function is accuracy, we accept that.
32 *
33 * In order to reduce the cost for testing the range, we check if
34 * the argument is in the range
35 *                             -0.25 <= x <= 0.25
36 * which can be done with just one conditional branch. If x is
37 * inside this range, we use fyl2xp1. Outside of this range,
38 * the use of fyl2x is accurate enough.
39 *
40 */
41
42ENTRY(log1pf)
43	RETGUARD_SETUP(log1pf, r11)
44	XMM_ONE_ARG_FLOAT_PROLOGUE
45	flds	ARG_FLOAT_ONE
46	fabs
47	fld1				/* ... x 1 */
48	fadd	%st(0)			/* ... x 2 */
49	fadd	%st(0)			/* ... x 4 */
50	fld1				/* ... 4 1 */
51	fdivp				/* ... x 0.25 */
52	fcompp
53	fnstsw	%ax
54	andb	$69,%ah
55	jne	use_fyl2x
56	jmp	use_fyl2xp1
57
58	.align	4,0xcc
59use_fyl2x:
60	fldln2
61	flds	ARG_FLOAT_ONE
62	fld1
63	faddp
64	fyl2x
65	XMM_FLOAT_EPILOGUE
66	RETGUARD_CHECK(log1pf, r11)
67	ret
68
69	.align	4,0xcc
70use_fyl2xp1:
71	fldln2
72	flds	ARG_FLOAT_ONE
73	fyl2xp1
74	XMM_FLOAT_EPILOGUE
75	RETGUARD_CHECK(log1pf, r11)
76	ret
77END_STD(log1pf)
78