1// This file is dual licensed under the MIT and the University of Illinois Open
2// Source Licenses. See LICENSE.TXT for details.
3
4#include "../assembly.h"
5
6// long double __floatundixf(du_int a);
7
8#ifdef __x86_64__
9
// Switch to the read-only data section, using whichever directive matches
// the object format being targeted.
#if defined(__APPLE__)
	.const
#elif defined(__ELF__)
	.section .rodata
#else
	.section .rdata,"rd"
#endif

	.p2align 4			// 16-byte alignment
twop64:
	.quad 0x43f0000000000000	// bit pattern of the double 2^64

// Form a RIP-relative memory operand for the given symbol.
#define REL_ADDR(_sym)	(_sym)(%rip)
23
24	.text
25
26	.balign 4
27DEFINE_COMPILERRT_FUNCTION(__floatundixf)
28	movq	%rdi,	 -8(%rsp)
29	fildq	-8(%rsp)
30	test	%rdi,		%rdi
31	js		1f
32	ret
331:	faddl	REL_ADDR(twop64)
34	ret
35END_COMPILERRT_FUNCTION(__floatundixf)
36
37#endif // __x86_64__
38
39
40/* Branch-free implementation is ever so slightly slower, but more beautiful.
41   It is likely superior for inlining, so I kept it around for future reference.
42
43#ifdef __x86_64__
44
45#if defined(__APPLE__)
46	.const
47#elif defined(__ELF__)
	.section .rodata
49#else
50	.section .rdata,"rd"
51#endif
52	.balign 4
53twop52:
54	.quad 0x4330000000000000
55twop84_plus_twop52_neg:
56	.quad 0xc530000000100000
57twop84:
58	.quad 0x4530000000000000
59
60#define REL_ADDR(_a)	(_a)(%rip)
61
62.text
63.balign 4
64DEFINE_COMPILERRT_FUNCTION(__floatundixf)
65	movl	%edi,				%esi			// low 32 bits of input
66	shrq	$32,				%rdi			// hi 32 bits of input
67	orq		REL_ADDR(twop84),	%rdi			// 2^84 + hi (as a double)
68	orq		REL_ADDR(twop52),	%rsi			// 2^52 + lo (as a double)
69	movq	%rdi,			 -8(%rsp)
70	movq	%rsi,			-16(%rsp)
71	fldl	REL_ADDR(twop84_plus_twop52_neg)
72	faddl	-8(%rsp)	// hi - 2^52 (as double extended, no rounding occurs)
73	faddl	-16(%rsp)	// hi + lo (as double extended)
74	ret
75END_COMPILERRT_FUNCTION(__floatundixf)
76
77#endif // __x86_64__
78
79*/
80