xref: /illumos-gate/usr/src/lib/libm/amd64/src/rndintl.S (revision 55fea89d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
28*5d9d9091SRichard Lowe
29*5d9d9091SRichard Lowe        .file "rndintl.s"
30*5d9d9091SRichard Lowe
31*5d9d9091SRichard Lowe#include "libm.h"
32*5d9d9091SRichard Lowe
/*
 * aintl(x)
 *
 * Rounds x to an integral value by truncating toward zero and leaves the
 * result in %st(0).  x is the 80-bit extended value found at 8(%rsp) on
 * entry (24(%rsp) once the scratch area has been reserved).
 *
 * The x87 control word is saved, both rounding-control bits (0x0c00) are
 * set so that frndint truncates, and afterwards the rounding-control bits
 * saved on entry are merged back into the control word that is then live.
 *
 * Scratch slots after the subq:
 *	 8(%rsp)	control word saved on entry, later the restored word
 *	12(%rsp)	working control word
 *
 * Clobbers %cx, %dx and the flags.
 */
	ENTRY(aintl)
	subq	$16,%rsp		/ reserve the scratch slots
	fstcw	8(%rsp)			/ save the control word seen on entry
	fldt	24(%rsp)		/ x
	movw	8(%rsp),%cx
	orw	$0x0c00,%cx		/ both rounding bits set = toward zero
	movw	%cx,12(%rsp)
	fldcw	12(%rsp)		/ truncate from here on
	frndint				/ x chopped to an integral value
	fstcw	12(%rsp)		/ re-read the live control word
	movw	8(%rsp),%cx
	andw	$0x0c00,%cx		/ rounding bits as saved on entry
	movw	12(%rsp),%dx
	andw	$0xf3ff,%dx		/ live word with rounding bits cleared
	orw	%dx,%cx
	movw	%cx,8(%rsp)
	fldcw	8(%rsp)			/ rounding mode is back as on entry
	addq	$16,%rsp
	ret
	.align	16
	SET_SIZE(aintl)
55*5d9d9091SRichard Lowe
/*
 * irintl(x)
 *
 * Converts x, the 80-bit extended value found at 8(%rsp) on entry, to a
 * 32-bit integer with fistpl and returns it sign-extended in %rax.  The
 * control word is not modified here, so the conversion is done in whatever
 * rounding mode is in effect when the routine is called.
 *
 * The x87 stack is left as it was on entry: the loaded value is popped by
 * fistpl.  8(%rsp) inside the reserved area is the integer scratch slot.
 */
	ENTRY(irintl)
	subq	$16,%rsp		/ reserve the scratch slot
	fldt	24(%rsp)		/ x
	fistpl	8(%rsp)			/ store [x] as a 32-bit integer and pop
	fwait				/ let the store complete before reading it
	movslq	8(%rsp),%rax		/ sign-extend the result
	addq	$16,%rsp
	ret
	.align	16
	SET_SIZE(irintl)
67*5d9d9091SRichard Lowe
/*
 * Single-precision constant 0.5.  The only reference to it in this file is
 * a load (flds) in anintl, where it is compared against |x - [x]| to detect
 * an input lying exactly halfway between two integers.
 *
 * Nothing here stores to it, so it is kept in read-only data instead of the
 * writable .data section.  As with the previous .data placement, the
 * ENTRY() that follows is expected to switch back to the text section.
 */
	.section	.rodata
	.align	16
half:	.float	0.5
71*5d9d9091SRichard Lowe
/*
 * anintl(x)
 *
 * Rounds x, the 80-bit extended value found at 8(%rsp) on entry, to the
 * nearest integral value and leaves the result in %st(0).
 *
 * frndint is run with the rounding-control bits cleared (round to nearest),
 * after which the rounding-control bits saved on entry are merged back into
 * the control word.  Three outcomes follow:
 *
 *   - x compares unordered with, or equal to, the rounded value [x]:
 *     x itself is returned.
 *   - |x - [x]| is exactly 0.5: the result is recomputed as x + 0.5 when
 *     the sign bit of x is clear and x - 0.5 when it is set.
 *   - otherwise [x] is returned.
 *
 * .Lanintl is a file-local name for the entry point; nintl calls it.
 *
 * Scratch slots, relative to the entry stack pointer kept in %rcx:
 *	-8(%rcx)	control word saved on entry, later the restored word
 *	-4(%rcx)	working control word
 *
 * Clobbers %ax, %rcx, %dx and the flags.
 */
	ENTRY(anintl)
.Lanintl:
	movq	%rsp,%rcx		/ keep the entry stack pointer
	subq	$16,%rsp		/ reserve the scratch slots
	fstcw	-8(%rcx)		/ save the control word seen on entry
	fldt	8(%rcx)			/ x
	movw	-8(%rcx),%dx
	andw	$0xf3ff,%dx		/ rounding bits cleared = to nearest
	movw	%dx,-4(%rcx)
	fldcw	-4(%rcx)
	fld	%st(0)			/ x,x
	frndint				/ [x],x
	fstcw	-4(%rcx)		/ re-read the live control word
	movw	-8(%rcx),%ax
	andw	$0x0c00,%ax		/ rounding bits as saved on entry
	movw	-4(%rcx),%dx
	andw	$0xf3ff,%dx		/ live word with rounding bits cleared
	orw	%dx,%ax
	movw	%ax,-8(%rcx)
	fldcw	-8(%rcx)		/ rounding mode is back as on entry
	fucomi	%st(1),%st		/ [x] against x
	jp	.Lanintl_pop		/ unordered: pop [x] and return x
	je	.Lanintl_pop		/ already integral: pop [x] and return x
	fxch				/ x,[x]
	fsub	%st(1),%st		/ x-[x],[x]
	fabs				/ |x-[x]|,[x]
	PIC_SETUP(1)
	flds	PIC_L(half)		/ 0.5,|x-[x]|,[x]
	fcomip	%st(1),%st		/ 0.5 against |x-[x]|, then pop the 0.5
	PIC_WRAPUP
	je	.Lanintl_tie		/ rare: exactly halfway
.Lanintl_pop:
	/ drop the top of the x87 stack; the entry below it is the result
	addq	$16,%rsp
	fstp	%st(0)
	ret
.Lanintl_tie:
	/ |x-[x]| = 0.5, so the stack is 0.5,[x]; move x by 0.5 away from zero
	fldt	8(%rcx)			/ x,0.5,[x]
	testw	$0x8000,16(%rcx)	/ sign bit in the sign+exponent word of x
	jnz	.Lanintl_tie_neg
	faddp				/ x+0.5,[x]
	addq	$16,%rsp
	fstp	%st(1)			/ x+0.5
	ret
.Lanintl_tie_neg:
	/ x is negative; the operand order below is what yields x-0.5
	fsubp	%st,%st(1)		/ x-0.5,[x]
	addq	$16,%rsp
	fstp	%st(1)			/ x-0.5
	ret
	.align	16
	SET_SIZE(anintl)
126*5d9d9091SRichard Lowe
/*
 * nintl(x)
 *
 * Rounds x, the 80-bit extended value found at 8(%rsp) on entry, by calling
 * anintl through its file-local label .Lanintl, converts the value returned
 * in %st(0) to a 32-bit integer with fistpl, and returns that integer
 * sign-extended in %rax.
 *
 * Frame layout once %rbp is set up:
 *	 16(%rbp)	x as passed by the caller (16-byte slot)
 *	 -8(%rbp)	32-bit integer scratch slot
 *	-32(%rbp)	copy of the 16-byte slot holding x, placed so that the
 *			callee finds it at 8(%rsp) after the call
 */
	ENTRY(nintl)
	pushq	%rbp
	movq	%rsp,%rbp
	subq	$32,%rsp		/ scratch area plus room for the copy of x
	movq	16(%rbp),%rax
	movq	%rax,(%rsp)		/ low eight bytes of the slot
	movq	24(%rbp),%rax
	movq	%rax,8(%rsp)		/ high eight bytes of the slot
	call	.Lanintl		/// LOCAL
	fistpl	-8(%rbp)		/ store the rounded value as an integer, pop
	fwait				/ let the store complete before reading it
	movslq	-8(%rbp),%rax		/ sign-extend the result
	movq	%rbp,%rsp		/ release the frame
	popq	%rbp
	ret
	.align	16
	SET_SIZE(nintl)
141