xref: /openbsd/sys/lib/libkern/arch/alpha/__reml.S (revision 771fbea0)
1
2
3/*	$OpenBSD: __reml.S,v 1.1 2007/11/25 18:25:34 deraadt Exp $	*/
4/*	$NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $	*/
5
6/*
7 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
8 * All rights reserved.
9 *
10 * Author: Chris G. Demetriou
11 *
12 * Permission to use, copy, modify and distribute this software and
13 * its documentation is hereby granted, provided that both the copyright
14 * notice and this permission notice appear in all copies of the
15 * software, derivative works or modified versions, and any portions
16 * thereof, and that both notices appear in supporting documentation.
17 *
18 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
19 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
20 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
21 *
22 * Carnegie Mellon requests users of this software to return to
23 *
24 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
25 *  School of Computer Science
26 *  Carnegie Mellon University
27 *  Pittsburgh PA 15213-3890
28 *
29 * any improvements or extensions that they make and grant Carnegie the
30 * rights to redistribute these changes.
31 */
32
33/*
34 * Division and remainder.
35 *
36 * The use of m4 is modeled after the sparc code, but the algorithm is
37 * simple binary long division.
38 *
39 * Note that the loops could probably benefit from unrolling.
40 */
41
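/*
 * M4 Parameters
 *
 * This file is the m4 expansion for __reml.  The expansion also
 * replaced the parameter names in this comment with their values,
 * so the instantiated values are spelled out here:
 *
 * name of function to generate:	__reml
 * operation:			rem
 *				(div: t10 / t11 -> t12; rem: t10 % t11 -> t12)
 * signed:			true (true: signed; false: unsigned)
 * total number of bits:		32
 */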
49
50
51
52
53
54
55
56
57
58
59
60#include <machine/asm.h>
61
/*
 * __reml: signed 32-bit remainder, t10 % t11 -> t12.
 *
 * In:	t10 = dividend, t11 = divisor.  Only the low 32 bits of each
 *	are used; the upper 32 bits are cleared before dividing.
 * Out:	t12 = remainder.  It is negated when bit 31 of the dividend
 *	was set, i.e. the remainder takes the sign of the dividend.
 * Returns through t9.
 *
 * t0-t4, t10 and t11 are saved in a 64-byte stack frame on entry and
 * restored on exit.  a0 is only touched on the division-by-zero path
 * and is saved/restored around the trap there.
 *
 * A zero divisor issues PAL_gentrap with code -2; if execution
 * continues past the trap, the routine returns 0.
 *
 * Working registers:
 *	t0	single-bit mask (scan bit, then current quotient bit)
 *	t1	sign scratch, then shift count
 *	t2	comparison result
 *	t3	temporary
 *	t4	low bit set => negate the result
 */
LEAF(__reml, 0)					/* XXX */
	lda	sp, -64(sp)			/* 64-byte frame; 56(sp) is spare */
	stq	t0, 0(sp)
	stq	t1, 8(sp)
	stq	t2, 16(sp)
	stq	t3, 24(sp)
	stq	t4, 32(sp)
	stq	t10, 40(sp)
	stq	t11, 48(sp)
	mov	zero, t12			/* Initialize result to zero */

	/* Compute sign of result.  If either is negative, this is easy.  */
	or	t10, t11, t4			/* not the sign, but... */
	srl	t4, 32 - 1, t4			/* rather, or of high bits */
	blbc	t4, Ldoit			/* neither negative? do it! */

	mov	t10, t4				/* sign follows t10. */

	srl	t4, 32 - 1, t4			/* make negation the low bit. */

	srl	t10, 32 - 1, t1			/* is t10 negative? */
	blbc	t1, LnegB			/* no. */
	/* t10 is negative; flip it. */

	/* top 32 bits may be random junk */
	zap	t10, 0xf0, t10

	subq	zero, t10, t10
	srl	t11, 32 - 1, t1			/* is t11 negative? */
	blbc	t1, Ldoit			/* no. */
LnegB:
	/* t11 is definitely negative, no matter how we got here. */

	/* top 32 bits may be random junk */
	zap	t11, 0xf0, t11

	subq	zero, t11, t11
Ldoit:

	/*
	 * Clear the top 32 bits of each operand, as they may be
	 * sign extension (if negated above), or random junk.
	 * From here on both operands are unsigned 32-bit magnitudes.
	 */
	zap	t10, 0xf0, t10
	zap	t11, 0xf0, t11

	/* kill the special cases. */
	beq	t11, Ldotrap			/* division by zero! */

	cmpult	t10, t11, t2			/* t10 < t11? */
	/* t12 is already zero, from above.  t10 is untouched. */
	bne	t2, Lret_result			/* remainder is t10 itself */

	cmpeq	t10, t11, t2			/* t10 == t11? */
	cmovne	t2, 1, t12			/* quotient would be 1 ... */
	cmovne	t2, zero, t10			/* ... and the remainder 0 */
	bne	t2, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	t3, 1				/* t1 = 0; t0 = 1<<32-1 */
	mov	zero, t1
	sll	t3, 32-1, t0
LBloop:
	and	t11, t0, t2			/* if bit in t11 is set, done. */
	bne	t2, LAbits
	addq	t1, 1, t1			/* increment t1,  bit */
	srl	t0, 1, t0
	cmplt	t1, 32-1, t2			/* if t1 leaves one bit, done. */
	bne	t2, LBloop

	/*
	 * Now take away the leading zeros of the dividend, so that t1
	 * ends up as the distance between the two top set bits.
	 */
LAbits:
	beq	t1, Ldodiv			/* If t1 = 0, divide now.  */
	ldiq	t3, 1				/* t0 = 1<<32-1 */
	sll	t3, 32-1, t0

LAloop:
	and	t10, t0, t2			/* if bit in t10 is set, done. */
	bne	t2, Ldodiv
	subq	t1, 1, t1			/* decrement t1,  bit */
	srl     t0, 1, t0
	bne	t1, LAloop			/* If t1 != 0, loop again */

	/*
	 * Binary long division: line the divisor up under the dividend,
	 * then subtract-and-shift once per bit position.
	 */
Ldodiv:
	sll	t11, t1, t11			/* t11 <<= i */
	ldiq	t3, 1
	sll	t3, t1, t0			/* t0 = quotient bit for this step */

Ldivloop:
	cmpult	t10, t11, t2			/* does the divisor fit? */
	or	t12, t0, t3
	cmoveq	t2, t3, t12			/* yes: set the quotient bit */
	subq	t10, t11, t3
	cmoveq	t2, t3, t10			/* yes: subtract the divisor */
	srl	t0, 1, t0
	srl	t11, 1, t11
	beq	t10, Lret_result		/* nothing left to divide */
	bne	t0, Ldivloop

Lret_result:
	mov	t10, t12			/* what is left of t10 is the remainder */

	/* Check to see if we should negate it. */
	subqv	zero, t12, t3
	cmovlbs	t4, t3, t12

	ldq	t0, 0(sp)
	ldq	t1, 8(sp)
	ldq	t2, 16(sp)
	ldq	t3, 24(sp)
	ldq	t4, 32(sp)
	ldq	t10, 40(sp)
	ldq	t11, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	/*
	 * a0 has to carry the trap code, but it is not one of the
	 * registers this routine saves on entry.  Park the incoming
	 * value in the spare frame slot and put it back after the trap,
	 * so that a0 is preserved on this path like every other register
	 * the routine uses.
	 */
	stq	a0, 56(sp)
	ldiq	a0, -2				/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)
	mov	zero, t10			/* so that zero will be returned */

	br	zero, Lret_result

END(__reml)
193