xref: /openbsd/sys/lib/libkern/arch/alpha/__remq.S (revision 4d6af78a)
1
2
3/*	$OpenBSD: __remq.S,v 1.1 2007/11/25 18:25:34 deraadt Exp $	*/
4/*	$NetBSD: divrem.m4,v 1.5 1996/10/17 04:26:25 cgd Exp $	*/
5
6/*
7 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
8 * All rights reserved.
9 *
10 * Author: Chris G. Demetriou
11 *
12 * Permission to use, copy, modify and distribute this software and
13 * its documentation is hereby granted, provided that both the copyright
14 * notice and this permission notice appear in all copies of the
15 * software, derivative works or modified versions, and any portions
16 * thereof, and that both notices appear in supporting documentation.
17 *
18 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
19 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
20 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
21 *
22 * Carnegie Mellon requests users of this software to return to
23 *
24 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
25 *  School of Computer Science
26 *  Carnegie Mellon University
27 *  Pittsburgh PA 15213-3890
28 *
29 * any improvements or extensions that they make and grant Carnegie the
30 * rights to redistribute these changes.
31 */
32
33/*
34 * Division and remainder.
35 *
36 * The use of m4 is modeled after the sparc code, but the algorithm is
37 * simple binary long division.
38 *
39 * Note that the loops could probably benefit from unrolling.
40 */
41
/*
 * M4 parameters this file was generated with:
 *	name of function to generate:	__remq
 *	operation:			rem (t10 % t11 -> t12)
 *	signed:				true
 *	total number of bits:		64
 */
49
50
51
52
53
54
55
56
57
58
59
60#include <machine/asm.h>
61
/*
 * __remq: signed 64-bit remainder, t12 = t10 % t11.
 *
 * In:	t10 = dividend, t11 = divisor, t9 = return address
 * Out:	t12 = remainder; its sign follows the dividend
 *
 * t0-t4, t10 and t11 are spilled to a 64-byte stack frame on entry and
 * reloaded before returning, so the caller sees them unchanged.
 *
 * Register roles inside the routine:
 *	t0	single-bit mask (bit scans) / current quotient bit (divide loop)
 *	t1	shift count
 *	t2	comparison result
 *	t3	scratch
 *	t4	low bit set => negate the result before returning
 *
 * A zero divisor raises PAL_gentrap with a0 = -2; if execution resumes
 * afterwards the result is zero.  a0 is parked in the otherwise unused
 * frame slot at 56(sp) around the trap, so that it is handed back to the
 * caller intact like the other registers this routine touches.
 */
LEAF(__remq, 0)					/* XXX */
	lda	sp, -64(sp)
	stq	t0, 0(sp)
	stq	t1, 8(sp)
	stq	t2, 16(sp)
	stq	t3, 24(sp)
	stq	t4, 32(sp)
	stq	t10, 40(sp)
	stq	t11, 48(sp)
	mov	zero, t12			/* Initialize result to zero */

	/* Compute sign of result.  If either is negative, this is easy.  */
	or	t10, t11, t4			/* not the sign, but... */
	srl	t4, 64 - 1, t4		/* rather, or of high bits */
	blbc	t4, Ldoit			/* neither negative? do it! (t4 == 0) */

	mov	t10, t4				/* sign follows t10. */
	srl	t4, 64 - 1, t4		/* make negation the low bit. */

	srl	t10, 64 - 1, t1		/* is t10 negative? */
	blbc	t1, LnegB			/* no, so t11 must be the negative one. */

	/* t10 is negative; flip it. */
	subq	zero, t10, t10
	srl	t11, 64 - 1, t1		/* is t11 negative? */
	blbc	t1, Ldoit			/* no. */
LnegB:
	/* t11 is definitely negative, no matter how we got here. */
	subq	zero, t11, t11

	/* From here on t10 and t11 are treated as unsigned magnitudes. */
Ldoit:
	/* kill the special cases. */
	beq	t11, Ldotrap			/* division by zero! */

	cmpult	t10, t11, t2			/* t10 < t11? */
	/* t10 is untouched, and is already the remainder. */
	bne	t2, Lret_result

	cmpeq	t10, t11, t2			/* t10 == t11? */
	cmovne	t2, 1, t12			/* quotient 1 (overwritten at Lret_result) */
	cmovne	t2, zero, t10			/* remainder 0 */
	bne	t2, Lret_result

	/*
	 * Find out how many bits of zeros are at the beginning of the divisor.
	 */
LBbits:
	ldiq	t3, 1				/* t1 = 0; t0 = 1<<64-1 */
	mov	zero, t1
	sll	t3, 64-1, t0
LBloop:
	and	t11, t0, t2			/* if bit in t11 is set, done. */
	bne	t2, LAbits
	addq	t1, 1, t1			/* one more leading zero bit */
	srl	t0, 1, t0
	cmplt	t1, 64-1, t2		/* if t1 leaves one bit, done. */
	bne	t2, LBloop

	/*
	 * Take one off t1 for every leading zero bit of the dividend, so
	 * that t1 ends up as the distance the divisor has to be shifted
	 * left to line up with the dividend.
	 */
LAbits:
	beq	t1, Ldodiv			/* If t1 = 0, divide now.  */
	ldiq	t3, 1				/* t0 = 1<<64-1 */
	sll	t3, 64-1, t0

LAloop:
	and	t10, t0, t2			/* if bit in t10 is set, done. */
	bne	t2, Ldodiv
	subq	t1, 1, t1			/* decrement t1,  bit */
	srl	t0, 1, t0
	bne	t1, LAloop			/* If t1 != 0, loop again */

Ldodiv:
	sll	t11, t1, t11			/* t11 <<= i */
	ldiq	t3, 1
	sll	t3, t1, t0			/* t0 = 1 << i */

	/*
	 * Binary long division: one quotient bit per pass.  The quotient
	 * bits collected in t12 are discarded at Lret_result; only the
	 * running remainder in t10 is kept.
	 */
Ldivloop:
	cmpult	t10, t11, t2			/* t2 != 0: divisor does not fit */
	or	t12, t0, t3
	cmoveq	t2, t3, t12			/* fits: set this quotient bit */
	subq	t10, t11, t3
	cmoveq	t2, t3, t10			/* fits: take it off the remainder */
	srl	t0, 1, t0
	srl	t11, 1, t11
	beq	t10, Lret_result		/* nothing left to divide */
	bne	t0, Ldivloop			/* more bit positions to try */

Lret_result:
	mov	t10, t12			/* the remainder is the result */

	/* Check to see if we should negate it. */
	subqv	zero, t12, t3
	cmovlbs	t4, t3, t12

	ldq	t0, 0(sp)
	ldq	t1, 8(sp)
	ldq	t2, 16(sp)
	ldq	t3, 24(sp)
	ldq	t4, 32(sp)
	ldq	t10, 40(sp)
	ldq	t11, 48(sp)
	lda	sp, 64(sp)
	ret	zero, (t9), 1

Ldotrap:
	stq	a0, 56(sp)			/* keep the caller's a0; slot 56 is spare */
	ldiq	a0, -2			/* This is the signal to SIGFPE! */
	call_pal PAL_gentrap
	ldq	a0, 56(sp)			/* hand a0 back unchanged */
	mov	zero, t10			/* so that zero will be returned */

	br	zero, Lret_result

END(__remq)
180