xref: /openbsd/lib/libc/arch/alpha/gen/divrem.m4 (revision ba4624ee)
1/*	$OpenBSD: divrem.m4,v 1.5 2015/06/01 19:02:11 miod Exp $	*/
2/*	$NetBSD: divrem.m4,v 1.7 1996/10/17 03:08:04 cgd Exp $	*/
3
4/*
5 * Copyright (c) 1994, 1995 Carnegie-Mellon University.
6 * All rights reserved.
7 *
8 * Author: Chris G. Demetriou
9 *
10 * Permission to use, copy, modify and distribute this software and
11 * its documentation is hereby granted, provided that both the copyright
12 * notice and this permission notice appear in all copies of the
13 * software, derivative works or modified versions, and any portions
14 * thereof, and that both notices appear in supporting documentation.
15 *
16 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19 *
20 * Carnegie Mellon requests users of this software to return to
21 *
22 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23 *  School of Computer Science
24 *  Carnegie Mellon University
25 *  Pittsburgh PA 15213-3890
26 *
27 * any improvements or extensions that they make and grant Carnegie the
28 * rights to redistribute these changes.
29 */
30
31/*
32 * Division and remainder.
33 *
34 * The use of m4 is modeled after the sparc code, but the algorithm is
35 * simple binary long division.
36 *
37 * Note that the loops could probably benefit from unrolling.
38 */
39
40/*
41 * M4 Parameters
42 * NAME		name of function to generate
43 * OP		OP=div: t10 / t11 -> t12; OP=rem: t10 % t11 -> t12
44 * S		S=true: signed; S=false: unsigned
45 * WORDSIZE	total number of bits
46 */
47
/*
 * Register roles.  The operands arrive in t10 and t11 and the answer is
 * handed back in t12.  The remaining names are working registers that the
 * routine spills to its stack frame on entry and reloads before returning.
 */
define(`A', `t10')		/* dividend on entry; ends up holding the remainder */
define(`B', `t11')		/* divisor */
define(`RESULT', `t12')		/* value handed back to the caller */

define(`BIT', `t0')		/* single-bit mask */
define(`I', `t1')		/* bit-position counter */
define(`CC', `t2')		/* outcome of the latest comparison */
define(`T_0', `t3')		/* general temporary */
/* Signed variants also keep a negate-the-answer flag in its own register. */
ifelse(S, `true', `define(`NEG', `t4')')
57
58#include <machine/asm.h>
59
60/*
61 * These functions use t11 as an input, which makes them incompatible with
62 * the secureplt calling sequence. The compiler knows about this, and will
63 * ask for a call through a got relocation. But this can only work if the
64 * linker omits creating a plt entry for the symbol. In order to achieve
65 * this, we need to declare it as `notype' instead of `function', which
66 * means that LEAF(NAME, 0) can't be used as it uses .ent which forces the
67 * `function' type.
68 */
69	.globl	NAME
70	.type	NAME, @notype
71	.usepv	NAME, no
72
73	.cfi_startproc
74	.cfi_return_column ra
75NAME:
76	MCOUNT
77	lda	sp, -64(sp)
78	.cfi_def_cfa_offset 64
79	stq	BIT, 0(sp)
80	.cfi_rel_offset BIT, 0
81	stq	I, 8(sp)
82	.cfi_rel_offset I, 8
83	stq	CC, 16(sp)
84	.cfi_rel_offset CC, 16
85	stq	T_0, 24(sp)
86	.cfi_rel_offset T_0, 24
87ifelse(S, `true',`dnl
88	stq	NEG, 32(sp)
89	.cfi_rel_offset NEG, 32
90')dnl
91	stq	A, 40(sp)
92	.cfi_rel_offset A, 40
93	stq	B, 48(sp)
94	.cfi_rel_offset B, 48
95	mov	zero, RESULT			/* Initialize result to zero */
96
97ifelse(S, `true',
98`
99	/* Compute sign of result.  If either is negative, this is easy.  */
100	or	A, B, NEG			/* not the sign, but... */
101	srl	NEG, WORDSIZE - 1, NEG		/* rather, or of high bits */
102	blbc	NEG, Ldoit			/* neither negative? do it! */
103
104ifelse(OP, `div',
105`	xor	A, B, NEG			/* THIS is the sign! */
106', `	mov	A, NEG				/* sign follows A. */
107')
108	srl	NEG, WORDSIZE - 1, NEG		/* make negation the low bit. */
109
110	srl	A, WORDSIZE - 1, I		/* is A negative? */
111	blbc	I, LnegB			/* no. */
112	/* A is negative; flip it. */
113ifelse(WORDSIZE, `32', `
114	/* top 32 bits may be random junk */
115	zap	A, 0xf0, A
116')
117	subq	zero, A, A
118	srl	B, WORDSIZE - 1, I		/* is B negative? */
119	blbc	I, Ldoit			/* no. */
120LnegB:
121	/* B is definitely negative, no matter how we got here. */
122ifelse(WORDSIZE, `32', `
123	/* top 32 bits may be random junk */
124	zap	B, 0xf0, B
125')
126	subq	zero, B, B
127Ldoit:
128')
129ifelse(WORDSIZE, `32', `
130	/*
131	 * Clear the top 32 bits of each operand, as they may
132	 * sign extension (if negated above), or random junk.
133	 */
134	zap	A, 0xf0, A
135	zap	B, 0xf0, B
136')
137
138	/* kill the special cases. */
139	beq	B, Ldotrap			/* division by zero! */
140
141	cmpult	A, B, CC			/* A < B? */
142	/* RESULT is already zero, from above.  A is untouched. */
143	bne	CC, Lret_result
144
145	cmpeq	A, B, CC			/* A == B? */
146	cmovne	CC, 1, RESULT
147	cmovne	CC, zero, A
148	bne	CC, Lret_result
149
150	/*
151	 * Find out how many bits of zeros are at the beginning of the divisor.
152	 */
153LBbits:
154	ldiq	T_0, 1				/* I = 0; BIT = 1<<WORDSIZE-1 */
155	mov	zero, I
156	sll	T_0, WORDSIZE-1, BIT
157LBloop:
158	and	B, BIT, CC			/* if bit in B is set, done. */
159	bne	CC, LAbits
160	addq	I, 1, I				/* increment I, shift bit */
161	srl	BIT, 1, BIT
162	cmplt	I, WORDSIZE-1, CC		/* if I leaves one bit, done. */
163	bne	CC, LBloop
164
165LAbits:
166	beq	I, Ldodiv			/* If I = 0, divide now.  */
167	ldiq	T_0, 1				/* BIT = 1<<WORDSIZE-1 */
168	sll	T_0, WORDSIZE-1, BIT
169
170LAloop:
171	and	A, BIT, CC			/* if bit in A is set, done. */
172	bne	CC, Ldodiv
173	subq	I, 1, I				/* decrement I, shift bit */
174	srl     BIT, 1, BIT
175	bne	I, LAloop			/* If I != 0, loop again */
176
177Ldodiv:
178	sll	B, I, B				/* B <<= i */
179	ldiq	T_0, 1
180	sll	T_0, I, BIT
181
182Ldivloop:
183	cmpult	A, B, CC
184	or	RESULT, BIT, T_0
185	cmoveq	CC, T_0, RESULT
186	subq	A, B, T_0
187	cmoveq	CC, T_0, A
188	srl	BIT, 1, BIT
189	srl	B, 1, B
190	beq	A, Lret_result
191	bne	BIT, Ldivloop
192
193Lret_result:
194ifelse(OP, `div',
195`', `	mov	A, RESULT
196')
197ifelse(S, `true',
198`
199	/* Check to see if we should negate it. */
200	subq	zero, RESULT, T_0
201	cmovlbs	NEG, T_0, RESULT
202')
203
204	ldq	BIT, 0(sp)
205	.cfi_restore BIT
206	ldq	I, 8(sp)
207	.cfi_restore I
208	ldq	CC, 16(sp)
209	.cfi_restore CC
210	ldq	T_0, 24(sp)
211	.cfi_restore T_0
212ifelse(S, `true',`dnl
213	ldq	NEG, 32(sp)
214	.cfi_restore NEG
215')dnl
216	ldq	A, 40(sp)
217	.cfi_restore A
218	ldq	B, 48(sp)
219	.cfi_restore B
220	lda	sp, 64(sp)
221	.cfi_def_cfa_offset 0
222	ret	zero, (t9), 1
223
224Ldotrap:
225	ldiq	a0, -2			/* This is the signal to SIGFPE! */
226	call_pal PAL_gentrap
227ifelse(OP, `div',
228`', `	mov	zero, A			/* so that zero will be returned */
229')
230	br	zero, Lret_result
231
232/*
233 * For the reasons stated above, we can not use END(NAME) either, as it
234 * expands to .end which requires a matching .ent.
235 */
236	.cfi_endproc
237	.size	NAME, . - NAME
238