xref: /netbsd/sys/arch/hppa/spmath/impys.S (revision c4a72b64)
1/*	$NetBSD: impys.S,v 1.1 2002/06/05 01:04:25 fredette Exp $	*/
2
3/*	$OpenBSD: impys.S,v 1.5 2001/03/29 03:58:18 mickey Exp $	*/
4
5/*
6 * Copyright 1996 1995 by Open Software Foundation, Inc.
7 *              All Rights Reserved
8 *
9 * Permission to use, copy, modify, and distribute this software and
10 * its documentation for any purpose and without fee is hereby granted,
11 * provided that the above copyright notice appears in all copies and
12 * that both the copyright notice and this permission notice appear in
13 * supporting documentation.
14 *
15 * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
16 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
17 * FOR A PARTICULAR PURPOSE.
18 *
19 * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
20 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
21 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
22 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
23 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 *
25 */
26/*
27 * pmk1.1
28 */
29/*
30 * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
31 *
32 * To anyone who acknowledges that this file is provided "AS IS"
33 * without any express or implied warranty:
34 *     permission to use, copy, modify, and distribute this file
35 * for any purpose is hereby granted without fee, provided that
36 * the above copyright notice and this notice appears in all
37 * copies, and that the name of Hewlett-Packard Company not be
38 * used in advertising or publicity pertaining to distribution
39 * of the software without specific, written prior permission.
40 * Hewlett-Packard Company makes no representations about the
41 * suitability of this software for any purpose.
42 */
43
44#include <machine/asm.h>
45
46/****************************************************************************
47 *
48 * Implement an integer multiply routine for 32-bit operands and 64-bit product
49 * with operand values of zero (multiplicand only) and -2**31 treated specially.
50 * The algorithm uses the absolute value of the multiplier, four bits at a time,
51 * from right to left, to generate partial products.  Execution speed is more
52 * important than program size in this implementation.
53 *
54 ***************************************************************************/
55/*
56 * Definitions - General registers
57 */
/*
 * Symbolic names for the general registers used by impys.
 * Note that pl and op2 both name register 4: the multiplier is loaded
 * into the low word of the product and is shifted out to the right as
 * the product is shifted in (see the shd ...,pl instructions in impys).
 */
58gr0	.equ		0		/* General register zero */
59pu	.equ		3		/* upper part of product */
60pl	.equ		4		/* lower part of product (same reg. as op2) */
61op2	.equ		4		/* multiplier (same reg. as pl) */
62op1	.equ		5		/* multiplicand */
63cnt	.equ		6		/* count in multiply */
64brindex	.equ		7		/* index into the br. table */
65sign	.equ		8		/* sign of product */
66pc	.equ		9		/* carry bit of product, = 00...01 */
67pm	.equ	       10		/* value of -1 used in shifting */
68
69	.text
70
;
; impys - signed multiply, 32-bit operands, 64-bit product.
;
;   arg0  address of the multiplicand (loaded into op1)
;   arg1  address of the multiplier   (loaded into op2)
;   arg2  address of the result: upper word is stored at 0(arg2),
;         lower word at 4(arg2)
;
; General registers 3 through 10 (pu ... pm) are pushed on the stack at
; entry and popped in reverse order before returning through rp.
;
; Outline:
;   - bit 0 of `sign' records the sign of the product (xor of the operands);
;   - negative operands are replaced by their absolute value (mpyb, mpya);
;     an operand of -2**31 cannot be negated and is handled at mmax;
;   - the multiplier is consumed four bits per pass, cnt = 8 passes, by
;     branching into the two-word-per-entry table at mtable;
;   - op2 and pl are the same register, so shifting the product right
;     also brings the next multiplier bits into position;
;   - table entries 1011 and above subtract op1 and add one to the NEXT
;     table index instead, which is why the table has a 17th entry (10000);
;   - at the end the 64-bit product is negated if sign(0) is set.
;
; NOTE(review): several sequences below place a branch in the delay slot
; of another branch or rely on nullification; instruction order and the
; label+/-4 entry points must not be changed.
;
71ENTRY(impys,32)
; push the eight working registers (popped in reverse order at fini)
72	stws,ma		pu,4(sp)		; save registers on stack
73	stws,ma		pl,4(sp)		; save registers on stack
74	stws,ma		op1,4(sp)		; save registers on stack
75	stws,ma		cnt,4(sp)		; save registers on stack
76	stws,ma		brindex,4(sp)		; save registers on stack
77	stws,ma		sign,4(sp)		; save registers on stack
78	stws,ma		pc,4(sp)		; save registers on stack
79	stws,ma		pm,4(sp)		; save registers on stack
80;
81;   Start multiply process
82;
83	ldws		0(arg1),op2		; get multiplier
84	ldws		0(arg0),op1		; get multiplicand
85	addi		-1,gr0,pm		; initialize pm to 111...1
86	comb,<		op2,gr0,mpyb		; br. if multiplier < 0
87	xor		op2,op1,sign		; sign(0) = sign of product
;						;   (delay slot: sign is also used
;						;   on the mpyb path)
88mpy1	comb,<		op1,gr0,mpya		; br. if multiplicand < 0
89	addi		0,gr0,pu		; clear product
90	addib,=		0,op1,fini0		; op1 = 0, product = 0
91mpy2	addi		1,gr0,pc		; initialize pc to 00...01
92	movib,tr	8,cnt,mloop		; set count for mpy loop
93	extru		op2,31,4,brindex	; 4 bits as index into table
94;
95	.align		8
96;
;   Common "shift product right 4" tail.  It has three entry points that
;   are used by the branch table:
;	sh4n-4	(table entry 0011) the `b sh4c' below; it is expected to be
;		nullified by the preceding sh1add,nsv when there was no
;		signed overflow
;	sh4n	(table entry 0010) shift pl, then pu
;	sh4n+4	(table entries 0000, 0001) pl was already shifted in the
;		delay slot of the table entry
;
97	b		sh4c			; br. if sign overflow
98sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
99	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
100	extru		pu,27,28,pu		;   <= zero
101;
;   Top of the loop: dispatch on the current four multiplier bits and, in
;   the delay slot, fetch the following four bits (bits 24..27 of op2,
;   taken before the product/multiplier register is shifted).
;
102mloop	blr		brindex,gr0		; br. into table
103						;   entries of 2 words
104	extru		op2,27,4,brindex	; next 4 bits into index
105;
106;
107;	branch table for the multiplication process with four multiplier bits
108;
;	Each entry is a branch followed by the instruction in its delay slot.
;	The entries starting with `addib,tr 1,brindex,...' (and entry 1101,
;	via sh2ns) add one to the index already fetched for the next pass.
;
109mtable						; two words per entry
110;
111; ----	bits = 0000 ---- shift product 4 bits -------------------------------
112;
113	b		sh4n+4			; just shift partial
114	shd		pu,pl,4,pl		;   product right 4 bits
115;
116;  ----	bits = 0001 ---- add op1, then shift 4 bits
117;
118	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
119	shd		pu,pl,4,pl		;   product right 4 bits
120;
121;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
122;
123	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
124	addb,uv		op1,pu,sh4c		;   product right 4 bits
;						;   (second add goes to sh4c on
;						;   unsigned overflow)
125;
126;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
127;
128	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
129	sh1add,nsv	op1,pu,pu		;   product right 4 bits
;						;   (,nsv: skip the `b sh4c' at
;						;   sh4n-4 if no signed overflow)
130;
131;  ----	bits = 0100 ---- shift 2, add op1, shift 2
132;
133	b		sh2sa
134	shd		pu,pl,2,pl		; shift product 2 bits
135;
136;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
137;
138	addb,tr		op1,pu,sh2us		; add op1 to product
139	shd		pu,pl,2,pl		; shift 2 bits
140;
141;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
142;
143	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
144	addb,nuv	op1,pu,sh2us		; br. if not overflow
145;
146;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
147;
148	b		sh3s
149	sub		pu,op1,pu		; subtract op1, br. to sh3s
150
151;
152;  ----	bits = 1000 ---- shift 3, add op1, shift 1
153;
154	b		sh3sa
155	shd		pu,pl,3,pl		; shift product right 3 bits
156;
157;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
158;
159	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
160	shd		pu,pl,3,pl		;   and shift 1
161;
162;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
163;
164	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
165	addb,nuv	op1,pu,sh3us		;   br. if no overflow
166;
167;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
168;
169	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
170	sub		pu,op1,pu		;   shift 2 with minus sign
171;
172;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
173;
174	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
175	shd		pu,pl,2,pl		; shift right 2 bits signed
176;
177;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
178;
;	(the next index is incremented at sh2ns)
179	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
180	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
181;
182;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
183;
184	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
185	shd		pu,pl,1,pl		; shift 1 bit
186;
187;  ----	bits = 1111 ---- add -op1, shift 4 signed
188;
189	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
190	sub		pu,op1,pu		;   to shift 4 signed
191
192;
193;  ----	bits = 10000 ---- shift 4 signed
194;
;	(index 16: reached when an index of 1111 was incremented by the
;	previous pass)
195	addib,tr	1,brindex,sh4s+4	; add 1 to index
196	shd		pu,pl,4,pl		; shift 4 signed
197;
198;  ---- end of table ---------------------------------------------------------
199;
;   sh4s: shift product right 4, filling pu from pm (all ones), i.e. the
;   partial product is negative here.  sh4s+4 skips the pl shift.
;
200sh4s	shd		pu,pl,4,pl
201	addib,tr	-1,cnt,mloop		; loop (count > 0 always here)
202	shd		pm,pu,4,pu		; shift 4, minus signed
203;
;   sh4c: finish a shift by 4 after an overflow, filling pu from pc
;   (00...01) so the carry-out becomes the new top bits.
;
204sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
205	shd		pc,pu,4,pu		; shift 4 with overflow
;   Exit sequence used here and at mmax/sh1: `b signs' with a bb,>=,n in
;   its delay slot.
;   NOTE(review): the intent appears to be "negate at signs only when
;   sign(0) is set, otherwise go straight to fini" - confirm against the
;   PA-RISC rules for a branch in a branch delay slot.
206	b		signs			; end of multiply
207	bb,>=,n		sign,0,fini		; test sign of product
208;
;   Multiplier is negative.  add,= nullifies the `b mpy1' when op2+op2 is
;   zero, i.e. when op2 = -2**31; otherwise branch back with op2 negated
;   in the delay slot.
;
209mpyb	add,=		op2,op2,gr0		; if <> 0, back to main sect.
210	b		mpy1
211	sub		0,op2,op2		; op2 = |multiplier|
212	add,>=		op1,gr0,gr0		; if op1 < 0, invert sign,
213	xor		pm,sign,sign		;   for correct result
214;
215;	special case for multiplier = -2**31, op1 = signed multiplicand
216;		or multiplicand = -2**31, op1 = signed multiplier
217;
;	product (before the sign fix-up) = op1 * 2**31:
;	pl = op1 shifted left 31 bits, pu = op1 shifted right 1 bit, signed
;
218	shd		op1,0,1,pl		; shift op1 left 31 bits
219mmax	extrs		op1,30,31,pu
220	b		signs			; negate product (if needed)
221	bb,>=,n		sign,0,fini		; test sign of product
222;
;   Multiplicand is negative.  Same pattern as mpyb: fall through to the
;   special case only when op1 = -2**31, otherwise continue at mpy2 with
;   op1 negated in the delay slot.
;
223mpya	add,=		op1,op1,gr0		; op1 = -2**31, special case
224	b		mpy2
225	sub		0,op1,op1		; op1 = |multiplicand|
226	add,>=		op2,gr0,gr0		; if op2 < 0, invert sign,
227	xor		pm,sign,sign		;   for correct result
228	movb,tr		op2,op1,mmax		; use op2 as multiplicand
229	shd		op1,0,1,pl		; shift it left 31 bits
230;
;   Tails for the "shift 3, add op1, shift 1" entries.  They differ only
;   in how pu is filled during the 3-bit shift: from pc (sh3c), with
;   zeros (sh3us), with the sign (sh3sa) or from pm (sh3s).
;
231sh3c	shd		pu,pl,3,pl		; shift product 3 bits
232	shd		pc,pu,3,pu		; shift 3 signed
233	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
234	shd		pu,pl,1,pl
235;
236sh3us	extru		pu,28,29,pu		; shift 3 unsigned
237	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
238	shd		pu,pl,1,pl
239;
240sh3sa	extrs		pu,28,29,pu		; shift 3 signed
241	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
242	shd		pu,pl,1,pl
243;
244sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
245	shd		pm,pu,3,pu
246	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
247	shd		pu,pl,1,pl
248;
;   sh1: final 1-bit unsigned shift of pu for the sh3* tails, then loop
;   or finish.
;
249sh1	addib,>		-1,cnt,mloop		; loop if count > 0
250	extru		pu,30,31,pu
251	b		signs			; end of multiply
252	bb,>=,n		sign,0,fini		; test sign of product
253;
;   Tails for the entries that subtract op1 (1011, 1100, 1101, 1110).
;   sh2ns (entry 1101) increments the next index and joins sh2sb after
;   its first instruction.
;
254sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
255	extru		pu,29,30,pu		; shift unsigned
256;
257sh2s	shd		pu,pl,2,pl		; shift with minus sign
258	shd		pm,pu,2,pu		;
259	sub		pu,op1,pu		; subtract op1
260	shd		pu,pl,2,pl		; shift with minus sign
261	addib,tr	-1,cnt,mloop		; decrement count, loop
262	shd		pm,pu,2,pu		; shift with minus sign
263						; count never reaches 0 here
264;
265sh2sb	extrs		pu,29,30,pu		; shift 2 signed
266	sub		pu,op1,pu		; subtract op1 from product
267	shd		pu,pl,2,pl		; shift with minus sign
268	addib,tr	-1,cnt,mloop		; decrement count, loop
269	shd		pm,pu,2,pu		; shift with minus sign
270						; count never reaches 0 here
271;
272sh1sa	extrs		pu,30,31,pu		; shift 1 signed
273	sub		pu,op1,pu		; subtract op1 from product
274	shd		pu,pl,3,pl		; shift 3 with minus sign
275	addib,tr	-1,cnt,mloop		; dec. count, to loop
276	shd		pm,pu,3,pu		; count never reaches 0 here
277;
;   Multiplicand is zero: pu was already cleared at mpy1, clear pl too.
;
278fini0	movib,tr,n	0,pl,fini		; product = 0 as op1 = 0
279;
;   Tails for the "shift 2, add op1, shift 2" entries (0100, 0101, 0110);
;   all of them join at sh2a.
;
280sh2us	extru		pu,29,30,pu		; shift 2 unsigned
281	addb,tr		op1,pu,sh2a		; add op1
282	shd		pu,pl,2,pl		; shift 2 bits
283;
284sh2c	shd		pu,pl,2,pl
285	shd		pc,pu,2,pu		; shift with carry
286	addb,tr		op1,pu,sh2a		; add op1 to product
287	shd		pu,pl,2,pl		; br. to sh2a to shift pu
288;
289sh2sa	extrs		pu,29,30,pu		; shift with sign
290	addb,tr		op1,pu,sh2a		; add op1 to product
291	shd		pu,pl,2,pl		; br. to sh2a to shift pu
292;
293sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
294	extru		pu,29,30,pu
295;
;   All passes done: if sign(0) is clear go to fini (the ,n is expected
;   to nullify the sub that follows), otherwise negate the 64-bit product
;   pu:pl with a sub / subtract-with-borrow pair.
;
296mulend	bb,>=,n		sign,0,fini		; test sign of product
297signs	sub		0,pl,pl			; negate product if sign
298	subb		0,pu,pu			;   is negative
299;
300;	finish
301;
302fini	stws		pu,0(arg2)		; save high part of result
303	stws		pl,4(arg2)		; save low part of result
304
; pop the saved registers in the reverse of the order they were pushed;
; the last one is restored in the delay slot of the return branch
305	ldws,mb		-4(sp),pm		; restore registers
306	ldws,mb		-4(sp),pc		; restore registers
307	ldws,mb		-4(sp),sign		; restore registers
308	ldws,mb		-4(sp),brindex		; restore registers
309	ldws,mb		-4(sp),cnt		; restore registers
310	ldws,mb		-4(sp),op1		; restore registers
311	ldws,mb		-4(sp),pl		; restore registers
312	bv		0(rp)			; return
313	ldws,mb		-4(sp),pu		; restore registers
314
315EXIT(impys)
316	.end
317