xref: /openbsd/sys/arch/hppa/spmath/impys.S (revision 133306f0)
1/*	$OpenBSD: impys.S,v 1.3 1998/07/02 19:05:32 mickey Exp $	*/
2
3/*
4 * Copyright 1996 1995 by Open Software Foundation, Inc.
5 *              All Rights Reserved
6 *
7 * Permission to use, copy, modify, and distribute this software and
8 * its documentation for any purpose and without fee is hereby granted,
9 * provided that the above copyright notice appears in all copies and
10 * that both the copyright notice and this permission notice appear in
11 * supporting documentation.
12 *
13 * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
14 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
15 * FOR A PARTICULAR PURPOSE.
16 *
17 * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
18 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
19 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
20 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
21 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 *
23 */
24/*
25 * pmk1.1
26 */
27/*
28 * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
29 *
30 * To anyone who acknowledges that this file is provided "AS IS"
31 * without any express or implied warranty:
32 *     permission to use, copy, modify, and distribute this file
33 * for any purpose is hereby granted without fee, provided that
34 * the above copyright notice and this notice appears in all
35 * copies, and that the name of Hewlett-Packard Company not be
36 * used in advertising or publicity pertaining to distribution
37 * of the software without specific, written prior permission.
38 * Hewlett-Packard Company makes no representations about the
39 * suitability of this software for any purpose.
40 */
41
42#include <machine/asm.h>
43
44/****************************************************************************
45 *
46 * Implement a signed integer multiply routine for 32-bit operands and a 64-bit
47 * product, with operand values of zero (multiplicand only) and -2**31 treated
48 * specially.  The algorithm uses the absolute value of the multiplier, four
49 * bits at a time, from right to left, to generate partial products.  Execution
50 * speed is more important than program size in this implementation.
51 *
52 ***************************************************************************/
53/*
54 * Definitions - General registers
55 */
56gr0	.equ		0		/* General register zero, used as the constant 0 */
57pu	.equ		3		/* upper part of product */
58pl	.equ		4		/* lower part of product */
59op2	.equ		4		/* multiplier; same register as pl, so its digits leave on the right as product bits enter on the left */
60op1	.equ		5		/* multiplicand */
61cnt	.equ		6		/* count in multiply: 4-bit digits still to process */
62brindex	.equ		7		/* index into the br. table (digit, plus any carry) */
63sign	.equ		8		/* sign of product, held in bit 0 */
64pc	.equ		9		/* carry bit of product, = 00...01 */
65pm	.equ	       10		/* value of -1 used in shifting (supplies 1 bits) */
66
67ENTRY(impys)
;
;   impys: signed 32 x 32 -> 64-bit multiply.
;     arg0 = address of the multiplicand word (loaded into op1)
;     arg1 = address of the multiplier word   (loaded into op2)
;     arg2 = address of the result; high word stored at 0, low word at 4
;   The eight work registers pu..pm are pushed below and popped again
;   before the return through rp.
;
68	stws,ma		pu,4(sp)		; push pu (store, then sp += 4)
69	stws,ma		pl,4(sp)		; push pl (same register as op2)
70	stws,ma		op1,4(sp)		; push op1
71	stws,ma		cnt,4(sp)		; push cnt
72	stws,ma		brindex,4(sp)		; push brindex
73	stws,ma		sign,4(sp)		; push sign
74	stws,ma		pc,4(sp)		; push pc
75	stws,ma		pm,4(sp)		; push pm
76;
77;   Start multiply process
78;
79	ldws		0(arg1),op2		; get multiplier
80	ldws		0(arg0),op1		; get multiplicand
81	addi		-1,gr0,pm		; initialize pm to 111...1
82	comb,<		op2,gr0,mpyb		; br. if multiplier < 0
83	xor		op2,op1,sign		; (delay slot) sign(0) = sign of product
84mpy1	comb,<		op1,gr0,mpya		; br. if multiplicand < 0
85	addi		0,gr0,pu		; (delay slot) clear upper product
86	addib,=		0,op1,fini0		; op1 = 0, product = 0
87mpy2	addi		1,gr0,pc		; (delay slot) initialize pc to 00...01
88	movib,tr	8,cnt,mloop		; cnt = 8 digits of 4 bits, to mloop
89	extru		op2,31,4,brindex	; (delay slot) low 4 bits as index into table
90;
91	.align		8
92;
;   Within this routine sh4n-4 is targeted only by table entry 0011, whose
;   delay-slot sh1add,nsv nullifies the "b" below when there was no signed
;   overflow, so execution then simply continues at sh4n.
;
93	b		sh4c			; br. if sign overflow
94sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
95	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
96	extru  		pu,27,28,pu		;   <= zero; (delay slot) pu >>= 4, unsigned
97;
;   Main loop: dispatch on the current digit in brindex (0..15, or up to
;   16 when the previous table entry added a carry of 1).
;
98mloop	blr		brindex,gr0		; br. into table
99						;   entries of 2 words
100	extru		op2,27,4,brindex	; (delay slot) next 4 bits into index
101;
102;
103;	branch table for the multiplication process with four multiplier bits
104;
;	The table must start directly after the blr delay slot, and every
;	entry must stay exactly two instructions long.  Entries that add 1 to
;	brindex recode the digit as 16 minus something and pass the 16 on as a
;	carry into the next digit.
;
;	NOTE(review): in entries 0010, 0110 and 1010 the second addb sits in
;	the delay slot of the first.  As read here, the first instruction at
;	the first target still executes, and only then does the second branch
;	(if its condition holds) take effect - confirm against the PA-RISC
;	architecture manual before touching these entries.
;
105mtable						; two words per entry
106;
107; ----	bits = 0000 ---- shift product 4 bits -------------------------------
108;
109	b		sh4n+4			; just shift partial
110	shd		pu,pl,4,pl		;   product right 4 bits
111;
112;  ----	bits = 0001 ---- add op1, then shift 4 bits
113;
114	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
115	shd		pu,pl,4,pl		;   product right 4 bits
116;
117;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
118;
119	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
120	addb,uv 	op1,pu,sh4c		;   product right 4 bits; to sh4c on unsigned overflow
121;
122;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
123;
124	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
125	sh1add,nsv 	op1,pu,pu		;   product right 4 bits; no overflow skips "b sh4c"
126;
127;  ----	bits = 0100 ---- shift 2, add op1, shift 2
128;
129	b		sh2sa
130	shd		pu,pl,2,pl		; shift product 2 bits
131;
132;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
133;
134	addb,tr		op1,pu,sh2us		; add op1 to product
135	shd		pu,pl,2,pl		; shift 2 bits
136;
137;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
138;
139	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
140	addb,nuv	op1,pu,sh2us		; br. if not overflow
141;
142;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
143;
144	b		sh3s
145	sub		pu,op1,pu		; subtract op1, br. to sh3s
146
147;
148;  ----	bits = 1000 ---- shift 3, add op1, shift 1
149;
150	b		sh3sa
151	shd		pu,pl,3,pl		; shift product right 3 bits
152;
153;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
154;
155	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
156	shd		pu,pl,3,pl		;   and shift 1
157;
158;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
159;
160	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
161	addb,nuv	op1,pu,sh3us		;   br. if no overflow
162;
163;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
164;
165	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
166	sub		pu,op1,pu		;   shift 2 with minus sign
167;
168;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
169;
170	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
171	shd		pu,pl,2,pl		; shift right 2 bits signed
172;
173;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
174;
175	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
176	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
177;
178;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
179;
180	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
181	shd		pu,pl,1,pl		; shift 1 bit
182;
183;  ----	bits = 1111 ---- add -op1, shift 4 signed
184;
185	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
186	sub		pu,op1,pu		;   to shift 4 signed
187
188;
189;  ----	bits = 10000 ---- shift 4 signed
190;	(index 16: digit 1111 plus a carry of 1 from the previous entry)
191	addib,tr	1,brindex,sh4s+4	; add 1 to index
192	shd		pu,pl,4,pl		; shift 4 signed
193;
194;  ---- end of table ---------------------------------------------------------
195;
;   Shift tails.  Each one finishes the digit started by a table entry:
;   pl has usually been shifted already in a delay slot, so the tail
;   shifts pu to match (unsigned, sign-extended, with the carry bit pc, or
;   with ones from pm), decrements cnt and loops.
;
196sh4s	shd		pu,pl,4,pl		; low word takes 4 bits from pu
197	addib,tr	-1,cnt,mloop		; loop (count > 0 always here)
198	shd		pm,pu,4,pu		; (delay slot) shift 4, minus signed
199;
;   Loop exits of the form "b signs" with "bb,>=,n sign,0,fini" in the
;   delay slot: when bit 0 of sign is clear the bb goes to fini and
;   nullifies the first instruction at signs; otherwise the negation at
;   signs runs and falls through to fini.
;
200sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
201	shd		pc,pu,4,pu		; (delay slot) shift 4 with overflow (carry in)
202	b		signs			; end of multiply
203	bb,>=,n		sign,0,fini		; test sign of product
204;
;   Multiplier < 0.  add,= skips the "b mpy1" only when op2 + op2 == 0,
;   which for a negative op2 means op2 = -2**31.
;
205mpyb	add,=		op2,op2,gr0		; if <> 0, back to main sect.
206	b		mpy1
207	sub		0,op2,op2		; op2 = |multiplier|
208	add,>=		op1,gr0,gr0		; if op1 < 0, invert sign,
209	xor		pm,sign,sign		;   for correct result
210;
211;	special case for multiplier = -2**31, op1 = signed multiplicand
212;		or multiplicand = -2**31, op1 = signed multiplier
213;	The product is formed directly as op1 * 2**31 in pu:pl.
214	shd		op1,0,1,pl		; shift op1 left 31 bits
215mmax	extrs		op1,30,31,pu		; pu = op1 shifted right 1, signed
216	b		signs			; negate product (if needed)
217	bb,>=,n		sign,0,fini		; test sign of product
218;
;   Multiplicand < 0.  Same test as at mpyb, applied to op1.
;
219mpya	add,=		op1,op1,gr0		; op1 = -2**31, special case
220	b		mpy2
221	sub		0,op1,op1		; op1 = |multiplicand|
222	add,>=		op2,gr0,gr0		; if op2 < 0, invert sign,
223	xor		pm,sign,sign		;   for correct result
224	movb,tr		op2,op1,mmax		; use op2 as multiplicand
225	shd		op1,0,1,pl		; (delay slot) shift it left 31 bits
226;
227sh3c	shd		pu,pl,3,pl		; shift product 3 bits
228	shd		pc,pu,3,pu		; shift 3, carry bit shifted in
229	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
230	shd		pu,pl,1,pl
231;
232sh3us	extru		pu,28,29,pu		; shift 3 unsigned
233	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
234	shd		pu,pl,1,pl
235;
236sh3sa	extrs		pu,28,29,pu		; shift 3 signed
237	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
238	shd		pu,pl,1,pl
239;
240sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
241	shd		pm,pu,3,pu
242	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
243	shd		pu,pl,1,pl
244;
245sh1	addib,>		-1,cnt,mloop		; loop if count > 0
246	extru		pu,30,31,pu		; (delay slot) shift 1 unsigned
247	b		signs			; end of multiply
248	bb,>=,n		sign,0,fini		; test sign of product
249;
250sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
251	extru		pu,29,30,pu		; shift unsigned
252;
253sh2s	shd		pu,pl,2,pl		; shift with minus sign
254	shd		pm,pu,2,pu		;
255	sub		pu,op1,pu		; subtract op1
256	shd		pu,pl,2,pl		; shift with minus sign
257	addib,tr	-1,cnt,mloop		; decrement count, loop
258	shd		pm,pu,2,pu		; shift with minus sign
259						; count never reaches 0 here
260;
261sh2sb	extrs		pu,29,30,pu		; shift 2 signed
262	sub		pu,op1,pu		; subtract op1 from product
263	shd		pu,pl,2,pl		; shift with minus sign
264	addib,tr	-1,cnt,mloop		; decrement count, loop
265	shd		pm,pu,2,pu		; shift with minus sign
266						; count never reaches 0 here
267;
268sh1sa	extrs		pu,30,31,pu		;   signed
269	sub		pu,op1,pu		; subtract op1 from product
270	shd		pu,pl,3,pl		; shift 3 with minus sign
271	addib,tr	-1,cnt,mloop		; dec. count, to loop
272	shd		pm,pu,3,pu		; count never reaches 0 here
273;
;   Zero multiplicand: pu was already cleared at mpy1+4; clear pl and
;   store.  The ,n keeps the extru at sh2us from running as a delay slot.
;
274fini0	movib,tr,n	0,pl,fini		; product = 0 as op1 = 0
275;
276sh2us	extru		pu,29,30,pu		; shift 2 unsigned
277	addb,tr		op1,pu,sh2a		; add op1
278	shd		pu,pl,2,pl		; shift 2 bits
279;
280sh2c	shd		pu,pl,2,pl
281	shd		pc,pu,2,pu		; shift with carry
282	addb,tr		op1,pu,sh2a		; add op1 to product
283	shd		pu,pl,2,pl		; br. to sh2 to shift pu
284;
285sh2sa	extrs		pu,29,30,pu		; shift with sign
286	addb,tr		op1,pu,sh2a		; add op1 to product
287	shd		pu,pl,2,pl		; br. to sh2 to shift pu
288;
289sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
290	extru		pu,29,30,pu		; (delay slot) shift 2 unsigned
291;
292mulend	bb,>=,n		sign,0,fini		; test sign of product
293signs	sub		0,pl,pl			; negate product if sign
294	subb		0,pu,pu			;   is negative (borrow from low word)
295;
296;	finish
297;
298fini	stws		pu,0(arg2)		; save high part of result
299	stws		pl,4(arg2)		; save low part of result
300
301	ldws,mb		-4(sp),pm		; pop pm (sp -= 4, then load)
302	ldws,mb		-4(sp),pc		; pop pc
303	ldws,mb		-4(sp),sign		; pop sign
304	ldws,mb		-4(sp),brindex		; pop brindex
305	ldws,mb		-4(sp),cnt		; pop cnt
306	ldws,mb		-4(sp),op1		; pop op1
307	ldws,mb		-4(sp),pl		; pop pl
308	bv		0(rp)			; return
309	ldws,mb		-4(sp),pu		; (delay slot) pop pu
310
311EXIT(impys)
312	.end
313