xref: /netbsd/sys/arch/hppa/spmath/impyu.S (revision c4a72b64)
1/*	$NetBSD: impyu.S,v 1.1 2002/06/05 01:04:25 fredette Exp $	*/
2
3/*	$OpenBSD: impyu.S,v 1.5 2001/03/29 03:58:18 mickey Exp $	*/
4
5/*
6 * Copyright 1996 1995 by Open Software Foundation, Inc.
7 *              All Rights Reserved
8 *
9 * Permission to use, copy, modify, and distribute this software and
10 * its documentation for any purpose and without fee is hereby granted,
11 * provided that the above copyright notice appears in all copies and
12 * that both the copyright notice and this permission notice appear in
13 * supporting documentation.
14 *
15 * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
16 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
17 * FOR A PARTICULAR PURPOSE.
18 *
19 * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
20 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
21 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
22 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
23 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 *
25 */
26/*
27 * pmk1.1
28 */
29/*
30 * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
31 *
32 * To anyone who acknowledges that this file is provided "AS IS"
33 * without any express or implied warranty:
34 *     permission to use, copy, modify, and distribute this file
35 * for any purpose is hereby granted without fee, provided that
36 * the above copyright notice and this notice appears in all
37 * copies, and that the name of Hewlett-Packard Company not be
38 * used in advertising or publicity pertaining to distribution
39 * of the software without specific, written prior permission.
40 * Hewlett-Packard Company makes no representations about the
41 * suitability of this software for any purpose.
42 */
43
44#include <machine/asm.h>
45
46/****************************************************************************
47 *
48 *Implement an integer multiply routine for 32-bit operands and 64-bit product
49 * with operand values of zero (multiplicand only) and 2**31 treated specially.
50 * The algorithm uses the multiplier, four bits at a time, from right to left,
51 * to generate partial product.  Execution speed is more important than program
52 * size in this implementation.
53 *
54 *****************************************************************************/
55;
56; Definitions - General registers
57;
; Symbolic names for the general registers used by impyu.  Two pairs of
; names deliberately share a register: pl/op2 (register 4) and cnt/temp
; (register 6).
58gr0	.equ		0		; general register zero (used as constant 0)
59pu	.equ		3		; upper word of the 64-bit product
60pl	.equ		4		; lower word of the product (same reg. as op2)
61op2	.equ		4		; multiplier; shares its register with pl, so
					; product bits shift in from the top as used
					; multiplier bits are shifted out
62op1	.equ		5		; multiplicand
63cnt	.equ		6		; count of 4-bit groups left in multiply loop
64brindex	.equ		7		; index into the br. table (mtable)
65saveop2	.equ		8		; copy of op2 kept if high bit of multiplicand
66					; is set; zero otherwise
67pc	.equ		9		; constant 00...01, shifted in as carry bit
68pm	.equ	       10		; constant -1, shifted in as minus sign bits
69temp	.equ		6		; scratch for the final add; same reg. as cnt
70
71;****************************************************************************
72	.export impyu,entry
73	.text
74	.align 4
75	.proc
76	.callinfo
77;
78;****************************************************************************
;
; impyu - unsigned 32-bit x 32-bit -> 64-bit integer multiply
;
; In:	arg0 = address of the multiplicand word (loaded into op1)
;	arg1 = address of the multiplier word   (loaded into op2)
;	arg2 = address of a two-word result area
; Out:	0(arg2) = high word of product, 4(arg2) = low word of product.
;	This code does not set a return-value register.
; Stack: the eight registers used (pu, pl, op1, cnt, brindex, saveop2, pc,
;	pm) are pushed at sp on entry and popped in reverse order before
;	returning, so sp and those registers are unchanged on exit.
;
; Method: the multiplier is consumed four bits at a time, low bits first,
; for 8 passes.  Each 4-bit group indexes mtable through blr; every table
; entry is exactly two instructions.  pu:pl is treated as one 64-bit shift
; register: op1 is added to / subtracted from pu and the pair is shifted
; right 4 bits per pass, so product bits replace the used multiplier bits
; in pl.  Groups that are handled by subtracting op1 add 1 to the next
; table index to compensate; table entry 10000 serves an index of 1111
; that has been incremented.  If such an increment is still pending after
; the last pass, op1 is added once more before finishing.
;
; A multiplicand with its msb set is multiplied using its low 31 bits, and
; op2 * 2**31 (from saveop2) is added at lastadd.  A multiplicand of 0 or
; of exactly 2**31 bypasses the loop.
;
; NOTE: the instruction following a branch is its delay slot and executes
; before the branch target unless nullified.  Many pairs below depend on
; this (including branches placed in delay slots and targets written as
; label+4 / label-4), so instruction order and spacing must not change.
;
79impyu	stws,ma		pu,4(sp)		; save registers on stack
80	stws,ma		pl,4(sp)		; save registers on stack
81	stws,ma		op1,4(sp)		; save registers on stack
82	stws,ma		cnt,4(sp)		; save registers on stack
83	stws,ma		brindex,4(sp)		; save registers on stack
84	stws,ma		saveop2,4(sp)		; save registers on stack
85	stws,ma		pc,4(sp)		; save registers on stack
86	stws,ma		pm,4(sp)		; save registers on stack
87;
88;   Start multiply process
89;
90	ldws		0(arg0),op1		; get multiplicand
91	ldws		0(arg1),op2		; get multiplier (also initial pl)
92	addib,=		0,op1,fini0		; op1 = 0, product = 0
93	addi		0,gr0,pu		; delay slot: clear upper product
94	bb,>=		op1,0,mpy1		; msb of multiplicand clear: multiply
95	addi		0,gr0,saveop2		; delay slot: clear saveop2
96;
97; msb of multiplicand is set so will save multiplier for a final
98; addition into the result
99;
100	extru,=		op1,31,31,op1		; clear msb of multiplicand; skip the
						;   branch below if the rest is zero
101	b		mpy1			; op1 was not 2**31, start multiply
102	add		op2,gr0,saveop2		;   save op2 in saveop2
; op1 was exactly 2**31: product is op2 shifted left 31 across pu:pl
103	shd		gr0,op2,1,pu		; shift op2 left 31 for result
104	b		fini			; go to finish
105	shd		op2,gr0,1,pl		; delay slot: low word of op2 << 31
106;
107mpy1	addi		-1,gr0,pm		; initialize pm to 111...1
108	addi		1,gr0,pc		; initialize pc to 00...01
109	movib,tr	8,cnt,mloop		; set count for mpy loop
110	extru		op2,31,4,brindex	; 4 bits as index into table
111;
112	.align		8
113;
; The branch below sits at sh4n-4.  It is reached only from the 0011 table
; entry, whose delay-slot sh1add,nuv nullifies it when there is no overflow
; (control then continues at sh4n); its own delay slot is the shd at sh4n.
114	b		sh4c			; br. if sign overflow
115sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
; sh4n+4: entered directly by table entries that already shifted pl in
; their delay slot
116	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
117	extru		pu,27,28,pu		;   <= zero
118;
119mloop	blr		brindex,gr0		; br. into table
120						;   entries of 2 words
121	extru		op2,27,4,brindex	; next 4 bits into index
122;
123;
124;	branch table for the multiplication process with four multiplier bits
125;
126mtable						; two words per entry
127;
128; ----	bits = 0000 ---- shift product 4 bits -------------------------------
129;
130	b		sh4n+4			; just shift partial
131	shd		pu,pl,4,pl		;   product right 4 bits
132;
133;  ----	bits = 0001 ---- add op1, then shift 4 bits
134;
135	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
136	shd		pu,pl,4,pl		;   product right 4 bits
137;
138;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
139;
140	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
141	addb,uv		op1,pu,sh4c		;   product right 4 bits
						;   (second add goes to sh4c on overflow)
142;
143;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
144;
145	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
146	sh1add,nuv	op1,pu,pu			;   product right 4 bits
						;   (no overflow: skip the b at sh4n-4)
147;
148;  ----	bits = 0100 ---- shift 2, add op1, shift 2
149;
150	b		sh2sa
151	shd		pu,pl,2,pl		; shift product 2 bits
152;
153;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
154;
155	addb,tr		op1,pu,sh2us		; add op1 to product
156	shd		pu,pl,2,pl		; shift 2 bits
157;
158;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
159;
160	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
161	addb,nuv	op1,pu,sh2us		; br. if not overflow
162;
163;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
164;
165	b		sh3s
166	sub		pu,op1,pu		; subtract op1, br. to sh3s
167
168;
169;  ----	bits = 1000 ---- shift 3, add op1, shift 1
170;
171	b		sh3sa
172	shd		pu,pl,3,pl		; shift product right 3 bits
173;
174;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
175;
176	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
177	shd		pu,pl,3,pl		;   and shift 1
178;
179;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
180;
181	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
182	addb,nuv	op1,pu,sh3us		;   br. if no overflow
183;
184;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
185;
186	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
187	sub		pu,op1,pu		;   shift 2 with minus sign
188;
189;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
190;
191	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
192	shd		pu,pl,2,pl		; shift right 2 bits signed
193;
194;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
195;
196	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
197	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
198;
199;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
200;
201	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
202	shd		pu,pl,1,pl		; shift 1 bit
203;
204;  ----	bits = 1111 ---- add -op1, shift 4 signed
205;
206	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
207	sub		pu,op1,pu		;   to shift 4 signed
208
209;
210;  ----	bits = 10000 ---- shift 4 signed
211;
212	addib,tr	1,brindex,sh4s+4		; add 1 to index
213	shd		pu,pl,4,pl		; shift 4 signed
214;
215;  ---- end of table ---------------------------------------------------------
216;
; Shift/add tails.  Each one finishes the pass started by a table entry,
; decrements cnt and loops to mloop while cnt > 0.  Tails that incremented
; the next index end with "addb,tr op1,pu,lastadd" to apply that pending
; increment after the final pass; the addb,=,n in its delay slot goes
; straight to fini when saveop2 is zero.
;
217sh4s	shd		pu,pl,4,pl
218	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
219	shd		pm,pu,4,pu		; shift 4, minus signed
220	addb,tr		op1,pu,lastadd		; do one more add, then finish
221	addb,=,n	saveop2,gr0,fini	; check saveop2
222;
223sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
224	shd		pc,pu,4,pu		; shift 4 with overflow
225	b		lastadd			; end of multiply
226	addb,=,n	saveop2,gr0,fini	; check saveop2
227;
228sh3c	shd		pu,pl,3,pl		; shift product 3 bits
229	shd		pc,pu,3,pu		; shift 3 with carry
230	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
231	shd		pu,pl,1,pl
232;
233sh3us	extru		pu,28,29,pu		; shift 3 unsigned
234	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
235	shd		pu,pl,1,pl
236;
237sh3sa	extrs		pu,28,29,pu		; shift 3 signed
238	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
239	shd		pu,pl,1,pl
240;
241sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
242	shd		pm,pu,3,pu
243	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
244	shd		pu,pl,1,pl
245;
246sh1	addib,>		-1,cnt,mloop		; loop if count > 0
247	extru		pu,30,31,pu		; delay slot: shift pu 1 unsigned
248	b		lastadd			; end of multiply
249	addb,=,n	saveop2,gr0,fini	; check saveop2
250;
251sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index; enter sh2sb
						;   past its signed shift
252	extru		pu,29,30,pu		; shift unsigned
253;
254sh2s	shd		pu,pl,2,pl		; shift with minus sign
255	shd		pm,pu,2,pu		;
256	sub		pu,op1,pu		; subtract op1
257	shd		pu,pl,2,pl		; shift with minus sign
258	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
259	shd		pm,pu,2,pu		; shift with minus sign
260	addb,tr		op1,pu,lastadd		; do one more add, then finish
261	addb,=,n	saveop2,gr0,fini	; check saveop2
262;
263sh2sb	extrs		pu,29,30,pu		; shift 2 signed
264	sub		pu,op1,pu		; subtract op1 from product
265	shd		pu,pl,2,pl		; shift with minus sign
266	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
267	shd		pm,pu,2,pu		; shift with minus sign
268	addb,tr		op1,pu,lastadd		; do one more add, then finish
269	addb,=,n	saveop2,gr0,fini	; check saveop2
270;
271sh1sa	extrs		pu,30,31,pu		; shift 1 signed
272	sub		pu,op1,pu		; subtract op1 from product
273	shd		pu,pl,3,pl		; shift 3 with minus sign
274	addib,>		-1,cnt,mloop		; decrement count, loop if >0
275	shd		pm,pu,3,pu
276	addb,tr		op1,pu,lastadd		; do one more add, then finish
277	addb,=,n	saveop2,gr0,fini	; check saveop2
278;
; Multiplicand was zero: pu was already cleared on the way here.
279fini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
280	stws		pu,0(arg2)		; save high part of result
281;
282sh2us	extru		pu,29,30,pu		; shift 2 unsigned
283	addb,tr		op1,pu,sh2a		; add op1
284	shd		pu,pl,2,pl		; shift 2 bits
285;
286sh2c	shd		pu,pl,2,pl
287	shd		pc,pu,2,pu		; shift with carry
288	addb,tr		op1,pu,sh2a		; add op1 to product
289	shd		pu,pl,2,pl		; br. to sh2a to shift pu
290;
291sh2sa	extrs		pu,29,30,pu		; shift with sign
292	addb,tr		op1,pu,sh2a		; add op1 to product
293	shd		pu,pl,2,pl		; br. to sh2a to shift pu
294;
295sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
296	extru		pu,29,30,pu		; delay slot: shift pu 2 unsigned
297;
; Loop finished.  If the multiplicand's msb was set (saveop2 <> 0), add
; saveop2 * 2**31 to the 64-bit product; temp reuses cnt's register,
; which is no longer needed here.
298mulend	addb,=,n	saveop2,gr0,fini	; check saveop2
299lastadd	shd		saveop2,gr0,1,temp	;  if saveop2 <> 0, shift it
300	shd		gr0,saveop2,1,saveop2	;  left 31 and add to result
301	add		pl,temp,pl		; add low words
302	addc		pu,saveop2,pu		; add high words with carry
303;
304;	finish
305;
306fini	stws		pu,0(arg2)		; save high part of result
307	stws		pl,4(arg2)		; save low part of result
308
309	ldws,mb		-4(sp),pm		; restore registers
310	ldws,mb		-4(sp),pc		; restore registers
311	ldws,mb		-4(sp),saveop2		; restore registers
312	ldws,mb		-4(sp),brindex		; restore registers
313	ldws,mb		-4(sp),cnt		; restore registers
314	ldws,mb		-4(sp),op1		; restore registers
315	ldws,mb		-4(sp),pl		; restore registers
316	bv		0(rp)			; return
317	ldws,mb		-4(sp),pu		; delay slot: restore last register
318
319	.procend
320	.end
321