xref: /openbsd/sys/arch/hppa/spmath/impyu.S (revision 09467b48)
1/*	$OpenBSD: impyu.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
2/*
3  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4  To anyone who acknowledges that this file is provided "AS IS"
5  without any express or implied warranty:
6      permission to use, copy, modify, and distribute this file
7  for any purpose is hereby granted without fee, provided that
8  the above copyright notice and this notice appears in all
9  copies, and that the name of Hewlett-Packard Company not be
10  used in advertising or publicity pertaining to distribution
11  of the software without specific, written prior permission.
12  Hewlett-Packard Company makes no representations about the
13  suitability of this software for any purpose.
14*/
15/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */
16
17#include <machine/asm.h>
18#include <machine/frame.h>
19
20;****************************************************************************
21;
22;Implement an integer multiply routine for 32-bit operands and 64-bit product
23; with operand values of zero (multiplicand only) and 2**31 treated specially.
24; The algorithm uses the multiplier, four bits at a time, from right to left,
25; to generate partial product.  Execution speed is more important than program
26; size in this implementation.
27;
28;******************************************************************************
29;
30; Definitions - General registers
31;
32gr0	.reg		%r0		; General register zero (used as constant 0)
33pu	.reg		%r3		; upper part of product
34pl	.reg		%r4		; lower part of product
35op2	.reg		%r4		; multiplier (same register as pl)
36op1	.reg		%r5		; multiplicand
37cnt	.reg		%r6		; count in multiply
38brindex	.reg		%r7		; index into the br. table
39saveop2	.reg		%r8		; save op2 if high bit of multiplicand
40					; is set
41pc	.reg		%r9		; carry bit of product, = 00...01
42pm	.reg		%r10		; value of -1 used in shifting
43temp	.reg		%r6		; scratch for the final add (same register as cnt)
44
45;****************************************************************************
46	.text
;
; u_xmpy - 32 x 32 -> 64 bit integer multiply.
;
;   arg0 - address of the multiplicand word (loaded into op1)
;   arg1 - address of the multiplier word (loaded into op2)
;   arg2 - address of the result: the high word is stored at 0(arg2),
;          the low word at 4(arg2)
;
; The eight working registers are saved on the stack at entry and restored
; in reverse order before returning through rp.
;
; op2 and pl name the same register: every right shift of the pu:pl pair
; drops multiplier bits that have been used and makes room for product bits.
; If the msb of the multiplicand is set it is cleared, the multiplier is
; kept in saveop2, and saveop2 shifted left 31 is added to the product at
; lastadd.
;
; Many instructions below sit in the delay slot of the branch in front of
; them, and the branch table relies on every entry being exactly two words
; and on label arithmetic (sh4n+4, sh4n-4, sh4s+4, sh2sb+4).  Do not reorder,
; insert or delete instructions without re-checking all of these.
;
47LEAF_ENTRY(u_xmpy)
48	stws,ma		pu,4(sp)		; save pu on stack
49	stws,ma		pl,4(sp)		; save pl/op2 on stack
50	stws,ma		op1,4(sp)		; save op1 on stack
51	stws,ma		cnt,4(sp)		; save cnt/temp on stack
52	stws,ma		brindex,4(sp)		; save brindex on stack
53	stws,ma		saveop2,4(sp)		; save saveop2 on stack
54	stws,ma		pc,4(sp)		; save pc on stack
55	stws,ma		pm,4(sp)		; save pm on stack
56;
57;   Start multiply process
58;
59	ldws		0(arg0),op1		; get multiplicand
60	ldws		0(arg1),op2		; get multiplier
61	addib,=		0,op1,fini0		; op1 = 0, product = 0
62	addi		0,gr0,pu		; delay slot: clear upper product
63	bb,>=		op1,0,mpy1		; msb of multiplicand clear: go multiply
64	addi		0,gr0,saveop2		; delay slot: clear saveop2
65;
66; msb of multiplicand is set so will save multiplier for a final
67; addition into the result
68;
69	extru,=		op1,31,31,op1		; clear msb of multiplicand; skip the
						;   branch below if nothing is left
70	b		mpy1			; low 31 bits nonzero: start multiply
71	add		op2,gr0,saveop2		;   save op2 in saveop2 (both paths)
72	shd		gr0,op2,1,pu		; op1 was 2**31: pu = op2 >> 1
73	b		fini			; go to finish
74	shd		op2,gr0,1,pl		; delay slot: pl = op2 << 31
75;
76mpy1	addi		-1,gr0,pm		; initialize pm to 111...1
77	addi		1,gr0,pc		; initialize pc to 00...01
78	movib,tr	8,cnt,mloop		; 8 passes of 4 multiplier bits each
79	extru		op2,31,4,brindex	; delay slot: low 4 bits as first index
80;
81	.align		8
82;
83	b		sh4c			; br. if sign overflow; this word is
						;   sh4n-4, the target of entry 0011
84sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
85	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
86	extru		pu,27,28,pu		;   <= zero; delay slot: pu >>= 4
87;
88mloop	blr		brindex,gr0		; br. into table
89						;   entries of 2 words
90	extru		op2,27,4,brindex	; delay slot: next 4 bits into index
91;
92;
93;	branch table for the multiplication process with four multiplier bits
94;
95mtable						; two words per entry
96;
97; ----	bits = 0000 ---- shift product 4 bits -------------------------------
98;
99	b		sh4n+4			; just shift partial
100	shd		pu,pl,4,pl		;   product right 4 bits
101;
102;  ----	bits = 0001 ---- add op1, then shift 4 bits
103;
104	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
105	shd		pu,pl,4,pl		;   product right 4 bits
106;
107;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
108;
109	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
110	addb,uv		op1,pu,sh4c		;   product right 4 bits; second add
						;   goes to sh4c on unsigned overflow
111;
112;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
113;
114	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
115	sh1add,nuv	op1,pu,pu			;   product right 4 bits; no overflow
						;   skips the "b sh4c" at sh4n-4
116;
117;  ----	bits = 0100 ---- shift 2, add op1, shift 2
118;
119	b		sh2sa
120	shd		pu,pl,2,pl		; shift product 2 bits
121;
122;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
123;
124	addb,tr		op1,pu,sh2us		; add op1 to product
125	shd		pu,pl,2,pl		; shift 2 bits
126;
127;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
128;
129	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
130	addb,nuv	op1,pu,sh2us		; br. if not overflow
131;
132;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
133;
134	b		sh3s
135	sub		pu,op1,pu		; subtract op1, br. to sh3s
136
137;
138;  ----	bits = 1000 ---- shift 3, add op1, shift 1
139;
140	b		sh3sa
141	shd		pu,pl,3,pl		; shift product right 3 bits
142;
143;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
144;
145	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
146	shd		pu,pl,3,pl		;   and shift 1
147;
148;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
149;
150	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
151	addb,nuv	op1,pu,sh3us		;   br. if no overflow
152;
153;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
154;
155	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
156	sub		pu,op1,pu		;   shift 2 with minus sign
157;
158;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
159;
160	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
161	shd		pu,pl,2,pl		; shift right 2 bits signed
162;
163;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
164;
165	addb,tr		op1,pu,sh2ns		; add op1, to shift 2 (sh2ns also
						;   adds 1 to the next index)
166	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
167;
168;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
169;
170	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
171	shd		pu,pl,1,pl		; shift 1 bit
172;
173;  ----	bits = 1111 ---- add -op1, shift 4 signed
174;
175	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
176	sub		pu,op1,pu		;   to shift 4 signed
177
178;
179;  ----	bits = 10000 ---- shift 4 signed
180;
181	addib,tr	1,brindex,sh4s+4		; add 1 to index; enter sh4s past its
						;   first shd, done in the delay slot
182	shd		pu,pl,4,pl		; shift 4 signed
183;
184;  ---- end of table ---------------------------------------------------------
185;
186sh4s	shd		pu,pl,4,pl
187	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
188	shd		pm,pu,4,pu		; shift 4, minus signed
189	addb,tr		op1,pu,lastadd		; do one more add, then finish
190	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
191;
192sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
193	shd		pc,pu,4,pu		; shift 4 with overflow (shifts in pc)
194	b		lastadd			; end of multiply
195	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
196;
197sh3c	shd		pu,pl,3,pl		; shift product 3 bits
198	shd		pc,pu,3,pu		; shift 3 with carry (shifts in pc)
199	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
200	shd		pu,pl,1,pl		; delay slot: shift low part 1 bit
201;
202sh3us	extru		pu,28,29,pu		; shift 3 unsigned
203	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
204	shd		pu,pl,1,pl		; delay slot: shift low part 1 bit
205;
206sh3sa	extrs		pu,28,29,pu		; shift 3 signed
207	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
208	shd		pu,pl,1,pl		; delay slot: shift low part 1 bit
209;
210sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
211	shd		pm,pu,3,pu		; upper part: shift in bits from pm
212	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
213	shd		pu,pl,1,pl		; delay slot: shift low part 1 bit
214;
215sh1	addib,>		-1,cnt,mloop		; loop if count > 0
216	extru		pu,30,31,pu		; delay slot: shift upper part 1 unsigned
217	b		lastadd			; end of multiply
218	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
219;
220sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index; enter sh2sb past
						;   its extrs
221	extru		pu,29,30,pu		; shift unsigned
222;
223sh2s	shd		pu,pl,2,pl		; shift with minus sign
224	shd		pm,pu,2,pu		;
225	sub		pu,op1,pu		; subtract op1
226	shd		pu,pl,2,pl		; shift with minus sign
227	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
228	shd		pm,pu,2,pu		; shift with minus sign
229	addb,tr		op1,pu,lastadd		; do one more add, then finish
230	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
231;
232sh2sb	extrs		pu,29,30,pu		; shift 2 signed
233	sub		pu,op1,pu		; subtract op1 from product
234	shd		pu,pl,2,pl		; shift with minus sign
235	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
236	shd		pm,pu,2,pu		; shift with minus sign
237	addb,tr		op1,pu,lastadd		; do one more add, then finish
238	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
239;
240sh1sa	extrs		pu,30,31,pu		;   signed
241	sub		pu,op1,pu		; subtract op1 from product
242	shd		pu,pl,3,pl		; shift 3 with minus sign
243	addib,>		-1,cnt,mloop		; decrement count, loop if >0
244	shd		pm,pu,3,pu		; delay slot: shift in bits from pm
245	addb,tr		op1,pu,lastadd		; do one more add, then finish
246	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
247;
248fini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
249	stws		pu,0(arg2)		; save high part of result (delay slot;
						;   stored again at fini)
250;
251sh2us	extru		pu,29,30,pu		; shift 2 unsigned
252	addb,tr		op1,pu,sh2a		; add op1
253	shd		pu,pl,2,pl		; shift 2 bits
254;
255sh2c	shd		pu,pl,2,pl
256	shd		pc,pu,2,pu		; shift with carry
257	addb,tr		op1,pu,sh2a		; add op1 to product
258	shd		pu,pl,2,pl		; br. to sh2 to shift pu
259;
260sh2sa	extrs		pu,29,30,pu		; shift with sign
261	addb,tr		op1,pu,sh2a		; add op1 to product
262	shd		pu,pl,2,pl		; br. to sh2 to shift pu
263;
264sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
265	extru		pu,29,30,pu		; delay slot: shift upper part 2 unsigned
266;
267mulend	addb,=,n	saveop2,gr0,fini	; check saveop2 (branch to fini if 0)
268lastadd	shd		saveop2,gr0,1,temp	;  if saveop2 <> 0, shift it
269	shd		gr0,saveop2,1,saveop2	;  left 31 and add to result
270	add		pl,temp,pl		; low word += low half of saveop2 << 31
271	addc		pu,saveop2,pu		; high word += high half, with carry
272;
273;	finish
274;
275fini	stws		pu,0(arg2)		; save high part of result
276	stws		pl,4(arg2)		; save low part of result
277
278	ldws,mb		-4(sp),pm		; restore pm
279	ldws,mb		-4(sp),pc		; restore pc
280	ldws,mb		-4(sp),saveop2		; restore saveop2
281	ldws,mb		-4(sp),brindex		; restore brindex
282	ldws,mb		-4(sp),cnt		; restore cnt/temp
283	ldws,mb		-4(sp),op1		; restore op1
284	ldws,mb		-4(sp),pl		; restore pl/op2
285	bv		0(rp)			; return
286	ldws,mb		-4(sp),pu		; restore pu (delay slot of the return)
287EXIT(u_xmpy)
288
289	.end
290