xref: /openbsd/sys/arch/hppa/spmath/impyu.S (revision 404b540a)
1/*	$OpenBSD: impyu.S,v 1.11 2005/01/23 18:01:30 mickey Exp $	*/
2/*
3  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4  To anyone who acknowledges that this file is provided "AS IS"
5  without any express or implied warranty:
6      permission to use, copy, modify, and distribute this file
7  for any purpose is hereby granted without fee, provided that
8  the above copyright notice and this notice appears in all
9  copies, and that the name of Hewlett-Packard Company not be
10  used in advertising or publicity pertaining to distribution
11  of the software without specific, written prior permission.
12  Hewlett-Packard Company makes no representations about the
13  suitability of this software for any purpose.
14*/
15/* @(#)impyu.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:31 */
16
17#include <machine/asm.h>
18#define _LOCORE
19#include <machine/frame.h>
20
21;****************************************************************************
22;
23;Implement an integer multiply routine for 32-bit operands and 64-bit product
24; with operand values of zero (multiplicand only) and 2**31 treated specially.
25; The algorithm uses the multiplier, four bits at a time, from right to left,
26; to generate partial product.  Execution speed is more important than program
27; size in this implementation.
28;
29;******************************************************************************
30;
31; Definitions - General registers
32;
; Symbolic names for the general registers used by u_xmpy.
; Two pairs of names share one register:
;   pl and op2 both name %r4 - the multiplier is loaded into this register
;     and the low word of the product is formed in the same register.
;   cnt and temp both name %r6 - temp is only referenced in the final
;     fix-up at lastadd.
33gr0	.reg		%r0		; General register zero
34pu	.reg		%r3		; upper part of product
35pl	.reg		%r4		; lower part of product
36op2	.reg		%r4		; multiplier
37op1	.reg		%r5		; multiplicand
38cnt	.reg		%r6		; count in multiply
39brindex	.reg		%r7		; index into the br. table
40saveop2	.reg		%r8		; save op2 if high bit of multiplicand
41					; is set
42pc	.reg		%r9		; carry bit of product, = 00...01
43pm	.reg		%r10		; value of -1 used in shifting
44temp	.reg		%r6
45
46;****************************************************************************
47	.text
;
; u_xmpy: unsigned multiply of two 32-bit words giving a 64-bit product.
;
;   arg0 - address of the multiplicand word (loaded into op1)
;   arg1 - address of the multiplier word (loaded into op2)
;   arg2 - address of the result: the high word (pu) is stored at
;          0(arg2) and the low word (pl) at 4(arg2)
;
; The eight working registers are pushed at sp on entry and popped
; again, in reverse order, just before the return through rp.
;
48LEAF_ENTRY(u_xmpy)
49	stws,ma		pu,4(sp)		; save registers on stack
50	stws,ma		pl,4(sp)		; save registers on stack
51	stws,ma		op1,4(sp)		; save registers on stack
52	stws,ma		cnt,4(sp)		; save registers on stack
53	stws,ma		brindex,4(sp)		; save registers on stack
54	stws,ma		saveop2,4(sp)		; save registers on stack
55	stws,ma		pc,4(sp)		; save registers on stack
56	stws,ma		pm,4(sp)		; save registers on stack
57;
58;   Start multiply process
59;
; A zero multiplicand gives a zero product at once (fini0).  If the top
; bit of the multiplicand is set, that bit is cleared and the multiplier
; is kept in saveop2 so that multiplier * 2**31 can be added back at
; lastadd.  A multiplicand of exactly 2**31 skips the loop: the product
; is then just the multiplier shifted left 31 bits.
;
60	ldws		0(arg0),op1		; get multiplicand
61	ldws		0(arg1),op2		; get multiplier
62	addib,=		0,op1,fini0		; op1 = 0, product = 0
63	addi		0,gr0,pu		; clear upper part of product
64	bb,>=		op1,0,mpy1		; test msb of multiplicand
65	addi		0,gr0,saveop2		; clear saveop2
66;
67; msb of multiplicand is set so will save multiplier for a final
68; addition into the result
69;
70	extru,=		op1,31,31,op1		; clear msb of multiplicand
71	b		mpy1			; low 31 bits non-zero: start multiply
72	add		op2,gr0,saveop2		;   save op2 in saveop2
73	shd		gr0,op2,1,pu		; shift op2 left 31 for result
74	b		fini			; go to finish
75	shd		op2,gr0,1,pl
76;
; Loop set-up.  pm (all ones) and pc (one) are the values shifted into
; the top of pu by the minus-signed and the carry shift paths.  cnt = 8
; counts the eight 4-bit groups of the 32-bit multiplier.
77mpy1	addi		-1,gr0,pm		; initialize pm to 111...1
78	addi		1,gr0,pc		; initialize pc to 00...01
79	movib,tr	8,cnt,mloop		; set count for mpy loop
80	extru		op2,31,4,brindex	; 4 bits as index into table
81;
82	.align		8
83;
; Common 4-bit unsigned shift, entered at three places:
;   sh4n-4  target of table entry 0011; the sh1add,nuv in the delay slot
;           of that entry nullifies this branch when no overflow occurred
;   sh4n    shifts pl, then pu
;   sh4n+4  used when pl was already shifted in a branch delay slot
84	b		sh4c			; br. if sign overflow
85sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
86	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
87	extru		pu,27,28,pu		;   <= zero
88;
; Dispatch on the current 4-bit group.  The delay slot extracts the
; group after it from op2 (the same register as pl) before the current
; group has been shifted out.
89mloop	blr		brindex,gr0		; br. into table
90						;   entries of 2 words
91	extru		op2,27,4,brindex	; next 4 bits into index
92;
93;
94;	branch table for the multiplication process with four multiplier bits
95;
; Each entry is a branch plus its delay slot.  Entries for 1011 and
; above subtract op1 rather than adding it, and add 1 to brindex so the
; difference is carried into the next group; an incremented index can
; reach 16, which is why the table has a 17th entry.
96mtable						; two words per entry
97;
98; ----	bits = 0000 ---- shift product 4 bits -------------------------------
99;
100	b		sh4n+4			; just shift partial
101	shd		pu,pl,4,pl		;   product right 4 bits
102;
103;  ----	bits = 0001 ---- add op1, then shift 4 bits
104;
105	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
106	shd		pu,pl,4,pl		;   product right 4 bits
107;
108;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
109;
110	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
111	addb,uv		op1,pu,sh4c		;   product right 4 bits
112;
113;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
114;
115	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
116	sh1add,nuv	op1,pu,pu			;   product right 4 bits
117;
118;  ----	bits = 0100 ---- shift 2, add op1, shift 2
119;
120	b		sh2sa
121	shd		pu,pl,2,pl		; shift product 2 bits
122;
123;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
124;
125	addb,tr		op1,pu,sh2us		; add op1 to product
126	shd		pu,pl,2,pl		; shift 2 bits
127;
128;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
129;
130	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
131	addb,nuv	op1,pu,sh2us		; br. if not overflow
132;
133;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
134;
135	b		sh3s
136	sub		pu,op1,pu		; subtract op1, br. to sh3s
137
138;
139;  ----	bits = 1000 ---- shift 3, add op1, shift 1
140;
141	b		sh3sa
142	shd		pu,pl,3,pl		; shift product right 3 bits
143;
144;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
145;
146	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
147	shd		pu,pl,3,pl		;   and shift 1
148;
149;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
150;
151	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
152	addb,nuv	op1,pu,sh3us		;   br. if no overflow
153;
154;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
155;
156	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
157	sub		pu,op1,pu		;   shift 2 with minus sign
158;
159;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
160;
161	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
162	shd		pu,pl,2,pl		; shift right 2 bits signed
163;
164;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
165;
166	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
167	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
168;
169;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
170;
171	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
172	shd		pu,pl,1,pl		; shift 1 bit
173;
174;  ----	bits = 1111 ---- add -op1, shift 4 signed
175;
176	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
177	sub		pu,op1,pu		;   to shift 4 signed
178
179;
180;  ----	bits = 10000 ---- shift 4 signed
181;
182	addib,tr	1,brindex,sh4s+4		; add 1 to index
183	shd		pu,pl,4,pl		; shift 4 signed
184;
185;  ---- end of table ---------------------------------------------------------
186;
; Shift routines reached from the table.  Each one ends by decrementing
; cnt and looping to mloop while it is still positive.  The routines
; differ in what is shifted into the top of pu:
;   shd with pm   fills with ones (pu holds a negative partial product)
;   extrs         fills with the sign of pu
;   shd with pc   shifts in the carry bit
;   extru         fills with zeros
; Routines that follow a subtraction of op1 finish, once cnt runs out,
; with addb,tr op1,pu,lastadd: the +1 still owed to a further group
; that does not exist is applied by adding op1 to pu.
; NOTE(review): several exits place addb,=,n saveop2,gr0,fini in the
; delay slot of another branch; the intent appears to be to skip the
; lastadd fix-up when saveop2 is zero - confirm against the PA-RISC
; rules for a branch in a delay slot.
187sh4s	shd		pu,pl,4,pl
188	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
189	shd		pm,pu,4,pu		; shift 4, minus signed
190	addb,tr		op1,pu,lastadd		; do one more add, then finish
191	addb,=,n	saveop2,gr0,fini	; check saveop2
192;
; sh4c does not shift pl itself.
; NOTE(review): its callers appear to rely on the shd at sh4n having run
; first - confirm.
193sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
194	shd		pc,pu,4,pu		; shift 4 with overflow
195	b		lastadd			; end of multiply
196	addb,=,n	saveop2,gr0,fini	; check saveop2
197;
198sh3c	shd		pu,pl,3,pl		; shift product 3 bits
199	shd		pc,pu,3,pu		; shift 3 signed
200	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
201	shd		pu,pl,1,pl
202;
203sh3us	extru		pu,28,29,pu		; shift 3 unsigned
204	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
205	shd		pu,pl,1,pl
206;
207sh3sa	extrs		pu,28,29,pu		; shift 3 signed
208	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
209	shd		pu,pl,1,pl
210;
211sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
212	shd		pm,pu,3,pu
213	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
214	shd		pu,pl,1,pl
215;
216sh1	addib,>		-1,cnt,mloop		; loop if count > 0
217	extru		pu,30,31,pu
218	b		lastadd			; end of multiply
219	addb,=,n	saveop2,gr0,fini	; check saveop2
220;
221sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
222	extru		pu,29,30,pu		; shift unsigned
223;
224sh2s	shd		pu,pl,2,pl		; shift with minus sign
225	shd		pm,pu,2,pu		;
226	sub		pu,op1,pu		; subtract op1
227	shd		pu,pl,2,pl		; shift with minus sign
228	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
229	shd		pm,pu,2,pu		; shift with minus sign
230	addb,tr		op1,pu,lastadd		; do one more add, then finish
231	addb,=,n	saveop2,gr0,fini	; check saveop2
232;
233sh2sb	extrs		pu,29,30,pu		; shift 2 signed
234	sub		pu,op1,pu		; subtract op1 from product
235	shd		pu,pl,2,pl		; shift with minus sign
236	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
237	shd		pm,pu,2,pu		; shift with minus sign
238	addb,tr		op1,pu,lastadd		; do one more add, then finish
239	addb,=,n	saveop2,gr0,fini	; check saveop2
240;
241sh1sa	extrs		pu,30,31,pu		;   signed
242	sub		pu,op1,pu		; subtract op1 from product
243	shd		pu,pl,3,pl		; shift 3 with minus sign
244	addib,>		-1,cnt,mloop		; decrement count, loop if >0
245	shd		pm,pu,3,pu
246	addb,tr		op1,pu,lastadd		; do one more add, then finish
247	addb,=,n	saveop2,gr0,fini	; check saveop2
248;
; Zero multiplicand: pu was cleared in the delay slot of the branch that
; came here; set pl to zero as well and go to fini, which stores both
; words (the delay slot below already stores pu once).
249fini0	movib,tr	0,pl,fini		; product = 0 as op1 = 0
250	stws		pu,0(arg2)		; save high part of result
251;
252sh2us	extru		pu,29,30,pu		; shift 2 unsigned
253	addb,tr		op1,pu,sh2a		; add op1
254	shd		pu,pl,2,pl		; shift 2 bits
255;
256sh2c	shd		pu,pl,2,pl
257	shd		pc,pu,2,pu		; shift with carry
258	addb,tr		op1,pu,sh2a		; add op1 to product
259	shd		pu,pl,2,pl		; br. to sh2 to shift pu
260;
261sh2sa	extrs		pu,29,30,pu		; shift with sign
262	addb,tr		op1,pu,sh2a		; add op1 to product
263	shd		pu,pl,2,pl		; br. to sh2 to shift pu
264;
265sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
266	extru		pu,29,30,pu
267;
; End of the loop.  If saveop2 is non-zero the msb of the multiplicand
; was set and had been cleared, so saveop2 * 2**31 is added to the
; 64-bit product pu:pl.  temp shares %r6 with cnt, which is no longer
; needed at this point.
268mulend	addb,=,n	saveop2,gr0,fini	; check saveop2
269lastadd	shd		saveop2,gr0,1,temp	;  if saveop2 <> 0, shift it
270	shd		gr0,saveop2,1,saveop2	;  left 31 and add to result
271	add		pl,temp,pl
272	addc		pu,saveop2,pu
273;
274;	finish
275;
; Store the product through arg2, then pop the saved registers in the
; reverse of the order they were pushed; the last pop sits in the delay
; slot of the return branch.
276fini	stws		pu,0(arg2)		; save high part of result
277	stws		pl,4(arg2)		; save low part of result
278
279	ldws,mb		-4(sp),pm		; restore registers
280	ldws,mb		-4(sp),pc		; restore registers
281	ldws,mb		-4(sp),saveop2		; restore registers
282	ldws,mb		-4(sp),brindex		; restore registers
283	ldws,mb		-4(sp),cnt		; restore registers
284	ldws,mb		-4(sp),op1		; restore registers
285	ldws,mb		-4(sp),pl		; restore registers
286	bv		0(rp)			; return
287	ldws,mb		-4(sp),pu		; restore registers
288EXIT(u_xmpy)
289
290	.end
291