xref: /openbsd/sys/arch/hppa/spmath/impys.S (revision d415bd75)
1/*	$OpenBSD: impys.S,v 1.12 2011/04/16 20:52:12 deraadt Exp $	*/
2/*
3  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
4  To anyone who acknowledges that this file is provided "AS IS"
5  without any express or implied warranty:
6      permission to use, copy, modify, and distribute this file
7  for any purpose is hereby granted without fee, provided that
8  the above copyright notice and this notice appears in all
9  copies, and that the name of Hewlett-Packard Company not be
10  used in advertising or publicity pertaining to distribution
11  of the software without specific, written prior permission.
12  Hewlett-Packard Company makes no representations about the
13  suitability of this software for any purpose.
14*/
15/* @(#)impys.s: Revision: 1.11.88.1 Date: 93/12/07 15:06:28 */
16
17#include <machine/asm.h>
18#include <machine/frame.h>
19
20;****************************************************************************
21;
22;Implement an integer multiply routine for 32-bit operands and 64-bit product
23;with operand values of zero (multiplicand only) and -2**31 treated specially.
24;The algorithm uses the absolute value of the multiplier, four bits at a time,
25;from right to left, to generate partial product.  Execution speed is more
26;important than program size in this implementation.
27;
28;****************************************************************************
29;
30; Definitions - General registers
31;
; Symbolic names for the general registers used by s_xmpy.
; NOTE: pl and op2 are two names for the same register (%r4).  s_xmpy
; extracts multiplier bits from op2 while shifting product bits from pu
; into pl, so the multiplier and the low product word share that register.
32gr0	.reg		%r0		; General register zero
33pu	.reg		%r3		; upper part of product
34pl	.reg		%r4		; lower part of product (same register as op2)
35op2	.reg		%r4		; multiplier (same register as pl)
36op1	.reg		%r5		; multiplicand
37cnt	.reg		%r6		; count in multiply (loaded with 8 by s_xmpy)
38brindex	.reg		%r7		; index into the br. table
39sign	.reg		%r8		; sign of product
40pc	.reg		%r9		; carry bit of product, = 00...01
41pm	.reg		%r10		; value of -1 used in shifting
42
43;*****************************************************************************
44	.text
45
;
; s_xmpy - signed 32-bit by 32-bit multiply giving a 64-bit product
;
;   In:    arg0 -> word holding the multiplicand
;          arg1 -> word holding the multiplier
;          arg2 -> two-word result area
;   Out:   0(arg2) = high word of the product, 4(arg2) = low word
;   Regs:  %r3-%r10 (pu, pl/op2, op1, cnt, brindex, sign, pc, pm) are stored
;          at sp with post-increment on entry and reloaded before returning.
;
;   The multiply works on |multiplier| and |multiplicand|; "sign" keeps the
;   xor of the two operands and the product is negated at "signs" when its
;   top bit is set.  The multiplier is consumed four bits per pass through
;   the branch table at "mtable" (cnt starts at 8), each table entry being
;   two instructions: a branch and its delay slot.  Several branch targets
;   are written as label+4 / label-4 and therefore depend on the exact
;   instruction layout; do not reorder or insert instructions.
;
46LEAF_ENTRY(s_xmpy)
47	stws,ma		pu,4(sp)		; save registers on stack
48	stws,ma		pl,4(sp)		; save registers on stack
49	stws,ma		op1,4(sp)		; save registers on stack
50	stws,ma		cnt,4(sp)		; save registers on stack
51	stws,ma		brindex,4(sp)		; save registers on stack
52	stws,ma		sign,4(sp)		; save registers on stack
53	stws,ma		pc,4(sp)		; save registers on stack
54	stws,ma		pm,4(sp)		; save registers on stack
55;
56;   Start multiply process
57;
58	ldws		0(arg1),op2		; get multiplier
59	ldws		0(arg0),op1		; get multiplicand
60	addi		-1,gr0,pm		; initialize pm to 111...1
61	comb,<		op2,gr0,mpyb		; br. if multiplier < 0
62	xor		op2,op1,sign		; sign(0) = sign of product
63mpy1	comb,<		op1,gr0,mpya		; br. if multiplicand < 0
64	addi		0,gr0,pu		; clear product
65	addib,=		0,op1,fini0		; op1 = 0, product = 0
66mpy2	addi		1,gr0,pc		; initialize pc to 00...01
67	movib,tr	8,cnt,mloop		; set count for mpy loop
68	extru		op2,31,4,brindex	; 4 bits as index into table
69;
70	.align		8
71;
72	b		sh4c			; br. if sign overflow
73sh4n	shd		pu,pl,4,pl		; shift product right 4 bits
74	addib,<=	-1,cnt,mulend		; reduce count by 1, exit if
75	extru		pu,27,28,pu		;   <= zero
76;
77mloop	blr		brindex,gr0		; br. into table
78						;   entries of 2 words
79	extru		op2,27,4,brindex	; next 4 bits into index
80;
81;
82;	branch table for the multiplication process with four multiplier bits
83;
84mtable						; two words per entry
85;
86; ----	bits = 0000 ---- shift product 4 bits -------------------------------
87;
88	b		sh4n+4			; just shift partial
89	shd		pu,pl,4,pl		;   product right 4 bits
90;
91;  ----	bits = 0001 ---- add op1, then shift 4 bits
92;
93	addb,tr		op1,pu,sh4n+4		; add op1 to product, to shift
94	shd		pu,pl,4,pl		;   product right 4 bits
95;
96;  ----	bits = 0010 ---- add op1, add op1, then shift 4 bits
97;
98	addb,tr		op1,pu,sh4n		; add 2*op1, to shift
99	addb,uv		op1,pu,sh4c		;   product right 4 bits
100;
101;  ---- bits = 0011 ---- add op1, add 2*op1, shift 4 bits
;
;	The branch target sh4n-4 is the "b sh4c" placed just before sh4n.
;	The sh1add in the delay slot nullifies that branch when the sum did
;	not carry out of pu, so execution continues at sh4n; when it did
;	carry, the branch is taken and sh4c shifts the carry bit (pc) back
;	into pu.  The condition must therefore be the unsigned one (,nuv),
;	matching the ,uv/,nuv tests used by the 0010, 0110 and 1010 entries;
;	a signed-overflow test would route carry-free sums through sh4c.
102;
103	addb,tr		op1,pu,sh4n-4		; add op1 & 2*op1, shift
104	sh1add,nuv	op1,pu,pu			;   product right 4 bits
105;
106;  ----	bits = 0100 ---- shift 2, add op1, shift 2
107;
108	b		sh2sa
109	shd		pu,pl,2,pl		; shift product 2 bits
110;
111;  ----	bits = 0101 ---- add op1, shift 2, add op1, and shift 2 again
112;
113	addb,tr		op1,pu,sh2us		; add op1 to product
114	shd		pu,pl,2,pl		; shift 2 bits
115;
116;  ----	bits = 0110 ---- add op1, add op1, shift 2, add op1, and shift 2 again
117;
118	addb,tr		op1,pu,sh2c		; add 2*op1, to shift 2 bits
119	addb,nuv	op1,pu,sh2us		; br. if not overflow
120;
121;  ----	bits = 0111 ---- subtract op1, shift 3, add op1, and shift 1
122;
123	b		sh3s
124	sub		pu,op1,pu		; subtract op1, br. to sh3s
125
126;
127;  ----	bits = 1000 ---- shift 3, add op1, shift 1
128;
129	b		sh3sa
130	shd		pu,pl,3,pl		; shift product right 3 bits
131;
132;  ----	bits = 1001 ---- add op1, shift 3, add op1, shift 1
133;
134	addb,tr		op1,pu,sh3us		; add op1, to shift 3, add op1,
135	shd		pu,pl,3,pl		;   and shift 1
136;
137;  ----	bits = 1010 ---- add op1, add op1, shift 3, add op1, shift 1
138;
139	addb,tr		op1,pu,sh3c		; add 2*op1, to shift 3 bits
140	addb,nuv	op1,pu,sh3us		;   br. if no overflow
141;
142;  ----	bits = 1011 ---- add -op1, shift 2, add -op1, shift 2, inc. next index
143;
144	addib,tr	1,brindex,sh2s		; add 1 to index, subtract op1,
145	sub		pu,op1,pu		;   shift 2 with minus sign
146;
147;  ----	bits = 1100 ---- shift 2, subtract op1, shift 2, increment next index
148;
149	addib,tr	1,brindex,sh2sb		; add 1 to index, to shift
150	shd		pu,pl,2,pl		; shift right 2 bits signed
151;
152;  ----	bits = 1101 ---- add op1, shift 2, add -op1, shift 2
153;
154	addb,tr		op1,pu,sh2ns		; add op1, to shift 2
155	shd		pu,pl,2,pl		;   right 2 unsigned, etc.
156;
157;  ----	bits = 1110 ---- shift 1 signed, add -op1, shift 3 signed
158;
159	addib,tr	1,brindex,sh1sa		; add 1 to index, to shift
160	shd		pu,pl,1,pl		; shift 1 bit
161;
162;  ----	bits = 1111 ---- add -op1, shift 4 signed
163;
164	addib,tr	1,brindex,sh4s		; add 1 to index, subtract op1,
165	sub		pu,op1,pu		;   to shift 4 signed
166
167;
168;  ----	bits = 10000 ---- shift 4 signed
;	(reached when an index of 1111 has been incremented by the
;	 preceding table entry)
169;
170	addib,tr	1,brindex,sh4s+4	; add 1 to index
171	shd		pu,pl,4,pl		; shift 4 signed
172;
173;  ---- end of table ---------------------------------------------------------
174;
175sh4s	shd		pu,pl,4,pl
176	addib,tr	-1,cnt,mloop		; loop (count > 0 always here)
177	shd		pm,pu,4,pu		; shift 4, minus signed
178;
179sh4c	addib,>		-1,cnt,mloop		; decrement count, loop if > 0
180	shd		pc,pu,4,pu		; shift 4 with overflow
181	b		signs			; end of multiply
182	bb,>=,n		sign,0,fini		; test sign of product
183;
184mpyb	add,=		op2,op2,gr0		; if <> 0, back to main sect.
185	b		mpy1
186	sub		0,op2,op2		; op2 = |multiplier|
187	add,>=		op1,gr0,gr0		; if op1 < 0, invert sign,
188	xor		pm,sign,sign		;   for correct result
189;
190;	special case for multiplier = -2**31, op1 = signed multiplicand
191;		or multiplicand = -2**31, op1 = signed multiplier
192;
193	shd		op1,0,1,pl		; shift op1 left 31 bits
194mmax	extrs		op1,30,31,pu
195	b		signs			; negate product (if needed)
196	bb,>=,n		sign,0,fini		; test sign of product
197;
198mpya	add,=		op1,op1,gr0		; op1 = -2**31, special case
199	b		mpy2
200	sub		0,op1,op1		; op1 = |multiplicand|
201	add,>=		op2,gr0,gr0		; if op2 < 0, invert sign,
202	xor		pm,sign,sign		;   for correct result
203	movb,tr		op2,op1,mmax		; use op2 as multiplicand
204	shd		op1,0,1,pl		; shift it left 31 bits
205;
206sh3c	shd		pu,pl,3,pl		; shift product 3 bits
207	shd		pc,pu,3,pu		; shift 3 signed
208	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
209	shd		pu,pl,1,pl
210;
211sh3us	extru		pu,28,29,pu		; shift 3 unsigned
212	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
213	shd		pu,pl,1,pl
214;
215sh3sa	extrs		pu,28,29,pu		; shift 3 signed
216	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
217	shd		pu,pl,1,pl
218;
219sh3s	shd		pu,pl,3,pl		; shift 3 minus signed
220	shd		pm,pu,3,pu
221	addb,tr		op1,pu,sh1		; add op1, to shift 1 bit
222	shd		pu,pl,1,pl
223;
224sh1	addib,>		-1,cnt,mloop		; loop if count > 0
225	extru		pu,30,31,pu
226	b		signs			; end of multiply
227	bb,>=,n		sign,0,fini		; test sign of product
228;
229sh2ns	addib,tr	1,brindex,sh2sb+4	; increment index
230	extru		pu,29,30,pu		; shift unsigned
231;
232sh2s	shd		pu,pl,2,pl		; shift with minus sign
233	shd		pm,pu,2,pu		;
234	sub		pu,op1,pu		; subtract op1
235	shd		pu,pl,2,pl		; shift with minus sign
236	addib,tr	-1,cnt,mloop		; decrement count, loop
237	shd		pm,pu,2,pu		; shift with minus sign
238						; count never reaches 0 here
239;
240sh2sb	extrs		pu,29,30,pu		; shift 2 signed
241	sub		pu,op1,pu		; subtract op1 from product
242	shd		pu,pl,2,pl		; shift with minus sign
243	addib,tr	-1,cnt,mloop		; decrement count, loop
244	shd		pm,pu,2,pu		; shift with minus sign
245						; count never reaches 0 here
246;
247sh1sa	extrs		pu,30,31,pu		;   signed
248	sub		pu,op1,pu		; subtract op1 from product
249	shd		pu,pl,3,pl		; shift 3 with minus sign
250	addib,tr	-1,cnt,mloop		; dec. count, to loop
251	shd		pm,pu,3,pu		; count never reaches 0 here
252;
253fini0	movib,tr,n	0,pl,fini		; product = 0 as op1 = 0
254;
255sh2us	extru		pu,29,30,pu		; shift 2 unsigned
256	addb,tr		op1,pu,sh2a		; add op1
257	shd		pu,pl,2,pl		; shift 2 bits
258;
259sh2c	shd		pu,pl,2,pl
260	shd		pc,pu,2,pu		; shift with carry
261	addb,tr		op1,pu,sh2a		; add op1 to product
262	shd		pu,pl,2,pl		; br. to sh2 to shift pu
263;
264sh2sa	extrs		pu,29,30,pu		; shift with sign
265	addb,tr		op1,pu,sh2a		; add op1 to product
266	shd		pu,pl,2,pl		; br. to sh2 to shift pu
267;
268sh2a	addib,>		-1,cnt,mloop		; loop if count > 0
269	extru		pu,29,30,pu
270;
271mulend	bb,>=,n		sign,0,fini		; test sign of product
272signs	sub		0,pl,pl			; negate product if sign
273	subb		0,pu,pu			;   is negative
274;
275;	finish
276;
277fini	stws		pu,0(arg2)		; save high part of result
278	stws		pl,4(arg2)		; save low part of result
279
280	ldws,mb		-4(sp),pm		; restore registers
281	ldws,mb		-4(sp),pc		; restore registers
282	ldws,mb		-4(sp),sign		; restore registers
283	ldws,mb		-4(sp),brindex		; restore registers
284	ldws,mb		-4(sp),cnt		; restore registers
285	ldws,mb		-4(sp),op1		; restore registers
286	ldws,mb		-4(sp),pl		; restore registers
287	bv		0(rp)			; return
288	ldws,mb		-4(sp),pu		; restore registers (in the delay slot)
289EXIT(s_xmpy)
290
291	.end
292