xref: /netbsd/external/lgpl3/gmp/dist/mpn/alpha/rshift.asm (revision f81b1c5b)
1dnl  Alpha mpn_rshift -- Shift a number right.
2
3dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C      cycles/limb
34C EV4:     ?
35C EV5:     3.25
36C EV6:     1.75
37
38C  INPUT PARAMETERS
39C  rp	r16
40C  up	r17
41C  n	r18
42C  cnt	r19
43
44
45ASM_START()
C mpn_rshift(rp, up, n, cnt) -- shift the n-limb number at up right by cnt
C bits and store the n result limbs at rp.  The value left in r0 is up[0]
C shifted left by -cnt, i.e. the bits shifted out at the low end, placed at
C the top of the returned limb.
C
C Each result limb except the top one is
C     (current limb >> cnt) | (next limb << -cnt)
C and the top result limb is just (last limb >> cnt).
C
C Register use, as read from the code below:
C   r16      rp, advanced as limbs are stored
C   r17      up, advanced as limbs are consumed
C   r18      limbs still to be handled by the 4-way unrolled code
C   r19      cnt, the right shift count
C   r20      -cnt, used as the left shift count
C   r28      iteration count of the one-limb-at-a-time loop top0
C   r24      most recently consumed limb >> cnt, waiting to be combined
C            with the low bits of the following limb
C   r1-r4    loaded source limbs
C   r5-r8    in the unrolled code: sll results, then combined output limbs
C   r21-r23  in the unrolled code: srl results
C
C Outline: top0 handles (n-1) mod 4 limbs one at a time, the unrolled
C software-pipelined code handles the rest four limbs at a time, and the
C last limb stored is the top source limb shifted right by cnt.
C
C NOTE(review): up[0] is loaded unconditionally and n-1 is masked before any
C test, so n is presumably required to be at least 1; likewise using -cnt
C as the left shift count presumably assumes 1 <= cnt <= 63.  Confirm
C against the mpn_rshift documentation.
46PROLOGUE(mpn_rshift)
47	ldq	r4,0(r17)	C load first limb
48	subq	r31,r19,r20	C r20 = 0 - cnt; shifts use the count mod 64, so this acts as 64-cnt
49	subq	r18,1,r18	C r18 = n-1, the number of result limbs built from two source limbs
50	and	r18,4-1,r28	C number of limbs in first loop
51	sll	r4,r20,r0	C compute function result
52
53	beq	r28,L(L0)	C (n-1) mod 4 is zero: skip the one-limb loop
54	subq	r18,r28,r18	C r18 is now a multiple of 4
55
56	ALIGN(8)
57L(top0):
58	ldq	r3,8(r17)	C next source limb
59	addq	r16,8,r16
60	srl	r4,r19,r5	C current limb moved down by cnt
61	addq	r17,8,r17
62	subq	r28,1,r28
63	sll	r3,r20,r6	C low cnt bits of next limb moved to the top
64	bis	r3,r3,r4	C next limb becomes the current limb
65	bis	r5,r6,r8	C combine into one result limb
66	stq	r8,-8(r16)	C rp was already advanced above, hence the -8 offset
67	bne	r28,L(top0)
68
69L(L0):	srl	r4,r19,r24	C r24 = current limb >> cnt, pending
70	beq	r18,L(end)	C no groups of four left
71C warm up phase 1: load the next four limbs
72	ldq	r1,8(r17)
73	subq	r18,4,r18
74	ldq	r2,16(r17)
75	ldq	r3,24(r17)
76	ldq	r4,32(r17)
77C warm up phase 2: start shifting the first group; if more limbs remain,
C load the second group of four while finishing the first
78	sll	r1,r20,r7
79	srl	r1,r19,r21
80	sll	r2,r20,r8
81	beq	r18,L(end1)	C exactly one group of four: drain through end1
82	ldq	r1,40(r17)
83	srl	r2,r19,r22
84	ldq	r2,48(r17)
85	sll	r3,r20,r5
86	bis	r7,r24,r7
87	srl	r3,r19,r23
88	bis	r8,r21,r8
89	sll	r4,r20,r6
90	ldq	r3,56(r17)
91	srl	r4,r19,r24
92	ldq	r4,64(r17)
93	subq	r18,4,r18
94	beq	r18,L(end2)	C exactly two groups of four: drain through end2
95	ALIGN(16)
96C main loop: each pass stores four result limbs, shifts the four limbs
C already in r1-r4, and loads the following four source limbs
97L(top):	stq	r7,0(r16)
98	bis	r5,r22,r5
99	stq	r8,8(r16)
100	bis	r6,r23,r6
101
102	sll	r1,r20,r7
103	subq	r18,4,r18
104	srl	r1,r19,r21
105	unop	C ldq	r31,-96(r17)
106
107	sll	r2,r20,r8
108	ldq	r1,72(r17)	C offsets 72..96 are relative to r17 before it is advanced below
109	srl	r2,r19,r22
110	ldq	r2,80(r17)
111
112	stq	r5,16(r16)
113	bis	r7,r24,r7
114	stq	r6,24(r16)
115	bis	r8,r21,r8
116
117	sll	r3,r20,r5
118	unop	C ldq	r31,-96(r17)
119	srl	r3,r19,r23
120	addq	r16,32,r16	C advance rp by four limbs
121
122	sll	r4,r20,r6
123	ldq	r3,88(r17)
124	srl	r4,r19,r24
125	ldq	r4,96(r17)
126
127	addq	r17,32,r17	C advance up by four limbs
128	bne	r18,L(top)
129C cool down phase 2/1: store four finished limbs and shift the last four
C loaded limbs; no further loads are issued
130L(end2):
131	stq	r7,0(r16)
132	bis	r5,r22,r5
133	stq	r8,8(r16)
134	bis	r6,r23,r6
135	sll	r1,r20,r7
136	srl	r1,r19,r21
137	sll	r2,r20,r8
138	srl	r2,r19,r22
139	stq	r5,16(r16)
140	bis	r7,r24,r7
141	stq	r6,24(r16)
142	bis	r8,r21,r8
143	sll	r3,r20,r5
144	srl	r3,r19,r23
145	sll	r4,r20,r6
146	srl	r4,r19,r24
147C cool down phase 2/2: combine and store the last four two-source limbs
148	stq	r7,32(r16)
149	bis	r5,r22,r5
150	stq	r8,40(r16)
151	bis	r6,r23,r6
152	stq	r5,48(r16)
153	stq	r6,56(r16)
154C cool down phase 2/3: store the top limb, which is the last source limb
C shifted right by cnt with nothing combined in from above
155	stq	r24,64(r16)
156	ret	r31,(r26),1	C return through the address in r26
157
158C cool down phase 1/1: finish shifting the four limbs loaded in warm up
159L(end1):
160	srl	r2,r19,r22
161	sll	r3,r20,r5
162	bis	r7,r24,r7
163	srl	r3,r19,r23
164	bis	r8,r21,r8
165	sll	r4,r20,r6
166	srl	r4,r19,r24
167C cool down phase 1/2: store the four combined limbs, then the top limb
168	stq	r7,0(r16)
169	bis	r5,r22,r5
170	stq	r8,8(r16)
171	bis	r6,r23,r6
172	stq	r5,16(r16)
173	stq	r6,24(r16)
174	stq	r24,32(r16)	C top limb: last source limb >> cnt
175	ret	r31,(r26),1
176
177L(end):	stq	r24,0(r16)	C no unrolled groups: only the top limb is left to store
178	ret	r31,(r26),1
179EPILOGUE(mpn_rshift)
180ASM_END()
181