dnl  xref: /netbsd/external/lgpl3/gmp/dist/mpn/alpha/lshift.asm (revision f81b1c5b)
dnl  Alpha mpn_lshift -- Shift a number left.

dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  mpn_lshift -- shift the n-limb number at up left by cnt bits, store the
C  n-limb result at rp, and return in r0 the bits shifted out of the most
C  significant limb.  Limbs are handled from the most significant one
C  downward.
C
C  INPUT PARAMETERS
C  rp	r16	destination limb pointer
C  up	r17	source limb pointer
C  n	r18	number of limbs; the top limb is loaded unconditionally, so
C		n must be at least 1
C  cnt	r19	shift count in bits (presumably 1 to 63 as for the generic
C		mpn_lshift -- confirm against the GMP manual)
C
C  RETURN VALUE
C  r0	most significant source limb shifted right by 64-cnt
C
C  REGISTER USAGE
C  r16	running result pointer, moving downward
C  r17	running source pointer, moving downward
C  r18	number of limbs still to be consumed by the 4-way unrolled code
C  r20	negated cnt, used as the right shift count.  Alpha shifts use only
C	the low 6 bits of the count, so this acts as a shift by 64-cnt.
C  r28	iteration count of the single-limb loop, (n-1) mod 4
C  r1-r4	source limbs in the unrolled code
C  r21-r24	r1-r4 shifted left by cnt
C  r7,r8,r5,r6	r1-r4 shifted right by 64-cnt, then ORed with the left-shifted
C	part of the limb above to form complete result limbs


ASM_START()
PROLOGUE(mpn_lshift)
	s8addq	r18,r17,r17	C make r17 point at end of s1
	ldq	r4,-8(r17)	C load first limb (the most significant one)
	subq	r31,r19,r20	C r20 = -cnt, the right shift count
	s8addq	r18,r16,r16	C make r16 point at end of RES
	subq	r18,1,r18	C r18 = n-1, the limbs below the top one
	and	r18,4-1,r28	C number of limbs in first loop
	srl	r4,r20,r0	C compute function result

	beq	r28,L(L0)	C n-1 is a multiple of 4, skip the first loop
	subq	r18,r28,r18	C r18 = limbs left for the unrolled code

	ALIGN(8)
C first loop, one result limb per iteration
L(top0):
	ldq	r3,-16(r17)	C next lower source limb
	subq	r16,8,r16
	sll	r4,r19,r5	C high part of the result limb
	subq	r17,8,r17
	subq	r28,1,r28
	srl	r3,r20,r6	C low part, taken from the limb below
	bis	r3,r3,r4	C the lower limb becomes the current limb
	bis	r5,r6,r8
	stq	r8,0(r16)
	bne	r28,L(top0)

L(L0):	sll	r4,r19,r24	C high part of the next result limb
	beq	r18,L(end)	C no limbs left, only the lowest result remains
C warm up phase 1
	ldq	r1,-16(r17)
	subq	r18,4,r18
	ldq	r2,-24(r17)
	ldq	r3,-32(r17)
	ldq	r4,-40(r17)
C warm up phase 2
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	beq	r18,L(end1)	C exactly 4 limbs were left
	ldq	r1,-48(r17)
	sll	r2,r19,r22
	ldq	r2,-56(r17)
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	ldq	r3,-64(r17)
	sll	r4,r19,r24
	ldq	r4,-72(r17)
	subq	r18,4,r18
	beq	r18,L(end2)	C exactly 8 limbs were left
	ALIGN(16)
C main loop, 4 result limbs per iteration.  The stores complete the group
C loaded two groups earlier while the loads fetch the following group.
L(top):	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6

	srl	r1,r20,r7
	subq	r18,4,r18
	sll	r1,r19,r21
	unop	C ldq	r31,-96(r17)

	srl	r2,r20,r8
	ldq	r1,-80(r17)
	sll	r2,r19,r22
	ldq	r2,-88(r17)

	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8

	srl	r3,r20,r5
	unop	C ldq	r31,-96(r17)
	sll	r3,r19,r23
	subq	r16,32,r16	C the 4 stores of this iteration are done

	srl	r4,r20,r6
	ldq	r3,-96(r17)
	sll	r4,r19,r24
	ldq	r4,-104(r17)

	subq	r17,32,r17	C the 4 loads of this iteration are done
	bne	r18,L(top)
C cool down phase 2/1
C Two groups are still in flight: one partly combined in r5-r8 and one
C freshly loaded in r1-r4.  Nine limbs are stored from here on.
L(end2):
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	sll	r2,r19,r22
	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8
	srl	r3,r20,r5
	sll	r3,r19,r23
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 2/2
	stq	r7,-40(r16)
	bis	r5,r22,r5
	stq	r8,-48(r16)
	bis	r6,r23,r6
	stq	r5,-56(r16)
	stq	r6,-64(r16)
C cool down phase 2/3
	stq	r24,-72(r16)	C lowest result limb, nothing ORed in below
	ret	r31,(r26),1

C cool down phase 1/1
C Only the group loaded in warm up phase 1 is in flight.  Five limbs are
C stored from here on.
L(end1):
	sll	r2,r19,r22
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 1/2
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	stq	r5,-24(r16)
	stq	r6,-32(r16)
	stq	r24,-40(r16)	C lowest result limb, nothing ORed in below
	ret	r31,(r26),1

C No limbs were left for the unrolled code: store the lowest result limb.
L(end):	stq	r24,-8(r16)
	ret	r31,(r26),1
EPILOGUE(mpn_lshift)
ASM_END()
183