/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#if 0 /* vstr */
#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"
#endif /* vstr */

#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+PTR_SIZE
#define SIZE	S+PTR_SIZE
#define CNT	SIZE+4

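/* The stack offsets above match the C-level signature of this function,
   roughly (a sketch; the real typedefs are glibc/GMP's, with 32-bit
   limbs on this target):

	mp_limb_t __mpn_lshift (mp_ptr res_ptr, mp_srcptr s_ptr,
				mp_size_t size, unsigned int cnt);

   Shift {s_ptr, size} left by cnt bits, store the result at res_ptr,
   and return the bits shifted out of the most significant limb.  */
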
	.text
ENTRY (BP_SYM (__mpn_lshift))
	ENTER

	pushl	%edi
	pushl	%esi
	pushl	%ebp
	pushl	%ebx

	movl	RES(%esp),%edi
	movl	S(%esp),%esi
	movl	SIZE(%esp),%ebx
	movl	CNT(%esp),%ecx
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx
#endif

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebx,4),%eax
	cmpl	%eax,%edi
	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */
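
/* In C, the dispatch above is roughly (a sketch, using the operand names
   from the stack-offset defines):

	if (cnt == 1
	    && (s_ptr + 1 >= res_ptr || res_ptr >= s_ptr + size))
	  goto special;

   i.e. the upward-moving shift-by-1 code is safe as long as res_ptr does
   not point strictly inside (s_ptr + 1, s_ptr + size).  */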

L(normal):
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
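
/* shldl %cl,%edx,%eax shifts %eax left by %cl while filling the vacated
   low bits from the high end of %edx; starting with %eax == 0 this leaves
   exactly the out-shifted bits of the top limb, i.e. roughly (a sketch,
   assuming 32-bit limbs):

	carry_limb = s_ptr[size - 1] >> (32 - cnt);
*/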

	decl	%ebx
	pushl	%ebx
	shrl	$3,%ebx
	jz	L(end)

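/* The loop below runs downward from the most significant end, eight limbs
   per iteration, software-pipelined by one limb: each shldl merges the
   limb loaded in the previous step with the one just fetched.  Per limb
   it computes, roughly (a sketch, assuming 32-bit limbs):

	res_ptr[i] = (s_ptr[i] << cnt) | (s_ptr[i - 1] >> (32 - cnt));
*/
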
	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	shldl	%cl,%ebp,%edx
	shldl	%cl,%eax,%ebp
	movl	%edx,-8(%edi)
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebp,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx
	andl	$7,%ebx
	jz	L(end2)
L(oop2):
	movl	(%esi),%eax
	shldl	%cl,%eax,%edx
	movl	%edx,(%edi)
	movl	%eax,%edx
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb: the return value */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret

/* The special case loops from the least significant end of the arrays.
   Moving upward is only permissible when it cannot overwrite source limbs
   that have not yet been read; since the function is documented to work
   for overlapping source and destination, the checks above guard entry
   to this path.
*/
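
/* Shifting a limb left by one bit is the same as adding it to itself, so
   the loop below streams upward with addl/adcl and lets the carry flag
   propagate each limb's top bit into the next limb.  Roughly (a sketch,
   assuming 32-bit limbs):

	carry = 0;
	for (i = 0; i < size; i++)
	  {
	    limb = s_ptr[i];
	    res_ptr[i] = (limb << 1) | carry;
	    carry = limb >> 31;
	  }
	return carry;
*/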

L(special):
	movl	(%esi),%edx
	addl	$4,%esi

	decl	%ebx
	pushl	%ebx
	shrl	$3,%ebx

	addl	%edx,%edx		/* shift first limb; CF = bit shifted out */
	incl	%ebx
	decl	%ebx			/* set ZF from %ebx without touching CF */
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(Loop):
	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	adcl	%ebp,%ebp
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebp,%ebp
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,24(%edi)
	adcl	%edx,%edx
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal to avoid clobbering the carry flag */
	leal	32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	sbbl	%eax,%eax		/* save carry (0 or -1) in %eax */
	andl	$7,%ebx
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from %eax */
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebp,(%edi)

	leal	4(%esi),%esi		/* use leal to avoid clobbering the carry flag */
	leal	4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from %eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

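/* movl does not touch the flags, so CF still holds the bit shifted out of
   the most significant limb: sbbl turns it into 0 or -1 in %eax, and negl
   into the 0-or-1 return value.  */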
	sbbl	%eax,%eax
	negl	%eax

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
END (BP_SYM (__mpn_lshift))