1dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
36C			 unsigned cnt);
37
38C				cycles/limb
39C P5
40C P6 model 0-8,10-12
41C P6 model 9  (Banias)
42C P6 model 13 (Dothan)
43C P4 model 0  (Willamette)
44C P4 model 1  (?)
45C P4 model 2  (Northwood)
46C P4 model 3  (Prescott)
47C P4 model 4  (Nocona)
48C Intel Atom			 5.5
49C AMD K6
50C AMD K7
51C AMD K8
52C AMD K10
53
dnl  Stack slots of the four arguments (dst, src, size, cnt at 4, 8, 12, 16).
dnl  NOTE(review): defframe is provided by the included m4 support files;
dnl  presumably it binds each name to that byte offset from the stack pointer
dnl  at entry, corrected by the current FRAME value -- confirm.
54defframe(PARAM_CNT, 16)
55defframe(PARAM_SIZE,12)
56defframe(PARAM_SRC,  8)
57defframe(PARAM_DST,  4)
58
59dnl  re-use parameter space
dnl
dnl  The argument slots double as scratch storage.  In the routine below each
dnl  slot is written only after its argument has been loaded into a register:
dnl    SAVE_UP    (cnt slot)   keeps the incoming value of %esi
dnl    VAR_COUNT  (size slot)  holds size/2, used as the loop counter
dnl    SAVE_EBX   (src slot)   keeps the incoming value of %ebx
dnl    SAVE_EBP   (dst slot)   keeps the incoming value of %ebp
dnl  As a consequence the argument values on the stack are overwritten.
60define(SAVE_UP,`PARAM_CNT')
61define(VAR_COUNT,`PARAM_SIZE')
62define(SAVE_EBX,`PARAM_SRC')
63define(SAVE_EBP,`PARAM_DST')
64
dnl  Register aliases used throughout the routine:
dnl    rp   destination pointer (%edi)
dnl    up   source pointer (%esi)
dnl    cnt  shift count (%ecx, so that %cl can feed the shift instructions)
65define(`rp',  `%edi')
66define(`up',  `%esi')
67define(`cnt',  `%ecx')
68
69ASM_START()
70	TEXT
71
C -----------------------------------------------------------------------------
C mpn_lshiftc -- store at dst the bitwise complement of the size-limb operand
C at src shifted left by cnt bits (32-bit limbs, see the C prototype near the
C top of the file).  The value left in %eax at return is the top source limb
C shifted right by 32-cnt, i.e. the bits pushed out at the high end; that
C value is not complemented.
C
C NOTE(review): the code relies on the x86 shift instructions using only the
C low 5 bits of %cl, so that after "neg cnt" a shr by %cl shifts by 32-cnt.
C That works only for 1 <= cnt <= 31, and size is presumably >= 1 -- confirm
C against the callers.
C
C Register use:
C   rp  (%edi)   destination pointer, moves from the top limb downwards
C   up  (%esi)   source pointer, moves from the top limb downwards
C   cnt (%ecx)   shift count; negated back and forth so that %cl serves both
C                the shl (by cnt) and the shr (by -cnt) instructions
C   %eax, %edx   source limbs shifted right (the part carried into the limb
C                above)
C   %ebx, %ebp   source limbs that are, or will be, shifted left
C   VAR_COUNT    loop counter, kept in the size argument slot
C
C %esi, %ebx and %ebp are saved in the argument slots and %edi on the stack;
C all four are restored before returning.
C -----------------------------------------------------------------------------
72PROLOGUE(mpn_lshiftc)
73deflit(`FRAME',0)
C Load the arguments.  Each argument slot is overwritten only after its
C argument has been read.
74	mov	PARAM_CNT, cnt
75	mov	PARAM_SIZE, %edx
76	mov	up, SAVE_UP		C cnt already loaded, slot is free
77	mov	PARAM_SRC, up
78	push	rp			FRAME_pushl()
79	mov	PARAM_DST, rp
80
C Point up and rp at the last (most significant) limb: base + 4*size - 4.
81	lea	-4(up,%edx,4), up
82	mov	%ebx, SAVE_EBX		C src already loaded, slot is free
83	lea	-4(rp,%edx,4), rp
84
C %edx = size/2 and CF = low bit of size.  The two mov instructions that
C follow leave the flags alone, so jnc still tests that CF: the branch is
C taken when size is even.
85	shr	%edx
86	mov	(up), %eax		C top source limb
87	mov	%edx, VAR_COUNT
88	jnc	L(evn)
89
C size is odd: the top limb is handled on its own.
90	mov	%eax, %ebx
91	shl	%cl, %ebx		C ebx = top limb << cnt
92	neg	cnt			C %cl now selects a shift by 32-cnt
93	shr	%cl, %eax		C eax = bits shifted out (return value)
94	test	%edx, %edx
95	jnz	L(gt1)
C size == 1: complement and store the only limb.  %ebp has not been touched
C on this path, so the exit at L(quit) does not need to restore it.
96	not	%ebx
97	mov	%ebx, (rp)
98	jmp	L(quit)
99
C size is odd and at least 3.  Park the return value on the stack, fetch the
C next limb and join the loop at its second half (%cl still holds -cnt).
C NOTE(review): this push carries no FRAME_pushl().  Presumably deliberate:
C FRAME follows the textual order of the file, and L(evn) just below is
C reached with only rp pushed -- confirm.
100L(gt1):	mov	%ebp, SAVE_EBP
101	push	%eax
102	mov	-4(up), %eax
103	mov	%eax, %ebp		C kept for the later shl
104	shr	%cl, %eax
105	jmp	L(lo1)
106
C size is even.  Compute the return value from the top limb and prepare the
C top two limbs for either the loop or, when size == 2, the wind-down code.
107L(evn):	mov	%ebp, SAVE_EBP
108	neg	cnt			C %cl now selects a shift by 32-cnt
109	mov	%eax, %ebp		C top limb, kept for the later shl
110	mov	-4(up), %edx		C next limb down
111	shr	%cl, %eax		C eax = bits shifted out (return value)
112	mov	%edx, %ebx		C kept for the later shl
113	shr	%cl, %edx
114	neg	cnt			C back to +cnt for the shl instructions
C The decl sets ZF for the jz below; the two lea instructions in between do
C not modify the flags.  rp is advanced by one limb so that the -4(rp) store
C at L(top) and L(end) lands on the top destination limb.
115	decl	VAR_COUNT
116	lea	4(rp), rp
117	lea	-4(up), up
118	jz	L(end)
119	push	%eax			FRAME_pushl()
120
C Main loop, two destination limbs per pass.  At L(top) %cl holds +cnt, at
C L(lo1) it holds -cnt; each half combines the left-shifted limb with the
C right-shifted limb below it, complements the result and stores it.
121L(top):	shl	%cl, %ebp
122	or	%ebp, %edx
123	shl	%cl, %ebx
124	neg	cnt
125	not	%edx
126	mov	-4(up), %eax
127	mov	%eax, %ebp
128	mov	%edx, -4(rp)
129	shr	%cl, %eax
130	lea	-8(rp), rp
131L(lo1):	mov	-8(up), %edx
132	or	%ebx, %eax
133	mov	%edx, %ebx
134	shr	%cl, %edx
135	not	%eax
136	lea	-8(up), up
137	neg	cnt
138	mov	%eax, (rp)
139	decl	VAR_COUNT
140	jg	L(top)			C loop while the counter is still > 0
141
142	pop	%eax			FRAME_popl()
C Wind-down: the last two limbs.  The final limb stored, at -8(rp), has
C nothing shifted in from below, so after the complement its low cnt bits
C are all ones.  %ebp is restored here, once its value has been consumed.
143L(end):
144	shl	%cl, %ebp
145	shl	%cl, %ebx
146	or	%ebp, %edx
147	mov	SAVE_EBP, %ebp
148	not	%edx
149	not	%ebx
150	mov	%edx, -4(rp)
151	mov	%ebx, -8(rp)
152
C Common exit: restore the remaining saved registers; %eax holds the result.
153L(quit):
154	mov	SAVE_UP, up
155	mov	SAVE_EBX, %ebx
156	pop	rp			FRAME_popl()
157	ret
158EPILOGUE()
159ASM_END()
160