/* Origin: /freebsd/sys/crypto/openssl/i386/aesni-x86.S (revision c0855eaa) */
/* Do not modify. This file is auto-generated from aesni-x86.pl. */
/* Syntax: GNU as, AT&T operand order, 32-bit x86; run through cpp first. */
#ifdef PIC
.text
/*
 * aesni_encrypt: encrypt a single 16-byte block.
 *
 * Stack arguments, read relative to %esp on entry:
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer to the 16-byte output block
 *   12(%esp)  pointer to the key schedule; the 32-bit value at offset 240
 *             is used as the number of AESENC iterations
 * Presumably the C prototype is
 *   void aesni_encrypt(const unsigned char *in, unsigned char *out,
 *                      const AES_KEY *key);   -- confirm against the header.
 *
 * Clobbers %eax, %ecx, %edx and flags.  %xmm0-%xmm2 are zeroed before
 * returning, so neither round keys nor block data remain in them.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.L_aesni_encrypt_begin:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movl	4(%esp),%eax	/* eax = input pointer */
	movl	12(%esp),%edx	/* edx = key schedule */
	movups	(%eax),%xmm2	/* xmm2 = block being processed */
	movl	240(%edx),%ecx	/* ecx = AESENC iteration count */
	movl	8(%esp),%eax	/* eax = output pointer */
	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	leal	32(%edx),%edx	/* edx -> round key 2 */
	xorps	%xmm0,%xmm2	/* initial whitening with round key 0 */
.L000enc1_loop_1:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1	/* fetch next round key (flags untouched) */
	leal	16(%edx),%edx
	jnz	.L000enc1_loop_1	/* tests ZF left by decl */
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0	/* scrub round keys */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)	/* store result */
	pxor	%xmm2,%xmm2	/* scrub block data */
	ret
.size	aesni_encrypt,.-.L_aesni_encrypt_begin
/*
 * aesni_decrypt: decrypt a single 16-byte block.
 *
 * Stack arguments, read relative to %esp on entry:
 *    4(%esp)  pointer to the 16-byte input block
 *    8(%esp)  pointer to the 16-byte output block
 *   12(%esp)  pointer to the key schedule; the 32-bit value at offset 240
 *             is used as the number of AESDEC iterations
 * NOTE(review): the round keys are consumed in ascending address order, so
 * the schedule presumably has to be a decryption schedule -- confirm with
 * the key-setup code.
 *
 * Clobbers %eax, %ecx, %edx and flags.  %xmm0-%xmm2 are zeroed before
 * returning.
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.L_aesni_decrypt_begin:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movl	4(%esp),%eax	/* eax = input pointer */
	movl	12(%esp),%edx	/* edx = key schedule */
	movups	(%eax),%xmm2	/* xmm2 = block being processed */
	movl	240(%edx),%ecx	/* ecx = AESDEC iteration count */
	movl	8(%esp),%eax	/* eax = output pointer */
	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	leal	32(%edx),%edx	/* edx -> round key 2 */
	xorps	%xmm0,%xmm2	/* initial whitening with round key 0 */
.L001dec1_loop_2:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1	/* fetch next round key (flags untouched) */
	leal	16(%edx),%edx
	jnz	.L001dec1_loop_2	/* tests ZF left by decl */
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0	/* scrub round keys */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)	/* store result */
	pxor	%xmm2,%xmm2	/* scrub block data */
	ret
.size	aesni_decrypt,.-.L_aesni_decrypt_begin
/*
 * _aesni_encrypt2: file-local helper, encrypts two blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2,%xmm3 = blocks
 *   Out:  %xmm2,%xmm3 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 *
 * %edx is advanced past the keys the loop will consume and %ecx becomes a
 * negative byte offset that counts up to zero, so the addl that steps the
 * index also provides the loop condition.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
	addl	$16,%ecx	/* ecx = 16 - 16*count (negative index) */
.L002enc2_loop:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* two round keys per iteration; sets ZF at end */
.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L002enc2_loop	/* tests ZF left by addl */
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
	ret
.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: file-local helper, decrypts two blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2,%xmm3 = blocks
 *   Out:  %xmm2,%xmm3 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 *
 * Uses the same negative-index loop as the encrypt variant: %ecx counts up
 * to zero and the addl that steps it provides the loop condition.
 */
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
	addl	$16,%ecx	/* ecx = 16 - 16*count (negative index) */
.L003dec2_loop:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* two round keys per iteration; sets ZF at end */
.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L003dec2_loop	/* tests ZF left by addl */
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
	ret
.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: file-local helper, encrypts three blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm4 = blocks
 *   Out:  %xmm2-%xmm4 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 */
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
	addl	$16,%ecx	/* negative index that counts up to zero */
.L004enc3_loop:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L004enc3_loop	/* tests ZF left by addl */
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
	ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: file-local helper, decrypts three blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm4 = blocks
 *   Out:  %xmm2-%xmm4 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 */
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
	addl	$16,%ecx	/* negative index that counts up to zero */
.L005dec3_loop:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L005dec3_loop	/* tests ZF left by addl */
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
	ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: file-local helper, encrypts four blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm5 = blocks
 *   Out:  %xmm2-%xmm5 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 */
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
.byte	15,31,64,0	/* nopl 0(%eax): 4-byte padding NOP */
	addl	$16,%ecx	/* negative index that counts up to zero */
.L006enc4_loop:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L006enc4_loop	/* tests ZF left by addl */
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
	ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: file-local helper, decrypts four blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm5 = blocks
 *   Out:  %xmm2-%xmm5 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 */
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	xorps	%xmm0,%xmm2	/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0	/* round key 2 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
.byte	15,31,64,0	/* nopl 0(%eax): 4-byte padding NOP */
	addl	$16,%ecx	/* negative index that counts up to zero */
.L007dec4_loop:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L007dec4_loop	/* tests ZF left by addl */
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
	ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: file-local helper, encrypts six blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm7 = blocks
 *   Out:  %xmm2-%xmm7 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 *
 * The first AESENC round for %xmm2-%xmm4 is interleaved with the whitening
 * of %xmm5-%xmm7, after which control jumps into the middle of the loop
 * body to finish that round for %xmm5-%xmm7.
 *
 * .L_aesni_encrypt6_enter is not referenced in the portion of the file
 * reviewed here; presumably it is a secondary entry point for callers that
 * have already done the whitening and first round -- confirm before
 * touching it.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening of the first three blocks */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0	/* round key 2 (edx+ecx == key+32 here) */
	addl	$16,%ecx
	jmp	.L008_aesni_encrypt6_inner
.align	16
.L009enc6_loop:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
.L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
.L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
.byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
.byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
.byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L009enc6_loop	/* tests ZF left by addl */
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
.byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
.byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
.byte	102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
.byte	102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
.byte	102,15,56,221,240	/* aesenclast %xmm0,%xmm6 */
.byte	102,15,56,221,248	/* aesenclast %xmm0,%xmm7 */
	ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: file-local helper, decrypts six blocks in parallel.
 *
 * Register interface (not a C calling convention):
 *   In:   %edx = key schedule, %ecx = round count, %xmm2-%xmm7 = blocks
 *   Out:  %xmm2-%xmm7 = results
 *   Clobbers: %ecx, %edx, %xmm0, %xmm1, flags
 *
 * The first AESDEC round for %xmm2-%xmm4 is interleaved with the whitening
 * of %xmm5-%xmm7, after which control jumps into the middle of the loop
 * body to finish that round for %xmm5-%xmm7.
 *
 * .L_aesni_decrypt6_enter is not referenced in the portion of the file
 * reviewed here; presumably it is a secondary entry point for callers that
 * have already done the whitening and first round -- confirm before
 * touching it.
 */
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	movups	(%edx),%xmm0	/* round key 0 */
	shll	$4,%ecx	/* ecx = 16 * round count */
	movups	16(%edx),%xmm1	/* round key 1 */
	xorps	%xmm0,%xmm2	/* whitening of the first three blocks */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	negl	%ecx
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0	/* round key 2 (edx+ecx == key+32 here) */
	addl	$16,%ecx
	jmp	.L010_aesni_decrypt6_inner
.align	16
.L011dec6_loop:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
.L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
.L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
.byte	102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
.byte	102,15,56,222,240	/* aesdec %xmm0,%xmm6 */
.byte	102,15,56,222,248	/* aesdec %xmm0,%xmm7 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L011dec6_loop	/* tests ZF left by addl */
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
.byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
.byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
.byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
.byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
.byte	102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
.byte	102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
.byte	102,15,56,223,240	/* aesdeclast %xmm0,%xmm6 */
.byte	102,15,56,223,248	/* aesdeclast %xmm0,%xmm7 */
	ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * aesni_ecb_encrypt: process a buffer block by block, each 16-byte block
 * independently with the same key schedule (ECB), six blocks per iteration
 * in the main loop and one to five blocks in the tail.
 *
 * Stack arguments (offsets as seen after the four pushes below):
 *   20(%esp)  source pointer                                   -> %esi
 *   24(%esp)  destination pointer                              -> %edi
 *   28(%esp)  length in bytes; rounded down to a multiple of
 *             16, and a result of zero returns immediately     -> %eax
 *   32(%esp)  key schedule pointer (round count at offset 240) -> %edx
 *   36(%esp)  direction: nonzero takes the encrypt path,
 *             zero the decrypt path                            -> %ebx
 *
 * Register roles once a path is chosen:
 *   %ebp = saved key schedule pointer, %ebx = saved round count; both are
 *   copied back into %edx/%ecx after each helper call because the helpers
 *   clobber them.  %eax = bytes still to process.
 *
 * %ebp, %ebx, %esi, %edi are saved and restored.  %xmm0-%xmm7 are zeroed
 * on every exit path.
 */
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	andl	$-16,%eax	/* drop any partial trailing block */
	jz	.L012ecb_ret
	movl	240(%edx),%ecx	/* ecx = round count */
	testl	%ebx,%ebx
	jz	.L013ecb_decrypt
	/* ---- encrypt path ---- */
	movl	%edx,%ebp
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	.L014ecb_enc_tail	/* fewer than six blocks */
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L015ecb_enc_loop6_enter
.align	16
.L016ecb_enc_loop6:
	/* store the previous six results while loading the next six inputs */
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L015ecb_enc_loop6_enter:
	call	_aesni_encrypt6
	movl	%ebp,%edx	/* reload key pointer and round count */
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L016ecb_enc_loop6	/* no borrow: another full six blocks remain */
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax	/* undo the last subtraction: eax = leftover bytes */
	jz	.L012ecb_ret
.L014ecb_enc_tail:
	/*
	 * 16..80 bytes left.  Each cmpl is followed by a load that does not
	 * change flags, so the jb/je pairs both test the same comparison.
	 */
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	.L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	.L020ecb_enc_four
	/* five blocks: run the six-block helper with a zeroed sixth lane */
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
.L017ecb_enc_one:
	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2	/* whitening */
.L021enc1_loop_3:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L021enc1_loop_3	/* tests ZF left by decl */
.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L018ecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L019ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L020ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	.L012ecb_ret
.align	16
.L013ecb_decrypt:
	/* ---- decrypt path: same structure as the encrypt path ---- */
	movl	%edx,%ebp
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	.L022ecb_dec_tail	/* fewer than six blocks */
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L023ecb_dec_loop6_enter
.align	16
.L024ecb_dec_loop6:
	/* store the previous six results while loading the next six inputs */
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L023ecb_dec_loop6_enter:
	call	_aesni_decrypt6
	movl	%ebp,%edx	/* reload key pointer and round count */
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L024ecb_dec_loop6	/* no borrow: another full six blocks remain */
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax	/* undo the last subtraction: eax = leftover bytes */
	jz	.L012ecb_ret
.L022ecb_dec_tail:
	/* 16..80 bytes left; dispatch on block count as in the encrypt tail */
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	.L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	.L028ecb_dec_four
	/* five blocks: run the six-block helper with a zeroed sixth lane */
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
.L025ecb_dec_one:
	movups	(%edx),%xmm0	/* round key 0 */
	movups	16(%edx),%xmm1	/* round key 1 */
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2	/* whitening */
.L029dec1_loop_4:
.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L029dec1_loop_4	/* tests ZF left by decl */
.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L026ecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L027ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L028ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
.L012ecb_ret:
	/* common exit: scrub all eight XMM registers, restore saved registers */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
/*
 * aesni_ccm64_encrypt_blocks: per 16-byte block, encrypt a counter block
 * and a running MAC block side by side, XOR the encrypted counter into the
 * input to produce the output, and fold the input into the MAC.
 *
 * Stack arguments (offsets as seen after the four pushes below):
 *   20(%esp)  source pointer                                   -> %esi
 *   24(%esp)  destination pointer                              -> %edi
 *   28(%esp)  number of 16-byte blocks                         -> %eax
 *   32(%esp)  key schedule pointer (round count at offset 240) -> %edx
 *   36(%esp)  pointer to the 16-byte counter block (read only) -> %xmm7
 *   40(%esp)  pointer to the 16-byte MAC block; read on entry
 *             and written back on exit                         -> %xmm3
 *
 * A 16-byte-aligned scratch area is carved out below the saved registers:
 *    0(%esp)  pshufb mask 0x0C0D0E0F,0x08090A0B,0x04050607,0x00010203,
 *             which reverses the byte order of a 16-byte vector
 *   16(%esp)  the dwords 1,0,0,0, i.e. a 64-bit increment of one
 *   48(%esp)  caller stack pointer, restored before the pops
 * The counter is kept byte-reversed in %xmm7 so paddq can step it, and is
 * reversed back into %xmm2 before each encryption.
 *
 * NOTE(review): the outer loop tests the block count only after processing
 * a block, so a count of zero is not handled here -- callers presumably
 * never pass zero; confirm.
 *
 * %ebp, %ebx, %esi, %edi are saved and restored.  %xmm0-%xmm7 are zeroed
 * before returning.
 */
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
	#ifdef __CET__

.byte	243,15,30,251	/* endbr32 */
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp	/* remember caller stack pointer */
	subl	$60,%esp
	andl	$-16,%esp	/* align scratch area for movdqa/paddq */
	movl	%ebp,48(%esp)
	movdqu	(%ebx),%xmm7	/* counter block */
	movdqu	(%ecx),%xmm3	/* MAC block */
	movl	240(%edx),%ecx	/* ecx = round count */
	movl	$202182159,(%esp)	/* 0x0C0D0E0F */
	movl	$134810123,4(%esp)	/* 0x08090A0B */
	movl	$67438087,8(%esp)	/* 0x04050607 */
	movl	$66051,12(%esp)	/* 0x00010203 */
	movl	$1,%ebx
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)	/* increment constant: 1,0,0,0 */
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx	/* ecx = 16 * round count */
	movl	$16,%ebx
	leal	(%edx),%ebp	/* ebp = start of key schedule */
	movdqa	(%esp),%xmm5	/* xmm5 = byte-reversal mask */
	movdqa	%xmm7,%xmm2	/* xmm2 = counter in its original byte order */
	leal	32(%edx,%ecx,1),%edx	/* edx = key + 32 + 16*count */
	subl	%ecx,%ebx	/* ebx = 16 - 16*count, initial loop index */
.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7: byte-reverse the counter */
.L030ccm64_enc_outer:
	movups	(%ebp),%xmm0	/* round key 0 */
	movl	%ebx,%ecx	/* reset negative key index */
	movups	(%esi),%xmm6	/* xmm6 = input block */
	xorps	%xmm0,%xmm2	/* whiten counter */
	movups	16(%ebp),%xmm1	/* round key 1 */
	xorps	%xmm6,%xmm0	/* xmm0 = round key 0 ^ input */
	xorps	%xmm0,%xmm3	/* MAC ^= input, whitened in the same step */
	movups	32(%ebp),%xmm0	/* round key 2 */
.L031ccm64_enc2_loop:
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx	/* sets ZF when the last key pair is loaded */
.byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L031ccm64_enc2_loop	/* tests ZF left by addl */
.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
	paddq	16(%esp),%xmm7	/* step the byte-reversed counter by one */
	decl	%eax	/* one block done; ZF consumed by the jnz below */
.byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6	/* output = input ^ encrypted counter */
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2: next counter, original order */
	leal	16(%edi),%edi
	jnz	.L030ccm64_enc_outer	/* nothing since decl has touched flags */
	movl	48(%esp),%esp	/* back to the caller stack frame */
	movl	40(%esp),%edi	/* edi = MAC block pointer */
	movups	%xmm3,(%edi)	/* write back updated MAC */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
/*
 * aesni_ccm64_decrypt_blocks (i386, AT&T syntax, all arguments on the stack).
 *
 * Stack arguments as read below, after the four register pushes:
 *   20(%esp) -> %esi  input pointer; 16-byte blocks are loaded from it
 *   24(%esp) -> %edi  output pointer; 16-byte blocks are stored to it
 *   28(%esp) -> %eax  number of 16-byte blocks
 *   32(%esp) -> %edx  key schedule; the dword at offset 240 is the round-loop count
 *   36(%esp) -> %ebx  16-byte counter block (loaded into %xmm7)
 *   40(%esp) -> %ecx  16-byte running MAC (loaded into %xmm3, stored back on exit)
 * NOTE(review): this presumably matches a C prototype of the form
 * (in, out, blocks, key, ivec, cmac) -- confirm against the C declaration.
 *
 * Each output block is input XOR E(counter); the MAC is then replaced by
 * E(MAC XOR output block).  %ebp/%ebx/%esi/%edi are preserved and
 * %xmm0-%xmm7 are zeroed before returning.
 * NOTE(review): the block count is tested only after the first block has been
 * stored, so a count of 0 is not handled here -- presumably callers never pass 0.
 */
743bc3d5698SJohn Baldwin.globl	aesni_ccm64_decrypt_blocks
744bc3d5698SJohn Baldwin.type	aesni_ccm64_decrypt_blocks,@function
745bc3d5698SJohn Baldwin.align	16
746bc3d5698SJohn Baldwinaesni_ccm64_decrypt_blocks:
747bc3d5698SJohn Baldwin.L_aesni_ccm64_decrypt_blocks_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
748c0855eaaSJohn Baldwin	#ifdef __CET__
749c0855eaaSJohn Baldwin
750c0855eaaSJohn Baldwin.byte	243,15,30,251
751c0855eaaSJohn Baldwin	#endif
752c0855eaaSJohn Baldwin
/* Save the callee-saved registers, then fetch the six stack arguments. */
753bc3d5698SJohn Baldwin	pushl	%ebp
754bc3d5698SJohn Baldwin	pushl	%ebx
755bc3d5698SJohn Baldwin	pushl	%esi
756bc3d5698SJohn Baldwin	pushl	%edi
757bc3d5698SJohn Baldwin	movl	20(%esp),%esi
758bc3d5698SJohn Baldwin	movl	24(%esp),%edi
759bc3d5698SJohn Baldwin	movl	28(%esp),%eax
760bc3d5698SJohn Baldwin	movl	32(%esp),%edx
761bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
762bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
/* Build a 16-byte-aligned scratch frame; the caller's %esp is kept at 48(%esp). */
763bc3d5698SJohn Baldwin	movl	%esp,%ebp
764bc3d5698SJohn Baldwin	subl	$60,%esp
765bc3d5698SJohn Baldwin	andl	$-16,%esp
766bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
/* %xmm7 = counter block, %xmm3 = running MAC, %ecx = round-loop count. */
767bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
768bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm3
769bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/* (%esp) = bytes 0F 0E 0D ... 01 00: a pshufb mask that reverses all 16 bytes. */
770bc3d5698SJohn Baldwin	movl	$202182159,(%esp)
771bc3d5698SJohn Baldwin	movl	$134810123,4(%esp)
772bc3d5698SJohn Baldwin	movl	$67438087,8(%esp)
773bc3d5698SJohn Baldwin	movl	$66051,12(%esp)
/* 16(%esp) = dwords {1,0,0,0}: the paddq operand that steps the byte-reversed counter. */
774bc3d5698SJohn Baldwin	movl	$1,%ebx
775bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
776bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
777bc3d5698SJohn Baldwin	movl	%ebp,20(%esp)
778bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
779bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
/* %xmm5 = mask, %xmm2 = counter as supplied; %ebp = key schedule, %ebx = round count. */
780bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm5
781bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
782bc3d5698SJohn Baldwin	movl	%edx,%ebp
783bc3d5698SJohn Baldwin	movl	%ecx,%ebx
/* pshufb %xmm5,%xmm7: %xmm7 holds the counter byte-reversed from here on. */
784bc3d5698SJohn Baldwin.byte	102,15,56,0,253
/* Encrypt the first counter block in %xmm2: XOR round key 0, then one round per pass. */
785bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
786bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
787bc3d5698SJohn Baldwin	leal	32(%edx),%edx
788bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
789bc3d5698SJohn Baldwin.L032enc1_loop_5:
/* aesenc %xmm1,%xmm2.  The jnz tests the decl: movups and leal leave the flags alone. */
790bc3d5698SJohn Baldwin.byte	102,15,56,220,209
791bc3d5698SJohn Baldwin	decl	%ecx
792bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
793bc3d5698SJohn Baldwin	leal	16(%edx),%edx
794bc3d5698SJohn Baldwin	jnz	.L032enc1_loop_5
/* aesenclast %xmm1,%xmm2 */
795bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/*
 * Prepare the two-lane round loop: %ebx = 16 - 16*rounds (negative start index),
 * %edx = key schedule + 32 + 16*rounds.  Meanwhile load the first input block
 * into %xmm6 and add 1 to the byte-reversed counter in %xmm7.
 */
796bc3d5698SJohn Baldwin	shll	$4,%ebx
797bc3d5698SJohn Baldwin	movl	$16,%ecx
798bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
799bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm7
800bc3d5698SJohn Baldwin	leal	16(%esi),%esi
801bc3d5698SJohn Baldwin	subl	%ebx,%ecx
802bc3d5698SJohn Baldwin	leal	32(%ebp,%ebx,1),%edx
803bc3d5698SJohn Baldwin	movl	%ecx,%ebx
804bc3d5698SJohn Baldwin	jmp	.L033ccm64_dec_outer
805bc3d5698SJohn Baldwin.align	16
806bc3d5698SJohn Baldwin.L033ccm64_dec_outer:
/*
 * Per block: %xmm6 = input XOR E(counter) is stored as the output block;
 * %xmm2 is reloaded with the next counter and turned back into its original
 * byte order by pshufb %xmm5,%xmm2.
 */
807bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm6
808bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
809bc3d5698SJohn Baldwin	movups	%xmm6,(%edi)
810bc3d5698SJohn Baldwin	leal	16(%edi),%edi
811bc3d5698SJohn Baldwin.byte	102,15,56,0,213
/* Count down; after the last block only the MAC still needs encrypting. */
812bc3d5698SJohn Baldwin	subl	$1,%eax
813bc3d5698SJohn Baldwin	jz	.L034ccm64_dec_break
/*
 * Round key 0 is folded in for both lanes: %xmm2 = counter ^ rk0 and
 * %xmm3 = MAC ^ output block ^ rk0 (%xmm6 serves as the temporary).
 */
814bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
815bc3d5698SJohn Baldwin	movl	%ebx,%ecx
816bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
817bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
818bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
819bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
820bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
821bc3d5698SJohn Baldwin.L035ccm64_dec2_loop:
/*
 * Two rounds per pass on both lanes (%xmm2 = counter, %xmm3 = MAC):
 * aesenc with %xmm1, then aesenc with %xmm0.  %ecx climbs from %ebx in steps
 * of 32 and the jnz tests the addl (aesenc/movups do not touch the flags).
 */
822bc3d5698SJohn Baldwin.byte	102,15,56,220,209
823bc3d5698SJohn Baldwin.byte	102,15,56,220,217
824bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
825bc3d5698SJohn Baldwin	addl	$32,%ecx
826bc3d5698SJohn Baldwin.byte	102,15,56,220,208
827bc3d5698SJohn Baldwin.byte	102,15,56,220,216
828bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
829bc3d5698SJohn Baldwin	jnz	.L035ccm64_dec2_loop
/*
 * Fetch the next input block and step the counter, then finish both lanes:
 * aesenc with %xmm1 followed by aesenclast with %xmm0.
 */
830bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
831bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm7
832bc3d5698SJohn Baldwin.byte	102,15,56,220,209
833bc3d5698SJohn Baldwin.byte	102,15,56,220,217
834bc3d5698SJohn Baldwin.byte	102,15,56,221,208
835bc3d5698SJohn Baldwin.byte	102,15,56,221,216
836bc3d5698SJohn Baldwin	leal	16(%esi),%esi
837bc3d5698SJohn Baldwin	jmp	.L033ccm64_dec_outer
838bc3d5698SJohn Baldwin.align	16
839bc3d5698SJohn Baldwin.L034ccm64_dec_break:
/*
 * Last block: reload the round count and key schedule, fold the final output
 * block into the MAC (%xmm3 ^= output ^ rk0) and encrypt %xmm3 on its own.
 */
840bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
841bc3d5698SJohn Baldwin	movl	%ebp,%edx
842bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
843bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
844bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
845bc3d5698SJohn Baldwin	leal	32(%edx),%edx
846bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
847bc3d5698SJohn Baldwin.L036enc1_loop_6:
/* aesenc %xmm1,%xmm3 */
848bc3d5698SJohn Baldwin.byte	102,15,56,220,217
849bc3d5698SJohn Baldwin	decl	%ecx
850bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
851bc3d5698SJohn Baldwin	leal	16(%edx),%edx
852bc3d5698SJohn Baldwin	jnz	.L036enc1_loop_6
/* aesenclast %xmm1,%xmm3 */
853bc3d5698SJohn Baldwin.byte	102,15,56,221,217
/*
 * Restore the caller's %esp; 40(%esp) is then the sixth argument again, and
 * the updated MAC is stored through it.
 */
854bc3d5698SJohn Baldwin	movl	48(%esp),%esp
855bc3d5698SJohn Baldwin	movl	40(%esp),%edi
856bc3d5698SJohn Baldwin	movups	%xmm3,(%edi)
/* Zero %xmm0-%xmm7 before returning. */
857bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
858bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
859bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
860bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
861bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
862bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
863bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
864bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
865bc3d5698SJohn Baldwin	popl	%edi
866bc3d5698SJohn Baldwin	popl	%esi
867bc3d5698SJohn Baldwin	popl	%ebx
868bc3d5698SJohn Baldwin	popl	%ebp
869bc3d5698SJohn Baldwin	ret
870bc3d5698SJohn Baldwin.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
/*
 * aesni_ctr32_encrypt_blocks (i386, AT&T syntax, all arguments on the stack).
 *
 * Stack arguments as read below, after the four register pushes:
 *   20(%esp) -> %esi  input pointer
 *   24(%esp) -> %edi  output pointer
 *   28(%esp) -> %eax  number of 16-byte blocks
 *   32(%esp) -> %edx  key schedule; the dword at offset 240 is the round-loop count
 *   36(%esp) -> %ebx  16-byte counter block
 * NOTE(review): presumably the C prototype is (in, out, blocks, key, ivec) --
 * confirm against the C declaration.
 *
 * Only the last dword of the counter block is stepped; it is read with
 * pextrd $3 and byte-swapped, i.e. treated as a big-endian 32-bit counter.
 * Nothing is written back through the counter-block pointer.
 *
 * Aligned scratch frame:
 *    0(%esp)  pshufb mask that reverses all 16 bytes
 *   16(%esp)  increment vector {6,6,6,0}
 *   32(%esp)  counter block with its last dword cleared, XORed with round key 0
 *   48(%esp)  counters n, n+1, n+2 in host byte order
 *   64(%esp)  counters n+3, n+4, n+5 in host byte order
 *   80(%esp)  caller's %esp
 *
 * Calls .L_aesni_encrypt6_enter, _aesni_encrypt6 and _aesni_encrypt2/3/4,
 * none of which are defined inside this function.
 * NOTE(review): a count of 0 is not special-cased; it falls through to
 * .L041ctr32_one and processes one block -- presumably callers never pass 0.
 */
871bc3d5698SJohn Baldwin.globl	aesni_ctr32_encrypt_blocks
872bc3d5698SJohn Baldwin.type	aesni_ctr32_encrypt_blocks,@function
873bc3d5698SJohn Baldwin.align	16
874bc3d5698SJohn Baldwinaesni_ctr32_encrypt_blocks:
875bc3d5698SJohn Baldwin.L_aesni_ctr32_encrypt_blocks_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
876c0855eaaSJohn Baldwin	#ifdef __CET__
877c0855eaaSJohn Baldwin
878c0855eaaSJohn Baldwin.byte	243,15,30,251
879c0855eaaSJohn Baldwin	#endif
880c0855eaaSJohn Baldwin
/* Save the callee-saved registers, then fetch the five stack arguments. */
881bc3d5698SJohn Baldwin	pushl	%ebp
882bc3d5698SJohn Baldwin	pushl	%ebx
883bc3d5698SJohn Baldwin	pushl	%esi
884bc3d5698SJohn Baldwin	pushl	%edi
885bc3d5698SJohn Baldwin	movl	20(%esp),%esi
886bc3d5698SJohn Baldwin	movl	24(%esp),%edi
887bc3d5698SJohn Baldwin	movl	28(%esp),%eax
888bc3d5698SJohn Baldwin	movl	32(%esp),%edx
889bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
/* Build a 16-byte-aligned scratch frame; the caller's %esp is kept at 80(%esp). */
890bc3d5698SJohn Baldwin	movl	%esp,%ebp
891bc3d5698SJohn Baldwin	subl	$88,%esp
892bc3d5698SJohn Baldwin	andl	$-16,%esp
893bc3d5698SJohn Baldwin	movl	%ebp,80(%esp)
/* Exactly one block: skip all counter bookkeeping. */
894bc3d5698SJohn Baldwin	cmpl	$1,%eax
895bc3d5698SJohn Baldwin	je	.L037ctr32_one_shortcut
896bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
/* (%esp) = bytes 0F 0E 0D ... 01 00: a pshufb mask that reverses all 16 bytes. */
897bc3d5698SJohn Baldwin	movl	$202182159,(%esp)
898bc3d5698SJohn Baldwin	movl	$134810123,4(%esp)
899bc3d5698SJohn Baldwin	movl	$67438087,8(%esp)
900bc3d5698SJohn Baldwin	movl	$66051,12(%esp)
/* 16(%esp) = dwords {6,6,6,0}: the per-iteration counter increment. */
901bc3d5698SJohn Baldwin	movl	$6,%ecx
902bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
903bc3d5698SJohn Baldwin	movl	%ecx,16(%esp)
904bc3d5698SJohn Baldwin	movl	%ecx,20(%esp)
905bc3d5698SJohn Baldwin	movl	%ecx,24(%esp)
906bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
/*
 * pextrd $3,%xmm7,%ebx then pinsrd $3,%ebp,%xmm7 (%ebp is 0): pull the counter
 * dword out of the block and clear it in %xmm7.  bswap turns it into host order.
 */
907bc3d5698SJohn Baldwin.byte	102,15,58,22,251,3
908bc3d5698SJohn Baldwin.byte	102,15,58,34,253,3
909bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
910bc3d5698SJohn Baldwin	bswap	%ebx
911bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
912bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
913bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm2
/*
 * The .byte lines are pinsrd: %xmm0 receives n, n+1, n+2 (from %ebx) and
 * %xmm1 receives n+3, n+4, n+5 (from %ebp) in dwords 0..2.
 */
914bc3d5698SJohn Baldwin.byte	102,15,58,34,195,0
915bc3d5698SJohn Baldwin	leal	3(%ebx),%ebp
916bc3d5698SJohn Baldwin.byte	102,15,58,34,205,0
917bc3d5698SJohn Baldwin	incl	%ebx
918bc3d5698SJohn Baldwin.byte	102,15,58,34,195,1
919bc3d5698SJohn Baldwin	incl	%ebp
920bc3d5698SJohn Baldwin.byte	102,15,58,34,205,1
921bc3d5698SJohn Baldwin	incl	%ebx
922bc3d5698SJohn Baldwin.byte	102,15,58,34,195,2
923bc3d5698SJohn Baldwin	incl	%ebp
924bc3d5698SJohn Baldwin.byte	102,15,58,34,205,2
/*
 * Save the host-order counters, then byte-reverse both registers
 * (pshufb %xmm2,%xmm0 / pshufb %xmm2,%xmm1) so each counter becomes big-endian.
 * %xmm6 = round key 0.  The pshufd $192/$128 pair places counter n and n+1 in
 * the top dword of %xmm2/%xmm3 with the other dwords zero.
 */
925bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
926bc3d5698SJohn Baldwin.byte	102,15,56,0,194
927bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm6
928bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
929bc3d5698SJohn Baldwin.byte	102,15,56,0,202
930bc3d5698SJohn Baldwin	pshufd	$192,%xmm0,%xmm2
931bc3d5698SJohn Baldwin	pshufd	$128,%xmm0,%xmm3
/* Fewer than six blocks: go straight to the tail dispatcher. */
932bc3d5698SJohn Baldwin	cmpl	$6,%eax
933bc3d5698SJohn Baldwin	jb	.L038ctr32_tail
/*
 * Six-block loop setup: 32(%esp) = cleared counter block ^ round key 0,
 * %ebp = key schedule, %ebx = 16 - 16*rounds, %edx = key schedule + 32 + 16*rounds.
 * %eax is biased by -6 so the loop can exit on borrow.
 */
934bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
935bc3d5698SJohn Baldwin	shll	$4,%ecx
936bc3d5698SJohn Baldwin	movl	$16,%ebx
937bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
938bc3d5698SJohn Baldwin	movl	%edx,%ebp
939bc3d5698SJohn Baldwin	subl	%ecx,%ebx
940bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
941bc3d5698SJohn Baldwin	subl	$6,%eax
942bc3d5698SJohn Baldwin	jmp	.L039ctr32_loop6
943bc3d5698SJohn Baldwin.align	16
944bc3d5698SJohn Baldwin.L039ctr32_loop6:
/*
 * Form six counter blocks in %xmm2-%xmm7 (each pshufd isolates one counter,
 * each pxor merges in 32(%esp)), interleaved with the first round
 * (aesenc with %xmm1 = round key 1) on every lane.  %xmm0 is then round key 2
 * and %ecx the loop index expected by .L_aesni_encrypt6_enter.
 * NOTE(review): presumably that entry point runs the remaining rounds on
 * %xmm2-%xmm7 -- confirm at its definition.
 */
945bc3d5698SJohn Baldwin	pshufd	$64,%xmm0,%xmm4
946bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm0
947bc3d5698SJohn Baldwin	pshufd	$192,%xmm1,%xmm5
948bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
949bc3d5698SJohn Baldwin	pshufd	$128,%xmm1,%xmm6
950bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
951bc3d5698SJohn Baldwin	pshufd	$64,%xmm1,%xmm7
952bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
953bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
954bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
955bc3d5698SJohn Baldwin.byte	102,15,56,220,209
956bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
957bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
958bc3d5698SJohn Baldwin.byte	102,15,56,220,217
959bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
960bc3d5698SJohn Baldwin	movl	%ebx,%ecx
961bc3d5698SJohn Baldwin.byte	102,15,56,220,225
962bc3d5698SJohn Baldwin.byte	102,15,56,220,233
963bc3d5698SJohn Baldwin.byte	102,15,56,220,241
964bc3d5698SJohn Baldwin.byte	102,15,56,220,249
965bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
/*
 * XOR the six results with six input blocks and store them.  Interleaved with
 * that, both saved counter vectors are advanced by {6,6,6,0}, written back to
 * 48(%esp)/64(%esp), byte-reversed again (the two pshufb .byte lines) and the
 * first two counters of the next iteration are pre-shuffled into %xmm2/%xmm3.
 */
966bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
967bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
968bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
969bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
970bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
971bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
972bc3d5698SJohn Baldwin	movdqa	16(%esp),%xmm0
973bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
974bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm1
975bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
976bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
977bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
978bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm0
979bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm2
980bc3d5698SJohn Baldwin	movups	48(%esi),%xmm3
981bc3d5698SJohn Baldwin	movups	64(%esi),%xmm4
982bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm5
983bc3d5698SJohn Baldwin	movups	80(%esi),%xmm3
984bc3d5698SJohn Baldwin	leal	96(%esi),%esi
985bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
986bc3d5698SJohn Baldwin.byte	102,15,56,0,194
987bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm6
988bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
989bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm7
990bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
991bc3d5698SJohn Baldwin.byte	102,15,56,0,202
992bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
993bc3d5698SJohn Baldwin	pshufd	$192,%xmm0,%xmm2
994bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
995bc3d5698SJohn Baldwin	leal	96(%edi),%edi
996bc3d5698SJohn Baldwin	pshufd	$128,%xmm0,%xmm3
/* Loop while at least six blocks remained (no borrow from the subtraction). */
997bc3d5698SJohn Baldwin	subl	$6,%eax
998bc3d5698SJohn Baldwin	jnc	.L039ctr32_loop6
/*
 * Undo the bias; if blocks remain, restore %edx/%ecx and rebuild %xmm7 as
 * round key 0 ^ 32(%esp), which is the counter block with its last dword cleared.
 */
999bc3d5698SJohn Baldwin	addl	$6,%eax
1000bc3d5698SJohn Baldwin	jz	.L040ctr32_ret
1001bc3d5698SJohn Baldwin	movdqu	(%ebp),%xmm7
1002bc3d5698SJohn Baldwin	movl	%ebp,%edx
1003bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm7
1004bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
1005bc3d5698SJohn Baldwin.L038ctr32_tail:
/*
 * Tail dispatch on the remaining count.  Each por merges the cleared counter
 * block into one more counter lane.  The je after each cmpl reuses that
 * compare's flags because pshufd/por do not modify EFLAGS.  Falling through
 * all branches (five blocks left) uses the six-lane helper and stores five results.
 */
1006bc3d5698SJohn Baldwin	por	%xmm7,%xmm2
1007bc3d5698SJohn Baldwin	cmpl	$2,%eax
1008bc3d5698SJohn Baldwin	jb	.L041ctr32_one
1009bc3d5698SJohn Baldwin	pshufd	$64,%xmm0,%xmm4
1010bc3d5698SJohn Baldwin	por	%xmm7,%xmm3
1011bc3d5698SJohn Baldwin	je	.L042ctr32_two
1012bc3d5698SJohn Baldwin	pshufd	$192,%xmm1,%xmm5
1013bc3d5698SJohn Baldwin	por	%xmm7,%xmm4
1014bc3d5698SJohn Baldwin	cmpl	$4,%eax
1015bc3d5698SJohn Baldwin	jb	.L043ctr32_three
1016bc3d5698SJohn Baldwin	pshufd	$128,%xmm1,%xmm6
1017bc3d5698SJohn Baldwin	por	%xmm7,%xmm5
1018bc3d5698SJohn Baldwin	je	.L044ctr32_four
1019bc3d5698SJohn Baldwin	por	%xmm7,%xmm6
1020bc3d5698SJohn Baldwin	call	_aesni_encrypt6
1021bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
1022bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
1023bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
1024bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
1025bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
1026bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
1027bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
1028bc3d5698SJohn Baldwin	movups	64(%esi),%xmm1
1029bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
1030bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1031bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
1032bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1033bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1034bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1035bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
1036bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
1037bc3d5698SJohn Baldwin.align	16
1038bc3d5698SJohn Baldwin.L037ctr32_one_shortcut:
/* Single-block entry: the counter block is used exactly as supplied. */
1039bc3d5698SJohn Baldwin	movups	(%ebx),%xmm2
1040bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1041bc3d5698SJohn Baldwin.L041ctr32_one:
/* Encrypt %xmm2 alone (aesenc per pass, then aesenclast) and XOR with one input block. */
1042bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1043bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1044bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1045bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1046bc3d5698SJohn Baldwin.L045enc1_loop_7:
1047bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1048bc3d5698SJohn Baldwin	decl	%ecx
1049bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1050bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1051bc3d5698SJohn Baldwin	jnz	.L045enc1_loop_7
1052bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1053bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
1054bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm6
1055bc3d5698SJohn Baldwin	movups	%xmm6,(%edi)
1056bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
1057bc3d5698SJohn Baldwin.align	16
1058bc3d5698SJohn Baldwin.L042ctr32_two:
/* Two blocks left. */
1059bc3d5698SJohn Baldwin	call	_aesni_encrypt2
1060bc3d5698SJohn Baldwin	movups	(%esi),%xmm5
1061bc3d5698SJohn Baldwin	movups	16(%esi),%xmm6
1062bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1063bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1064bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1065bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1066bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
1067bc3d5698SJohn Baldwin.align	16
1068bc3d5698SJohn Baldwin.L043ctr32_three:
/* Three blocks left. */
1069bc3d5698SJohn Baldwin	call	_aesni_encrypt3
1070bc3d5698SJohn Baldwin	movups	(%esi),%xmm5
1071bc3d5698SJohn Baldwin	movups	16(%esi),%xmm6
1072bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1073bc3d5698SJohn Baldwin	movups	32(%esi),%xmm7
1074bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1075bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1076bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1077bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1078bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1079bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
1080bc3d5698SJohn Baldwin.align	16
1081bc3d5698SJohn Baldwin.L044ctr32_four:
/* Four blocks left; falls through into the common exit. */
1082bc3d5698SJohn Baldwin	call	_aesni_encrypt4
1083bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
1084bc3d5698SJohn Baldwin	movups	16(%esi),%xmm7
1085bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
1086bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
1087bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
1088bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
1089bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1090bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
1091bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1092bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
1093bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1094bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1095bc3d5698SJohn Baldwin.L040ctr32_ret:
/*
 * Common exit: zero %xmm0-%xmm7, overwrite the scratch slots at 32/48/64(%esp)
 * with zeros, restore the caller's %esp and the saved registers.
 */
1096bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1097bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1098bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1099bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1100bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1101bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
1102bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
1103bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
1104bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
1105bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
1106bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1107bc3d5698SJohn Baldwin	movl	80(%esp),%esp
1108bc3d5698SJohn Baldwin	popl	%edi
1109bc3d5698SJohn Baldwin	popl	%esi
1110bc3d5698SJohn Baldwin	popl	%ebx
1111bc3d5698SJohn Baldwin	popl	%ebp
1112bc3d5698SJohn Baldwin	ret
1113bc3d5698SJohn Baldwin.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
/*
 * aesni_xts_encrypt (i386, AT&T syntax, all arguments on the stack).
 *
 * Stack arguments as read below, after the four register pushes:
 *   20(%esp) -> %esi  input pointer
 *   24(%esp) -> %edi  output pointer
 *   28(%esp) -> %eax  length in bytes
 *   32(%esp) -> %edx  key schedule used for the data blocks
 *   36(%esp) -> %edx  key schedule used once, for the initial tweak
 *   40(%esp) -> %esi  16-byte block that is encrypted to form the initial tweak
 * NOTE(review): presumably the C prototype is (in, out, length, key1, key2, iv) --
 * confirm against the C declaration.
 *
 * Every data block is computed as E(input ^ tweak) ^ tweak.  Between blocks the
 * tweak is shifted left by one bit across 128 bits and the constant 135 (0x87)
 * is XORed into the low byte when a bit is shifted out of the top.  A trailing
 * partial block (length & 15) is handled by the byte-swapping loop at
 * .L057xts_enc_steal followed by one more single-block encryption.
 *
 * Aligned scratch frame:
 *     0..80(%esp)  up to six saved tweaks (zeroed on exit)
 *    96(%esp)      dwords {135,0,1,0}: carry/feedback mask for the tweak update
 *   112(%esp)      length in bytes, later replaced by length & 15
 *   116(%esp)      caller's %esp
 *
 * Calls .L_aesni_encrypt6_enter, _aesni_encrypt6 and _aesni_encrypt2/3/4,
 * none of which are defined inside this function.
 * NOTE(review): with length < 16 and length & 15 != 0 the steal loop addresses
 * -16(%edi) before any block was written -- presumably callers guarantee
 * length >= 16; confirm.
 */
1114bc3d5698SJohn Baldwin.globl	aesni_xts_encrypt
1115bc3d5698SJohn Baldwin.type	aesni_xts_encrypt,@function
1116bc3d5698SJohn Baldwin.align	16
1117bc3d5698SJohn Baldwinaesni_xts_encrypt:
1118bc3d5698SJohn Baldwin.L_aesni_xts_encrypt_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
1119c0855eaaSJohn Baldwin	#ifdef __CET__
1120c0855eaaSJohn Baldwin
1121c0855eaaSJohn Baldwin.byte	243,15,30,251
1122c0855eaaSJohn Baldwin	#endif
1123c0855eaaSJohn Baldwin
1124bc3d5698SJohn Baldwin	pushl	%ebp
1125bc3d5698SJohn Baldwin	pushl	%ebx
1126bc3d5698SJohn Baldwin	pushl	%esi
1127bc3d5698SJohn Baldwin	pushl	%edi
/*
 * Initial tweak: encrypt the 16 bytes at the sixth argument with the key
 * schedule from the fifth argument (aesenc %xmm1,%xmm2 per pass, then
 * aesenclast).  The result stays in %xmm2.
 */
1128bc3d5698SJohn Baldwin	movl	36(%esp),%edx
1129bc3d5698SJohn Baldwin	movl	40(%esp),%esi
1130bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1131bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1132bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1133bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1134bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1135bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1136bc3d5698SJohn Baldwin.L046enc1_loop_8:
1137bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1138bc3d5698SJohn Baldwin	decl	%ecx
1139bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1140bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1141bc3d5698SJohn Baldwin	jnz	.L046enc1_loop_8
1142bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/* Now fetch the data arguments and build the aligned scratch frame. */
1143bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1144bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1145bc3d5698SJohn Baldwin	movl	28(%esp),%eax
1146bc3d5698SJohn Baldwin	movl	32(%esp),%edx
1147bc3d5698SJohn Baldwin	movl	%esp,%ebp
1148bc3d5698SJohn Baldwin	subl	$120,%esp
1149bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1150bc3d5698SJohn Baldwin	andl	$-16,%esp
/* 96(%esp) = {135,0,1,0}; 112(%esp) = byte length; 116(%esp) = caller's %esp. */
1151bc3d5698SJohn Baldwin	movl	$135,96(%esp)
1152bc3d5698SJohn Baldwin	movl	$0,100(%esp)
1153bc3d5698SJohn Baldwin	movl	$1,104(%esp)
1154bc3d5698SJohn Baldwin	movl	$0,108(%esp)
1155bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1156bc3d5698SJohn Baldwin	movl	%ebp,116(%esp)
/*
 * %xmm1 = current tweak, %xmm3 = carry mask, %xmm0 = per-dword "top bit set"
 * flags of the tweak (pcmpgtd against zero).  %eax is rounded down to whole
 * blocks and biased by -96 so the six-block loop can exit on borrow.
 */
1157bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
1158bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1159bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
1160bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1161bc3d5698SJohn Baldwin	andl	$-16,%eax
1162bc3d5698SJohn Baldwin	movl	%edx,%ebp
1163bc3d5698SJohn Baldwin	movl	%ecx,%ebx
1164bc3d5698SJohn Baldwin	subl	$96,%eax
1165bc3d5698SJohn Baldwin	jc	.L047xts_enc_short
/* Six-block loop setup: %ebx = 16 - 16*rounds, %edx = key schedule + 32 + 16*rounds. */
1166bc3d5698SJohn Baldwin	shll	$4,%ecx
1167bc3d5698SJohn Baldwin	movl	$16,%ebx
1168bc3d5698SJohn Baldwin	subl	%ecx,%ebx
1169bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
1170bc3d5698SJohn Baldwin	jmp	.L048xts_enc_loop6
1171bc3d5698SJohn Baldwin.align	16
1172bc3d5698SJohn Baldwin.L048xts_enc_loop6:
/*
 * Generate six consecutive tweaks.  One update step is:
 *   pshufd $19  - route the top-bit flags of dwords 3 and 1 to dwords 0 and 2
 *   paddq       - shift both 64-bit halves left by one
 *   pand %xmm3  - keep 135 in dword 0 and 1 in dword 2 where a bit was shifted out
 *   pcmpgtd     - recompute the top-bit flags for the next step
 *   pxor        - apply the feedback and the carry into the upper half
 * Tweaks 0..4 are saved at 0/16/32/48/64(%esp); the sixth ends up in %xmm7.
 */
1173bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1174bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1175bc3d5698SJohn Baldwin	movdqa	%xmm1,(%esp)
1176bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1177bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1178bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1179bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1180bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1181bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1182bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
1183bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1184bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1185bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1186bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1187bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1188bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1189bc3d5698SJohn Baldwin	movdqa	%xmm1,32(%esp)
1190bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1191bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1192bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1193bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1194bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1195bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1196bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
1197bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1198bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1199bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1200bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1201bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
1202bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
1203bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
/*
 * Load six input blocks, XOR each with round key 0 (%xmm0) and its tweak, and
 * start the first round (aesenc with %xmm1 = round key 1) on every lane.  The
 * sixth tweak is parked at 80(%esp).  %xmm0 then holds round key 2 and %ecx the
 * loop index for .L_aesni_encrypt6_enter.
 * NOTE(review): presumably that entry point runs the remaining rounds on
 * %xmm2-%xmm7 -- confirm at its definition.
 */
1204bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
1205bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
1206bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1207bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1208bc3d5698SJohn Baldwin	movl	%ebx,%ecx
1209bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
1210bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1211bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1212bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1213bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1214bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
1215bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
1216bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
1217bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm1
1218bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
1219bc3d5698SJohn Baldwin	leal	96(%esi),%esi
1220bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
1221bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
1222bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1223bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
1224bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
1225bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
1226bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1227bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
1228bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
1229bc3d5698SJohn Baldwin.byte	102,15,56,220,217
1230bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
1231bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
1232bc3d5698SJohn Baldwin.byte	102,15,56,220,225
1233bc3d5698SJohn Baldwin.byte	102,15,56,220,233
1234bc3d5698SJohn Baldwin.byte	102,15,56,220,241
1235bc3d5698SJohn Baldwin.byte	102,15,56,220,249
1236bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
/*
 * XOR each result with its tweak again and store the six output blocks.
 * Interleaved with the stores, the tweak for the next block is derived from the
 * sixth tweak (reloaded from 80(%esp)) and %xmm3 is refilled with the carry mask.
 */
1237bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm1
1238bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1239bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1240bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1241bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1242bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1243bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
1244bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1245bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
1246bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1247bc3d5698SJohn Baldwin	xorps	64(%esp),%xmm6
1248bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1249bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
1250bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
1251bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1252bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
1253bc3d5698SJohn Baldwin	leal	96(%edi),%edi
1254bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
1255bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1256bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1257bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1258bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1259bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
/* Loop while at least 96 bytes remained (no borrow from the subtraction). */
1260bc3d5698SJohn Baldwin	subl	$96,%eax
1261bc3d5698SJohn Baldwin	jnc	.L048xts_enc_loop6
/* Restore %ecx/%ebx = round count and %edx = key schedule for the short paths. */
1262bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
1263bc3d5698SJohn Baldwin	movl	%ebp,%edx
1264bc3d5698SJohn Baldwin	movl	%ecx,%ebx
1265bc3d5698SJohn Baldwin.L047xts_enc_short:
/*
 * 0..5 whole blocks remain.  Undo the bias and dispatch on the byte count,
 * producing one more tweak before each further test.  The je after each cmpl
 * reuses that compare's flags because the SSE instructions in between do not
 * modify EFLAGS.  Tweaks accumulate in %xmm5, %xmm6, %xmm7 and, for four or
 * five blocks, in the scratch frame.
 */
1266bc3d5698SJohn Baldwin	addl	$96,%eax
1267bc3d5698SJohn Baldwin	jz	.L049xts_enc_done6x
1268bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
1269bc3d5698SJohn Baldwin	cmpl	$32,%eax
1270bc3d5698SJohn Baldwin	jb	.L050xts_enc_one
1271bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1272bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1273bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1274bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1275bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1276bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1277bc3d5698SJohn Baldwin	je	.L051xts_enc_two
1278bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1279bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1280bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
1281bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1282bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1283bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1284bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1285bc3d5698SJohn Baldwin	cmpl	$64,%eax
1286bc3d5698SJohn Baldwin	jb	.L052xts_enc_three
1287bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1288bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1289bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
1290bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1291bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1292bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1293bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1294bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
1295bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
1296bc3d5698SJohn Baldwin	je	.L053xts_enc_four
/*
 * Five blocks: tweaks 0..3 live at 0/16/32/48(%esp) and the fifth is built in
 * %xmm7 and saved at 64(%esp).  The six-lane helper is called and five
 * results are stored.
 */
1297bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
1298bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
1299bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
1300bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1301bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
1302bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1303bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
1304bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
1305bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1306bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
1307bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1308bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
1309bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
1310bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
1311bc3d5698SJohn Baldwin	leal	80(%esi),%esi
1312bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
1313bc3d5698SJohn Baldwin	movdqa	%xmm7,64(%esp)
1314bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm6
1315bc3d5698SJohn Baldwin	call	_aesni_encrypt6
1316bc3d5698SJohn Baldwin	movaps	64(%esp),%xmm1
1317bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1318bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1319bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
1320bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1321bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
1322bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1323bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
1324bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1325bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1326bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
1327bc3d5698SJohn Baldwin	leal	80(%edi),%edi
1328bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
1329bc3d5698SJohn Baldwin.align	16
1330bc3d5698SJohn Baldwin.L050xts_enc_one:
/*
 * One block with tweak %xmm5, encrypted inline.  Like every short path this
 * leaves the last tweak used in %xmm1 for .L054xts_enc_done.
 */
1331bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1332bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1333bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1334bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1335bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1336bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1337bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1338bc3d5698SJohn Baldwin.L055enc1_loop_9:
1339bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1340bc3d5698SJohn Baldwin	decl	%ecx
1341bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1342bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1343bc3d5698SJohn Baldwin	jnz	.L055enc1_loop_9
1344bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1345bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1346bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1347bc3d5698SJohn Baldwin	leal	16(%edi),%edi
1348bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
1349bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
1350bc3d5698SJohn Baldwin.align	16
1351bc3d5698SJohn Baldwin.L051xts_enc_two:
/* Two blocks: tweaks in %xmm5 and %xmm6. */
1352bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
1353bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1354bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1355bc3d5698SJohn Baldwin	leal	32(%esi),%esi
1356bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1357bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1358bc3d5698SJohn Baldwin	call	_aesni_encrypt2
1359bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1360bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1361bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1362bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1363bc3d5698SJohn Baldwin	leal	32(%edi),%edi
1364bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1365bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
1366bc3d5698SJohn Baldwin.align	16
1367bc3d5698SJohn Baldwin.L052xts_enc_three:
/* Three blocks: tweaks in %xmm5, %xmm6 and %xmm7. */
1368bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm7
1369bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1370bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1371bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
1372bc3d5698SJohn Baldwin	leal	48(%esi),%esi
1373bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1374bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1375bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1376bc3d5698SJohn Baldwin	call	_aesni_encrypt3
1377bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1378bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1379bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1380bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1381bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1382bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1383bc3d5698SJohn Baldwin	leal	48(%edi),%edi
1384bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
1385bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
1386bc3d5698SJohn Baldwin.align	16
1387bc3d5698SJohn Baldwin.L053xts_enc_four:
/* Four blocks: tweaks at (%esp), 16(%esp), %xmm7 and %xmm6, in that order. */
1388bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
1389bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1390bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1391bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
1392bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1393bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
1394bc3d5698SJohn Baldwin	leal	64(%esi),%esi
1395bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1396bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1397bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
1398bc3d5698SJohn Baldwin	call	_aesni_encrypt4
1399bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1400bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1401bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1402bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1403bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
1404bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1405bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1406bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1407bc3d5698SJohn Baldwin	leal	64(%edi),%edi
1408bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1409bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
1410bc3d5698SJohn Baldwin.align	16
1411bc3d5698SJohn Baldwin.L049xts_enc_done6x:
/*
 * No whole blocks were left after the six-block loop.  %xmm1 already holds the
 * next unused tweak, so it is used directly for the partial block, if any.
 */
1412bc3d5698SJohn Baldwin	movl	112(%esp),%eax
1413bc3d5698SJohn Baldwin	andl	$15,%eax
1414bc3d5698SJohn Baldwin	jz	.L056xts_enc_ret
1415bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
1416bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1417bc3d5698SJohn Baldwin	jmp	.L057xts_enc_steal
1418bc3d5698SJohn Baldwin.align	16
1419bc3d5698SJohn Baldwin.L054xts_enc_done:
/*
 * Reached from the short paths with %xmm1 = last tweak used.  If a partial
 * block remains, advance the tweak once more into %xmm5 and remember the
 * tail length at 112(%esp).
 */
1420bc3d5698SJohn Baldwin	movl	112(%esp),%eax
1421bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1422bc3d5698SJohn Baldwin	andl	$15,%eax
1423bc3d5698SJohn Baldwin	jz	.L056xts_enc_ret
1424bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1425bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1426bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm5
1427bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1428bc3d5698SJohn Baldwin	pand	96(%esp),%xmm5
1429bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm5
1430bc3d5698SJohn Baldwin.L057xts_enc_steal:
/*
 * For each tail byte: the input byte replaces the corresponding byte of the
 * last full output block (at -16(%edi)), and the byte it displaces becomes the
 * tail output byte.
 */
1431bc3d5698SJohn Baldwin	movzbl	(%esi),%ecx
1432bc3d5698SJohn Baldwin	movzbl	-16(%edi),%edx
1433bc3d5698SJohn Baldwin	leal	1(%esi),%esi
1434bc3d5698SJohn Baldwin	movb	%cl,-16(%edi)
1435bc3d5698SJohn Baldwin	movb	%dl,(%edi)
1436bc3d5698SJohn Baldwin	leal	1(%edi),%edi
1437bc3d5698SJohn Baldwin	subl	$1,%eax
1438bc3d5698SJohn Baldwin	jnz	.L057xts_enc_steal
/*
 * Rewind %edi by the tail length and re-encrypt the patched block at
 * -16(%edi) in place with tweak %xmm5 (%ebp = key schedule, %ebx = round count).
 */
1439bc3d5698SJohn Baldwin	subl	112(%esp),%edi
1440bc3d5698SJohn Baldwin	movl	%ebp,%edx
1441bc3d5698SJohn Baldwin	movl	%ebx,%ecx
1442bc3d5698SJohn Baldwin	movups	-16(%edi),%xmm2
1443bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1444bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1445bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1446bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1447bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1448bc3d5698SJohn Baldwin.L058enc1_loop_10:
1449bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1450bc3d5698SJohn Baldwin	decl	%ecx
1451bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1452bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1453bc3d5698SJohn Baldwin	jnz	.L058enc1_loop_10
1454bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1455bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1456bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi)
1457bc3d5698SJohn Baldwin.L056xts_enc_ret:
/*
 * Common exit: zero %xmm0-%xmm7, overwrite the six tweak slots at 0..80(%esp)
 * with zeros, restore the caller's %esp and the saved registers.
 */
1458bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1459bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1460bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1461bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
1462bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1463bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%esp)
1464bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1465bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
1466bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
1467bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
1468bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
1469bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
1470bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1471bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
1472bc3d5698SJohn Baldwin	movl	116(%esp),%esp
1473bc3d5698SJohn Baldwin	popl	%edi
1474bc3d5698SJohn Baldwin	popl	%esi
1475bc3d5698SJohn Baldwin	popl	%ebx
1476bc3d5698SJohn Baldwin	popl	%ebp
1477bc3d5698SJohn Baldwin	ret
1478bc3d5698SJohn Baldwin.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
/*
 * aesni_xts_decrypt -- i386 stack-argument entry point (AT&T syntax).
 *
 * After the four register pushes below, the stack arguments are read as:
 *   20(%esp) -> %esi   source pointer (16-byte blocks are loaded from it)
 *   24(%esp) -> %edi   destination pointer (results are stored to it)
 *   28(%esp) -> %eax   byte length
 *   32(%esp) -> %edx   key schedule used by the aesdec data path
 *   36(%esp)           key schedule used once, with aesenc, on the block below
 *   40(%esp)           pointer to a 16-byte block that is encrypted to seed
 *                      the running tweak
 * NOTE(review): the roles above are inferred from how each argument is used;
 * the C prototype is not visible in this chunk -- confirm against the header.
 *
 * Both key schedules are assumed to keep their loop count at offset 240
 * (it is what gets loaded into %ecx before each single-block loop).
 *
 * Scratch frame (16-byte aligned, carved out below the saved registers):
 *     0.. 95(%esp)  up to six per-block tweak values
 *    96..111(%esp)  constant dwords {135, 0, 1, 0} used by the tweak update
 *   112(%esp)       byte count, later reduced to the trailing (len & 15)
 *   116(%esp)       caller's %esp, restored on exit
 *
 * Tweak update idiom used throughout: pcmpgtd against zero captures the
 * per-dword sign bits, pshufd $19 moves them into place, pand with the
 * constant keeps only the 135 feedback and the carry between the 64-bit
 * halves, paddq doubles both halves, pxor merges the result.
 *
 * The AES-NI instructions are emitted as raw bytes:
 *   102,15,56,220,xx = aesenc       102,15,56,221,xx = aesenclast
 *   102,15,56,222,xx = aesdec       102,15,56,223,xx = aesdeclast
 * with xx = 209/217/225/233/241/249 selecting %xmm2..%xmm7 as destination
 * and %xmm1 as the round-key source.
 *
 * On exit %xmm0-%xmm7 and the 96 bytes of tweak scratch are zeroed.
 */
1479bc3d5698SJohn Baldwin.globl	aesni_xts_decrypt
1480bc3d5698SJohn Baldwin.type	aesni_xts_decrypt,@function
1481bc3d5698SJohn Baldwin.align	16
1482bc3d5698SJohn Baldwinaesni_xts_decrypt:
1483bc3d5698SJohn Baldwin.L_aesni_xts_decrypt_begin:
/* 243,15,30,251 is the byte encoding of endbr32; emitted only when __CET__ is defined. */
1484c0855eaaSJohn Baldwin	#ifdef __CET__
1485c0855eaaSJohn Baldwin
1486c0855eaaSJohn Baldwin.byte	243,15,30,251
1487c0855eaaSJohn Baldwin	#endif
1488c0855eaaSJohn Baldwin
1489bc3d5698SJohn Baldwin	pushl	%ebp
1490bc3d5698SJohn Baldwin	pushl	%ebx
1491bc3d5698SJohn Baldwin	pushl	%esi
1492bc3d5698SJohn Baldwin	pushl	%edi
/* Encrypt the 16-byte block at the 6th argument with the 5th argument's key schedule; the result in %xmm2 becomes the initial tweak. */
1493bc3d5698SJohn Baldwin	movl	36(%esp),%edx
1494bc3d5698SJohn Baldwin	movl	40(%esp),%esi
1495bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1496bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1497bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1498bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1499bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1500bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1501bc3d5698SJohn Baldwin.L059enc1_loop_11:
/* aesenc %xmm1,%xmm2 -- one round per iteration, %ecx counts down */
1502bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1503bc3d5698SJohn Baldwin	decl	%ecx
1504bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1505bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1506bc3d5698SJohn Baldwin	jnz	.L059enc1_loop_11
/* aesenclast %xmm1,%xmm2 */
1507bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/* Load the remaining arguments, then build the aligned scratch frame; %ebp keeps the caller's %esp until it is parked at 116(%esp). */
1508bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1509bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1510bc3d5698SJohn Baldwin	movl	28(%esp),%eax
1511bc3d5698SJohn Baldwin	movl	32(%esp),%edx
1512bc3d5698SJohn Baldwin	movl	%esp,%ebp
1513bc3d5698SJohn Baldwin	subl	$120,%esp
1514bc3d5698SJohn Baldwin	andl	$-16,%esp
/* If the length is not a multiple of 16, take 16 bytes off the count so one full block is held back for the tail handling at the end. */
1515bc3d5698SJohn Baldwin	xorl	%ebx,%ebx
1516bc3d5698SJohn Baldwin	testl	$15,%eax
1517bc3d5698SJohn Baldwin	setnz	%bl
1518bc3d5698SJohn Baldwin	shll	$4,%ebx
1519bc3d5698SJohn Baldwin	subl	%ebx,%eax
/* Tweak-update constant {135, 0, 1, 0} (135 = 0x87). */
1520bc3d5698SJohn Baldwin	movl	$135,96(%esp)
1521bc3d5698SJohn Baldwin	movl	$0,100(%esp)
1522bc3d5698SJohn Baldwin	movl	$1,104(%esp)
1523bc3d5698SJohn Baldwin	movl	$0,108(%esp)
1524bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1525bc3d5698SJohn Baldwin	movl	%ebp,116(%esp)
/* From here on: %ebp = data key schedule, %ebx = its loop count, %xmm1 = running tweak, %xmm3 = constant, %xmm0 = sign mask of the tweak. */
1526bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1527bc3d5698SJohn Baldwin	movl	%edx,%ebp
1528bc3d5698SJohn Baldwin	movl	%ecx,%ebx
1529bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
1530bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1531bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
1532bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
/* Fewer than 96 whole-block bytes: skip the six-block loop. */
1533bc3d5698SJohn Baldwin	andl	$-16,%eax
1534bc3d5698SJohn Baldwin	subl	$96,%eax
1535bc3d5698SJohn Baldwin	jc	.L060xts_dec_short
/* Six-block loop setup: %edx = key + 32 + 16*count, %ebx = 16 - 16*count (copied into %ecx before each call below). */
1536bc3d5698SJohn Baldwin	shll	$4,%ecx
1537bc3d5698SJohn Baldwin	movl	$16,%ebx
1538bc3d5698SJohn Baldwin	subl	%ecx,%ebx
1539bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
1540bc3d5698SJohn Baldwin	jmp	.L061xts_dec_loop6
1541bc3d5698SJohn Baldwin.align	16
1542bc3d5698SJohn Baldwin.L061xts_dec_loop6:
/* Generate five consecutive tweaks into 0..64(%esp); the sixth is built in %xmm7 and saved at 80(%esp) further down. */
1543bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1544bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1545bc3d5698SJohn Baldwin	movdqa	%xmm1,(%esp)
1546bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1547bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1548bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1549bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1550bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1551bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1552bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
1553bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1554bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1555bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1556bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1557bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1558bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1559bc3d5698SJohn Baldwin	movdqa	%xmm1,32(%esp)
1560bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1561bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1562bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1563bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1564bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1565bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1566bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
1567bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1568bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1569bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1570bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1571bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
1572bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
1573bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
/* Load six input blocks and fold in the first round key (%xmm0) and the per-block tweaks; %xmm3 is reused for data, so the constant is reloaded after the call. */
1574bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
1575bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
1576bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1577bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1578bc3d5698SJohn Baldwin	movl	%ebx,%ecx
1579bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
1580bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1581bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1582bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1583bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1584bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
1585bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
1586bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
1587bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm1
1588bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
1589bc3d5698SJohn Baldwin	leal	96(%esi),%esi
1590bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
1591bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
1592bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1593bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
1594bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
1595bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
/* First aesdec round for %xmm2..%xmm7 is issued inline, interleaved with the remaining tweak XORs, before entering the shared six-block routine. */
1596bc3d5698SJohn Baldwin.byte	102,15,56,222,209
1597bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
1598bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
1599bc3d5698SJohn Baldwin.byte	102,15,56,222,217
1600bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
1601bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
1602bc3d5698SJohn Baldwin.byte	102,15,56,222,225
1603bc3d5698SJohn Baldwin.byte	102,15,56,222,233
1604bc3d5698SJohn Baldwin.byte	102,15,56,222,241
1605bc3d5698SJohn Baldwin.byte	102,15,56,222,249
/* .L_aesni_decrypt6_enter is defined outside this chunk; presumably it runs the remaining rounds on %xmm2..%xmm7 -- verify against its definition. */
1606bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
/* XOR each result with its tweak again and store six output blocks; meanwhile advance the running tweak from the sixth one (80(%esp)). */
1607bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm1
1608bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1609bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1610bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1611bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1612bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1613bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
1614bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1615bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
1616bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1617bc3d5698SJohn Baldwin	xorps	64(%esp),%xmm6
1618bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1619bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
1620bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
1621bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1622bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
1623bc3d5698SJohn Baldwin	leal	96(%edi),%edi
1624bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
1625bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1626bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1627bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1628bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1629bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1630bc3d5698SJohn Baldwin	subl	$96,%eax
1631bc3d5698SJohn Baldwin	jnc	.L061xts_dec_loop6
/* Loop finished: restore %ecx/%ebx = loop count and %edx = key schedule base for the short paths. */
1632bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
1633bc3d5698SJohn Baldwin	movl	%ebp,%edx
1634bc3d5698SJohn Baldwin	movl	%ecx,%ebx
1635bc3d5698SJohn Baldwin.L060xts_dec_short:
/* %eax = whole-block bytes left (0..80). Dispatch on 16/32/48/64/80; the tweaks are accumulated in %xmm5, %xmm6, %xmm7 on the way. */
1636bc3d5698SJohn Baldwin	addl	$96,%eax
1637bc3d5698SJohn Baldwin	jz	.L062xts_dec_done6x
1638bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
1639bc3d5698SJohn Baldwin	cmpl	$32,%eax
1640bc3d5698SJohn Baldwin	jb	.L063xts_dec_one
1641bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1642bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1643bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1644bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1645bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1646bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
/* Still testing the flags of "cmpl $32" above: the SSE instructions in between leave EFLAGS untouched. */
1647bc3d5698SJohn Baldwin	je	.L064xts_dec_two
1648bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1649bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1650bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
1651bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1652bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1653bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1654bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1655bc3d5698SJohn Baldwin	cmpl	$64,%eax
1656bc3d5698SJohn Baldwin	jb	.L065xts_dec_three
1657bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1658bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1659bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
1660bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1661bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1662bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1663bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1664bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
1665bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
/* Flags here are still those of "cmpl $64" above. */
1666bc3d5698SJohn Baldwin	je	.L066xts_dec_four
/* Five blocks: tweaks 1-4 at 0..48(%esp), the fifth built in %xmm7 and saved at 64(%esp). */
1667bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
1668bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
1669bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
1670bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1671bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
1672bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
1673bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
1674bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
1675bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1676bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
1677bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1678bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
1679bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
1680bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
1681bc3d5698SJohn Baldwin	leal	80(%esi),%esi
1682bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
1683bc3d5698SJohn Baldwin	movdqa	%xmm7,64(%esp)
1684bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm6
/* _aesni_decrypt6 is defined outside this chunk; only %xmm2..%xmm6 of its results are used below. */
1685bc3d5698SJohn Baldwin	call	_aesni_decrypt6
/* %xmm1 = last tweak used, so the tail handling can continue from it. */
1686bc3d5698SJohn Baldwin	movaps	64(%esp),%xmm1
1687bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1688bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1689bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
1690bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1691bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
1692bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1693bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
1694bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1695bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1696bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
1697bc3d5698SJohn Baldwin	leal	80(%edi),%edi
1698bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
1699bc3d5698SJohn Baldwin.align	16
1700bc3d5698SJohn Baldwin.L063xts_dec_one:
/* One block: XOR with tweak %xmm5, run the single-block aesdec loop, XOR with the tweak again. */
1701bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1702bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1703bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1704bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1705bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1706bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1707bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1708bc3d5698SJohn Baldwin.L068dec1_loop_12:
/* aesdec %xmm1,%xmm2 */
1709bc3d5698SJohn Baldwin.byte	102,15,56,222,209
1710bc3d5698SJohn Baldwin	decl	%ecx
1711bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1712bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1713bc3d5698SJohn Baldwin	jnz	.L068dec1_loop_12
/* aesdeclast %xmm1,%xmm2 */
1714bc3d5698SJohn Baldwin.byte	102,15,56,223,209
1715bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1716bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1717bc3d5698SJohn Baldwin	leal	16(%edi),%edi
/* %xmm1 was used for round keys; put the last tweak back for the tail handling. */
1718bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
1719bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
1720bc3d5698SJohn Baldwin.align	16
1721bc3d5698SJohn Baldwin.L064xts_dec_two:
/* Two blocks with tweaks %xmm5, %xmm6; _aesni_decrypt2 is defined outside this chunk. */
1722bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
1723bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1724bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1725bc3d5698SJohn Baldwin	leal	32(%esi),%esi
1726bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1727bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1728bc3d5698SJohn Baldwin	call	_aesni_decrypt2
1729bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1730bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1731bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1732bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1733bc3d5698SJohn Baldwin	leal	32(%edi),%edi
1734bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1735bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
1736bc3d5698SJohn Baldwin.align	16
1737bc3d5698SJohn Baldwin.L065xts_dec_three:
/* Three blocks with tweaks %xmm5, %xmm6, %xmm7; _aesni_decrypt3 is defined outside this chunk. */
1738bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm7
1739bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1740bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1741bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
1742bc3d5698SJohn Baldwin	leal	48(%esi),%esi
1743bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1744bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1745bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1746bc3d5698SJohn Baldwin	call	_aesni_decrypt3
1747bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1748bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
1749bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1750bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1751bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1752bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1753bc3d5698SJohn Baldwin	leal	48(%edi),%edi
1754bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
1755bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
1756bc3d5698SJohn Baldwin.align	16
1757bc3d5698SJohn Baldwin.L066xts_dec_four:
/* Four blocks: tweaks 1-2 at 0/16(%esp), third in %xmm7, fourth in %xmm6; _aesni_decrypt4 is defined outside this chunk. */
1758bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
1759bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1760bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
1761bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
1762bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1763bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
1764bc3d5698SJohn Baldwin	leal	64(%esi),%esi
1765bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1766bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1767bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
1768bc3d5698SJohn Baldwin	call	_aesni_decrypt4
1769bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
1770bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
1771bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
1772bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1773bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
1774bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
1775bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
1776bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
1777bc3d5698SJohn Baldwin	leal	64(%edi),%edi
1778bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1779bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
1780bc3d5698SJohn Baldwin.align	16
1781bc3d5698SJohn Baldwin.L062xts_dec_done6x:
/* No whole blocks left after the six-block loop. %xmm1 already holds the next unused tweak and %xmm0 its sign mask, so only one more tweak is needed if there is a partial tail. */
1782bc3d5698SJohn Baldwin	movl	112(%esp),%eax
1783bc3d5698SJohn Baldwin	andl	$15,%eax
1784bc3d5698SJohn Baldwin	jz	.L069xts_dec_ret
1785bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1786bc3d5698SJohn Baldwin	jmp	.L070xts_dec_only_one_more
1787bc3d5698SJohn Baldwin.align	16
1788bc3d5698SJohn Baldwin.L067xts_dec_done:
/* %xmm1 = last tweak used. If there is a partial tail (len & 15), advance to the next tweak and fall through; 112(%esp) is overwritten with the tail length. */
1789bc3d5698SJohn Baldwin	movl	112(%esp),%eax
1790bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1791bc3d5698SJohn Baldwin	andl	$15,%eax
1792bc3d5698SJohn Baldwin	jz	.L069xts_dec_ret
1793bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1794bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
1795bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
1796bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1797bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
1798bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1799bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
1800bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
1801bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1802bc3d5698SJohn Baldwin.L070xts_dec_only_one_more:
/* Tail handling: %xmm6 = next tweak, %xmm5 = the one after it. The held-back full block is decrypted with the later tweak (%xmm5) first. */
1803bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm5
1804bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
1805bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
1806bc3d5698SJohn Baldwin	pand	%xmm3,%xmm5
1807bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm5
1808bc3d5698SJohn Baldwin	movl	%ebp,%edx
1809bc3d5698SJohn Baldwin	movl	%ebx,%ecx
1810bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
1811bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1812bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1813bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1814bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1815bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1816bc3d5698SJohn Baldwin.L071dec1_loop_13:
/* aesdec %xmm1,%xmm2 */
1817bc3d5698SJohn Baldwin.byte	102,15,56,222,209
1818bc3d5698SJohn Baldwin	decl	%ecx
1819bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1820bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1821bc3d5698SJohn Baldwin	jnz	.L071dec1_loop_13
/* aesdeclast %xmm1,%xmm2 */
1822bc3d5698SJohn Baldwin.byte	102,15,56,223,209
1823bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
1824bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1825bc3d5698SJohn Baldwin.L072xts_dec_steal:
/* Byte-wise swap, %eax (tail length) iterations: the trailing input byte at 16(%esi) replaces the byte at (%edi), and the displaced byte moves to 16(%edi). */
1826bc3d5698SJohn Baldwin	movzbl	16(%esi),%ecx
1827bc3d5698SJohn Baldwin	movzbl	(%edi),%edx
1828bc3d5698SJohn Baldwin	leal	1(%esi),%esi
1829bc3d5698SJohn Baldwin	movb	%cl,(%edi)
1830bc3d5698SJohn Baldwin	movb	%dl,16(%edi)
1831bc3d5698SJohn Baldwin	leal	1(%edi),%edi
1832bc3d5698SJohn Baldwin	subl	$1,%eax
1833bc3d5698SJohn Baldwin	jnz	.L072xts_dec_steal
/* Rewind %edi by the tail length and decrypt the reassembled block in place with the earlier tweak (%xmm6). */
1834bc3d5698SJohn Baldwin	subl	112(%esp),%edi
1835bc3d5698SJohn Baldwin	movl	%ebp,%edx
1836bc3d5698SJohn Baldwin	movl	%ebx,%ecx
1837bc3d5698SJohn Baldwin	movups	(%edi),%xmm2
1838bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
1839bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1840bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1841bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1842bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1843bc3d5698SJohn Baldwin.L073dec1_loop_14:
/* aesdec %xmm1,%xmm2 */
1844bc3d5698SJohn Baldwin.byte	102,15,56,222,209
1845bc3d5698SJohn Baldwin	decl	%ecx
1846bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1847bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1848bc3d5698SJohn Baldwin	jnz	.L073dec1_loop_14
/* aesdeclast %xmm1,%xmm2 */
1849bc3d5698SJohn Baldwin.byte	102,15,56,223,209
1850bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
1851bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
1852bc3d5698SJohn Baldwin.L069xts_dec_ret:
/* Scrub: zero %xmm0-%xmm7 and the 96 bytes of tweak scratch, then restore the caller's %esp and the saved registers. */
1853bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1854bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
1855bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
1856bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
1857bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
1858bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%esp)
1859bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
1860bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
1861bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
1862bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
1863bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
1864bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
1865bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
1866bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
1867bc3d5698SJohn Baldwin	movl	116(%esp),%esp
1868bc3d5698SJohn Baldwin	popl	%edi
1869bc3d5698SJohn Baldwin	popl	%esi
1870bc3d5698SJohn Baldwin	popl	%ebx
1871bc3d5698SJohn Baldwin	popl	%ebp
1872bc3d5698SJohn Baldwin	ret
1873bc3d5698SJohn Baldwin.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
1874bc3d5698SJohn Baldwin.globl	aesni_ocb_encrypt
1875bc3d5698SJohn Baldwin.type	aesni_ocb_encrypt,@function
1876bc3d5698SJohn Baldwin.align	16
1877bc3d5698SJohn Baldwinaesni_ocb_encrypt:
1878bc3d5698SJohn Baldwin.L_aesni_ocb_encrypt_begin:
1879c0855eaaSJohn Baldwin	#ifdef __CET__
1880c0855eaaSJohn Baldwin
1881c0855eaaSJohn Baldwin.byte	243,15,30,251
1882c0855eaaSJohn Baldwin	#endif
1883c0855eaaSJohn Baldwin
1884bc3d5698SJohn Baldwin	pushl	%ebp
1885bc3d5698SJohn Baldwin	pushl	%ebx
1886bc3d5698SJohn Baldwin	pushl	%esi
1887bc3d5698SJohn Baldwin	pushl	%edi
1888bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
1889bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
1890bc3d5698SJohn Baldwin	movl	20(%esp),%esi
1891bc3d5698SJohn Baldwin	movl	24(%esp),%edi
1892bc3d5698SJohn Baldwin	movl	28(%esp),%eax
1893bc3d5698SJohn Baldwin	movl	32(%esp),%edx
1894bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm0
1895bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
1896bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm1
1897bc3d5698SJohn Baldwin	movl	44(%esp),%ebx
1898bc3d5698SJohn Baldwin	movl	%esp,%ecx
1899bc3d5698SJohn Baldwin	subl	$132,%esp
1900bc3d5698SJohn Baldwin	andl	$-16,%esp
1901bc3d5698SJohn Baldwin	subl	%esi,%edi
1902bc3d5698SJohn Baldwin	shll	$4,%eax
1903bc3d5698SJohn Baldwin	leal	-96(%esi,%eax,1),%eax
1904bc3d5698SJohn Baldwin	movl	%edi,120(%esp)
1905bc3d5698SJohn Baldwin	movl	%eax,124(%esp)
1906bc3d5698SJohn Baldwin	movl	%ecx,128(%esp)
1907bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
1908bc3d5698SJohn Baldwin	testl	$1,%ebp
1909bc3d5698SJohn Baldwin	jnz	.L074odd
1910bc3d5698SJohn Baldwin	bsfl	%ebp,%eax
1911bc3d5698SJohn Baldwin	addl	$1,%ebp
1912bc3d5698SJohn Baldwin	shll	$4,%eax
1913bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
1914bc3d5698SJohn Baldwin	movl	%edx,%eax
1915bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
1916bc3d5698SJohn Baldwin	leal	16(%esi),%esi
1917bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
1918bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1919bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
1920bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
1921bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
1922bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
1923bc3d5698SJohn Baldwin	leal	32(%edx),%edx
1924bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
1925bc3d5698SJohn Baldwin.L075enc1_loop_15:
1926bc3d5698SJohn Baldwin.byte	102,15,56,220,209
1927bc3d5698SJohn Baldwin	decl	%ecx
1928bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
1929bc3d5698SJohn Baldwin	leal	16(%edx),%edx
1930bc3d5698SJohn Baldwin	jnz	.L075enc1_loop_15
1931bc3d5698SJohn Baldwin.byte	102,15,56,221,209
1932bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
1933bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
1934bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
1935bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi,%esi,1)
1936bc3d5698SJohn Baldwin	movl	240(%eax),%ecx
1937bc3d5698SJohn Baldwin	movl	%eax,%edx
1938bc3d5698SJohn Baldwin	movl	124(%esp),%eax
1939bc3d5698SJohn Baldwin.L074odd:
1940bc3d5698SJohn Baldwin	shll	$4,%ecx
1941bc3d5698SJohn Baldwin	movl	$16,%edi
1942bc3d5698SJohn Baldwin	subl	%ecx,%edi
1943bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
1944bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
1945bc3d5698SJohn Baldwin	movl	%edi,116(%esp)
1946bc3d5698SJohn Baldwin	cmpl	%eax,%esi
1947bc3d5698SJohn Baldwin	ja	.L076short
1948bc3d5698SJohn Baldwin	jmp	.L077grandloop
1949bc3d5698SJohn Baldwin.align	32
1950bc3d5698SJohn Baldwin.L077grandloop:
1951bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
1952bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
1953bc3d5698SJohn Baldwin	leal	5(%ebp),%edi
1954bc3d5698SJohn Baldwin	addl	$6,%ebp
1955bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
1956bc3d5698SJohn Baldwin	bsfl	%eax,%eax
1957bc3d5698SJohn Baldwin	bsfl	%edi,%edi
1958bc3d5698SJohn Baldwin	shll	$4,%ecx
1959bc3d5698SJohn Baldwin	shll	$4,%eax
1960bc3d5698SJohn Baldwin	shll	$4,%edi
1961bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
1962bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
1963bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
1964bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
1965bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
1966bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
1967bc3d5698SJohn Baldwin	movdqu	(%ebx,%edi,1),%xmm7
1968bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
1969bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
1970bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
1971bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
1972bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
1973bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
1974bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
1975bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
1976bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
1977bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
1978bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
1979bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
1980bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
1981bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
1982bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
1983bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
1984bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
1985bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
1986bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
1987bc3d5698SJohn Baldwin	leal	96(%esi),%esi
1988bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
1989bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
1990bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
1991bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
1992bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
1993bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
1994bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
1995bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
1996bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
1997bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
1998bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm1
1999bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2000bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2001bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
2002bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2003bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2004bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2005bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2006bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2007bc3d5698SJohn Baldwin	pxor	80(%esp),%xmm7
2008bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
2009bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2010bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2011bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2012bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2013bc3d5698SJohn Baldwin.byte	102,15,56,220,241
2014bc3d5698SJohn Baldwin.byte	102,15,56,220,249
2015bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2016bc3d5698SJohn Baldwin	movl	124(%esp),%eax
2017bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
2018bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm0
2019bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2020bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2021bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2022bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2023bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2024bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2025bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2026bc3d5698SJohn Baldwin	movdqu	%xmm2,-96(%edi,%esi,1)
2027bc3d5698SJohn Baldwin	movdqu	%xmm3,-80(%edi,%esi,1)
2028bc3d5698SJohn Baldwin	movdqu	%xmm4,-64(%edi,%esi,1)
2029bc3d5698SJohn Baldwin	movdqu	%xmm5,-48(%edi,%esi,1)
2030bc3d5698SJohn Baldwin	movdqu	%xmm6,-32(%edi,%esi,1)
2031bc3d5698SJohn Baldwin	movdqu	%xmm7,-16(%edi,%esi,1)
2032bc3d5698SJohn Baldwin	cmpl	%eax,%esi
20339576bca5SJung-uk Kim	jbe	.L077grandloop
2034bc3d5698SJohn Baldwin.L076short:
2035bc3d5698SJohn Baldwin	addl	$96,%eax
2036bc3d5698SJohn Baldwin	subl	%esi,%eax
2037bc3d5698SJohn Baldwin	jz	.L078done
2038bc3d5698SJohn Baldwin	cmpl	$32,%eax
2039bc3d5698SJohn Baldwin	jb	.L079one
2040bc3d5698SJohn Baldwin	je	.L080two
2041bc3d5698SJohn Baldwin	cmpl	$64,%eax
2042bc3d5698SJohn Baldwin	jb	.L081three
2043bc3d5698SJohn Baldwin	je	.L082four
2044bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2045bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
2046bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2047bc3d5698SJohn Baldwin	bsfl	%eax,%eax
2048bc3d5698SJohn Baldwin	shll	$4,%ecx
2049bc3d5698SJohn Baldwin	shll	$4,%eax
2050bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
2051bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
2052bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
2053bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
2054bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
2055bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
2056bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2057bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
2058bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2059bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
2060bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
2061bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
2062bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
2063bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2064bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
2065bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2066bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
2067bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
2068bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2069bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2070bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2071bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2072bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
2073bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2074bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2075bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2076bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2077bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
2078bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2079bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
2080bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
2081bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
2082bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
2083bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2084bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2085bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
2086bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2087bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2088bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2089bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2090bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2091bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
2092bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2093bc3d5698SJohn Baldwin.byte	102,15,56,220,217
2094bc3d5698SJohn Baldwin.byte	102,15,56,220,225
2095bc3d5698SJohn Baldwin.byte	102,15,56,220,233
2096bc3d5698SJohn Baldwin.byte	102,15,56,220,241
2097bc3d5698SJohn Baldwin.byte	102,15,56,220,249
2098bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2099bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
2100bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm0
2101bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2102bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2103bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2104bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2105bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2106bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2107bc3d5698SJohn Baldwin	movdqu	%xmm2,(%edi,%esi,1)
2108bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%edi,%esi,1)
2109bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%edi,%esi,1)
2110bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%edi,%esi,1)
2111bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%edi,%esi,1)
2112bc3d5698SJohn Baldwin	jmp	.L078done
2113bc3d5698SJohn Baldwin.align	16
2114bc3d5698SJohn Baldwin.L079one:
2115bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
2116bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2117bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2118bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2119bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2120bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2121bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
2122bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
2123bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2124bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
2125bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
2126bc3d5698SJohn Baldwin	leal	32(%edx),%edx
2127bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2128bc3d5698SJohn Baldwin.L083enc1_loop_16:
2129bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2130bc3d5698SJohn Baldwin	decl	%ecx
2131bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
2132bc3d5698SJohn Baldwin	leal	16(%edx),%edx
2133bc3d5698SJohn Baldwin	jnz	.L083enc1_loop_16
2134bc3d5698SJohn Baldwin.byte	102,15,56,221,209
2135bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2136bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2137bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
2138bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2139bc3d5698SJohn Baldwin	jmp	.L078done
2140bc3d5698SJohn Baldwin.align	16
2141bc3d5698SJohn Baldwin.L080two:
2142bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2143bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2144bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2145bc3d5698SJohn Baldwin	shll	$4,%ecx
2146bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm6
2147bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm7
2148bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2149bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2150bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2151bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2152bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2153bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2154bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm2
2155bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2156bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm3
2157bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
2158bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2159bc3d5698SJohn Baldwin	call	_aesni_encrypt2
2160bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
2161bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
2162bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2163bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
2164bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2165bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2166bc3d5698SJohn Baldwin	jmp	.L078done
2167bc3d5698SJohn Baldwin.align	16
2168bc3d5698SJohn Baldwin.L081three:
2169bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2170bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2171bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2172bc3d5698SJohn Baldwin	shll	$4,%ecx
2173bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm5
2174bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm6
2175bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm7
2176bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2177bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2178bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2179bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2180bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
2181bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2182bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2183bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2184bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
2185bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2186bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm3
2187bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2188bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm4
2189bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2190bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2191bc3d5698SJohn Baldwin	call	_aesni_encrypt3
2192bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
2193bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
2194bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
2195bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2196bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2197bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2198bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2199bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
2200bc3d5698SJohn Baldwin	jmp	.L078done
2201bc3d5698SJohn Baldwin.align	16
2202bc3d5698SJohn Baldwin.L082four:
2203bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2204bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
2205bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2206bc3d5698SJohn Baldwin	bsfl	%eax,%eax
2207bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2208bc3d5698SJohn Baldwin	shll	$4,%ecx
2209bc3d5698SJohn Baldwin	shll	$4,%eax
2210bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm4
2211bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm5
2212bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
2213bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
2214bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
2215bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2216bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
2217bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2218bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2219bc3d5698SJohn Baldwin	movdqa	%xmm4,(%esp)
2220bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2221bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%esp)
2222bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2223bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2224bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2225bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2226bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2227bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2228bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2229bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2230bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm4
2231bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
2232bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm5
2233bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2234bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2235bc3d5698SJohn Baldwin	call	_aesni_encrypt4
2236bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
2237bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
2238bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm4
2239bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2240bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm5
2241bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2242bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2243bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
2244bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2245bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi,%esi,1)
2246bc3d5698SJohn Baldwin.L078done:
2247bc3d5698SJohn Baldwin	movl	128(%esp),%edx
2248bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2249bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2250bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2251bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2252bc3d5698SJohn Baldwin	movdqa	%xmm2,16(%esp)
2253bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2254bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
2255bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2256bc3d5698SJohn Baldwin	movdqa	%xmm2,48(%esp)
2257bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2258bc3d5698SJohn Baldwin	movdqa	%xmm2,64(%esp)
2259bc3d5698SJohn Baldwin	movdqa	%xmm2,80(%esp)
2260bc3d5698SJohn Baldwin	movdqa	%xmm2,96(%esp)
2261bc3d5698SJohn Baldwin	leal	(%edx),%esp
2262bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
2263bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
2264bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ecx)
2265bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
2266bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
2267bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2268bc3d5698SJohn Baldwin	popl	%edi
2269bc3d5698SJohn Baldwin	popl	%esi
2270bc3d5698SJohn Baldwin	popl	%ebx
2271bc3d5698SJohn Baldwin	popl	%ebp
2272bc3d5698SJohn Baldwin	ret
2273bc3d5698SJohn Baldwin.size	aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
/* aesni_ocb_decrypt: decrypts 16-byte blocks with AES-NI, xoring a per-block offset in before and after the cipher and folding every output block into a running xor checksum. */
/* Stack arguments as read after the four register pushes (names are descriptive, taken from how the code uses them): */
/*   20(%esp) -> %esi : input pointer */
/*   24(%esp) -> %edi : output pointer (kept as the difference output - input) */
/*   28(%esp) -> %eax : number of 16-byte blocks */
/*   32(%esp) -> %edx : key schedule; round counter is read from offset 240 */
/*   36(%esp) -> %ebp : block counter; bsf of it selects table entries */
/*   40(%esp)         : pointer to the 16-byte running offset (loaded into %xmm0, written back at exit) */
/*   44(%esp) -> %ebx : table of 16-byte entries indexed by 16*bsf(counter) */
/*   48(%esp)         : pointer to the 16-byte checksum (loaded into %xmm1, written back at exit) */
/* Scratch frame (16-byte aligned): 0..95(%esp) per-block offsets, 96(%esp) saved checksum, 112 key pointer, 116 = 16 - 16*rounds, 120 = out - in, 124 = input end - 96, 128 = caller %esp. */
/* NOTE(review): this looks like the OCB mode of RFC 7253 (Offset, L_ table, Checksum) - confirm against the generator script. */
2274bc3d5698SJohn Baldwin.globl	aesni_ocb_decrypt
2275bc3d5698SJohn Baldwin.type	aesni_ocb_decrypt,@function
2276bc3d5698SJohn Baldwin.align	16
2277bc3d5698SJohn Baldwinaesni_ocb_decrypt:
2278bc3d5698SJohn Baldwin.L_aesni_ocb_decrypt_begin:
/* The bytes F3 0F 1E FB encode endbr32; they are emitted only when __CET__ is defined. */
2279c0855eaaSJohn Baldwin	#ifdef __CET__
2280c0855eaaSJohn Baldwin
2281c0855eaaSJohn Baldwin.byte	243,15,30,251
2282c0855eaaSJohn Baldwin	#endif
2283c0855eaaSJohn Baldwin
/* Save %ebp, %ebx, %esi and %edi; they are popped again just before ret. */
2284bc3d5698SJohn Baldwin	pushl	%ebp
2285bc3d5698SJohn Baldwin	pushl	%ebx
2286bc3d5698SJohn Baldwin	pushl	%esi
2287bc3d5698SJohn Baldwin	pushl	%edi
/* Fetch the arguments; %ecx and %ebx are used as temporaries for the offset and checksum pointers. */
2288bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
2289bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
2290bc3d5698SJohn Baldwin	movl	20(%esp),%esi
2291bc3d5698SJohn Baldwin	movl	24(%esp),%edi
2292bc3d5698SJohn Baldwin	movl	28(%esp),%eax
2293bc3d5698SJohn Baldwin	movl	32(%esp),%edx
2294bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm0
2295bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
2296bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm1
2297bc3d5698SJohn Baldwin	movl	44(%esp),%ebx
/* Carve out the aligned scratch frame; the caller %esp travels in %ecx until it is stored at 128(%esp). */
2298bc3d5698SJohn Baldwin	movl	%esp,%ecx
2299bc3d5698SJohn Baldwin	subl	$132,%esp
2300bc3d5698SJohn Baldwin	andl	$-16,%esp
/* %edi = out - in, so (%edi,%esi,1) is the output slot matching the input at %esi. */
2301bc3d5698SJohn Baldwin	subl	%esi,%edi
/* %eax = in + 16*blocks - 96: the last input position from which six whole blocks remain. */
2302bc3d5698SJohn Baldwin	shll	$4,%eax
2303bc3d5698SJohn Baldwin	leal	-96(%esi,%eax,1),%eax
2304bc3d5698SJohn Baldwin	movl	%edi,120(%esp)
2305bc3d5698SJohn Baldwin	movl	%eax,124(%esp)
2306bc3d5698SJohn Baldwin	movl	%ecx,128(%esp)
2307bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/* The multi-block code below assumes an odd counter; when it is even, one block is processed first and the counter is bumped. */
/* NOTE(review): this pre-step runs before any check of the block count, and bsf leaves its destination undefined for a zero source - presumably callers never pass zero blocks or a zero counter; confirm. */
2308bc3d5698SJohn Baldwin	testl	$1,%ebp
2309bc3d5698SJohn Baldwin	jnz	.L084odd
2310bc3d5698SJohn Baldwin	bsfl	%ebp,%eax
2311bc3d5698SJohn Baldwin	addl	$1,%ebp
2312bc3d5698SJohn Baldwin	shll	$4,%eax
2313bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
/* Keep the key pointer in %eax because %edx is advanced by the round loop. */
2314bc3d5698SJohn Baldwin	movl	%edx,%eax
2315bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2316bc3d5698SJohn Baldwin	leal	16(%esi),%esi
/* %xmm7 = new offset (old offset ^ table entry); xor it into the input block; park the checksum in %xmm6. */
2317bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2318bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
2319bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
/* Single-block decrypt: xor with round key 0, then %ecx iterations of aesdec and one aesdeclast. */
2320bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
2321bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
2322bc3d5698SJohn Baldwin	leal	32(%edx),%edx
2323bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2324bc3d5698SJohn Baldwin.L085dec1_loop_17:
/* aesdec %xmm1,%xmm2 */
2325bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2326bc3d5698SJohn Baldwin	decl	%ecx
2327bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
2328bc3d5698SJohn Baldwin	leal	16(%edx),%edx
2329bc3d5698SJohn Baldwin	jnz	.L085dec1_loop_17
/* aesdeclast %xmm1,%xmm2 */
2330bc3d5698SJohn Baldwin.byte	102,15,56,223,209
/* Strip the offset to obtain the output block, fold it into the checksum, and make %xmm7 the running offset. */
2331bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2332bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm1
2333bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2334bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm1
2335bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi,%esi,1)
/* Restore the round counter, key pointer and loop bound that the pre-step clobbered. */
2336bc3d5698SJohn Baldwin	movl	240(%eax),%ecx
2337bc3d5698SJohn Baldwin	movl	%eax,%edx
2338bc3d5698SJohn Baldwin	movl	124(%esp),%eax
/* From here on the counter in %ebp is odd. */
/* Set up key addressing: %edx = key + 32 + 16*rounds and 116(%esp) = 16 - 16*rounds, so with %ecx = 116(%esp) the operands -48/-32/-16(%edx,%ecx,1) reach round keys 0/1/2. */
2339bc3d5698SJohn Baldwin.L084odd:
2340bc3d5698SJohn Baldwin	shll	$4,%ecx
2341bc3d5698SJohn Baldwin	movl	$16,%edi
2342bc3d5698SJohn Baldwin	subl	%ecx,%edi
2343bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
2344bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
2345bc3d5698SJohn Baldwin	movl	%edi,116(%esp)
/* Input pointer beyond the bound means fewer than six blocks remain: go to the tail dispatch. */
2346bc3d5698SJohn Baldwin	cmpl	%eax,%esi
2347bc3d5698SJohn Baldwin	ja	.L086short
2348bc3d5698SJohn Baldwin	jmp	.L087grandloop
2349bc3d5698SJohn Baldwin.align	32
/* Main loop: six blocks per iteration. */
2350bc3d5698SJohn Baldwin.L087grandloop:
/* With an odd counter, counter+1/+3/+5 are even and need a bsf lookup; the other three blocks have bsf == 0 and use the first table entry at (%ebx). */
2351bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2352bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
2353bc3d5698SJohn Baldwin	leal	5(%ebp),%edi
2354bc3d5698SJohn Baldwin	addl	$6,%ebp
2355bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2356bc3d5698SJohn Baldwin	bsfl	%eax,%eax
2357bc3d5698SJohn Baldwin	bsfl	%edi,%edi
2358bc3d5698SJohn Baldwin	shll	$4,%ecx
2359bc3d5698SJohn Baldwin	shll	$4,%eax
2360bc3d5698SJohn Baldwin	shll	$4,%edi
2361bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
2362bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
2363bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
2364bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
2365bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
2366bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
2367bc3d5698SJohn Baldwin	movdqu	(%ebx,%edi,1),%xmm7
/* Chain the six offsets (each = previous offset ^ its table entry) and spill them to 0..80(%esp). */
2368bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2369bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
2370bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2371bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
2372bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
2373bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
2374bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
2375bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2376bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
2377bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2378bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
2379bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
/* %xmm0 = round key 0; load six input blocks and advance the input pointer. */
2380bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
2381bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2382bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2383bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2384bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2385bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
2386bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
2387bc3d5698SJohn Baldwin	leal	96(%esi),%esi
/* Spill the checksum: %xmm1 is about to hold a round key. */
2388bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2389bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2390bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
2391bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
2392bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
2393bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2394bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
/* %xmm1 = round key 1; xor each block with its own offset. */
2395bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
2396bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2397bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2398bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2399bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2400bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2401bc3d5698SJohn Baldwin	pxor	80(%esp),%xmm7
/* %xmm0 = round key 2, presumably consumed by the shared routine entered below. */
2402bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
/* First round: aesdec %xmm1 into %xmm2 .. %xmm7. */
2403bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2404bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2405bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2406bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2407bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2408bc3d5698SJohn Baldwin.byte	102,15,56,222,249
/* bsf overwrote %edi and %eax; reload the out - in delta and the loop bound. */
2409bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2410bc3d5698SJohn Baldwin	movl	124(%esp),%eax
/* Enter the shared six-block decrypt routine part-way (label defined outside this block). */
2411bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
/* The sixth offset becomes the running offset; strip each block's offset, fold the block into the checksum, and store it. */
2412bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm0
2413bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2414bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2415bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2416bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2417bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2418bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2419bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2420bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2421bc3d5698SJohn Baldwin	movdqu	%xmm2,-96(%edi,%esi,1)
2422bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2423bc3d5698SJohn Baldwin	movdqu	%xmm3,-80(%edi,%esi,1)
2424bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2425bc3d5698SJohn Baldwin	movdqu	%xmm4,-64(%edi,%esi,1)
2426bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
2427bc3d5698SJohn Baldwin	movdqu	%xmm5,-48(%edi,%esi,1)
2428bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
2429bc3d5698SJohn Baldwin	movdqu	%xmm6,-32(%edi,%esi,1)
2430bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm1
2431bc3d5698SJohn Baldwin	movdqu	%xmm7,-16(%edi,%esi,1)
/* Repeat while the input pointer has not passed the six-block bound. */
2432bc3d5698SJohn Baldwin	cmpl	%eax,%esi
24339576bca5SJung-uk Kim	jbe	.L087grandloop
/* Tail dispatch: %eax = bytes remaining. 0 exits, 16/32/48/64 branch to one/two/three/four, anything else falls through to the five-block code. */
2434bc3d5698SJohn Baldwin.L086short:
2435bc3d5698SJohn Baldwin	addl	$96,%eax
2436bc3d5698SJohn Baldwin	subl	%esi,%eax
2437bc3d5698SJohn Baldwin	jz	.L088done
2438bc3d5698SJohn Baldwin	cmpl	$32,%eax
2439bc3d5698SJohn Baldwin	jb	.L089one
2440bc3d5698SJohn Baldwin	je	.L090two
2441bc3d5698SJohn Baldwin	cmpl	$64,%eax
2442bc3d5698SJohn Baldwin	jb	.L091three
2443bc3d5698SJohn Baldwin	je	.L092four
/* Five-block tail: same scheme as the main loop, with five offsets kept at 0..64(%esp). */
2444bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2445bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
2446bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2447bc3d5698SJohn Baldwin	bsfl	%eax,%eax
2448bc3d5698SJohn Baldwin	shll	$4,%ecx
2449bc3d5698SJohn Baldwin	shll	$4,%eax
2450bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
2451bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
2452bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
2453bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
2454bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
2455bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
2456bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2457bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
2458bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2459bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
2460bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
2461bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
2462bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
2463bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2464bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
/* NOTE(review): the %xmm7 value produced by the next instruction is never read; %xmm7 is cleared below before its next use. */
2465bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2466bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
2467bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
2468bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2469bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2470bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2471bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2472bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
/* The sixth lane gets a zero input; its result is not stored. */
2473bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2474bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2475bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
2476bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
2477bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
2478bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
2479bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2480bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
2481bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2482bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2483bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2484bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2485bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
2486bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
/* First round: aesdec %xmm1 into %xmm2 .. %xmm7. */
2487bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2488bc3d5698SJohn Baldwin.byte	102,15,56,222,217
2489bc3d5698SJohn Baldwin.byte	102,15,56,222,225
2490bc3d5698SJohn Baldwin.byte	102,15,56,222,233
2491bc3d5698SJohn Baldwin.byte	102,15,56,222,241
2492bc3d5698SJohn Baldwin.byte	102,15,56,222,249
2493bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2494bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
/* The fifth offset becomes the running offset; %esi was not advanced, so outputs go to 0..64(%edi,%esi,1). */
2495bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm0
2496bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2497bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2498bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2499bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
2500bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
2501bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2502bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2503bc3d5698SJohn Baldwin	movdqu	%xmm2,(%edi,%esi,1)
2504bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2505bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%edi,%esi,1)
2506bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2507bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%edi,%esi,1)
2508bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
2509bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%edi,%esi,1)
2510bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
2511bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%edi,%esi,1)
2512bc3d5698SJohn Baldwin	jmp	.L088done
2513bc3d5698SJohn Baldwin.align	16
/* One-block tail: offset = running offset ^ first table entry; the checksum is parked in %xmm6 while %xmm1 holds round keys. */
2514bc3d5698SJohn Baldwin.L089one:
2515bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
2516bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2517bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2518bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2519bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
2520bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
2521bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
2522bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2523bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
2524bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
2525bc3d5698SJohn Baldwin	leal	32(%edx),%edx
2526bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2527bc3d5698SJohn Baldwin.L093dec1_loop_18:
/* aesdec %xmm1,%xmm2 */
2528bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2529bc3d5698SJohn Baldwin	decl	%ecx
2530bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
2531bc3d5698SJohn Baldwin	leal	16(%edx),%edx
2532bc3d5698SJohn Baldwin	jnz	.L093dec1_loop_18
/* aesdeclast %xmm1,%xmm2 */
2533bc3d5698SJohn Baldwin.byte	102,15,56,223,209
2534bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2535bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm1
2536bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2537bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm1
2538bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2539bc3d5698SJohn Baldwin	jmp	.L088done
2540bc3d5698SJohn Baldwin.align	16
/* Two-block tail: offsets in %xmm6/%xmm7, checksum carried in %xmm5 across the call. */
/* NOTE(review): this relies on _aesni_decrypt2 (not visible here) leaving %xmm5-%xmm7 untouched - confirm. */
2541bc3d5698SJohn Baldwin.L090two:
2542bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2543bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2544bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2545bc3d5698SJohn Baldwin	shll	$4,%ecx
2546bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm6
2547bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm7
2548bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2549bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2550bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2551bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
2552bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
2553bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2554bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm2
2555bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm3
2556bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2557bc3d5698SJohn Baldwin	call	_aesni_decrypt2
2558bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
2559bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
2560bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2561bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm5
2562bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2563bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm5
2564bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2565bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm1
2566bc3d5698SJohn Baldwin	jmp	.L088done
2567bc3d5698SJohn Baldwin.align	16
/* Three-block tail: offsets in %xmm5/%xmm6/%xmm7, checksum spilled to 96(%esp) across the call. */
2568bc3d5698SJohn Baldwin.L091three:
2569bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2570bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2571bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2572bc3d5698SJohn Baldwin	shll	$4,%ecx
2573bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm5
2574bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm6
2575bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm7
2576bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2577bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2578bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2579bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2580bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2581bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
2582bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2583bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2584bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
2585bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm3
2586bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm4
2587bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2588bc3d5698SJohn Baldwin	call	_aesni_decrypt3
2589bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2590bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
2591bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
2592bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
2593bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2594bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2595bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2596bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2597bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2598bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
2599bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2600bc3d5698SJohn Baldwin	jmp	.L088done
2601bc3d5698SJohn Baldwin.align	16
/* Four-block tail: the first two offsets are spilled to 0/16(%esp), the last two stay in %xmm6/%xmm7, and the checksum is spilled to 96(%esp). */
2602bc3d5698SJohn Baldwin.L092four:
2603bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
2604bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
2605bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
2606bc3d5698SJohn Baldwin	bsfl	%eax,%eax
2607bc3d5698SJohn Baldwin	movl	112(%esp),%edx
2608bc3d5698SJohn Baldwin	shll	$4,%ecx
2609bc3d5698SJohn Baldwin	shll	$4,%eax
2610bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm4
2611bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm5
2612bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
2613bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
2614bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
2615bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2616bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
2617bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2618bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
2619bc3d5698SJohn Baldwin	movdqa	%xmm4,(%esp)
2620bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
2621bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%esp)
2622bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2623bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2624bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2625bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
2626bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
2627bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
2628bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm4
2629bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm5
2630bc3d5698SJohn Baldwin	movl	120(%esp),%edi
2631bc3d5698SJohn Baldwin	call	_aesni_decrypt4
2632bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
2633bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
2634bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
2635bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm4
2636bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
2637bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
2638bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm5
2639bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
2640bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
2641bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
2642bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
2643bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
2644bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi,%esi,1)
2645bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
/* Common exit: zero %xmm2-%xmm7 and the scratch area 0..111(%esp), then restore the caller %esp from 128(%esp). */
2646bc3d5698SJohn Baldwin.L088done:
2647bc3d5698SJohn Baldwin	movl	128(%esp),%edx
2648bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2649bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2650bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2651bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2652bc3d5698SJohn Baldwin	movdqa	%xmm2,16(%esp)
2653bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2654bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
2655bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2656bc3d5698SJohn Baldwin	movdqa	%xmm2,48(%esp)
2657bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2658bc3d5698SJohn Baldwin	movdqa	%xmm2,64(%esp)
2659bc3d5698SJohn Baldwin	movdqa	%xmm2,80(%esp)
2660bc3d5698SJohn Baldwin	movdqa	%xmm2,96(%esp)
2661bc3d5698SJohn Baldwin	leal	(%edx),%esp
/* Write the final running offset and checksum back through the 40(%esp) and 48(%esp) pointers, then zero %xmm0/%xmm1. */
2662bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
2663bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
2664bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ecx)
2665bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
2666bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
2667bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2668bc3d5698SJohn Baldwin	popl	%edi
2669bc3d5698SJohn Baldwin	popl	%esi
2670bc3d5698SJohn Baldwin	popl	%ebx
2671bc3d5698SJohn Baldwin	popl	%ebp
2672bc3d5698SJohn Baldwin	ret
2673bc3d5698SJohn Baldwin.size	aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
2674bc3d5698SJohn Baldwin.globl	aesni_cbc_encrypt
2675bc3d5698SJohn Baldwin.type	aesni_cbc_encrypt,@function
2676bc3d5698SJohn Baldwin.align	16
2677bc3d5698SJohn Baldwinaesni_cbc_encrypt:
2678bc3d5698SJohn Baldwin.L_aesni_cbc_encrypt_begin:
2679c0855eaaSJohn Baldwin	#ifdef __CET__
2680c0855eaaSJohn Baldwin
2681c0855eaaSJohn Baldwin.byte	243,15,30,251
2682c0855eaaSJohn Baldwin	#endif
2683c0855eaaSJohn Baldwin
2684bc3d5698SJohn Baldwin	pushl	%ebp
2685bc3d5698SJohn Baldwin	pushl	%ebx
2686bc3d5698SJohn Baldwin	pushl	%esi
2687bc3d5698SJohn Baldwin	pushl	%edi
2688bc3d5698SJohn Baldwin	movl	20(%esp),%esi
2689bc3d5698SJohn Baldwin	movl	%esp,%ebx
2690bc3d5698SJohn Baldwin	movl	24(%esp),%edi
2691bc3d5698SJohn Baldwin	subl	$24,%ebx
2692bc3d5698SJohn Baldwin	movl	28(%esp),%eax
2693bc3d5698SJohn Baldwin	andl	$-16,%ebx
2694bc3d5698SJohn Baldwin	movl	32(%esp),%edx
2695bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
2696bc3d5698SJohn Baldwin	testl	%eax,%eax
2697bc3d5698SJohn Baldwin	jz	.L094cbc_abort
2698bc3d5698SJohn Baldwin	cmpl	$0,40(%esp)
2699bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
2700bc3d5698SJohn Baldwin	movups	(%ebp),%xmm7
2701bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
2702bc3d5698SJohn Baldwin	movl	%edx,%ebp
2703bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
2704bc3d5698SJohn Baldwin	movl	%ecx,%ebx
2705bc3d5698SJohn Baldwin	je	.L095cbc_decrypt
2706bc3d5698SJohn Baldwin	movaps	%xmm7,%xmm2
2707bc3d5698SJohn Baldwin	cmpl	$16,%eax
2708bc3d5698SJohn Baldwin	jb	.L096cbc_enc_tail
2709bc3d5698SJohn Baldwin	subl	$16,%eax
2710bc3d5698SJohn Baldwin	jmp	.L097cbc_enc_loop
2711bc3d5698SJohn Baldwin.align	16
2712bc3d5698SJohn Baldwin.L097cbc_enc_loop:
2713bc3d5698SJohn Baldwin	movups	(%esi),%xmm7
2714bc3d5698SJohn Baldwin	leal	16(%esi),%esi
2715bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
2716bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
2717bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm7
2718bc3d5698SJohn Baldwin	leal	32(%edx),%edx
2719bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2720bc3d5698SJohn Baldwin.L098enc1_loop_19:
2721bc3d5698SJohn Baldwin.byte	102,15,56,220,209
2722bc3d5698SJohn Baldwin	decl	%ecx
2723bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
2724bc3d5698SJohn Baldwin	leal	16(%edx),%edx
2725bc3d5698SJohn Baldwin	jnz	.L098enc1_loop_19
2726bc3d5698SJohn Baldwin.byte	102,15,56,221,209
2727bc3d5698SJohn Baldwin	movl	%ebx,%ecx
2728bc3d5698SJohn Baldwin	movl	%ebp,%edx
2729bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2730bc3d5698SJohn Baldwin	leal	16(%edi),%edi
2731bc3d5698SJohn Baldwin	subl	$16,%eax
2732bc3d5698SJohn Baldwin	jnc	.L097cbc_enc_loop
2733bc3d5698SJohn Baldwin	addl	$16,%eax
2734bc3d5698SJohn Baldwin	jnz	.L096cbc_enc_tail
2735bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm7
2736bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2737bc3d5698SJohn Baldwin	jmp	.L099cbc_ret
2738bc3d5698SJohn Baldwin.L096cbc_enc_tail:
2739bc3d5698SJohn Baldwin	movl	%eax,%ecx
2740bc3d5698SJohn Baldwin.long	2767451785
2741bc3d5698SJohn Baldwin	movl	$16,%ecx
2742bc3d5698SJohn Baldwin	subl	%eax,%ecx
2743bc3d5698SJohn Baldwin	xorl	%eax,%eax
2744bc3d5698SJohn Baldwin.long	2868115081
2745bc3d5698SJohn Baldwin	leal	-16(%edi),%edi
2746bc3d5698SJohn Baldwin	movl	%ebx,%ecx
2747bc3d5698SJohn Baldwin	movl	%edi,%esi
2748bc3d5698SJohn Baldwin	movl	%ebp,%edx
2749bc3d5698SJohn Baldwin	jmp	.L097cbc_enc_loop
2750bc3d5698SJohn Baldwin.align	16
2751bc3d5698SJohn Baldwin.L095cbc_decrypt:
2752bc3d5698SJohn Baldwin	cmpl	$80,%eax
2753bc3d5698SJohn Baldwin	jbe	.L100cbc_dec_tail
2754bc3d5698SJohn Baldwin	movaps	%xmm7,(%esp)
2755bc3d5698SJohn Baldwin	subl	$80,%eax
2756bc3d5698SJohn Baldwin	jmp	.L101cbc_dec_loop6_enter
2757bc3d5698SJohn Baldwin.align	16
2758bc3d5698SJohn Baldwin.L102cbc_dec_loop6:
2759bc3d5698SJohn Baldwin	movaps	%xmm0,(%esp)
2760bc3d5698SJohn Baldwin	movups	%xmm7,(%edi)
2761bc3d5698SJohn Baldwin	leal	16(%edi),%edi
2762bc3d5698SJohn Baldwin.L101cbc_dec_loop6_enter:
2763bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
2764bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
2765bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
2766bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
2767bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
2768bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
2769bc3d5698SJohn Baldwin	call	_aesni_decrypt6
2770bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
2771bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
2772bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
2773bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm3
2774bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
2775bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm4
2776bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
2777bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm5
2778bc3d5698SJohn Baldwin	movups	64(%esi),%xmm1
2779bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
2780bc3d5698SJohn Baldwin	movups	80(%esi),%xmm0
2781bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
2782bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2783bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
2784bc3d5698SJohn Baldwin	leal	96(%esi),%esi
2785bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
2786bc3d5698SJohn Baldwin	movl	%ebx,%ecx
2787bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
2788bc3d5698SJohn Baldwin	movl	%ebp,%edx
2789bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
2790bc3d5698SJohn Baldwin	leal	80(%edi),%edi
2791bc3d5698SJohn Baldwin	subl	$96,%eax
2792bc3d5698SJohn Baldwin	ja	.L102cbc_dec_loop6
2793bc3d5698SJohn Baldwin	movaps	%xmm7,%xmm2
2794bc3d5698SJohn Baldwin	movaps	%xmm0,%xmm7
2795bc3d5698SJohn Baldwin	addl	$80,%eax
2796bc3d5698SJohn Baldwin	jle	.L103cbc_dec_clear_tail_collected
2797bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2798bc3d5698SJohn Baldwin	leal	16(%edi),%edi
2799bc3d5698SJohn Baldwin.L100cbc_dec_tail:
2800bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
2801bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm6
2802bc3d5698SJohn Baldwin	cmpl	$16,%eax
2803bc3d5698SJohn Baldwin	jbe	.L104cbc_dec_one
2804bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
2805bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm5
2806bc3d5698SJohn Baldwin	cmpl	$32,%eax
2807bc3d5698SJohn Baldwin	jbe	.L105cbc_dec_two
2808bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
2809bc3d5698SJohn Baldwin	cmpl	$48,%eax
2810bc3d5698SJohn Baldwin	jbe	.L106cbc_dec_three
2811bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
2812bc3d5698SJohn Baldwin	cmpl	$64,%eax
2813bc3d5698SJohn Baldwin	jbe	.L107cbc_dec_four
2814bc3d5698SJohn Baldwin	movups	64(%esi),%xmm6
2815bc3d5698SJohn Baldwin	movaps	%xmm7,(%esp)
2816bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
2817bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
2818bc3d5698SJohn Baldwin	call	_aesni_decrypt6
2819bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
2820bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
2821bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
2822bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm3
2823bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
2824bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm4
2825bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
2826bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm5
2827bc3d5698SJohn Baldwin	movups	64(%esi),%xmm7
2828bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
2829bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2830bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
2831bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2832bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
2833bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2834bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
2835bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2836bc3d5698SJohn Baldwin	leal	64(%edi),%edi
2837bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm2
2838bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2839bc3d5698SJohn Baldwin	subl	$80,%eax
2840bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
2841bc3d5698SJohn Baldwin.align	16
2842bc3d5698SJohn Baldwin.L104cbc_dec_one:
2843bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
2844bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
2845bc3d5698SJohn Baldwin	leal	32(%edx),%edx
2846bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
2847bc3d5698SJohn Baldwin.L109dec1_loop_20:
2848bc3d5698SJohn Baldwin.byte	102,15,56,222,209
2849bc3d5698SJohn Baldwin	decl	%ecx
2850bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
2851bc3d5698SJohn Baldwin	leal	16(%edx),%edx
2852bc3d5698SJohn Baldwin	jnz	.L109dec1_loop_20
2853bc3d5698SJohn Baldwin.byte	102,15,56,223,209
2854bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2855bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm7
2856bc3d5698SJohn Baldwin	subl	$16,%eax
2857bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
2858bc3d5698SJohn Baldwin.align	16
2859bc3d5698SJohn Baldwin.L105cbc_dec_two:
2860bc3d5698SJohn Baldwin	call	_aesni_decrypt2
2861bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2862bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
2863bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2864bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm2
2865bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2866bc3d5698SJohn Baldwin	leal	16(%edi),%edi
2867bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm7
2868bc3d5698SJohn Baldwin	subl	$32,%eax
2869bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
2870bc3d5698SJohn Baldwin.align	16
2871bc3d5698SJohn Baldwin.L106cbc_dec_three:
2872bc3d5698SJohn Baldwin	call	_aesni_decrypt3
2873bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2874bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
2875bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm4
2876bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2877bc3d5698SJohn Baldwin	movaps	%xmm4,%xmm2
2878bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2879bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
2880bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2881bc3d5698SJohn Baldwin	leal	32(%edi),%edi
2882bc3d5698SJohn Baldwin	movups	32(%esi),%xmm7
2883bc3d5698SJohn Baldwin	subl	$48,%eax
2884bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
2885bc3d5698SJohn Baldwin.align	16
2886bc3d5698SJohn Baldwin.L107cbc_dec_four:
2887bc3d5698SJohn Baldwin	call	_aesni_decrypt4
2888bc3d5698SJohn Baldwin	movups	16(%esi),%xmm1
2889bc3d5698SJohn Baldwin	movups	32(%esi),%xmm0
2890bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
2891bc3d5698SJohn Baldwin	movups	48(%esi),%xmm7
2892bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
2893bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2894bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
2895bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
2896bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2897bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
2898bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
2899bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2900bc3d5698SJohn Baldwin	leal	48(%edi),%edi
2901bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm2
2902bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2903bc3d5698SJohn Baldwin	subl	$64,%eax
2904bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
2905bc3d5698SJohn Baldwin.align	16
2906bc3d5698SJohn Baldwin.L103cbc_dec_clear_tail_collected:
2907bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
2908bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
2909bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
2910bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
2911bc3d5698SJohn Baldwin.L108cbc_dec_tail_collected:
2912bc3d5698SJohn Baldwin	andl	$15,%eax
2913bc3d5698SJohn Baldwin	jnz	.L110cbc_dec_tail_partial
2914bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
2915bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
2916bc3d5698SJohn Baldwin	jmp	.L099cbc_ret
2917bc3d5698SJohn Baldwin.align	16
2918bc3d5698SJohn Baldwin.L110cbc_dec_tail_partial:
2919bc3d5698SJohn Baldwin	movaps	%xmm2,(%esp)
2920bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
2921bc3d5698SJohn Baldwin	movl	$16,%ecx
2922bc3d5698SJohn Baldwin	movl	%esp,%esi
2923bc3d5698SJohn Baldwin	subl	%eax,%ecx
2924bc3d5698SJohn Baldwin.long	2767451785
2925bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
2926bc3d5698SJohn Baldwin.L099cbc_ret:
2927bc3d5698SJohn Baldwin	movl	16(%esp),%esp
2928bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
2929bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
2930bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
2931bc3d5698SJohn Baldwin	movups	%xmm7,(%ebp)
2932bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
2933bc3d5698SJohn Baldwin.L094cbc_abort:
2934bc3d5698SJohn Baldwin	popl	%edi
2935bc3d5698SJohn Baldwin	popl	%esi
2936bc3d5698SJohn Baldwin	popl	%ebx
2937bc3d5698SJohn Baldwin	popl	%ebp
2938bc3d5698SJohn Baldwin	ret
2939bc3d5698SJohn Baldwin.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
/*
 * _aesni_set_encrypt_key: internal AES key-expansion helper that takes its
 * arguments in registers (it has no .globl; the exported wrappers in this
 * file load the registers from 4/8/12(%esp) before calling it).
 *   In:  %eax = user key pointer
 *        %ecx = key length in bits (128, 192 or 256)
 *        %edx = output key schedule pointer
 *   Out: %eax = 0 on success, -1 if %eax or %edx is zero, -2 if %ecx is
 *        not one of the three supported lengths.
 *        On success %ecx holds the count 9/11/13, which is also stored at
 *        byte offset 240 of the schedule (every path below ends with a
 *        store whose displacement plus the advanced %edx equals key+240).
 *   %ebp and %ebx are pushed/popped here; %xmm0-%xmm5 are zeroed on the
 *   success path, only %xmm0 on the bad-length path.
 */
2940bc3d5698SJohn Baldwin.type	_aesni_set_encrypt_key,@function
2941bc3d5698SJohn Baldwin.align	16
2942bc3d5698SJohn Baldwin_aesni_set_encrypt_key:
/* 243,15,30,251 = F3 0F 1E FB = endbr32, emitted only for CET builds */
2943c0855eaaSJohn Baldwin	#ifdef __CET__
2944c0855eaaSJohn Baldwin
2945c0855eaaSJohn Baldwin.byte	243,15,30,251
2946c0855eaaSJohn Baldwin	#endif
2947c0855eaaSJohn Baldwin
2948bc3d5698SJohn Baldwin	pushl	%ebp
2949bc3d5698SJohn Baldwin	pushl	%ebx
/* reject a zero user-key or schedule pointer */
2950bc3d5698SJohn Baldwin	testl	%eax,%eax
2951bc3d5698SJohn Baldwin	jz	.L111bad_pointer
2952bc3d5698SJohn Baldwin	testl	%edx,%edx
2953bc3d5698SJohn Baldwin	jz	.L111bad_pointer
/* PIC address computation: call/pop yields the address of .L112pic */
2954bc3d5698SJohn Baldwin	call	.L112pic
2955bc3d5698SJohn Baldwin.L112pic:
2956bc3d5698SJohn Baldwin	popl	%ebx
/* %ebx = &.Lkey_const, %ebp = &OPENSSL_ia32cap_P */
2957bc3d5698SJohn Baldwin	leal	.Lkey_const-.L112pic(%ebx),%ebx
2958bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
/* %xmm0 = first 16 bytes of the user key */
2959bc3d5698SJohn Baldwin	movups	(%eax),%xmm0
2960bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm4
/* %ebp = second dword of the capability vector */
2961bc3d5698SJohn Baldwin	movl	4(%ebp),%ebp
/* %edx now points at schedule+16 */
2962bc3d5698SJohn Baldwin	leal	16(%edx),%edx
/*
 * 268437504 = 0x10000800: keep only bits 28 and 11 of that dword. Each
 * key-size path compares the result with 0x10000000 (bit 28 set, bit 11
 * clear) to select its "_alt" variant.
 * NOTE(review): per OpenSSL's OPENSSL_ia32cap layout these bits are
 * presumably AVX and AMD XOP - confirm against that documentation.
 */
2963bc3d5698SJohn Baldwin	andl	$268437504,%ebp
/* dispatch on key length in bits */
2964bc3d5698SJohn Baldwin	cmpl	$256,%ecx
2965bc3d5698SJohn Baldwin	je	.L11314rounds
2966bc3d5698SJohn Baldwin	cmpl	$192,%ecx
2967bc3d5698SJohn Baldwin	je	.L11412rounds
2968bc3d5698SJohn Baldwin	cmpl	$128,%ecx
2969bc3d5698SJohn Baldwin	jne	.L115bad_keybits
2970bc3d5698SJohn Baldwin.align	16
/* ---- 128-bit key, aeskeygenassist-based expansion ---- */
2971bc3d5698SJohn Baldwin.L11610rounds:
2972bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
2973bc3d5698SJohn Baldwin	je	.L11710rounds_alt
2974bc3d5698SJohn Baldwin	movl	$9,%ecx
/* round key 0 is the user key itself */
2975bc3d5698SJohn Baldwin	movups	%xmm0,-16(%edx)
/*
 * .byte 102,15,58,223,200,imm = 66 0F 3A DF C8 imm
 *   = aeskeygenassist $imm,%xmm0,%xmm1 ; imm is the round constant
 *   (1,2,4,...,128,27,54).
 * The first call uses the "cold" entry, which skips the store/advance.
 */
2976bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
2977bc3d5698SJohn Baldwin	call	.L118key_128_cold
2978bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
2979bc3d5698SJohn Baldwin	call	.L119key_128
2980bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
2981bc3d5698SJohn Baldwin	call	.L119key_128
2982bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
2983bc3d5698SJohn Baldwin	call	.L119key_128
2984bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
2985bc3d5698SJohn Baldwin	call	.L119key_128
2986bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
2987bc3d5698SJohn Baldwin	call	.L119key_128
2988bc3d5698SJohn Baldwin.byte	102,15,58,223,200,64
2989bc3d5698SJohn Baldwin	call	.L119key_128
2990bc3d5698SJohn Baldwin.byte	102,15,58,223,200,128
2991bc3d5698SJohn Baldwin	call	.L119key_128
2992bc3d5698SJohn Baldwin.byte	102,15,58,223,200,27
2993bc3d5698SJohn Baldwin	call	.L119key_128
2994bc3d5698SJohn Baldwin.byte	102,15,58,223,200,54
2995bc3d5698SJohn Baldwin	call	.L119key_128
/* %edx = schedule+160 here: store the last round key, then the count at +240 */
2996bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
2997bc3d5698SJohn Baldwin	movl	%ecx,80(%edx)
2998bc3d5698SJohn Baldwin	jmp	.L120good_key
2999bc3d5698SJohn Baldwin.align	16
/* local subroutine: store the previous round key, advance, derive the next */
3000bc3d5698SJohn Baldwin.L119key_128:
3001bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
3002bc3d5698SJohn Baldwin	leal	16(%edx),%edx
/* entry used for the first round key: no store, no pointer advance */
3003bc3d5698SJohn Baldwin.L118key_128_cold:
/* fold shifted copies of %xmm0 into itself via %xmm4, then XOR in the
   broadcast top dword of the aeskeygenassist result held in %xmm1 */
3004bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
3005bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
3006bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
3007bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
3008bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1
3009bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
3010bc3d5698SJohn Baldwin	ret
3011bc3d5698SJohn Baldwin.align	16
/* ---- 128-bit key, alternative expansion using pshufb + aesenclast ---- */
3012bc3d5698SJohn Baldwin.L11710rounds_alt:
/* %xmm5 = shuffle mask at .Lkey_const+0, %xmm4 = round-constant vector {1,1,1,1} */
3013bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm5
3014bc3d5698SJohn Baldwin	movl	$8,%ecx
3015bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
3016bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
3017bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
/* eight iterations, one round key each; %xmm4 is doubled every pass */
3018bc3d5698SJohn Baldwin.L121loop_key128:
/* 102,15,56,0,197   = pshufb %xmm5,%xmm0     (byte-rotated top dword in every lane)
   102,15,56,221,196 = aesenclast %xmm4,%xmm0 (S-box, then XOR with the round constant) */
3019bc3d5698SJohn Baldwin.byte	102,15,56,0,197
3020bc3d5698SJohn Baldwin.byte	102,15,56,221,196
3021bc3d5698SJohn Baldwin	pslld	$1,%xmm4
3022bc3d5698SJohn Baldwin	leal	16(%edx),%edx
/* %xmm2 = previous key XORed with itself shifted left by 4, 8 and 12 bytes */
3023bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
3024bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3025bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3026bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3027bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3028bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3029bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3030bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
3031bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
3032bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
3033bc3d5698SJohn Baldwin	decl	%ecx
3034bc3d5698SJohn Baldwin	jnz	.L121loop_key128
/* last two round keys use the constant vector {27,...}, then its double (54) */
3035bc3d5698SJohn Baldwin	movdqa	48(%ebx),%xmm4
3036bc3d5698SJohn Baldwin.byte	102,15,56,0,197
3037bc3d5698SJohn Baldwin.byte	102,15,56,221,196
3038bc3d5698SJohn Baldwin	pslld	$1,%xmm4
3039bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
3040bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3041bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3042bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3043bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3044bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3045bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3046bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
3047bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
3048bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
3049bc3d5698SJohn Baldwin.byte	102,15,56,0,197
3050bc3d5698SJohn Baldwin.byte	102,15,56,221,196
3051bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
3052bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3053bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3054bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3055bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
3056bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
3057bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3058bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
3059bc3d5698SJohn Baldwin	movdqu	%xmm0,16(%edx)
/* %edx = schedule+144 here, so 96(%edx) is schedule+240 */
3060bc3d5698SJohn Baldwin	movl	$9,%ecx
3061bc3d5698SJohn Baldwin	movl	%ecx,96(%edx)
3062bc3d5698SJohn Baldwin	jmp	.L120good_key
3063bc3d5698SJohn Baldwin.align	16
/* ---- 192-bit key ---- */
3064bc3d5698SJohn Baldwin.L11412rounds:
/* %xmm2 = low 8 bytes hold key bytes 16..23 */
3065bc3d5698SJohn Baldwin	movq	16(%eax),%xmm2
3066bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
3067bc3d5698SJohn Baldwin	je	.L12212rounds_alt
3068bc3d5698SJohn Baldwin	movl	$11,%ecx
3069bc3d5698SJohn Baldwin	movups	%xmm0,-16(%edx)
/* .byte 102,15,58,223,202,imm = aeskeygenassist $imm,%xmm2,%xmm1.
   The "a" and "b" helpers alternate because each step yields 24 bytes of
   key material: "a" stores 16 bytes, "b" stores 32. */
3070bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
3071bc3d5698SJohn Baldwin	call	.L123key_192a_cold
3072bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
3073bc3d5698SJohn Baldwin	call	.L124key_192b
3074bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
3075bc3d5698SJohn Baldwin	call	.L125key_192a
3076bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
3077bc3d5698SJohn Baldwin	call	.L124key_192b
3078bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
3079bc3d5698SJohn Baldwin	call	.L125key_192a
3080bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
3081bc3d5698SJohn Baldwin	call	.L124key_192b
3082bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
3083bc3d5698SJohn Baldwin	call	.L125key_192a
3084bc3d5698SJohn Baldwin.byte	102,15,58,223,202,128
3085bc3d5698SJohn Baldwin	call	.L124key_192b
/* %edx = schedule+192 here: final round key, then the count at +240 */
3086bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
3087bc3d5698SJohn Baldwin	movl	%ecx,48(%edx)
3088bc3d5698SJohn Baldwin	jmp	.L120good_key
3089bc3d5698SJohn Baldwin.align	16
/* local subroutine: store 16 bytes from %xmm0, advance, then expand */
3090bc3d5698SJohn Baldwin.L125key_192a:
3091bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
3092bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3093bc3d5698SJohn Baldwin.align	16
/* first-call entry: nothing stored; saves %xmm2 in %xmm5 for key_192b */
3094bc3d5698SJohn Baldwin.L123key_192a_cold:
3095bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm5
/* shared expansion step, also reached by the jmp at the end of key_192b */
3096bc3d5698SJohn Baldwin.L126key_192b_warm:
3097bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
3098bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
3099bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
3100bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
3101bc3d5698SJohn Baldwin	pslldq	$4,%xmm3
3102bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
/* broadcast dword 1 of the aeskeygenassist result */
3103bc3d5698SJohn Baldwin	pshufd	$85,%xmm1,%xmm1
3104bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3105bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
/* propagate the new top dword of %xmm0 into %xmm2 */
3106bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm3
3107bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3108bc3d5698SJohn Baldwin	ret
3109bc3d5698SJohn Baldwin.align	16
/* local subroutine: assemble and store 32 bytes from %xmm5/%xmm0/%xmm2,
   advance by 32, then continue with the shared expansion step */
3110bc3d5698SJohn Baldwin.L124key_192b:
3111bc3d5698SJohn Baldwin	movaps	%xmm0,%xmm3
3112bc3d5698SJohn Baldwin	shufps	$68,%xmm0,%xmm5
3113bc3d5698SJohn Baldwin	movups	%xmm5,(%edx)
3114bc3d5698SJohn Baldwin	shufps	$78,%xmm2,%xmm3
3115bc3d5698SJohn Baldwin	movups	%xmm3,16(%edx)
3116bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3117bc3d5698SJohn Baldwin	jmp	.L126key_192b_warm
3118bc3d5698SJohn Baldwin.align	16
/* ---- 192-bit key, alternative expansion ---- */
3119bc3d5698SJohn Baldwin.L12212rounds_alt:
/* %xmm5 = shuffle mask at .Lkey_const+16, %xmm4 = round-constant vector {1,1,1,1} */
3120bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm5
3121bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
3122bc3d5698SJohn Baldwin	movl	$8,%ecx
3123bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
/* eight iterations, each emitting 24 bytes of schedule */
3124bc3d5698SJohn Baldwin.L127loop_key192:
3125bc3d5698SJohn Baldwin	movq	%xmm2,(%edx)
3126bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
/* 102,15,56,0,213   = pshufb %xmm5,%xmm2
   102,15,56,221,212 = aesenclast %xmm4,%xmm2 */
3127bc3d5698SJohn Baldwin.byte	102,15,56,0,213
3128bc3d5698SJohn Baldwin.byte	102,15,56,221,212
3129bc3d5698SJohn Baldwin	pslld	$1,%xmm4
3130bc3d5698SJohn Baldwin	leal	24(%edx),%edx
3131bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
3132bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3133bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3134bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3135bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3136bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3137bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
3138bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm3
3139bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
3140bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
3141bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
3142bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
3143bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
3144bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
3145bc3d5698SJohn Baldwin	decl	%ecx
3146bc3d5698SJohn Baldwin	jnz	.L127loop_key192
/* %edx = schedule+208 here, so 32(%edx) is schedule+240 */
3147bc3d5698SJohn Baldwin	movl	$11,%ecx
3148bc3d5698SJohn Baldwin	movl	%ecx,32(%edx)
3149bc3d5698SJohn Baldwin	jmp	.L120good_key
3150bc3d5698SJohn Baldwin.align	16
/* ---- 256-bit key ---- */
3151bc3d5698SJohn Baldwin.L11314rounds:
/* %xmm2 = second 16 bytes of the user key; %edx advances to schedule+32 */
3152bc3d5698SJohn Baldwin	movups	16(%eax),%xmm2
3153bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3154bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
3155bc3d5698SJohn Baldwin	je	.L12814rounds_alt
3156bc3d5698SJohn Baldwin	movl	$13,%ecx
/* the two halves of the user key are round keys 0 and 1 */
3157bc3d5698SJohn Baldwin	movups	%xmm0,-32(%edx)
3158bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edx)
/* alternate: aeskeygenassist on %xmm2 (...,202,imm) feeds key_256a which
   updates %xmm0; aeskeygenassist on %xmm0 (...,200,imm) feeds key_256b
   which updates %xmm2 */
3159bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
3160bc3d5698SJohn Baldwin	call	.L129key_256a_cold
3161bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
3162bc3d5698SJohn Baldwin	call	.L130key_256b
3163bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
3164bc3d5698SJohn Baldwin	call	.L131key_256a
3165bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
3166bc3d5698SJohn Baldwin	call	.L130key_256b
3167bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
3168bc3d5698SJohn Baldwin	call	.L131key_256a
3169bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
3170bc3d5698SJohn Baldwin	call	.L130key_256b
3171bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
3172bc3d5698SJohn Baldwin	call	.L131key_256a
3173bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
3174bc3d5698SJohn Baldwin	call	.L130key_256b
3175bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
3176bc3d5698SJohn Baldwin	call	.L131key_256a
3177bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
3178bc3d5698SJohn Baldwin	call	.L130key_256b
3179bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
3180bc3d5698SJohn Baldwin	call	.L131key_256a
3181bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
3182bc3d5698SJohn Baldwin	call	.L130key_256b
3183bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
3184bc3d5698SJohn Baldwin	call	.L131key_256a
/* %edx = schedule+224 here: final round key, then the count at +240 */
3185bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
3186bc3d5698SJohn Baldwin	movl	%ecx,16(%edx)
/* %eax is cleared again at .L120good_key */
3187bc3d5698SJohn Baldwin	xorl	%eax,%eax
3188bc3d5698SJohn Baldwin	jmp	.L120good_key
3189bc3d5698SJohn Baldwin.align	16
/* local subroutine: store %xmm2, advance, derive the next %xmm0 */
3190bc3d5698SJohn Baldwin.L131key_256a:
3191bc3d5698SJohn Baldwin	movups	%xmm2,(%edx)
3192bc3d5698SJohn Baldwin	leal	16(%edx),%edx
/* first-call entry: no store, no pointer advance */
3193bc3d5698SJohn Baldwin.L129key_256a_cold:
3194bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
3195bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
3196bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
3197bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
/* broadcast dword 3 of the aeskeygenassist result */
3198bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1
3199bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
3200bc3d5698SJohn Baldwin	ret
3201bc3d5698SJohn Baldwin.align	16
/* local subroutine: store %xmm0, advance, derive the next %xmm2 */
3202bc3d5698SJohn Baldwin.L130key_256b:
3203bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
3204bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3205bc3d5698SJohn Baldwin	shufps	$16,%xmm2,%xmm4
3206bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
3207bc3d5698SJohn Baldwin	shufps	$140,%xmm2,%xmm4
3208bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
/* broadcast dword 2 of the aeskeygenassist result */
3209bc3d5698SJohn Baldwin	shufps	$170,%xmm1,%xmm1
3210bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
3211bc3d5698SJohn Baldwin	ret
3212bc3d5698SJohn Baldwin.align	16
/* ---- 256-bit key, alternative expansion ---- */
3213bc3d5698SJohn Baldwin.L12814rounds_alt:
/* %xmm5 = shuffle mask at .Lkey_const+0, %xmm4 = round-constant vector {1,1,1,1} */
3214bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm5
3215bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
3216bc3d5698SJohn Baldwin	movl	$7,%ecx
3217bc3d5698SJohn Baldwin	movdqu	%xmm0,-32(%edx)
3218bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
3219bc3d5698SJohn Baldwin	movdqu	%xmm2,-16(%edx)
/* seven passes; each stores one key at (%edx) and, except the last,
   a second key at 16(%edx) before advancing by 32 */
3220bc3d5698SJohn Baldwin.L132loop_key256:
/* 102,15,56,0,213   = pshufb %xmm5,%xmm2
   102,15,56,221,212 = aesenclast %xmm4,%xmm2 */
3221bc3d5698SJohn Baldwin.byte	102,15,56,0,213
3222bc3d5698SJohn Baldwin.byte	102,15,56,221,212
3223bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
3224bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3225bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3226bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3227bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3228bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
3229bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
3230bc3d5698SJohn Baldwin	pslld	$1,%xmm4
3231bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
3232bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
3233bc3d5698SJohn Baldwin	decl	%ecx
3234bc3d5698SJohn Baldwin	jz	.L133done_key256
/* second half: broadcast the top dword of %xmm0, then
   102,15,56,221,211 = aesenclast %xmm3,%xmm2 with %xmm3 = 0, i.e. the
   S-box step with no round constant */
3235bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm2
3236bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3237bc3d5698SJohn Baldwin.byte	102,15,56,221,211
3238bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm3
3239bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
3240bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
3241bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
3242bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
3243bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
3244bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
3245bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm2
3246bc3d5698SJohn Baldwin	movdqu	%xmm2,16(%edx)
3247bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3248bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
3249bc3d5698SJohn Baldwin	jmp	.L132loop_key256
3250bc3d5698SJohn Baldwin.L133done_key256:
/* %edx = schedule+224 here, so 16(%edx) is schedule+240 */
3251bc3d5698SJohn Baldwin	movl	$13,%ecx
3252bc3d5698SJohn Baldwin	movl	%ecx,16(%edx)
/* common success exit: wipe key material from %xmm0-%xmm5, return 0 */
3253bc3d5698SJohn Baldwin.L120good_key:
3254bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3255bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3256bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3257bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
3258bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
3259bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
3260bc3d5698SJohn Baldwin	xorl	%eax,%eax
3261bc3d5698SJohn Baldwin	popl	%ebx
3262bc3d5698SJohn Baldwin	popl	%ebp
3263bc3d5698SJohn Baldwin	ret
3264bc3d5698SJohn Baldwin.align	4
/* zero pointer argument: return -1 */
3265bc3d5698SJohn Baldwin.L111bad_pointer:
3266bc3d5698SJohn Baldwin	movl	$-1,%eax
3267bc3d5698SJohn Baldwin	popl	%ebx
3268bc3d5698SJohn Baldwin	popl	%ebp
3269bc3d5698SJohn Baldwin	ret
3270bc3d5698SJohn Baldwin.align	4
/* unsupported bit length: wipe the loaded key bytes from %xmm0, return -2 */
3271bc3d5698SJohn Baldwin.L115bad_keybits:
3272bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3273bc3d5698SJohn Baldwin	movl	$-2,%eax
3274bc3d5698SJohn Baldwin	popl	%ebx
3275bc3d5698SJohn Baldwin	popl	%ebp
3276bc3d5698SJohn Baldwin	ret
3277bc3d5698SJohn Baldwin.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * aesni_set_encrypt_key: exported stack-argument entry point.
 * Loads its three stack arguments into the registers expected by
 * _aesni_set_encrypt_key (4(%esp) -> %eax user key, 8(%esp) -> %ecx bit
 * length, 12(%esp) -> %edx key schedule), calls it, and returns with
 * whatever that helper left in %eax (0, -1 or -2).
 */
3278bc3d5698SJohn Baldwin.globl	aesni_set_encrypt_key
3279bc3d5698SJohn Baldwin.type	aesni_set_encrypt_key,@function
3280bc3d5698SJohn Baldwin.align	16
3281bc3d5698SJohn Baldwinaesni_set_encrypt_key:
3282bc3d5698SJohn Baldwin.L_aesni_set_encrypt_key_begin:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3283c0855eaaSJohn Baldwin	#ifdef __CET__
3284c0855eaaSJohn Baldwin
3285c0855eaaSJohn Baldwin.byte	243,15,30,251
3286c0855eaaSJohn Baldwin	#endif
3287c0855eaaSJohn Baldwin
3288bc3d5698SJohn Baldwin	movl	4(%esp),%eax
3289bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
3290bc3d5698SJohn Baldwin	movl	12(%esp),%edx
3291bc3d5698SJohn Baldwin	call	_aesni_set_encrypt_key
3292bc3d5698SJohn Baldwin	ret
3293bc3d5698SJohn Baldwin.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * aesni_set_decrypt_key: exported stack-argument entry point that builds a
 * decryption key schedule.
 * Stack arguments are the same as for aesni_set_encrypt_key: 4(%esp) user
 * key, 8(%esp) bit length, 12(%esp) key schedule.
 * It first runs _aesni_set_encrypt_key; if that returns non-zero the code
 * is passed straight back in %eax. Otherwise the schedule is reversed in
 * place (first and last round keys are swapped unchanged, every inner round
 * key is passed through aesimc) and 0 is returned.
 */
3294bc3d5698SJohn Baldwin.globl	aesni_set_decrypt_key
3295bc3d5698SJohn Baldwin.type	aesni_set_decrypt_key,@function
3296bc3d5698SJohn Baldwin.align	16
3297bc3d5698SJohn Baldwinaesni_set_decrypt_key:
3298bc3d5698SJohn Baldwin.L_aesni_set_decrypt_key_begin:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3299c0855eaaSJohn Baldwin	#ifdef __CET__
3300c0855eaaSJohn Baldwin
3301c0855eaaSJohn Baldwin.byte	243,15,30,251
3302c0855eaaSJohn Baldwin	#endif
3303c0855eaaSJohn Baldwin
3304bc3d5698SJohn Baldwin	movl	4(%esp),%eax
3305bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
3306bc3d5698SJohn Baldwin	movl	12(%esp),%edx
3307bc3d5698SJohn Baldwin	call	_aesni_set_encrypt_key
/* reload the schedule pointer (the helper advanced %edx); on success %ecx
   is the stored count, so %ecx*16 is the byte distance used below */
3308bc3d5698SJohn Baldwin	movl	12(%esp),%edx
3309bc3d5698SJohn Baldwin	shll	$4,%ecx
3310bc3d5698SJohn Baldwin	testl	%eax,%eax
3311bc3d5698SJohn Baldwin	jnz	.L134dec_key_ret
/* %eax = address of the last round key; swap it with round key 0 */
3312bc3d5698SJohn Baldwin	leal	16(%edx,%ecx,1),%eax
3313bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3314bc3d5698SJohn Baldwin	movups	(%eax),%xmm1
3315bc3d5698SJohn Baldwin	movups	%xmm0,(%eax)
3316bc3d5698SJohn Baldwin	movups	%xmm1,(%edx)
3317bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3318bc3d5698SJohn Baldwin	leal	-16(%eax),%eax
/* walk inward from both ends, swapping pairs while applying aesimc */
3319bc3d5698SJohn Baldwin.L135dec_key_inverse:
3320bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3321bc3d5698SJohn Baldwin	movups	(%eax),%xmm1
/* 102,15,56,219,192 = aesimc %xmm0,%xmm0 ; 102,15,56,219,201 = aesimc %xmm1,%xmm1 */
3322bc3d5698SJohn Baldwin.byte	102,15,56,219,192
3323bc3d5698SJohn Baldwin.byte	102,15,56,219,201
3324bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3325bc3d5698SJohn Baldwin	leal	-16(%eax),%eax
3326bc3d5698SJohn Baldwin	movups	%xmm0,16(%eax)
3327bc3d5698SJohn Baldwin	movups	%xmm1,-16(%edx)
/* unsigned pointer compare: loop until the two cursors meet */
3328bc3d5698SJohn Baldwin	cmpl	%edx,%eax
3329bc3d5698SJohn Baldwin	ja	.L135dec_key_inverse
/* the middle round key is transformed in place */
3330bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3331bc3d5698SJohn Baldwin.byte	102,15,56,219,192
3332bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
/* wipe key material from the registers and return 0 */
3333bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3334bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3335bc3d5698SJohn Baldwin	xorl	%eax,%eax
3336bc3d5698SJohn Baldwin.L134dec_key_ret:
3337bc3d5698SJohn Baldwin	ret
3338bc3d5698SJohn Baldwin.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
/*
 * .Lkey_const: constants read by the "_alt" key-expansion paths of
 * _aesni_set_encrypt_key via %ebx, 64-byte aligned.
 *   +0   0x0c0f0e0d x4  pshufb mask (128- and 256-bit paths)
 *   +16  0x04070605 x4  pshufb mask (192-bit path)
 *   +32  1 x4           initial round-constant vector
 *   +48  27 x4          round-constant vector loaded for the final rounds
 *                       of the 128-bit path
 */
3339bc3d5698SJohn Baldwin.align	64
3340bc3d5698SJohn Baldwin.Lkey_const:
3341bc3d5698SJohn Baldwin.long	202313229,202313229,202313229,202313229
3342bc3d5698SJohn Baldwin.long	67569157,67569157,67569157,67569157
3343bc3d5698SJohn Baldwin.long	1,1,1,1
3344bc3d5698SJohn Baldwin.long	27,27,27,27
/* NUL-terminated ASCII banner:
   "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
3345bc3d5698SJohn Baldwin.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
3346bc3d5698SJohn Baldwin.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
3347bc3d5698SJohn Baldwin.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
3348bc3d5698SJohn Baldwin.byte	115,108,46,111,114,103,62,0
/* 16-byte, 4-byte-aligned common symbol for the capability vector whose
   second dword _aesni_set_encrypt_key reads */
3349bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
3350c0855eaaSJohn Baldwin
/*
 * ELF note in section ".note.gnu.property", laid out as:
 *   name size (1f-0f), descriptor size (4f-1f), note type 5, name "GNU",
 *   then one property record: type 0xc0000002, data size (3f-2f), value 3,
 *   with 4-byte padding between the parts.
 * NOTE(review): per the GNU property note format, type 5 is
 * NT_GNU_PROPERTY_TYPE_0, 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND and
 * value 3 marks IBT | SHSTK (CET) compatibility - confirm against the
 * x86 psABI / Linux gABI extension documents.
 */
3351c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
3352c0855eaaSJohn Baldwin	.p2align 2
3353c0855eaaSJohn Baldwin	.long 1f - 0f
3354c0855eaaSJohn Baldwin	.long 4f - 1f
3355c0855eaaSJohn Baldwin	.long 5
3356c0855eaaSJohn Baldwin0:
3357c0855eaaSJohn Baldwin	.asciz "GNU"
3358c0855eaaSJohn Baldwin1:
3359c0855eaaSJohn Baldwin	.p2align 2
3360c0855eaaSJohn Baldwin	.long 0xc0000002
3361c0855eaaSJohn Baldwin	.long 3f - 2f
3362c0855eaaSJohn Baldwin2:
3363c0855eaaSJohn Baldwin	.long 3
3364c0855eaaSJohn Baldwin3:
3365c0855eaaSJohn Baldwin	.p2align 2
3366c0855eaaSJohn Baldwin4:
3367bc3d5698SJohn Baldwin#else
3368bc3d5698SJohn Baldwin.text
/*
 * aesni_encrypt (non-PIC build): encrypt a single 16-byte block.
 * Stack arguments: 4(%esp) = input block pointer, 8(%esp) = output block
 * pointer, 12(%esp) = key schedule pointer.
 * Reads the loop count from byte offset 240 of the schedule and the round
 * keys from its start. Uses %eax, %ecx, %edx and %xmm0-%xmm2; the three
 * XMM registers are zeroed before returning.
 */
3369bc3d5698SJohn Baldwin.globl	aesni_encrypt
3370bc3d5698SJohn Baldwin.type	aesni_encrypt,@function
3371bc3d5698SJohn Baldwin.align	16
3372bc3d5698SJohn Baldwinaesni_encrypt:
3373bc3d5698SJohn Baldwin.L_aesni_encrypt_begin:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3374c0855eaaSJohn Baldwin	#ifdef __CET__
3375c0855eaaSJohn Baldwin
3376c0855eaaSJohn Baldwin.byte	243,15,30,251
3377c0855eaaSJohn Baldwin	#endif
3378c0855eaaSJohn Baldwin
3379bc3d5698SJohn Baldwin	movl	4(%esp),%eax
3380bc3d5698SJohn Baldwin	movl	12(%esp),%edx
/* %xmm2 = input block, %ecx = loop count, %eax = output pointer */
3381bc3d5698SJohn Baldwin	movups	(%eax),%xmm2
3382bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
3383bc3d5698SJohn Baldwin	movl	8(%esp),%eax
/* XOR with round key 0, preload round key 1 */
3384bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3385bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3386bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3387bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3388bc3d5698SJohn Baldwin.L000enc1_loop_1:
/* 102,15,56,220,209 = aesenc %xmm1,%xmm2; movups/leal leave the flags
   from decl intact for the jnz */
3389bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3390bc3d5698SJohn Baldwin	decl	%ecx
3391bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
3392bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3393bc3d5698SJohn Baldwin	jnz	.L000enc1_loop_1
/* 102,15,56,221,209 = aesenclast %xmm1,%xmm2 */
3394bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/* wipe round keys, store the result, then wipe the result register */
3395bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3396bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3397bc3d5698SJohn Baldwin	movups	%xmm2,(%eax)
3398bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3399bc3d5698SJohn Baldwin	ret
3400bc3d5698SJohn Baldwin.size	aesni_encrypt,.-.L_aesni_encrypt_begin
/*
 * aesni_decrypt (non-PIC build): decrypt a single 16-byte block.
 * Stack arguments: 4(%esp) = input block pointer, 8(%esp) = output block
 * pointer, 12(%esp) = key schedule pointer.
 * Same structure as the single-block encrypt routine but with
 * aesdec/aesdeclast. Uses %eax, %ecx, %edx and %xmm0-%xmm2; the three XMM
 * registers are zeroed before returning.
 */
3401bc3d5698SJohn Baldwin.globl	aesni_decrypt
3402bc3d5698SJohn Baldwin.type	aesni_decrypt,@function
3403bc3d5698SJohn Baldwin.align	16
3404bc3d5698SJohn Baldwinaesni_decrypt:
3405bc3d5698SJohn Baldwin.L_aesni_decrypt_begin:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3406c0855eaaSJohn Baldwin	#ifdef __CET__
3407c0855eaaSJohn Baldwin
3408c0855eaaSJohn Baldwin.byte	243,15,30,251
3409c0855eaaSJohn Baldwin	#endif
3410c0855eaaSJohn Baldwin
3411bc3d5698SJohn Baldwin	movl	4(%esp),%eax
3412bc3d5698SJohn Baldwin	movl	12(%esp),%edx
/* %xmm2 = input block, %ecx = loop count read from schedule offset 240 */
3413bc3d5698SJohn Baldwin	movups	(%eax),%xmm2
3414bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
3415bc3d5698SJohn Baldwin	movl	8(%esp),%eax
/* XOR with round key 0, preload round key 1 */
3416bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3417bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3418bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3419bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3420bc3d5698SJohn Baldwin.L001dec1_loop_2:
/* 102,15,56,222,209 = aesdec %xmm1,%xmm2 */
3421bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3422bc3d5698SJohn Baldwin	decl	%ecx
3423bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
3424bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3425bc3d5698SJohn Baldwin	jnz	.L001dec1_loop_2
/* 102,15,56,223,209 = aesdeclast %xmm1,%xmm2 */
3426bc3d5698SJohn Baldwin.byte	102,15,56,223,209
/* wipe round keys, store the result, then wipe the result register */
3427bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
3428bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
3429bc3d5698SJohn Baldwin	movups	%xmm2,(%eax)
3430bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
3431bc3d5698SJohn Baldwin	ret
3432bc3d5698SJohn Baldwin.size	aesni_decrypt,.-.L_aesni_decrypt_begin
/*
 * _aesni_encrypt2: internal helper that encrypts two blocks in parallel.
 * Register-based interface (no .globl):
 *   In:  %edx = key schedule pointer, %ecx = loop count,
 *        %xmm2, %xmm3 = the two input blocks
 *   Out: %xmm2, %xmm3 = the two output blocks
 *   Overwrites %ecx, %edx, %xmm0, %xmm1 and the flags.
 * NOTE(review): %ecx is presumably the value stored at schedule offset 240;
 * callers are outside this view - confirm.
 */
3433bc3d5698SJohn Baldwin.type	_aesni_encrypt2,@function
3434bc3d5698SJohn Baldwin.align	16
3435bc3d5698SJohn Baldwin_aesni_encrypt2:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3436c0855eaaSJohn Baldwin	#ifdef __CET__
3437c0855eaaSJohn Baldwin
3438c0855eaaSJohn Baldwin.byte	243,15,30,251
3439c0855eaaSJohn Baldwin	#endif
3440c0855eaaSJohn Baldwin
/* XOR both blocks with round key 0; preload round keys 1 and 2 */
3441bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3442bc3d5698SJohn Baldwin	shll	$4,%ecx
3443bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3444bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3445bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3446bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/* %edx = schedule + 32 + 16*count; %ecx becomes a negative byte index
   (16 - 16*count) that counts up to zero */
3447bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3448bc3d5698SJohn Baldwin	negl	%ecx
3449bc3d5698SJohn Baldwin	addl	$16,%ecx
/* two rounds per iteration, alternating round keys in %xmm1 and %xmm0;
   102,15,56,220,{209,217} = aesenc %xmm1,{%xmm2,%xmm3}
   102,15,56,220,{208,216} = aesenc %xmm0,{%xmm2,%xmm3} */
3450bc3d5698SJohn Baldwin.L002enc2_loop:
3451bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3452bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3453bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
/* flags from this addl drive the jnz; the .byte lines and movups between
   them do not modify flags */
3454bc3d5698SJohn Baldwin	addl	$32,%ecx
3455bc3d5698SJohn Baldwin.byte	102,15,56,220,208
3456bc3d5698SJohn Baldwin.byte	102,15,56,220,216
3457bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3458bc3d5698SJohn Baldwin	jnz	.L002enc2_loop
/* tail: one more aesenc with %xmm1, then
   102,15,56,221,{208,216} = aesenclast %xmm0,{%xmm2,%xmm3} */
3459bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3460bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3461bc3d5698SJohn Baldwin.byte	102,15,56,221,208
3462bc3d5698SJohn Baldwin.byte	102,15,56,221,216
3463bc3d5698SJohn Baldwin	ret
3464bc3d5698SJohn Baldwin.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: internal helper that decrypts two blocks in parallel.
 * Register-based interface (no .globl):
 *   In:  %edx = key schedule pointer, %ecx = loop count,
 *        %xmm2, %xmm3 = the two input blocks
 *   Out: %xmm2, %xmm3 = the two output blocks
 *   Overwrites %ecx, %edx, %xmm0, %xmm1 and the flags.
 * NOTE(review): %ecx is presumably the value stored at schedule offset 240;
 * callers are outside this view - confirm.
 */
3465bc3d5698SJohn Baldwin.type	_aesni_decrypt2,@function
3466bc3d5698SJohn Baldwin.align	16
3467bc3d5698SJohn Baldwin_aesni_decrypt2:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3468c0855eaaSJohn Baldwin	#ifdef __CET__
3469c0855eaaSJohn Baldwin
3470c0855eaaSJohn Baldwin.byte	243,15,30,251
3471c0855eaaSJohn Baldwin	#endif
3472c0855eaaSJohn Baldwin
/* XOR both blocks with round key 0; preload round keys 1 and 2 */
3473bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3474bc3d5698SJohn Baldwin	shll	$4,%ecx
3475bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3476bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3477bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3478bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/* %edx = schedule + 32 + 16*count; %ecx becomes a negative byte index
   (16 - 16*count) that counts up to zero */
3479bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3480bc3d5698SJohn Baldwin	negl	%ecx
3481bc3d5698SJohn Baldwin	addl	$16,%ecx
/* two rounds per iteration, alternating round keys in %xmm1 and %xmm0;
   102,15,56,222,{209,217} = aesdec %xmm1,{%xmm2,%xmm3}
   102,15,56,222,{208,216} = aesdec %xmm0,{%xmm2,%xmm3} */
3482bc3d5698SJohn Baldwin.L003dec2_loop:
3483bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3484bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3485bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
/* flags from this addl drive the jnz below */
3486bc3d5698SJohn Baldwin	addl	$32,%ecx
3487bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3488bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3489bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3490bc3d5698SJohn Baldwin	jnz	.L003dec2_loop
/* tail: one more aesdec with %xmm1, then
   102,15,56,223,{208,216} = aesdeclast %xmm0,{%xmm2,%xmm3} */
3491bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3492bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3493bc3d5698SJohn Baldwin.byte	102,15,56,223,208
3494bc3d5698SJohn Baldwin.byte	102,15,56,223,216
3495bc3d5698SJohn Baldwin	ret
3496bc3d5698SJohn Baldwin.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: internal helper that encrypts three blocks in parallel.
 * Register-based interface (no .globl):
 *   In:  %edx = key schedule pointer, %ecx = loop count,
 *        %xmm2, %xmm3, %xmm4 = the three input blocks
 *   Out: %xmm2, %xmm3, %xmm4 = the three output blocks
 *   Overwrites %ecx, %edx, %xmm0, %xmm1 and the flags.
 * NOTE(review): %ecx is presumably the value stored at schedule offset 240;
 * callers are outside this view - confirm.
 */
3497bc3d5698SJohn Baldwin.type	_aesni_encrypt3,@function
3498bc3d5698SJohn Baldwin.align	16
3499bc3d5698SJohn Baldwin_aesni_encrypt3:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3500c0855eaaSJohn Baldwin	#ifdef __CET__
3501c0855eaaSJohn Baldwin
3502c0855eaaSJohn Baldwin.byte	243,15,30,251
3503c0855eaaSJohn Baldwin	#endif
3504c0855eaaSJohn Baldwin
/* XOR all three blocks with round key 0; preload round keys 1 and 2 */
3505bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3506bc3d5698SJohn Baldwin	shll	$4,%ecx
3507bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3508bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3509bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3510bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3511bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/* %edx = schedule + 32 + 16*count; %ecx becomes a negative byte index
   (16 - 16*count) that counts up to zero */
3512bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3513bc3d5698SJohn Baldwin	negl	%ecx
3514bc3d5698SJohn Baldwin	addl	$16,%ecx
/* two rounds per iteration;
   102,15,56,220,{209,217,225} = aesenc %xmm1,{%xmm2,%xmm3,%xmm4}
   102,15,56,220,{208,216,224} = aesenc %xmm0,{%xmm2,%xmm3,%xmm4} */
3515bc3d5698SJohn Baldwin.L004enc3_loop:
3516bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3517bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3518bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3519bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
/* flags from this addl drive the jnz below */
3520bc3d5698SJohn Baldwin	addl	$32,%ecx
3521bc3d5698SJohn Baldwin.byte	102,15,56,220,208
3522bc3d5698SJohn Baldwin.byte	102,15,56,220,216
3523bc3d5698SJohn Baldwin.byte	102,15,56,220,224
3524bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3525bc3d5698SJohn Baldwin	jnz	.L004enc3_loop
/* tail: one more aesenc with %xmm1, then
   102,15,56,221,{208,216,224} = aesenclast %xmm0,{%xmm2,%xmm3,%xmm4} */
3526bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3527bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3528bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3529bc3d5698SJohn Baldwin.byte	102,15,56,221,208
3530bc3d5698SJohn Baldwin.byte	102,15,56,221,216
3531bc3d5698SJohn Baldwin.byte	102,15,56,221,224
3532bc3d5698SJohn Baldwin	ret
3533bc3d5698SJohn Baldwin.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: internal helper that decrypts three blocks in parallel.
 * Register-based interface (no .globl):
 *   In:  %edx = key schedule pointer, %ecx = loop count,
 *        %xmm2, %xmm3, %xmm4 = the three input blocks
 *   Out: %xmm2, %xmm3, %xmm4 = the three output blocks
 *   Overwrites %ecx, %edx, %xmm0, %xmm1 and the flags.
 * NOTE(review): %ecx is presumably the value stored at schedule offset 240;
 * callers are outside this view - confirm.
 */
3534bc3d5698SJohn Baldwin.type	_aesni_decrypt3,@function
3535bc3d5698SJohn Baldwin.align	16
3536bc3d5698SJohn Baldwin_aesni_decrypt3:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3537c0855eaaSJohn Baldwin	#ifdef __CET__
3538c0855eaaSJohn Baldwin
3539c0855eaaSJohn Baldwin.byte	243,15,30,251
3540c0855eaaSJohn Baldwin	#endif
3541c0855eaaSJohn Baldwin
/* XOR all three blocks with round key 0; preload round keys 1 and 2 */
3542bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3543bc3d5698SJohn Baldwin	shll	$4,%ecx
3544bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3545bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3546bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3547bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3548bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/* %edx = schedule + 32 + 16*count; %ecx becomes a negative byte index
   (16 - 16*count) that counts up to zero */
3549bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3550bc3d5698SJohn Baldwin	negl	%ecx
3551bc3d5698SJohn Baldwin	addl	$16,%ecx
/* two rounds per iteration;
   102,15,56,222,{209,217,225} = aesdec %xmm1,{%xmm2,%xmm3,%xmm4}
   102,15,56,222,{208,216,224} = aesdec %xmm0,{%xmm2,%xmm3,%xmm4} */
3552bc3d5698SJohn Baldwin.L005dec3_loop:
3553bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3554bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3555bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3556bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
/* flags from this addl drive the jnz below */
3557bc3d5698SJohn Baldwin	addl	$32,%ecx
3558bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3559bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3560bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3561bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3562bc3d5698SJohn Baldwin	jnz	.L005dec3_loop
/* tail: one more aesdec with %xmm1, then
   102,15,56,223,{208,216,224} = aesdeclast %xmm0,{%xmm2,%xmm3,%xmm4} */
3563bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3564bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3565bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3566bc3d5698SJohn Baldwin.byte	102,15,56,223,208
3567bc3d5698SJohn Baldwin.byte	102,15,56,223,216
3568bc3d5698SJohn Baldwin.byte	102,15,56,223,224
3569bc3d5698SJohn Baldwin	ret
3570bc3d5698SJohn Baldwin.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: internal helper that encrypts four blocks in parallel.
 * Register-based interface (no .globl):
 *   In:  %edx = key schedule pointer, %ecx = loop count,
 *        %xmm2-%xmm5 = the four input blocks
 *   Out: %xmm2-%xmm5 = the four output blocks
 *   Overwrites %ecx, %edx, %xmm0, %xmm1 and the flags.
 * NOTE(review): %ecx is presumably the value stored at schedule offset 240;
 * callers are outside this view - confirm.
 */
3571bc3d5698SJohn Baldwin.type	_aesni_encrypt4,@function
3572bc3d5698SJohn Baldwin.align	16
3573bc3d5698SJohn Baldwin_aesni_encrypt4:
/* 243,15,30,251 = endbr32, emitted only for CET builds */
3574c0855eaaSJohn Baldwin	#ifdef __CET__
3575c0855eaaSJohn Baldwin
3576c0855eaaSJohn Baldwin.byte	243,15,30,251
3577c0855eaaSJohn Baldwin	#endif
3578c0855eaaSJohn Baldwin
/* XOR all four blocks with round key 0; preload round keys 1 and 2 */
3579bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3580bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3581bc3d5698SJohn Baldwin	shll	$4,%ecx
3582bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3583bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3584bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3585bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
3586bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/* %edx = schedule + 32 + 16*count; %ecx becomes a negative byte index
   (16 - 16*count) that counts up to zero */
3587bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3588bc3d5698SJohn Baldwin	negl	%ecx
/* 15,31,64,0 = 0F 1F 40 00, a 4-byte nopl 0(%eax);
   presumably padding to position the loop entry - TODO confirm */
3589bc3d5698SJohn Baldwin.byte	15,31,64,0
3590bc3d5698SJohn Baldwin	addl	$16,%ecx
/* two rounds per iteration;
   102,15,56,220,{209,217,225,233} = aesenc %xmm1,{%xmm2..%xmm5}
   102,15,56,220,{208,216,224,232} = aesenc %xmm0,{%xmm2..%xmm5} */
3591bc3d5698SJohn Baldwin.L006enc4_loop:
3592bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3593bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3594bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3595bc3d5698SJohn Baldwin.byte	102,15,56,220,233
3596bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
/* flags from this addl drive the jnz below */
3597bc3d5698SJohn Baldwin	addl	$32,%ecx
3598bc3d5698SJohn Baldwin.byte	102,15,56,220,208
3599bc3d5698SJohn Baldwin.byte	102,15,56,220,216
3600bc3d5698SJohn Baldwin.byte	102,15,56,220,224
3601bc3d5698SJohn Baldwin.byte	102,15,56,220,232
3602bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3603bc3d5698SJohn Baldwin	jnz	.L006enc4_loop
/* tail: one more aesenc with %xmm1, then
   102,15,56,221,{208,216,224,232} = aesenclast %xmm0,{%xmm2..%xmm5} */
3604bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3605bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3606bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3607bc3d5698SJohn Baldwin.byte	102,15,56,220,233
3608bc3d5698SJohn Baldwin.byte	102,15,56,221,208
3609bc3d5698SJohn Baldwin.byte	102,15,56,221,216
3610bc3d5698SJohn Baldwin.byte	102,15,56,221,224
3611bc3d5698SJohn Baldwin.byte	102,15,56,221,232
3612bc3d5698SJohn Baldwin	ret
3613bc3d5698SJohn Baldwin.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: file-local helper (no .globl) that runs four 16-byte
 * blocks through the AES decryption rounds side by side.
 *
 * In:   %edx = address of the round-key array (16 bytes per round key)
 *       %ecx = round-count word; callers in this file load it from
 *              240(%edx) of the key structure
 *       %xmm2..%xmm5 = the four input blocks
 * Out:  %xmm2..%xmm5 = the four processed blocks
 * Clobbers: %xmm0, %xmm1, %ecx, %edx, EFLAGS
 *
 * The AES instructions are emitted as raw opcode bytes:
 *   102,15,56,222,modrm = aesdec      102,15,56,223,modrm = aesdeclast
 * ModRM 209/217/225/233 selects source %xmm1 with destination
 * %xmm2/%xmm3/%xmm4/%xmm5; 208/216/224/232 selects source %xmm0.
 */
3614bc3d5698SJohn Baldwin.type	_aesni_decrypt4,@function
3615bc3d5698SJohn Baldwin.align	16
3616bc3d5698SJohn Baldwin_aesni_decrypt4:
3617c0855eaaSJohn Baldwin	#ifdef __CET__
3618c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
3619c0855eaaSJohn Baldwin.byte	243,15,30,251
3620c0855eaaSJohn Baldwin	#endif
3621c0855eaaSJohn Baldwin
/* %xmm0 = round key 0, %xmm1 = round key 1; %ecx = count * 16 (bytes) */
3622bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3623bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3624bc3d5698SJohn Baldwin	shll	$4,%ecx
/* Initial whitening: XOR round key 0 into all four blocks */
3625bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3626bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3627bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3628bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
/* %xmm0 = round key 2 */
3629bc3d5698SJohn Baldwin	movups	32(%edx),%xmm0
/*
 * Point %edx at key base + 32 + 16*count and turn %ecx into a negative
 * byte index (16 - 16*count) that climbs towards zero in the loop.
 */
3630bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3631bc3d5698SJohn Baldwin	negl	%ecx
/* 15,31,64,0 = 4-byte NOP (nopl 0(%eax)); presumably alignment padding */
3632bc3d5698SJohn Baldwin.byte	15,31,64,0
3633bc3d5698SJohn Baldwin	addl	$16,%ecx
/* Each iteration applies two rounds: one with %xmm1, one with %xmm0 */
3634bc3d5698SJohn Baldwin.L007dec4_loop:
/* aesdec %xmm1 into %xmm2..%xmm5 */
3635bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3636bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3637bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3638bc3d5698SJohn Baldwin.byte	102,15,56,222,233
/* Fetch the next round key; addl sets ZF that the jnz below consumes */
3639bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
3640bc3d5698SJohn Baldwin	addl	$32,%ecx
/* aesdec %xmm0 into %xmm2..%xmm5 */
3641bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3642bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3643bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3644bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3645bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3646bc3d5698SJohn Baldwin	jnz	.L007dec4_loop
/* Tail: one more aesdec with %xmm1, then aesdeclast with %xmm0 */
3647bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3648bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3649bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3650bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3651bc3d5698SJohn Baldwin.byte	102,15,56,223,208
3652bc3d5698SJohn Baldwin.byte	102,15,56,223,216
3653bc3d5698SJohn Baldwin.byte	102,15,56,223,224
3654bc3d5698SJohn Baldwin.byte	102,15,56,223,232
3655bc3d5698SJohn Baldwin	ret
3656bc3d5698SJohn Baldwin.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: file-local helper (no .globl) that runs six 16-byte
 * blocks through the AES encryption rounds side by side.
 *
 * In:   %edx = address of the round-key array (16 bytes per round key)
 *       %ecx = round-count word; callers in this file load it from
 *              240(%edx) of the key structure
 *       %xmm2..%xmm7 = the six input blocks
 * Out:  %xmm2..%xmm7 = the six processed blocks
 * Clobbers: %xmm0, %xmm1, %ecx, %edx, EFLAGS
 *
 * Raw opcode bytes: 102,15,56,220,modrm = aesenc and
 * 102,15,56,221,modrm = aesenclast.  ModRM 209/217/225/233/241/249 is
 * source %xmm1 with destination %xmm2..%xmm7; 208/216/224/232/240/248 is
 * source %xmm0 with destination %xmm2..%xmm7.
 *
 * .L_aesni_encrypt6_enter is a second entry point into the round loop;
 * aesni_ctr32_encrypt_blocks later in this file calls it directly after
 * performing the whitening and first round itself.
 */
3657bc3d5698SJohn Baldwin.type	_aesni_encrypt6,@function
3658bc3d5698SJohn Baldwin.align	16
3659bc3d5698SJohn Baldwin_aesni_encrypt6:
3660c0855eaaSJohn Baldwin	#ifdef __CET__
3661c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
3662c0855eaaSJohn Baldwin.byte	243,15,30,251
3663c0855eaaSJohn Baldwin	#endif
3664c0855eaaSJohn Baldwin
/*
 * Prologue: XOR round key 0 (%xmm0) into each block, interleaved with the
 * first aesenc (round key 1 in %xmm1) on %xmm2, %xmm3 and %xmm4.
 */
3665bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3666bc3d5698SJohn Baldwin	shll	$4,%ecx
3667bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3668bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3669bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3670bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3671bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3672bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
3673bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
3674bc3d5698SJohn Baldwin.byte	102,15,56,220,217
/* %edx = key base + 32 + 16*count; %ecx = -16*count (negative byte index) */
3675bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3676bc3d5698SJohn Baldwin	negl	%ecx
3677bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3678bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
/* (%edx,%ecx) is key base + 32 here, i.e. round key 2 */
3679bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm0
3680bc3d5698SJohn Baldwin	addl	$16,%ecx
/* Join the loop midway: %xmm2..%xmm4 already had their %xmm1 round */
3681bc3d5698SJohn Baldwin	jmp	.L008_aesni_encrypt6_inner
3682bc3d5698SJohn Baldwin.align	16
3683bc3d5698SJohn Baldwin.L009enc6_loop:
/* aesenc %xmm1 into %xmm2..%xmm4 */
3684bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3685bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3686bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3687bc3d5698SJohn Baldwin.L008_aesni_encrypt6_inner:
/* aesenc %xmm1 into %xmm5..%xmm7 */
3688bc3d5698SJohn Baldwin.byte	102,15,56,220,233
3689bc3d5698SJohn Baldwin.byte	102,15,56,220,241
3690bc3d5698SJohn Baldwin.byte	102,15,56,220,249
3691bc3d5698SJohn Baldwin.L_aesni_encrypt6_enter:
/* Fetch the next round key; addl sets ZF that the jnz below consumes */
3692bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
3693bc3d5698SJohn Baldwin	addl	$32,%ecx
/* aesenc %xmm0 into %xmm2..%xmm7 */
3694bc3d5698SJohn Baldwin.byte	102,15,56,220,208
3695bc3d5698SJohn Baldwin.byte	102,15,56,220,216
3696bc3d5698SJohn Baldwin.byte	102,15,56,220,224
3697bc3d5698SJohn Baldwin.byte	102,15,56,220,232
3698bc3d5698SJohn Baldwin.byte	102,15,56,220,240
3699bc3d5698SJohn Baldwin.byte	102,15,56,220,248
3700bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3701bc3d5698SJohn Baldwin	jnz	.L009enc6_loop
/* Tail: one more aesenc with %xmm1, then aesenclast with %xmm0 */
3702bc3d5698SJohn Baldwin.byte	102,15,56,220,209
3703bc3d5698SJohn Baldwin.byte	102,15,56,220,217
3704bc3d5698SJohn Baldwin.byte	102,15,56,220,225
3705bc3d5698SJohn Baldwin.byte	102,15,56,220,233
3706bc3d5698SJohn Baldwin.byte	102,15,56,220,241
3707bc3d5698SJohn Baldwin.byte	102,15,56,220,249
3708bc3d5698SJohn Baldwin.byte	102,15,56,221,208
3709bc3d5698SJohn Baldwin.byte	102,15,56,221,216
3710bc3d5698SJohn Baldwin.byte	102,15,56,221,224
3711bc3d5698SJohn Baldwin.byte	102,15,56,221,232
3712bc3d5698SJohn Baldwin.byte	102,15,56,221,240
3713bc3d5698SJohn Baldwin.byte	102,15,56,221,248
3714bc3d5698SJohn Baldwin	ret
3715bc3d5698SJohn Baldwin.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: file-local helper (no .globl) that runs six 16-byte
 * blocks through the AES decryption rounds side by side.
 *
 * In:   %edx = address of the round-key array (16 bytes per round key)
 *       %ecx = round-count word; callers in this file load it from
 *              240(%edx) of the key structure
 *       %xmm2..%xmm7 = the six input blocks
 * Out:  %xmm2..%xmm7 = the six processed blocks
 * Clobbers: %xmm0, %xmm1, %ecx, %edx, EFLAGS
 *
 * Raw opcode bytes: 102,15,56,222,modrm = aesdec and
 * 102,15,56,223,modrm = aesdeclast.  ModRM 209/217/225/233/241/249 is
 * source %xmm1 with destination %xmm2..%xmm7; 208/216/224/232/240/248 is
 * source %xmm0 with destination %xmm2..%xmm7.
 *
 * .L_aesni_decrypt6_enter is a second entry label into the round loop.
 * NOTE(review): no user of that label appears in this part of the file.
 */
3716bc3d5698SJohn Baldwin.type	_aesni_decrypt6,@function
3717bc3d5698SJohn Baldwin.align	16
3718bc3d5698SJohn Baldwin_aesni_decrypt6:
3719c0855eaaSJohn Baldwin	#ifdef __CET__
3720c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
3721c0855eaaSJohn Baldwin.byte	243,15,30,251
3722c0855eaaSJohn Baldwin	#endif
3723c0855eaaSJohn Baldwin
/*
 * Prologue: XOR round key 0 (%xmm0) into each block, interleaved with the
 * first aesdec (round key 1 in %xmm1) on %xmm2, %xmm3 and %xmm4.
 */
3724bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3725bc3d5698SJohn Baldwin	shll	$4,%ecx
3726bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3727bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3728bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
3729bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
3730bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3731bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
3732bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
3733bc3d5698SJohn Baldwin.byte	102,15,56,222,217
/* %edx = key base + 32 + 16*count; %ecx = -16*count (negative byte index) */
3734bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
3735bc3d5698SJohn Baldwin	negl	%ecx
3736bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3737bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
/* (%edx,%ecx) is key base + 32 here, i.e. round key 2 */
3738bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm0
3739bc3d5698SJohn Baldwin	addl	$16,%ecx
/* Join the loop midway: %xmm2..%xmm4 already had their %xmm1 round */
3740bc3d5698SJohn Baldwin	jmp	.L010_aesni_decrypt6_inner
3741bc3d5698SJohn Baldwin.align	16
3742bc3d5698SJohn Baldwin.L011dec6_loop:
/* aesdec %xmm1 into %xmm2..%xmm4 */
3743bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3744bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3745bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3746bc3d5698SJohn Baldwin.L010_aesni_decrypt6_inner:
/* aesdec %xmm1 into %xmm5..%xmm7 */
3747bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3748bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3749bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3750bc3d5698SJohn Baldwin.L_aesni_decrypt6_enter:
/* Fetch the next round key; addl sets ZF that the jnz below consumes */
3751bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
3752bc3d5698SJohn Baldwin	addl	$32,%ecx
/* aesdec %xmm0 into %xmm2..%xmm7 */
3753bc3d5698SJohn Baldwin.byte	102,15,56,222,208
3754bc3d5698SJohn Baldwin.byte	102,15,56,222,216
3755bc3d5698SJohn Baldwin.byte	102,15,56,222,224
3756bc3d5698SJohn Baldwin.byte	102,15,56,222,232
3757bc3d5698SJohn Baldwin.byte	102,15,56,222,240
3758bc3d5698SJohn Baldwin.byte	102,15,56,222,248
3759bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
3760bc3d5698SJohn Baldwin	jnz	.L011dec6_loop
/* Tail: one more aesdec with %xmm1, then aesdeclast with %xmm0 */
3761bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3762bc3d5698SJohn Baldwin.byte	102,15,56,222,217
3763bc3d5698SJohn Baldwin.byte	102,15,56,222,225
3764bc3d5698SJohn Baldwin.byte	102,15,56,222,233
3765bc3d5698SJohn Baldwin.byte	102,15,56,222,241
3766bc3d5698SJohn Baldwin.byte	102,15,56,222,249
3767bc3d5698SJohn Baldwin.byte	102,15,56,223,208
3768bc3d5698SJohn Baldwin.byte	102,15,56,223,216
3769bc3d5698SJohn Baldwin.byte	102,15,56,223,224
3770bc3d5698SJohn Baldwin.byte	102,15,56,223,232
3771bc3d5698SJohn Baldwin.byte	102,15,56,223,240
3772bc3d5698SJohn Baldwin.byte	102,15,56,223,248
3773bc3d5698SJohn Baldwin	ret
3774bc3d5698SJohn Baldwin.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * aesni_ecb_encrypt: exported routine that encrypts or decrypts a buffer
 * one 16-byte block at a time, each block independently of the others.
 *
 * Stack arguments (offsets are after the four register pushes below):
 *   20(%esp) -> %esi  source pointer (read)
 *   24(%esp) -> %edi  destination pointer (written)
 *   28(%esp) -> %eax  length in bytes; rounded down to a multiple of 16,
 *                     and a result of zero returns immediately
 *   32(%esp) -> %edx  key structure; round-count word read at 240(%edx)
 *   36(%esp) -> %ebx  direction flag: non-zero = encrypt, zero = decrypt
 *
 * The _aesni_{en,de}crypt{2,3,4,6} helpers modify %edx and %ecx, so %ebp
 * and %ebx hold copies of the key pointer and round count that are
 * reloaded after each six-block call.  Full groups of six blocks go
 * through the main loop; the remaining one to five blocks are dispatched
 * to the matching helper.  %xmm0-%xmm7 are zeroed before returning and the
 * pushed %ebp/%ebx/%esi/%edi are restored.
 */
3775bc3d5698SJohn Baldwin.globl	aesni_ecb_encrypt
3776bc3d5698SJohn Baldwin.type	aesni_ecb_encrypt,@function
3777bc3d5698SJohn Baldwin.align	16
3778bc3d5698SJohn Baldwinaesni_ecb_encrypt:
3779bc3d5698SJohn Baldwin.L_aesni_ecb_encrypt_begin:
3780c0855eaaSJohn Baldwin	#ifdef __CET__
3781c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
3782c0855eaaSJohn Baldwin.byte	243,15,30,251
3783c0855eaaSJohn Baldwin	#endif
3784c0855eaaSJohn Baldwin
3785bc3d5698SJohn Baldwin	pushl	%ebp
3786bc3d5698SJohn Baldwin	pushl	%ebx
3787bc3d5698SJohn Baldwin	pushl	%esi
3788bc3d5698SJohn Baldwin	pushl	%edi
3789bc3d5698SJohn Baldwin	movl	20(%esp),%esi
3790bc3d5698SJohn Baldwin	movl	24(%esp),%edi
3791bc3d5698SJohn Baldwin	movl	28(%esp),%eax
3792bc3d5698SJohn Baldwin	movl	32(%esp),%edx
3793bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
/* Drop any partial trailing block; nothing to do if under 16 bytes */
3794bc3d5698SJohn Baldwin	andl	$-16,%eax
3795bc3d5698SJohn Baldwin	jz	.L012ecb_ret
3796bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/* Direction flag: zero selects the decrypt half of the routine */
3797bc3d5698SJohn Baldwin	testl	%ebx,%ebx
3798bc3d5698SJohn Baldwin	jz	.L013ecb_decrypt
/* ---- Encrypt path: %ebp = key pointer copy, %ebx = round count copy ---- */
3799bc3d5698SJohn Baldwin	movl	%edx,%ebp
3800bc3d5698SJohn Baldwin	movl	%ecx,%ebx
/* Fewer than six blocks: go straight to the tail dispatcher */
3801bc3d5698SJohn Baldwin	cmpl	$96,%eax
3802bc3d5698SJohn Baldwin	jb	.L014ecb_enc_tail
/* Preload the first six blocks and enter the loop at the helper call */
3803bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
3804bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
3805bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
3806bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
3807bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
3808bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
3809bc3d5698SJohn Baldwin	leal	96(%esi),%esi
3810bc3d5698SJohn Baldwin	subl	$96,%eax
3811bc3d5698SJohn Baldwin	jmp	.L015ecb_enc_loop6_enter
3812bc3d5698SJohn Baldwin.align	16
/* Loop body: store the previous six results while loading the next six */
3813bc3d5698SJohn Baldwin.L016ecb_enc_loop6:
3814bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3815bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
3816bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3817bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
3818bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3819bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
3820bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3821bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
3822bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3823bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
3824bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
3825bc3d5698SJohn Baldwin	leal	96(%edi),%edi
3826bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
3827bc3d5698SJohn Baldwin	leal	96(%esi),%esi
3828bc3d5698SJohn Baldwin.L015ecb_enc_loop6_enter:
3829bc3d5698SJohn Baldwin	call	_aesni_encrypt6
/* Reload the key pointer and round count that the helper modified */
3830bc3d5698SJohn Baldwin	movl	%ebp,%edx
3831bc3d5698SJohn Baldwin	movl	%ebx,%ecx
/* No borrow means at least six more blocks remain */
3832bc3d5698SJohn Baldwin	subl	$96,%eax
3833bc3d5698SJohn Baldwin	jnc	.L016ecb_enc_loop6
/* Flush the final six results of the loop */
3834bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3835bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3836bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3837bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3838bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3839bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
3840bc3d5698SJohn Baldwin	leal	96(%edi),%edi
/* Undo the last subtraction: %eax = leftover bytes (0, 16, ..., 80) */
3841bc3d5698SJohn Baldwin	addl	$96,%eax
3842bc3d5698SJohn Baldwin	jz	.L012ecb_ret
/*
 * Tail dispatcher for 1-5 remaining blocks.  The movups loads leave
 * EFLAGS untouched, so each je tests the cmpl that precedes its jb.
 */
3843bc3d5698SJohn Baldwin.L014ecb_enc_tail:
3844bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
3845bc3d5698SJohn Baldwin	cmpl	$32,%eax
3846bc3d5698SJohn Baldwin	jb	.L017ecb_enc_one
3847bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
3848bc3d5698SJohn Baldwin	je	.L018ecb_enc_two
3849bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
3850bc3d5698SJohn Baldwin	cmpl	$64,%eax
3851bc3d5698SJohn Baldwin	jb	.L019ecb_enc_three
3852bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
3853bc3d5698SJohn Baldwin	je	.L020ecb_enc_four
/* Five blocks: zero the unused sixth lane and reuse the six-block helper */
3854bc3d5698SJohn Baldwin	movups	64(%esi),%xmm6
3855bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
3856bc3d5698SJohn Baldwin	call	_aesni_encrypt6
3857bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3858bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3859bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3860bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3861bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3862bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3863bc3d5698SJohn Baldwin.align	16
/* One block, inline: whitening, %ecx aesenc rounds, then aesenclast */
3864bc3d5698SJohn Baldwin.L017ecb_enc_one:
3865bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3866bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3867bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3868bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3869bc3d5698SJohn Baldwin.L021enc1_loop_3:
/* 102,15,56,220,209 = aesenc %xmm1,%xmm2 */
3870bc3d5698SJohn Baldwin.byte	102,15,56,220,209
/* ZF from decl survives the movups and leal and is tested by jnz */
3871bc3d5698SJohn Baldwin	decl	%ecx
3872bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
3873bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3874bc3d5698SJohn Baldwin	jnz	.L021enc1_loop_3
/* 102,15,56,221,209 = aesenclast %xmm1,%xmm2 */
3875bc3d5698SJohn Baldwin.byte	102,15,56,221,209
3876bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3877bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3878bc3d5698SJohn Baldwin.align	16
3879bc3d5698SJohn Baldwin.L018ecb_enc_two:
3880bc3d5698SJohn Baldwin	call	_aesni_encrypt2
3881bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3882bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3883bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3884bc3d5698SJohn Baldwin.align	16
3885bc3d5698SJohn Baldwin.L019ecb_enc_three:
3886bc3d5698SJohn Baldwin	call	_aesni_encrypt3
3887bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3888bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3889bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3890bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3891bc3d5698SJohn Baldwin.align	16
3892bc3d5698SJohn Baldwin.L020ecb_enc_four:
3893bc3d5698SJohn Baldwin	call	_aesni_encrypt4
3894bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3895bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3896bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3897bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3898bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3899bc3d5698SJohn Baldwin.align	16
/* ---- Decrypt path: same structure as above using the decrypt helpers ---- */
3900bc3d5698SJohn Baldwin.L013ecb_decrypt:
3901bc3d5698SJohn Baldwin	movl	%edx,%ebp
3902bc3d5698SJohn Baldwin	movl	%ecx,%ebx
3903bc3d5698SJohn Baldwin	cmpl	$96,%eax
3904bc3d5698SJohn Baldwin	jb	.L022ecb_dec_tail
3905bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
3906bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
3907bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
3908bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
3909bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
3910bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
3911bc3d5698SJohn Baldwin	leal	96(%esi),%esi
3912bc3d5698SJohn Baldwin	subl	$96,%eax
3913bc3d5698SJohn Baldwin	jmp	.L023ecb_dec_loop6_enter
3914bc3d5698SJohn Baldwin.align	16
/* Loop body: store the previous six results while loading the next six */
3915bc3d5698SJohn Baldwin.L024ecb_dec_loop6:
3916bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3917bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
3918bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3919bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
3920bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3921bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
3922bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3923bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
3924bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3925bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
3926bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
3927bc3d5698SJohn Baldwin	leal	96(%edi),%edi
3928bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
3929bc3d5698SJohn Baldwin	leal	96(%esi),%esi
3930bc3d5698SJohn Baldwin.L023ecb_dec_loop6_enter:
3931bc3d5698SJohn Baldwin	call	_aesni_decrypt6
/* Reload the key pointer and round count that the helper modified */
3932bc3d5698SJohn Baldwin	movl	%ebp,%edx
3933bc3d5698SJohn Baldwin	movl	%ebx,%ecx
/* No borrow means at least six more blocks remain */
3934bc3d5698SJohn Baldwin	subl	$96,%eax
3935bc3d5698SJohn Baldwin	jnc	.L024ecb_dec_loop6
/* Flush the final six results of the loop */
3936bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3937bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3938bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3939bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3940bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3941bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
3942bc3d5698SJohn Baldwin	leal	96(%edi),%edi
/* Undo the last subtraction: %eax = leftover bytes (0, 16, ..., 80) */
3943bc3d5698SJohn Baldwin	addl	$96,%eax
3944bc3d5698SJohn Baldwin	jz	.L012ecb_ret
/* Tail dispatcher for 1-5 remaining blocks (see the encrypt tail) */
3945bc3d5698SJohn Baldwin.L022ecb_dec_tail:
3946bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
3947bc3d5698SJohn Baldwin	cmpl	$32,%eax
3948bc3d5698SJohn Baldwin	jb	.L025ecb_dec_one
3949bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
3950bc3d5698SJohn Baldwin	je	.L026ecb_dec_two
3951bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
3952bc3d5698SJohn Baldwin	cmpl	$64,%eax
3953bc3d5698SJohn Baldwin	jb	.L027ecb_dec_three
3954bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
3955bc3d5698SJohn Baldwin	je	.L028ecb_dec_four
/* Five blocks: zero the unused sixth lane and reuse the six-block helper */
3956bc3d5698SJohn Baldwin	movups	64(%esi),%xmm6
3957bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
3958bc3d5698SJohn Baldwin	call	_aesni_decrypt6
3959bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3960bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3961bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3962bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
3963bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
3964bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3965bc3d5698SJohn Baldwin.align	16
/* One block, inline: whitening, %ecx aesdec rounds, then aesdeclast */
3966bc3d5698SJohn Baldwin.L025ecb_dec_one:
3967bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
3968bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
3969bc3d5698SJohn Baldwin	leal	32(%edx),%edx
3970bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
3971bc3d5698SJohn Baldwin.L029dec1_loop_4:
/* 102,15,56,222,209 = aesdec %xmm1,%xmm2 */
3972bc3d5698SJohn Baldwin.byte	102,15,56,222,209
3973bc3d5698SJohn Baldwin	decl	%ecx
3974bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
3975bc3d5698SJohn Baldwin	leal	16(%edx),%edx
3976bc3d5698SJohn Baldwin	jnz	.L029dec1_loop_4
/* 102,15,56,223,209 = aesdeclast %xmm1,%xmm2 */
3977bc3d5698SJohn Baldwin.byte	102,15,56,223,209
3978bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3979bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3980bc3d5698SJohn Baldwin.align	16
3981bc3d5698SJohn Baldwin.L026ecb_dec_two:
3982bc3d5698SJohn Baldwin	call	_aesni_decrypt2
3983bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3984bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3985bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3986bc3d5698SJohn Baldwin.align	16
3987bc3d5698SJohn Baldwin.L027ecb_dec_three:
3988bc3d5698SJohn Baldwin	call	_aesni_decrypt3
3989bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3990bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3991bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3992bc3d5698SJohn Baldwin	jmp	.L012ecb_ret
3993bc3d5698SJohn Baldwin.align	16
/* Four blocks: the last case falls through into the common exit */
3994bc3d5698SJohn Baldwin.L028ecb_dec_four:
3995bc3d5698SJohn Baldwin	call	_aesni_decrypt4
3996bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
3997bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
3998bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
3999bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
/* Common exit: zero every XMM register used, restore saved registers */
4000bc3d5698SJohn Baldwin.L012ecb_ret:
4001bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4002bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4003bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4004bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4005bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4006bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4007bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4008bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4009bc3d5698SJohn Baldwin	popl	%edi
4010bc3d5698SJohn Baldwin	popl	%esi
4011bc3d5698SJohn Baldwin	popl	%ebx
4012bc3d5698SJohn Baldwin	popl	%ebp
4013bc3d5698SJohn Baldwin	ret
4014bc3d5698SJohn Baldwin.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
/*
 * aesni_ccm64_encrypt_blocks: exported routine that, per 16-byte input
 * block, encrypts a running counter block and a running MAC block in
 * parallel, XORs the encrypted counter into the input to produce the
 * output, and advances the counter by one (64-bit add).
 *
 * Stack arguments (offsets are after the four register pushes below):
 *   20(%esp) -> %esi  source pointer (read, 16 bytes per block)
 *   24(%esp) -> %edi  destination pointer (written, 16 bytes per block)
 *   28(%esp) -> %eax  number of 16-byte blocks
 *   32(%esp) -> %edx  key structure; round-count word read at 240(%edx)
 *   36(%esp) -> %ebx  pointer to the 16-byte counter block (read only)
 *   40(%esp) -> %ecx  pointer to the 16-byte MAC block (read, then
 *                     overwritten with the final value)
 *
 * Register roles inside the loop: %xmm7 = byte-reversed counter,
 * %xmm2 = counter block being encrypted, %xmm3 = MAC accumulator,
 * %xmm5 = byte-reversal shuffle mask, %xmm6 = current input block,
 * %ebp = key base, %ebx = initial negative round-key index.
 *
 * NOTE(review): the block count is only tested after a block has been
 * processed, so a count of zero is not handled as "no work".
 * %xmm0-%xmm7 are zeroed before returning.
 */
4015bc3d5698SJohn Baldwin.globl	aesni_ccm64_encrypt_blocks
4016bc3d5698SJohn Baldwin.type	aesni_ccm64_encrypt_blocks,@function
4017bc3d5698SJohn Baldwin.align	16
4018bc3d5698SJohn Baldwinaesni_ccm64_encrypt_blocks:
4019bc3d5698SJohn Baldwin.L_aesni_ccm64_encrypt_blocks_begin:
4020c0855eaaSJohn Baldwin	#ifdef __CET__
4021c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
4022c0855eaaSJohn Baldwin.byte	243,15,30,251
4023c0855eaaSJohn Baldwin	#endif
4024c0855eaaSJohn Baldwin
4025bc3d5698SJohn Baldwin	pushl	%ebp
4026bc3d5698SJohn Baldwin	pushl	%ebx
4027bc3d5698SJohn Baldwin	pushl	%esi
4028bc3d5698SJohn Baldwin	pushl	%edi
4029bc3d5698SJohn Baldwin	movl	20(%esp),%esi
4030bc3d5698SJohn Baldwin	movl	24(%esp),%edi
4031bc3d5698SJohn Baldwin	movl	28(%esp),%eax
4032bc3d5698SJohn Baldwin	movl	32(%esp),%edx
4033bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
4034bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
/*
 * Carve out a 16-byte-aligned scratch area (needed for the movdqa/paddq
 * memory operands below) and keep the caller's %esp at 48(%esp).
 */
4035bc3d5698SJohn Baldwin	movl	%esp,%ebp
4036bc3d5698SJohn Baldwin	subl	$60,%esp
4037bc3d5698SJohn Baldwin	andl	$-16,%esp
4038bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
/* %xmm7 = counter block, %xmm3 = MAC block, %ecx = round count */
4039bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
4040bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm3
4041bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/*
 * 0(%esp)..15(%esp): dwords 0x0c0d0e0f, 0x08090a0b, 0x04050607,
 * 0x00010203, i.e. bytes 15,14,...,0 in memory: a pshufb mask that
 * reverses the byte order of a 16-byte register.
 */
4042bc3d5698SJohn Baldwin	movl	$202182159,(%esp)
4043bc3d5698SJohn Baldwin	movl	$134810123,4(%esp)
4044bc3d5698SJohn Baldwin	movl	$67438087,8(%esp)
4045bc3d5698SJohn Baldwin	movl	$66051,12(%esp)
/* 16(%esp)..31(%esp): dwords {1,0,0,0}, the paddq counter increment */
4046bc3d5698SJohn Baldwin	movl	$1,%ebx
4047bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
4048bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
4049bc3d5698SJohn Baldwin	movl	%ebp,20(%esp)
4050bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
4051bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
/*
 * %ecx = 16*count; %ebx = 16 - 16*count (start index for the round loop);
 * %ebp = key base; %edx = key base + 32 + 16*count.
 */
4052bc3d5698SJohn Baldwin	shll	$4,%ecx
4053bc3d5698SJohn Baldwin	movl	$16,%ebx
4054bc3d5698SJohn Baldwin	leal	(%edx),%ebp
4055bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm5
/* %xmm2 keeps the counter in its original byte order for encryption */
4056bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
4057bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
4058bc3d5698SJohn Baldwin	subl	%ecx,%ebx
/* 102,15,56,0,253 = pshufb %xmm5,%xmm7: byte-reverse the counter copy */
4059bc3d5698SJohn Baldwin.byte	102,15,56,0,253
4060bc3d5698SJohn Baldwin.L030ccm64_enc_outer:
/*
 * Whitening with round key 0: %xmm2 ^= key0 (counter) and
 * %xmm3 ^= input ^ key0 (MAC absorbs the input block).
 */
4061bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
4062bc3d5698SJohn Baldwin	movl	%ebx,%ecx
4063bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
4064bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4065bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
4066bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm0
4067bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
4068bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
/* Two rounds per iteration on both %xmm2 and %xmm3 */
4069bc3d5698SJohn Baldwin.L031ccm64_enc2_loop:
/* aesenc %xmm1 into %xmm2 and %xmm3 */
4070bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4071bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4072bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
4073bc3d5698SJohn Baldwin	addl	$32,%ecx
/* aesenc %xmm0 into %xmm2 and %xmm3 */
4074bc3d5698SJohn Baldwin.byte	102,15,56,220,208
4075bc3d5698SJohn Baldwin.byte	102,15,56,220,216
4076bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
4077bc3d5698SJohn Baldwin	jnz	.L031ccm64_enc2_loop
/* Final aesenc with %xmm1 */
4078bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4079bc3d5698SJohn Baldwin.byte	102,15,56,220,217
/* Advance the byte-reversed counter by the {1,0,0,0} constant */
4080bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm7
/* ZF from this decl is consumed by the jnz at the loop bottom; none of */
/* the instructions in between modify EFLAGS */
4081bc3d5698SJohn Baldwin	decl	%eax
/* aesenclast %xmm0 into %xmm2 and %xmm3 */
4082bc3d5698SJohn Baldwin.byte	102,15,56,221,208
4083bc3d5698SJohn Baldwin.byte	102,15,56,221,216
4084bc3d5698SJohn Baldwin	leal	16(%esi),%esi
/* Output block = input block XOR encrypted counter */
4085bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm6
4086bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
4087bc3d5698SJohn Baldwin	movups	%xmm6,(%edi)
/* 102,15,56,0,213 = pshufb %xmm5,%xmm2: next counter, original order */
4088bc3d5698SJohn Baldwin.byte	102,15,56,0,213
4089bc3d5698SJohn Baldwin	leal	16(%edi),%edi
4090bc3d5698SJohn Baldwin	jnz	.L030ccm64_enc_outer
/* Restore the caller's %esp, then write the MAC back through the */
/* pointer held in the 40(%esp) argument slot */
4091bc3d5698SJohn Baldwin	movl	48(%esp),%esp
4092bc3d5698SJohn Baldwin	movl	40(%esp),%edi
4093bc3d5698SJohn Baldwin	movups	%xmm3,(%edi)
/* Zero every XMM register before returning */
4094bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4095bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4096bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4097bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4098bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4099bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4100bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4101bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4102bc3d5698SJohn Baldwin	popl	%edi
4103bc3d5698SJohn Baldwin	popl	%esi
4104bc3d5698SJohn Baldwin	popl	%ebx
4105bc3d5698SJohn Baldwin	popl	%ebp
4106bc3d5698SJohn Baldwin	ret
4107bc3d5698SJohn Baldwin.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
/*
 * aesni_ccm64_decrypt_blocks: exported counterpart of
 * aesni_ccm64_encrypt_blocks.  Each input block is XORed with the
 * encrypted counter to form the output block, and that output block is
 * then absorbed into the running MAC block.  Only the AES encryption
 * instructions (aesenc/aesenclast) are used.
 *
 * Stack arguments (offsets are after the four register pushes below):
 *   20(%esp) -> %esi  source pointer (read, 16 bytes per block)
 *   24(%esp) -> %edi  destination pointer (written, 16 bytes per block)
 *   28(%esp) -> %eax  number of 16-byte blocks
 *   32(%esp) -> %edx  key structure; round-count word read at 240(%edx)
 *   36(%esp) -> %ebx  pointer to the 16-byte counter block (read only)
 *   40(%esp) -> %ecx  pointer to the 16-byte MAC block (read, then
 *                     overwritten with the final value)
 *
 * Register roles: %xmm7 = byte-reversed counter, %xmm2 = counter block /
 * encrypted counter, %xmm3 = MAC accumulator, %xmm5 = byte-reversal
 * shuffle mask, %xmm6 = current input/output block, %ebp = key base.
 *
 * NOTE(review): the first block is read and written before the block
 * count is tested, so a count of zero is not handled as "no work".
 * %xmm0-%xmm7 are zeroed before returning.
 */
4108bc3d5698SJohn Baldwin.globl	aesni_ccm64_decrypt_blocks
4109bc3d5698SJohn Baldwin.type	aesni_ccm64_decrypt_blocks,@function
4110bc3d5698SJohn Baldwin.align	16
4111bc3d5698SJohn Baldwinaesni_ccm64_decrypt_blocks:
4112bc3d5698SJohn Baldwin.L_aesni_ccm64_decrypt_blocks_begin:
4113c0855eaaSJohn Baldwin	#ifdef __CET__
4114c0855eaaSJohn Baldwin
/* 243,15,30,251 = F3 0F 1E FB = endbr32 (CET indirect-branch landing pad) */
4115c0855eaaSJohn Baldwin.byte	243,15,30,251
4116c0855eaaSJohn Baldwin	#endif
4117c0855eaaSJohn Baldwin
4118bc3d5698SJohn Baldwin	pushl	%ebp
4119bc3d5698SJohn Baldwin	pushl	%ebx
4120bc3d5698SJohn Baldwin	pushl	%esi
4121bc3d5698SJohn Baldwin	pushl	%edi
4122bc3d5698SJohn Baldwin	movl	20(%esp),%esi
4123bc3d5698SJohn Baldwin	movl	24(%esp),%edi
4124bc3d5698SJohn Baldwin	movl	28(%esp),%eax
4125bc3d5698SJohn Baldwin	movl	32(%esp),%edx
4126bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
4127bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
/*
 * Carve out a 16-byte-aligned scratch area (needed for the movdqa/paddq
 * memory operands below) and keep the caller's %esp at 48(%esp).
 */
4128bc3d5698SJohn Baldwin	movl	%esp,%ebp
4129bc3d5698SJohn Baldwin	subl	$60,%esp
4130bc3d5698SJohn Baldwin	andl	$-16,%esp
4131bc3d5698SJohn Baldwin	movl	%ebp,48(%esp)
/* %xmm7 = counter block, %xmm3 = MAC block, %ecx = round count */
4132bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
4133bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm3
4134bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/*
 * 0(%esp)..15(%esp): dwords 0x0c0d0e0f, 0x08090a0b, 0x04050607,
 * 0x00010203, i.e. bytes 15,14,...,0 in memory: a pshufb mask that
 * reverses the byte order of a 16-byte register.
 */
4135bc3d5698SJohn Baldwin	movl	$202182159,(%esp)
4136bc3d5698SJohn Baldwin	movl	$134810123,4(%esp)
4137bc3d5698SJohn Baldwin	movl	$67438087,8(%esp)
4138bc3d5698SJohn Baldwin	movl	$66051,12(%esp)
/* 16(%esp)..31(%esp): dwords {1,0,0,0}, the paddq counter increment */
4139bc3d5698SJohn Baldwin	movl	$1,%ebx
4140bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
4141bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
4142bc3d5698SJohn Baldwin	movl	%ebp,20(%esp)
4143bc3d5698SJohn Baldwin	movl	%ebp,24(%esp)
4144bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
4145bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm5
/* %xmm2 keeps the counter in its original byte order for encryption */
4146bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
/* Save key base and round count; the inline loop below consumes both */
4147bc3d5698SJohn Baldwin	movl	%edx,%ebp
4148bc3d5698SJohn Baldwin	movl	%ecx,%ebx
/* 102,15,56,0,253 = pshufb %xmm5,%xmm7: byte-reverse the counter copy */
4149bc3d5698SJohn Baldwin.byte	102,15,56,0,253
/* Encrypt the first counter block on its own (single-block loop) */
4150bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4151bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4152bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4153bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4154bc3d5698SJohn Baldwin.L032enc1_loop_5:
/* 102,15,56,220,209 = aesenc %xmm1,%xmm2 */
4155bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4156bc3d5698SJohn Baldwin	decl	%ecx
4157bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4158bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4159bc3d5698SJohn Baldwin	jnz	.L032enc1_loop_5
/* 102,15,56,221,209 = aesenclast %xmm1,%xmm2 */
4160bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/*
 * Set up the two-block round loop: %ebx = 16 - 16*count (start index),
 * %edx = key base + 32 + 16*count.  Also fetch the first input block and
 * advance the byte-reversed counter.
 */
4161bc3d5698SJohn Baldwin	shll	$4,%ebx
4162bc3d5698SJohn Baldwin	movl	$16,%ecx
4163bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
4164bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm7
4165bc3d5698SJohn Baldwin	leal	16(%esi),%esi
4166bc3d5698SJohn Baldwin	subl	%ebx,%ecx
4167bc3d5698SJohn Baldwin	leal	32(%ebp,%ebx,1),%edx
4168bc3d5698SJohn Baldwin	movl	%ecx,%ebx
/* The jump only skips the alignment padding in front of the loop head */
4169bc3d5698SJohn Baldwin	jmp	.L033ccm64_dec_outer
4170bc3d5698SJohn Baldwin.align	16
4171bc3d5698SJohn Baldwin.L033ccm64_dec_outer:
/* Output block = input block XOR encrypted counter; store it */
4172bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm6
4173bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm2
4174bc3d5698SJohn Baldwin	movups	%xmm6,(%edi)
4175bc3d5698SJohn Baldwin	leal	16(%edi),%edi
/* 102,15,56,0,213 = pshufb %xmm5,%xmm2: next counter, original order */
4176bc3d5698SJohn Baldwin.byte	102,15,56,0,213
/* Last block written: leave to fold it into the MAC by itself */
4177bc3d5698SJohn Baldwin	subl	$1,%eax
4178bc3d5698SJohn Baldwin	jz	.L034ccm64_dec_break
/*
 * Whitening with round key 0: %xmm2 ^= key0 (next counter) and
 * %xmm3 ^= output ^ key0 (MAC absorbs the block just produced).
 */
4179bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
4180bc3d5698SJohn Baldwin	movl	%ebx,%ecx
4181bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
4182bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
4183bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4184bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4185bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
/* Two rounds per iteration on both %xmm2 and %xmm3 */
4186bc3d5698SJohn Baldwin.L035ccm64_dec2_loop:
/* aesenc %xmm1 into %xmm2 and %xmm3 */
4187bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4188bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4189bc3d5698SJohn Baldwin	movups	(%edx,%ecx,1),%xmm1
4190bc3d5698SJohn Baldwin	addl	$32,%ecx
/* aesenc %xmm0 into %xmm2 and %xmm3 */
4191bc3d5698SJohn Baldwin.byte	102,15,56,220,208
4192bc3d5698SJohn Baldwin.byte	102,15,56,220,216
4193bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
4194bc3d5698SJohn Baldwin	jnz	.L035ccm64_dec2_loop
/* Fetch the next input block and advance the counter while finishing */
4195bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
4196bc3d5698SJohn Baldwin	paddq	16(%esp),%xmm7
/* Final aesenc with %xmm1, then aesenclast with %xmm0, on both lanes */
4197bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4198bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4199bc3d5698SJohn Baldwin.byte	102,15,56,221,208
4200bc3d5698SJohn Baldwin.byte	102,15,56,221,216
4201bc3d5698SJohn Baldwin	leal	16(%esi),%esi
4202bc3d5698SJohn Baldwin	jmp	.L033ccm64_dec_outer
4203bc3d5698SJohn Baldwin.align	16
/* Fold the final output block into the MAC with a single-block pass */
4204bc3d5698SJohn Baldwin.L034ccm64_dec_break:
4205bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
4206bc3d5698SJohn Baldwin	movl	%ebp,%edx
4207bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4208bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4209bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
4210bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4211bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4212bc3d5698SJohn Baldwin.L036enc1_loop_6:
/* 102,15,56,220,217 = aesenc %xmm1,%xmm3 */
4213bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4214bc3d5698SJohn Baldwin	decl	%ecx
4215bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4216bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4217bc3d5698SJohn Baldwin	jnz	.L036enc1_loop_6
/* 102,15,56,221,217 = aesenclast %xmm1,%xmm3 */
4218bc3d5698SJohn Baldwin.byte	102,15,56,221,217
/* Restore the caller's %esp, then write the MAC back through the */
/* pointer held in the 40(%esp) argument slot */
4219bc3d5698SJohn Baldwin	movl	48(%esp),%esp
4220bc3d5698SJohn Baldwin	movl	40(%esp),%edi
4221bc3d5698SJohn Baldwin	movups	%xmm3,(%edi)
/* Zero every XMM register before returning */
4222bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4223bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4224bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4225bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4226bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4227bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4228bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4229bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4230bc3d5698SJohn Baldwin	popl	%edi
4231bc3d5698SJohn Baldwin	popl	%esi
4232bc3d5698SJohn Baldwin	popl	%ebx
4233bc3d5698SJohn Baldwin	popl	%ebp
4234bc3d5698SJohn Baldwin	ret
4235bc3d5698SJohn Baldwin.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
4236bc3d5698SJohn Baldwin.globl	aesni_ctr32_encrypt_blocks
4237bc3d5698SJohn Baldwin.type	aesni_ctr32_encrypt_blocks,@function
4238bc3d5698SJohn Baldwin.align	16
4239bc3d5698SJohn Baldwinaesni_ctr32_encrypt_blocks:
4240bc3d5698SJohn Baldwin.L_aesni_ctr32_encrypt_blocks_begin:
4241c0855eaaSJohn Baldwin	#ifdef __CET__
4242c0855eaaSJohn Baldwin
4243c0855eaaSJohn Baldwin.byte	243,15,30,251
4244c0855eaaSJohn Baldwin	#endif
4245c0855eaaSJohn Baldwin
4246bc3d5698SJohn Baldwin	pushl	%ebp
4247bc3d5698SJohn Baldwin	pushl	%ebx
4248bc3d5698SJohn Baldwin	pushl	%esi
4249bc3d5698SJohn Baldwin	pushl	%edi
4250bc3d5698SJohn Baldwin	movl	20(%esp),%esi
4251bc3d5698SJohn Baldwin	movl	24(%esp),%edi
4252bc3d5698SJohn Baldwin	movl	28(%esp),%eax
4253bc3d5698SJohn Baldwin	movl	32(%esp),%edx
4254bc3d5698SJohn Baldwin	movl	36(%esp),%ebx
4255bc3d5698SJohn Baldwin	movl	%esp,%ebp
4256bc3d5698SJohn Baldwin	subl	$88,%esp
4257bc3d5698SJohn Baldwin	andl	$-16,%esp
4258bc3d5698SJohn Baldwin	movl	%ebp,80(%esp)
4259bc3d5698SJohn Baldwin	cmpl	$1,%eax
4260bc3d5698SJohn Baldwin	je	.L037ctr32_one_shortcut
4261bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
4262bc3d5698SJohn Baldwin	movl	$202182159,(%esp)
4263bc3d5698SJohn Baldwin	movl	$134810123,4(%esp)
4264bc3d5698SJohn Baldwin	movl	$67438087,8(%esp)
4265bc3d5698SJohn Baldwin	movl	$66051,12(%esp)
4266bc3d5698SJohn Baldwin	movl	$6,%ecx
4267bc3d5698SJohn Baldwin	xorl	%ebp,%ebp
4268bc3d5698SJohn Baldwin	movl	%ecx,16(%esp)
4269bc3d5698SJohn Baldwin	movl	%ecx,20(%esp)
4270bc3d5698SJohn Baldwin	movl	%ecx,24(%esp)
4271bc3d5698SJohn Baldwin	movl	%ebp,28(%esp)
4272bc3d5698SJohn Baldwin.byte	102,15,58,22,251,3
4273bc3d5698SJohn Baldwin.byte	102,15,58,34,253,3
4274bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4275bc3d5698SJohn Baldwin	bswap	%ebx
4276bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4277bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4278bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm2
4279bc3d5698SJohn Baldwin.byte	102,15,58,34,195,0
4280bc3d5698SJohn Baldwin	leal	3(%ebx),%ebp
4281bc3d5698SJohn Baldwin.byte	102,15,58,34,205,0
4282bc3d5698SJohn Baldwin	incl	%ebx
4283bc3d5698SJohn Baldwin.byte	102,15,58,34,195,1
4284bc3d5698SJohn Baldwin	incl	%ebp
4285bc3d5698SJohn Baldwin.byte	102,15,58,34,205,1
4286bc3d5698SJohn Baldwin	incl	%ebx
4287bc3d5698SJohn Baldwin.byte	102,15,58,34,195,2
4288bc3d5698SJohn Baldwin	incl	%ebp
4289bc3d5698SJohn Baldwin.byte	102,15,58,34,205,2
4290bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
4291bc3d5698SJohn Baldwin.byte	102,15,56,0,194
4292bc3d5698SJohn Baldwin	movdqu	(%edx),%xmm6
4293bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
4294bc3d5698SJohn Baldwin.byte	102,15,56,0,202
4295bc3d5698SJohn Baldwin	pshufd	$192,%xmm0,%xmm2
4296bc3d5698SJohn Baldwin	pshufd	$128,%xmm0,%xmm3
4297bc3d5698SJohn Baldwin	cmpl	$6,%eax
4298bc3d5698SJohn Baldwin	jb	.L038ctr32_tail
4299bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
4300bc3d5698SJohn Baldwin	shll	$4,%ecx
4301bc3d5698SJohn Baldwin	movl	$16,%ebx
4302bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
4303bc3d5698SJohn Baldwin	movl	%edx,%ebp
4304bc3d5698SJohn Baldwin	subl	%ecx,%ebx
4305bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
4306bc3d5698SJohn Baldwin	subl	$6,%eax
4307bc3d5698SJohn Baldwin	jmp	.L039ctr32_loop6
4308bc3d5698SJohn Baldwin.align	16
4309bc3d5698SJohn Baldwin.L039ctr32_loop6:
4310bc3d5698SJohn Baldwin	pshufd	$64,%xmm0,%xmm4
4311bc3d5698SJohn Baldwin	movdqa	32(%esp),%xmm0
4312bc3d5698SJohn Baldwin	pshufd	$192,%xmm1,%xmm5
4313bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
4314bc3d5698SJohn Baldwin	pshufd	$128,%xmm1,%xmm6
4315bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4316bc3d5698SJohn Baldwin	pshufd	$64,%xmm1,%xmm7
4317bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
4318bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
4319bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
4320bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4321bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
4322bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
4323bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4324bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
4325bc3d5698SJohn Baldwin	movl	%ebx,%ecx
4326bc3d5698SJohn Baldwin.byte	102,15,56,220,225
4327bc3d5698SJohn Baldwin.byte	102,15,56,220,233
4328bc3d5698SJohn Baldwin.byte	102,15,56,220,241
4329bc3d5698SJohn Baldwin.byte	102,15,56,220,249
4330bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
4331bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
4332bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
4333bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
4334bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
4335bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
4336bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4337bc3d5698SJohn Baldwin	movdqa	16(%esp),%xmm0
4338bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
4339bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm1
4340bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4341bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4342bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
4343bc3d5698SJohn Baldwin	paddd	48(%esp),%xmm0
4344bc3d5698SJohn Baldwin	movdqa	(%esp),%xmm2
4345bc3d5698SJohn Baldwin	movups	48(%esi),%xmm3
4346bc3d5698SJohn Baldwin	movups	64(%esi),%xmm4
4347bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm5
4348bc3d5698SJohn Baldwin	movups	80(%esi),%xmm3
4349bc3d5698SJohn Baldwin	leal	96(%esi),%esi
4350bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
4351bc3d5698SJohn Baldwin.byte	102,15,56,0,194
4352bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm6
4353bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4354bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm7
4355bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
4356bc3d5698SJohn Baldwin.byte	102,15,56,0,202
4357bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
4358bc3d5698SJohn Baldwin	pshufd	$192,%xmm0,%xmm2
4359bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
4360bc3d5698SJohn Baldwin	leal	96(%edi),%edi
4361bc3d5698SJohn Baldwin	pshufd	$128,%xmm0,%xmm3
4362bc3d5698SJohn Baldwin	subl	$6,%eax
4363bc3d5698SJohn Baldwin	jnc	.L039ctr32_loop6
4364bc3d5698SJohn Baldwin	addl	$6,%eax
4365bc3d5698SJohn Baldwin	jz	.L040ctr32_ret
4366bc3d5698SJohn Baldwin	movdqu	(%ebp),%xmm7
4367bc3d5698SJohn Baldwin	movl	%ebp,%edx
4368bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm7
4369bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
4370bc3d5698SJohn Baldwin.L038ctr32_tail:
4371bc3d5698SJohn Baldwin	por	%xmm7,%xmm2
4372bc3d5698SJohn Baldwin	cmpl	$2,%eax
4373bc3d5698SJohn Baldwin	jb	.L041ctr32_one
4374bc3d5698SJohn Baldwin	pshufd	$64,%xmm0,%xmm4
4375bc3d5698SJohn Baldwin	por	%xmm7,%xmm3
4376bc3d5698SJohn Baldwin	je	.L042ctr32_two
4377bc3d5698SJohn Baldwin	pshufd	$192,%xmm1,%xmm5
4378bc3d5698SJohn Baldwin	por	%xmm7,%xmm4
4379bc3d5698SJohn Baldwin	cmpl	$4,%eax
4380bc3d5698SJohn Baldwin	jb	.L043ctr32_three
4381bc3d5698SJohn Baldwin	pshufd	$128,%xmm1,%xmm6
4382bc3d5698SJohn Baldwin	por	%xmm7,%xmm5
4383bc3d5698SJohn Baldwin	je	.L044ctr32_four
4384bc3d5698SJohn Baldwin	por	%xmm7,%xmm6
4385bc3d5698SJohn Baldwin	call	_aesni_encrypt6
4386bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
4387bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
4388bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
4389bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
4390bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm3
4391bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
4392bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
4393bc3d5698SJohn Baldwin	movups	64(%esi),%xmm1
4394bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
4395bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4396bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
4397bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4398bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4399bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4400bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
4401bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
4402bc3d5698SJohn Baldwin.align	16
4403bc3d5698SJohn Baldwin.L037ctr32_one_shortcut:
4404bc3d5698SJohn Baldwin	movups	(%ebx),%xmm2
4405bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4406bc3d5698SJohn Baldwin.L041ctr32_one:
4407bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4408bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4409bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4410bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4411bc3d5698SJohn Baldwin.L045enc1_loop_7:
4412bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4413bc3d5698SJohn Baldwin	decl	%ecx
4414bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4415bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4416bc3d5698SJohn Baldwin	jnz	.L045enc1_loop_7
4417bc3d5698SJohn Baldwin.byte	102,15,56,221,209
4418bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
4419bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm6
4420bc3d5698SJohn Baldwin	movups	%xmm6,(%edi)
4421bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
4422bc3d5698SJohn Baldwin.align	16
4423bc3d5698SJohn Baldwin.L042ctr32_two:
4424bc3d5698SJohn Baldwin	call	_aesni_encrypt2
4425bc3d5698SJohn Baldwin	movups	(%esi),%xmm5
4426bc3d5698SJohn Baldwin	movups	16(%esi),%xmm6
4427bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4428bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4429bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4430bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4431bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
4432bc3d5698SJohn Baldwin.align	16
4433bc3d5698SJohn Baldwin.L043ctr32_three:
4434bc3d5698SJohn Baldwin	call	_aesni_encrypt3
4435bc3d5698SJohn Baldwin	movups	(%esi),%xmm5
4436bc3d5698SJohn Baldwin	movups	16(%esi),%xmm6
4437bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4438bc3d5698SJohn Baldwin	movups	32(%esi),%xmm7
4439bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4440bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4441bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
4442bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4443bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4444bc3d5698SJohn Baldwin	jmp	.L040ctr32_ret
4445bc3d5698SJohn Baldwin.align	16
4446bc3d5698SJohn Baldwin.L044ctr32_four:
4447bc3d5698SJohn Baldwin	call	_aesni_encrypt4
4448bc3d5698SJohn Baldwin	movups	(%esi),%xmm6
4449bc3d5698SJohn Baldwin	movups	16(%esi),%xmm7
4450bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
4451bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
4452bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
4453bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
4454bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4455bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
4456bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4457bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
4458bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4459bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4460bc3d5698SJohn Baldwin.L040ctr32_ret:
4461bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4462bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4463bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4464bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4465bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4466bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
4467bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4468bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
4469bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4470bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
4471bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4472bc3d5698SJohn Baldwin	movl	80(%esp),%esp
4473bc3d5698SJohn Baldwin	popl	%edi
4474bc3d5698SJohn Baldwin	popl	%esi
4475bc3d5698SJohn Baldwin	popl	%ebx
4476bc3d5698SJohn Baldwin	popl	%ebp
4477bc3d5698SJohn Baldwin	ret
4478bc3d5698SJohn Baldwin.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
/*
 * aesni_xts_encrypt -- XTS-style block encryption built on AES-NI.
 *
 * All arguments are read from the stack.  Offsets below are relative to
 * %esp after the four register pushes in the prologue:
 *   20(%esp) -> %esi  source pointer (16-byte blocks are loaded from it)
 *   24(%esp) -> %edi  destination pointer (16-byte blocks are stored to it)
 *   28(%esp) -> %eax  length in bytes
 *   32(%esp) -> %edx  key schedule used for the data blocks
 *   36(%esp)          key schedule used once, to encrypt the initial tweak
 *   40(%esp)          pointer to the 16 bytes that are encrypted into the
 *                     initial tweak
 * NOTE(review): presumably these are (inp, out, len, key1, key2, iv) as in
 * the C prototype -- confirm against the caller/header.
 *
 * For both key schedules the dword at offset 240 is used as the number of
 * aesenc rounds performed before the final aesenclast.
 *
 * Aligned scratch frame built below:
 *     0..95(%esp)   six 16-byte tweak slots
 *    96..111(%esp)  constant dwords {135, 0, 1, 0} used when doubling a tweak
 *   112(%esp)       byte length (later overwritten with length & 15)
 *   116(%esp)       value of %esp to restore before the pops
 *
 * %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
 * %xmm0-%xmm7 and the six tweak slots are zeroed before returning.
 *
 * NOTE(review): with a length below 16 and a nonzero remainder the stealing
 * code would read and write -16(%edi), i.e. before the destination pointer;
 * presumably callers guarantee length >= 16 -- confirm.
 */
4479bc3d5698SJohn Baldwin.globl	aesni_xts_encrypt
4480bc3d5698SJohn Baldwin.type	aesni_xts_encrypt,@function
4481bc3d5698SJohn Baldwin.align	16
4482bc3d5698SJohn Baldwinaesni_xts_encrypt:
4483bc3d5698SJohn Baldwin.L_aesni_xts_encrypt_begin:
	/* When built with CET, the bytes F3 0F 1E FB below encode endbr32. */
4484c0855eaaSJohn Baldwin	#ifdef __CET__
4485c0855eaaSJohn Baldwin
4486c0855eaaSJohn Baldwin.byte	243,15,30,251
4487c0855eaaSJohn Baldwin	#endif
4488c0855eaaSJohn Baldwin
4489bc3d5698SJohn Baldwin	pushl	%ebp
4490bc3d5698SJohn Baldwin	pushl	%ebx
4491bc3d5698SJohn Baldwin	pushl	%esi
4492bc3d5698SJohn Baldwin	pushl	%edi
	/*
	 * Step 1: encrypt the 16 bytes at the sixth argument with the key
	 * schedule given as the fifth argument; the result (in %xmm2) is the
	 * initial tweak.
	 */
4493bc3d5698SJohn Baldwin	movl	36(%esp),%edx
4494bc3d5698SJohn Baldwin	movl	40(%esp),%esi
4495bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4496bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4497bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4498bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4499bc3d5698SJohn Baldwin	leal	32(%edx),%edx
	/* Initial whitening with the first round key. */
4500bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4501bc3d5698SJohn Baldwin.L046enc1_loop_8:
	/* 66 0F 38 DC D1 = aesenc %xmm1,%xmm2; the next round key is fetched while it runs. */
4502bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4503bc3d5698SJohn Baldwin	decl	%ecx
4504bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4505bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4506bc3d5698SJohn Baldwin	jnz	.L046enc1_loop_8
	/* 66 0F 38 DD D1 = aesenclast %xmm1,%xmm2. */
4507bc3d5698SJohn Baldwin.byte	102,15,56,221,209
	/* Step 2: load the data arguments and build the 16-byte aligned scratch frame. */
4508bc3d5698SJohn Baldwin	movl	20(%esp),%esi
4509bc3d5698SJohn Baldwin	movl	24(%esp),%edi
4510bc3d5698SJohn Baldwin	movl	28(%esp),%eax
4511bc3d5698SJohn Baldwin	movl	32(%esp),%edx
	/* Remember the unaligned %esp; it is parked at 116(%esp) below. */
4512bc3d5698SJohn Baldwin	movl	%esp,%ebp
4513bc3d5698SJohn Baldwin	subl	$120,%esp
4514bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4515bc3d5698SJohn Baldwin	andl	$-16,%esp
	/*
	 * 96(%esp) = dwords {135, 0, 1, 0}.  135 (0x87) is xored into the low
	 * byte when bit 127 is shifted out; the 1 carries bit 63 into bit 64.
	 */
4516bc3d5698SJohn Baldwin	movl	$135,96(%esp)
4517bc3d5698SJohn Baldwin	movl	$0,100(%esp)
4518bc3d5698SJohn Baldwin	movl	$1,104(%esp)
4519bc3d5698SJohn Baldwin	movl	$0,108(%esp)
4520bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
4521bc3d5698SJohn Baldwin	movl	%ebp,116(%esp)
	/* %xmm1 = current tweak, %xmm0 = per-dword sign mask of it, %xmm3 = doubling constant. */
4522bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
4523bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4524bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
4525bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
	/* Work on whole blocks only; %ebp now holds the data key schedule, %ebx its round count. */
4526bc3d5698SJohn Baldwin	andl	$-16,%eax
4527bc3d5698SJohn Baldwin	movl	%edx,%ebp
4528bc3d5698SJohn Baldwin	movl	%ecx,%ebx
	/* Fewer than six whole blocks (96 bytes): skip the 6-way loop. */
4529bc3d5698SJohn Baldwin	subl	$96,%eax
4530bc3d5698SJohn Baldwin	jc	.L047xts_enc_short
	/*
	 * Values handed to .L_aesni_encrypt6_enter (defined elsewhere in this file):
	 * %ebx = 16 - 16*rounds, %edx = key + 32 + 16*rounds.
	 */
4531bc3d5698SJohn Baldwin	shll	$4,%ecx
4532bc3d5698SJohn Baldwin	movl	$16,%ebx
4533bc3d5698SJohn Baldwin	subl	%ecx,%ebx
4534bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
4535bc3d5698SJohn Baldwin	jmp	.L048xts_enc_loop6
4536bc3d5698SJohn Baldwin.align	16
4537bc3d5698SJohn Baldwin.L048xts_enc_loop6:
	/*
	 * Store the current tweak and derive the next one, five times over.
	 * Each doubling: pshufd $19 moves the sign mask of dword 3 to dword 0
	 * and of dword 1 to dword 2, pand keeps {135,-,1,-}, paddq shifts both
	 * 64-bit halves left by one, pxor folds the carries back in.
	 */
4538bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4539bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4540bc3d5698SJohn Baldwin	movdqa	%xmm1,(%esp)
4541bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4542bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4543bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4544bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4545bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4546bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4547bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
4548bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4549bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4550bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4551bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4552bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4553bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4554bc3d5698SJohn Baldwin	movdqa	%xmm1,32(%esp)
4555bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4556bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4557bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4558bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4559bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4560bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4561bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
4562bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4563bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4564bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4565bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
	/* Sixth tweak is built in %xmm7 (saved to 80(%esp) further down). */
4566bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
4567bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
4568bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
	/* %xmm0 = first round key of the data key schedule. */
4569bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
4570bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
4571bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4572bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
4573bc3d5698SJohn Baldwin	movl	%ebx,%ecx
	/* Load six input blocks and xor each with round key 0 and its tweak. */
4574bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
4575bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4576bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
4577bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4578bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
4579bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
4580bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
4581bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
4582bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm1
4583bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
4584bc3d5698SJohn Baldwin	leal	96(%esi),%esi
4585bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
4586bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
4587bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
4588bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
4589bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
4590bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
	/* First aesenc round (round key 1 in %xmm1) is issued here for %xmm2..%xmm7. */
4591bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4592bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
4593bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
4594bc3d5698SJohn Baldwin.byte	102,15,56,220,217
4595bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
4596bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
4597bc3d5698SJohn Baldwin.byte	102,15,56,220,225
4598bc3d5698SJohn Baldwin.byte	102,15,56,220,233
4599bc3d5698SJohn Baldwin.byte	102,15,56,220,241
4600bc3d5698SJohn Baldwin.byte	102,15,56,220,249
	/* Remaining rounds: presumably finished by this shared entry point, results left in %xmm2..%xmm7 -- defined outside this block. */
4601bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
	/* Xor the tweaks back in, store six output blocks, and start deriving the next tweak from the sixth one. */
4602bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm1
4603bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4604bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
4605bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4606bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
4607bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4608bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
4609bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4610bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
4611bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4612bc3d5698SJohn Baldwin	xorps	64(%esp),%xmm6
4613bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4614bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
4615bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
4616bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4617bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
4618bc3d5698SJohn Baldwin	leal	96(%edi),%edi
	/* %xmm3 was reused for data above; reload the doubling constant. */
4619bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
4620bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4621bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4622bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4623bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4624bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4625bc3d5698SJohn Baldwin	subl	$96,%eax
4626bc3d5698SJohn Baldwin	jnc	.L048xts_enc_loop6
	/* Loop finished: restore %ecx/%ebx = round count and %edx = key schedule for the tail code. */
4627bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
4628bc3d5698SJohn Baldwin	movl	%ebp,%edx
4629bc3d5698SJohn Baldwin	movl	%ecx,%ebx
4630bc3d5698SJohn Baldwin.L047xts_enc_short:
	/*
	 * %eax = bytes of whole blocks still to do (0, 16, ..., 80).
	 * The SSE instructions between each cmpl and the je that follows do
	 * not modify EFLAGS, so one compare feeds both the jb and the je.
	 */
4631bc3d5698SJohn Baldwin	addl	$96,%eax
4632bc3d5698SJohn Baldwin	jz	.L049xts_enc_done6x
	/* %xmm5 = tweak for the first remaining block. */
4633bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
4634bc3d5698SJohn Baldwin	cmpl	$32,%eax
4635bc3d5698SJohn Baldwin	jb	.L050xts_enc_one
4636bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4637bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4638bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4639bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4640bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4641bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4642bc3d5698SJohn Baldwin	je	.L051xts_enc_two
4643bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4644bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
	/* %xmm6 = tweak for the second remaining block. */
4645bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
4646bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4647bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4648bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4649bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4650bc3d5698SJohn Baldwin	cmpl	$64,%eax
4651bc3d5698SJohn Baldwin	jb	.L052xts_enc_three
4652bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4653bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
	/* %xmm7 = tweak for the third remaining block. */
4654bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
4655bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4656bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4657bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4658bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4659bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
4660bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
4661bc3d5698SJohn Baldwin	je	.L053xts_enc_four
	/* Five blocks remain: spill tweaks 3 and 4, derive the fifth into %xmm7 / 64(%esp). */
4662bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
4663bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
4664bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
4665bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4666bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
4667bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
4668bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
4669bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
4670bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
4671bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
4672bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
4673bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
4674bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
4675bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
4676bc3d5698SJohn Baldwin	leal	80(%esi),%esi
4677bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
4678bc3d5698SJohn Baldwin	movdqa	%xmm7,64(%esp)
4679bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm6
	/* Helper defined elsewhere; only the %xmm2..%xmm6 results are stored below. */
4680bc3d5698SJohn Baldwin	call	_aesni_encrypt6
	/* %xmm1 = fifth tweak, i.e. the last tweak used (consumed at .L054xts_enc_done). */
4681bc3d5698SJohn Baldwin	movaps	64(%esp),%xmm1
4682bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
4683bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
4684bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
4685bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4686bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
4687bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4688bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
4689bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4690bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4691bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
4692bc3d5698SJohn Baldwin	leal	80(%edi),%edi
4693bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
4694bc3d5698SJohn Baldwin.align	16
4695bc3d5698SJohn Baldwin.L050xts_enc_one:
	/* One block remains: tweak in %xmm5, encrypted inline with the single-block round loop. */
4696bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4697bc3d5698SJohn Baldwin	leal	16(%esi),%esi
4698bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4699bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4700bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4701bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4702bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4703bc3d5698SJohn Baldwin.L055enc1_loop_9:
4704bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4705bc3d5698SJohn Baldwin	decl	%ecx
4706bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4707bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4708bc3d5698SJohn Baldwin	jnz	.L055enc1_loop_9
4709bc3d5698SJohn Baldwin.byte	102,15,56,221,209
4710bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4711bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4712bc3d5698SJohn Baldwin	leal	16(%edi),%edi
	/* %xmm1 = last tweak used. */
4713bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
4714bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
4715bc3d5698SJohn Baldwin.align	16
4716bc3d5698SJohn Baldwin.L051xts_enc_two:
	/* Two blocks remain: tweaks in %xmm5 and %xmm6; they are read again after the call, so the helper must leave them intact. */
4717bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
4718bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4719bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
4720bc3d5698SJohn Baldwin	leal	32(%esi),%esi
4721bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4722bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4723bc3d5698SJohn Baldwin	call	_aesni_encrypt2
4724bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4725bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4726bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4727bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4728bc3d5698SJohn Baldwin	leal	32(%edi),%edi
4729bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
4730bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
4731bc3d5698SJohn Baldwin.align	16
4732bc3d5698SJohn Baldwin.L052xts_enc_three:
	/* Three blocks remain: tweaks in %xmm5, %xmm6, %xmm7, read again after the call. */
4733bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm7
4734bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4735bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
4736bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
4737bc3d5698SJohn Baldwin	leal	48(%esi),%esi
4738bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4739bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4740bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
4741bc3d5698SJohn Baldwin	call	_aesni_encrypt3
4742bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4743bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
4744bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
4745bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4746bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4747bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4748bc3d5698SJohn Baldwin	leal	48(%edi),%edi
4749bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
4750bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
4751bc3d5698SJohn Baldwin.align	16
4752bc3d5698SJohn Baldwin.L053xts_enc_four:
	/* Four blocks remain: tweaks at (%esp), 16(%esp), %xmm7 and %xmm6 (the fourth, copied from %xmm1). */
4753bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
4754bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4755bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
4756bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
4757bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
4758bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
4759bc3d5698SJohn Baldwin	leal	64(%esi),%esi
4760bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
4761bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
4762bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
4763bc3d5698SJohn Baldwin	call	_aesni_encrypt4
4764bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
4765bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
4766bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
4767bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4768bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
4769bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4770bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4771bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4772bc3d5698SJohn Baldwin	leal	64(%edi),%edi
4773bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
4774bc3d5698SJohn Baldwin	jmp	.L054xts_enc_done
4775bc3d5698SJohn Baldwin.align	16
4776bc3d5698SJohn Baldwin.L049xts_enc_done6x:
	/*
	 * No whole blocks left after the 6-way loop.  %xmm1 already holds the
	 * next unused tweak, so it is taken as-is for the partial block.
	 */
4777bc3d5698SJohn Baldwin	movl	112(%esp),%eax
4778bc3d5698SJohn Baldwin	andl	$15,%eax
4779bc3d5698SJohn Baldwin	jz	.L056xts_enc_ret
4780bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
4781bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
4782bc3d5698SJohn Baldwin	jmp	.L057xts_enc_steal
4783bc3d5698SJohn Baldwin.align	16
4784bc3d5698SJohn Baldwin.L054xts_enc_done:
	/*
	 * Reached from the 1..5 block paths with %xmm1 = last tweak used.
	 * If length & 15 is nonzero, double it once more into %xmm5.
	 */
4785bc3d5698SJohn Baldwin	movl	112(%esp),%eax
4786bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4787bc3d5698SJohn Baldwin	andl	$15,%eax
4788bc3d5698SJohn Baldwin	jz	.L056xts_enc_ret
4789bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4790bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
4791bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm5
4792bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4793bc3d5698SJohn Baldwin	pand	96(%esp),%xmm5
4794bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm5
4795bc3d5698SJohn Baldwin.L057xts_enc_steal:
	/*
	 * Byte loop over the length & 15 trailing bytes: each input byte
	 * replaces the corresponding byte of the last full output block, and
	 * the byte it replaced is written out as the trailing output byte.
	 */
4796bc3d5698SJohn Baldwin	movzbl	(%esi),%ecx
4797bc3d5698SJohn Baldwin	movzbl	-16(%edi),%edx
4798bc3d5698SJohn Baldwin	leal	1(%esi),%esi
4799bc3d5698SJohn Baldwin	movb	%cl,-16(%edi)
4800bc3d5698SJohn Baldwin	movb	%dl,(%edi)
4801bc3d5698SJohn Baldwin	leal	1(%edi),%edi
4802bc3d5698SJohn Baldwin	subl	$1,%eax
4803bc3d5698SJohn Baldwin	jnz	.L057xts_enc_steal
	/* Rewind %edi by the tail length so -16(%edi) is the patched block, then encrypt it in place with tweak %xmm5. */
4804bc3d5698SJohn Baldwin	subl	112(%esp),%edi
4805bc3d5698SJohn Baldwin	movl	%ebp,%edx
4806bc3d5698SJohn Baldwin	movl	%ebx,%ecx
4807bc3d5698SJohn Baldwin	movups	-16(%edi),%xmm2
4808bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4809bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4810bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4811bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4812bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4813bc3d5698SJohn Baldwin.L058enc1_loop_10:
4814bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4815bc3d5698SJohn Baldwin	decl	%ecx
4816bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4817bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4818bc3d5698SJohn Baldwin	jnz	.L058enc1_loop_10
4819bc3d5698SJohn Baldwin.byte	102,15,56,221,209
4820bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
4821bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi)
4822bc3d5698SJohn Baldwin.L056xts_enc_ret:
	/* Zero %xmm0-%xmm7 and the six tweak slots at 0..95(%esp) before leaving. */
4823bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4824bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
4825bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
4826bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
4827bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
4828bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%esp)
4829bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
4830bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
4831bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
4832bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
4833bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
4834bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
4835bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
4836bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
	/* Restore the pre-alignment stack pointer saved at 116(%esp), then the pushed registers. */
4837bc3d5698SJohn Baldwin	movl	116(%esp),%esp
4838bc3d5698SJohn Baldwin	popl	%edi
4839bc3d5698SJohn Baldwin	popl	%esi
4840bc3d5698SJohn Baldwin	popl	%ebx
4841bc3d5698SJohn Baldwin	popl	%ebp
4842bc3d5698SJohn Baldwin	ret
4843bc3d5698SJohn Baldwin.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
4844bc3d5698SJohn Baldwin.globl	aesni_xts_decrypt
4845bc3d5698SJohn Baldwin.type	aesni_xts_decrypt,@function
4846bc3d5698SJohn Baldwin.align	16
4847bc3d5698SJohn Baldwinaesni_xts_decrypt:
4848bc3d5698SJohn Baldwin.L_aesni_xts_decrypt_begin:
4849c0855eaaSJohn Baldwin	#ifdef __CET__
4850c0855eaaSJohn Baldwin
4851c0855eaaSJohn Baldwin.byte	243,15,30,251
4852c0855eaaSJohn Baldwin	#endif
4853c0855eaaSJohn Baldwin
4854bc3d5698SJohn Baldwin	pushl	%ebp
4855bc3d5698SJohn Baldwin	pushl	%ebx
4856bc3d5698SJohn Baldwin	pushl	%esi
4857bc3d5698SJohn Baldwin	pushl	%edi
4858bc3d5698SJohn Baldwin	movl	36(%esp),%edx
4859bc3d5698SJohn Baldwin	movl	40(%esp),%esi
4860bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4861bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4862bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
4863bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
4864bc3d5698SJohn Baldwin	leal	32(%edx),%edx
4865bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4866bc3d5698SJohn Baldwin.L059enc1_loop_11:
4867bc3d5698SJohn Baldwin.byte	102,15,56,220,209
4868bc3d5698SJohn Baldwin	decl	%ecx
4869bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
4870bc3d5698SJohn Baldwin	leal	16(%edx),%edx
4871bc3d5698SJohn Baldwin	jnz	.L059enc1_loop_11
4872bc3d5698SJohn Baldwin.byte	102,15,56,221,209
4873bc3d5698SJohn Baldwin	movl	20(%esp),%esi
4874bc3d5698SJohn Baldwin	movl	24(%esp),%edi
4875bc3d5698SJohn Baldwin	movl	28(%esp),%eax
4876bc3d5698SJohn Baldwin	movl	32(%esp),%edx
4877bc3d5698SJohn Baldwin	movl	%esp,%ebp
4878bc3d5698SJohn Baldwin	subl	$120,%esp
4879bc3d5698SJohn Baldwin	andl	$-16,%esp
4880bc3d5698SJohn Baldwin	xorl	%ebx,%ebx
4881bc3d5698SJohn Baldwin	testl	$15,%eax
4882bc3d5698SJohn Baldwin	setnz	%bl
4883bc3d5698SJohn Baldwin	shll	$4,%ebx
4884bc3d5698SJohn Baldwin	subl	%ebx,%eax
4885bc3d5698SJohn Baldwin	movl	$135,96(%esp)
4886bc3d5698SJohn Baldwin	movl	$0,100(%esp)
4887bc3d5698SJohn Baldwin	movl	$1,104(%esp)
4888bc3d5698SJohn Baldwin	movl	$0,108(%esp)
4889bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
4890bc3d5698SJohn Baldwin	movl	%ebp,116(%esp)
4891bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
4892bc3d5698SJohn Baldwin	movl	%edx,%ebp
4893bc3d5698SJohn Baldwin	movl	%ecx,%ebx
4894bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
4895bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4896bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
4897bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4898bc3d5698SJohn Baldwin	andl	$-16,%eax
4899bc3d5698SJohn Baldwin	subl	$96,%eax
4900bc3d5698SJohn Baldwin	jc	.L060xts_dec_short
4901bc3d5698SJohn Baldwin	shll	$4,%ecx
4902bc3d5698SJohn Baldwin	movl	$16,%ebx
4903bc3d5698SJohn Baldwin	subl	%ecx,%ebx
4904bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
4905bc3d5698SJohn Baldwin	jmp	.L061xts_dec_loop6
4906bc3d5698SJohn Baldwin.align	16
4907bc3d5698SJohn Baldwin.L061xts_dec_loop6:
4908bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4909bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4910bc3d5698SJohn Baldwin	movdqa	%xmm1,(%esp)
4911bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4912bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4913bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4914bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4915bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4916bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4917bc3d5698SJohn Baldwin	movdqa	%xmm1,16(%esp)
4918bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4919bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4920bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4921bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4922bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4923bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4924bc3d5698SJohn Baldwin	movdqa	%xmm1,32(%esp)
4925bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4926bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4927bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4928bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4929bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4930bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4931bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
4932bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4933bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4934bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4935bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4936bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
4937bc3d5698SJohn Baldwin	movdqa	%xmm1,64(%esp)
4938bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4939bc3d5698SJohn Baldwin	movups	(%ebp),%xmm0
4940bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
4941bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
4942bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
4943bc3d5698SJohn Baldwin	movl	%ebx,%ecx
4944bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
4945bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
4946bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
4947bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
4948bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
4949bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
4950bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
4951bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
4952bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm1
4953bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
4954bc3d5698SJohn Baldwin	leal	96(%esi),%esi
4955bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
4956bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
4957bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
4958bc3d5698SJohn Baldwin	movups	16(%ebp),%xmm1
4959bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
4960bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
4961bc3d5698SJohn Baldwin.byte	102,15,56,222,209
4962bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
4963bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
4964bc3d5698SJohn Baldwin.byte	102,15,56,222,217
4965bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
4966bc3d5698SJohn Baldwin	movups	32(%ebp),%xmm0
4967bc3d5698SJohn Baldwin.byte	102,15,56,222,225
4968bc3d5698SJohn Baldwin.byte	102,15,56,222,233
4969bc3d5698SJohn Baldwin.byte	102,15,56,222,241
4970bc3d5698SJohn Baldwin.byte	102,15,56,222,249
4971bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
4972bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm1
4973bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4974bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
4975bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4976bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
4977bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
4978bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
4979bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
4980bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
4981bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
4982bc3d5698SJohn Baldwin	xorps	64(%esp),%xmm6
4983bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
4984bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
4985bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
4986bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
4987bc3d5698SJohn Baldwin	movups	%xmm7,80(%edi)
4988bc3d5698SJohn Baldwin	leal	96(%edi),%edi
4989bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
4990bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
4991bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
4992bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
4993bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
4994bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
4995bc3d5698SJohn Baldwin	subl	$96,%eax
4996bc3d5698SJohn Baldwin	jnc	.L061xts_dec_loop6
4997bc3d5698SJohn Baldwin	movl	240(%ebp),%ecx
4998bc3d5698SJohn Baldwin	movl	%ebp,%edx
4999bc3d5698SJohn Baldwin	movl	%ecx,%ebx
5000bc3d5698SJohn Baldwin.L060xts_dec_short:
5001bc3d5698SJohn Baldwin	addl	$96,%eax
5002bc3d5698SJohn Baldwin	jz	.L062xts_dec_done6x
5003bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
5004bc3d5698SJohn Baldwin	cmpl	$32,%eax
5005bc3d5698SJohn Baldwin	jb	.L063xts_dec_one
5006bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
5007bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5008bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5009bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
5010bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
5011bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5012bc3d5698SJohn Baldwin	je	.L064xts_dec_two
5013bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
5014bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5015bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
5016bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5017bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
5018bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
5019bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5020bc3d5698SJohn Baldwin	cmpl	$64,%eax
5021bc3d5698SJohn Baldwin	jb	.L065xts_dec_three
5022bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
5023bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5024bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
5025bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5026bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
5027bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
5028bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5029bc3d5698SJohn Baldwin	movdqa	%xmm5,(%esp)
5030bc3d5698SJohn Baldwin	movdqa	%xmm6,16(%esp)
5031bc3d5698SJohn Baldwin	je	.L066xts_dec_four
5032bc3d5698SJohn Baldwin	movdqa	%xmm7,32(%esp)
5033bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm7
5034bc3d5698SJohn Baldwin	movdqa	%xmm1,48(%esp)
5035bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5036bc3d5698SJohn Baldwin	pand	%xmm3,%xmm7
5037bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm7
5038bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5039bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5040bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5041bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5042bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5043bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5044bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
5045bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5046bc3d5698SJohn Baldwin	leal	80(%esi),%esi
5047bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5048bc3d5698SJohn Baldwin	movdqa	%xmm7,64(%esp)
5049bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm6
5050bc3d5698SJohn Baldwin	call	_aesni_decrypt6
5051bc3d5698SJohn Baldwin	movaps	64(%esp),%xmm1
5052bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
5053bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
5054bc3d5698SJohn Baldwin	xorps	32(%esp),%xmm4
5055bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5056bc3d5698SJohn Baldwin	xorps	48(%esp),%xmm5
5057bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
5058bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm6
5059bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
5060bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
5061bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
5062bc3d5698SJohn Baldwin	leal	80(%edi),%edi
5063bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
5064bc3d5698SJohn Baldwin.align	16
5065bc3d5698SJohn Baldwin.L063xts_dec_one:
5066bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
5067bc3d5698SJohn Baldwin	leal	16(%esi),%esi
5068bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5069bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5070bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5071bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5072bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5073bc3d5698SJohn Baldwin.L068dec1_loop_12:
5074bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5075bc3d5698SJohn Baldwin	decl	%ecx
5076bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5077bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5078bc3d5698SJohn Baldwin	jnz	.L068dec1_loop_12
5079bc3d5698SJohn Baldwin.byte	102,15,56,223,209
5080bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5081bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5082bc3d5698SJohn Baldwin	leal	16(%edi),%edi
5083bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
5084bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
5085bc3d5698SJohn Baldwin.align	16
5086bc3d5698SJohn Baldwin.L064xts_dec_two:
5087bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
5088bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
5089bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
5090bc3d5698SJohn Baldwin	leal	32(%esi),%esi
5091bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5092bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5093bc3d5698SJohn Baldwin	call	_aesni_decrypt2
5094bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5095bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5096bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5097bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
5098bc3d5698SJohn Baldwin	leal	32(%edi),%edi
5099bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
5100bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
5101bc3d5698SJohn Baldwin.align	16
5102bc3d5698SJohn Baldwin.L065xts_dec_three:
5103bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm7
5104bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
5105bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
5106bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
5107bc3d5698SJohn Baldwin	leal	48(%esi),%esi
5108bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5109bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5110bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5111bc3d5698SJohn Baldwin	call	_aesni_decrypt3
5112bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5113bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5114bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5115bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5116bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
5117bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
5118bc3d5698SJohn Baldwin	leal	48(%edi),%edi
5119bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm1
5120bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
5121bc3d5698SJohn Baldwin.align	16
5122bc3d5698SJohn Baldwin.L066xts_dec_four:
5123bc3d5698SJohn Baldwin	movaps	%xmm1,%xmm6
5124bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
5125bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
5126bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
5127bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
5128bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
5129bc3d5698SJohn Baldwin	leal	64(%esi),%esi
5130bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
5131bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5132bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
5133bc3d5698SJohn Baldwin	call	_aesni_decrypt4
5134bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
5135bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
5136bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5137bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5138bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm5
5139bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
5140bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
5141bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
5142bc3d5698SJohn Baldwin	leal	64(%edi),%edi
5143bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
5144bc3d5698SJohn Baldwin	jmp	.L067xts_dec_done
5145bc3d5698SJohn Baldwin.align	16
5146bc3d5698SJohn Baldwin.L062xts_dec_done6x:
5147bc3d5698SJohn Baldwin	movl	112(%esp),%eax
5148bc3d5698SJohn Baldwin	andl	$15,%eax
5149bc3d5698SJohn Baldwin	jz	.L069xts_dec_ret
5150bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
5151bc3d5698SJohn Baldwin	jmp	.L070xts_dec_only_one_more
5152bc3d5698SJohn Baldwin.align	16
5153bc3d5698SJohn Baldwin.L067xts_dec_done:
5154bc3d5698SJohn Baldwin	movl	112(%esp),%eax
5155bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5156bc3d5698SJohn Baldwin	andl	$15,%eax
5157bc3d5698SJohn Baldwin	jz	.L069xts_dec_ret
5158bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
5159bc3d5698SJohn Baldwin	movl	%eax,112(%esp)
5160bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm2
5161bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5162bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm3
5163bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5164bc3d5698SJohn Baldwin	pand	%xmm3,%xmm2
5165bc3d5698SJohn Baldwin	pcmpgtd	%xmm1,%xmm0
5166bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5167bc3d5698SJohn Baldwin.L070xts_dec_only_one_more:
5168bc3d5698SJohn Baldwin	pshufd	$19,%xmm0,%xmm5
5169bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
5170bc3d5698SJohn Baldwin	paddq	%xmm1,%xmm1
5171bc3d5698SJohn Baldwin	pand	%xmm3,%xmm5
5172bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm5
5173bc3d5698SJohn Baldwin	movl	%ebp,%edx
5174bc3d5698SJohn Baldwin	movl	%ebx,%ecx
5175bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
5176bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5177bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5178bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5179bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5180bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5181bc3d5698SJohn Baldwin.L071dec1_loop_13:
5182bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5183bc3d5698SJohn Baldwin	decl	%ecx
5184bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5185bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5186bc3d5698SJohn Baldwin	jnz	.L071dec1_loop_13
5187bc3d5698SJohn Baldwin.byte	102,15,56,223,209
5188bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5189bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5190bc3d5698SJohn Baldwin.L072xts_dec_steal:
5191bc3d5698SJohn Baldwin	movzbl	16(%esi),%ecx
5192bc3d5698SJohn Baldwin	movzbl	(%edi),%edx
5193bc3d5698SJohn Baldwin	leal	1(%esi),%esi
5194bc3d5698SJohn Baldwin	movb	%cl,(%edi)
5195bc3d5698SJohn Baldwin	movb	%dl,16(%edi)
5196bc3d5698SJohn Baldwin	leal	1(%edi),%edi
5197bc3d5698SJohn Baldwin	subl	$1,%eax
5198bc3d5698SJohn Baldwin	jnz	.L072xts_dec_steal
5199bc3d5698SJohn Baldwin	subl	112(%esp),%edi
5200bc3d5698SJohn Baldwin	movl	%ebp,%edx
5201bc3d5698SJohn Baldwin	movl	%ebx,%ecx
5202bc3d5698SJohn Baldwin	movups	(%edi),%xmm2
5203bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
5204bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5205bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5206bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5207bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5208bc3d5698SJohn Baldwin.L073dec1_loop_14:
5209bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5210bc3d5698SJohn Baldwin	decl	%ecx
5211bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5212bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5213bc3d5698SJohn Baldwin	jnz	.L073dec1_loop_14
5214bc3d5698SJohn Baldwin.byte	102,15,56,223,209
5215bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
5216bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
5217bc3d5698SJohn Baldwin.L069xts_dec_ret:
5218bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5219bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
5220bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
5221bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
5222bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
5223bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%esp)
5224bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
5225bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%esp)
5226bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
5227bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%esp)
5228bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
5229bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%esp)
5230bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
5231bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%esp)
5232bc3d5698SJohn Baldwin	movl	116(%esp),%esp
5233bc3d5698SJohn Baldwin	popl	%edi
5234bc3d5698SJohn Baldwin	popl	%esi
5235bc3d5698SJohn Baldwin	popl	%ebx
5236bc3d5698SJohn Baldwin	popl	%ebp
5237bc3d5698SJohn Baldwin	ret
5238bc3d5698SJohn Baldwin.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
/*
 * aesni_ocb_encrypt: i386 AES-NI bulk encryption in which every 16-byte
 * block is xored with a per-block offset before and after the cipher,
 * while a running xor of the input blocks is kept as a checksum.
 *
 * Stack arguments as read below (offsets are those seen after the four
 * pushes):
 *   20(%esp) -> %esi  source pointer, read 16 bytes at a time
 *   24(%esp) -> %edi  destination pointer (kept as destination - source)
 *   28(%esp) -> %eax  number of 16-byte blocks (multiplied by 16 below)
 *   32(%esp) -> %edx  key schedule; 240(%edx) is used as the round count
 *   36(%esp) -> %ebp  block counter; bsf of it selects a table entry
 *   40(%esp)          pointer to the 16-byte running offset (%xmm0),
 *                     read on entry and written back on exit
 *   44(%esp) -> %ebx  table of 16-byte entries, indexed by bsf*16
 *   48(%esp)          pointer to the 16-byte checksum (%xmm1),
 *                     read on entry and written back on exit
 * NOTE(review): presumably these are OpenSSL's in, out, blocks, key,
 * start_block_num, offset_i, L_ and checksum -- confirm against the
 * generator script.
 *
 * 16-byte-aligned scratch frame built below:
 *   0..80(%esp)  six per-block offsets
 *   96(%esp)     checksum, parked while %xmm1 carries a round key
 *   112(%esp)    key schedule pointer
 *   116(%esp)    16 - 16*rounds
 *   120(%esp)    destination - source
 *   124(%esp)    source + 16*blocks - 96
 *   128(%esp)    %esp as it was right after the pushes
 *
 * .byte 102,15,56,220,N encodes aesenc %xmm1,%xmmK and
 * .byte 102,15,56,221,N encodes aesenclast %xmm1,%xmmK, with
 * N = 209,217,225,233,241,249 for K = 2..7.
 */
5239bc3d5698SJohn Baldwin.globl	aesni_ocb_encrypt
5240bc3d5698SJohn Baldwin.type	aesni_ocb_encrypt,@function
5241bc3d5698SJohn Baldwin.align	16
5242bc3d5698SJohn Baldwinaesni_ocb_encrypt:
5243bc3d5698SJohn Baldwin.L_aesni_ocb_encrypt_begin:
/* The guarded bytes 243,15,30,251 encode endbr32. */
5244c0855eaaSJohn Baldwin	#ifdef __CET__
5245c0855eaaSJohn Baldwin
5246c0855eaaSJohn Baldwin.byte	243,15,30,251
5247c0855eaaSJohn Baldwin	#endif
5248c0855eaaSJohn Baldwin
/* Preserve %ebp/%ebx/%esi/%edi; they are popped again before ret. */
5249bc3d5698SJohn Baldwin	pushl	%ebp
5250bc3d5698SJohn Baldwin	pushl	%ebx
5251bc3d5698SJohn Baldwin	pushl	%esi
5252bc3d5698SJohn Baldwin	pushl	%edi
/* Fetch the arguments; %xmm0 = running offset, %xmm1 = checksum. */
5253bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
5254bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
5255bc3d5698SJohn Baldwin	movl	20(%esp),%esi
5256bc3d5698SJohn Baldwin	movl	24(%esp),%edi
5257bc3d5698SJohn Baldwin	movl	28(%esp),%eax
5258bc3d5698SJohn Baldwin	movl	32(%esp),%edx
5259bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm0
5260bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
5261bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm1
5262bc3d5698SJohn Baldwin	movl	44(%esp),%ebx
/*
 * Remember the current %esp in %ecx, then carve out a scratch frame and
 * align it to 16 bytes (it is accessed with movdqa below).
 */
5263bc3d5698SJohn Baldwin	movl	%esp,%ecx
5264bc3d5698SJohn Baldwin	subl	$132,%esp
5265bc3d5698SJohn Baldwin	andl	$-16,%esp
/*
 * %edi = destination - source, so output is addressed as (%edi,%esi).
 * %eax = source + 16*blocks - 96: the last source position from which
 * six whole blocks are still available.
 */
5266bc3d5698SJohn Baldwin	subl	%esi,%edi
5267bc3d5698SJohn Baldwin	shll	$4,%eax
5268bc3d5698SJohn Baldwin	leal	-96(%esi,%eax,1),%eax
5269bc3d5698SJohn Baldwin	movl	%edi,120(%esp)
5270bc3d5698SJohn Baldwin	movl	%eax,124(%esp)
5271bc3d5698SJohn Baldwin	movl	%ecx,128(%esp)
5272bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
/*
 * An odd counter goes straight to the multi-block code.  An even counter
 * is first handled as a single block, which leaves the counter odd.
 * NOTE(review): bsfl leaves its destination undefined for a zero source,
 * and this single-block path does not check the block count; presumably
 * callers pass a nonzero counter and at least one block -- confirm.
 */
5273bc3d5698SJohn Baldwin	testl	$1,%ebp
5274bc3d5698SJohn Baldwin	jnz	.L074odd
5275bc3d5698SJohn Baldwin	bsfl	%ebp,%eax
5276bc3d5698SJohn Baldwin	addl	$1,%ebp
5277bc3d5698SJohn Baldwin	shll	$4,%eax
5278bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
/* Keep the key pointer in %eax; %edx is advanced by the round loop. */
5279bc3d5698SJohn Baldwin	movl	%edx,%eax
5280bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5281bc3d5698SJohn Baldwin	leal	16(%esi),%esi
/*
 * %xmm7 = new offset (old offset ^ table entry); checksum ^= input;
 * block = input ^ offset.  The checksum is parked in %xmm6 because
 * %xmm1 carries round keys inside the loop.
 */
5282bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5283bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5284bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
5285bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
/*
 * Single-block cipher: xor with the first round key, then %ecx
 * iterations of aesenc, each followed by loading the next round key,
 * and a final aesenclast.  jnz tests the flags set by decl; movups and
 * leal leave them untouched.
 */
5286bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5287bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5288bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5289bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5290bc3d5698SJohn Baldwin.L075enc1_loop_15:
5291bc3d5698SJohn Baldwin.byte	102,15,56,220,209
5292bc3d5698SJohn Baldwin	decl	%ecx
5293bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5294bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5295bc3d5698SJohn Baldwin	jnz	.L075enc1_loop_15
5296bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/*
 * Output = cipher result ^ offset.  The new offset moves to %xmm0 and
 * the checksum back to %xmm1.  %esi has already advanced, hence the -16.
 * Then reload the round count, key pointer and loop limit that the
 * single-block code overwrote.
 */
5297bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
5298bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5299bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
5300bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi,%esi,1)
5301bc3d5698SJohn Baldwin	movl	240(%eax),%ecx
5302bc3d5698SJohn Baldwin	movl	%eax,%edx
5303bc3d5698SJohn Baldwin	movl	124(%esp),%eax
/*
 * %ecx = 16*rounds.  Save the key pointer, point %edx at
 * key + 32 + 16*rounds and save 16 - 16*rounds.  With %ecx reloaded from
 * 116(%esp), -48(%edx,%ecx) is then the start of the key schedule,
 * -32(%edx,%ecx) is key + 16 and -16(%edx,%ecx) is key + 32.
 */
5304bc3d5698SJohn Baldwin.L074odd:
5305bc3d5698SJohn Baldwin	shll	$4,%ecx
5306bc3d5698SJohn Baldwin	movl	$16,%edi
5307bc3d5698SJohn Baldwin	subl	%ecx,%edi
5308bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
5309bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
5310bc3d5698SJohn Baldwin	movl	%edi,116(%esp)
/* Fewer than 96 bytes left (unsigned compare): skip the main loop. */
5311bc3d5698SJohn Baldwin	cmpl	%eax,%esi
5312bc3d5698SJohn Baldwin	ja	.L076short
5313bc3d5698SJohn Baldwin	jmp	.L077grandloop
5314bc3d5698SJohn Baldwin.align	32
/*
 * Main loop, six blocks per iteration.  %ebp is odd here, so %ebp,
 * %ebp+2 and %ebp+4 would all have a bsf of 0; those lanes use the
 * entry at (%ebx).  bsf picks the entries for %ebp+1, %ebp+3 and %ebp+5.
 */
5315bc3d5698SJohn Baldwin.L077grandloop:
5316bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5317bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5318bc3d5698SJohn Baldwin	leal	5(%ebp),%edi
5319bc3d5698SJohn Baldwin	addl	$6,%ebp
5320bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5321bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5322bc3d5698SJohn Baldwin	bsfl	%edi,%edi
5323bc3d5698SJohn Baldwin	shll	$4,%ecx
5324bc3d5698SJohn Baldwin	shll	$4,%eax
5325bc3d5698SJohn Baldwin	shll	$4,%edi
5326bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
5327bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
5328bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
5329bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
5330bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
5331bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
5332bc3d5698SJohn Baldwin	movdqu	(%ebx,%edi,1),%xmm7
/*
 * Chain the offsets: each is the previous offset xor its table entry,
 * starting from the running offset in %xmm0.  All six are parked in the
 * frame for use after the cipher.
 */
5333bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5334bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
5335bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
5336bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
5337bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
5338bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5339bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
5340bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5341bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
5342bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5343bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
5344bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
/* First round key into %xmm0, then six input blocks. */
5345bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
5346bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5347bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5348bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5349bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5350bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
5351bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
5352bc3d5698SJohn Baldwin	leal	96(%esi),%esi
/* Fold each input block into the checksum, then xor it with the key. */
5353bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5354bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5355bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5356bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
5357bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5358bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5359bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
5360bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5361bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
5362bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5363bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm1
5364bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
/* Park the checksum; %xmm1 now takes the round key at key + 16. */
5365bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5366bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
/* Apply the per-block offsets. */
5367bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5368bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5369bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5370bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5371bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5372bc3d5698SJohn Baldwin	pxor	80(%esp),%xmm7
/* Round key at key + 32 into %xmm0; first aesenc on all six lanes. */
5373bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
5374bc3d5698SJohn Baldwin.byte	102,15,56,220,209
5375bc3d5698SJohn Baldwin.byte	102,15,56,220,217
5376bc3d5698SJohn Baldwin.byte	102,15,56,220,225
5377bc3d5698SJohn Baldwin.byte	102,15,56,220,233
5378bc3d5698SJohn Baldwin.byte	102,15,56,220,241
5379bc3d5698SJohn Baldwin.byte	102,15,56,220,249
/*
 * Reload destination - source and the loop limit (%edi and %eax were
 * used as scratch above), then enter the shared six-block routine, which
 * is defined outside this block; presumably it runs the remaining rounds
 * on %xmm2..%xmm7 -- confirm.
 */
5380bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5381bc3d5698SJohn Baldwin	movl	124(%esp),%eax
5382bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
/*
 * Xor every result with its offset again.  The sixth offset stays in
 * %xmm0 as the running offset, and the checksum returns to %xmm1.
 * %esi has already advanced by 96, hence the negative displacements.
 */
5383bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm0
5384bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5385bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5386bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5387bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5388bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5389bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5390bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5391bc3d5698SJohn Baldwin	movdqu	%xmm2,-96(%edi,%esi,1)
5392bc3d5698SJohn Baldwin	movdqu	%xmm3,-80(%edi,%esi,1)
5393bc3d5698SJohn Baldwin	movdqu	%xmm4,-64(%edi,%esi,1)
5394bc3d5698SJohn Baldwin	movdqu	%xmm5,-48(%edi,%esi,1)
5395bc3d5698SJohn Baldwin	movdqu	%xmm6,-32(%edi,%esi,1)
5396bc3d5698SJohn Baldwin	movdqu	%xmm7,-16(%edi,%esi,1)
/* Keep looping while at least 96 bytes remain (unsigned compare). */
5397bc3d5698SJohn Baldwin	cmpl	%eax,%esi
53989576bca5SJung-uk Kim	jbe	.L077grandloop
/*
 * Tail: %eax = bytes left, a multiple of 16 below 96.  Dispatch on
 * 0, 16, 32, 48 or 64 bytes; 80 bytes falls through to the five-block
 * code.
 */
5399bc3d5698SJohn Baldwin.L076short:
5400bc3d5698SJohn Baldwin	addl	$96,%eax
5401bc3d5698SJohn Baldwin	subl	%esi,%eax
5402bc3d5698SJohn Baldwin	jz	.L078done
5403bc3d5698SJohn Baldwin	cmpl	$32,%eax
5404bc3d5698SJohn Baldwin	jb	.L079one
5405bc3d5698SJohn Baldwin	je	.L080two
5406bc3d5698SJohn Baldwin	cmpl	$64,%eax
5407bc3d5698SJohn Baldwin	jb	.L081three
5408bc3d5698SJohn Baldwin	je	.L082four
/* Five blocks: same scheme as the main loop, using five offsets. */
5409bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5410bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5411bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5412bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5413bc3d5698SJohn Baldwin	shll	$4,%ecx
5414bc3d5698SJohn Baldwin	shll	$4,%eax
5415bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
5416bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
5417bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
5418bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
5419bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
5420bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
5421bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5422bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
5423bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
5424bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
5425bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
5426bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5427bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
5428bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5429bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
5430bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5431bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
5432bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
5433bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5434bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5435bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5436bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5437bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
/*
 * The sixth lane carries no data: zero it.  This also discards the
 * value xored into %xmm7 a few lines above.
 */
5438bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
5439bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5440bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5441bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5442bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
5443bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5444bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5445bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
5446bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5447bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
5448bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5449bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5450bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
5451bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5452bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5453bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5454bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5455bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5456bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
5457bc3d5698SJohn Baldwin.byte	102,15,56,220,209
5458bc3d5698SJohn Baldwin.byte	102,15,56,220,217
5459bc3d5698SJohn Baldwin.byte	102,15,56,220,225
5460bc3d5698SJohn Baldwin.byte	102,15,56,220,233
5461bc3d5698SJohn Baldwin.byte	102,15,56,220,241
5462bc3d5698SJohn Baldwin.byte	102,15,56,220,249
5463bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5464bc3d5698SJohn Baldwin	call	.L_aesni_encrypt6_enter
/*
 * The fifth offset becomes the running offset.  %esi was not advanced
 * on this path, so the stores use non-negative displacements.
 */
5465bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm0
5466bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5467bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5468bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5469bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5470bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5471bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5472bc3d5698SJohn Baldwin	movdqu	%xmm2,(%edi,%esi,1)
5473bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%edi,%esi,1)
5474bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%edi,%esi,1)
5475bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%edi,%esi,1)
5476bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%edi,%esi,1)
5477bc3d5698SJohn Baldwin	jmp	.L078done
5478bc3d5698SJohn Baldwin.align	16
/*
 * One block: the offset uses the entry at (%ebx); the checksum is parked
 * in %xmm6 while the inline round loop uses %xmm0/%xmm1 for round keys.
 */
5479bc3d5698SJohn Baldwin.L079one:
5480bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
5481bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5482bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5483bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5484bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5485bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5486bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
5487bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
5488bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5489bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5490bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5491bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5492bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5493bc3d5698SJohn Baldwin.L083enc1_loop_16:
5494bc3d5698SJohn Baldwin.byte	102,15,56,220,209
5495bc3d5698SJohn Baldwin	decl	%ecx
5496bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5497bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5498bc3d5698SJohn Baldwin	jnz	.L083enc1_loop_16
5499bc3d5698SJohn Baldwin.byte	102,15,56,221,209
5500bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
5501bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5502bc3d5698SJohn Baldwin	movdqa	%xmm6,%xmm1
5503bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5504bc3d5698SJohn Baldwin	jmp	.L078done
5505bc3d5698SJohn Baldwin.align	16
/*
 * Two blocks: offsets live in %xmm6/%xmm7 and the checksum is parked in
 * %xmm5 across the call.  _aesni_encrypt2 is defined outside this block;
 * presumably it encrypts %xmm2/%xmm3 using key %edx and round count %ecx
 * and leaves %xmm5..%xmm7 intact, as the code after the call relies on
 * them -- confirm.
 */
5506bc3d5698SJohn Baldwin.L080two:
5507bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5508bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5509bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5510bc3d5698SJohn Baldwin	shll	$4,%ecx
5511bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm6
5512bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm7
5513bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5514bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5515bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5516bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5517bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5518bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5519bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm2
5520bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5521bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm3
5522bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
5523bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5524bc3d5698SJohn Baldwin	call	_aesni_encrypt2
5525bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
5526bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
5527bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5528bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm1
5529bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5530bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
5531bc3d5698SJohn Baldwin	jmp	.L078done
5532bc3d5698SJohn Baldwin.align	16
/*
 * Three blocks: offsets live in %xmm5..%xmm7, so the checksum is parked
 * at 96(%esp) across the call to _aesni_encrypt3 (defined outside this
 * block).
 */
5533bc3d5698SJohn Baldwin.L081three:
5534bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5535bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5536bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5537bc3d5698SJohn Baldwin	shll	$4,%ecx
5538bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm5
5539bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm6
5540bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm7
5541bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5542bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5543bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5544bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5545bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5546bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5547bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5548bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5549bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
5550bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5551bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm3
5552bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5553bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm4
5554bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5555bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5556bc3d5698SJohn Baldwin	call	_aesni_encrypt3
5557bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5558bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5559bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5560bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5561bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5562bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5563bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
5564bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
5565bc3d5698SJohn Baldwin	jmp	.L078done
5566bc3d5698SJohn Baldwin.align	16
/*
 * Four blocks: the first two offsets are parked at 0/16(%esp) because
 * %xmm4/%xmm5 are reused for data; the last two stay in %xmm6/%xmm7.
 * The checksum is parked at 96(%esp) across the call to _aesni_encrypt4
 * (defined outside this block).
 */
5567bc3d5698SJohn Baldwin.L082four:
5568bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5569bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5570bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5571bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5572bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5573bc3d5698SJohn Baldwin	shll	$4,%ecx
5574bc3d5698SJohn Baldwin	shll	$4,%eax
5575bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm4
5576bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm5
5577bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
5578bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
5579bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5580bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5581bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5582bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5583bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5584bc3d5698SJohn Baldwin	movdqa	%xmm4,(%esp)
5585bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5586bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%esp)
5587bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5588bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5589bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5590bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5591bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5592bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5593bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5594bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5595bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm4
5596bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
5597bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm5
5598bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5599bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5600bc3d5698SJohn Baldwin	call	_aesni_encrypt4
5601bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
5602bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
5603bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm4
5604bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5605bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm5
5606bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
5607bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5608bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
5609bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5610bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi,%esi,1)
/*
 * Common exit: fetch the saved %esp, zero %xmm2..%xmm7 and overwrite the
 * offset and checksum slots at 0..96(%esp) with zeros before the frame
 * is released.
 */
5611bc3d5698SJohn Baldwin.L078done:
5612bc3d5698SJohn Baldwin	movl	128(%esp),%edx
5613bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
5614bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
5615bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
5616bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
5617bc3d5698SJohn Baldwin	movdqa	%xmm2,16(%esp)
5618bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
5619bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
5620bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
5621bc3d5698SJohn Baldwin	movdqa	%xmm2,48(%esp)
5622bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
5623bc3d5698SJohn Baldwin	movdqa	%xmm2,64(%esp)
5624bc3d5698SJohn Baldwin	movdqa	%xmm2,80(%esp)
5625bc3d5698SJohn Baldwin	movdqa	%xmm2,96(%esp)
/*
 * Back on the caller-side stack: write the running offset and checksum
 * through the pointers at 40(%esp) and 48(%esp), then zero %xmm0/%xmm1.
 */
5626bc3d5698SJohn Baldwin	leal	(%edx),%esp
5627bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
5628bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
5629bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ecx)
5630bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
5631bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
5632bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
5633bc3d5698SJohn Baldwin	popl	%edi
5634bc3d5698SJohn Baldwin	popl	%esi
5635bc3d5698SJohn Baldwin	popl	%ebx
5636bc3d5698SJohn Baldwin	popl	%ebp
5637bc3d5698SJohn Baldwin	ret
5638bc3d5698SJohn Baldwin.size	aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
5639bc3d5698SJohn Baldwin.globl	aesni_ocb_decrypt
5640bc3d5698SJohn Baldwin.type	aesni_ocb_decrypt,@function
5641bc3d5698SJohn Baldwin.align	16
5642bc3d5698SJohn Baldwinaesni_ocb_decrypt:
5643bc3d5698SJohn Baldwin.L_aesni_ocb_decrypt_begin:
5644c0855eaaSJohn Baldwin	#ifdef __CET__
5645c0855eaaSJohn Baldwin
5646c0855eaaSJohn Baldwin.byte	243,15,30,251
5647c0855eaaSJohn Baldwin	#endif
5648c0855eaaSJohn Baldwin
5649bc3d5698SJohn Baldwin	pushl	%ebp
5650bc3d5698SJohn Baldwin	pushl	%ebx
5651bc3d5698SJohn Baldwin	pushl	%esi
5652bc3d5698SJohn Baldwin	pushl	%edi
5653bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
5654bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
5655bc3d5698SJohn Baldwin	movl	20(%esp),%esi
5656bc3d5698SJohn Baldwin	movl	24(%esp),%edi
5657bc3d5698SJohn Baldwin	movl	28(%esp),%eax
5658bc3d5698SJohn Baldwin	movl	32(%esp),%edx
5659bc3d5698SJohn Baldwin	movdqu	(%ecx),%xmm0
5660bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
5661bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm1
5662bc3d5698SJohn Baldwin	movl	44(%esp),%ebx
5663bc3d5698SJohn Baldwin	movl	%esp,%ecx
5664bc3d5698SJohn Baldwin	subl	$132,%esp
5665bc3d5698SJohn Baldwin	andl	$-16,%esp
5666bc3d5698SJohn Baldwin	subl	%esi,%edi
5667bc3d5698SJohn Baldwin	shll	$4,%eax
5668bc3d5698SJohn Baldwin	leal	-96(%esi,%eax,1),%eax
5669bc3d5698SJohn Baldwin	movl	%edi,120(%esp)
5670bc3d5698SJohn Baldwin	movl	%eax,124(%esp)
5671bc3d5698SJohn Baldwin	movl	%ecx,128(%esp)
5672bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5673bc3d5698SJohn Baldwin	testl	$1,%ebp
5674bc3d5698SJohn Baldwin	jnz	.L084odd
5675bc3d5698SJohn Baldwin	bsfl	%ebp,%eax
5676bc3d5698SJohn Baldwin	addl	$1,%ebp
5677bc3d5698SJohn Baldwin	shll	$4,%eax
5678bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
5679bc3d5698SJohn Baldwin	movl	%edx,%eax
5680bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5681bc3d5698SJohn Baldwin	leal	16(%esi),%esi
5682bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5683bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
5684bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
5685bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5686bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5687bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5688bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5689bc3d5698SJohn Baldwin.L085dec1_loop_17:
5690bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5691bc3d5698SJohn Baldwin	decl	%ecx
5692bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5693bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5694bc3d5698SJohn Baldwin	jnz	.L085dec1_loop_17
5695bc3d5698SJohn Baldwin.byte	102,15,56,223,209
5696bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
5697bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm1
5698bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5699bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm1
5700bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edi,%esi,1)
5701bc3d5698SJohn Baldwin	movl	240(%eax),%ecx
5702bc3d5698SJohn Baldwin	movl	%eax,%edx
5703bc3d5698SJohn Baldwin	movl	124(%esp),%eax
5704bc3d5698SJohn Baldwin.L084odd:
5705bc3d5698SJohn Baldwin	shll	$4,%ecx
5706bc3d5698SJohn Baldwin	movl	$16,%edi
5707bc3d5698SJohn Baldwin	subl	%ecx,%edi
5708bc3d5698SJohn Baldwin	movl	%edx,112(%esp)
5709bc3d5698SJohn Baldwin	leal	32(%edx,%ecx,1),%edx
5710bc3d5698SJohn Baldwin	movl	%edi,116(%esp)
5711bc3d5698SJohn Baldwin	cmpl	%eax,%esi
5712bc3d5698SJohn Baldwin	ja	.L086short
5713bc3d5698SJohn Baldwin	jmp	.L087grandloop
5714bc3d5698SJohn Baldwin.align	32
5715bc3d5698SJohn Baldwin.L087grandloop:
5716bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5717bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5718bc3d5698SJohn Baldwin	leal	5(%ebp),%edi
5719bc3d5698SJohn Baldwin	addl	$6,%ebp
5720bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5721bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5722bc3d5698SJohn Baldwin	bsfl	%edi,%edi
5723bc3d5698SJohn Baldwin	shll	$4,%ecx
5724bc3d5698SJohn Baldwin	shll	$4,%eax
5725bc3d5698SJohn Baldwin	shll	$4,%edi
5726bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
5727bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
5728bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
5729bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
5730bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
5731bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
5732bc3d5698SJohn Baldwin	movdqu	(%ebx,%edi,1),%xmm7
5733bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5734bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
5735bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
5736bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
5737bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
5738bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5739bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
5740bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5741bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
5742bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5743bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
5744bc3d5698SJohn Baldwin	movdqa	%xmm7,80(%esp)
5745bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
5746bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5747bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5748bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5749bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5750bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
5751bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
5752bc3d5698SJohn Baldwin	leal	96(%esi),%esi
5753bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5754bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5755bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
5756bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5757bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5758bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5759bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5760bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
5761bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5762bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5763bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5764bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5765bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5766bc3d5698SJohn Baldwin	pxor	80(%esp),%xmm7
5767bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
5768bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5769bc3d5698SJohn Baldwin.byte	102,15,56,222,217
5770bc3d5698SJohn Baldwin.byte	102,15,56,222,225
5771bc3d5698SJohn Baldwin.byte	102,15,56,222,233
5772bc3d5698SJohn Baldwin.byte	102,15,56,222,241
5773bc3d5698SJohn Baldwin.byte	102,15,56,222,249
5774bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5775bc3d5698SJohn Baldwin	movl	124(%esp),%eax
5776bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
5777bc3d5698SJohn Baldwin	movdqa	80(%esp),%xmm0
5778bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5779bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5780bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5781bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5782bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5783bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5784bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5785bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5786bc3d5698SJohn Baldwin	movdqu	%xmm2,-96(%edi,%esi,1)
5787bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5788bc3d5698SJohn Baldwin	movdqu	%xmm3,-80(%edi,%esi,1)
5789bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5790bc3d5698SJohn Baldwin	movdqu	%xmm4,-64(%edi,%esi,1)
5791bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
5792bc3d5698SJohn Baldwin	movdqu	%xmm5,-48(%edi,%esi,1)
5793bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
5794bc3d5698SJohn Baldwin	movdqu	%xmm6,-32(%edi,%esi,1)
5795bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm1
5796bc3d5698SJohn Baldwin	movdqu	%xmm7,-16(%edi,%esi,1)
5797bc3d5698SJohn Baldwin	cmpl	%eax,%esi
57989576bca5SJung-uk Kim	jbe	.L087grandloop
5799bc3d5698SJohn Baldwin.L086short:
5800bc3d5698SJohn Baldwin	addl	$96,%eax
5801bc3d5698SJohn Baldwin	subl	%esi,%eax
5802bc3d5698SJohn Baldwin	jz	.L088done
5803bc3d5698SJohn Baldwin	cmpl	$32,%eax
5804bc3d5698SJohn Baldwin	jb	.L089one
5805bc3d5698SJohn Baldwin	je	.L090two
5806bc3d5698SJohn Baldwin	cmpl	$64,%eax
5807bc3d5698SJohn Baldwin	jb	.L091three
5808bc3d5698SJohn Baldwin	je	.L092four
5809bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5810bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5811bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5812bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5813bc3d5698SJohn Baldwin	shll	$4,%ecx
5814bc3d5698SJohn Baldwin	shll	$4,%eax
5815bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm2
5816bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm3
5817bc3d5698SJohn Baldwin	movl	116(%esp),%ecx
5818bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm4
5819bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm5
5820bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm6
5821bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5822bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
5823bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
5824bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm4
5825bc3d5698SJohn Baldwin	movdqa	%xmm3,16(%esp)
5826bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5827bc3d5698SJohn Baldwin	movdqa	%xmm4,32(%esp)
5828bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5829bc3d5698SJohn Baldwin	movdqa	%xmm5,48(%esp)
5830bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5831bc3d5698SJohn Baldwin	movdqa	%xmm6,64(%esp)
5832bc3d5698SJohn Baldwin	movups	-48(%edx,%ecx,1),%xmm0
5833bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5834bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5835bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5836bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5837bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
5838bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
5839bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5840bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm2
5841bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
5842bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5843bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5844bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5845bc3d5698SJohn Baldwin	movups	-32(%edx,%ecx,1),%xmm1
5846bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5847bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5848bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5849bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5850bc3d5698SJohn Baldwin	pxor	64(%esp),%xmm6
5851bc3d5698SJohn Baldwin	movups	-16(%edx,%ecx,1),%xmm0
5852bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5853bc3d5698SJohn Baldwin.byte	102,15,56,222,217
5854bc3d5698SJohn Baldwin.byte	102,15,56,222,225
5855bc3d5698SJohn Baldwin.byte	102,15,56,222,233
5856bc3d5698SJohn Baldwin.byte	102,15,56,222,241
5857bc3d5698SJohn Baldwin.byte	102,15,56,222,249
5858bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5859bc3d5698SJohn Baldwin	call	.L_aesni_decrypt6_enter
5860bc3d5698SJohn Baldwin	movdqa	64(%esp),%xmm0
5861bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5862bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5863bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5864bc3d5698SJohn Baldwin	pxor	32(%esp),%xmm4
5865bc3d5698SJohn Baldwin	pxor	48(%esp),%xmm5
5866bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5867bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5868bc3d5698SJohn Baldwin	movdqu	%xmm2,(%edi,%esi,1)
5869bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5870bc3d5698SJohn Baldwin	movdqu	%xmm3,16(%edi,%esi,1)
5871bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5872bc3d5698SJohn Baldwin	movdqu	%xmm4,32(%edi,%esi,1)
5873bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
5874bc3d5698SJohn Baldwin	movdqu	%xmm5,48(%edi,%esi,1)
5875bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm1
5876bc3d5698SJohn Baldwin	movdqu	%xmm6,64(%edi,%esi,1)
5877bc3d5698SJohn Baldwin	jmp	.L088done
5878bc3d5698SJohn Baldwin.align	16
5879bc3d5698SJohn Baldwin.L089one:
5880bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm7
5881bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5882bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5883bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5884bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm7
5885bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm2
5886bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm6
5887bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5888bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
5889bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
5890bc3d5698SJohn Baldwin	leal	32(%edx),%edx
5891bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
5892bc3d5698SJohn Baldwin.L093dec1_loop_18:
5893bc3d5698SJohn Baldwin.byte	102,15,56,222,209
5894bc3d5698SJohn Baldwin	decl	%ecx
5895bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
5896bc3d5698SJohn Baldwin	leal	16(%edx),%edx
5897bc3d5698SJohn Baldwin	jnz	.L093dec1_loop_18
5898bc3d5698SJohn Baldwin.byte	102,15,56,223,209
5899bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
5900bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm1
5901bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5902bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm1
5903bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5904bc3d5698SJohn Baldwin	jmp	.L088done
5905bc3d5698SJohn Baldwin.align	16
5906bc3d5698SJohn Baldwin.L090two:
5907bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5908bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5909bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5910bc3d5698SJohn Baldwin	shll	$4,%ecx
5911bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm6
5912bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm7
5913bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5914bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5915bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5916bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm5
5917bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm6
5918bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5919bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm2
5920bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm3
5921bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5922bc3d5698SJohn Baldwin	call	_aesni_decrypt2
5923bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm2
5924bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm3
5925bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5926bc3d5698SJohn Baldwin	xorps	%xmm2,%xmm5
5927bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5928bc3d5698SJohn Baldwin	xorps	%xmm3,%xmm5
5929bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
5930bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm1
5931bc3d5698SJohn Baldwin	jmp	.L088done
5932bc3d5698SJohn Baldwin.align	16
5933bc3d5698SJohn Baldwin.L091three:
5934bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5935bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5936bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5937bc3d5698SJohn Baldwin	shll	$4,%ecx
5938bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm5
5939bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm6
5940bc3d5698SJohn Baldwin	movdqa	%xmm5,%xmm7
5941bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5942bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5943bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5944bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5945bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5946bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm5
5947bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5948bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5949bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm2
5950bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm3
5951bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm4
5952bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5953bc3d5698SJohn Baldwin	call	_aesni_decrypt3
5954bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5955bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm2
5956bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
5957bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm4
5958bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
5959bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
5960bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
5961bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
5962bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
5963bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
5964bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
5965bc3d5698SJohn Baldwin	jmp	.L088done
5966bc3d5698SJohn Baldwin.align	16
5967bc3d5698SJohn Baldwin.L092four:
5968bc3d5698SJohn Baldwin	leal	1(%ebp),%ecx
5969bc3d5698SJohn Baldwin	leal	3(%ebp),%eax
5970bc3d5698SJohn Baldwin	bsfl	%ecx,%ecx
5971bc3d5698SJohn Baldwin	bsfl	%eax,%eax
5972bc3d5698SJohn Baldwin	movl	112(%esp),%edx
5973bc3d5698SJohn Baldwin	shll	$4,%ecx
5974bc3d5698SJohn Baldwin	shll	$4,%eax
5975bc3d5698SJohn Baldwin	movdqu	(%ebx),%xmm4
5976bc3d5698SJohn Baldwin	movdqu	(%ebx,%ecx,1),%xmm5
5977bc3d5698SJohn Baldwin	movdqa	%xmm4,%xmm6
5978bc3d5698SJohn Baldwin	movdqu	(%ebx,%eax,1),%xmm7
5979bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm4
5980bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
5981bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm5
5982bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
5983bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm6
5984bc3d5698SJohn Baldwin	movdqa	%xmm4,(%esp)
5985bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm7
5986bc3d5698SJohn Baldwin	movdqa	%xmm5,16(%esp)
5987bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
5988bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
5989bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
5990bc3d5698SJohn Baldwin	movdqa	%xmm1,96(%esp)
5991bc3d5698SJohn Baldwin	pxor	(%esp),%xmm2
5992bc3d5698SJohn Baldwin	pxor	16(%esp),%xmm3
5993bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm4
5994bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm5
5995bc3d5698SJohn Baldwin	movl	120(%esp),%edi
5996bc3d5698SJohn Baldwin	call	_aesni_decrypt4
5997bc3d5698SJohn Baldwin	movdqa	96(%esp),%xmm1
5998bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
5999bc3d5698SJohn Baldwin	xorps	16(%esp),%xmm3
6000bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm4
6001bc3d5698SJohn Baldwin	movups	%xmm2,(%edi,%esi,1)
6002bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm1
6003bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm5
6004bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi,%esi,1)
6005bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
6006bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm0
6007bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi,%esi,1)
6008bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm1
6009bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi,%esi,1)
6010bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm1
6011bc3d5698SJohn Baldwin.L088done:
6012bc3d5698SJohn Baldwin	movl	128(%esp),%edx
6013bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
6014bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6015bc3d5698SJohn Baldwin	movdqa	%xmm2,(%esp)
6016bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6017bc3d5698SJohn Baldwin	movdqa	%xmm2,16(%esp)
6018bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
6019bc3d5698SJohn Baldwin	movdqa	%xmm2,32(%esp)
6020bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
6021bc3d5698SJohn Baldwin	movdqa	%xmm2,48(%esp)
6022bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
6023bc3d5698SJohn Baldwin	movdqa	%xmm2,64(%esp)
6024bc3d5698SJohn Baldwin	movdqa	%xmm2,80(%esp)
6025bc3d5698SJohn Baldwin	movdqa	%xmm2,96(%esp)
6026bc3d5698SJohn Baldwin	leal	(%edx),%esp
6027bc3d5698SJohn Baldwin	movl	40(%esp),%ecx
6028bc3d5698SJohn Baldwin	movl	48(%esp),%ebx
6029bc3d5698SJohn Baldwin	movdqu	%xmm0,(%ecx)
6030bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6031bc3d5698SJohn Baldwin	movdqu	%xmm1,(%ebx)
6032bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
6033bc3d5698SJohn Baldwin	popl	%edi
6034bc3d5698SJohn Baldwin	popl	%esi
6035bc3d5698SJohn Baldwin	popl	%ebx
6036bc3d5698SJohn Baldwin	popl	%ebp
6037bc3d5698SJohn Baldwin	ret
6038bc3d5698SJohn Baldwin.size	aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
/*
 * aesni_cbc_encrypt: AES-NI CBC encryption/decryption of a byte buffer.
 *
 * Stack arguments (offsets are those seen after the four pushes below):
 *   20(%esp) -> %esi  source pointer
 *   24(%esp) -> %edi  destination pointer
 *   28(%esp) -> %eax  byte count; zero returns at once via cbc_abort
 *   32(%esp) -> %edx  key schedule; 240(%edx) supplies the round-loop count
 *   36(%esp) -> %ebp  16-byte chaining value (IV): loaded at entry and
 *                     overwritten with the final chaining value at cbc_ret
 *   40(%esp)          direction flag: zero selects the decrypt path
 * NOTE(review): presumably this mirrors OpenSSL's C prototype
 * (in, out, length, key, ivec, enc) -- confirm against the caller.
 *
 * %ebp, %ebx, %esi and %edi are saved on entry and restored on exit.
 * A 16-byte-aligned scratch area is carved out below the caller's %esp;
 * the caller's %esp is parked at 16(%esp) and reloaded at cbc_ret.
 * While running: %ebx = saved round count, %ebp = saved key pointer,
 * %xmm7 = current chaining value (decrypt) / input block (encrypt).
 *
 * This file is generated from aesni-x86.pl (see the file header); the
 * scratch-wipe correction in cbc_dec_tail_partial must be mirrored there.
 */
6039bc3d5698SJohn Baldwin.globl	aesni_cbc_encrypt
6040bc3d5698SJohn Baldwin.type	aesni_cbc_encrypt,@function
6041bc3d5698SJohn Baldwin.align	16
6042bc3d5698SJohn Baldwinaesni_cbc_encrypt:
6043bc3d5698SJohn Baldwin.L_aesni_cbc_encrypt_begin:
6044c0855eaaSJohn Baldwin	#ifdef __CET__
6045c0855eaaSJohn Baldwin
/* F3 0F 1E FB = endbr32 */
6046c0855eaaSJohn Baldwin.byte	243,15,30,251
6047c0855eaaSJohn Baldwin	#endif
6048c0855eaaSJohn Baldwin
6049bc3d5698SJohn Baldwin	pushl	%ebp
6050bc3d5698SJohn Baldwin	pushl	%ebx
6051bc3d5698SJohn Baldwin	pushl	%esi
6052bc3d5698SJohn Baldwin	pushl	%edi
/* Load arguments while computing the aligned scratch frame in %ebx. */
6053bc3d5698SJohn Baldwin	movl	20(%esp),%esi
6054bc3d5698SJohn Baldwin	movl	%esp,%ebx
6055bc3d5698SJohn Baldwin	movl	24(%esp),%edi
6056bc3d5698SJohn Baldwin	subl	$24,%ebx
6057bc3d5698SJohn Baldwin	movl	28(%esp),%eax
6058bc3d5698SJohn Baldwin	andl	$-16,%ebx
6059bc3d5698SJohn Baldwin	movl	32(%esp),%edx
6060bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
/* Zero length: nothing to do; %esp has not been switched yet. */
6061bc3d5698SJohn Baldwin	testl	%eax,%eax
6062bc3d5698SJohn Baldwin	jz	.L094cbc_abort
/*
 * Test the direction flag while %esp still addresses the arguments.
 * None of the instructions up to the je below modify EFLAGS, so the
 * result of this compare is what the je consumes.
 */
6063bc3d5698SJohn Baldwin	cmpl	$0,40(%esp)
/* Switch to the aligned frame; %ebx now holds the caller's %esp. */
6064bc3d5698SJohn Baldwin	xchgl	%esp,%ebx
6065bc3d5698SJohn Baldwin	movups	(%ebp),%xmm7
6066bc3d5698SJohn Baldwin	movl	240(%edx),%ecx
6067bc3d5698SJohn Baldwin	movl	%edx,%ebp
/* Park the caller's %esp, then reuse %ebx as the saved round count. */
6068bc3d5698SJohn Baldwin	movl	%ebx,16(%esp)
6069bc3d5698SJohn Baldwin	movl	%ecx,%ebx
6070bc3d5698SJohn Baldwin	je	.L095cbc_decrypt
/* ---- Encrypt path: %xmm2 carries the running chaining value. ---- */
6071bc3d5698SJohn Baldwin	movaps	%xmm7,%xmm2
6072bc3d5698SJohn Baldwin	cmpl	$16,%eax
6073bc3d5698SJohn Baldwin	jb	.L096cbc_enc_tail
/* Pre-bias the count so the loop can exit on borrow (jnc below). */
6074bc3d5698SJohn Baldwin	subl	$16,%eax
6075bc3d5698SJohn Baldwin	jmp	.L097cbc_enc_loop
6076bc3d5698SJohn Baldwin.align	16
6077bc3d5698SJohn Baldwin.L097cbc_enc_loop:
/* %xmm2 = chain ^ input ^ round key 0, then run the round loop. */
6078bc3d5698SJohn Baldwin	movups	(%esi),%xmm7
6079bc3d5698SJohn Baldwin	leal	16(%esi),%esi
6080bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
6081bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
6082bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm7
6083bc3d5698SJohn Baldwin	leal	32(%edx),%edx
6084bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
6085bc3d5698SJohn Baldwin.L098enc1_loop_19:
/* 66 0F 38 DC D1 = aesenc %xmm1,%xmm2 */
6086bc3d5698SJohn Baldwin.byte	102,15,56,220,209
6087bc3d5698SJohn Baldwin	decl	%ecx
6088bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
6089bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6090bc3d5698SJohn Baldwin	jnz	.L098enc1_loop_19
/* 66 0F 38 DD D1 = aesenclast %xmm1,%xmm2 */
6091bc3d5698SJohn Baldwin.byte	102,15,56,221,209
/* Rewind round count and key pointer for the next block. */
6092bc3d5698SJohn Baldwin	movl	%ebx,%ecx
6093bc3d5698SJohn Baldwin	movl	%ebp,%edx
6094bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6095bc3d5698SJohn Baldwin	leal	16(%edi),%edi
6096bc3d5698SJohn Baldwin	subl	$16,%eax
6097bc3d5698SJohn Baldwin	jnc	.L097cbc_enc_loop
/* Undo the bias; a non-zero result is a trailing partial block. */
6098bc3d5698SJohn Baldwin	addl	$16,%eax
6099bc3d5698SJohn Baldwin	jnz	.L096cbc_enc_tail
/* Last ciphertext block becomes the outgoing chaining value. */
6100bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm7
6101bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
6102bc3d5698SJohn Baldwin	jmp	.L099cbc_ret
6103bc3d5698SJohn Baldwin.L096cbc_enc_tail:
/*
 * Partial final block: copy the %eax leftover bytes to the output,
 * zero-fill up to 16 bytes, then encrypt that block in place by
 * re-entering the loop with source = destination and %eax = 0.
 */
6104bc3d5698SJohn Baldwin	movl	%eax,%ecx
/* 89 F6 F3 A4 = movl %esi,%esi ; rep movsb */
6105bc3d5698SJohn Baldwin.long	2767451785
6106bc3d5698SJohn Baldwin	movl	$16,%ecx
6107bc3d5698SJohn Baldwin	subl	%eax,%ecx
6108bc3d5698SJohn Baldwin	xorl	%eax,%eax
/* 89 F6 F3 AA = movl %esi,%esi ; rep stosb (stores %al, which is 0) */
6109bc3d5698SJohn Baldwin.long	2868115081
6110bc3d5698SJohn Baldwin	leal	-16(%edi),%edi
6111bc3d5698SJohn Baldwin	movl	%ebx,%ecx
6112bc3d5698SJohn Baldwin	movl	%edi,%esi
6113bc3d5698SJohn Baldwin	movl	%ebp,%edx
6114bc3d5698SJohn Baldwin	jmp	.L097cbc_enc_loop
6115bc3d5698SJohn Baldwin.align	16
6116bc3d5698SJohn Baldwin.L095cbc_decrypt:
/* ---- Decrypt path: more than 80 bytes uses the 6-block loop. ---- */
6117bc3d5698SJohn Baldwin	cmpl	$80,%eax
6118bc3d5698SJohn Baldwin	jbe	.L100cbc_dec_tail
/* (%esp) holds the chaining value to XOR into the first block. */
6119bc3d5698SJohn Baldwin	movaps	%xmm7,(%esp)
6120bc3d5698SJohn Baldwin	subl	$80,%eax
6121bc3d5698SJohn Baldwin	jmp	.L101cbc_dec_loop6_enter
6122bc3d5698SJohn Baldwin.align	16
6123bc3d5698SJohn Baldwin.L102cbc_dec_loop6:
/*
 * Loop back-edge: save the new chaining value (6th ciphertext block of
 * the previous batch) and flush that batch's 6th plaintext block.
 */
6124bc3d5698SJohn Baldwin	movaps	%xmm0,(%esp)
6125bc3d5698SJohn Baldwin	movups	%xmm7,(%edi)
6126bc3d5698SJohn Baldwin	leal	16(%edi),%edi
6127bc3d5698SJohn Baldwin.L101cbc_dec_loop6_enter:
6128bc3d5698SJohn Baldwin	movdqu	(%esi),%xmm2
6129bc3d5698SJohn Baldwin	movdqu	16(%esi),%xmm3
6130bc3d5698SJohn Baldwin	movdqu	32(%esi),%xmm4
6131bc3d5698SJohn Baldwin	movdqu	48(%esi),%xmm5
6132bc3d5698SJohn Baldwin	movdqu	64(%esi),%xmm6
6133bc3d5698SJohn Baldwin	movdqu	80(%esi),%xmm7
/*
 * Helper defined elsewhere in this file; presumably decrypts
 * %xmm2-%xmm7 using key %edx / rounds %ecx -- confirm there.
 */
6134bc3d5698SJohn Baldwin	call	_aesni_decrypt6
/*
 * CBC un-chaining: block 0 ^= saved chaining value, block i ^=
 * ciphertext block i-1 (re-read from the source).  %xmm0 ends up
 * holding ciphertext block 5, the next chaining value.
 */
6135bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
6136bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
6137bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
6138bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm3
6139bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
6140bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm4
6141bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
6142bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm5
6143bc3d5698SJohn Baldwin	movups	64(%esi),%xmm1
6144bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
6145bc3d5698SJohn Baldwin	movups	80(%esi),%xmm0
6146bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm7
/* Store five blocks now; the sixth (%xmm7) is flushed later. */
6147bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6148bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
6149bc3d5698SJohn Baldwin	leal	96(%esi),%esi
6150bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
/* Reload round count and key pointer from their saved copies. */
6151bc3d5698SJohn Baldwin	movl	%ebx,%ecx
6152bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
6153bc3d5698SJohn Baldwin	movl	%ebp,%edx
6154bc3d5698SJohn Baldwin	movups	%xmm6,64(%edi)
6155bc3d5698SJohn Baldwin	leal	80(%edi),%edi
6156bc3d5698SJohn Baldwin	subl	$96,%eax
6157bc3d5698SJohn Baldwin	ja	.L102cbc_dec_loop6
/* %xmm2 = pending 6th plaintext block, %xmm7 = chaining value. */
6158bc3d5698SJohn Baldwin	movaps	%xmm7,%xmm2
6159bc3d5698SJohn Baldwin	movaps	%xmm0,%xmm7
/* Remove the bias; signed <= 0 means no further whole-block tail. */
6160bc3d5698SJohn Baldwin	addl	$80,%eax
6161bc3d5698SJohn Baldwin	jle	.L103cbc_dec_clear_tail_collected
6162bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6163bc3d5698SJohn Baldwin	leal	16(%edi),%edi
6164bc3d5698SJohn Baldwin.L100cbc_dec_tail:
/*
 * At most 80 bytes remain.  Load blocks one at a time and dispatch on
 * the count; %xmm6/%xmm5 keep ciphertext copies needed for chaining.
 */
6165bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
6166bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm6
6167bc3d5698SJohn Baldwin	cmpl	$16,%eax
6168bc3d5698SJohn Baldwin	jbe	.L104cbc_dec_one
6169bc3d5698SJohn Baldwin	movups	16(%esi),%xmm3
6170bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm5
6171bc3d5698SJohn Baldwin	cmpl	$32,%eax
6172bc3d5698SJohn Baldwin	jbe	.L105cbc_dec_two
6173bc3d5698SJohn Baldwin	movups	32(%esi),%xmm4
6174bc3d5698SJohn Baldwin	cmpl	$48,%eax
6175bc3d5698SJohn Baldwin	jbe	.L106cbc_dec_three
6176bc3d5698SJohn Baldwin	movups	48(%esi),%xmm5
6177bc3d5698SJohn Baldwin	cmpl	$64,%eax
6178bc3d5698SJohn Baldwin	jbe	.L107cbc_dec_four
/* Five blocks: use the 6-block helper with a zero dummy in %xmm7. */
6179bc3d5698SJohn Baldwin	movups	64(%esi),%xmm6
6180bc3d5698SJohn Baldwin	movaps	%xmm7,(%esp)
6181bc3d5698SJohn Baldwin	movups	(%esi),%xmm2
6182bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm7
6183bc3d5698SJohn Baldwin	call	_aesni_decrypt6
6184bc3d5698SJohn Baldwin	movups	(%esi),%xmm1
6185bc3d5698SJohn Baldwin	movups	16(%esi),%xmm0
6186bc3d5698SJohn Baldwin	xorps	(%esp),%xmm2
6187bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm3
6188bc3d5698SJohn Baldwin	movups	32(%esi),%xmm1
6189bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm4
6190bc3d5698SJohn Baldwin	movups	48(%esi),%xmm0
6191bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm5
/* Ciphertext block 4 is the outgoing chaining value. */
6192bc3d5698SJohn Baldwin	movups	64(%esi),%xmm7
6193bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm6
/* Store four blocks, clearing each register once written out. */
6194bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6195bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
6196bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6197bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
6198bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6199bc3d5698SJohn Baldwin	movups	%xmm5,48(%edi)
6200bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
6201bc3d5698SJohn Baldwin	leal	64(%edi),%edi
/* Fifth block stays pending in %xmm2 for tail_collected. */
6202bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm2
6203bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
6204bc3d5698SJohn Baldwin	subl	$80,%eax
6205bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
6206bc3d5698SJohn Baldwin.align	16
6207bc3d5698SJohn Baldwin.L104cbc_dec_one:
/* One block: inline single-block decrypt of %xmm2. */
6208bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
6209bc3d5698SJohn Baldwin	movups	16(%edx),%xmm1
6210bc3d5698SJohn Baldwin	leal	32(%edx),%edx
6211bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm2
6212bc3d5698SJohn Baldwin.L109dec1_loop_20:
/* 66 0F 38 DE D1 = aesdec %xmm1,%xmm2 */
6213bc3d5698SJohn Baldwin.byte	102,15,56,222,209
6214bc3d5698SJohn Baldwin	decl	%ecx
6215bc3d5698SJohn Baldwin	movups	(%edx),%xmm1
6216bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6217bc3d5698SJohn Baldwin	jnz	.L109dec1_loop_20
/* 66 0F 38 DF D1 = aesdeclast %xmm1,%xmm2 */
6218bc3d5698SJohn Baldwin.byte	102,15,56,223,209
/* XOR in the chaining value; the ciphertext copy becomes the new one. */
6219bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
6220bc3d5698SJohn Baldwin	movaps	%xmm6,%xmm7
6221bc3d5698SJohn Baldwin	subl	$16,%eax
6222bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
6223bc3d5698SJohn Baldwin.align	16
6224bc3d5698SJohn Baldwin.L105cbc_dec_two:
/* Two blocks: %xmm6 = ciphertext 0, %xmm5 = ciphertext 1. */
6225bc3d5698SJohn Baldwin	call	_aesni_decrypt2
6226bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
6227bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
6228bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6229bc3d5698SJohn Baldwin	movaps	%xmm3,%xmm2
6230bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6231bc3d5698SJohn Baldwin	leal	16(%edi),%edi
6232bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm7
6233bc3d5698SJohn Baldwin	subl	$32,%eax
6234bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
6235bc3d5698SJohn Baldwin.align	16
6236bc3d5698SJohn Baldwin.L106cbc_dec_three:
/* Three blocks: chaining value for exit is re-read from 32(%esi). */
6237bc3d5698SJohn Baldwin	call	_aesni_decrypt3
6238bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
6239bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
6240bc3d5698SJohn Baldwin	xorps	%xmm5,%xmm4
6241bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6242bc3d5698SJohn Baldwin	movaps	%xmm4,%xmm2
6243bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6244bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
6245bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6246bc3d5698SJohn Baldwin	leal	32(%edi),%edi
6247bc3d5698SJohn Baldwin	movups	32(%esi),%xmm7
6248bc3d5698SJohn Baldwin	subl	$48,%eax
6249bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
6250bc3d5698SJohn Baldwin.align	16
6251bc3d5698SJohn Baldwin.L107cbc_dec_four:
/*
 * Four blocks: ciphertext 1 and 2 are re-read into %xmm1/%xmm0 for
 * chaining; ciphertext 3 becomes the outgoing chaining value.
 */
6252bc3d5698SJohn Baldwin	call	_aesni_decrypt4
6253bc3d5698SJohn Baldwin	movups	16(%esi),%xmm1
6254bc3d5698SJohn Baldwin	movups	32(%esi),%xmm0
6255bc3d5698SJohn Baldwin	xorps	%xmm7,%xmm2
6256bc3d5698SJohn Baldwin	movups	48(%esi),%xmm7
6257bc3d5698SJohn Baldwin	xorps	%xmm6,%xmm3
6258bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6259bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm4
6260bc3d5698SJohn Baldwin	movups	%xmm3,16(%edi)
6261bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6262bc3d5698SJohn Baldwin	xorps	%xmm0,%xmm5
6263bc3d5698SJohn Baldwin	movups	%xmm4,32(%edi)
6264bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6265bc3d5698SJohn Baldwin	leal	48(%edi),%edi
6266bc3d5698SJohn Baldwin	movaps	%xmm5,%xmm2
6267bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
6268bc3d5698SJohn Baldwin	subl	$64,%eax
6269bc3d5698SJohn Baldwin	jmp	.L108cbc_dec_tail_collected
6270bc3d5698SJohn Baldwin.align	16
6271bc3d5698SJohn Baldwin.L103cbc_dec_clear_tail_collected:
/* Reached straight from the 6-block loop: clear its leftover blocks. */
6272bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6273bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6274bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
6275bc3d5698SJohn Baldwin	pxor	%xmm6,%xmm6
6276bc3d5698SJohn Baldwin.L108cbc_dec_tail_collected:
/*
 * %xmm2 = last decrypted block, not yet stored.  If the low four bits
 * of %eax are zero, store it whole; otherwise take the partial path.
 */
6277bc3d5698SJohn Baldwin	andl	$15,%eax
6278bc3d5698SJohn Baldwin	jnz	.L110cbc_dec_tail_partial
6279bc3d5698SJohn Baldwin	movups	%xmm2,(%edi)
6280bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6281bc3d5698SJohn Baldwin	jmp	.L099cbc_ret
6282bc3d5698SJohn Baldwin.align	16
6283bc3d5698SJohn Baldwin.L110cbc_dec_tail_partial:
/*
 * Spill the block to the scratch slot and copy %ecx = 16 - %eax bytes
 * of it to the destination with rep movsb.
 */
6284bc3d5698SJohn Baldwin	movaps	%xmm2,(%esp)
6285bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6286bc3d5698SJohn Baldwin	movl	$16,%ecx
6287bc3d5698SJohn Baldwin	movl	%esp,%esi
6288bc3d5698SJohn Baldwin	subl	%eax,%ecx
/* 89 F6 F3 A4 = movl %esi,%esi ; rep movsb */
6289bc3d5698SJohn Baldwin.long	2767451785
/*
 * Wipe the scratch slot.  %xmm0 was zeroed above and is untouched by
 * rep movsb.  Storing %xmm2 here (as the generated code originally did)
 * only rewrote the same plaintext block and left it on the stack.
 */
6290bc3d5698SJohn Baldwin	movdqa	%xmm0,(%esp)
6291bc3d5698SJohn Baldwin.L099cbc_ret:
/* Return to the caller's stack, then write back the chaining value. */
6292bc3d5698SJohn Baldwin	movl	16(%esp),%esp
6293bc3d5698SJohn Baldwin	movl	36(%esp),%ebp
6294bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
6295bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
6296bc3d5698SJohn Baldwin	movups	%xmm7,(%ebp)
6297bc3d5698SJohn Baldwin	pxor	%xmm7,%xmm7
6298bc3d5698SJohn Baldwin.L094cbc_abort:
6299bc3d5698SJohn Baldwin	popl	%edi
6300bc3d5698SJohn Baldwin	popl	%esi
6301bc3d5698SJohn Baldwin	popl	%ebx
6302bc3d5698SJohn Baldwin	popl	%ebp
6303bc3d5698SJohn Baldwin	ret
6304bc3d5698SJohn Baldwin.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
6305bc3d5698SJohn Baldwin.type	_aesni_set_encrypt_key,@function
6306bc3d5698SJohn Baldwin.align	16
6307bc3d5698SJohn Baldwin_aesni_set_encrypt_key:
6308c0855eaaSJohn Baldwin	#ifdef __CET__
6309c0855eaaSJohn Baldwin
6310c0855eaaSJohn Baldwin.byte	243,15,30,251
6311c0855eaaSJohn Baldwin	#endif
6312c0855eaaSJohn Baldwin
6313bc3d5698SJohn Baldwin	pushl	%ebp
6314bc3d5698SJohn Baldwin	pushl	%ebx
6315bc3d5698SJohn Baldwin	testl	%eax,%eax
6316bc3d5698SJohn Baldwin	jz	.L111bad_pointer
6317bc3d5698SJohn Baldwin	testl	%edx,%edx
6318bc3d5698SJohn Baldwin	jz	.L111bad_pointer
6319bc3d5698SJohn Baldwin	call	.L112pic
6320bc3d5698SJohn Baldwin.L112pic:
6321bc3d5698SJohn Baldwin	popl	%ebx
6322bc3d5698SJohn Baldwin	leal	.Lkey_const-.L112pic(%ebx),%ebx
6323bc3d5698SJohn Baldwin	leal	OPENSSL_ia32cap_P,%ebp
6324bc3d5698SJohn Baldwin	movups	(%eax),%xmm0
6325bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm4
6326bc3d5698SJohn Baldwin	movl	4(%ebp),%ebp
6327bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6328bc3d5698SJohn Baldwin	andl	$268437504,%ebp
6329bc3d5698SJohn Baldwin	cmpl	$256,%ecx
6330bc3d5698SJohn Baldwin	je	.L11314rounds
6331bc3d5698SJohn Baldwin	cmpl	$192,%ecx
6332bc3d5698SJohn Baldwin	je	.L11412rounds
6333bc3d5698SJohn Baldwin	cmpl	$128,%ecx
6334bc3d5698SJohn Baldwin	jne	.L115bad_keybits
6335bc3d5698SJohn Baldwin.align	16
6336bc3d5698SJohn Baldwin.L11610rounds:
6337bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
6338bc3d5698SJohn Baldwin	je	.L11710rounds_alt
6339bc3d5698SJohn Baldwin	movl	$9,%ecx
6340bc3d5698SJohn Baldwin	movups	%xmm0,-16(%edx)
6341bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
6342bc3d5698SJohn Baldwin	call	.L118key_128_cold
6343bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
6344bc3d5698SJohn Baldwin	call	.L119key_128
6345bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
6346bc3d5698SJohn Baldwin	call	.L119key_128
6347bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
6348bc3d5698SJohn Baldwin	call	.L119key_128
6349bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
6350bc3d5698SJohn Baldwin	call	.L119key_128
6351bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
6352bc3d5698SJohn Baldwin	call	.L119key_128
6353bc3d5698SJohn Baldwin.byte	102,15,58,223,200,64
6354bc3d5698SJohn Baldwin	call	.L119key_128
6355bc3d5698SJohn Baldwin.byte	102,15,58,223,200,128
6356bc3d5698SJohn Baldwin	call	.L119key_128
6357bc3d5698SJohn Baldwin.byte	102,15,58,223,200,27
6358bc3d5698SJohn Baldwin	call	.L119key_128
6359bc3d5698SJohn Baldwin.byte	102,15,58,223,200,54
6360bc3d5698SJohn Baldwin	call	.L119key_128
6361bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6362bc3d5698SJohn Baldwin	movl	%ecx,80(%edx)
6363bc3d5698SJohn Baldwin	jmp	.L120good_key
6364bc3d5698SJohn Baldwin.align	16
6365bc3d5698SJohn Baldwin.L119key_128:
6366bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6367bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6368bc3d5698SJohn Baldwin.L118key_128_cold:
6369bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
6370bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6371bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
6372bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6373bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1
6374bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
6375bc3d5698SJohn Baldwin	ret
6376bc3d5698SJohn Baldwin.align	16
6377bc3d5698SJohn Baldwin.L11710rounds_alt:
6378bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm5
6379bc3d5698SJohn Baldwin	movl	$8,%ecx
6380bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
6381bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
6382bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
6383bc3d5698SJohn Baldwin.L121loop_key128:
6384bc3d5698SJohn Baldwin.byte	102,15,56,0,197
6385bc3d5698SJohn Baldwin.byte	102,15,56,221,196
6386bc3d5698SJohn Baldwin	pslld	$1,%xmm4
6387bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6388bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
6389bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6390bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6391bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6392bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6393bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6394bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6395bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
6396bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
6397bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
6398bc3d5698SJohn Baldwin	decl	%ecx
6399bc3d5698SJohn Baldwin	jnz	.L121loop_key128
6400bc3d5698SJohn Baldwin	movdqa	48(%ebx),%xmm4
6401bc3d5698SJohn Baldwin.byte	102,15,56,0,197
6402bc3d5698SJohn Baldwin.byte	102,15,56,221,196
6403bc3d5698SJohn Baldwin	pslld	$1,%xmm4
6404bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
6405bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6406bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6407bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6408bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6409bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6410bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6411bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
6412bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
6413bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm2
6414bc3d5698SJohn Baldwin.byte	102,15,56,0,197
6415bc3d5698SJohn Baldwin.byte	102,15,56,221,196
6416bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
6417bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6418bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6419bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6420bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm3
6421bc3d5698SJohn Baldwin	pslldq	$4,%xmm2
6422bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6423bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
6424bc3d5698SJohn Baldwin	movdqu	%xmm0,16(%edx)
6425bc3d5698SJohn Baldwin	movl	$9,%ecx
6426bc3d5698SJohn Baldwin	movl	%ecx,96(%edx)
6427bc3d5698SJohn Baldwin	jmp	.L120good_key
6428bc3d5698SJohn Baldwin.align	16
6429bc3d5698SJohn Baldwin.L11412rounds:
6430bc3d5698SJohn Baldwin	movq	16(%eax),%xmm2
6431bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
6432bc3d5698SJohn Baldwin	je	.L12212rounds_alt
6433bc3d5698SJohn Baldwin	movl	$11,%ecx
6434bc3d5698SJohn Baldwin	movups	%xmm0,-16(%edx)
6435bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
6436bc3d5698SJohn Baldwin	call	.L123key_192a_cold
6437bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
6438bc3d5698SJohn Baldwin	call	.L124key_192b
6439bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
6440bc3d5698SJohn Baldwin	call	.L125key_192a
6441bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
6442bc3d5698SJohn Baldwin	call	.L124key_192b
6443bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
6444bc3d5698SJohn Baldwin	call	.L125key_192a
6445bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
6446bc3d5698SJohn Baldwin	call	.L124key_192b
6447bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
6448bc3d5698SJohn Baldwin	call	.L125key_192a
6449bc3d5698SJohn Baldwin.byte	102,15,58,223,202,128
6450bc3d5698SJohn Baldwin	call	.L124key_192b
6451bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6452bc3d5698SJohn Baldwin	movl	%ecx,48(%edx)
6453bc3d5698SJohn Baldwin	jmp	.L120good_key
6454bc3d5698SJohn Baldwin.align	16
6455bc3d5698SJohn Baldwin.L125key_192a:
6456bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6457bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6458bc3d5698SJohn Baldwin.align	16
6459bc3d5698SJohn Baldwin.L123key_192a_cold:
6460bc3d5698SJohn Baldwin	movaps	%xmm2,%xmm5
6461bc3d5698SJohn Baldwin.L126key_192b_warm:
6462bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
6463bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm3
6464bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6465bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
6466bc3d5698SJohn Baldwin	pslldq	$4,%xmm3
6467bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6468bc3d5698SJohn Baldwin	pshufd	$85,%xmm1,%xmm1
6469bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6470bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm0
6471bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm3
6472bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6473bc3d5698SJohn Baldwin	ret
6474bc3d5698SJohn Baldwin.align	16
6475bc3d5698SJohn Baldwin.L124key_192b:
6476bc3d5698SJohn Baldwin	movaps	%xmm0,%xmm3
6477bc3d5698SJohn Baldwin	shufps	$68,%xmm0,%xmm5
6478bc3d5698SJohn Baldwin	movups	%xmm5,(%edx)
6479bc3d5698SJohn Baldwin	shufps	$78,%xmm2,%xmm3
6480bc3d5698SJohn Baldwin	movups	%xmm3,16(%edx)
6481bc3d5698SJohn Baldwin	leal	32(%edx),%edx
6482bc3d5698SJohn Baldwin	jmp	.L126key_192b_warm
6483bc3d5698SJohn Baldwin.align	16
6484bc3d5698SJohn Baldwin.L12212rounds_alt:
6485bc3d5698SJohn Baldwin	movdqa	16(%ebx),%xmm5
6486bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
6487bc3d5698SJohn Baldwin	movl	$8,%ecx
6488bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
6489bc3d5698SJohn Baldwin.L127loop_key192:
6490bc3d5698SJohn Baldwin	movq	%xmm2,(%edx)
6491bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
6492bc3d5698SJohn Baldwin.byte	102,15,56,0,213
6493bc3d5698SJohn Baldwin.byte	102,15,56,221,212
6494bc3d5698SJohn Baldwin	pslld	$1,%xmm4
6495bc3d5698SJohn Baldwin	leal	24(%edx),%edx
6496bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
6497bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6498bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
6499bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6500bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
6501bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6502bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
6503bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm3
6504bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
6505bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
6506bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
6507bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
6508bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm2
6509bc3d5698SJohn Baldwin	movdqu	%xmm0,-16(%edx)
6510bc3d5698SJohn Baldwin	decl	%ecx
6511bc3d5698SJohn Baldwin	jnz	.L127loop_key192
6512bc3d5698SJohn Baldwin	movl	$11,%ecx
6513bc3d5698SJohn Baldwin	movl	%ecx,32(%edx)
6514bc3d5698SJohn Baldwin	jmp	.L120good_key
6515bc3d5698SJohn Baldwin.align	16
6516bc3d5698SJohn Baldwin.L11314rounds:
6517bc3d5698SJohn Baldwin	movups	16(%eax),%xmm2
6518bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6519bc3d5698SJohn Baldwin	cmpl	$268435456,%ebp
6520bc3d5698SJohn Baldwin	je	.L12814rounds_alt
6521bc3d5698SJohn Baldwin	movl	$13,%ecx
6522bc3d5698SJohn Baldwin	movups	%xmm0,-32(%edx)
6523bc3d5698SJohn Baldwin	movups	%xmm2,-16(%edx)
6524bc3d5698SJohn Baldwin.byte	102,15,58,223,202,1
6525bc3d5698SJohn Baldwin	call	.L129key_256a_cold
6526bc3d5698SJohn Baldwin.byte	102,15,58,223,200,1
6527bc3d5698SJohn Baldwin	call	.L130key_256b
6528bc3d5698SJohn Baldwin.byte	102,15,58,223,202,2
6529bc3d5698SJohn Baldwin	call	.L131key_256a
6530bc3d5698SJohn Baldwin.byte	102,15,58,223,200,2
6531bc3d5698SJohn Baldwin	call	.L130key_256b
6532bc3d5698SJohn Baldwin.byte	102,15,58,223,202,4
6533bc3d5698SJohn Baldwin	call	.L131key_256a
6534bc3d5698SJohn Baldwin.byte	102,15,58,223,200,4
6535bc3d5698SJohn Baldwin	call	.L130key_256b
6536bc3d5698SJohn Baldwin.byte	102,15,58,223,202,8
6537bc3d5698SJohn Baldwin	call	.L131key_256a
6538bc3d5698SJohn Baldwin.byte	102,15,58,223,200,8
6539bc3d5698SJohn Baldwin	call	.L130key_256b
6540bc3d5698SJohn Baldwin.byte	102,15,58,223,202,16
6541bc3d5698SJohn Baldwin	call	.L131key_256a
6542bc3d5698SJohn Baldwin.byte	102,15,58,223,200,16
6543bc3d5698SJohn Baldwin	call	.L130key_256b
6544bc3d5698SJohn Baldwin.byte	102,15,58,223,202,32
6545bc3d5698SJohn Baldwin	call	.L131key_256a
6546bc3d5698SJohn Baldwin.byte	102,15,58,223,200,32
6547bc3d5698SJohn Baldwin	call	.L130key_256b
6548bc3d5698SJohn Baldwin.byte	102,15,58,223,202,64
6549bc3d5698SJohn Baldwin	call	.L131key_256a
6550bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6551bc3d5698SJohn Baldwin	movl	%ecx,16(%edx)
6552bc3d5698SJohn Baldwin	xorl	%eax,%eax
6553bc3d5698SJohn Baldwin	jmp	.L120good_key
6554bc3d5698SJohn Baldwin.align	16
6555bc3d5698SJohn Baldwin.L131key_256a:
6556bc3d5698SJohn Baldwin	movups	%xmm2,(%edx)
6557bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6558bc3d5698SJohn Baldwin.L129key_256a_cold:
6559bc3d5698SJohn Baldwin	shufps	$16,%xmm0,%xmm4
6560bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6561bc3d5698SJohn Baldwin	shufps	$140,%xmm0,%xmm4
6562bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm0
6563bc3d5698SJohn Baldwin	shufps	$255,%xmm1,%xmm1
6564bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm0
6565bc3d5698SJohn Baldwin	ret
6566bc3d5698SJohn Baldwin.align	16
6567bc3d5698SJohn Baldwin.L130key_256b:
6568bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
6569bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6570bc3d5698SJohn Baldwin	shufps	$16,%xmm2,%xmm4
6571bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
6572bc3d5698SJohn Baldwin	shufps	$140,%xmm2,%xmm4
6573bc3d5698SJohn Baldwin	xorps	%xmm4,%xmm2
6574bc3d5698SJohn Baldwin	shufps	$170,%xmm1,%xmm1
6575bc3d5698SJohn Baldwin	xorps	%xmm1,%xmm2
6576bc3d5698SJohn Baldwin	ret
6577bc3d5698SJohn Baldwin.align	16
6578bc3d5698SJohn Baldwin.L12814rounds_alt:
6579bc3d5698SJohn Baldwin	movdqa	(%ebx),%xmm5
6580bc3d5698SJohn Baldwin	movdqa	32(%ebx),%xmm4
6581bc3d5698SJohn Baldwin	movl	$7,%ecx
6582bc3d5698SJohn Baldwin	movdqu	%xmm0,-32(%edx)
6583bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
6584bc3d5698SJohn Baldwin	movdqu	%xmm2,-16(%edx)
6585bc3d5698SJohn Baldwin.L132loop_key256:
6586bc3d5698SJohn Baldwin.byte	102,15,56,0,213
6587bc3d5698SJohn Baldwin.byte	102,15,56,221,212
6588bc3d5698SJohn Baldwin	movdqa	%xmm0,%xmm3
6589bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6590bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
6591bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6592bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm3
6593bc3d5698SJohn Baldwin	pslldq	$4,%xmm0
6594bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm0
6595bc3d5698SJohn Baldwin	pslld	$1,%xmm4
6596bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm0
6597bc3d5698SJohn Baldwin	movdqu	%xmm0,(%edx)
6598bc3d5698SJohn Baldwin	decl	%ecx
6599bc3d5698SJohn Baldwin	jz	.L133done_key256
6600bc3d5698SJohn Baldwin	pshufd	$255,%xmm0,%xmm2
6601bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6602bc3d5698SJohn Baldwin.byte	102,15,56,221,211
6603bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm3
6604bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
6605bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
6606bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
6607bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm3
6608bc3d5698SJohn Baldwin	pslldq	$4,%xmm1
6609bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm1
6610bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm2
6611bc3d5698SJohn Baldwin	movdqu	%xmm2,16(%edx)
6612bc3d5698SJohn Baldwin	leal	32(%edx),%edx
6613bc3d5698SJohn Baldwin	movdqa	%xmm2,%xmm1
6614bc3d5698SJohn Baldwin	jmp	.L132loop_key256
6615bc3d5698SJohn Baldwin.L133done_key256:
6616bc3d5698SJohn Baldwin	movl	$13,%ecx
6617bc3d5698SJohn Baldwin	movl	%ecx,16(%edx)
6618bc3d5698SJohn Baldwin.L120good_key:
6619bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6620bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
6621bc3d5698SJohn Baldwin	pxor	%xmm2,%xmm2
6622bc3d5698SJohn Baldwin	pxor	%xmm3,%xmm3
6623bc3d5698SJohn Baldwin	pxor	%xmm4,%xmm4
6624bc3d5698SJohn Baldwin	pxor	%xmm5,%xmm5
6625bc3d5698SJohn Baldwin	xorl	%eax,%eax
6626bc3d5698SJohn Baldwin	popl	%ebx
6627bc3d5698SJohn Baldwin	popl	%ebp
6628bc3d5698SJohn Baldwin	ret
6629bc3d5698SJohn Baldwin.align	4
6630bc3d5698SJohn Baldwin.L111bad_pointer:
6631bc3d5698SJohn Baldwin	movl	$-1,%eax
6632bc3d5698SJohn Baldwin	popl	%ebx
6633bc3d5698SJohn Baldwin	popl	%ebp
6634bc3d5698SJohn Baldwin	ret
6635bc3d5698SJohn Baldwin.align	4
6636bc3d5698SJohn Baldwin.L115bad_keybits:
6637bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6638bc3d5698SJohn Baldwin	movl	$-2,%eax
6639bc3d5698SJohn Baldwin	popl	%ebx
6640bc3d5698SJohn Baldwin	popl	%ebp
6641bc3d5698SJohn Baldwin	ret
6642bc3d5698SJohn Baldwin.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * aesni_set_encrypt_key -- exported entry point that takes its three
 * arguments on the stack.
 *
 * It copies them into the registers the internal routine
 * _aesni_set_encrypt_key is called with, calls it, and returns with
 * whatever that routine left in %eax:
 *   4(%esp)  -> %eax  source key pointer (the helper reads 16(%eax))
 *   8(%esp)  -> %ecx  presumably the key length in bits; the code that
 *                     dispatches on it is outside this view -- TODO confirm
 *   12(%esp) -> %edx  destination key schedule (the helper stores to it)
 *
 * Return value, as set on the helper's visible exits: 0 through
 * .L120good_key, -1 through .L111bad_pointer, -2 through .L115bad_keybits.
 */
6643bc3d5698SJohn Baldwin.globl	aesni_set_encrypt_key
6644bc3d5698SJohn Baldwin.type	aesni_set_encrypt_key,@function
6645bc3d5698SJohn Baldwin.align	16
6646bc3d5698SJohn Baldwinaesni_set_encrypt_key:
6647bc3d5698SJohn Baldwin.L_aesni_set_encrypt_key_begin:
	/* With __CET__ defined the function starts with the raw bytes
	 * F3 0F 1E FB, which is the encoding of ENDBR32. */
6648c0855eaaSJohn Baldwin	#ifdef __CET__
6649c0855eaaSJohn Baldwin
6650c0855eaaSJohn Baldwin.byte	243,15,30,251
6651c0855eaaSJohn Baldwin	#endif
6652c0855eaaSJohn Baldwin
	/* Stack arguments -> helper's register arguments. */
6653bc3d5698SJohn Baldwin	movl	4(%esp),%eax
6654bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
6655bc3d5698SJohn Baldwin	movl	12(%esp),%edx
6656bc3d5698SJohn Baldwin	call	_aesni_set_encrypt_key
	/* %eax already holds the helper's status; return it unchanged. */
6657bc3d5698SJohn Baldwin	ret
6658bc3d5698SJohn Baldwin.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * aesni_set_decrypt_key -- exported entry point that takes its three
 * arguments on the stack (same layout as aesni_set_encrypt_key:
 * 4(%esp), 8(%esp), 12(%esp) -> %eax, %ecx, %edx).
 *
 * Step 1: call _aesni_set_encrypt_key to fill the schedule at 12(%esp).
 * Step 2: if that returned 0, rewrite the schedule in place:
 *   - the first and last 16-byte entries are swapped unchanged;
 *   - every other entry is passed through the 66 0F 38 DB instruction
 *     (AESIMC) and the entry order is reversed, using one cursor that
 *     walks up from the front and one that walks down from the back.
 *
 * Return value in %eax: 0 on success; otherwise the non-zero status of
 * _aesni_set_encrypt_key is returned untouched (its visible error exits
 * produce -1 and -2).
 */
6659bc3d5698SJohn Baldwin.globl	aesni_set_decrypt_key
6660bc3d5698SJohn Baldwin.type	aesni_set_decrypt_key,@function
6661bc3d5698SJohn Baldwin.align	16
6662bc3d5698SJohn Baldwinaesni_set_decrypt_key:
6663bc3d5698SJohn Baldwin.L_aesni_set_decrypt_key_begin:
	/* With __CET__ defined the function starts with the raw bytes
	 * F3 0F 1E FB, which is the encoding of ENDBR32. */
6664c0855eaaSJohn Baldwin	#ifdef __CET__
6665c0855eaaSJohn Baldwin
6666c0855eaaSJohn Baldwin.byte	243,15,30,251
6667c0855eaaSJohn Baldwin	#endif
6668c0855eaaSJohn Baldwin
	/* Stack arguments -> helper's register arguments. */
6669bc3d5698SJohn Baldwin	movl	4(%esp),%eax
6670bc3d5698SJohn Baldwin	movl	8(%esp),%ecx
6671bc3d5698SJohn Baldwin	movl	12(%esp),%edx
6672bc3d5698SJohn Baldwin	call	_aesni_set_encrypt_key
	/* The helper advances %edx while it stores entries, so fetch the
	 * schedule base from the stack again. */
6673bc3d5698SJohn Baldwin	movl	12(%esp),%edx
	/* %ecx *= 16. On the helper's visible success paths %ecx comes back
	 * as 9, 11 or 13 (the same value it stores into the schedule), so
	 * this turns it into a byte offset in units of 16-byte entries. */
6674bc3d5698SJohn Baldwin	shll	$4,%ecx
	/* Non-zero status from the helper: leave without touching memory. */
6675bc3d5698SJohn Baldwin	testl	%eax,%eax
6676bc3d5698SJohn Baldwin	jnz	.L134dec_key_ret
	/* %eax = base + 16 + 16*count = address of the last entry
	 * (e.g. base + 160 when the helper returned 9). */
6677bc3d5698SJohn Baldwin	leal	16(%edx,%ecx,1),%eax
	/* Exchange the first and last entries as they are. */
6678bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
6679bc3d5698SJohn Baldwin	movups	(%eax),%xmm1
6680bc3d5698SJohn Baldwin	movups	%xmm0,(%eax)
6681bc3d5698SJohn Baldwin	movups	%xmm1,(%edx)
	/* Move both cursors one entry towards the middle. */
6682bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6683bc3d5698SJohn Baldwin	leal	-16(%eax),%eax
6684bc3d5698SJohn Baldwin.L135dec_key_inverse:
	/* %xmm0 = entry at the front cursor, %xmm1 = entry at the back. */
6685bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
6686bc3d5698SJohn Baldwin	movups	(%eax),%xmm1
	/* 66 0F 38 DB C0 = aesimc %xmm0,%xmm0 */
6687bc3d5698SJohn Baldwin.byte	102,15,56,219,192
	/* 66 0F 38 DB C9 = aesimc %xmm1,%xmm1 */
6688bc3d5698SJohn Baldwin.byte	102,15,56,219,201
	/* The cursors are stepped before the stores, so 16(%eax) is the
	 * slot %xmm1 was read from and -16(%edx) the slot %xmm0 was read
	 * from: the two transformed entries are written back crosswise. */
6689bc3d5698SJohn Baldwin	leal	16(%edx),%edx
6690bc3d5698SJohn Baldwin	leal	-16(%eax),%eax
6691bc3d5698SJohn Baldwin	movups	%xmm0,16(%eax)
6692bc3d5698SJohn Baldwin	movups	%xmm1,-16(%edx)
	/* Unsigned compare: repeat while the back cursor is still above
	 * the front cursor. */
6693bc3d5698SJohn Baldwin	cmpl	%edx,%eax
6694bc3d5698SJohn Baldwin	ja	.L135dec_key_inverse
	/* For counts 9/11/13 the schedule has count+2 entries, an odd
	 * number, so both cursors end on the single middle entry; it is
	 * transformed in place (66 0F 38 DB C0 = aesimc %xmm0,%xmm0). */
6695bc3d5698SJohn Baldwin	movups	(%edx),%xmm0
6696bc3d5698SJohn Baldwin.byte	102,15,56,219,192
6697bc3d5698SJohn Baldwin	movups	%xmm0,(%edx)
	/* Zero the two XMM registers that held schedule entries, and
	 * return 0. */
6698bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
6699bc3d5698SJohn Baldwin	pxor	%xmm1,%xmm1
6700bc3d5698SJohn Baldwin	xorl	%eax,%eax
6701bc3d5698SJohn Baldwin.L134dec_key_ret:
6702bc3d5698SJohn Baldwin	ret
6703bc3d5698SJohn Baldwin.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
/*
 * .Lkey_const -- 64-byte-aligned table of four 16-byte rows (each row is
 * one 32-bit value repeated four times), followed by an identification
 * string and the declaration of OPENSSL_ia32cap_P.
 *
 * The part of _aesni_set_encrypt_key visible earlier in this file loads
 * rows from (%ebx), 16(%ebx), 32(%ebx) and 48(%ebx). Presumably %ebx
 * holds the address of this table, but the instruction that sets %ebx is
 * outside this view -- TODO confirm.
 */
6704bc3d5698SJohn Baldwin.align	64
6705bc3d5698SJohn Baldwin.Lkey_const:
	/* +0:  0x0c0f0e0d in each dword. (%ebx) is loaded into %xmm5 at
	 *      .L12814rounds_alt, where %xmm5 is the mask operand of the
	 *      66 0F 38 00 (PSHUFB) byte sequence. */
6706bc3d5698SJohn Baldwin.long	202313229,202313229,202313229,202313229
	/* +16: 0x04070605 in each dword. 16(%ebx) is loaded into %xmm5 at
	 *      .L12212rounds_alt for the same PSHUFB use. */
6707bc3d5698SJohn Baldwin.long	67569157,67569157,67569157,67569157
	/* +32: 1 in each dword. 32(%ebx) is loaded into %xmm4 on the 192- and
	 *      256-bit alt paths, used as the second operand of the
	 *      66 0F 38 DD (AESENCLAST) byte sequence and doubled each
	 *      iteration with pslld $1 -- presumably the AES round constant. */
6708bc3d5698SJohn Baldwin.long	1,1,1,1
	/* +48: 0x1b in each dword. 48(%ebx) is loaded into %xmm4 after
	 *      .L121loop_key128 finishes, for the same AESENCLAST use. */
6709bc3d5698SJohn Baldwin.long	27,27,27,27
	/* NUL-terminated ASCII string:
	 * "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
6710bc3d5698SJohn Baldwin.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
6711bc3d5698SJohn Baldwin.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
6712bc3d5698SJohn Baldwin.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
6713bc3d5698SJohn Baldwin.byte	115,108,46,111,114,103,62,0
	/* Common symbol OPENSSL_ia32cap_P: 16 bytes, alignment 4. */
6714bc3d5698SJohn Baldwin.comm	OPENSSL_ia32cap_P,16,4
6715c0855eaaSJohn Baldwin
/*
 * ELF note in the allocatable section ".note.gnu.property".
 *
 * Layout (all fields 32-bit, 4-byte aligned):
 *   namesz = 1f - 0f   size of the name "GNU\0" (4 bytes)
 *   descsz = 4f - 1f   size of the descriptor that follows the name
 *   type   = 5         NT_GNU_PROPERTY_TYPE_0 in the Linux gABI extensions
 *   name   = "GNU"
 *   desc   = one property record:
 *              type   0xc0000002  GNU_PROPERTY_X86_FEATURE_1_AND
 *              datasz 3f - 2f     (4 bytes)
 *              data   3           bit 0 (IBT) | bit 1 (SHSTK)
 *
 * NOTE(review): in the lines visible here this note is not wrapped in the
 * __CET__ guard that surrounds the ENDBR32 bytes at the function entry
 * points -- confirm that emitting it unconditionally is intended.
 */
6716c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
6717c0855eaaSJohn Baldwin	.p2align 2
	/* Note header: namesz, descsz, type. */
6718c0855eaaSJohn Baldwin	.long 1f - 0f
6719c0855eaaSJohn Baldwin	.long 4f - 1f
6720c0855eaaSJohn Baldwin	.long 5
	/* 0: start of name, 1: end of name. The name is exactly 4 bytes, so
	 * the alignment after label 1 adds no padding. */
6721c0855eaaSJohn Baldwin0:
6722c0855eaaSJohn Baldwin	.asciz "GNU"
6723c0855eaaSJohn Baldwin1:
6724c0855eaaSJohn Baldwin	.p2align 2
	/* Property record: type, then size of the data between 2 and 3. */
6725c0855eaaSJohn Baldwin	.long 0xc0000002
6726c0855eaaSJohn Baldwin	.long 3f - 2f
6727c0855eaaSJohn Baldwin2:
	/* Property data: feature bits 0 and 1 set. */
6728c0855eaaSJohn Baldwin	.long 3
6729c0855eaaSJohn Baldwin3:
	/* Pad the descriptor to a 4-byte boundary; 4: end of descriptor. */
6730c0855eaaSJohn Baldwin	.p2align 2
6731c0855eaaSJohn Baldwin4:
6732bc3d5698SJohn Baldwin#endif
6733