/* xref: /freebsd/sys/crypto/openssl/i386/aesni-x86.S (revision 1d386b48) */
1/* Do not modify. This file is auto-generated from aesni-x86.pl. */
2#ifdef PIC
3.text
4.globl	aesni_encrypt
5.type	aesni_encrypt,@function
6.align	16
7aesni_encrypt:
8.L_aesni_encrypt_begin:
9	movl	4(%esp),%eax
10	movl	12(%esp),%edx
11	movups	(%eax),%xmm2
12	movl	240(%edx),%ecx
13	movl	8(%esp),%eax
14	movups	(%edx),%xmm0
15	movups	16(%edx),%xmm1
16	leal	32(%edx),%edx
17	xorps	%xmm0,%xmm2
18.L000enc1_loop_1:
19.byte	102,15,56,220,209
20	decl	%ecx
21	movups	(%edx),%xmm1
22	leal	16(%edx),%edx
23	jnz	.L000enc1_loop_1
24.byte	102,15,56,221,209
25	pxor	%xmm0,%xmm0
26	pxor	%xmm1,%xmm1
27	movups	%xmm2,(%eax)
28	pxor	%xmm2,%xmm2
29	ret
30.size	aesni_encrypt,.-.L_aesni_encrypt_begin
31.globl	aesni_decrypt
32.type	aesni_decrypt,@function
33.align	16
34aesni_decrypt:
35.L_aesni_decrypt_begin:
36	movl	4(%esp),%eax
37	movl	12(%esp),%edx
38	movups	(%eax),%xmm2
39	movl	240(%edx),%ecx
40	movl	8(%esp),%eax
41	movups	(%edx),%xmm0
42	movups	16(%edx),%xmm1
43	leal	32(%edx),%edx
44	xorps	%xmm0,%xmm2
45.L001dec1_loop_2:
46.byte	102,15,56,222,209
47	decl	%ecx
48	movups	(%edx),%xmm1
49	leal	16(%edx),%edx
50	jnz	.L001dec1_loop_2
51.byte	102,15,56,223,209
52	pxor	%xmm0,%xmm0
53	pxor	%xmm1,%xmm1
54	movups	%xmm2,(%eax)
55	pxor	%xmm2,%xmm2
56	ret
57.size	aesni_decrypt,.-.L_aesni_decrypt_begin
58.type	_aesni_encrypt2,@function
59.align	16
60_aesni_encrypt2:
61	movups	(%edx),%xmm0
62	shll	$4,%ecx
63	movups	16(%edx),%xmm1
64	xorps	%xmm0,%xmm2
65	pxor	%xmm0,%xmm3
66	movups	32(%edx),%xmm0
67	leal	32(%edx,%ecx,1),%edx
68	negl	%ecx
69	addl	$16,%ecx
70.L002enc2_loop:
71.byte	102,15,56,220,209
72.byte	102,15,56,220,217
73	movups	(%edx,%ecx,1),%xmm1
74	addl	$32,%ecx
75.byte	102,15,56,220,208
76.byte	102,15,56,220,216
77	movups	-16(%edx,%ecx,1),%xmm0
78	jnz	.L002enc2_loop
79.byte	102,15,56,220,209
80.byte	102,15,56,220,217
81.byte	102,15,56,221,208
82.byte	102,15,56,221,216
83	ret
84.size	_aesni_encrypt2,.-_aesni_encrypt2
85.type	_aesni_decrypt2,@function
86.align	16
87_aesni_decrypt2:
88	movups	(%edx),%xmm0
89	shll	$4,%ecx
90	movups	16(%edx),%xmm1
91	xorps	%xmm0,%xmm2
92	pxor	%xmm0,%xmm3
93	movups	32(%edx),%xmm0
94	leal	32(%edx,%ecx,1),%edx
95	negl	%ecx
96	addl	$16,%ecx
97.L003dec2_loop:
98.byte	102,15,56,222,209
99.byte	102,15,56,222,217
100	movups	(%edx,%ecx,1),%xmm1
101	addl	$32,%ecx
102.byte	102,15,56,222,208
103.byte	102,15,56,222,216
104	movups	-16(%edx,%ecx,1),%xmm0
105	jnz	.L003dec2_loop
106.byte	102,15,56,222,209
107.byte	102,15,56,222,217
108.byte	102,15,56,223,208
109.byte	102,15,56,223,216
110	ret
111.size	_aesni_decrypt2,.-_aesni_decrypt2
112.type	_aesni_encrypt3,@function
113.align	16
114_aesni_encrypt3:
115	movups	(%edx),%xmm0
116	shll	$4,%ecx
117	movups	16(%edx),%xmm1
118	xorps	%xmm0,%xmm2
119	pxor	%xmm0,%xmm3
120	pxor	%xmm0,%xmm4
121	movups	32(%edx),%xmm0
122	leal	32(%edx,%ecx,1),%edx
123	negl	%ecx
124	addl	$16,%ecx
125.L004enc3_loop:
126.byte	102,15,56,220,209
127.byte	102,15,56,220,217
128.byte	102,15,56,220,225
129	movups	(%edx,%ecx,1),%xmm1
130	addl	$32,%ecx
131.byte	102,15,56,220,208
132.byte	102,15,56,220,216
133.byte	102,15,56,220,224
134	movups	-16(%edx,%ecx,1),%xmm0
135	jnz	.L004enc3_loop
136.byte	102,15,56,220,209
137.byte	102,15,56,220,217
138.byte	102,15,56,220,225
139.byte	102,15,56,221,208
140.byte	102,15,56,221,216
141.byte	102,15,56,221,224
142	ret
143.size	_aesni_encrypt3,.-_aesni_encrypt3
144.type	_aesni_decrypt3,@function
145.align	16
146_aesni_decrypt3:
147	movups	(%edx),%xmm0
148	shll	$4,%ecx
149	movups	16(%edx),%xmm1
150	xorps	%xmm0,%xmm2
151	pxor	%xmm0,%xmm3
152	pxor	%xmm0,%xmm4
153	movups	32(%edx),%xmm0
154	leal	32(%edx,%ecx,1),%edx
155	negl	%ecx
156	addl	$16,%ecx
157.L005dec3_loop:
158.byte	102,15,56,222,209
159.byte	102,15,56,222,217
160.byte	102,15,56,222,225
161	movups	(%edx,%ecx,1),%xmm1
162	addl	$32,%ecx
163.byte	102,15,56,222,208
164.byte	102,15,56,222,216
165.byte	102,15,56,222,224
166	movups	-16(%edx,%ecx,1),%xmm0
167	jnz	.L005dec3_loop
168.byte	102,15,56,222,209
169.byte	102,15,56,222,217
170.byte	102,15,56,222,225
171.byte	102,15,56,223,208
172.byte	102,15,56,223,216
173.byte	102,15,56,223,224
174	ret
175.size	_aesni_decrypt3,.-_aesni_decrypt3
176.type	_aesni_encrypt4,@function
177.align	16
178_aesni_encrypt4:
179	movups	(%edx),%xmm0
180	movups	16(%edx),%xmm1
181	shll	$4,%ecx
182	xorps	%xmm0,%xmm2
183	pxor	%xmm0,%xmm3
184	pxor	%xmm0,%xmm4
185	pxor	%xmm0,%xmm5
186	movups	32(%edx),%xmm0
187	leal	32(%edx,%ecx,1),%edx
188	negl	%ecx
189.byte	15,31,64,0
190	addl	$16,%ecx
191.L006enc4_loop:
192.byte	102,15,56,220,209
193.byte	102,15,56,220,217
194.byte	102,15,56,220,225
195.byte	102,15,56,220,233
196	movups	(%edx,%ecx,1),%xmm1
197	addl	$32,%ecx
198.byte	102,15,56,220,208
199.byte	102,15,56,220,216
200.byte	102,15,56,220,224
201.byte	102,15,56,220,232
202	movups	-16(%edx,%ecx,1),%xmm0
203	jnz	.L006enc4_loop
204.byte	102,15,56,220,209
205.byte	102,15,56,220,217
206.byte	102,15,56,220,225
207.byte	102,15,56,220,233
208.byte	102,15,56,221,208
209.byte	102,15,56,221,216
210.byte	102,15,56,221,224
211.byte	102,15,56,221,232
212	ret
213.size	_aesni_encrypt4,.-_aesni_encrypt4
214.type	_aesni_decrypt4,@function
215.align	16
216_aesni_decrypt4:
217	movups	(%edx),%xmm0
218	movups	16(%edx),%xmm1
219	shll	$4,%ecx
220	xorps	%xmm0,%xmm2
221	pxor	%xmm0,%xmm3
222	pxor	%xmm0,%xmm4
223	pxor	%xmm0,%xmm5
224	movups	32(%edx),%xmm0
225	leal	32(%edx,%ecx,1),%edx
226	negl	%ecx
227.byte	15,31,64,0
228	addl	$16,%ecx
229.L007dec4_loop:
230.byte	102,15,56,222,209
231.byte	102,15,56,222,217
232.byte	102,15,56,222,225
233.byte	102,15,56,222,233
234	movups	(%edx,%ecx,1),%xmm1
235	addl	$32,%ecx
236.byte	102,15,56,222,208
237.byte	102,15,56,222,216
238.byte	102,15,56,222,224
239.byte	102,15,56,222,232
240	movups	-16(%edx,%ecx,1),%xmm0
241	jnz	.L007dec4_loop
242.byte	102,15,56,222,209
243.byte	102,15,56,222,217
244.byte	102,15,56,222,225
245.byte	102,15,56,222,233
246.byte	102,15,56,223,208
247.byte	102,15,56,223,216
248.byte	102,15,56,223,224
249.byte	102,15,56,223,232
250	ret
251.size	_aesni_decrypt4,.-_aesni_decrypt4
252.type	_aesni_encrypt6,@function
253.align	16
254_aesni_encrypt6:
255	movups	(%edx),%xmm0
256	shll	$4,%ecx
257	movups	16(%edx),%xmm1
258	xorps	%xmm0,%xmm2
259	pxor	%xmm0,%xmm3
260	pxor	%xmm0,%xmm4
261.byte	102,15,56,220,209
262	pxor	%xmm0,%xmm5
263	pxor	%xmm0,%xmm6
264.byte	102,15,56,220,217
265	leal	32(%edx,%ecx,1),%edx
266	negl	%ecx
267.byte	102,15,56,220,225
268	pxor	%xmm0,%xmm7
269	movups	(%edx,%ecx,1),%xmm0
270	addl	$16,%ecx
271	jmp	.L008_aesni_encrypt6_inner
272.align	16
273.L009enc6_loop:
274.byte	102,15,56,220,209
275.byte	102,15,56,220,217
276.byte	102,15,56,220,225
277.L008_aesni_encrypt6_inner:
278.byte	102,15,56,220,233
279.byte	102,15,56,220,241
280.byte	102,15,56,220,249
281.L_aesni_encrypt6_enter:
282	movups	(%edx,%ecx,1),%xmm1
283	addl	$32,%ecx
284.byte	102,15,56,220,208
285.byte	102,15,56,220,216
286.byte	102,15,56,220,224
287.byte	102,15,56,220,232
288.byte	102,15,56,220,240
289.byte	102,15,56,220,248
290	movups	-16(%edx,%ecx,1),%xmm0
291	jnz	.L009enc6_loop
292.byte	102,15,56,220,209
293.byte	102,15,56,220,217
294.byte	102,15,56,220,225
295.byte	102,15,56,220,233
296.byte	102,15,56,220,241
297.byte	102,15,56,220,249
298.byte	102,15,56,221,208
299.byte	102,15,56,221,216
300.byte	102,15,56,221,224
301.byte	102,15,56,221,232
302.byte	102,15,56,221,240
303.byte	102,15,56,221,248
304	ret
305.size	_aesni_encrypt6,.-_aesni_encrypt6
306.type	_aesni_decrypt6,@function
307.align	16
308_aesni_decrypt6:
309	movups	(%edx),%xmm0
310	shll	$4,%ecx
311	movups	16(%edx),%xmm1
312	xorps	%xmm0,%xmm2
313	pxor	%xmm0,%xmm3
314	pxor	%xmm0,%xmm4
315.byte	102,15,56,222,209
316	pxor	%xmm0,%xmm5
317	pxor	%xmm0,%xmm6
318.byte	102,15,56,222,217
319	leal	32(%edx,%ecx,1),%edx
320	negl	%ecx
321.byte	102,15,56,222,225
322	pxor	%xmm0,%xmm7
323	movups	(%edx,%ecx,1),%xmm0
324	addl	$16,%ecx
325	jmp	.L010_aesni_decrypt6_inner
326.align	16
327.L011dec6_loop:
328.byte	102,15,56,222,209
329.byte	102,15,56,222,217
330.byte	102,15,56,222,225
331.L010_aesni_decrypt6_inner:
332.byte	102,15,56,222,233
333.byte	102,15,56,222,241
334.byte	102,15,56,222,249
335.L_aesni_decrypt6_enter:
336	movups	(%edx,%ecx,1),%xmm1
337	addl	$32,%ecx
338.byte	102,15,56,222,208
339.byte	102,15,56,222,216
340.byte	102,15,56,222,224
341.byte	102,15,56,222,232
342.byte	102,15,56,222,240
343.byte	102,15,56,222,248
344	movups	-16(%edx,%ecx,1),%xmm0
345	jnz	.L011dec6_loop
346.byte	102,15,56,222,209
347.byte	102,15,56,222,217
348.byte	102,15,56,222,225
349.byte	102,15,56,222,233
350.byte	102,15,56,222,241
351.byte	102,15,56,222,249
352.byte	102,15,56,223,208
353.byte	102,15,56,223,216
354.byte	102,15,56,223,224
355.byte	102,15,56,223,232
356.byte	102,15,56,223,240
357.byte	102,15,56,223,248
358	ret
359.size	_aesni_decrypt6,.-_aesni_decrypt6
360.globl	aesni_ecb_encrypt
361.type	aesni_ecb_encrypt,@function
362.align	16
363aesni_ecb_encrypt:
364.L_aesni_ecb_encrypt_begin:
365	pushl	%ebp
366	pushl	%ebx
367	pushl	%esi
368	pushl	%edi
369	movl	20(%esp),%esi
370	movl	24(%esp),%edi
371	movl	28(%esp),%eax
372	movl	32(%esp),%edx
373	movl	36(%esp),%ebx
374	andl	$-16,%eax
375	jz	.L012ecb_ret
376	movl	240(%edx),%ecx
377	testl	%ebx,%ebx
378	jz	.L013ecb_decrypt
379	movl	%edx,%ebp
380	movl	%ecx,%ebx
381	cmpl	$96,%eax
382	jb	.L014ecb_enc_tail
383	movdqu	(%esi),%xmm2
384	movdqu	16(%esi),%xmm3
385	movdqu	32(%esi),%xmm4
386	movdqu	48(%esi),%xmm5
387	movdqu	64(%esi),%xmm6
388	movdqu	80(%esi),%xmm7
389	leal	96(%esi),%esi
390	subl	$96,%eax
391	jmp	.L015ecb_enc_loop6_enter
392.align	16
393.L016ecb_enc_loop6:
394	movups	%xmm2,(%edi)
395	movdqu	(%esi),%xmm2
396	movups	%xmm3,16(%edi)
397	movdqu	16(%esi),%xmm3
398	movups	%xmm4,32(%edi)
399	movdqu	32(%esi),%xmm4
400	movups	%xmm5,48(%edi)
401	movdqu	48(%esi),%xmm5
402	movups	%xmm6,64(%edi)
403	movdqu	64(%esi),%xmm6
404	movups	%xmm7,80(%edi)
405	leal	96(%edi),%edi
406	movdqu	80(%esi),%xmm7
407	leal	96(%esi),%esi
408.L015ecb_enc_loop6_enter:
409	call	_aesni_encrypt6
410	movl	%ebp,%edx
411	movl	%ebx,%ecx
412	subl	$96,%eax
413	jnc	.L016ecb_enc_loop6
414	movups	%xmm2,(%edi)
415	movups	%xmm3,16(%edi)
416	movups	%xmm4,32(%edi)
417	movups	%xmm5,48(%edi)
418	movups	%xmm6,64(%edi)
419	movups	%xmm7,80(%edi)
420	leal	96(%edi),%edi
421	addl	$96,%eax
422	jz	.L012ecb_ret
423.L014ecb_enc_tail:
424	movups	(%esi),%xmm2
425	cmpl	$32,%eax
426	jb	.L017ecb_enc_one
427	movups	16(%esi),%xmm3
428	je	.L018ecb_enc_two
429	movups	32(%esi),%xmm4
430	cmpl	$64,%eax
431	jb	.L019ecb_enc_three
432	movups	48(%esi),%xmm5
433	je	.L020ecb_enc_four
434	movups	64(%esi),%xmm6
435	xorps	%xmm7,%xmm7
436	call	_aesni_encrypt6
437	movups	%xmm2,(%edi)
438	movups	%xmm3,16(%edi)
439	movups	%xmm4,32(%edi)
440	movups	%xmm5,48(%edi)
441	movups	%xmm6,64(%edi)
442	jmp	.L012ecb_ret
443.align	16
444.L017ecb_enc_one:
445	movups	(%edx),%xmm0
446	movups	16(%edx),%xmm1
447	leal	32(%edx),%edx
448	xorps	%xmm0,%xmm2
449.L021enc1_loop_3:
450.byte	102,15,56,220,209
451	decl	%ecx
452	movups	(%edx),%xmm1
453	leal	16(%edx),%edx
454	jnz	.L021enc1_loop_3
455.byte	102,15,56,221,209
456	movups	%xmm2,(%edi)
457	jmp	.L012ecb_ret
458.align	16
459.L018ecb_enc_two:
460	call	_aesni_encrypt2
461	movups	%xmm2,(%edi)
462	movups	%xmm3,16(%edi)
463	jmp	.L012ecb_ret
464.align	16
465.L019ecb_enc_three:
466	call	_aesni_encrypt3
467	movups	%xmm2,(%edi)
468	movups	%xmm3,16(%edi)
469	movups	%xmm4,32(%edi)
470	jmp	.L012ecb_ret
471.align	16
472.L020ecb_enc_four:
473	call	_aesni_encrypt4
474	movups	%xmm2,(%edi)
475	movups	%xmm3,16(%edi)
476	movups	%xmm4,32(%edi)
477	movups	%xmm5,48(%edi)
478	jmp	.L012ecb_ret
479.align	16
480.L013ecb_decrypt:
481	movl	%edx,%ebp
482	movl	%ecx,%ebx
483	cmpl	$96,%eax
484	jb	.L022ecb_dec_tail
485	movdqu	(%esi),%xmm2
486	movdqu	16(%esi),%xmm3
487	movdqu	32(%esi),%xmm4
488	movdqu	48(%esi),%xmm5
489	movdqu	64(%esi),%xmm6
490	movdqu	80(%esi),%xmm7
491	leal	96(%esi),%esi
492	subl	$96,%eax
493	jmp	.L023ecb_dec_loop6_enter
494.align	16
495.L024ecb_dec_loop6:
496	movups	%xmm2,(%edi)
497	movdqu	(%esi),%xmm2
498	movups	%xmm3,16(%edi)
499	movdqu	16(%esi),%xmm3
500	movups	%xmm4,32(%edi)
501	movdqu	32(%esi),%xmm4
502	movups	%xmm5,48(%edi)
503	movdqu	48(%esi),%xmm5
504	movups	%xmm6,64(%edi)
505	movdqu	64(%esi),%xmm6
506	movups	%xmm7,80(%edi)
507	leal	96(%edi),%edi
508	movdqu	80(%esi),%xmm7
509	leal	96(%esi),%esi
510.L023ecb_dec_loop6_enter:
511	call	_aesni_decrypt6
512	movl	%ebp,%edx
513	movl	%ebx,%ecx
514	subl	$96,%eax
515	jnc	.L024ecb_dec_loop6
516	movups	%xmm2,(%edi)
517	movups	%xmm3,16(%edi)
518	movups	%xmm4,32(%edi)
519	movups	%xmm5,48(%edi)
520	movups	%xmm6,64(%edi)
521	movups	%xmm7,80(%edi)
522	leal	96(%edi),%edi
523	addl	$96,%eax
524	jz	.L012ecb_ret
525.L022ecb_dec_tail:
526	movups	(%esi),%xmm2
527	cmpl	$32,%eax
528	jb	.L025ecb_dec_one
529	movups	16(%esi),%xmm3
530	je	.L026ecb_dec_two
531	movups	32(%esi),%xmm4
532	cmpl	$64,%eax
533	jb	.L027ecb_dec_three
534	movups	48(%esi),%xmm5
535	je	.L028ecb_dec_four
536	movups	64(%esi),%xmm6
537	xorps	%xmm7,%xmm7
538	call	_aesni_decrypt6
539	movups	%xmm2,(%edi)
540	movups	%xmm3,16(%edi)
541	movups	%xmm4,32(%edi)
542	movups	%xmm5,48(%edi)
543	movups	%xmm6,64(%edi)
544	jmp	.L012ecb_ret
545.align	16
546.L025ecb_dec_one:
547	movups	(%edx),%xmm0
548	movups	16(%edx),%xmm1
549	leal	32(%edx),%edx
550	xorps	%xmm0,%xmm2
551.L029dec1_loop_4:
552.byte	102,15,56,222,209
553	decl	%ecx
554	movups	(%edx),%xmm1
555	leal	16(%edx),%edx
556	jnz	.L029dec1_loop_4
557.byte	102,15,56,223,209
558	movups	%xmm2,(%edi)
559	jmp	.L012ecb_ret
560.align	16
561.L026ecb_dec_two:
562	call	_aesni_decrypt2
563	movups	%xmm2,(%edi)
564	movups	%xmm3,16(%edi)
565	jmp	.L012ecb_ret
566.align	16
567.L027ecb_dec_three:
568	call	_aesni_decrypt3
569	movups	%xmm2,(%edi)
570	movups	%xmm3,16(%edi)
571	movups	%xmm4,32(%edi)
572	jmp	.L012ecb_ret
573.align	16
574.L028ecb_dec_four:
575	call	_aesni_decrypt4
576	movups	%xmm2,(%edi)
577	movups	%xmm3,16(%edi)
578	movups	%xmm4,32(%edi)
579	movups	%xmm5,48(%edi)
580.L012ecb_ret:
581	pxor	%xmm0,%xmm0
582	pxor	%xmm1,%xmm1
583	pxor	%xmm2,%xmm2
584	pxor	%xmm3,%xmm3
585	pxor	%xmm4,%xmm4
586	pxor	%xmm5,%xmm5
587	pxor	%xmm6,%xmm6
588	pxor	%xmm7,%xmm7
589	popl	%edi
590	popl	%esi
591	popl	%ebx
592	popl	%ebp
593	ret
594.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
595.globl	aesni_ccm64_encrypt_blocks
596.type	aesni_ccm64_encrypt_blocks,@function
597.align	16
598aesni_ccm64_encrypt_blocks:
599.L_aesni_ccm64_encrypt_blocks_begin:
600	pushl	%ebp
601	pushl	%ebx
602	pushl	%esi
603	pushl	%edi
604	movl	20(%esp),%esi
605	movl	24(%esp),%edi
606	movl	28(%esp),%eax
607	movl	32(%esp),%edx
608	movl	36(%esp),%ebx
609	movl	40(%esp),%ecx
610	movl	%esp,%ebp
611	subl	$60,%esp
612	andl	$-16,%esp
613	movl	%ebp,48(%esp)
614	movdqu	(%ebx),%xmm7
615	movdqu	(%ecx),%xmm3
616	movl	240(%edx),%ecx
617	movl	$202182159,(%esp)
618	movl	$134810123,4(%esp)
619	movl	$67438087,8(%esp)
620	movl	$66051,12(%esp)
621	movl	$1,%ebx
622	xorl	%ebp,%ebp
623	movl	%ebx,16(%esp)
624	movl	%ebp,20(%esp)
625	movl	%ebp,24(%esp)
626	movl	%ebp,28(%esp)
627	shll	$4,%ecx
628	movl	$16,%ebx
629	leal	(%edx),%ebp
630	movdqa	(%esp),%xmm5
631	movdqa	%xmm7,%xmm2
632	leal	32(%edx,%ecx,1),%edx
633	subl	%ecx,%ebx
634.byte	102,15,56,0,253
635.L030ccm64_enc_outer:
636	movups	(%ebp),%xmm0
637	movl	%ebx,%ecx
638	movups	(%esi),%xmm6
639	xorps	%xmm0,%xmm2
640	movups	16(%ebp),%xmm1
641	xorps	%xmm6,%xmm0
642	xorps	%xmm0,%xmm3
643	movups	32(%ebp),%xmm0
644.L031ccm64_enc2_loop:
645.byte	102,15,56,220,209
646.byte	102,15,56,220,217
647	movups	(%edx,%ecx,1),%xmm1
648	addl	$32,%ecx
649.byte	102,15,56,220,208
650.byte	102,15,56,220,216
651	movups	-16(%edx,%ecx,1),%xmm0
652	jnz	.L031ccm64_enc2_loop
653.byte	102,15,56,220,209
654.byte	102,15,56,220,217
655	paddq	16(%esp),%xmm7
656	decl	%eax
657.byte	102,15,56,221,208
658.byte	102,15,56,221,216
659	leal	16(%esi),%esi
660	xorps	%xmm2,%xmm6
661	movdqa	%xmm7,%xmm2
662	movups	%xmm6,(%edi)
663.byte	102,15,56,0,213
664	leal	16(%edi),%edi
665	jnz	.L030ccm64_enc_outer
666	movl	48(%esp),%esp
667	movl	40(%esp),%edi
668	movups	%xmm3,(%edi)
669	pxor	%xmm0,%xmm0
670	pxor	%xmm1,%xmm1
671	pxor	%xmm2,%xmm2
672	pxor	%xmm3,%xmm3
673	pxor	%xmm4,%xmm4
674	pxor	%xmm5,%xmm5
675	pxor	%xmm6,%xmm6
676	pxor	%xmm7,%xmm7
677	popl	%edi
678	popl	%esi
679	popl	%ebx
680	popl	%ebp
681	ret
682.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
683.globl	aesni_ccm64_decrypt_blocks
684.type	aesni_ccm64_decrypt_blocks,@function
685.align	16
686aesni_ccm64_decrypt_blocks:
687.L_aesni_ccm64_decrypt_blocks_begin:
688	pushl	%ebp
689	pushl	%ebx
690	pushl	%esi
691	pushl	%edi
692	movl	20(%esp),%esi
693	movl	24(%esp),%edi
694	movl	28(%esp),%eax
695	movl	32(%esp),%edx
696	movl	36(%esp),%ebx
697	movl	40(%esp),%ecx
698	movl	%esp,%ebp
699	subl	$60,%esp
700	andl	$-16,%esp
701	movl	%ebp,48(%esp)
702	movdqu	(%ebx),%xmm7
703	movdqu	(%ecx),%xmm3
704	movl	240(%edx),%ecx
705	movl	$202182159,(%esp)
706	movl	$134810123,4(%esp)
707	movl	$67438087,8(%esp)
708	movl	$66051,12(%esp)
709	movl	$1,%ebx
710	xorl	%ebp,%ebp
711	movl	%ebx,16(%esp)
712	movl	%ebp,20(%esp)
713	movl	%ebp,24(%esp)
714	movl	%ebp,28(%esp)
715	movdqa	(%esp),%xmm5
716	movdqa	%xmm7,%xmm2
717	movl	%edx,%ebp
718	movl	%ecx,%ebx
719.byte	102,15,56,0,253
720	movups	(%edx),%xmm0
721	movups	16(%edx),%xmm1
722	leal	32(%edx),%edx
723	xorps	%xmm0,%xmm2
724.L032enc1_loop_5:
725.byte	102,15,56,220,209
726	decl	%ecx
727	movups	(%edx),%xmm1
728	leal	16(%edx),%edx
729	jnz	.L032enc1_loop_5
730.byte	102,15,56,221,209
731	shll	$4,%ebx
732	movl	$16,%ecx
733	movups	(%esi),%xmm6
734	paddq	16(%esp),%xmm7
735	leal	16(%esi),%esi
736	subl	%ebx,%ecx
737	leal	32(%ebp,%ebx,1),%edx
738	movl	%ecx,%ebx
739	jmp	.L033ccm64_dec_outer
740.align	16
741.L033ccm64_dec_outer:
742	xorps	%xmm2,%xmm6
743	movdqa	%xmm7,%xmm2
744	movups	%xmm6,(%edi)
745	leal	16(%edi),%edi
746.byte	102,15,56,0,213
747	subl	$1,%eax
748	jz	.L034ccm64_dec_break
749	movups	(%ebp),%xmm0
750	movl	%ebx,%ecx
751	movups	16(%ebp),%xmm1
752	xorps	%xmm0,%xmm6
753	xorps	%xmm0,%xmm2
754	xorps	%xmm6,%xmm3
755	movups	32(%ebp),%xmm0
756.L035ccm64_dec2_loop:
757.byte	102,15,56,220,209
758.byte	102,15,56,220,217
759	movups	(%edx,%ecx,1),%xmm1
760	addl	$32,%ecx
761.byte	102,15,56,220,208
762.byte	102,15,56,220,216
763	movups	-16(%edx,%ecx,1),%xmm0
764	jnz	.L035ccm64_dec2_loop
765	movups	(%esi),%xmm6
766	paddq	16(%esp),%xmm7
767.byte	102,15,56,220,209
768.byte	102,15,56,220,217
769.byte	102,15,56,221,208
770.byte	102,15,56,221,216
771	leal	16(%esi),%esi
772	jmp	.L033ccm64_dec_outer
773.align	16
774.L034ccm64_dec_break:
775	movl	240(%ebp),%ecx
776	movl	%ebp,%edx
777	movups	(%edx),%xmm0
778	movups	16(%edx),%xmm1
779	xorps	%xmm0,%xmm6
780	leal	32(%edx),%edx
781	xorps	%xmm6,%xmm3
782.L036enc1_loop_6:
783.byte	102,15,56,220,217
784	decl	%ecx
785	movups	(%edx),%xmm1
786	leal	16(%edx),%edx
787	jnz	.L036enc1_loop_6
788.byte	102,15,56,221,217
789	movl	48(%esp),%esp
790	movl	40(%esp),%edi
791	movups	%xmm3,(%edi)
792	pxor	%xmm0,%xmm0
793	pxor	%xmm1,%xmm1
794	pxor	%xmm2,%xmm2
795	pxor	%xmm3,%xmm3
796	pxor	%xmm4,%xmm4
797	pxor	%xmm5,%xmm5
798	pxor	%xmm6,%xmm6
799	pxor	%xmm7,%xmm7
800	popl	%edi
801	popl	%esi
802	popl	%ebx
803	popl	%ebp
804	ret
805.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
806.globl	aesni_ctr32_encrypt_blocks
807.type	aesni_ctr32_encrypt_blocks,@function
808.align	16
809aesni_ctr32_encrypt_blocks:
810.L_aesni_ctr32_encrypt_blocks_begin:
811	pushl	%ebp
812	pushl	%ebx
813	pushl	%esi
814	pushl	%edi
815	movl	20(%esp),%esi
816	movl	24(%esp),%edi
817	movl	28(%esp),%eax
818	movl	32(%esp),%edx
819	movl	36(%esp),%ebx
820	movl	%esp,%ebp
821	subl	$88,%esp
822	andl	$-16,%esp
823	movl	%ebp,80(%esp)
824	cmpl	$1,%eax
825	je	.L037ctr32_one_shortcut
826	movdqu	(%ebx),%xmm7
827	movl	$202182159,(%esp)
828	movl	$134810123,4(%esp)
829	movl	$67438087,8(%esp)
830	movl	$66051,12(%esp)
831	movl	$6,%ecx
832	xorl	%ebp,%ebp
833	movl	%ecx,16(%esp)
834	movl	%ecx,20(%esp)
835	movl	%ecx,24(%esp)
836	movl	%ebp,28(%esp)
837.byte	102,15,58,22,251,3
838.byte	102,15,58,34,253,3
839	movl	240(%edx),%ecx
840	bswap	%ebx
841	pxor	%xmm0,%xmm0
842	pxor	%xmm1,%xmm1
843	movdqa	(%esp),%xmm2
844.byte	102,15,58,34,195,0
845	leal	3(%ebx),%ebp
846.byte	102,15,58,34,205,0
847	incl	%ebx
848.byte	102,15,58,34,195,1
849	incl	%ebp
850.byte	102,15,58,34,205,1
851	incl	%ebx
852.byte	102,15,58,34,195,2
853	incl	%ebp
854.byte	102,15,58,34,205,2
855	movdqa	%xmm0,48(%esp)
856.byte	102,15,56,0,194
857	movdqu	(%edx),%xmm6
858	movdqa	%xmm1,64(%esp)
859.byte	102,15,56,0,202
860	pshufd	$192,%xmm0,%xmm2
861	pshufd	$128,%xmm0,%xmm3
862	cmpl	$6,%eax
863	jb	.L038ctr32_tail
864	pxor	%xmm6,%xmm7
865	shll	$4,%ecx
866	movl	$16,%ebx
867	movdqa	%xmm7,32(%esp)
868	movl	%edx,%ebp
869	subl	%ecx,%ebx
870	leal	32(%edx,%ecx,1),%edx
871	subl	$6,%eax
872	jmp	.L039ctr32_loop6
873.align	16
874.L039ctr32_loop6:
875	pshufd	$64,%xmm0,%xmm4
876	movdqa	32(%esp),%xmm0
877	pshufd	$192,%xmm1,%xmm5
878	pxor	%xmm0,%xmm2
879	pshufd	$128,%xmm1,%xmm6
880	pxor	%xmm0,%xmm3
881	pshufd	$64,%xmm1,%xmm7
882	movups	16(%ebp),%xmm1
883	pxor	%xmm0,%xmm4
884	pxor	%xmm0,%xmm5
885.byte	102,15,56,220,209
886	pxor	%xmm0,%xmm6
887	pxor	%xmm0,%xmm7
888.byte	102,15,56,220,217
889	movups	32(%ebp),%xmm0
890	movl	%ebx,%ecx
891.byte	102,15,56,220,225
892.byte	102,15,56,220,233
893.byte	102,15,56,220,241
894.byte	102,15,56,220,249
895	call	.L_aesni_encrypt6_enter
896	movups	(%esi),%xmm1
897	movups	16(%esi),%xmm0
898	xorps	%xmm1,%xmm2
899	movups	32(%esi),%xmm1
900	xorps	%xmm0,%xmm3
901	movups	%xmm2,(%edi)
902	movdqa	16(%esp),%xmm0
903	xorps	%xmm1,%xmm4
904	movdqa	64(%esp),%xmm1
905	movups	%xmm3,16(%edi)
906	movups	%xmm4,32(%edi)
907	paddd	%xmm0,%xmm1
908	paddd	48(%esp),%xmm0
909	movdqa	(%esp),%xmm2
910	movups	48(%esi),%xmm3
911	movups	64(%esi),%xmm4
912	xorps	%xmm3,%xmm5
913	movups	80(%esi),%xmm3
914	leal	96(%esi),%esi
915	movdqa	%xmm0,48(%esp)
916.byte	102,15,56,0,194
917	xorps	%xmm4,%xmm6
918	movups	%xmm5,48(%edi)
919	xorps	%xmm3,%xmm7
920	movdqa	%xmm1,64(%esp)
921.byte	102,15,56,0,202
922	movups	%xmm6,64(%edi)
923	pshufd	$192,%xmm0,%xmm2
924	movups	%xmm7,80(%edi)
925	leal	96(%edi),%edi
926	pshufd	$128,%xmm0,%xmm3
927	subl	$6,%eax
928	jnc	.L039ctr32_loop6
929	addl	$6,%eax
930	jz	.L040ctr32_ret
931	movdqu	(%ebp),%xmm7
932	movl	%ebp,%edx
933	pxor	32(%esp),%xmm7
934	movl	240(%ebp),%ecx
935.L038ctr32_tail:
936	por	%xmm7,%xmm2
937	cmpl	$2,%eax
938	jb	.L041ctr32_one
939	pshufd	$64,%xmm0,%xmm4
940	por	%xmm7,%xmm3
941	je	.L042ctr32_two
942	pshufd	$192,%xmm1,%xmm5
943	por	%xmm7,%xmm4
944	cmpl	$4,%eax
945	jb	.L043ctr32_three
946	pshufd	$128,%xmm1,%xmm6
947	por	%xmm7,%xmm5
948	je	.L044ctr32_four
949	por	%xmm7,%xmm6
950	call	_aesni_encrypt6
951	movups	(%esi),%xmm1
952	movups	16(%esi),%xmm0
953	xorps	%xmm1,%xmm2
954	movups	32(%esi),%xmm1
955	xorps	%xmm0,%xmm3
956	movups	48(%esi),%xmm0
957	xorps	%xmm1,%xmm4
958	movups	64(%esi),%xmm1
959	xorps	%xmm0,%xmm5
960	movups	%xmm2,(%edi)
961	xorps	%xmm1,%xmm6
962	movups	%xmm3,16(%edi)
963	movups	%xmm4,32(%edi)
964	movups	%xmm5,48(%edi)
965	movups	%xmm6,64(%edi)
966	jmp	.L040ctr32_ret
967.align	16
968.L037ctr32_one_shortcut:
969	movups	(%ebx),%xmm2
970	movl	240(%edx),%ecx
971.L041ctr32_one:
972	movups	(%edx),%xmm0
973	movups	16(%edx),%xmm1
974	leal	32(%edx),%edx
975	xorps	%xmm0,%xmm2
976.L045enc1_loop_7:
977.byte	102,15,56,220,209
978	decl	%ecx
979	movups	(%edx),%xmm1
980	leal	16(%edx),%edx
981	jnz	.L045enc1_loop_7
982.byte	102,15,56,221,209
983	movups	(%esi),%xmm6
984	xorps	%xmm2,%xmm6
985	movups	%xmm6,(%edi)
986	jmp	.L040ctr32_ret
987.align	16
988.L042ctr32_two:
989	call	_aesni_encrypt2
990	movups	(%esi),%xmm5
991	movups	16(%esi),%xmm6
992	xorps	%xmm5,%xmm2
993	xorps	%xmm6,%xmm3
994	movups	%xmm2,(%edi)
995	movups	%xmm3,16(%edi)
996	jmp	.L040ctr32_ret
997.align	16
998.L043ctr32_three:
999	call	_aesni_encrypt3
1000	movups	(%esi),%xmm5
1001	movups	16(%esi),%xmm6
1002	xorps	%xmm5,%xmm2
1003	movups	32(%esi),%xmm7
1004	xorps	%xmm6,%xmm3
1005	movups	%xmm2,(%edi)
1006	xorps	%xmm7,%xmm4
1007	movups	%xmm3,16(%edi)
1008	movups	%xmm4,32(%edi)
1009	jmp	.L040ctr32_ret
1010.align	16
1011.L044ctr32_four:
1012	call	_aesni_encrypt4
1013	movups	(%esi),%xmm6
1014	movups	16(%esi),%xmm7
1015	movups	32(%esi),%xmm1
1016	xorps	%xmm6,%xmm2
1017	movups	48(%esi),%xmm0
1018	xorps	%xmm7,%xmm3
1019	movups	%xmm2,(%edi)
1020	xorps	%xmm1,%xmm4
1021	movups	%xmm3,16(%edi)
1022	xorps	%xmm0,%xmm5
1023	movups	%xmm4,32(%edi)
1024	movups	%xmm5,48(%edi)
1025.L040ctr32_ret:
1026	pxor	%xmm0,%xmm0
1027	pxor	%xmm1,%xmm1
1028	pxor	%xmm2,%xmm2
1029	pxor	%xmm3,%xmm3
1030	pxor	%xmm4,%xmm4
1031	movdqa	%xmm0,32(%esp)
1032	pxor	%xmm5,%xmm5
1033	movdqa	%xmm0,48(%esp)
1034	pxor	%xmm6,%xmm6
1035	movdqa	%xmm0,64(%esp)
1036	pxor	%xmm7,%xmm7
1037	movl	80(%esp),%esp
1038	popl	%edi
1039	popl	%esi
1040	popl	%ebx
1041	popl	%ebp
1042	ret
1043.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
1044.globl	aesni_xts_encrypt
1045.type	aesni_xts_encrypt,@function
1046.align	16
1047aesni_xts_encrypt:
1048.L_aesni_xts_encrypt_begin:
1049	pushl	%ebp
1050	pushl	%ebx
1051	pushl	%esi
1052	pushl	%edi
1053	movl	36(%esp),%edx
1054	movl	40(%esp),%esi
1055	movl	240(%edx),%ecx
1056	movups	(%esi),%xmm2
1057	movups	(%edx),%xmm0
1058	movups	16(%edx),%xmm1
1059	leal	32(%edx),%edx
1060	xorps	%xmm0,%xmm2
1061.L046enc1_loop_8:
1062.byte	102,15,56,220,209
1063	decl	%ecx
1064	movups	(%edx),%xmm1
1065	leal	16(%edx),%edx
1066	jnz	.L046enc1_loop_8
1067.byte	102,15,56,221,209
1068	movl	20(%esp),%esi
1069	movl	24(%esp),%edi
1070	movl	28(%esp),%eax
1071	movl	32(%esp),%edx
1072	movl	%esp,%ebp
1073	subl	$120,%esp
1074	movl	240(%edx),%ecx
1075	andl	$-16,%esp
1076	movl	$135,96(%esp)
1077	movl	$0,100(%esp)
1078	movl	$1,104(%esp)
1079	movl	$0,108(%esp)
1080	movl	%eax,112(%esp)
1081	movl	%ebp,116(%esp)
1082	movdqa	%xmm2,%xmm1
1083	pxor	%xmm0,%xmm0
1084	movdqa	96(%esp),%xmm3
1085	pcmpgtd	%xmm1,%xmm0
1086	andl	$-16,%eax
1087	movl	%edx,%ebp
1088	movl	%ecx,%ebx
1089	subl	$96,%eax
1090	jc	.L047xts_enc_short
1091	shll	$4,%ecx
1092	movl	$16,%ebx
1093	subl	%ecx,%ebx
1094	leal	32(%edx,%ecx,1),%edx
1095	jmp	.L048xts_enc_loop6
1096.align	16
1097.L048xts_enc_loop6:
1098	pshufd	$19,%xmm0,%xmm2
1099	pxor	%xmm0,%xmm0
1100	movdqa	%xmm1,(%esp)
1101	paddq	%xmm1,%xmm1
1102	pand	%xmm3,%xmm2
1103	pcmpgtd	%xmm1,%xmm0
1104	pxor	%xmm2,%xmm1
1105	pshufd	$19,%xmm0,%xmm2
1106	pxor	%xmm0,%xmm0
1107	movdqa	%xmm1,16(%esp)
1108	paddq	%xmm1,%xmm1
1109	pand	%xmm3,%xmm2
1110	pcmpgtd	%xmm1,%xmm0
1111	pxor	%xmm2,%xmm1
1112	pshufd	$19,%xmm0,%xmm2
1113	pxor	%xmm0,%xmm0
1114	movdqa	%xmm1,32(%esp)
1115	paddq	%xmm1,%xmm1
1116	pand	%xmm3,%xmm2
1117	pcmpgtd	%xmm1,%xmm0
1118	pxor	%xmm2,%xmm1
1119	pshufd	$19,%xmm0,%xmm2
1120	pxor	%xmm0,%xmm0
1121	movdqa	%xmm1,48(%esp)
1122	paddq	%xmm1,%xmm1
1123	pand	%xmm3,%xmm2
1124	pcmpgtd	%xmm1,%xmm0
1125	pxor	%xmm2,%xmm1
1126	pshufd	$19,%xmm0,%xmm7
1127	movdqa	%xmm1,64(%esp)
1128	paddq	%xmm1,%xmm1
1129	movups	(%ebp),%xmm0
1130	pand	%xmm3,%xmm7
1131	movups	(%esi),%xmm2
1132	pxor	%xmm1,%xmm7
1133	movl	%ebx,%ecx
1134	movdqu	16(%esi),%xmm3
1135	xorps	%xmm0,%xmm2
1136	movdqu	32(%esi),%xmm4
1137	pxor	%xmm0,%xmm3
1138	movdqu	48(%esi),%xmm5
1139	pxor	%xmm0,%xmm4
1140	movdqu	64(%esi),%xmm6
1141	pxor	%xmm0,%xmm5
1142	movdqu	80(%esi),%xmm1
1143	pxor	%xmm0,%xmm6
1144	leal	96(%esi),%esi
1145	pxor	(%esp),%xmm2
1146	movdqa	%xmm7,80(%esp)
1147	pxor	%xmm1,%xmm7
1148	movups	16(%ebp),%xmm1
1149	pxor	16(%esp),%xmm3
1150	pxor	32(%esp),%xmm4
1151.byte	102,15,56,220,209
1152	pxor	48(%esp),%xmm5
1153	pxor	64(%esp),%xmm6
1154.byte	102,15,56,220,217
1155	pxor	%xmm0,%xmm7
1156	movups	32(%ebp),%xmm0
1157.byte	102,15,56,220,225
1158.byte	102,15,56,220,233
1159.byte	102,15,56,220,241
1160.byte	102,15,56,220,249
1161	call	.L_aesni_encrypt6_enter
1162	movdqa	80(%esp),%xmm1
1163	pxor	%xmm0,%xmm0
1164	xorps	(%esp),%xmm2
1165	pcmpgtd	%xmm1,%xmm0
1166	xorps	16(%esp),%xmm3
1167	movups	%xmm2,(%edi)
1168	xorps	32(%esp),%xmm4
1169	movups	%xmm3,16(%edi)
1170	xorps	48(%esp),%xmm5
1171	movups	%xmm4,32(%edi)
1172	xorps	64(%esp),%xmm6
1173	movups	%xmm5,48(%edi)
1174	xorps	%xmm1,%xmm7
1175	movups	%xmm6,64(%edi)
1176	pshufd	$19,%xmm0,%xmm2
1177	movups	%xmm7,80(%edi)
1178	leal	96(%edi),%edi
1179	movdqa	96(%esp),%xmm3
1180	pxor	%xmm0,%xmm0
1181	paddq	%xmm1,%xmm1
1182	pand	%xmm3,%xmm2
1183	pcmpgtd	%xmm1,%xmm0
1184	pxor	%xmm2,%xmm1
1185	subl	$96,%eax
1186	jnc	.L048xts_enc_loop6
1187	movl	240(%ebp),%ecx
1188	movl	%ebp,%edx
1189	movl	%ecx,%ebx
1190.L047xts_enc_short:
1191	addl	$96,%eax
1192	jz	.L049xts_enc_done6x
1193	movdqa	%xmm1,%xmm5
1194	cmpl	$32,%eax
1195	jb	.L050xts_enc_one
1196	pshufd	$19,%xmm0,%xmm2
1197	pxor	%xmm0,%xmm0
1198	paddq	%xmm1,%xmm1
1199	pand	%xmm3,%xmm2
1200	pcmpgtd	%xmm1,%xmm0
1201	pxor	%xmm2,%xmm1
1202	je	.L051xts_enc_two
1203	pshufd	$19,%xmm0,%xmm2
1204	pxor	%xmm0,%xmm0
1205	movdqa	%xmm1,%xmm6
1206	paddq	%xmm1,%xmm1
1207	pand	%xmm3,%xmm2
1208	pcmpgtd	%xmm1,%xmm0
1209	pxor	%xmm2,%xmm1
1210	cmpl	$64,%eax
1211	jb	.L052xts_enc_three
1212	pshufd	$19,%xmm0,%xmm2
1213	pxor	%xmm0,%xmm0
1214	movdqa	%xmm1,%xmm7
1215	paddq	%xmm1,%xmm1
1216	pand	%xmm3,%xmm2
1217	pcmpgtd	%xmm1,%xmm0
1218	pxor	%xmm2,%xmm1
1219	movdqa	%xmm5,(%esp)
1220	movdqa	%xmm6,16(%esp)
1221	je	.L053xts_enc_four
1222	movdqa	%xmm7,32(%esp)
1223	pshufd	$19,%xmm0,%xmm7
1224	movdqa	%xmm1,48(%esp)
1225	paddq	%xmm1,%xmm1
1226	pand	%xmm3,%xmm7
1227	pxor	%xmm1,%xmm7
1228	movdqu	(%esi),%xmm2
1229	movdqu	16(%esi),%xmm3
1230	movdqu	32(%esi),%xmm4
1231	pxor	(%esp),%xmm2
1232	movdqu	48(%esi),%xmm5
1233	pxor	16(%esp),%xmm3
1234	movdqu	64(%esi),%xmm6
1235	pxor	32(%esp),%xmm4
1236	leal	80(%esi),%esi
1237	pxor	48(%esp),%xmm5
1238	movdqa	%xmm7,64(%esp)
1239	pxor	%xmm7,%xmm6
1240	call	_aesni_encrypt6
1241	movaps	64(%esp),%xmm1
1242	xorps	(%esp),%xmm2
1243	xorps	16(%esp),%xmm3
1244	xorps	32(%esp),%xmm4
1245	movups	%xmm2,(%edi)
1246	xorps	48(%esp),%xmm5
1247	movups	%xmm3,16(%edi)
1248	xorps	%xmm1,%xmm6
1249	movups	%xmm4,32(%edi)
1250	movups	%xmm5,48(%edi)
1251	movups	%xmm6,64(%edi)
1252	leal	80(%edi),%edi
1253	jmp	.L054xts_enc_done
1254.align	16
1255.L050xts_enc_one:
1256	movups	(%esi),%xmm2
1257	leal	16(%esi),%esi
1258	xorps	%xmm5,%xmm2
1259	movups	(%edx),%xmm0
1260	movups	16(%edx),%xmm1
1261	leal	32(%edx),%edx
1262	xorps	%xmm0,%xmm2
1263.L055enc1_loop_9:
1264.byte	102,15,56,220,209
1265	decl	%ecx
1266	movups	(%edx),%xmm1
1267	leal	16(%edx),%edx
1268	jnz	.L055enc1_loop_9
1269.byte	102,15,56,221,209
1270	xorps	%xmm5,%xmm2
1271	movups	%xmm2,(%edi)
1272	leal	16(%edi),%edi
1273	movdqa	%xmm5,%xmm1
1274	jmp	.L054xts_enc_done
1275.align	16
1276.L051xts_enc_two:
1277	movaps	%xmm1,%xmm6
1278	movups	(%esi),%xmm2
1279	movups	16(%esi),%xmm3
1280	leal	32(%esi),%esi
1281	xorps	%xmm5,%xmm2
1282	xorps	%xmm6,%xmm3
1283	call	_aesni_encrypt2
1284	xorps	%xmm5,%xmm2
1285	xorps	%xmm6,%xmm3
1286	movups	%xmm2,(%edi)
1287	movups	%xmm3,16(%edi)
1288	leal	32(%edi),%edi
1289	movdqa	%xmm6,%xmm1
1290	jmp	.L054xts_enc_done
1291.align	16
1292.L052xts_enc_three:
1293	movaps	%xmm1,%xmm7
1294	movups	(%esi),%xmm2
1295	movups	16(%esi),%xmm3
1296	movups	32(%esi),%xmm4
1297	leal	48(%esi),%esi
1298	xorps	%xmm5,%xmm2
1299	xorps	%xmm6,%xmm3
1300	xorps	%xmm7,%xmm4
1301	call	_aesni_encrypt3
1302	xorps	%xmm5,%xmm2
1303	xorps	%xmm6,%xmm3
1304	xorps	%xmm7,%xmm4
1305	movups	%xmm2,(%edi)
1306	movups	%xmm3,16(%edi)
1307	movups	%xmm4,32(%edi)
1308	leal	48(%edi),%edi
1309	movdqa	%xmm7,%xmm1
1310	jmp	.L054xts_enc_done
1311.align	16
1312.L053xts_enc_four:
1313	movaps	%xmm1,%xmm6
1314	movups	(%esi),%xmm2
1315	movups	16(%esi),%xmm3
1316	movups	32(%esi),%xmm4
1317	xorps	(%esp),%xmm2
1318	movups	48(%esi),%xmm5
1319	leal	64(%esi),%esi
1320	xorps	16(%esp),%xmm3
1321	xorps	%xmm7,%xmm4
1322	xorps	%xmm6,%xmm5
1323	call	_aesni_encrypt4
1324	xorps	(%esp),%xmm2
1325	xorps	16(%esp),%xmm3
1326	xorps	%xmm7,%xmm4
1327	movups	%xmm2,(%edi)
1328	xorps	%xmm6,%xmm5
1329	movups	%xmm3,16(%edi)
1330	movups	%xmm4,32(%edi)
1331	movups	%xmm5,48(%edi)
1332	leal	64(%edi),%edi
1333	movdqa	%xmm6,%xmm1
1334	jmp	.L054xts_enc_done
1335.align	16
1336.L049xts_enc_done6x:
1337	movl	112(%esp),%eax
1338	andl	$15,%eax
1339	jz	.L056xts_enc_ret
1340	movdqa	%xmm1,%xmm5
1341	movl	%eax,112(%esp)
1342	jmp	.L057xts_enc_steal
1343.align	16
1344.L054xts_enc_done:
1345	movl	112(%esp),%eax
1346	pxor	%xmm0,%xmm0
1347	andl	$15,%eax
1348	jz	.L056xts_enc_ret
1349	pcmpgtd	%xmm1,%xmm0
1350	movl	%eax,112(%esp)
1351	pshufd	$19,%xmm0,%xmm5
1352	paddq	%xmm1,%xmm1
1353	pand	96(%esp),%xmm5
1354	pxor	%xmm1,%xmm5
1355.L057xts_enc_steal:
1356	movzbl	(%esi),%ecx
1357	movzbl	-16(%edi),%edx
1358	leal	1(%esi),%esi
1359	movb	%cl,-16(%edi)
1360	movb	%dl,(%edi)
1361	leal	1(%edi),%edi
1362	subl	$1,%eax
1363	jnz	.L057xts_enc_steal
1364	subl	112(%esp),%edi
1365	movl	%ebp,%edx
1366	movl	%ebx,%ecx
1367	movups	-16(%edi),%xmm2
1368	xorps	%xmm5,%xmm2
1369	movups	(%edx),%xmm0
1370	movups	16(%edx),%xmm1
1371	leal	32(%edx),%edx
1372	xorps	%xmm0,%xmm2
1373.L058enc1_loop_10:
1374.byte	102,15,56,220,209
1375	decl	%ecx
1376	movups	(%edx),%xmm1
1377	leal	16(%edx),%edx
1378	jnz	.L058enc1_loop_10
1379.byte	102,15,56,221,209
1380	xorps	%xmm5,%xmm2
1381	movups	%xmm2,-16(%edi)
1382.L056xts_enc_ret:
1383	pxor	%xmm0,%xmm0
1384	pxor	%xmm1,%xmm1
1385	pxor	%xmm2,%xmm2
1386	movdqa	%xmm0,(%esp)
1387	pxor	%xmm3,%xmm3
1388	movdqa	%xmm0,16(%esp)
1389	pxor	%xmm4,%xmm4
1390	movdqa	%xmm0,32(%esp)
1391	pxor	%xmm5,%xmm5
1392	movdqa	%xmm0,48(%esp)
1393	pxor	%xmm6,%xmm6
1394	movdqa	%xmm0,64(%esp)
1395	pxor	%xmm7,%xmm7
1396	movdqa	%xmm0,80(%esp)
1397	movl	116(%esp),%esp
1398	popl	%edi
1399	popl	%esi
1400	popl	%ebx
1401	popl	%ebp
1402	ret
1403.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
#----------------------------------------------------------------------
# aesni_xts_decrypt — AES-XTS (IEEE P1619) decryption, i386 + AES-NI.
# cdecl stack args (offsets valid after the four pushes below):
#   20(%esp) = input ptr, 24(%esp) = output ptr, 28(%esp) = byte length,
#   32(%esp) = data key schedule (key1), 36(%esp) = tweak key schedule
#   (key2), 40(%esp) = 16-byte initial tweak/IV.
# 240(key) holds the round count, as elsewhere in this file.
# The IV is first ENCRYPTED with key2 to form the initial tweak; data is
# then decrypted six blocks at a time, the tweak advanced by GF(2^128)
# doubling with the 0x87 reduction polynomial, and a trailing partial
# block is handled via ciphertext stealing (.L072xts_dec_steal).
# NOTE(review): this file is auto-generated from aesni-x86.pl — comments
# only, no code changes. Each line also carries a decimal prefix from
# the source-browser dump this chunk came from; presumably it must be
# stripped before assembling — TODO confirm extraction artifact.
#----------------------------------------------------------------------
1404.globl	aesni_xts_decrypt
1405.type	aesni_xts_decrypt,@function
1406.align	16
1407aesni_xts_decrypt:
1408.L_aesni_xts_decrypt_begin:
1409	pushl	%ebp
1410	pushl	%ebx
1411	pushl	%esi
1412	pushl	%edi
1413	movl	36(%esp),%edx
1414	movl	40(%esp),%esi
1415	movl	240(%edx),%ecx
1416	movups	(%esi),%xmm2
# Encrypt the IV with key2 (single-block AES-ENC loop) -> initial tweak
# in %xmm2.
1417	movups	(%edx),%xmm0
1418	movups	16(%edx),%xmm1
1419	leal	32(%edx),%edx
1420	xorps	%xmm0,%xmm2
1421.L059enc1_loop_11:
1422.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1423	decl	%ecx
1424	movups	(%edx),%xmm1
1425	leal	16(%edx),%edx
1426	jnz	.L059enc1_loop_11
1427.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
1428	movl	20(%esp),%esi
1429	movl	24(%esp),%edi
1430	movl	28(%esp),%eax
1431	movl	32(%esp),%edx
# Switch to a 16-byte-aligned 120-byte scratch frame; the caller's %esp
# is saved at 116(%esp) and restored at .L069xts_dec_ret.
1432	movl	%esp,%ebp
1433	subl	$120,%esp
1434	andl	$-16,%esp
# If length is not a multiple of 16, hold back one full block so the
# tail can be handled with ciphertext stealing (%ebx = 16 or 0).
1435	xorl	%ebx,%ebx
1436	testl	$15,%eax
1437	setnz	%bl
1438	shll	$4,%ebx
1439	subl	%ebx,%eax
# 96(%esp)..108(%esp): XTS tweak-doubling constant — 0x87 reduction
# polynomial in the low dword, 1 in the third dword.
1440	movl	$135,96(%esp)
1441	movl	$0,100(%esp)
1442	movl	$1,104(%esp)
1443	movl	$0,108(%esp)
1444	movl	%eax,112(%esp)
1445	movl	%ebp,116(%esp)
1446	movl	240(%edx),%ecx
1447	movl	%edx,%ebp
1448	movl	%ecx,%ebx
# %xmm1 = current tweak, %xmm3 = poly constant; pcmpgtd replicates the
# tweak's sign bits to implement the carry of the GF(2^128) doubling.
1449	movdqa	%xmm2,%xmm1
1450	pxor	%xmm0,%xmm0
1451	movdqa	96(%esp),%xmm3
1452	pcmpgtd	%xmm1,%xmm0
1453	andl	$-16,%eax
1454	subl	$96,%eax
1455	jc	.L060xts_dec_short
1456	shll	$4,%ecx
1457	movl	$16,%ebx
1458	subl	%ecx,%ebx
1459	leal	32(%edx,%ecx,1),%edx
1460	jmp	.L061xts_dec_loop6
1461.align	16
# Main loop: derive six consecutive tweaks into (%esp)..80(%esp), then
# decrypt six input blocks in parallel via _aesni_decrypt6.
1462.L061xts_dec_loop6:
1463	pshufd	$19,%xmm0,%xmm2
1464	pxor	%xmm0,%xmm0
1465	movdqa	%xmm1,(%esp)
1466	paddq	%xmm1,%xmm1
1467	pand	%xmm3,%xmm2
1468	pcmpgtd	%xmm1,%xmm0
1469	pxor	%xmm2,%xmm1
1470	pshufd	$19,%xmm0,%xmm2
1471	pxor	%xmm0,%xmm0
1472	movdqa	%xmm1,16(%esp)
1473	paddq	%xmm1,%xmm1
1474	pand	%xmm3,%xmm2
1475	pcmpgtd	%xmm1,%xmm0
1476	pxor	%xmm2,%xmm1
1477	pshufd	$19,%xmm0,%xmm2
1478	pxor	%xmm0,%xmm0
1479	movdqa	%xmm1,32(%esp)
1480	paddq	%xmm1,%xmm1
1481	pand	%xmm3,%xmm2
1482	pcmpgtd	%xmm1,%xmm0
1483	pxor	%xmm2,%xmm1
1484	pshufd	$19,%xmm0,%xmm2
1485	pxor	%xmm0,%xmm0
1486	movdqa	%xmm1,48(%esp)
1487	paddq	%xmm1,%xmm1
1488	pand	%xmm3,%xmm2
1489	pcmpgtd	%xmm1,%xmm0
1490	pxor	%xmm2,%xmm1
1491	pshufd	$19,%xmm0,%xmm7
1492	movdqa	%xmm1,64(%esp)
1493	paddq	%xmm1,%xmm1
1494	movups	(%ebp),%xmm0
1495	pand	%xmm3,%xmm7
1496	movups	(%esi),%xmm2
1497	pxor	%xmm1,%xmm7
# Load 6 ciphertext blocks, xor in round key 0 and the per-block tweaks,
# then run the first two AES rounds inline before tail-calling into the
# shared 6-way decrypt body.
1498	movl	%ebx,%ecx
1499	movdqu	16(%esi),%xmm3
1500	xorps	%xmm0,%xmm2
1501	movdqu	32(%esi),%xmm4
1502	pxor	%xmm0,%xmm3
1503	movdqu	48(%esi),%xmm5
1504	pxor	%xmm0,%xmm4
1505	movdqu	64(%esi),%xmm6
1506	pxor	%xmm0,%xmm5
1507	movdqu	80(%esi),%xmm1
1508	pxor	%xmm0,%xmm6
1509	leal	96(%esi),%esi
1510	pxor	(%esp),%xmm2
1511	movdqa	%xmm7,80(%esp)
1512	pxor	%xmm1,%xmm7
1513	movups	16(%ebp),%xmm1
1514	pxor	16(%esp),%xmm3
1515	pxor	32(%esp),%xmm4
1516.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1517	pxor	48(%esp),%xmm5
1518	pxor	64(%esp),%xmm6
1519.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
1520	pxor	%xmm0,%xmm7
1521	movups	32(%ebp),%xmm0
1522.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
1523.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
1524.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
1525.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
1526	call	.L_aesni_decrypt6_enter
# Un-whiten: xor each decrypted block with its tweak, store, and double
# the last tweak for the next iteration.
1527	movdqa	80(%esp),%xmm1
1528	pxor	%xmm0,%xmm0
1529	xorps	(%esp),%xmm2
1530	pcmpgtd	%xmm1,%xmm0
1531	xorps	16(%esp),%xmm3
1532	movups	%xmm2,(%edi)
1533	xorps	32(%esp),%xmm4
1534	movups	%xmm3,16(%edi)
1535	xorps	48(%esp),%xmm5
1536	movups	%xmm4,32(%edi)
1537	xorps	64(%esp),%xmm6
1538	movups	%xmm5,48(%edi)
1539	xorps	%xmm1,%xmm7
1540	movups	%xmm6,64(%edi)
1541	pshufd	$19,%xmm0,%xmm2
1542	movups	%xmm7,80(%edi)
1543	leal	96(%edi),%edi
1544	movdqa	96(%esp),%xmm3
1545	pxor	%xmm0,%xmm0
1546	paddq	%xmm1,%xmm1
1547	pand	%xmm3,%xmm2
1548	pcmpgtd	%xmm1,%xmm0
1549	pxor	%xmm2,%xmm1
1550	subl	$96,%eax
1551	jnc	.L061xts_dec_loop6
1552	movl	240(%ebp),%ecx
1553	movl	%ebp,%edx
1554	movl	%ecx,%ebx
# 1..5 whole blocks remain (%eax = remaining bytes after re-adding 96);
# generate just enough tweaks and dispatch on the count.
1555.L060xts_dec_short:
1556	addl	$96,%eax
1557	jz	.L062xts_dec_done6x
1558	movdqa	%xmm1,%xmm5
1559	cmpl	$32,%eax
1560	jb	.L063xts_dec_one
1561	pshufd	$19,%xmm0,%xmm2
1562	pxor	%xmm0,%xmm0
1563	paddq	%xmm1,%xmm1
1564	pand	%xmm3,%xmm2
1565	pcmpgtd	%xmm1,%xmm0
1566	pxor	%xmm2,%xmm1
1567	je	.L064xts_dec_two
1568	pshufd	$19,%xmm0,%xmm2
1569	pxor	%xmm0,%xmm0
1570	movdqa	%xmm1,%xmm6
1571	paddq	%xmm1,%xmm1
1572	pand	%xmm3,%xmm2
1573	pcmpgtd	%xmm1,%xmm0
1574	pxor	%xmm2,%xmm1
1575	cmpl	$64,%eax
1576	jb	.L065xts_dec_three
1577	pshufd	$19,%xmm0,%xmm2
1578	pxor	%xmm0,%xmm0
1579	movdqa	%xmm1,%xmm7
1580	paddq	%xmm1,%xmm1
1581	pand	%xmm3,%xmm2
1582	pcmpgtd	%xmm1,%xmm0
1583	pxor	%xmm2,%xmm1
1584	movdqa	%xmm5,(%esp)
1585	movdqa	%xmm6,16(%esp)
1586	je	.L066xts_dec_four
# Five blocks: tweaks in (%esp)..64(%esp), decrypt with 6-way body.
1587	movdqa	%xmm7,32(%esp)
1588	pshufd	$19,%xmm0,%xmm7
1589	movdqa	%xmm1,48(%esp)
1590	paddq	%xmm1,%xmm1
1591	pand	%xmm3,%xmm7
1592	pxor	%xmm1,%xmm7
1593	movdqu	(%esi),%xmm2
1594	movdqu	16(%esi),%xmm3
1595	movdqu	32(%esi),%xmm4
1596	pxor	(%esp),%xmm2
1597	movdqu	48(%esi),%xmm5
1598	pxor	16(%esp),%xmm3
1599	movdqu	64(%esi),%xmm6
1600	pxor	32(%esp),%xmm4
1601	leal	80(%esi),%esi
1602	pxor	48(%esp),%xmm5
1603	movdqa	%xmm7,64(%esp)
1604	pxor	%xmm7,%xmm6
1605	call	_aesni_decrypt6
1606	movaps	64(%esp),%xmm1
1607	xorps	(%esp),%xmm2
1608	xorps	16(%esp),%xmm3
1609	xorps	32(%esp),%xmm4
1610	movups	%xmm2,(%edi)
1611	xorps	48(%esp),%xmm5
1612	movups	%xmm3,16(%edi)
1613	xorps	%xmm1,%xmm6
1614	movups	%xmm4,32(%edi)
1615	movups	%xmm5,48(%edi)
1616	movups	%xmm6,64(%edi)
1617	leal	80(%edi),%edi
1618	jmp	.L067xts_dec_done
1619.align	16
# One block: single-block AES-DEC loop, tweak in %xmm5.
1620.L063xts_dec_one:
1621	movups	(%esi),%xmm2
1622	leal	16(%esi),%esi
1623	xorps	%xmm5,%xmm2
1624	movups	(%edx),%xmm0
1625	movups	16(%edx),%xmm1
1626	leal	32(%edx),%edx
1627	xorps	%xmm0,%xmm2
1628.L068dec1_loop_12:
1629.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1630	decl	%ecx
1631	movups	(%edx),%xmm1
1632	leal	16(%edx),%edx
1633	jnz	.L068dec1_loop_12
1634.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1635	xorps	%xmm5,%xmm2
1636	movups	%xmm2,(%edi)
1637	leal	16(%edi),%edi
1638	movdqa	%xmm5,%xmm1
1639	jmp	.L067xts_dec_done
1640.align	16
# Two blocks: tweaks in %xmm5/%xmm6.
1641.L064xts_dec_two:
1642	movaps	%xmm1,%xmm6
1643	movups	(%esi),%xmm2
1644	movups	16(%esi),%xmm3
1645	leal	32(%esi),%esi
1646	xorps	%xmm5,%xmm2
1647	xorps	%xmm6,%xmm3
1648	call	_aesni_decrypt2
1649	xorps	%xmm5,%xmm2
1650	xorps	%xmm6,%xmm3
1651	movups	%xmm2,(%edi)
1652	movups	%xmm3,16(%edi)
1653	leal	32(%edi),%edi
1654	movdqa	%xmm6,%xmm1
1655	jmp	.L067xts_dec_done
1656.align	16
# Three blocks: tweaks in %xmm5/%xmm6/%xmm7.
1657.L065xts_dec_three:
1658	movaps	%xmm1,%xmm7
1659	movups	(%esi),%xmm2
1660	movups	16(%esi),%xmm3
1661	movups	32(%esi),%xmm4
1662	leal	48(%esi),%esi
1663	xorps	%xmm5,%xmm2
1664	xorps	%xmm6,%xmm3
1665	xorps	%xmm7,%xmm4
1666	call	_aesni_decrypt3
1667	xorps	%xmm5,%xmm2
1668	xorps	%xmm6,%xmm3
1669	xorps	%xmm7,%xmm4
1670	movups	%xmm2,(%edi)
1671	movups	%xmm3,16(%edi)
1672	movups	%xmm4,32(%edi)
1673	leal	48(%edi),%edi
1674	movdqa	%xmm7,%xmm1
1675	jmp	.L067xts_dec_done
1676.align	16
# Four blocks: first two tweaks spilled to (%esp)/16(%esp), last two in
# %xmm7/%xmm6.
1677.L066xts_dec_four:
1678	movaps	%xmm1,%xmm6
1679	movups	(%esi),%xmm2
1680	movups	16(%esi),%xmm3
1681	movups	32(%esi),%xmm4
1682	xorps	(%esp),%xmm2
1683	movups	48(%esi),%xmm5
1684	leal	64(%esi),%esi
1685	xorps	16(%esp),%xmm3
1686	xorps	%xmm7,%xmm4
1687	xorps	%xmm6,%xmm5
1688	call	_aesni_decrypt4
1689	xorps	(%esp),%xmm2
1690	xorps	16(%esp),%xmm3
1691	xorps	%xmm7,%xmm4
1692	movups	%xmm2,(%edi)
1693	xorps	%xmm6,%xmm5
1694	movups	%xmm3,16(%edi)
1695	movups	%xmm4,32(%edi)
1696	movups	%xmm5,48(%edi)
1697	leal	64(%edi),%edi
1698	movdqa	%xmm6,%xmm1
1699	jmp	.L067xts_dec_done
1700.align	16
# Bulk loop consumed everything; check 112(%esp) for a partial tail.
1701.L062xts_dec_done6x:
1702	movl	112(%esp),%eax
1703	andl	$15,%eax
1704	jz	.L069xts_dec_ret
1705	movl	%eax,112(%esp)
1706	jmp	.L070xts_dec_only_one_more
1707.align	16
# Common exit from the short paths: if a partial tail exists, advance
# the tweak once more before the stealing sequence.
1708.L067xts_dec_done:
1709	movl	112(%esp),%eax
1710	pxor	%xmm0,%xmm0
1711	andl	$15,%eax
1712	jz	.L069xts_dec_ret
1713	pcmpgtd	%xmm1,%xmm0
1714	movl	%eax,112(%esp)
1715	pshufd	$19,%xmm0,%xmm2
1716	pxor	%xmm0,%xmm0
1717	movdqa	96(%esp),%xmm3
1718	paddq	%xmm1,%xmm1
1719	pand	%xmm3,%xmm2
1720	pcmpgtd	%xmm1,%xmm0
1721	pxor	%xmm2,%xmm1
# Ciphertext stealing: decrypt the second-to-last block with the LAST
# tweak (%xmm5), swap tail bytes, then decrypt the reassembled block
# with the next-to-last tweak (%xmm6).
1722.L070xts_dec_only_one_more:
1723	pshufd	$19,%xmm0,%xmm5
1724	movdqa	%xmm1,%xmm6
1725	paddq	%xmm1,%xmm1
1726	pand	%xmm3,%xmm5
1727	pxor	%xmm1,%xmm5
1728	movl	%ebp,%edx
1729	movl	%ebx,%ecx
1730	movups	(%esi),%xmm2
1731	xorps	%xmm5,%xmm2
1732	movups	(%edx),%xmm0
1733	movups	16(%edx),%xmm1
1734	leal	32(%edx),%edx
1735	xorps	%xmm0,%xmm2
1736.L071dec1_loop_13:
1737.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1738	decl	%ecx
1739	movups	(%edx),%xmm1
1740	leal	16(%edx),%edx
1741	jnz	.L071dec1_loop_13
1742.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1743	xorps	%xmm5,%xmm2
1744	movups	%xmm2,(%edi)
# Byte-swap loop implementing the steal: move input tail bytes into the
# just-written block while copying its displaced bytes to the tail.
1745.L072xts_dec_steal:
1746	movzbl	16(%esi),%ecx
1747	movzbl	(%edi),%edx
1748	leal	1(%esi),%esi
1749	movb	%cl,(%edi)
1750	movb	%dl,16(%edi)
1751	leal	1(%edi),%edi
1752	subl	$1,%eax
1753	jnz	.L072xts_dec_steal
1754	subl	112(%esp),%edi
1755	movl	%ebp,%edx
1756	movl	%ebx,%ecx
1757	movups	(%edi),%xmm2
1758	xorps	%xmm6,%xmm2
1759	movups	(%edx),%xmm0
1760	movups	16(%edx),%xmm1
1761	leal	32(%edx),%edx
1762	xorps	%xmm0,%xmm2
1763.L073dec1_loop_14:
1764.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
1765	decl	%ecx
1766	movups	(%edx),%xmm1
1767	leal	16(%edx),%edx
1768	jnz	.L073dec1_loop_14
1769.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
1770	xorps	%xmm6,%xmm2
1771	movups	%xmm2,(%edi)
# Scrub all xmm registers and the scratch frame (which held key-derived
# tweaks) before restoring the caller's stack and returning.
1772.L069xts_dec_ret:
1773	pxor	%xmm0,%xmm0
1774	pxor	%xmm1,%xmm1
1775	pxor	%xmm2,%xmm2
1776	movdqa	%xmm0,(%esp)
1777	pxor	%xmm3,%xmm3
1778	movdqa	%xmm0,16(%esp)
1779	pxor	%xmm4,%xmm4
1780	movdqa	%xmm0,32(%esp)
1781	pxor	%xmm5,%xmm5
1782	movdqa	%xmm0,48(%esp)
1783	pxor	%xmm6,%xmm6
1784	movdqa	%xmm0,64(%esp)
1785	pxor	%xmm7,%xmm7
1786	movdqa	%xmm0,80(%esp)
1787	movl	116(%esp),%esp
1788	popl	%edi
1789	popl	%esi
1790	popl	%ebx
1791	popl	%ebp
1792	ret
1793.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
#----------------------------------------------------------------------
# aesni_ocb_encrypt — AES-OCB encryption, i386 + AES-NI.
# cdecl stack args (offsets valid after the four pushes below):
#   20(%esp)=in, 24(%esp)=out, 28(%esp)=block count, 32(%esp)=key
#   schedule, 36(%esp)=starting block number, 40(%esp)=offset_i[16],
#   44(%esp)=table of precomputed L_ values, 48(%esp)=checksum[16]
#   — matches OpenSSL's aesni_ocb_encrypt interface (TODO confirm
#   against crypto/aes/asm/aesni-x86.pl).
# %xmm0 carries the running offset, %xmm1 the running plaintext
# checksum; both are written back to their buffers at .L078done.
# bsf of the block index picks the L_[ntz(i)] entry to fold into the
# offset, per the OCB offset-update rule.
# NOTE(review): auto-generated file — comments only. Lines carry a
# decimal prefix from the source-browser dump; strip before assembling.
#----------------------------------------------------------------------
1794.globl	aesni_ocb_encrypt
1795.type	aesni_ocb_encrypt,@function
1796.align	16
1797aesni_ocb_encrypt:
1798.L_aesni_ocb_encrypt_begin:
1799	pushl	%ebp
1800	pushl	%ebx
1801	pushl	%esi
1802	pushl	%edi
1803	movl	40(%esp),%ecx
1804	movl	48(%esp),%ebx
1805	movl	20(%esp),%esi
1806	movl	24(%esp),%edi
1807	movl	28(%esp),%eax
1808	movl	32(%esp),%edx
1809	movdqu	(%ecx),%xmm0
1810	movl	36(%esp),%ebp
1811	movdqu	(%ebx),%xmm1
1812	movl	44(%esp),%ebx
# Aligned 132-byte scratch frame; 120(%esp)=out-in delta, 124(%esp)=
# input limit for the 6-block loop, 128(%esp)=saved %esp.
1813	movl	%esp,%ecx
1814	subl	$132,%esp
1815	andl	$-16,%esp
1816	subl	%esi,%edi
1817	shll	$4,%eax
1818	leal	-96(%esi,%eax,1),%eax
1819	movl	%edi,120(%esp)
1820	movl	%eax,124(%esp)
1821	movl	%ecx,128(%esp)
1822	movl	240(%edx),%ecx
# If the starting block number is even, process one block first so the
# main loop always starts on an odd index.
1823	testl	$1,%ebp
1824	jnz	.L074odd
1825	bsfl	%ebp,%eax
1826	addl	$1,%ebp
1827	shll	$4,%eax
1828	movdqu	(%ebx,%eax,1),%xmm7
1829	movl	%edx,%eax
1830	movdqu	(%esi),%xmm2
1831	leal	16(%esi),%esi
1832	pxor	%xmm0,%xmm7
1833	pxor	%xmm2,%xmm1
1834	pxor	%xmm7,%xmm2
1835	movdqa	%xmm1,%xmm6
1836	movups	(%edx),%xmm0
1837	movups	16(%edx),%xmm1
1838	leal	32(%edx),%edx
1839	xorps	%xmm0,%xmm2
1840.L075enc1_loop_15:
1841.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1842	decl	%ecx
1843	movups	(%edx),%xmm1
1844	leal	16(%edx),%edx
1845	jnz	.L075enc1_loop_15
1846.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
1847	xorps	%xmm7,%xmm2
1848	movdqa	%xmm7,%xmm0
1849	movdqa	%xmm6,%xmm1
1850	movups	%xmm2,-16(%edi,%esi,1)
1851	movl	240(%eax),%ecx
1852	movl	%eax,%edx
1853	movl	124(%esp),%eax
1854.L074odd:
# Precompute end-of-schedule pointer; 112(%esp)=key, 116(%esp)=neg.
# round-scaled offset used to index round keys from %edx.
1855	shll	$4,%ecx
1856	movl	$16,%edi
1857	subl	%ecx,%edi
1858	movl	%edx,112(%esp)
1859	leal	32(%edx,%ecx,1),%edx
1860	movl	%edi,116(%esp)
1861	cmpl	%eax,%esi
1862	ja	.L076short
1863	jmp	.L077grandloop
1864.align	32
# Main loop: 6 blocks per iteration. Offsets for blocks i+1..i+6 are
# chained via L_[ntz] lookups and staged at (%esp)..80(%esp).
1865.L077grandloop:
1866	leal	1(%ebp),%ecx
1867	leal	3(%ebp),%eax
1868	leal	5(%ebp),%edi
1869	addl	$6,%ebp
1870	bsfl	%ecx,%ecx
1871	bsfl	%eax,%eax
1872	bsfl	%edi,%edi
1873	shll	$4,%ecx
1874	shll	$4,%eax
1875	shll	$4,%edi
1876	movdqu	(%ebx),%xmm2
1877	movdqu	(%ebx,%ecx,1),%xmm3
1878	movl	116(%esp),%ecx
1879	movdqa	%xmm2,%xmm4
1880	movdqu	(%ebx,%eax,1),%xmm5
1881	movdqa	%xmm2,%xmm6
1882	movdqu	(%ebx,%edi,1),%xmm7
1883	pxor	%xmm0,%xmm2
1884	pxor	%xmm2,%xmm3
1885	movdqa	%xmm2,(%esp)
1886	pxor	%xmm3,%xmm4
1887	movdqa	%xmm3,16(%esp)
1888	pxor	%xmm4,%xmm5
1889	movdqa	%xmm4,32(%esp)
1890	pxor	%xmm5,%xmm6
1891	movdqa	%xmm5,48(%esp)
1892	pxor	%xmm6,%xmm7
1893	movdqa	%xmm6,64(%esp)
1894	movdqa	%xmm7,80(%esp)
1895	movups	-48(%edx,%ecx,1),%xmm0
1896	movdqu	(%esi),%xmm2
1897	movdqu	16(%esi),%xmm3
1898	movdqu	32(%esi),%xmm4
1899	movdqu	48(%esi),%xmm5
1900	movdqu	64(%esi),%xmm6
1901	movdqu	80(%esi),%xmm7
1902	leal	96(%esi),%esi
# Fold the 6 plaintext blocks into the checksum (%xmm1), whiten with
# round key 0, then xor in the staged per-block offsets.
1903	pxor	%xmm2,%xmm1
1904	pxor	%xmm0,%xmm2
1905	pxor	%xmm3,%xmm1
1906	pxor	%xmm0,%xmm3
1907	pxor	%xmm4,%xmm1
1908	pxor	%xmm0,%xmm4
1909	pxor	%xmm5,%xmm1
1910	pxor	%xmm0,%xmm5
1911	pxor	%xmm6,%xmm1
1912	pxor	%xmm0,%xmm6
1913	pxor	%xmm7,%xmm1
1914	pxor	%xmm0,%xmm7
1915	movdqa	%xmm1,96(%esp)
1916	movups	-32(%edx,%ecx,1),%xmm1
1917	pxor	(%esp),%xmm2
1918	pxor	16(%esp),%xmm3
1919	pxor	32(%esp),%xmm4
1920	pxor	48(%esp),%xmm5
1921	pxor	64(%esp),%xmm6
1922	pxor	80(%esp),%xmm7
1923	movups	-16(%edx,%ecx,1),%xmm0
1924.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1925.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
1926.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
1927.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
1928.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
1929.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
1930	movl	120(%esp),%edi
1931	movl	124(%esp),%eax
1931a:
1932	call	.L_aesni_encrypt6_enter
# Un-whiten with the offsets and store; offset for next iteration is
# the 6th staged value (80(%esp)), checksum restored from 96(%esp).
1933	movdqa	80(%esp),%xmm0
1934	pxor	(%esp),%xmm2
1935	pxor	16(%esp),%xmm3
1936	pxor	32(%esp),%xmm4
1937	pxor	48(%esp),%xmm5
1938	pxor	64(%esp),%xmm6
1939	pxor	%xmm0,%xmm7
1940	movdqa	96(%esp),%xmm1
1941	movdqu	%xmm2,-96(%edi,%esi,1)
1942	movdqu	%xmm3,-80(%edi,%esi,1)
1943	movdqu	%xmm4,-64(%edi,%esi,1)
1944	movdqu	%xmm5,-48(%edi,%esi,1)
1945	movdqu	%xmm6,-32(%edi,%esi,1)
1946	movdqu	%xmm7,-16(%edi,%esi,1)
1947	cmpl	%eax,%esi
1948	jbe	.L077grandloop
# Tail dispatch: 0..5 blocks remain.
1949.L076short:
1950	addl	$96,%eax
1951	subl	%esi,%eax
1952	jz	.L078done
1953	cmpl	$32,%eax
1954	jb	.L079one
1955	je	.L080two
1956	cmpl	$64,%eax
1957	jb	.L081three
1958	je	.L082four
# Five remaining blocks (6-way body with %xmm7 zeroed as dummy).
1959	leal	1(%ebp),%ecx
1960	leal	3(%ebp),%eax
1961	bsfl	%ecx,%ecx
1962	bsfl	%eax,%eax
1963	shll	$4,%ecx
1964	shll	$4,%eax
1965	movdqu	(%ebx),%xmm2
1966	movdqu	(%ebx,%ecx,1),%xmm3
1967	movl	116(%esp),%ecx
1968	movdqa	%xmm2,%xmm4
1969	movdqu	(%ebx,%eax,1),%xmm5
1970	movdqa	%xmm2,%xmm6
1971	pxor	%xmm0,%xmm2
1972	pxor	%xmm2,%xmm3
1973	movdqa	%xmm2,(%esp)
1974	pxor	%xmm3,%xmm4
1975	movdqa	%xmm3,16(%esp)
1976	pxor	%xmm4,%xmm5
1977	movdqa	%xmm4,32(%esp)
1978	pxor	%xmm5,%xmm6
1979	movdqa	%xmm5,48(%esp)
1980	pxor	%xmm6,%xmm7
1981	movdqa	%xmm6,64(%esp)
1982	movups	-48(%edx,%ecx,1),%xmm0
1983	movdqu	(%esi),%xmm2
1984	movdqu	16(%esi),%xmm3
1985	movdqu	32(%esi),%xmm4
1986	movdqu	48(%esi),%xmm5
1987	movdqu	64(%esi),%xmm6
1988	pxor	%xmm7,%xmm7
1989	pxor	%xmm2,%xmm1
1990	pxor	%xmm0,%xmm2
1991	pxor	%xmm3,%xmm1
1992	pxor	%xmm0,%xmm3
1993	pxor	%xmm4,%xmm1
1994	pxor	%xmm0,%xmm4
1995	pxor	%xmm5,%xmm1
1996	pxor	%xmm0,%xmm5
1997	pxor	%xmm6,%xmm1
1998	pxor	%xmm0,%xmm6
1999	movdqa	%xmm1,96(%esp)
2000	movups	-32(%edx,%ecx,1),%xmm1
2001	pxor	(%esp),%xmm2
2002	pxor	16(%esp),%xmm3
2003	pxor	32(%esp),%xmm4
2004	pxor	48(%esp),%xmm5
2005	pxor	64(%esp),%xmm6
2006	movups	-16(%edx,%ecx,1),%xmm0
2007.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
2008.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
2009.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
2010.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
2011.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
2012.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
2013	movl	120(%esp),%edi
2014	call	.L_aesni_encrypt6_enter
2015	movdqa	64(%esp),%xmm0
2016	pxor	(%esp),%xmm2
2017	pxor	16(%esp),%xmm3
2018	pxor	32(%esp),%xmm4
2019	pxor	48(%esp),%xmm5
2020	pxor	%xmm0,%xmm6
2021	movdqa	96(%esp),%xmm1
2022	movdqu	%xmm2,(%edi,%esi,1)
2023	movdqu	%xmm3,16(%edi,%esi,1)
2024	movdqu	%xmm4,32(%edi,%esi,1)
2025	movdqu	%xmm5,48(%edi,%esi,1)
2026	movdqu	%xmm6,64(%edi,%esi,1)
2027	jmp	.L078done
2028.align	16
# One remaining block (single-block AES-ENC loop).
2029.L079one:
2030	movdqu	(%ebx),%xmm7
2031	movl	112(%esp),%edx
2032	movdqu	(%esi),%xmm2
2033	movl	240(%edx),%ecx
2034	pxor	%xmm0,%xmm7
2035	pxor	%xmm2,%xmm1
2036	pxor	%xmm7,%xmm2
2037	movdqa	%xmm1,%xmm6
2038	movl	120(%esp),%edi
2039	movups	(%edx),%xmm0
2040	movups	16(%edx),%xmm1
2041	leal	32(%edx),%edx
2042	xorps	%xmm0,%xmm2
2043.L083enc1_loop_16:
2044.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
2045	decl	%ecx
2046	movups	(%edx),%xmm1
2047	leal	16(%edx),%edx
2048	jnz	.L083enc1_loop_16
2049.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
2050	xorps	%xmm7,%xmm2
2051	movdqa	%xmm7,%xmm0
2052	movdqa	%xmm6,%xmm1
2053	movups	%xmm2,(%edi,%esi,1)
2054	jmp	.L078done
2055.align	16
# Two remaining blocks.
2056.L080two:
2057	leal	1(%ebp),%ecx
2058	movl	112(%esp),%edx
2059	bsfl	%ecx,%ecx
2060	shll	$4,%ecx
2061	movdqu	(%ebx),%xmm6
2062	movdqu	(%ebx,%ecx,1),%xmm7
2063	movdqu	(%esi),%xmm2
2064	movdqu	16(%esi),%xmm3
2065	movl	240(%edx),%ecx
2066	pxor	%xmm0,%xmm6
2067	pxor	%xmm6,%xmm7
2068	pxor	%xmm2,%xmm1
2069	pxor	%xmm6,%xmm2
2070	pxor	%xmm3,%xmm1
2071	pxor	%xmm7,%xmm3
2072	movdqa	%xmm1,%xmm5
2073	movl	120(%esp),%edi
2074	call	_aesni_encrypt2
2075	xorps	%xmm6,%xmm2
2076	xorps	%xmm7,%xmm3
2077	movdqa	%xmm7,%xmm0
2078	movdqa	%xmm5,%xmm1
2079	movups	%xmm2,(%edi,%esi,1)
2080	movups	%xmm3,16(%edi,%esi,1)
2081	jmp	.L078done
2082.align	16
# Three remaining blocks.
2083.L081three:
2084	leal	1(%ebp),%ecx
2085	movl	112(%esp),%edx
2086	bsfl	%ecx,%ecx
2087	shll	$4,%ecx
2088	movdqu	(%ebx),%xmm5
2089	movdqu	(%ebx,%ecx,1),%xmm6
2090	movdqa	%xmm5,%xmm7
2091	movdqu	(%esi),%xmm2
2092	movdqu	16(%esi),%xmm3
2093	movdqu	32(%esi),%xmm4
2094	movl	240(%edx),%ecx
2095	pxor	%xmm0,%xmm5
2096	pxor	%xmm5,%xmm6
2097	pxor	%xmm6,%xmm7
2098	pxor	%xmm2,%xmm1
2099	pxor	%xmm5,%xmm2
2100	pxor	%xmm3,%xmm1
2101	pxor	%xmm6,%xmm3
2102	pxor	%xmm4,%xmm1
2103	pxor	%xmm7,%xmm4
2104	movdqa	%xmm1,96(%esp)
2105	movl	120(%esp),%edi
2106	call	_aesni_encrypt3
2107	xorps	%xmm5,%xmm2
2108	xorps	%xmm6,%xmm3
2109	xorps	%xmm7,%xmm4
2110	movdqa	%xmm7,%xmm0
2111	movdqa	96(%esp),%xmm1
2112	movups	%xmm2,(%edi,%esi,1)
2113	movups	%xmm3,16(%edi,%esi,1)
2114	movups	%xmm4,32(%edi,%esi,1)
2115	jmp	.L078done
2116.align	16
# Four remaining blocks (first two offsets spilled to the stack).
2117.L082four:
2118	leal	1(%ebp),%ecx
2119	leal	3(%ebp),%eax
2120	bsfl	%ecx,%ecx
2121	bsfl	%eax,%eax
2122	movl	112(%esp),%edx
2123	shll	$4,%ecx
2124	shll	$4,%eax
2125	movdqu	(%ebx),%xmm4
2126	movdqu	(%ebx,%ecx,1),%xmm5
2127	movdqa	%xmm4,%xmm6
2128	movdqu	(%ebx,%eax,1),%xmm7
2129	pxor	%xmm0,%xmm4
2130	movdqu	(%esi),%xmm2
2131	pxor	%xmm4,%xmm5
2132	movdqu	16(%esi),%xmm3
2133	pxor	%xmm5,%xmm6
2134	movdqa	%xmm4,(%esp)
2135	pxor	%xmm6,%xmm7
2136	movdqa	%xmm5,16(%esp)
2137	movdqu	32(%esi),%xmm4
2138	movdqu	48(%esi),%xmm5
2139	movl	240(%edx),%ecx
2140	pxor	%xmm2,%xmm1
2141	pxor	(%esp),%xmm2
2142	pxor	%xmm3,%xmm1
2143	pxor	16(%esp),%xmm3
2144	pxor	%xmm4,%xmm1
2145	pxor	%xmm6,%xmm4
2146	pxor	%xmm5,%xmm1
2147	pxor	%xmm7,%xmm5
2148	movdqa	%xmm1,96(%esp)
2149	movl	120(%esp),%edi
2150	call	_aesni_encrypt4
2151	xorps	(%esp),%xmm2
2152	xorps	16(%esp),%xmm3
2153	xorps	%xmm6,%xmm4
2154	movups	%xmm2,(%edi,%esi,1)
2155	xorps	%xmm7,%xmm5
2156	movups	%xmm3,16(%edi,%esi,1)
2157	movdqa	%xmm7,%xmm0
2158	movups	%xmm4,32(%edi,%esi,1)
2159	movdqa	96(%esp),%xmm1
2160	movups	%xmm5,48(%edi,%esi,1)
# Scrub the scratch frame, restore %esp, write the final offset and
# checksum back to the caller's buffers, and return.
2161.L078done:
2162	movl	128(%esp),%edx
2163	pxor	%xmm2,%xmm2
2164	pxor	%xmm3,%xmm3
2165	movdqa	%xmm2,(%esp)
2166	pxor	%xmm4,%xmm4
2167	movdqa	%xmm2,16(%esp)
2168	pxor	%xmm5,%xmm5
2169	movdqa	%xmm2,32(%esp)
2170	pxor	%xmm6,%xmm6
2171	movdqa	%xmm2,48(%esp)
2172	pxor	%xmm7,%xmm7
2173	movdqa	%xmm2,64(%esp)
2174	movdqa	%xmm2,80(%esp)
2175	movdqa	%xmm2,96(%esp)
2176	leal	(%edx),%esp
2177	movl	40(%esp),%ecx
2178	movl	48(%esp),%ebx
2179	movdqu	%xmm0,(%ecx)
2180	pxor	%xmm0,%xmm0
2181	movdqu	%xmm1,(%ebx)
2182	pxor	%xmm1,%xmm1
2183	popl	%edi
2184	popl	%esi
2185	popl	%ebx
2186	popl	%ebp
2187	ret
2188.size	aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
#----------------------------------------------------------------------
# aesni_ocb_decrypt — AES-OCB decryption, i386 + AES-NI.
# Same stack-argument layout as aesni_ocb_encrypt above:
#   20(%esp)=in, 24(%esp)=out, 28(%esp)=block count, 32(%esp)=key
#   schedule, 36(%esp)=starting block number, 40(%esp)=offset_i[16],
#   44(%esp)=table of precomputed L_ values, 48(%esp)=checksum[16]
#   — per OpenSSL's OCB interface (TODO confirm against aesni-x86.pl).
# Differs from the encrypt side in that AES-DEC rounds are used and the
# checksum (%xmm1) accumulates the PLAINTEXT produced after decryption,
# i.e. it is folded in after the AES calls rather than before.
# NOTE(review): auto-generated file — comments only. Lines carry a
# decimal prefix from the source-browser dump; strip before assembling.
#----------------------------------------------------------------------
2189.globl	aesni_ocb_decrypt
2190.type	aesni_ocb_decrypt,@function
2191.align	16
2192aesni_ocb_decrypt:
2193.L_aesni_ocb_decrypt_begin:
2194	pushl	%ebp
2195	pushl	%ebx
2196	pushl	%esi
2197	pushl	%edi
2198	movl	40(%esp),%ecx
2199	movl	48(%esp),%ebx
2200	movl	20(%esp),%esi
2201	movl	24(%esp),%edi
2202	movl	28(%esp),%eax
2203	movl	32(%esp),%edx
2204	movdqu	(%ecx),%xmm0
2205	movl	36(%esp),%ebp
2206	movdqu	(%ebx),%xmm1
2207	movl	44(%esp),%ebx
# Aligned 132-byte scratch frame; 120(%esp)=out-in delta, 124(%esp)=
# input limit for the 6-block loop, 128(%esp)=saved %esp.
2208	movl	%esp,%ecx
2209	subl	$132,%esp
2210	andl	$-16,%esp
2211	subl	%esi,%edi
2212	shll	$4,%eax
2213	leal	-96(%esi,%eax,1),%eax
2214	movl	%edi,120(%esp)
2215	movl	%eax,124(%esp)
2216	movl	%ecx,128(%esp)
2217	movl	240(%edx),%ecx
# Align to an odd block index by decrypting a single block first.
2218	testl	$1,%ebp
2219	jnz	.L084odd
2220	bsfl	%ebp,%eax
2221	addl	$1,%ebp
2222	shll	$4,%eax
2223	movdqu	(%ebx,%eax,1),%xmm7
2224	movl	%edx,%eax
2225	movdqu	(%esi),%xmm2
2226	leal	16(%esi),%esi
2227	pxor	%xmm0,%xmm7
2228	pxor	%xmm7,%xmm2
2229	movdqa	%xmm1,%xmm6
2230	movups	(%edx),%xmm0
2231	movups	16(%edx),%xmm1
2232	leal	32(%edx),%edx
2233	xorps	%xmm0,%xmm2
2234.L085dec1_loop_17:
2235.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
2236	decl	%ecx
2237	movups	(%edx),%xmm1
2238	leal	16(%edx),%edx
2239	jnz	.L085dec1_loop_17
2240.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
2241	xorps	%xmm7,%xmm2
# Checksum the recovered plaintext after decryption.
2242	movaps	%xmm6,%xmm1
2243	movdqa	%xmm7,%xmm0
2244	xorps	%xmm2,%xmm1
2245	movups	%xmm2,-16(%edi,%esi,1)
2246	movl	240(%eax),%ecx
2247	movl	%eax,%edx
2248	movl	124(%esp),%eax
2249.L084odd:
# 112(%esp)=key, 116(%esp)=neg. round-scaled offset for round-key
# indexing; %edx points past the end of the schedule.
2250	shll	$4,%ecx
2251	movl	$16,%edi
2252	subl	%ecx,%edi
2253	movl	%edx,112(%esp)
2254	leal	32(%edx,%ecx,1),%edx
2255	movl	%edi,116(%esp)
2256	cmpl	%eax,%esi
2257	ja	.L086short
2258	jmp	.L087grandloop
2259.align	32
# Main loop: 6 blocks per iteration; per-block offsets chained via
# L_[ntz] lookups and staged at (%esp)..80(%esp).
2260.L087grandloop:
2261	leal	1(%ebp),%ecx
2262	leal	3(%ebp),%eax
2263	leal	5(%ebp),%edi
2264	addl	$6,%ebp
2265	bsfl	%ecx,%ecx
2266	bsfl	%eax,%eax
2267	bsfl	%edi,%edi
2268	shll	$4,%ecx
2269	shll	$4,%eax
2270	shll	$4,%edi
2271	movdqu	(%ebx),%xmm2
2272	movdqu	(%ebx,%ecx,1),%xmm3
2273	movl	116(%esp),%ecx
2274	movdqa	%xmm2,%xmm4
2275	movdqu	(%ebx,%eax,1),%xmm5
2276	movdqa	%xmm2,%xmm6
2277	movdqu	(%ebx,%edi,1),%xmm7
2278	pxor	%xmm0,%xmm2
2279	pxor	%xmm2,%xmm3
2280	movdqa	%xmm2,(%esp)
2281	pxor	%xmm3,%xmm4
2282	movdqa	%xmm3,16(%esp)
2283	pxor	%xmm4,%xmm5
2284	movdqa	%xmm4,32(%esp)
2285	pxor	%xmm5,%xmm6
2286	movdqa	%xmm5,48(%esp)
2287	pxor	%xmm6,%xmm7
2288	movdqa	%xmm6,64(%esp)
2289	movdqa	%xmm7,80(%esp)
2290	movups	-48(%edx,%ecx,1),%xmm0
2291	movdqu	(%esi),%xmm2
2292	movdqu	16(%esi),%xmm3
2293	movdqu	32(%esi),%xmm4
2294	movdqu	48(%esi),%xmm5
2295	movdqu	64(%esi),%xmm6
2296	movdqu	80(%esi),%xmm7
2297	leal	96(%esi),%esi
# Spill checksum; whiten the 6 ciphertext blocks with round key 0 and
# the staged offsets (no checksum update here — done after decryption).
2298	movdqa	%xmm1,96(%esp)
2299	pxor	%xmm0,%xmm2
2300	pxor	%xmm0,%xmm3
2301	pxor	%xmm0,%xmm4
2302	pxor	%xmm0,%xmm5
2303	pxor	%xmm0,%xmm6
2304	pxor	%xmm0,%xmm7
2305	movups	-32(%edx,%ecx,1),%xmm1
2306	pxor	(%esp),%xmm2
2307	pxor	16(%esp),%xmm3
2308	pxor	32(%esp),%xmm4
2309	pxor	48(%esp),%xmm5
2310	pxor	64(%esp),%xmm6
2311	pxor	80(%esp),%xmm7
2312	movups	-16(%edx,%ecx,1),%xmm0
2313.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
2314.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
2315.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
2316.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
2317.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
2318.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
2319	movl	120(%esp),%edi
2320	movl	124(%esp),%eax
2321	call	.L_aesni_decrypt6_enter
# Un-whiten, fold each plaintext block into the checksum, and store.
2322	movdqa	80(%esp),%xmm0
2323	pxor	(%esp),%xmm2
2324	movdqa	96(%esp),%xmm1
2325	pxor	16(%esp),%xmm3
2326	pxor	32(%esp),%xmm4
2327	pxor	48(%esp),%xmm5
2328	pxor	64(%esp),%xmm6
2329	pxor	%xmm0,%xmm7
2330	pxor	%xmm2,%xmm1
2331	movdqu	%xmm2,-96(%edi,%esi,1)
2332	pxor	%xmm3,%xmm1
2333	movdqu	%xmm3,-80(%edi,%esi,1)
2334	pxor	%xmm4,%xmm1
2335	movdqu	%xmm4,-64(%edi,%esi,1)
2336	pxor	%xmm5,%xmm1
2337	movdqu	%xmm5,-48(%edi,%esi,1)
2338	pxor	%xmm6,%xmm1
2339	movdqu	%xmm6,-32(%edi,%esi,1)
2340	pxor	%xmm7,%xmm1
2341	movdqu	%xmm7,-16(%edi,%esi,1)
2342	cmpl	%eax,%esi
2343	jbe	.L087grandloop
# Tail dispatch: 0..5 blocks remain.
2344.L086short:
2345	addl	$96,%eax
2346	subl	%esi,%eax
2347	jz	.L088done
2348	cmpl	$32,%eax
2349	jb	.L089one
2350	je	.L090two
2351	cmpl	$64,%eax
2352	jb	.L091three
2353	je	.L092four
# Five remaining blocks (6-way body with %xmm7 zeroed as dummy).
2354	leal	1(%ebp),%ecx
2355	leal	3(%ebp),%eax
2356	bsfl	%ecx,%ecx
2357	bsfl	%eax,%eax
2358	shll	$4,%ecx
2359	shll	$4,%eax
2360	movdqu	(%ebx),%xmm2
2361	movdqu	(%ebx,%ecx,1),%xmm3
2362	movl	116(%esp),%ecx
2363	movdqa	%xmm2,%xmm4
2364	movdqu	(%ebx,%eax,1),%xmm5
2365	movdqa	%xmm2,%xmm6
2366	pxor	%xmm0,%xmm2
2367	pxor	%xmm2,%xmm3
2368	movdqa	%xmm2,(%esp)
2369	pxor	%xmm3,%xmm4
2370	movdqa	%xmm3,16(%esp)
2371	pxor	%xmm4,%xmm5
2372	movdqa	%xmm4,32(%esp)
2373	pxor	%xmm5,%xmm6
2374	movdqa	%xmm5,48(%esp)
2375	pxor	%xmm6,%xmm7
2376	movdqa	%xmm6,64(%esp)
2377	movups	-48(%edx,%ecx,1),%xmm0
2378	movdqu	(%esi),%xmm2
2379	movdqu	16(%esi),%xmm3
2380	movdqu	32(%esi),%xmm4
2381	movdqu	48(%esi),%xmm5
2382	movdqu	64(%esi),%xmm6
2383	pxor	%xmm7,%xmm7
2384	movdqa	%xmm1,96(%esp)
2385	pxor	%xmm0,%xmm2
2386	pxor	%xmm0,%xmm3
2387	pxor	%xmm0,%xmm4
2388	pxor	%xmm0,%xmm5
2389	pxor	%xmm0,%xmm6
2390	movups	-32(%edx,%ecx,1),%xmm1
2391	pxor	(%esp),%xmm2
2392	pxor	16(%esp),%xmm3
2393	pxor	32(%esp),%xmm4
2394	pxor	48(%esp),%xmm5
2395	pxor	64(%esp),%xmm6
2396	movups	-16(%edx,%ecx,1),%xmm0
2397.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
2398.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
2399.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
2400.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
2401.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
2402.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
2403	movl	120(%esp),%edi
2404	call	.L_aesni_decrypt6_enter
2405	movdqa	64(%esp),%xmm0
2406	pxor	(%esp),%xmm2
2407	movdqa	96(%esp),%xmm1
2408	pxor	16(%esp),%xmm3
2409	pxor	32(%esp),%xmm4
2410	pxor	48(%esp),%xmm5
2411	pxor	%xmm0,%xmm6
2412	pxor	%xmm2,%xmm1
2413	movdqu	%xmm2,(%edi,%esi,1)
2414	pxor	%xmm3,%xmm1
2415	movdqu	%xmm3,16(%edi,%esi,1)
2416	pxor	%xmm4,%xmm1
2417	movdqu	%xmm4,32(%edi,%esi,1)
2418	pxor	%xmm5,%xmm1
2419	movdqu	%xmm5,48(%edi,%esi,1)
2420	pxor	%xmm6,%xmm1
2421	movdqu	%xmm6,64(%edi,%esi,1)
2422	jmp	.L088done
2423.align	16
# One remaining block (single-block AES-DEC loop).
2424.L089one:
2425	movdqu	(%ebx),%xmm7
2426	movl	112(%esp),%edx
2427	movdqu	(%esi),%xmm2
2428	movl	240(%edx),%ecx
2429	pxor	%xmm0,%xmm7
2430	pxor	%xmm7,%xmm2
2431	movdqa	%xmm1,%xmm6
2432	movl	120(%esp),%edi
2433	movups	(%edx),%xmm0
2434	movups	16(%edx),%xmm1
2435	leal	32(%edx),%edx
2436	xorps	%xmm0,%xmm2
2437.L093dec1_loop_18:
2438.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
2439	decl	%ecx
2440	movups	(%edx),%xmm1
2441	leal	16(%edx),%edx
2442	jnz	.L093dec1_loop_18
2443.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
2444	xorps	%xmm7,%xmm2
2445	movaps	%xmm6,%xmm1
2446	movdqa	%xmm7,%xmm0
2447	xorps	%xmm2,%xmm1
2448	movups	%xmm2,(%edi,%esi,1)
2449	jmp	.L088done
2450.align	16
# Two remaining blocks.
2451.L090two:
2452	leal	1(%ebp),%ecx
2453	movl	112(%esp),%edx
2454	bsfl	%ecx,%ecx
2455	shll	$4,%ecx
2456	movdqu	(%ebx),%xmm6
2457	movdqu	(%ebx,%ecx,1),%xmm7
2458	movdqu	(%esi),%xmm2
2459	movdqu	16(%esi),%xmm3
2460	movl	240(%edx),%ecx
2461	movdqa	%xmm1,%xmm5
2462	pxor	%xmm0,%xmm6
2463	pxor	%xmm6,%xmm7
2464	pxor	%xmm6,%xmm2
2465	pxor	%xmm7,%xmm3
2466	movl	120(%esp),%edi
2467	call	_aesni_decrypt2
2468	xorps	%xmm6,%xmm2
2469	xorps	%xmm7,%xmm3
2470	movdqa	%xmm7,%xmm0
2471	xorps	%xmm2,%xmm5
2472	movups	%xmm2,(%edi,%esi,1)
2473	xorps	%xmm3,%xmm5
2474	movups	%xmm3,16(%edi,%esi,1)
2475	movaps	%xmm5,%xmm1
2476	jmp	.L088done
2477.align	16
# Three remaining blocks.
2478.L091three:
2479	leal	1(%ebp),%ecx
2480	movl	112(%esp),%edx
2481	bsfl	%ecx,%ecx
2482	shll	$4,%ecx
2483	movdqu	(%ebx),%xmm5
2484	movdqu	(%ebx,%ecx,1),%xmm6
2485	movdqa	%xmm5,%xmm7
2486	movdqu	(%esi),%xmm2
2487	movdqu	16(%esi),%xmm3
2488	movdqu	32(%esi),%xmm4
2489	movl	240(%edx),%ecx
2490	movdqa	%xmm1,96(%esp)
2491	pxor	%xmm0,%xmm5
2492	pxor	%xmm5,%xmm6
2493	pxor	%xmm6,%xmm7
2494	pxor	%xmm5,%xmm2
2495	pxor	%xmm6,%xmm3
2496	pxor	%xmm7,%xmm4
2497	movl	120(%esp),%edi
2498	call	_aesni_decrypt3
2499	movdqa	96(%esp),%xmm1
2500	xorps	%xmm5,%xmm2
2501	xorps	%xmm6,%xmm3
2502	xorps	%xmm7,%xmm4
2503	movups	%xmm2,(%edi,%esi,1)
2504	pxor	%xmm2,%xmm1
2505	movdqa	%xmm7,%xmm0
2506	movups	%xmm3,16(%edi,%esi,1)
2507	pxor	%xmm3,%xmm1
2508	movups	%xmm4,32(%edi,%esi,1)
2509	pxor	%xmm4,%xmm1
2510	jmp	.L088done
2511.align	16
# Four remaining blocks (first two offsets spilled to the stack).
2512.L092four:
2513	leal	1(%ebp),%ecx
2514	leal	3(%ebp),%eax
2515	bsfl	%ecx,%ecx
2516	bsfl	%eax,%eax
2517	movl	112(%esp),%edx
2518	shll	$4,%ecx
2519	shll	$4,%eax
2520	movdqu	(%ebx),%xmm4
2521	movdqu	(%ebx,%ecx,1),%xmm5
2522	movdqa	%xmm4,%xmm6
2523	movdqu	(%ebx,%eax,1),%xmm7
2524	pxor	%xmm0,%xmm4
2525	movdqu	(%esi),%xmm2
2526	pxor	%xmm4,%xmm5
2527	movdqu	16(%esi),%xmm3
2528	pxor	%xmm5,%xmm6
2529	movdqa	%xmm4,(%esp)
2530	pxor	%xmm6,%xmm7
2531	movdqa	%xmm5,16(%esp)
2532	movdqu	32(%esi),%xmm4
2533	movdqu	48(%esi),%xmm5
2534	movl	240(%edx),%ecx
2535	movdqa	%xmm1,96(%esp)
2536	pxor	(%esp),%xmm2
2537	pxor	16(%esp),%xmm3
2538	pxor	%xmm6,%xmm4
2539	pxor	%xmm7,%xmm5
2540	movl	120(%esp),%edi
2541	call	_aesni_decrypt4
2542	movdqa	96(%esp),%xmm1
2543	xorps	(%esp),%xmm2
2544	xorps	16(%esp),%xmm3
2545	xorps	%xmm6,%xmm4
2546	movups	%xmm2,(%edi,%esi,1)
2547	pxor	%xmm2,%xmm1
2548	xorps	%xmm7,%xmm5
2549	movups	%xmm3,16(%edi,%esi,1)
2550	pxor	%xmm3,%xmm1
2551	movdqa	%xmm7,%xmm0
2552	movups	%xmm4,32(%edi,%esi,1)
2553	pxor	%xmm4,%xmm1
2554	movups	%xmm5,48(%edi,%esi,1)
2555	pxor	%xmm5,%xmm1
# Scrub the scratch frame, restore %esp, write the final offset and
# checksum back to the caller's buffers, and return.
2556.L088done:
2557	movl	128(%esp),%edx
2558	pxor	%xmm2,%xmm2
2559	pxor	%xmm3,%xmm3
2560	movdqa	%xmm2,(%esp)
2561	pxor	%xmm4,%xmm4
2562	movdqa	%xmm2,16(%esp)
2563	pxor	%xmm5,%xmm5
2564	movdqa	%xmm2,32(%esp)
2565	pxor	%xmm6,%xmm6
2566	movdqa	%xmm2,48(%esp)
2567	pxor	%xmm7,%xmm7
2568	movdqa	%xmm2,64(%esp)
2569	movdqa	%xmm2,80(%esp)
2570	movdqa	%xmm2,96(%esp)
2571	leal	(%edx),%esp
2572	movl	40(%esp),%ecx
2573	movl	48(%esp),%ebx
2574	movdqu	%xmm0,(%ecx)
2575	pxor	%xmm0,%xmm0
2576	movdqu	%xmm1,(%ebx)
2577	pxor	%xmm1,%xmm1
2578	popl	%edi
2579	popl	%esi
2580	popl	%ebx
2581	popl	%ebp
2582	ret
2583.size	aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
2584.globl	aesni_cbc_encrypt
2585.type	aesni_cbc_encrypt,@function
2586.align	16
2587aesni_cbc_encrypt:
2588.L_aesni_cbc_encrypt_begin:
2589	pushl	%ebp
2590	pushl	%ebx
2591	pushl	%esi
2592	pushl	%edi
2593	movl	20(%esp),%esi
2594	movl	%esp,%ebx
2595	movl	24(%esp),%edi
2596	subl	$24,%ebx
2597	movl	28(%esp),%eax
2598	andl	$-16,%ebx
2599	movl	32(%esp),%edx
2600	movl	36(%esp),%ebp
2601	testl	%eax,%eax
2602	jz	.L094cbc_abort
2603	cmpl	$0,40(%esp)
2604	xchgl	%esp,%ebx
2605	movups	(%ebp),%xmm7
2606	movl	240(%edx),%ecx
2607	movl	%edx,%ebp
2608	movl	%ebx,16(%esp)
2609	movl	%ecx,%ebx
2610	je	.L095cbc_decrypt
2611	movaps	%xmm7,%xmm2
2612	cmpl	$16,%eax
2613	jb	.L096cbc_enc_tail
2614	subl	$16,%eax
2615	jmp	.L097cbc_enc_loop
2616.align	16
2617.L097cbc_enc_loop:
2618	movups	(%esi),%xmm7
2619	leal	16(%esi),%esi
2620	movups	(%edx),%xmm0
2621	movups	16(%edx),%xmm1
2622	xorps	%xmm0,%xmm7
2623	leal	32(%edx),%edx
2624	xorps	%xmm7,%xmm2
2625.L098enc1_loop_19:
2626.byte	102,15,56,220,209
2627	decl	%ecx
2628	movups	(%edx),%xmm1
2629	leal	16(%edx),%edx
2630	jnz	.L098enc1_loop_19
2631.byte	102,15,56,221,209
2632	movl	%ebx,%ecx
2633	movl	%ebp,%edx
2634	movups	%xmm2,(%edi)
2635	leal	16(%edi),%edi
2636	subl	$16,%eax
2637	jnc	.L097cbc_enc_loop
2638	addl	$16,%eax
2639	jnz	.L096cbc_enc_tail
2640	movaps	%xmm2,%xmm7
2641	pxor	%xmm2,%xmm2
2642	jmp	.L099cbc_ret
2643.L096cbc_enc_tail:
2644	movl	%eax,%ecx
2645.long	2767451785
2646	movl	$16,%ecx
2647	subl	%eax,%ecx
2648	xorl	%eax,%eax
2649.long	2868115081
2650	leal	-16(%edi),%edi
2651	movl	%ebx,%ecx
2652	movl	%edi,%esi
2653	movl	%ebp,%edx
2654	jmp	.L097cbc_enc_loop
2655.align	16
2656.L095cbc_decrypt:
2657	cmpl	$80,%eax
2658	jbe	.L100cbc_dec_tail
2659	movaps	%xmm7,(%esp)
2660	subl	$80,%eax
2661	jmp	.L101cbc_dec_loop6_enter
2662.align	16
2663.L102cbc_dec_loop6:
2664	movaps	%xmm0,(%esp)
2665	movups	%xmm7,(%edi)
2666	leal	16(%edi),%edi
2667.L101cbc_dec_loop6_enter:
2668	movdqu	(%esi),%xmm2
2669	movdqu	16(%esi),%xmm3
2670	movdqu	32(%esi),%xmm4
2671	movdqu	48(%esi),%xmm5
2672	movdqu	64(%esi),%xmm6
2673	movdqu	80(%esi),%xmm7
2674	call	_aesni_decrypt6
2675	movups	(%esi),%xmm1
2676	movups	16(%esi),%xmm0
2677	xorps	(%esp),%xmm2
2678	xorps	%xmm1,%xmm3
2679	movups	32(%esi),%xmm1
2680	xorps	%xmm0,%xmm4
2681	movups	48(%esi),%xmm0
2682	xorps	%xmm1,%xmm5
2683	movups	64(%esi),%xmm1
2684	xorps	%xmm0,%xmm6
2685	movups	80(%esi),%xmm0
2686	xorps	%xmm1,%xmm7
2687	movups	%xmm2,(%edi)
2688	movups	%xmm3,16(%edi)
2689	leal	96(%esi),%esi
2690	movups	%xmm4,32(%edi)
2691	movl	%ebx,%ecx
2692	movups	%xmm5,48(%edi)
2693	movl	%ebp,%edx
2694	movups	%xmm6,64(%edi)
2695	leal	80(%edi),%edi
2696	subl	$96,%eax
2697	ja	.L102cbc_dec_loop6
2698	movaps	%xmm7,%xmm2
2699	movaps	%xmm0,%xmm7
2700	addl	$80,%eax
2701	jle	.L103cbc_dec_clear_tail_collected
2702	movups	%xmm2,(%edi)
2703	leal	16(%edi),%edi
2704.L100cbc_dec_tail:
2705	movups	(%esi),%xmm2
2706	movaps	%xmm2,%xmm6
2707	cmpl	$16,%eax
2708	jbe	.L104cbc_dec_one
2709	movups	16(%esi),%xmm3
2710	movaps	%xmm3,%xmm5
2711	cmpl	$32,%eax
2712	jbe	.L105cbc_dec_two
2713	movups	32(%esi),%xmm4
2714	cmpl	$48,%eax
2715	jbe	.L106cbc_dec_three
2716	movups	48(%esi),%xmm5
2717	cmpl	$64,%eax
2718	jbe	.L107cbc_dec_four
2719	movups	64(%esi),%xmm6
2720	movaps	%xmm7,(%esp)
2721	movups	(%esi),%xmm2
2722	xorps	%xmm7,%xmm7
2723	call	_aesni_decrypt6
2724	movups	(%esi),%xmm1
2725	movups	16(%esi),%xmm0
2726	xorps	(%esp),%xmm2
2727	xorps	%xmm1,%xmm3
2728	movups	32(%esi),%xmm1
2729	xorps	%xmm0,%xmm4
2730	movups	48(%esi),%xmm0
2731	xorps	%xmm1,%xmm5
2732	movups	64(%esi),%xmm7
2733	xorps	%xmm0,%xmm6
2734	movups	%xmm2,(%edi)
2735	movups	%xmm3,16(%edi)
2736	pxor	%xmm3,%xmm3
2737	movups	%xmm4,32(%edi)
2738	pxor	%xmm4,%xmm4
2739	movups	%xmm5,48(%edi)
2740	pxor	%xmm5,%xmm5
2741	leal	64(%edi),%edi
2742	movaps	%xmm6,%xmm2
2743	pxor	%xmm6,%xmm6
2744	subl	$80,%eax
2745	jmp	.L108cbc_dec_tail_collected
2746.align	16
2747.L104cbc_dec_one:
2748	movups	(%edx),%xmm0
2749	movups	16(%edx),%xmm1
2750	leal	32(%edx),%edx
2751	xorps	%xmm0,%xmm2
2752.L109dec1_loop_20:
2753.byte	102,15,56,222,209
2754	decl	%ecx
2755	movups	(%edx),%xmm1
2756	leal	16(%edx),%edx
2757	jnz	.L109dec1_loop_20
2758.byte	102,15,56,223,209
2759	xorps	%xmm7,%xmm2
2760	movaps	%xmm6,%xmm7
2761	subl	$16,%eax
2762	jmp	.L108cbc_dec_tail_collected
2763.align	16
2764.L105cbc_dec_two:
2765	call	_aesni_decrypt2
2766	xorps	%xmm7,%xmm2
2767	xorps	%xmm6,%xmm3
2768	movups	%xmm2,(%edi)
2769	movaps	%xmm3,%xmm2
2770	pxor	%xmm3,%xmm3
2771	leal	16(%edi),%edi
2772	movaps	%xmm5,%xmm7
2773	subl	$32,%eax
2774	jmp	.L108cbc_dec_tail_collected
2775.align	16
2776.L106cbc_dec_three:
2777	call	_aesni_decrypt3
2778	xorps	%xmm7,%xmm2
2779	xorps	%xmm6,%xmm3
2780	xorps	%xmm5,%xmm4
2781	movups	%xmm2,(%edi)
2782	movaps	%xmm4,%xmm2
2783	pxor	%xmm4,%xmm4
2784	movups	%xmm3,16(%edi)
2785	pxor	%xmm3,%xmm3
2786	leal	32(%edi),%edi
2787	movups	32(%esi),%xmm7
2788	subl	$48,%eax
2789	jmp	.L108cbc_dec_tail_collected
2790.align	16
2791.L107cbc_dec_four:
2792	call	_aesni_decrypt4
2793	movups	16(%esi),%xmm1
2794	movups	32(%esi),%xmm0
2795	xorps	%xmm7,%xmm2
2796	movups	48(%esi),%xmm7
2797	xorps	%xmm6,%xmm3
2798	movups	%xmm2,(%edi)
2799	xorps	%xmm1,%xmm4
2800	movups	%xmm3,16(%edi)
2801	pxor	%xmm3,%xmm3
2802	xorps	%xmm0,%xmm5
2803	movups	%xmm4,32(%edi)
2804	pxor	%xmm4,%xmm4
2805	leal	48(%edi),%edi
2806	movaps	%xmm5,%xmm2
2807	pxor	%xmm5,%xmm5
2808	subl	$64,%eax
2809	jmp	.L108cbc_dec_tail_collected
2810.align	16
2811.L103cbc_dec_clear_tail_collected:
2812	pxor	%xmm3,%xmm3
2813	pxor	%xmm4,%xmm4
2814	pxor	%xmm5,%xmm5
2815	pxor	%xmm6,%xmm6
2816.L108cbc_dec_tail_collected:
2817	andl	$15,%eax
2818	jnz	.L110cbc_dec_tail_partial
2819	movups	%xmm2,(%edi)
2820	pxor	%xmm0,%xmm0
2821	jmp	.L099cbc_ret
2822.align	16
2823.L110cbc_dec_tail_partial:
2824	movaps	%xmm2,(%esp)
2825	pxor	%xmm0,%xmm0
2826	movl	$16,%ecx
2827	movl	%esp,%esi
2828	subl	%eax,%ecx
2829.long	2767451785
2830	movdqa	%xmm2,(%esp)
2831.L099cbc_ret:
2832	movl	16(%esp),%esp
2833	movl	36(%esp),%ebp
2834	pxor	%xmm2,%xmm2
2835	pxor	%xmm1,%xmm1
2836	movups	%xmm7,(%ebp)
2837	pxor	%xmm7,%xmm7
2838.L094cbc_abort:
2839	popl	%edi
2840	popl	%esi
2841	popl	%ebx
2842	popl	%ebp
2843	ret
2844.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
/*
 * int _aesni_set_encrypt_key(%eax = userKey, %ecx = bits, %edx = key)
 * Internal AES key-expansion helper (PIC variant).
 * In:  %eax = user key pointer, %ecx = key size in bits (128/192/256),
 *      %edx = round-key schedule to fill.
 * Out: %eax = 0 on success, -1 on NULL pointer, -2 on bad key size.
 *      The round count (10/12/14) is stored after the last round key,
 *      i.e. at offset 240 of the schedule, matching the 240(%edx) load
 *      done by the encrypt/decrypt routines.
 * Two expansion strategies: the classic AESKEYGENASSIST chain, or an
 * alternative pshufb/aesenclast-based schedule chosen from a masked
 * OPENSSL_ia32cap_P capability test (exact bit semantics follow the
 * OPENSSL_ia32cap convention -- not visible here, so not restated).
 * Clobbers %ebx/%ebp (saved/restored) and %xmm0-%xmm5 (cleared on the
 * success path so no key material is left in registers).
 */
.type	_aesni_set_encrypt_key,@function
.align	16
_aesni_set_encrypt_key:
	pushl	%ebp
	pushl	%ebx
	testl	%eax,%eax		/* reject NULL userKey */
	jz	.L111bad_pointer
	testl	%edx,%edx		/* reject NULL schedule */
	jz	.L111bad_pointer
	call	.L112pic		/* classic call/pop to get PC ... */
.L112pic:
	popl	%ebx			/* ... %ebx = address of .L112pic */
	leal	.Lkey_const-.L112pic(%ebx),%ebx	/* %ebx -> constant table */
	leal	OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
	movups	(%eax),%xmm0		/* first 128 bits of user key */
	xorps	%xmm4,%xmm4		/* scratch zero for shufps chains */
	movl	4(%ebp),%ebp		/* second dword of capability vector */
	leal	16(%edx),%edx
	andl	$268437504,%ebp		/* mask 0x10000800 of cap bits */
	cmpl	$256,%ecx
	je	.L11314rounds
	cmpl	$192,%ecx
	je	.L11412rounds
	cmpl	$128,%ecx
	jne	.L115bad_keybits
.align	16
/* AES-128: 10 rounds, one AESKEYGENASSIST per round key. */
.L11610rounds:
	cmpl	$268435456,%ebp		/* 0x10000000 -> take alt schedule */
	je	.L11710rounds_alt
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)		/* round 0 key = raw user key */
.byte	102,15,58,223,200,1		/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	.L118key_128_cold
.byte	102,15,58,223,200,2		/* aeskeygenassist $2,... rcon doubles */
	call	.L119key_128
.byte	102,15,58,223,200,4
	call	.L119key_128
.byte	102,15,58,223,200,8
	call	.L119key_128
.byte	102,15,58,223,200,16
	call	.L119key_128
.byte	102,15,58,223,200,32
	call	.L119key_128
.byte	102,15,58,223,200,64
	call	.L119key_128
.byte	102,15,58,223,200,128
	call	.L119key_128
.byte	102,15,58,223,200,27		/* rcon wraps: 0x80 -> 0x1b */
	call	.L119key_128
.byte	102,15,58,223,200,54		/* final rcon 0x36 */
	call	.L119key_128
	movups	%xmm0,(%edx)		/* round 10 key */
	movl	%ecx,80(%edx)		/* rounds (=10... %ecx=9? no: see note) */
	jmp	.L120good_key
.align	16
/* Subroutine: mix previous round key (%xmm0) with keygenassist
 * output (%xmm1) to produce the next round key in %xmm0. */
.L119key_128:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.L118key_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	/* broadcast rotated/subbed word */
	xorps	%xmm1,%xmm0
	ret
.align	16
/* AES-128 alternative schedule: pshufb + aesenclast with explicit
 * rcon vector, avoiding AESKEYGENASSIST. */
.L11710rounds_alt:
	movdqa	(%ebx),%xmm5		/* byte-selection pshufb mask */
	movl	$8,%ecx
	movdqa	32(%ebx),%xmm4		/* rcon = {1,1,1,1} */
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,-16(%edx)
.L121loop_key128:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
.byte	102,15,56,221,196		/* aesenclast %xmm4,%xmm0 (SubWord+rcon) */
	pslld	$1,%xmm4		/* next rcon */
	leal	16(%edx),%edx
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2		/* fold previous key words in ... */
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0		/* ... new round key */
	movdqu	%xmm0,-16(%edx)
	movdqa	%xmm0,%xmm2
	decl	%ecx
	jnz	.L121loop_key128
	movdqa	48(%ebx),%xmm4		/* rcon = {27,27,27,27} for round 9 */
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)		/* round 9 key */
	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%edx)		/* round 10 key */
	movl	$9,%ecx
	movl	%ecx,96(%edx)		/* store round count at key+240 */
	jmp	.L120good_key
.align	16
/* AES-192: 12 rounds; user key is 128+64 bits (%xmm0 + low %xmm2). */
.L11412rounds:
	movq	16(%eax),%xmm2		/* upper 64 key bits */
	cmpl	$268435456,%ebp
	je	.L12212rounds_alt
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1		/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	.L123key_192a_cold
.byte	102,15,58,223,202,2
	call	.L124key_192b
.byte	102,15,58,223,202,4
	call	.L125key_192a
.byte	102,15,58,223,202,8
	call	.L124key_192b
.byte	102,15,58,223,202,16
	call	.L125key_192a
.byte	102,15,58,223,202,32
	call	.L124key_192b
.byte	102,15,58,223,202,64
	call	.L125key_192a
.byte	102,15,58,223,202,128
	call	.L124key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)		/* round count at key+240 */
	jmp	.L120good_key
.align	16
.L125key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	16
.L123key_192a_cold:
	movaps	%xmm2,%xmm5
.L126key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		/* broadcast keygenassist word */
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret
.align	16
/* 192-bit helper: repack the 1.5-key state into two 128-bit
 * round-key slots before continuing the expansion. */
.L124key_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	.L126key_192b_warm
.align	16
/* AES-192 alternative (AESKEYGENASSIST-free) schedule. */
.L12212rounds_alt:
	movdqa	16(%ebx),%xmm5		/* 192-bit pshufb mask */
	movdqa	32(%ebx),%xmm4		/* rcon = 1 */
	movl	$8,%ecx
	movdqu	%xmm0,-16(%edx)
.L127loop_key192:
	movq	%xmm2,(%edx)
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213			/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212		/* aesenclast %xmm4,%xmm2 */
	pslld	$1,%xmm4
	leal	24(%edx),%edx		/* 192-bit stride */
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%edx)
	decl	%ecx
	jnz	.L127loop_key192
	movl	$11,%ecx
	movl	%ecx,32(%edx)		/* round count at key+240 */
	jmp	.L120good_key
.align	16
/* AES-256: 14 rounds; user key is %xmm0 + %xmm2 (two 128-bit halves). */
.L11314rounds:
	movups	16(%eax),%xmm2
	leal	16(%edx),%edx
	cmpl	$268435456,%ebp
	je	.L12814rounds_alt
	movl	$13,%ecx
	movups	%xmm0,-32(%edx)		/* round 0 key */
	movups	%xmm2,-16(%edx)		/* round 1 key */
.byte	102,15,58,223,202,1		/* aeskeygenassist $1,%xmm2,%xmm1 */
	call	.L129key_256a_cold
.byte	102,15,58,223,200,1		/* aeskeygenassist $1,%xmm0,%xmm1 */
	call	.L130key_256b
.byte	102,15,58,223,202,2
	call	.L131key_256a
.byte	102,15,58,223,200,2
	call	.L130key_256b
.byte	102,15,58,223,202,4
	call	.L131key_256a
.byte	102,15,58,223,200,4
	call	.L130key_256b
.byte	102,15,58,223,202,8
	call	.L131key_256a
.byte	102,15,58,223,200,8
	call	.L130key_256b
.byte	102,15,58,223,202,16
	call	.L131key_256a
.byte	102,15,58,223,200,16
	call	.L130key_256b
.byte	102,15,58,223,202,32
	call	.L131key_256a
.byte	102,15,58,223,200,32
	call	.L130key_256b
.byte	102,15,58,223,202,64
	call	.L131key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)		/* round count at key+240 */
	xorl	%eax,%eax
	jmp	.L120good_key
.align	16
/* 256-bit "a" step: derive even round key (%xmm0) from %xmm1. */
.L131key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
.L129key_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	16
/* 256-bit "b" step: derive odd round key (%xmm2). */
.L130key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	/* broadcast word 2 (no RotWord) */
	xorps	%xmm1,%xmm2
	ret
.align	16
/* AES-256 alternative (AESKEYGENASSIST-free) schedule. */
.L12814rounds_alt:
	movdqa	(%ebx),%xmm5
	movdqa	32(%ebx),%xmm4
	movl	$7,%ecx
	movdqu	%xmm0,-32(%edx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,-16(%edx)
.L132loop_key256:
.byte	102,15,56,0,213			/* pshufb %xmm5,%xmm2 */
.byte	102,15,56,221,212		/* aesenclast %xmm4,%xmm2 */
	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4
	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%edx)
	decl	%ecx
	jz	.L133done_key256
	pshufd	$255,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		/* aesenclast %xmm3,%xmm2 (rcon=0) */
	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1
	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%edx)
	leal	32(%edx),%edx
	movdqa	%xmm2,%xmm1
	jmp	.L132loop_key256
.L133done_key256:
	movl	$13,%ecx
	movl	%ecx,16(%edx)		/* round count at key+240 */
/* Common success exit: wipe key material from xmm regs, return 0. */
.L120good_key:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorl	%eax,%eax
	popl	%ebx
	popl	%ebp
	ret
.align	4
.L111bad_pointer:
	movl	$-1,%eax		/* NULL argument */
	popl	%ebx
	popl	%ebp
	ret
.align	4
.L115bad_keybits:
	pxor	%xmm0,%xmm0		/* drop loaded key bits before erroring */
	movl	$-2,%eax		/* unsupported key size */
	popl	%ebx
	popl	%ebp
	ret
.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)
 * Public cdecl entry: marshal the three stack arguments into the
 * %eax/%ecx/%edx registers expected by _aesni_set_encrypt_key and
 * return its status (0, -1 bad pointer, -2 bad key size) in %eax.
 */
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
	movl	4(%esp),%eax		/* userKey */
	movl	8(%esp),%ecx		/* bits */
	movl	12(%esp),%edx		/* key schedule */
	call	_aesni_set_encrypt_key
	ret
.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key)
 * Build an encryption schedule, then convert it for the Equivalent
 * Inverse Cipher: round keys are swapped end-for-end and every key
 * except the two outermost is passed through AESIMC
 * (.byte 102,15,56,219,* below).  Returns the helper's status in
 * %eax (0 on success); on failure the schedule is left untouched.
 * On return from the helper %ecx holds the round count, so
 * %ecx<<4 is the byte offset of the last round key.
 */
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
	movl	4(%esp),%eax		/* userKey */
	movl	8(%esp),%ecx		/* bits */
	movl	12(%esp),%edx		/* key schedule */
	call	_aesni_set_encrypt_key
	movl	12(%esp),%edx
	shll	$4,%ecx			/* rounds*16 = offset of last key */
	testl	%eax,%eax
	jnz	.L134dec_key_ret	/* propagate key-setup error */
	leal	16(%edx,%ecx,1),%eax	/* %eax -> last round key */
	movups	(%edx),%xmm0		/* swap first and last keys verbatim */
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
.L135dec_key_inverse:
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
.byte	102,15,56,219,192		/* aesimc %xmm0,%xmm0 */
.byte	102,15,56,219,201		/* aesimc %xmm1,%xmm1 */
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)		/* store each at the mirrored slot */
	movups	%xmm1,-16(%edx)
	cmpl	%edx,%eax
	ja	.L135dec_key_inverse
	movups	(%edx),%xmm0		/* middle key: InvMixColumns in place */
.byte	102,15,56,219,192		/* aesimc %xmm0,%xmm0 */
	movups	%xmm0,(%edx)
	pxor	%xmm0,%xmm0		/* wipe key material */
	pxor	%xmm1,%xmm1
	xorl	%eax,%eax		/* success */
.L134dec_key_ret:
	ret
.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
/*
 * Constant table for the AESKEYGENASSIST-free ("_alt") key schedules:
 *   +0  : pshufb byte-selection mask 0x0c0f0e0d... (128/256-bit paths)
 *   +16 : pshufb byte-selection mask 0x04070605... (192-bit path)
 *   +32 : initial rcon = {1,1,1,1}, doubled in-loop via pslld $1
 *   +48 : rcon {27,27,27,27} = 0x1b, used after the 0x80 wrap
 * Followed by the CRYPTOGAMS banner ("AES for Intel AES-NI, CRYPTOGAMS
 * by <appro@openssl.org>") and the shared CPU-capability vector.
 */
.align	64
.Lkey_const:
.long	202313229,202313229,202313229,202313229
.long	67569157,67569157,67569157,67569157
.long	1,1,1,1
.long	27,27,27,27
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte	115,108,46,111,114,103,62,0
.comm	OPENSSL_ia32cap_P,16,4
3240#else
3241.text
/*
 * void aesni_encrypt(const unsigned char *in, unsigned char *out,
 *                    const AES_KEY *key)       (non-PIC build)
 * Single-block AES encryption.  Round count is read from key+240;
 * the loop applies AESENC (.byte 102,15,56,220) once per remaining
 * round and AESENCLAST (.byte ...,221) for the final one.  Key/data
 * xmm registers are cleared before returning.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.L_aesni_encrypt_begin:
	movl	4(%esp),%eax		/* in */
	movl	12(%esp),%edx		/* key */
	movups	(%eax),%xmm2		/* load plaintext block */
	movl	240(%edx),%ecx		/* rounds */
	movl	8(%esp),%eax		/* out */
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		/* whitening with round-0 key */
.L000enc1_loop_1:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1		/* next round key */
	leal	16(%edx),%edx
	jnz	.L000enc1_loop_1
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* wipe key material */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)		/* store ciphertext */
	pxor	%xmm2,%xmm2
	ret
.size	aesni_encrypt,.-.L_aesni_encrypt_begin
/*
 * void aesni_decrypt(const unsigned char *in, unsigned char *out,
 *                    const AES_KEY *key)       (non-PIC build)
 * Single-block AES decryption; mirror of aesni_encrypt using
 * AESDEC (.byte 102,15,56,222) / AESDECLAST (.byte ...,223).
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.L_aesni_decrypt_begin:
	movl	4(%esp),%eax		/* in */
	movl	12(%esp),%edx		/* key */
	movups	(%eax),%xmm2		/* load ciphertext block */
	movl	240(%edx),%ecx		/* rounds */
	movl	8(%esp),%eax		/* out */
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		/* whitening */
.L001dec1_loop_2:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L001dec1_loop_2
.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	pxor	%xmm0,%xmm0		/* wipe key material */
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%eax)
	pxor	%xmm2,%xmm2
	ret
.size	aesni_decrypt,.-.L_aesni_decrypt_begin
/*
 * _aesni_encrypt2: encrypt two blocks (%xmm2,%xmm3) in place.
 * In:  %edx = key schedule, %ecx = rounds.
 * The schedule pointer is advanced past the end and walked back with a
 * negative index in %ecx (reset to 0 exactly at the last round), so the
 * loop condition is the flags from "addl $32,%ecx".  Interleaves the
 * two AESENC streams for pipelining.  Clobbers %xmm0/%xmm1, %ecx, %edx.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx			/* rounds*16 bytes */
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening both blocks */
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx	/* %edx -> schedule end */
	negl	%ecx
	addl	$16,%ecx
.L002enc2_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208		/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216		/* aesenc %xmm0,%xmm3 */
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L002enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		/* aesenclast %xmm0,%xmm2 */
.byte	102,15,56,221,216		/* aesenclast %xmm0,%xmm3 */
	ret
.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: decrypt two blocks (%xmm2,%xmm3) in place.
 * Same structure and register contract as _aesni_encrypt2, with
 * AESDEC/AESDECLAST (.byte 102,15,56,222/223).
 */
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
	movups	(%edx),%xmm0
	shll	$4,%ecx			/* rounds*16 bytes */
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening */
	pxor	%xmm0,%xmm3
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx	/* %edx -> schedule end */
	negl	%ecx
	addl	$16,%ecx
.L003dec2_loop:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217		/* aesdec %xmm1,%xmm3 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L003dec2_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208		/* aesdeclast %xmm0,%xmm2 */
.byte	102,15,56,223,216		/* aesdeclast %xmm0,%xmm3 */
	ret
.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: encrypt three blocks (%xmm2..%xmm4) in place.
 * Same negative-index schedule walk as _aesni_encrypt2, three
 * interleaved AESENC streams.  In: %edx = key, %ecx = rounds.
 */
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening all three blocks */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L004enc3_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
.byte	102,15,56,220,225		/* aesenc %xmm1,%xmm4 */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L004enc3_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208		/* aesenclast x3 */
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: decrypt three blocks (%xmm2..%xmm4) in place.
 * Mirror of _aesni_encrypt3 with AESDEC/AESDECLAST.
 */
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
	addl	$16,%ecx
.L005dec3_loop:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L005dec3_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208		/* aesdeclast x3 */
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: encrypt four blocks (%xmm2..%xmm5) in place.
 * Same contract as the 2/3-block helpers; the .byte 15,31,64,0 is a
 * 4-byte nop (nopl 0(%eax)) kept for code alignment/decoder reasons.
 */
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2		/* whitening all four blocks */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			/* nopl 0(%eax): alignment padding */
	addl	$16,%ecx
.L006enc4_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2..%xmm5 */
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L006enc4_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208		/* aesenclast x4 */
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: decrypt four blocks (%xmm2..%xmm5) in place.
 * Mirror of _aesni_encrypt4 with AESDEC/AESDECLAST.
 */
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	shll	$4,%ecx
	xorps	%xmm0,%xmm2		/* whitening */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	32(%edx),%xmm0
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	15,31,64,0			/* nopl 0(%eax): alignment padding */
	addl	$16,%ecx
.L007dec4_loop:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2..%xmm5 */
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L007dec4_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208		/* aesdeclast x4 */
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: encrypt six blocks (%xmm2..%xmm7) in place.
 * In: %edx = key schedule, %ecx = rounds.  The first round for the
 * early blocks is issued while the later blocks are still being
 * whitened, then execution joins the main loop at
 * .L008_aesni_encrypt6_inner; .L_aesni_encrypt6_enter is an extra
 * entry point for callers that pre-load round keys themselves.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening, interleaved with ... */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		/* ... first aesenc of block 0 */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,217
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	.L008_aesni_encrypt6_inner
.align	16
.L009enc6_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2..%xmm7 */
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.L008_aesni_encrypt6_inner:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.L_aesni_encrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L009enc6_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208		/* aesenclast x6 */
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: decrypt six blocks (%xmm2..%xmm7) in place.
 * Mirror of _aesni_encrypt6 with AESDEC/AESDECLAST; same staggered
 * whitening and the same alternate entry .L_aesni_decrypt6_enter.
 */
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%edx),%xmm0
	shll	$4,%ecx
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm2		/* whitening, interleaved ... */
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209		/* ... with first aesdec */
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,217
	leal	32(%edx,%ecx,1),%edx
	negl	%ecx
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%edx,%ecx,1),%xmm0
	addl	$16,%ecx
	jmp	.L010_aesni_decrypt6_inner
.align	16
.L011dec6_loop:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2..%xmm7 */
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.L010_aesni_decrypt6_inner:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.L_aesni_decrypt6_enter:
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L011dec6_loop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208		/* aesdeclast x6 */
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
 *                        size_t length, const AES_KEY *key, int enc)
 * Stack args at 20/24/28/32/36(%esp) after the four register pushes.
 * length is rounded down to a whole number of 16-byte blocks; enc!=0
 * selects encryption, enc==0 decryption.  The bulk loop processes six
 * blocks per iteration via _aesni_{en,de}crypt6, with 1..5-block tails.
 * %ebp/%ebx cache the key pointer / round count across helper calls
 * (the helpers clobber %edx/%ecx).  All xmm registers are cleared on
 * exit so no key or data material leaks.
 */
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		/* in */
	movl	24(%esp),%edi		/* out */
	movl	28(%esp),%eax		/* length */
	movl	32(%esp),%edx		/* key */
	movl	36(%esp),%ebx		/* enc flag */
	andl	$-16,%eax		/* whole blocks only */
	jz	.L012ecb_ret
	movl	240(%edx),%ecx		/* rounds */
	testl	%ebx,%ebx
	jz	.L013ecb_decrypt
	movl	%edx,%ebp		/* cache key and rounds: helpers */
	movl	%ecx,%ebx		/* clobber %edx/%ecx */
	cmpl	$96,%eax
	jb	.L014ecb_enc_tail
	movdqu	(%esi),%xmm2		/* preload first 6 blocks */
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L015ecb_enc_loop6_enter
.align	16
/* Steady state: store previous 6 outputs while loading next 6 inputs. */
.L016ecb_enc_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L015ecb_enc_loop6_enter:
	call	_aesni_encrypt6
	movl	%ebp,%edx		/* restore key/rounds for next call */
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L016ecb_enc_loop6
	movups	%xmm2,(%edi)		/* flush last 6 outputs */
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax		/* undo over-subtraction */
	jz	.L012ecb_ret
/* Encrypt tail: 1..5 remaining blocks, dispatched by size. */
.L014ecb_enc_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L017ecb_enc_one
	movups	16(%esi),%xmm3
	je	.L018ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L019ecb_enc_three
	movups	48(%esi),%xmm5
	je	.L020ecb_enc_four
	movups	64(%esi),%xmm6		/* five blocks: run 6-wide with */
	xorps	%xmm7,%xmm7		/* a dummy sixth block */
	call	_aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
/* One block: inline single-block encrypt loop. */
.L017ecb_enc_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L021enc1_loop_3:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L021enc1_loop_3
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L018ecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L019ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L020ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	.L012ecb_ret
.align	16
/* Decrypt side: structurally identical to the encrypt path. */
.L013ecb_decrypt:
	movl	%edx,%ebp
	movl	%ecx,%ebx
	cmpl	$96,%eax
	jb	.L022ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L023ecb_dec_loop6_enter
.align	16
.L024ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L023ecb_dec_loop6_enter:
	call	_aesni_decrypt6
	movl	%ebp,%edx
	movl	%ebx,%ecx
	subl	$96,%eax
	jnc	.L024ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	.L012ecb_ret
.L022ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L025ecb_dec_one
	movups	16(%esi),%xmm3
	je	.L026ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L027ecb_dec_three
	movups	48(%esi),%xmm5
	je	.L028ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7		/* dummy sixth block */
	call	_aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L012ecb_ret
.align	16
.L025ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L029dec1_loop_4:
.byte	102,15,56,222,209		/* aesdec %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L029dec1_loop_4
.byte	102,15,56,223,209		/* aesdeclast %xmm1,%xmm2 */
	movups	%xmm2,(%edi)
	jmp	.L012ecb_ret
.align	16
.L026ecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L012ecb_ret
.align	16
.L027ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L012ecb_ret
.align	16
.L028ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
/* Common exit: scrub all xmm state. */
.L012ecb_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
/*
 * void aesni_ccm64_encrypt_blocks(const unsigned char *in,
 *          unsigned char *out, size_t blocks, const AES_KEY *key,
 *          const unsigned char *ivec, unsigned char *cmac)
 * Stack args at 20..40(%esp) after the four pushes.  CCM bulk
 * encryption: %xmm7 holds the CTR block, %xmm3 the running CBC-MAC.
 * A byte-reversal pshufb mask (0x0c0d0e0f...00010203) and the 64-bit
 * counter increment constant are built on an aligned stack scratch
 * area; the original %esp is saved at 48(%esp).  Each iteration runs
 * the counter-encryption and the MAC-absorption as two interleaved
 * AES streams over the same round-key walk, then XORs the keystream
 * into the plaintext.  The final CMAC is written back through the
 * 6th argument, and all xmm state is cleared on exit.
 */
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		/* in */
	movl	24(%esp),%edi		/* out */
	movl	28(%esp),%eax		/* block count */
	movl	32(%esp),%edx		/* key */
	movl	36(%esp),%ebx		/* ivec */
	movl	40(%esp),%ecx		/* cmac */
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		/* 16-align scratch for movdqa */
	movl	%ebp,48(%esp)		/* save original %esp */
	movdqu	(%ebx),%xmm7		/* counter block */
	movdqu	(%ecx),%xmm3		/* running CBC-MAC */
	movl	240(%edx),%ecx		/* rounds */
	movl	$202182159,(%esp)	/* byte-reverse pshufb mask: */
	movl	$134810123,4(%esp)	/*   0f 0e 0d ... 01 00 */
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			/* counter increment = 1 (low qword) */
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shll	$4,%ecx
	movl	$16,%ebx
	leal	(%edx),%ebp		/* %ebp = key base */
	movdqa	(%esp),%xmm5		/* bswap mask */
	movdqa	%xmm7,%xmm2
	leal	32(%edx,%ecx,1),%edx	/* %edx -> end of schedule */
	subl	%ecx,%ebx		/* negative round-key index seed */
.byte	102,15,56,0,253			/* pshufb %xmm5,%xmm7: LE counter */
.L030ccm64_enc_outer:
	movups	(%ebp),%xmm0		/* round-0 key */
	movl	%ebx,%ecx
	movups	(%esi),%xmm6		/* plaintext block */
	xorps	%xmm0,%xmm2		/* whiten counter */
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	xorps	%xmm0,%xmm3		/* absorb plaintext into MAC */
	movups	32(%ebp),%xmm0
.L031ccm64_enc2_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 (CTR stream) */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 (MAC stream) */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L031ccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	16(%esp),%xmm7		/* bump counter */
	decl	%eax
.byte	102,15,56,221,208		/* aesenclast x2 */
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6		/* ciphertext = pt ^ keystream */
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
.byte	102,15,56,0,213			/* pshufb: re-swap next counter */
	leal	16(%edi),%edi
	jnz	.L030ccm64_enc_outer
	movl	48(%esp),%esp		/* restore caller stack */
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		/* write back CMAC */
	pxor	%xmm0,%xmm0		/* scrub xmm state */
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
/*
 * void aesni_ccm64_decrypt_blocks(const unsigned char *in,
 *          unsigned char *out, size_t blocks, const AES_KEY *key,
 *          const unsigned char *ivec, unsigned char *cmac)
 * CCM bulk decryption; same frame/scratch layout as the encrypt
 * variant (bswap mask + counter increment on aligned stack, caller
 * %esp saved at 48(%esp)).  The first counter block is encrypted
 * up-front; in the loop each ciphertext block is XORed with the
 * current keystream and the recovered plaintext is absorbed into
 * the CBC-MAC while the next counter block is encrypted (two
 * interleaved AES streams).  The final partial MAC state (%xmm3)
 * gets one more single-stream encryption pass at .L034ccm64_dec_break
 * before being written back through the cmac argument.
 */
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
.L_aesni_ccm64_decrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		/* in */
	movl	24(%esp),%edi		/* out */
	movl	28(%esp),%eax		/* block count */
	movl	32(%esp),%edx		/* key */
	movl	36(%esp),%ebx		/* ivec */
	movl	40(%esp),%ecx		/* cmac */
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		/* 16-align scratch */
	movl	%ebp,48(%esp)		/* save original %esp */
	movdqu	(%ebx),%xmm7		/* counter block */
	movdqu	(%ecx),%xmm3		/* running CBC-MAC */
	movl	240(%edx),%ecx		/* rounds */
	movl	$202182159,(%esp)	/* byte-reverse pshufb mask */
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			/* counter increment */
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5		/* bswap mask */
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp		/* cache key base and rounds */
	movl	%ecx,%ebx
.byte	102,15,56,0,253			/* pshufb %xmm5,%xmm7 */
/* Encrypt the first counter block with a plain single-stream loop. */
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L032enc1_loop_5:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L032enc1_loop_5
.byte	102,15,56,221,209		/* aesenclast %xmm1,%xmm2 */
	shll	$4,%ebx
	movl	$16,%ecx
	movups	(%esi),%xmm6		/* first ciphertext block */
	paddq	16(%esp),%xmm7		/* bump counter */
	leal	16(%esi),%esi
	subl	%ebx,%ecx		/* negative round-key index seed */
	leal	32(%ebp,%ebx,1),%edx	/* %edx -> end of schedule */
	movl	%ecx,%ebx
	jmp	.L033ccm64_dec_outer
.align	16
.L033ccm64_dec_outer:
	xorps	%xmm2,%xmm6		/* plaintext = ct ^ keystream */
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213			/* pshufb: swap next counter */
	subl	$1,%eax
	jz	.L034ccm64_dec_break
	movups	(%ebp),%xmm0		/* round-0 key */
	movl	%ebx,%ecx
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	xorps	%xmm0,%xmm2		/* whiten counter */
	xorps	%xmm6,%xmm3		/* absorb plaintext into MAC */
	movups	32(%ebp),%xmm0
.L035ccm64_dec2_loop:
.byte	102,15,56,220,209		/* aesenc %xmm1,%xmm2 (CTR) */
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 (MAC) */
	movups	(%edx,%ecx,1),%xmm1
	addl	$32,%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%edx,%ecx,1),%xmm0
	jnz	.L035ccm64_dec2_loop
	movups	(%esi),%xmm6		/* next ciphertext block */
	paddq	16(%esp),%xmm7		/* bump counter */
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208		/* aesenclast x2 */
.byte	102,15,56,221,216
	leal	16(%esi),%esi
	jmp	.L033ccm64_dec_outer
.align	16
/* Last block consumed: finish the MAC with one more AES pass. */
.L034ccm64_dec_break:
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3		/* absorb final plaintext */
.L036enc1_loop_6:
.byte	102,15,56,220,217		/* aesenc %xmm1,%xmm3 */
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L036enc1_loop_6
.byte	102,15,56,221,217		/* aesenclast %xmm1,%xmm3 */
	movl	48(%esp),%esp		/* restore caller stack */
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		/* write back CMAC */
	pxor	%xmm0,%xmm0		/* scrub xmm state */
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
# void aesni_ctr32_encrypt_blocks(const char *in, char *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec)
# i386 cdecl.  AES-CTR with a 32-bit big-endian counter in the last word of
# ivec.  Main path encrypts 6 counter blocks per iteration; counters are
# kept as two xmm vectors of little-endian lanes and byte-swapped with
# pshufb just before use.  Caller handles any partial final block.
# NOTE(review): auto-generated from aesni-x86.pl; annotate only, do not
# hand-edit logic here.
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
.L_aesni_ctr32_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi	# esi = in
	movl	24(%esp),%edi	# edi = out
	movl	28(%esp),%eax	# eax = blocks
	movl	32(%esp),%edx	# edx = key schedule
	movl	36(%esp),%ebx	# ebx = ivec
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp	# align local frame
	movl	%ebp,80(%esp)	# stash original %esp
	cmpl	$1,%eax
	je	.L037ctr32_one_shortcut	# single block: skip counter vector setup
	movdqu	(%ebx),%xmm7	# xmm7 = initial counter block
	# 0(%esp): byte-swap mask; 16..24(%esp): per-lane increment of 6.
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3	# pextrd $3,%xmm7,%ebx  (counter word -> ebx)
.byte	102,15,58,34,253,3	# pinsrd $3,%ebp,%xmm7  (zero counter lane)
	movl	240(%edx),%ecx	# ecx = rounds
	bswap	%ebx	# counter to host (LE) order for arithmetic
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movdqa	(%esp),%xmm2	# xmm2 = byte-swap mask
	# Build xmm0 = {ctr,ctr+1,ctr+2,-} and xmm1 = {ctr+3,ctr+4,ctr+5,-}.
.byte	102,15,58,34,195,0	# pinsrd $0,%ebx,%xmm0
	leal	3(%ebx),%ebp
.byte	102,15,58,34,205,0	# pinsrd $0,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,1	# pinsrd $1,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,1	# pinsrd $1,%ebp,%xmm1
	incl	%ebx
.byte	102,15,58,34,195,2	# pinsrd $2,%ebx,%xmm0
	incl	%ebp
.byte	102,15,58,34,205,2	# pinsrd $2,%ebp,%xmm1
	movdqa	%xmm0,48(%esp)	# save LE counter vectors
.byte	102,15,56,0,194	# pshufb %xmm2,%xmm0 (to BE)
	movdqu	(%edx),%xmm6	# xmm6 = round key 0
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202	# pshufb %xmm2,%xmm1 (to BE)
	pshufd	$192,%xmm0,%xmm2	# broadcast counter lanes into block regs
	pshufd	$128,%xmm0,%xmm3
	cmpl	$6,%eax
	jb	.L038ctr32_tail	# fewer than 6 blocks: tail dispatch
	pxor	%xmm6,%xmm7	# pre-xor IV body with round key 0
	shll	$4,%ecx
	movl	$16,%ebx
	movdqa	%xmm7,32(%esp)
	movl	%edx,%ebp	# ebp = key schedule
	subl	%ecx,%ebx	# ebx = -16*(rounds-1), loop-offset base
	leal	32(%edx,%ecx,1),%edx	# edx = end of key schedule
	subl	$6,%eax
	jmp	.L039ctr32_loop6
.align	16
# 6-blocks-at-a-time loop: build 6 counter blocks, run them through the
# shared _aesni_encrypt6 round code, xor with input, advance counters by 6.
.L039ctr32_loop6:
	pshufd	$64,%xmm0,%xmm4
	movdqa	32(%esp),%xmm0
	pshufd	$192,%xmm1,%xmm5
	pxor	%xmm0,%xmm2
	pshufd	$128,%xmm1,%xmm6
	pxor	%xmm0,%xmm3
	pshufd	$64,%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movups	32(%ebp),%xmm0
	movl	%ebx,%ecx
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	call	.L_aesni_encrypt6_enter	# remaining rounds for all 6 blocks
	# XOR keystream with input, store, and advance the counter vectors.
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0	# increment vector {6,6,6,0}
	xorps	%xmm1,%xmm4
	movdqa	64(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1	# counters += 6 (LE lanes)
	paddd	48(%esp),%xmm0
	movdqa	(%esp),%xmm2
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm0,48(%esp)
.byte	102,15,56,0,194	# pshufb %xmm2,%xmm0 (new counters to BE)
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm1,64(%esp)
.byte	102,15,56,0,202	# pshufb %xmm2,%xmm1
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	pshufd	$128,%xmm0,%xmm3
	subl	$6,%eax
	jnc	.L039ctr32_loop6
	addl	$6,%eax	# eax = leftover blocks (0..5)
	jz	.L040ctr32_ret
	movdqu	(%ebp),%xmm7
	movl	%ebp,%edx
	pxor	32(%esp),%xmm7	# undo round-key-0 pre-xor: xmm7 = rk0
	movl	240(%ebp),%ecx
# Tail dispatch for 1..5 remaining blocks (xmm7 = round key 0 here).
.L038ctr32_tail:
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	.L041ctr32_one
	pshufd	$64,%xmm0,%xmm4
	por	%xmm7,%xmm3
	je	.L042ctr32_two
	pshufd	$192,%xmm1,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	.L043ctr32_three
	pshufd	$128,%xmm1,%xmm6
	por	%xmm7,%xmm5
	je	.L044ctr32_four
	# Five blocks: encrypt 6 and use the first five results.
	por	%xmm7,%xmm6
	call	_aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L040ctr32_ret
.align	16
# blocks == 1: encrypt the IV directly, no counter vectors needed.
.L037ctr32_one_shortcut:
	movups	(%ebx),%xmm2
	movl	240(%edx),%ecx
.L041ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L045enc1_loop_7:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L045enc1_loop_7
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	.L040ctr32_ret
.align	16
.L042ctr32_two:
	call	_aesni_encrypt2
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L040ctr32_ret
.align	16
.L043ctr32_three:
	call	_aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L040ctr32_ret
.align	16
.L044ctr32_four:
	call	_aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
# Common exit: scrub XMM state and the counter scratch on the stack so no
# key/keystream material is left behind.
.L040ctr32_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movl	80(%esp),%esp	# restore caller frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
# void aesni_xts_encrypt(const char *inp, char *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const unsigned char iv[16])
# i386 cdecl.  XTS-AES encryption: the tweak is iv encrypted under key2,
# then multiplied by x in GF(2^128) (polynomial 0x87) once per data block.
# Main path does 6 blocks per iteration; a trailing partial block is
# handled with ciphertext stealing at .L057xts_enc_steal.
# The x-multiply idiom used throughout:
#   pcmpgtd/pshufd $19 extracts the carry out of the top bit,
#   paddq shifts the 2x64 halves left by one,
#   pand with 0x87 mask + pxor folds the carry back in.
# NOTE(review): auto-generated from aesni-x86.pl; annotate only.
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
.L_aesni_xts_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx	# edx = key2 (tweak key)
	movl	40(%esp),%esi	# esi = iv
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	# Encrypt iv under key2 to obtain the initial tweak in xmm2.
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L046enc1_loop_8:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L046enc1_loop_8
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movl	20(%esp),%esi	# esi = inp
	movl	24(%esp),%edi	# edi = out
	movl	28(%esp),%eax	# eax = len
	movl	32(%esp),%edx	# edx = key1 (data key)
	movl	%esp,%ebp
	subl	$120,%esp
	movl	240(%edx),%ecx
	andl	$-16,%esp	# align local frame
	# 96(%esp): GF(2^128) reduction constant {0x87, 0, 1, 0}.
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)	# save len (for stealing tail)
	movl	%ebp,116(%esp)	# save original %esp
	movdqa	%xmm2,%xmm1	# xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3	# xmm3 = reduction mask
	pcmpgtd	%xmm1,%xmm0	# xmm0 = carry predicate for tweak
	andl	$-16,%eax	# round len down to whole blocks
	movl	%edx,%ebp	# ebp = key1
	movl	%ecx,%ebx	# ebx = rounds
	subl	$96,%eax
	jc	.L047xts_enc_short	# < 6 blocks: go to short path
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx	# loop-offset base for round code
	leal	32(%edx,%ecx,1),%edx	# edx = end of key1 schedule
	jmp	.L048xts_enc_loop6
.align	16
# Main loop: derive 6 consecutive tweaks (spilled to 0..64(%esp) and xmm7),
# xor into 6 input blocks, encrypt all 6, xor tweaks back, store.
.L048xts_enc_loop6:
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)	# tweak[0]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1	# tweak *= x
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)	# tweak[1]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)	# tweak[2]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)	# tweak[3]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)	# tweak[4]
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0	# round key 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7	# xmm7 = tweak[5]
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2	# pre-xor round key 0 into each block
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2	# xor tweaks into blocks
	movdqa	%xmm7,80(%esp)	# spill tweak[5]
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	call	.L_aesni_encrypt6_enter	# remaining rounds for 6 blocks
	movdqa	80(%esp),%xmm1	# reload tweak[5]
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2	# xor tweaks back out
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1	# advance tweak for next iteration
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L048xts_enc_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
# Short path: 0..5 whole blocks remain; derive just enough tweaks and
# dispatch to the 1/2/3/4/5-block handlers.
.L047xts_enc_short:
	addl	$96,%eax
	jz	.L049xts_enc_done6x
	movdqa	%xmm1,%xmm5	# xmm5 = current tweak
	cmpl	$32,%eax
	jb	.L050xts_enc_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L051xts_enc_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L052xts_enc_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L053xts_enc_four
	# Five blocks.
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_encrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L054xts_enc_done
.align	16
.L050xts_enc_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2	# xor tweak in
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L055enc1_loop_9:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L055enc1_loop_9
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2	# xor tweak out
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1	# xmm1 = last tweak used
	jmp	.L054xts_enc_done
.align	16
.L051xts_enc_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	_aesni_encrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L052xts_enc_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L054xts_enc_done
.align	16
.L053xts_enc_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L054xts_enc_done
.align	16
# Length was a multiple of 96: tweak for a possible stolen tail is already
# the next one in xmm1.
.L049xts_enc_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax	# eax = residual bytes (0..15)
	jz	.L056xts_enc_ret
	movdqa	%xmm1,%xmm5
	movl	%eax,112(%esp)
	jmp	.L057xts_enc_steal
.align	16
# Compute one more tweak for the ciphertext-stealing tail.
.L054xts_enc_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	.L056xts_enc_ret
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm5
	paddq	%xmm1,%xmm1
	pand	96(%esp),%xmm5
	pxor	%xmm1,%xmm5	# xmm5 = tweak for stolen block
# Ciphertext stealing: swap the residual input bytes with the tail of the
# last full ciphertext block, then re-encrypt that block.
.L057xts_enc_steal:
	movzbl	(%esi),%ecx
	movzbl	-16(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,-16(%edi)
	movb	%dl,(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L057xts_enc_steal
	subl	112(%esp),%edi	# rewind to start of stolen block
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	-16(%edi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L058enc1_loop_10:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L058enc1_loop_10
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2
	movups	%xmm2,-16(%edi)
# Exit: scrub XMM registers and the tweak scratch area on the stack.
.L056xts_enc_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp	# restore caller frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
# void aesni_xts_decrypt(const char *inp, char *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const unsigned char iv[16])
# i386 cdecl.  XTS-AES decryption, mirror of aesni_xts_encrypt: tweak is
# iv ENCRYPTED under key2 (tweak generation always uses forward AES), data
# blocks are decrypted under key1.  If len is not block-aligned, one full
# block is held back so the stealing tail (.L070/.L072) can decrypt the
# last two blocks in the required order.  Same GF(2^128) x-multiply idiom
# as the encrypt side (pcmpgtd/pshufd $19/paddq/pand 0x87/pxor).
# NOTE(review): auto-generated from aesni-x86.pl; annotate only.
.globl	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx	# edx = key2 (tweak key)
	movl	40(%esp),%esi	# esi = iv
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	# Encrypt iv under key2 to produce the initial tweak in xmm2.
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L059enc1_loop_11:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L059enc1_loop_11
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movl	20(%esp),%esi	# esi = inp
	movl	24(%esp),%edi	# edi = out
	movl	28(%esp),%eax	# eax = len
	movl	32(%esp),%edx	# edx = key1 (data key)
	movl	%esp,%ebp
	subl	$120,%esp
	andl	$-16,%esp	# align local frame
	# If len has a residue, reserve one extra block for stealing.
	xorl	%ebx,%ebx
	testl	$15,%eax
	setnz	%bl
	shll	$4,%ebx
	subl	%ebx,%eax
	# 96(%esp): GF(2^128) reduction constant {0x87, 0, 1, 0}.
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)	# save adjusted len
	movl	%ebp,116(%esp)	# save original %esp
	movl	240(%edx),%ecx
	movl	%edx,%ebp	# ebp = key1
	movl	%ecx,%ebx	# ebx = rounds
	movdqa	%xmm2,%xmm1	# xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3	# xmm3 = reduction mask
	pcmpgtd	%xmm1,%xmm0
	andl	$-16,%eax
	subl	$96,%eax
	jc	.L060xts_dec_short	# < 6 blocks: short path
	shll	$4,%ecx
	movl	$16,%ebx
	subl	%ecx,%ebx	# loop-offset base for round code
	leal	32(%edx,%ecx,1),%edx	# edx = end of key1 schedule
	jmp	.L061xts_dec_loop6
.align	16
# Main loop: derive 6 tweaks (0..64(%esp) and xmm7), xor into 6 blocks,
# decrypt all 6 via _aesni_decrypt6, xor tweaks back, store.
.L061xts_dec_loop6:
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)	# tweak[0]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1	# tweak *= x
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)	# tweak[1]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)	# tweak[2]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)	# tweak[3]
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)	# tweak[4]
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0	# round key 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2
	pxor	%xmm1,%xmm7	# xmm7 = tweak[5]
	movl	%ebx,%ecx
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2	# pre-xor round key 0
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2	# xor tweaks in
	movdqa	%xmm7,80(%esp)	# spill tweak[5]
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	pxor	16(%esp),%xmm3
	pxor	32(%esp),%xmm4
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	pxor	48(%esp),%xmm5
	pxor	64(%esp),%xmm6
.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm7
	movups	32(%ebp),%xmm0
.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
	call	.L_aesni_decrypt6_enter	# remaining rounds for 6 blocks
	movdqa	80(%esp),%xmm1	# reload tweak[5]
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2	# xor tweaks out
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1	# advance tweak
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L061xts_dec_loop6
	movl	240(%ebp),%ecx
	movl	%ebp,%edx
	movl	%ecx,%ebx
# Short path: 0..5 whole blocks remain; dispatch to 1/2/3/4/5-block code.
.L060xts_dec_short:
	addl	$96,%eax
	jz	.L062xts_dec_done6x
	movdqa	%xmm1,%xmm5	# xmm5 = current tweak
	cmpl	$32,%eax
	jb	.L063xts_dec_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L064xts_dec_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L065xts_dec_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L066xts_dec_four
	# Five blocks.
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_decrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L067xts_dec_done
.align	16
.L063xts_dec_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2	# xor tweak in
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L068dec1_loop_12:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L068dec1_loop_12
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2	# xor tweak out
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1	# xmm1 = last tweak used
	jmp	.L067xts_dec_done
.align	16
.L064xts_dec_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	_aesni_decrypt2
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L065xts_dec_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_decrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L067xts_dec_done
.align	16
.L066xts_dec_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_decrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L067xts_dec_done
.align	16
# Length was a multiple of 96: go straight to the one-more-block tail.
.L062xts_dec_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax	# eax = residual bytes (0..15)
	jz	.L069xts_dec_ret
	movl	%eax,112(%esp)
	jmp	.L070xts_dec_only_one_more
.align	16
# Stealing tail needs TWO tweaks on decrypt: advance once here (the held-
# back full block uses the LATER tweak), then once more below.
.L067xts_dec_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	.L069xts_dec_ret
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
.L070xts_dec_only_one_more:
	pshufd	$19,%xmm0,%xmm5
	movdqa	%xmm1,%xmm6	# xmm6 = tweak for the final partial step
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm5
	pxor	%xmm1,%xmm5	# xmm5 = tweak for held-back block
	movl	%ebp,%edx
	movl	%ebx,%ecx
	# Decrypt the held-back full block with the LAST tweak.
	movups	(%esi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L071dec1_loop_13:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L071dec1_loop_13
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
# Swap the residual ciphertext bytes with the head of the block just
# written (ciphertext stealing, decrypt direction).
.L072xts_dec_steal:
	movzbl	16(%esi),%ecx
	movzbl	(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,(%edi)
	movb	%dl,16(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L072xts_dec_steal
	subl	112(%esp),%edi	# rewind to the reassembled block
	movl	%ebp,%edx
	movl	%ebx,%ecx
	# Decrypt the reassembled block with the EARLIER tweak (xmm6).
	movups	(%edi),%xmm2
	xorps	%xmm6,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L073dec1_loop_14:
.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L073dec1_loop_14
.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps	%xmm6,%xmm2
	movups	%xmm2,(%edi)
# Exit: scrub XMM registers and the tweak scratch area on the stack.
.L069xts_dec_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movdqa	%xmm0,(%esp)
	pxor	%xmm3,%xmm3
	movdqa	%xmm0,16(%esp)
	pxor	%xmm4,%xmm4
	movdqa	%xmm0,32(%esp)
	pxor	%xmm5,%xmm5
	movdqa	%xmm0,48(%esp)
	pxor	%xmm6,%xmm6
	movdqa	%xmm0,64(%esp)
	pxor	%xmm7,%xmm7
	movdqa	%xmm0,80(%esp)
	movl	116(%esp),%esp	# restore caller frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
5032.globl	aesni_ocb_encrypt
5033.type	aesni_ocb_encrypt,@function
5034.align	16
5035aesni_ocb_encrypt:
5036.L_aesni_ocb_encrypt_begin:
5037	pushl	%ebp
5038	pushl	%ebx
5039	pushl	%esi
5040	pushl	%edi
5041	movl	40(%esp),%ecx
5042	movl	48(%esp),%ebx
5043	movl	20(%esp),%esi
5044	movl	24(%esp),%edi
5045	movl	28(%esp),%eax
5046	movl	32(%esp),%edx
5047	movdqu	(%ecx),%xmm0
5048	movl	36(%esp),%ebp
5049	movdqu	(%ebx),%xmm1
5050	movl	44(%esp),%ebx
5051	movl	%esp,%ecx
5052	subl	$132,%esp
5053	andl	$-16,%esp
5054	subl	%esi,%edi
5055	shll	$4,%eax
5056	leal	-96(%esi,%eax,1),%eax
5057	movl	%edi,120(%esp)
5058	movl	%eax,124(%esp)
5059	movl	%ecx,128(%esp)
5060	movl	240(%edx),%ecx
5061	testl	$1,%ebp
5062	jnz	.L074odd
5063	bsfl	%ebp,%eax
5064	addl	$1,%ebp
5065	shll	$4,%eax
5066	movdqu	(%ebx,%eax,1),%xmm7
5067	movl	%edx,%eax
5068	movdqu	(%esi),%xmm2
5069	leal	16(%esi),%esi
5070	pxor	%xmm0,%xmm7
5071	pxor	%xmm2,%xmm1
5072	pxor	%xmm7,%xmm2
5073	movdqa	%xmm1,%xmm6
5074	movups	(%edx),%xmm0
5075	movups	16(%edx),%xmm1
5076	leal	32(%edx),%edx
5077	xorps	%xmm0,%xmm2
5078.L075enc1_loop_15:
5079.byte	102,15,56,220,209
5080	decl	%ecx
5081	movups	(%edx),%xmm1
5082	leal	16(%edx),%edx
5083	jnz	.L075enc1_loop_15
5084.byte	102,15,56,221,209
5085	xorps	%xmm7,%xmm2
5086	movdqa	%xmm7,%xmm0
5087	movdqa	%xmm6,%xmm1
5088	movups	%xmm2,-16(%edi,%esi,1)
5089	movl	240(%eax),%ecx
5090	movl	%eax,%edx
5091	movl	124(%esp),%eax
5092.L074odd:
5093	shll	$4,%ecx
5094	movl	$16,%edi
5095	subl	%ecx,%edi
5096	movl	%edx,112(%esp)
5097	leal	32(%edx,%ecx,1),%edx
5098	movl	%edi,116(%esp)
5099	cmpl	%eax,%esi
5100	ja	.L076short
5101	jmp	.L077grandloop
5102.align	32
5103.L077grandloop:
5104	leal	1(%ebp),%ecx
5105	leal	3(%ebp),%eax
5106	leal	5(%ebp),%edi
5107	addl	$6,%ebp
5108	bsfl	%ecx,%ecx
5109	bsfl	%eax,%eax
5110	bsfl	%edi,%edi
5111	shll	$4,%ecx
5112	shll	$4,%eax
5113	shll	$4,%edi
5114	movdqu	(%ebx),%xmm2
5115	movdqu	(%ebx,%ecx,1),%xmm3
5116	movl	116(%esp),%ecx
5117	movdqa	%xmm2,%xmm4
5118	movdqu	(%ebx,%eax,1),%xmm5
5119	movdqa	%xmm2,%xmm6
5120	movdqu	(%ebx,%edi,1),%xmm7
5121	pxor	%xmm0,%xmm2
5122	pxor	%xmm2,%xmm3
5123	movdqa	%xmm2,(%esp)
5124	pxor	%xmm3,%xmm4
5125	movdqa	%xmm3,16(%esp)
5126	pxor	%xmm4,%xmm5
5127	movdqa	%xmm4,32(%esp)
5128	pxor	%xmm5,%xmm6
5129	movdqa	%xmm5,48(%esp)
5130	pxor	%xmm6,%xmm7
5131	movdqa	%xmm6,64(%esp)
5132	movdqa	%xmm7,80(%esp)
5133	movups	-48(%edx,%ecx,1),%xmm0
5134	movdqu	(%esi),%xmm2
5135	movdqu	16(%esi),%xmm3
5136	movdqu	32(%esi),%xmm4
5137	movdqu	48(%esi),%xmm5
5138	movdqu	64(%esi),%xmm6
5139	movdqu	80(%esi),%xmm7
5140	leal	96(%esi),%esi
5141	pxor	%xmm2,%xmm1
5142	pxor	%xmm0,%xmm2
5143	pxor	%xmm3,%xmm1
5144	pxor	%xmm0,%xmm3
5145	pxor	%xmm4,%xmm1
5146	pxor	%xmm0,%xmm4
5147	pxor	%xmm5,%xmm1
5148	pxor	%xmm0,%xmm5
5149	pxor	%xmm6,%xmm1
5150	pxor	%xmm0,%xmm6
5151	pxor	%xmm7,%xmm1
5152	pxor	%xmm0,%xmm7
5153	movdqa	%xmm1,96(%esp)
5154	movups	-32(%edx,%ecx,1),%xmm1
5155	pxor	(%esp),%xmm2
5156	pxor	16(%esp),%xmm3
5157	pxor	32(%esp),%xmm4
5158	pxor	48(%esp),%xmm5
5159	pxor	64(%esp),%xmm6
5160	pxor	80(%esp),%xmm7
5161	movups	-16(%edx,%ecx,1),%xmm0
5162.byte	102,15,56,220,209
5163.byte	102,15,56,220,217
5164.byte	102,15,56,220,225
5165.byte	102,15,56,220,233
5166.byte	102,15,56,220,241
5167.byte	102,15,56,220,249
5168	movl	120(%esp),%edi
5169	movl	124(%esp),%eax
5170	call	.L_aesni_encrypt6_enter
5171	movdqa	80(%esp),%xmm0
5172	pxor	(%esp),%xmm2
5173	pxor	16(%esp),%xmm3
5174	pxor	32(%esp),%xmm4
5175	pxor	48(%esp),%xmm5
5176	pxor	64(%esp),%xmm6
5177	pxor	%xmm0,%xmm7
5178	movdqa	96(%esp),%xmm1
5179	movdqu	%xmm2,-96(%edi,%esi,1)
5180	movdqu	%xmm3,-80(%edi,%esi,1)
5181	movdqu	%xmm4,-64(%edi,%esi,1)
5182	movdqu	%xmm5,-48(%edi,%esi,1)
5183	movdqu	%xmm6,-32(%edi,%esi,1)
5184	movdqu	%xmm7,-16(%edi,%esi,1)
5185	cmpl	%eax,%esi
5186	jbe	.L077grandloop
5187.L076short:
5188	addl	$96,%eax
5189	subl	%esi,%eax
5190	jz	.L078done
5191	cmpl	$32,%eax
5192	jb	.L079one
5193	je	.L080two
5194	cmpl	$64,%eax
5195	jb	.L081three
5196	je	.L082four
5197	leal	1(%ebp),%ecx
5198	leal	3(%ebp),%eax
5199	bsfl	%ecx,%ecx
5200	bsfl	%eax,%eax
5201	shll	$4,%ecx
5202	shll	$4,%eax
5203	movdqu	(%ebx),%xmm2
5204	movdqu	(%ebx,%ecx,1),%xmm3
5205	movl	116(%esp),%ecx
5206	movdqa	%xmm2,%xmm4
5207	movdqu	(%ebx,%eax,1),%xmm5
5208	movdqa	%xmm2,%xmm6
5209	pxor	%xmm0,%xmm2
5210	pxor	%xmm2,%xmm3
5211	movdqa	%xmm2,(%esp)
5212	pxor	%xmm3,%xmm4
5213	movdqa	%xmm3,16(%esp)
5214	pxor	%xmm4,%xmm5
5215	movdqa	%xmm4,32(%esp)
5216	pxor	%xmm5,%xmm6
5217	movdqa	%xmm5,48(%esp)
5218	pxor	%xmm6,%xmm7
5219	movdqa	%xmm6,64(%esp)
5220	movups	-48(%edx,%ecx,1),%xmm0
5221	movdqu	(%esi),%xmm2
5222	movdqu	16(%esi),%xmm3
5223	movdqu	32(%esi),%xmm4
5224	movdqu	48(%esi),%xmm5
5225	movdqu	64(%esi),%xmm6
5226	pxor	%xmm7,%xmm7
5227	pxor	%xmm2,%xmm1
5228	pxor	%xmm0,%xmm2
5229	pxor	%xmm3,%xmm1
5230	pxor	%xmm0,%xmm3
5231	pxor	%xmm4,%xmm1
5232	pxor	%xmm0,%xmm4
5233	pxor	%xmm5,%xmm1
5234	pxor	%xmm0,%xmm5
5235	pxor	%xmm6,%xmm1
5236	pxor	%xmm0,%xmm6
5237	movdqa	%xmm1,96(%esp)
5238	movups	-32(%edx,%ecx,1),%xmm1
5239	pxor	(%esp),%xmm2
5240	pxor	16(%esp),%xmm3
5241	pxor	32(%esp),%xmm4
5242	pxor	48(%esp),%xmm5
5243	pxor	64(%esp),%xmm6
5244	movups	-16(%edx,%ecx,1),%xmm0
5245.byte	102,15,56,220,209
5246.byte	102,15,56,220,217
5247.byte	102,15,56,220,225
5248.byte	102,15,56,220,233
5249.byte	102,15,56,220,241
5250.byte	102,15,56,220,249
5251	movl	120(%esp),%edi
5252	call	.L_aesni_encrypt6_enter
5253	movdqa	64(%esp),%xmm0
5254	pxor	(%esp),%xmm2
5255	pxor	16(%esp),%xmm3
5256	pxor	32(%esp),%xmm4
5257	pxor	48(%esp),%xmm5
5258	pxor	%xmm0,%xmm6
5259	movdqa	96(%esp),%xmm1
5260	movdqu	%xmm2,(%edi,%esi,1)
5261	movdqu	%xmm3,16(%edi,%esi,1)
5262	movdqu	%xmm4,32(%edi,%esi,1)
5263	movdqu	%xmm5,48(%edi,%esi,1)
5264	movdqu	%xmm6,64(%edi,%esi,1)
5265	jmp	.L078done
5266.align	16
5267.L079one:
5268	movdqu	(%ebx),%xmm7
5269	movl	112(%esp),%edx
5270	movdqu	(%esi),%xmm2
5271	movl	240(%edx),%ecx
5272	pxor	%xmm0,%xmm7
5273	pxor	%xmm2,%xmm1
5274	pxor	%xmm7,%xmm2
5275	movdqa	%xmm1,%xmm6
5276	movl	120(%esp),%edi
5277	movups	(%edx),%xmm0
5278	movups	16(%edx),%xmm1
5279	leal	32(%edx),%edx
5280	xorps	%xmm0,%xmm2
5281.L083enc1_loop_16:
5282.byte	102,15,56,220,209
5283	decl	%ecx
5284	movups	(%edx),%xmm1
5285	leal	16(%edx),%edx
5286	jnz	.L083enc1_loop_16
5287.byte	102,15,56,221,209
5288	xorps	%xmm7,%xmm2
5289	movdqa	%xmm7,%xmm0
5290	movdqa	%xmm6,%xmm1
5291	movups	%xmm2,(%edi,%esi,1)
5292	jmp	.L078done
5293.align	16
5294.L080two:
5295	leal	1(%ebp),%ecx
5296	movl	112(%esp),%edx
5297	bsfl	%ecx,%ecx
5298	shll	$4,%ecx
5299	movdqu	(%ebx),%xmm6
5300	movdqu	(%ebx,%ecx,1),%xmm7
5301	movdqu	(%esi),%xmm2
5302	movdqu	16(%esi),%xmm3
5303	movl	240(%edx),%ecx
5304	pxor	%xmm0,%xmm6
5305	pxor	%xmm6,%xmm7
5306	pxor	%xmm2,%xmm1
5307	pxor	%xmm6,%xmm2
5308	pxor	%xmm3,%xmm1
5309	pxor	%xmm7,%xmm3
5310	movdqa	%xmm1,%xmm5
5311	movl	120(%esp),%edi
5312	call	_aesni_encrypt2
5313	xorps	%xmm6,%xmm2
5314	xorps	%xmm7,%xmm3
5315	movdqa	%xmm7,%xmm0
5316	movdqa	%xmm5,%xmm1
5317	movups	%xmm2,(%edi,%esi,1)
5318	movups	%xmm3,16(%edi,%esi,1)
5319	jmp	.L078done
5320.align	16
5321.L081three:
5322	leal	1(%ebp),%ecx
5323	movl	112(%esp),%edx
5324	bsfl	%ecx,%ecx
5325	shll	$4,%ecx
5326	movdqu	(%ebx),%xmm5
5327	movdqu	(%ebx,%ecx,1),%xmm6
5328	movdqa	%xmm5,%xmm7
5329	movdqu	(%esi),%xmm2
5330	movdqu	16(%esi),%xmm3
5331	movdqu	32(%esi),%xmm4
5332	movl	240(%edx),%ecx
5333	pxor	%xmm0,%xmm5
5334	pxor	%xmm5,%xmm6
5335	pxor	%xmm6,%xmm7
5336	pxor	%xmm2,%xmm1
5337	pxor	%xmm5,%xmm2
5338	pxor	%xmm3,%xmm1
5339	pxor	%xmm6,%xmm3
5340	pxor	%xmm4,%xmm1
5341	pxor	%xmm7,%xmm4
5342	movdqa	%xmm1,96(%esp)
5343	movl	120(%esp),%edi
5344	call	_aesni_encrypt3
5345	xorps	%xmm5,%xmm2
5346	xorps	%xmm6,%xmm3
5347	xorps	%xmm7,%xmm4
5348	movdqa	%xmm7,%xmm0
5349	movdqa	96(%esp),%xmm1
5350	movups	%xmm2,(%edi,%esi,1)
5351	movups	%xmm3,16(%edi,%esi,1)
5352	movups	%xmm4,32(%edi,%esi,1)
5353	jmp	.L078done
5354.align	16
5355.L082four:
5356	leal	1(%ebp),%ecx
5357	leal	3(%ebp),%eax
5358	bsfl	%ecx,%ecx
5359	bsfl	%eax,%eax
5360	movl	112(%esp),%edx
5361	shll	$4,%ecx
5362	shll	$4,%eax
5363	movdqu	(%ebx),%xmm4
5364	movdqu	(%ebx,%ecx,1),%xmm5
5365	movdqa	%xmm4,%xmm6
5366	movdqu	(%ebx,%eax,1),%xmm7
5367	pxor	%xmm0,%xmm4
5368	movdqu	(%esi),%xmm2
5369	pxor	%xmm4,%xmm5
5370	movdqu	16(%esi),%xmm3
5371	pxor	%xmm5,%xmm6
5372	movdqa	%xmm4,(%esp)
5373	pxor	%xmm6,%xmm7
5374	movdqa	%xmm5,16(%esp)
5375	movdqu	32(%esi),%xmm4
5376	movdqu	48(%esi),%xmm5
5377	movl	240(%edx),%ecx
5378	pxor	%xmm2,%xmm1
5379	pxor	(%esp),%xmm2
5380	pxor	%xmm3,%xmm1
5381	pxor	16(%esp),%xmm3
5382	pxor	%xmm4,%xmm1
5383	pxor	%xmm6,%xmm4
5384	pxor	%xmm5,%xmm1
5385	pxor	%xmm7,%xmm5
5386	movdqa	%xmm1,96(%esp)
5387	movl	120(%esp),%edi
5388	call	_aesni_encrypt4
5389	xorps	(%esp),%xmm2
5390	xorps	16(%esp),%xmm3
5391	xorps	%xmm6,%xmm4
5392	movups	%xmm2,(%edi,%esi,1)
5393	xorps	%xmm7,%xmm5
5394	movups	%xmm3,16(%edi,%esi,1)
5395	movdqa	%xmm7,%xmm0
5396	movups	%xmm4,32(%edi,%esi,1)
5397	movdqa	96(%esp),%xmm1
5398	movups	%xmm5,48(%edi,%esi,1)
5399.L078done:
5400	movl	128(%esp),%edx
5401	pxor	%xmm2,%xmm2
5402	pxor	%xmm3,%xmm3
5403	movdqa	%xmm2,(%esp)
5404	pxor	%xmm4,%xmm4
5405	movdqa	%xmm2,16(%esp)
5406	pxor	%xmm5,%xmm5
5407	movdqa	%xmm2,32(%esp)
5408	pxor	%xmm6,%xmm6
5409	movdqa	%xmm2,48(%esp)
5410	pxor	%xmm7,%xmm7
5411	movdqa	%xmm2,64(%esp)
5412	movdqa	%xmm2,80(%esp)
5413	movdqa	%xmm2,96(%esp)
5414	leal	(%edx),%esp
5415	movl	40(%esp),%ecx
5416	movl	48(%esp),%ebx
5417	movdqu	%xmm0,(%ecx)
5418	pxor	%xmm0,%xmm0
5419	movdqu	%xmm1,(%ebx)
5420	pxor	%xmm1,%xmm1
5421	popl	%edi
5422	popl	%esi
5423	popl	%ebx
5424	popl	%ebp
5425	ret
5426.size	aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
5427.globl	aesni_ocb_decrypt
5428.type	aesni_ocb_decrypt,@function
5429.align	16
5430aesni_ocb_decrypt:
5431.L_aesni_ocb_decrypt_begin:
5432	pushl	%ebp
5433	pushl	%ebx
5434	pushl	%esi
5435	pushl	%edi
5436	movl	40(%esp),%ecx
5437	movl	48(%esp),%ebx
5438	movl	20(%esp),%esi
5439	movl	24(%esp),%edi
5440	movl	28(%esp),%eax
5441	movl	32(%esp),%edx
5442	movdqu	(%ecx),%xmm0
5443	movl	36(%esp),%ebp
5444	movdqu	(%ebx),%xmm1
5445	movl	44(%esp),%ebx
5446	movl	%esp,%ecx
5447	subl	$132,%esp
5448	andl	$-16,%esp
5449	subl	%esi,%edi
5450	shll	$4,%eax
5451	leal	-96(%esi,%eax,1),%eax
5452	movl	%edi,120(%esp)
5453	movl	%eax,124(%esp)
5454	movl	%ecx,128(%esp)
5455	movl	240(%edx),%ecx
5456	testl	$1,%ebp
5457	jnz	.L084odd
5458	bsfl	%ebp,%eax
5459	addl	$1,%ebp
5460	shll	$4,%eax
5461	movdqu	(%ebx,%eax,1),%xmm7
5462	movl	%edx,%eax
5463	movdqu	(%esi),%xmm2
5464	leal	16(%esi),%esi
5465	pxor	%xmm0,%xmm7
5466	pxor	%xmm7,%xmm2
5467	movdqa	%xmm1,%xmm6
5468	movups	(%edx),%xmm0
5469	movups	16(%edx),%xmm1
5470	leal	32(%edx),%edx
5471	xorps	%xmm0,%xmm2
5472.L085dec1_loop_17:
5473.byte	102,15,56,222,209
5474	decl	%ecx
5475	movups	(%edx),%xmm1
5476	leal	16(%edx),%edx
5477	jnz	.L085dec1_loop_17
5478.byte	102,15,56,223,209
5479	xorps	%xmm7,%xmm2
5480	movaps	%xmm6,%xmm1
5481	movdqa	%xmm7,%xmm0
5482	xorps	%xmm2,%xmm1
5483	movups	%xmm2,-16(%edi,%esi,1)
5484	movl	240(%eax),%ecx
5485	movl	%eax,%edx
5486	movl	124(%esp),%eax
5487.L084odd:
5488	shll	$4,%ecx
5489	movl	$16,%edi
5490	subl	%ecx,%edi
5491	movl	%edx,112(%esp)
5492	leal	32(%edx,%ecx,1),%edx
5493	movl	%edi,116(%esp)
5494	cmpl	%eax,%esi
5495	ja	.L086short
5496	jmp	.L087grandloop
5497.align	32
5498.L087grandloop:
5499	leal	1(%ebp),%ecx
5500	leal	3(%ebp),%eax
5501	leal	5(%ebp),%edi
5502	addl	$6,%ebp
5503	bsfl	%ecx,%ecx
5504	bsfl	%eax,%eax
5505	bsfl	%edi,%edi
5506	shll	$4,%ecx
5507	shll	$4,%eax
5508	shll	$4,%edi
5509	movdqu	(%ebx),%xmm2
5510	movdqu	(%ebx,%ecx,1),%xmm3
5511	movl	116(%esp),%ecx
5512	movdqa	%xmm2,%xmm4
5513	movdqu	(%ebx,%eax,1),%xmm5
5514	movdqa	%xmm2,%xmm6
5515	movdqu	(%ebx,%edi,1),%xmm7
5516	pxor	%xmm0,%xmm2
5517	pxor	%xmm2,%xmm3
5518	movdqa	%xmm2,(%esp)
5519	pxor	%xmm3,%xmm4
5520	movdqa	%xmm3,16(%esp)
5521	pxor	%xmm4,%xmm5
5522	movdqa	%xmm4,32(%esp)
5523	pxor	%xmm5,%xmm6
5524	movdqa	%xmm5,48(%esp)
5525	pxor	%xmm6,%xmm7
5526	movdqa	%xmm6,64(%esp)
5527	movdqa	%xmm7,80(%esp)
5528	movups	-48(%edx,%ecx,1),%xmm0
5529	movdqu	(%esi),%xmm2
5530	movdqu	16(%esi),%xmm3
5531	movdqu	32(%esi),%xmm4
5532	movdqu	48(%esi),%xmm5
5533	movdqu	64(%esi),%xmm6
5534	movdqu	80(%esi),%xmm7
5535	leal	96(%esi),%esi
5536	movdqa	%xmm1,96(%esp)
5537	pxor	%xmm0,%xmm2
5538	pxor	%xmm0,%xmm3
5539	pxor	%xmm0,%xmm4
5540	pxor	%xmm0,%xmm5
5541	pxor	%xmm0,%xmm6
5542	pxor	%xmm0,%xmm7
5543	movups	-32(%edx,%ecx,1),%xmm1
5544	pxor	(%esp),%xmm2
5545	pxor	16(%esp),%xmm3
5546	pxor	32(%esp),%xmm4
5547	pxor	48(%esp),%xmm5
5548	pxor	64(%esp),%xmm6
5549	pxor	80(%esp),%xmm7
5550	movups	-16(%edx,%ecx,1),%xmm0
5551.byte	102,15,56,222,209
5552.byte	102,15,56,222,217
5553.byte	102,15,56,222,225
5554.byte	102,15,56,222,233
5555.byte	102,15,56,222,241
5556.byte	102,15,56,222,249
5557	movl	120(%esp),%edi
5558	movl	124(%esp),%eax
5559	call	.L_aesni_decrypt6_enter
5560	movdqa	80(%esp),%xmm0
5561	pxor	(%esp),%xmm2
5562	movdqa	96(%esp),%xmm1
5563	pxor	16(%esp),%xmm3
5564	pxor	32(%esp),%xmm4
5565	pxor	48(%esp),%xmm5
5566	pxor	64(%esp),%xmm6
5567	pxor	%xmm0,%xmm7
5568	pxor	%xmm2,%xmm1
5569	movdqu	%xmm2,-96(%edi,%esi,1)
5570	pxor	%xmm3,%xmm1
5571	movdqu	%xmm3,-80(%edi,%esi,1)
5572	pxor	%xmm4,%xmm1
5573	movdqu	%xmm4,-64(%edi,%esi,1)
5574	pxor	%xmm5,%xmm1
5575	movdqu	%xmm5,-48(%edi,%esi,1)
5576	pxor	%xmm6,%xmm1
5577	movdqu	%xmm6,-32(%edi,%esi,1)
5578	pxor	%xmm7,%xmm1
5579	movdqu	%xmm7,-16(%edi,%esi,1)
5580	cmpl	%eax,%esi
5581	jbe	.L087grandloop
5582.L086short:
5583	addl	$96,%eax
5584	subl	%esi,%eax
5585	jz	.L088done
5586	cmpl	$32,%eax
5587	jb	.L089one
5588	je	.L090two
5589	cmpl	$64,%eax
5590	jb	.L091three
5591	je	.L092four
5592	leal	1(%ebp),%ecx
5593	leal	3(%ebp),%eax
5594	bsfl	%ecx,%ecx
5595	bsfl	%eax,%eax
5596	shll	$4,%ecx
5597	shll	$4,%eax
5598	movdqu	(%ebx),%xmm2
5599	movdqu	(%ebx,%ecx,1),%xmm3
5600	movl	116(%esp),%ecx
5601	movdqa	%xmm2,%xmm4
5602	movdqu	(%ebx,%eax,1),%xmm5
5603	movdqa	%xmm2,%xmm6
5604	pxor	%xmm0,%xmm2
5605	pxor	%xmm2,%xmm3
5606	movdqa	%xmm2,(%esp)
5607	pxor	%xmm3,%xmm4
5608	movdqa	%xmm3,16(%esp)
5609	pxor	%xmm4,%xmm5
5610	movdqa	%xmm4,32(%esp)
5611	pxor	%xmm5,%xmm6
5612	movdqa	%xmm5,48(%esp)
5613	pxor	%xmm6,%xmm7
5614	movdqa	%xmm6,64(%esp)
5615	movups	-48(%edx,%ecx,1),%xmm0
5616	movdqu	(%esi),%xmm2
5617	movdqu	16(%esi),%xmm3
5618	movdqu	32(%esi),%xmm4
5619	movdqu	48(%esi),%xmm5
5620	movdqu	64(%esi),%xmm6
5621	pxor	%xmm7,%xmm7
5622	movdqa	%xmm1,96(%esp)
5623	pxor	%xmm0,%xmm2
5624	pxor	%xmm0,%xmm3
5625	pxor	%xmm0,%xmm4
5626	pxor	%xmm0,%xmm5
5627	pxor	%xmm0,%xmm6
5628	movups	-32(%edx,%ecx,1),%xmm1
5629	pxor	(%esp),%xmm2
5630	pxor	16(%esp),%xmm3
5631	pxor	32(%esp),%xmm4
5632	pxor	48(%esp),%xmm5
5633	pxor	64(%esp),%xmm6
5634	movups	-16(%edx,%ecx,1),%xmm0
5635.byte	102,15,56,222,209
5636.byte	102,15,56,222,217
5637.byte	102,15,56,222,225
5638.byte	102,15,56,222,233
5639.byte	102,15,56,222,241
5640.byte	102,15,56,222,249
5641	movl	120(%esp),%edi
5642	call	.L_aesni_decrypt6_enter
5643	movdqa	64(%esp),%xmm0
5644	pxor	(%esp),%xmm2
5645	movdqa	96(%esp),%xmm1
5646	pxor	16(%esp),%xmm3
5647	pxor	32(%esp),%xmm4
5648	pxor	48(%esp),%xmm5
5649	pxor	%xmm0,%xmm6
5650	pxor	%xmm2,%xmm1
5651	movdqu	%xmm2,(%edi,%esi,1)
5652	pxor	%xmm3,%xmm1
5653	movdqu	%xmm3,16(%edi,%esi,1)
5654	pxor	%xmm4,%xmm1
5655	movdqu	%xmm4,32(%edi,%esi,1)
5656	pxor	%xmm5,%xmm1
5657	movdqu	%xmm5,48(%edi,%esi,1)
5658	pxor	%xmm6,%xmm1
5659	movdqu	%xmm6,64(%edi,%esi,1)
5660	jmp	.L088done
5661.align	16
5662.L089one:
5663	movdqu	(%ebx),%xmm7
5664	movl	112(%esp),%edx
5665	movdqu	(%esi),%xmm2
5666	movl	240(%edx),%ecx
5667	pxor	%xmm0,%xmm7
5668	pxor	%xmm7,%xmm2
5669	movdqa	%xmm1,%xmm6
5670	movl	120(%esp),%edi
5671	movups	(%edx),%xmm0
5672	movups	16(%edx),%xmm1
5673	leal	32(%edx),%edx
5674	xorps	%xmm0,%xmm2
5675.L093dec1_loop_18:
5676.byte	102,15,56,222,209
5677	decl	%ecx
5678	movups	(%edx),%xmm1
5679	leal	16(%edx),%edx
5680	jnz	.L093dec1_loop_18
5681.byte	102,15,56,223,209
5682	xorps	%xmm7,%xmm2
5683	movaps	%xmm6,%xmm1
5684	movdqa	%xmm7,%xmm0
5685	xorps	%xmm2,%xmm1
5686	movups	%xmm2,(%edi,%esi,1)
5687	jmp	.L088done
5688.align	16
5689.L090two:
5690	leal	1(%ebp),%ecx
5691	movl	112(%esp),%edx
5692	bsfl	%ecx,%ecx
5693	shll	$4,%ecx
5694	movdqu	(%ebx),%xmm6
5695	movdqu	(%ebx,%ecx,1),%xmm7
5696	movdqu	(%esi),%xmm2
5697	movdqu	16(%esi),%xmm3
5698	movl	240(%edx),%ecx
5699	movdqa	%xmm1,%xmm5
5700	pxor	%xmm0,%xmm6
5701	pxor	%xmm6,%xmm7
5702	pxor	%xmm6,%xmm2
5703	pxor	%xmm7,%xmm3
5704	movl	120(%esp),%edi
5705	call	_aesni_decrypt2
5706	xorps	%xmm6,%xmm2
5707	xorps	%xmm7,%xmm3
5708	movdqa	%xmm7,%xmm0
5709	xorps	%xmm2,%xmm5
5710	movups	%xmm2,(%edi,%esi,1)
5711	xorps	%xmm3,%xmm5
5712	movups	%xmm3,16(%edi,%esi,1)
5713	movaps	%xmm5,%xmm1
5714	jmp	.L088done
5715.align	16
5716.L091three:
5717	leal	1(%ebp),%ecx
5718	movl	112(%esp),%edx
5719	bsfl	%ecx,%ecx
5720	shll	$4,%ecx
5721	movdqu	(%ebx),%xmm5
5722	movdqu	(%ebx,%ecx,1),%xmm6
5723	movdqa	%xmm5,%xmm7
5724	movdqu	(%esi),%xmm2
5725	movdqu	16(%esi),%xmm3
5726	movdqu	32(%esi),%xmm4
5727	movl	240(%edx),%ecx
5728	movdqa	%xmm1,96(%esp)
5729	pxor	%xmm0,%xmm5
5730	pxor	%xmm5,%xmm6
5731	pxor	%xmm6,%xmm7
5732	pxor	%xmm5,%xmm2
5733	pxor	%xmm6,%xmm3
5734	pxor	%xmm7,%xmm4
5735	movl	120(%esp),%edi
5736	call	_aesni_decrypt3
5737	movdqa	96(%esp),%xmm1
5738	xorps	%xmm5,%xmm2
5739	xorps	%xmm6,%xmm3
5740	xorps	%xmm7,%xmm4
5741	movups	%xmm2,(%edi,%esi,1)
5742	pxor	%xmm2,%xmm1
5743	movdqa	%xmm7,%xmm0
5744	movups	%xmm3,16(%edi,%esi,1)
5745	pxor	%xmm3,%xmm1
5746	movups	%xmm4,32(%edi,%esi,1)
5747	pxor	%xmm4,%xmm1
5748	jmp	.L088done
5749.align	16
5750.L092four:
5751	leal	1(%ebp),%ecx
5752	leal	3(%ebp),%eax
5753	bsfl	%ecx,%ecx
5754	bsfl	%eax,%eax
5755	movl	112(%esp),%edx
5756	shll	$4,%ecx
5757	shll	$4,%eax
5758	movdqu	(%ebx),%xmm4
5759	movdqu	(%ebx,%ecx,1),%xmm5
5760	movdqa	%xmm4,%xmm6
5761	movdqu	(%ebx,%eax,1),%xmm7
5762	pxor	%xmm0,%xmm4
5763	movdqu	(%esi),%xmm2
5764	pxor	%xmm4,%xmm5
5765	movdqu	16(%esi),%xmm3
5766	pxor	%xmm5,%xmm6
5767	movdqa	%xmm4,(%esp)
5768	pxor	%xmm6,%xmm7
5769	movdqa	%xmm5,16(%esp)
5770	movdqu	32(%esi),%xmm4
5771	movdqu	48(%esi),%xmm5
5772	movl	240(%edx),%ecx
5773	movdqa	%xmm1,96(%esp)
5774	pxor	(%esp),%xmm2
5775	pxor	16(%esp),%xmm3
5776	pxor	%xmm6,%xmm4
5777	pxor	%xmm7,%xmm5
5778	movl	120(%esp),%edi
5779	call	_aesni_decrypt4
5780	movdqa	96(%esp),%xmm1
5781	xorps	(%esp),%xmm2
5782	xorps	16(%esp),%xmm3
5783	xorps	%xmm6,%xmm4
5784	movups	%xmm2,(%edi,%esi,1)
5785	pxor	%xmm2,%xmm1
5786	xorps	%xmm7,%xmm5
5787	movups	%xmm3,16(%edi,%esi,1)
5788	pxor	%xmm3,%xmm1
5789	movdqa	%xmm7,%xmm0
5790	movups	%xmm4,32(%edi,%esi,1)
5791	pxor	%xmm4,%xmm1
5792	movups	%xmm5,48(%edi,%esi,1)
5793	pxor	%xmm5,%xmm1
5794.L088done:
5795	movl	128(%esp),%edx
5796	pxor	%xmm2,%xmm2
5797	pxor	%xmm3,%xmm3
5798	movdqa	%xmm2,(%esp)
5799	pxor	%xmm4,%xmm4
5800	movdqa	%xmm2,16(%esp)
5801	pxor	%xmm5,%xmm5
5802	movdqa	%xmm2,32(%esp)
5803	pxor	%xmm6,%xmm6
5804	movdqa	%xmm2,48(%esp)
5805	pxor	%xmm7,%xmm7
5806	movdqa	%xmm2,64(%esp)
5807	movdqa	%xmm2,80(%esp)
5808	movdqa	%xmm2,96(%esp)
5809	leal	(%edx),%esp
5810	movl	40(%esp),%ecx
5811	movl	48(%esp),%ebx
5812	movdqu	%xmm0,(%ecx)
5813	pxor	%xmm0,%xmm0
5814	movdqu	%xmm1,(%ebx)
5815	pxor	%xmm1,%xmm1
5816	popl	%edi
5817	popl	%esi
5818	popl	%ebx
5819	popl	%ebp
5820	ret
5821.size	aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
/*
 * aesni_cbc_encrypt -- AES-NI CBC-mode encryption/decryption, i386 cdecl.
 *
 * NOTE(review): prototype inferred from the argument loads below -- confirm
 * against aesni-x86.pl:
 *   20(%esp)=in  24(%esp)=out  28(%esp)=len  32(%esp)=key schedule
 *   36(%esp)=&ivec  40(%esp)=enc flag (0 = decrypt)
 * Encryption is inherently serial (one block at a time); decryption is
 * interleaved six blocks wide.  The final chaining value is written back
 * to the ivec buffer on exit.
 */
5822.globl	aesni_cbc_encrypt
5823.type	aesni_cbc_encrypt,@function
5824.align	16
5825aesni_cbc_encrypt:
5826.L_aesni_cbc_encrypt_begin:
5827	pushl	%ebp
5828	pushl	%ebx
5829	pushl	%esi
5830	pushl	%edi
5831	movl	20(%esp),%esi		/* esi = in */
5832	movl	%esp,%ebx
5833	movl	24(%esp),%edi		/* edi = out */
5834	subl	$24,%ebx		/* small aligned scratch frame */
5835	movl	28(%esp),%eax		/* eax = len */
5836	andl	$-16,%ebx
5837	movl	32(%esp),%edx		/* edx = key schedule */
5838	movl	36(%esp),%ebp		/* ebp = &ivec */
5839	testl	%eax,%eax
5840	jz	.L094cbc_abort		/* nothing to do */
5841	cmpl	$0,40(%esp)		/* enc flag */
5842	xchgl	%esp,%ebx		/* switch to aligned frame */
5843	movups	(%ebp),%xmm7		/* xmm7 = IV */
5844	movl	240(%edx),%ecx		/* ecx = rounds */
5845	movl	%edx,%ebp		/* ebp = key (reload per block) */
5846	movl	%ebx,16(%esp)		/* save caller %esp */
5847	movl	%ecx,%ebx		/* ebx = rounds (reload per block) */
5848	je	.L095cbc_decrypt
	/* --- CBC encrypt: serial, one block per iteration --- */
5849	movaps	%xmm7,%xmm2		/* xmm2 = chaining value */
5850	cmpl	$16,%eax
5851	jb	.L096cbc_enc_tail	/* partial first block */
5852	subl	$16,%eax
5853	jmp	.L097cbc_enc_loop
5854.align	16
5855.L097cbc_enc_loop:
5856	movups	(%esi),%xmm7		/* plaintext block */
5857	leal	16(%esi),%esi
5858	movups	(%edx),%xmm0
5859	movups	16(%edx),%xmm1
5860	xorps	%xmm0,%xmm7		/* pt ^ round key 0 */
5861	leal	32(%edx),%edx
5862	xorps	%xmm7,%xmm2		/* ... ^ chaining value */
5863.L098enc1_loop_19:
5864.byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
5865	decl	%ecx
5866	movups	(%edx),%xmm1
5867	leal	16(%edx),%edx
5868	jnz	.L098enc1_loop_19
5869.byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
5870	movl	%ebx,%ecx		/* restore rounds/key for next block */
5871	movl	%ebp,%edx
5872	movups	%xmm2,(%edi)		/* ciphertext = new chaining value */
5873	leal	16(%edi),%edi
5874	subl	$16,%eax
5875	jnc	.L097cbc_enc_loop
5876	addl	$16,%eax
5877	jnz	.L096cbc_enc_tail
5878	movaps	%xmm2,%xmm7		/* final IV */
5879	pxor	%xmm2,%xmm2
5880	jmp	.L099cbc_ret
5881.L096cbc_enc_tail:
	/* Copy the eax-byte tail to the output buffer, zero-pad it to a
	 * full block in place, then loop once more encrypting it there. */
5882	movl	%eax,%ecx
5883.long	2767451785		/* 89 F6 F3 A4: mov %esi,%esi; rep movsb */
5884	movl	$16,%ecx
5885	subl	%eax,%ecx
5886	xorl	%eax,%eax
5887.long	2868115081		/* 89 F6 F3 AA: mov %esi,%esi; rep stosb */
5888	leal	-16(%edi),%edi		/* back to start of padded block */
5889	movl	%ebx,%ecx
5890	movl	%edi,%esi		/* encrypt the padded block in place */
5891	movl	%ebp,%edx
5892	jmp	.L097cbc_enc_loop
5893.align	16
5894.L095cbc_decrypt:
	/* --- CBC decrypt: six-wide interleaved main loop --- */
5895	cmpl	$80,%eax
5896	jbe	.L100cbc_dec_tail
5897	movaps	%xmm7,(%esp)		/* park IV */
5898	subl	$80,%eax
5899	jmp	.L101cbc_dec_loop6_enter
5900.align	16
5901.L102cbc_dec_loop6:
5902	movaps	%xmm0,(%esp)		/* last ciphertext = next IV */
5903	movups	%xmm7,(%edi)
5904	leal	16(%edi),%edi
5905.L101cbc_dec_loop6_enter:
5906	movdqu	(%esi),%xmm2		/* load six ciphertext blocks */
5907	movdqu	16(%esi),%xmm3
5908	movdqu	32(%esi),%xmm4
5909	movdqu	48(%esi),%xmm5
5910	movdqu	64(%esi),%xmm6
5911	movdqu	80(%esi),%xmm7
5912	call	_aesni_decrypt6
	/* xor each decrypted block with the previous ciphertext block */
5913	movups	(%esi),%xmm1
5914	movups	16(%esi),%xmm0
5915	xorps	(%esp),%xmm2		/* first uses the parked IV */
5916	xorps	%xmm1,%xmm3
5917	movups	32(%esi),%xmm1
5918	xorps	%xmm0,%xmm4
5919	movups	48(%esi),%xmm0
5920	xorps	%xmm1,%xmm5
5921	movups	64(%esi),%xmm1
5922	xorps	%xmm0,%xmm6
5923	movups	80(%esi),%xmm0
5924	xorps	%xmm1,%xmm7
5925	movups	%xmm2,(%edi)
5926	movups	%xmm3,16(%edi)
5927	leal	96(%esi),%esi
5928	movups	%xmm4,32(%edi)
5929	movl	%ebx,%ecx		/* restore rounds/key */
5930	movups	%xmm5,48(%edi)
5931	movl	%ebp,%edx
5932	movups	%xmm6,64(%edi)
5933	leal	80(%edi),%edi		/* sixth block stored at loop top */
5934	subl	$96,%eax
5935	ja	.L102cbc_dec_loop6
5936	movaps	%xmm7,%xmm2		/* pending sixth plaintext */
5937	movaps	%xmm0,%xmm7		/* xmm7 = new IV */
5938	addl	$80,%eax
5939	jle	.L103cbc_dec_clear_tail_collected
5940	movups	%xmm2,(%edi)
5941	leal	16(%edi),%edi
5942.L100cbc_dec_tail:
	/* 1..80 bytes remain; xmm7 = current IV throughout */
5943	movups	(%esi),%xmm2
5944	movaps	%xmm2,%xmm6		/* keep ciphertexts for chaining */
5945	cmpl	$16,%eax
5946	jbe	.L104cbc_dec_one
5947	movups	16(%esi),%xmm3
5948	movaps	%xmm3,%xmm5
5949	cmpl	$32,%eax
5950	jbe	.L105cbc_dec_two
5951	movups	32(%esi),%xmm4
5952	cmpl	$48,%eax
5953	jbe	.L106cbc_dec_three
5954	movups	48(%esi),%xmm5
5955	cmpl	$64,%eax
5956	jbe	.L107cbc_dec_four
	/* five blocks: use decrypt6 with xmm7 zeroed */
5957	movups	64(%esi),%xmm6
5958	movaps	%xmm7,(%esp)		/* park IV */
5959	movups	(%esi),%xmm2
5960	xorps	%xmm7,%xmm7
5961	call	_aesni_decrypt6
5962	movups	(%esi),%xmm1
5963	movups	16(%esi),%xmm0
5964	xorps	(%esp),%xmm2
5965	xorps	%xmm1,%xmm3
5966	movups	32(%esi),%xmm1
5967	xorps	%xmm0,%xmm4
5968	movups	48(%esi),%xmm0
5969	xorps	%xmm1,%xmm5
5970	movups	64(%esi),%xmm7		/* xmm7 = new IV */
5971	xorps	%xmm0,%xmm6
5972	movups	%xmm2,(%edi)
5973	movups	%xmm3,16(%edi)
5974	pxor	%xmm3,%xmm3
5975	movups	%xmm4,32(%edi)
5976	pxor	%xmm4,%xmm4
5977	movups	%xmm5,48(%edi)
5978	pxor	%xmm5,%xmm5
5979	leal	64(%edi),%edi
5980	movaps	%xmm6,%xmm2		/* last plaintext, maybe partial */
5981	pxor	%xmm6,%xmm6
5982	subl	$80,%eax
5983	jmp	.L108cbc_dec_tail_collected
5984.align	16
5985.L104cbc_dec_one:
5986	movups	(%edx),%xmm0
5987	movups	16(%edx),%xmm1
5988	leal	32(%edx),%edx
5989	xorps	%xmm0,%xmm2
5990.L109dec1_loop_20:
5991.byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
5992	decl	%ecx
5993	movups	(%edx),%xmm1
5994	leal	16(%edx),%edx
5995	jnz	.L109dec1_loop_20
5996.byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
5997	xorps	%xmm7,%xmm2		/* ^ IV */
5998	movaps	%xmm6,%xmm7		/* new IV = this ciphertext */
5999	subl	$16,%eax
6000	jmp	.L108cbc_dec_tail_collected
6001.align	16
6002.L105cbc_dec_two:
6003	call	_aesni_decrypt2
6004	xorps	%xmm7,%xmm2
6005	xorps	%xmm6,%xmm3
6006	movups	%xmm2,(%edi)
6007	movaps	%xmm3,%xmm2
6008	pxor	%xmm3,%xmm3
6009	leal	16(%edi),%edi
6010	movaps	%xmm5,%xmm7		/* new IV = last ciphertext */
6011	subl	$32,%eax
6012	jmp	.L108cbc_dec_tail_collected
6013.align	16
6014.L106cbc_dec_three:
6015	call	_aesni_decrypt3
6016	xorps	%xmm7,%xmm2
6017	xorps	%xmm6,%xmm3
6018	xorps	%xmm5,%xmm4
6019	movups	%xmm2,(%edi)
6020	movaps	%xmm4,%xmm2
6021	pxor	%xmm4,%xmm4
6022	movups	%xmm3,16(%edi)
6023	pxor	%xmm3,%xmm3
6024	leal	32(%edi),%edi
6025	movups	32(%esi),%xmm7		/* new IV = last ciphertext */
6026	subl	$48,%eax
6027	jmp	.L108cbc_dec_tail_collected
6028.align	16
6029.L107cbc_dec_four:
6030	call	_aesni_decrypt4
6031	movups	16(%esi),%xmm1
6032	movups	32(%esi),%xmm0
6033	xorps	%xmm7,%xmm2
6034	movups	48(%esi),%xmm7		/* new IV = last ciphertext */
6035	xorps	%xmm6,%xmm3
6036	movups	%xmm2,(%edi)
6037	xorps	%xmm1,%xmm4
6038	movups	%xmm3,16(%edi)
6039	pxor	%xmm3,%xmm3
6040	xorps	%xmm0,%xmm5
6041	movups	%xmm4,32(%edi)
6042	pxor	%xmm4,%xmm4
6043	leal	48(%edi),%edi
6044	movaps	%xmm5,%xmm2
6045	pxor	%xmm5,%xmm5
6046	subl	$64,%eax
6047	jmp	.L108cbc_dec_tail_collected
6048.align	16
6049.L103cbc_dec_clear_tail_collected:
6050	pxor	%xmm3,%xmm3		/* scrub plaintext residue */
6051	pxor	%xmm4,%xmm4
6052	pxor	%xmm5,%xmm5
6053	pxor	%xmm6,%xmm6
6054.L108cbc_dec_tail_collected:
	/* xmm2 holds the last plaintext block; store whole or partial */
6055	andl	$15,%eax
6056	jnz	.L110cbc_dec_tail_partial
6057	movups	%xmm2,(%edi)
6058	pxor	%xmm0,%xmm0
6059	jmp	.L099cbc_ret
6060.align	16
6061.L110cbc_dec_tail_partial:
6062	movaps	%xmm2,(%esp)		/* spill, then copy eax bytes out */
6063	pxor	%xmm0,%xmm0
6064	movl	$16,%ecx
6065	movl	%esp,%esi
6066	subl	%eax,%ecx
6067.long	2767451785		/* 89 F6 F3 A4: mov %esi,%esi; rep movsb */
6068	movdqa	%xmm2,(%esp)		/* re-scrub the spill slot */
6069.L099cbc_ret:
6070	movl	16(%esp),%esp		/* restore caller %esp */
6071	movl	36(%esp),%ebp		/* &ivec */
6072	pxor	%xmm2,%xmm2
6073	pxor	%xmm1,%xmm1
6074	movups	%xmm7,(%ebp)		/* write back final IV */
6075	pxor	%xmm7,%xmm7
6076.L094cbc_abort:
6077	popl	%edi
6078	popl	%esi
6079	popl	%ebx
6080	popl	%ebp
6081	ret
6082.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
/*
 * _aesni_set_encrypt_key -- internal AES key-schedule expansion.
 *
 * Register arguments (set up by the public wrappers below):
 *   %eax = user key, %ecx = bits (128/192/256), %edx = output AES_KEY
 * Returns in %eax: 0 on success, -1 on NULL pointer, -2 on bad key size.
 * The round count (9/11/13) is stored at the end of the schedule
 * (offset 240 of the AES_KEY, reached via the per-size stores below).
 *
 * Each size has two variants: the classic aeskeygenassist ladder, and an
 * "_alt" ladder built from pshufb+aesenclast against .Lkey_const.  The
 * alt path is taken when OPENSSL_ia32cap_P[1] & (1<<28|1<<9) == 1<<28
 * (CPUID.1:ECX bit 28 = AVX, bit 9 = SSSE3) -- NOTE(review): selection
 * rationale lives in aesni-x86.pl; confirm there.
 */
6083.type	_aesni_set_encrypt_key,@function
6084.align	16
6085_aesni_set_encrypt_key:
6086	pushl	%ebp
6087	pushl	%ebx
6088	testl	%eax,%eax		/* NULL user key? */
6089	jz	.L111bad_pointer
6090	testl	%edx,%edx		/* NULL schedule? */
6091	jz	.L111bad_pointer
6092	call	.L112pic		/* PIC: get own address in %ebx */
6093.L112pic:
6094	popl	%ebx
6095	leal	.Lkey_const-.L112pic(%ebx),%ebx	/* ebx = &.Lkey_const */
6096	leal	OPENSSL_ia32cap_P,%ebp
6097	movups	(%eax),%xmm0		/* xmm0 = first 16 key bytes */
6098	xorps	%xmm4,%xmm4		/* scratch for shufps ladder */
6099	movl	4(%ebp),%ebp		/* capability word 1 (CPUID.1:ECX) */
6100	leal	16(%edx),%edx
6101	andl	$268437504,%ebp		/* mask bits 28 (AVX) and 9 (SSSE3) */
6102	cmpl	$256,%ecx
6103	je	.L11314rounds
6104	cmpl	$192,%ecx
6105	je	.L11412rounds
6106	cmpl	$128,%ecx
6107	jne	.L115bad_keybits
6108.align	16
6109.L11610rounds:
	/* --- AES-128: 10 rounds --- */
6110	cmpl	$268435456,%ebp		/* == 1<<28: use pshufb-based path */
6111	je	.L11710rounds_alt
6112	movl	$9,%ecx
6113	movups	%xmm0,-16(%edx)		/* round key 0 = raw key */
6114.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
6115	call	.L118key_128_cold
6116.byte	102,15,58,223,200,2	/* aeskeygenassist $2,%xmm0,%xmm1 */
6117	call	.L119key_128
6118.byte	102,15,58,223,200,4	/* rcon = 4 */
6119	call	.L119key_128
6120.byte	102,15,58,223,200,8	/* rcon = 8 */
6121	call	.L119key_128
6122.byte	102,15,58,223,200,16	/* rcon = 16 */
6123	call	.L119key_128
6124.byte	102,15,58,223,200,32	/* rcon = 32 */
6125	call	.L119key_128
6126.byte	102,15,58,223,200,64	/* rcon = 64 */
6127	call	.L119key_128
6128.byte	102,15,58,223,200,128	/* rcon = 128 */
6129	call	.L119key_128
6130.byte	102,15,58,223,200,27	/* rcon = 0x1b */
6131	call	.L119key_128
6132.byte	102,15,58,223,200,54	/* rcon = 0x36 */
6133	call	.L119key_128
6134	movups	%xmm0,(%edx)		/* round key 10 */
6135	movl	%ecx,80(%edx)		/* rounds = 9 at offset 240 */
6136	jmp	.L120good_key
6137.align	16
6138.L119key_128:
	/* Helper: store previous round key, then fall into the mixer. */
6139	movups	%xmm0,(%edx)
6140	leal	16(%edx),%edx
6141.L118key_128_cold:
	/* Fold xmm0 with shifted copies of itself (shufps against the
	 * zero/previous scratch xmm4) and xor in the keygenassist word. */
6142	shufps	$16,%xmm0,%xmm4
6143	xorps	%xmm4,%xmm0
6144	shufps	$140,%xmm0,%xmm4
6145	xorps	%xmm4,%xmm0
6146	shufps	$255,%xmm1,%xmm1	/* broadcast rot-sub word */
6147	xorps	%xmm1,%xmm0
6148	ret
6149.align	16
6150.L11710rounds_alt:
	/* AES-128 via pshufb (SubBytes/RotWord mask) + aesenclast */
6151	movdqa	(%ebx),%xmm5		/* shuffle mask 0x0C0F0E0D */
6152	movl	$8,%ecx
6153	movdqa	32(%ebx),%xmm4		/* rcon = 1, doubled each round */
6154	movdqa	%xmm0,%xmm2
6155	movdqu	%xmm0,-16(%edx)
6156.L121loop_key128:
6157.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
6158.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
6159	pslld	$1,%xmm4		/* next rcon */
6160	leal	16(%edx),%edx
6161	movdqa	%xmm2,%xmm3		/* xmm2 ^ (xmm2<<32) ^ (<<64) ^ (<<96) */
6162	pslldq	$4,%xmm2
6163	pxor	%xmm2,%xmm3
6164	pslldq	$4,%xmm2
6165	pxor	%xmm2,%xmm3
6166	pslldq	$4,%xmm2
6167	pxor	%xmm3,%xmm2
6168	pxor	%xmm2,%xmm0
6169	movdqu	%xmm0,-16(%edx)
6170	movdqa	%xmm0,%xmm2
6171	decl	%ecx
6172	jnz	.L121loop_key128
6173	movdqa	48(%ebx),%xmm4		/* rcon = 0x1b for round 9 */
6174.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
6175.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
6176	pslld	$1,%xmm4		/* rcon 0x36 for round 10 */
6177	movdqa	%xmm2,%xmm3
6178	pslldq	$4,%xmm2
6179	pxor	%xmm2,%xmm3
6180	pslldq	$4,%xmm2
6181	pxor	%xmm2,%xmm3
6182	pslldq	$4,%xmm2
6183	pxor	%xmm3,%xmm2
6184	pxor	%xmm2,%xmm0
6185	movdqu	%xmm0,(%edx)		/* round key 9 */
6186	movdqa	%xmm0,%xmm2
6187.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
6188.byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
6189	movdqa	%xmm2,%xmm3
6190	pslldq	$4,%xmm2
6191	pxor	%xmm2,%xmm3
6192	pslldq	$4,%xmm2
6193	pxor	%xmm2,%xmm3
6194	pslldq	$4,%xmm2
6195	pxor	%xmm3,%xmm2
6196	pxor	%xmm2,%xmm0
6197	movdqu	%xmm0,16(%edx)		/* round key 10 */
6198	movl	$9,%ecx
6199	movl	%ecx,96(%edx)		/* rounds = 9 at offset 240 */
6200	jmp	.L120good_key
6201.align	16
6202.L11412rounds:
	/* --- AES-192: 12 rounds --- */
6203	movq	16(%eax),%xmm2		/* upper 8 key bytes */
6204	cmpl	$268435456,%ebp
6205	je	.L12212rounds_alt
6206	movl	$11,%ecx
6207	movups	%xmm0,-16(%edx)		/* round key 0 */
6208.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
6209	call	.L123key_192a_cold
6210.byte	102,15,58,223,202,2	/* rcon = 2 */
6211	call	.L124key_192b
6212.byte	102,15,58,223,202,4	/* rcon = 4 */
6213	call	.L125key_192a
6214.byte	102,15,58,223,202,8	/* rcon = 8 */
6215	call	.L124key_192b
6216.byte	102,15,58,223,202,16	/* rcon = 16 */
6217	call	.L125key_192a
6218.byte	102,15,58,223,202,32	/* rcon = 32 */
6219	call	.L124key_192b
6220.byte	102,15,58,223,202,64	/* rcon = 64 */
6221	call	.L125key_192a
6222.byte	102,15,58,223,202,128	/* rcon = 128 */
6223	call	.L124key_192b
6224	movups	%xmm0,(%edx)		/* last round key */
6225	movl	%ecx,48(%edx)		/* rounds = 11 at offset 240 */
6226	jmp	.L120good_key
6227.align	16
6228.L125key_192a:
6229	movups	%xmm0,(%edx)
6230	leal	16(%edx),%edx
6231.align	16
6232.L123key_192a_cold:
6233	movaps	%xmm2,%xmm5		/* remember low half for 192b */
6234.L126key_192b_warm:
	/* Mix xmm0 (128-bit part) and xmm2 (64-bit part) for AES-192. */
6235	shufps	$16,%xmm0,%xmm4
6236	movdqa	%xmm2,%xmm3
6237	xorps	%xmm4,%xmm0
6238	shufps	$140,%xmm0,%xmm4
6239	pslldq	$4,%xmm3
6240	xorps	%xmm4,%xmm0
6241	pshufd	$85,%xmm1,%xmm1		/* broadcast keygenassist word */
6242	pxor	%xmm3,%xmm2
6243	pxor	%xmm1,%xmm0
6244	pshufd	$255,%xmm0,%xmm3
6245	pxor	%xmm3,%xmm2
6246	ret
6247.align	16
6248.L124key_192b:
	/* Repack the 192-bit chain into two 128-bit round keys. */
6249	movaps	%xmm0,%xmm3
6250	shufps	$68,%xmm0,%xmm5
6251	movups	%xmm5,(%edx)
6252	shufps	$78,%xmm2,%xmm3
6253	movups	%xmm3,16(%edx)
6254	leal	32(%edx),%edx
6255	jmp	.L126key_192b_warm
6256.align	16
6257.L12212rounds_alt:
	/* AES-192 via pshufb + aesenclast */
6258	movdqa	16(%ebx),%xmm5		/* 192-bit shuffle mask 0x04070605 */
6259	movdqa	32(%ebx),%xmm4		/* rcon = 1 */
6260	movl	$8,%ecx
6261	movdqu	%xmm0,-16(%edx)
6262.L127loop_key192:
6263	movq	%xmm2,(%edx)		/* store 64-bit chain piece */
6264	movdqa	%xmm2,%xmm1
6265.byte	102,15,56,0,213		/* pshufb %xmm5,%xmm2 */
6266.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
6267	pslld	$1,%xmm4		/* next rcon */
6268	leal	24(%edx),%edx		/* 192-bit stride */
6269	movdqa	%xmm0,%xmm3
6270	pslldq	$4,%xmm0
6271	pxor	%xmm0,%xmm3
6272	pslldq	$4,%xmm0
6273	pxor	%xmm0,%xmm3
6274	pslldq	$4,%xmm0
6275	pxor	%xmm3,%xmm0
6276	pshufd	$255,%xmm0,%xmm3
6277	pxor	%xmm1,%xmm3
6278	pslldq	$4,%xmm1
6279	pxor	%xmm1,%xmm3
6280	pxor	%xmm2,%xmm0
6281	pxor	%xmm3,%xmm2
6282	movdqu	%xmm0,-16(%edx)
6283	decl	%ecx
6284	jnz	.L127loop_key192
6285	movl	$11,%ecx
6286	movl	%ecx,32(%edx)		/* rounds = 11 at offset 240 */
6287	jmp	.L120good_key
6288.align	16
6289.L11314rounds:
	/* --- AES-256: 14 rounds --- */
6290	movups	16(%eax),%xmm2		/* upper 16 key bytes */
6291	leal	16(%edx),%edx
6292	cmpl	$268435456,%ebp
6293	je	.L12814rounds_alt
6294	movl	$13,%ecx
6295	movups	%xmm0,-32(%edx)		/* round key 0 */
6296	movups	%xmm2,-16(%edx)		/* round key 1 */
6297.byte	102,15,58,223,202,1	/* aeskeygenassist $1,%xmm2,%xmm1 */
6298	call	.L129key_256a_cold
6299.byte	102,15,58,223,200,1	/* aeskeygenassist $1,%xmm0,%xmm1 */
6300	call	.L130key_256b
6301.byte	102,15,58,223,202,2	/* rcon = 2 */
6302	call	.L131key_256a
6303.byte	102,15,58,223,200,2
6304	call	.L130key_256b
6305.byte	102,15,58,223,202,4	/* rcon = 4 */
6306	call	.L131key_256a
6307.byte	102,15,58,223,200,4
6308	call	.L130key_256b
6309.byte	102,15,58,223,202,8	/* rcon = 8 */
6310	call	.L131key_256a
6311.byte	102,15,58,223,200,8
6312	call	.L130key_256b
6313.byte	102,15,58,223,202,16	/* rcon = 16 */
6314	call	.L131key_256a
6315.byte	102,15,58,223,200,16
6316	call	.L130key_256b
6317.byte	102,15,58,223,202,32	/* rcon = 32 */
6318	call	.L131key_256a
6319.byte	102,15,58,223,200,32
6320	call	.L130key_256b
6321.byte	102,15,58,223,202,64	/* rcon = 64 */
6322	call	.L131key_256a
6323	movups	%xmm0,(%edx)		/* round key 14 */
6324	movl	%ecx,16(%edx)		/* rounds = 13 at offset 240 */
6325	xorl	%eax,%eax
6326	jmp	.L120good_key
6327.align	16
6328.L131key_256a:
6329	movups	%xmm2,(%edx)
6330	leal	16(%edx),%edx
6331.L129key_256a_cold:
	/* Even round keys: same shufps ladder as AES-128 on xmm0. */
6332	shufps	$16,%xmm0,%xmm4
6333	xorps	%xmm4,%xmm0
6334	shufps	$140,%xmm0,%xmm4
6335	xorps	%xmm4,%xmm0
6336	shufps	$255,%xmm1,%xmm1
6337	xorps	%xmm1,%xmm0
6338	ret
6339.align	16
6340.L130key_256b:
	/* Odd round keys: same ladder applied to xmm2 (no RotWord,
	 * hence the $170 broadcast of the SubWord result). */
6341	movups	%xmm0,(%edx)
6342	leal	16(%edx),%edx
6343	shufps	$16,%xmm2,%xmm4
6344	xorps	%xmm4,%xmm2
6345	shufps	$140,%xmm2,%xmm4
6346	xorps	%xmm4,%xmm2
6347	shufps	$170,%xmm1,%xmm1
6348	xorps	%xmm1,%xmm2
6349	ret
6350.align	16
6351.L12814rounds_alt:
	/* AES-256 via pshufb + aesenclast */
6352	movdqa	(%ebx),%xmm5		/* shuffle mask 0x0C0F0E0D */
6353	movdqa	32(%ebx),%xmm4		/* rcon = 1 */
6354	movl	$7,%ecx
6355	movdqu	%xmm0,-32(%edx)
6356	movdqa	%xmm2,%xmm1
6357	movdqu	%xmm2,-16(%edx)
6358.L132loop_key256:
6359.byte	102,15,56,0,213		/* pshufb %xmm5,%xmm2 */
6360.byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
6361	movdqa	%xmm0,%xmm3
6362	pslldq	$4,%xmm0
6363	pxor	%xmm0,%xmm3
6364	pslldq	$4,%xmm0
6365	pxor	%xmm0,%xmm3
6366	pslldq	$4,%xmm0
6367	pxor	%xmm3,%xmm0
6368	pslld	$1,%xmm4		/* next rcon */
6369	pxor	%xmm2,%xmm0
6370	movdqu	%xmm0,(%edx)
6371	decl	%ecx
6372	jz	.L133done_key256
	/* Odd round key: SubWord (aesenclast with zero rcon) only. */
6373	pshufd	$255,%xmm0,%xmm2
6374	pxor	%xmm3,%xmm3
6375.byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 */
6376	movdqa	%xmm1,%xmm3
6377	pslldq	$4,%xmm1
6378	pxor	%xmm1,%xmm3
6379	pslldq	$4,%xmm1
6380	pxor	%xmm1,%xmm3
6381	pslldq	$4,%xmm1
6382	pxor	%xmm3,%xmm1
6383	pxor	%xmm1,%xmm2
6384	movdqu	%xmm2,16(%edx)
6385	leal	32(%edx),%edx
6386	movdqa	%xmm2,%xmm1
6387	jmp	.L132loop_key256
6388.L133done_key256:
6389	movl	$13,%ecx
6390	movl	%ecx,16(%edx)		/* rounds = 13 at offset 240 */
6391.L120good_key:
	/* Scrub key material from registers; return 0. */
6392	pxor	%xmm0,%xmm0
6393	pxor	%xmm1,%xmm1
6394	pxor	%xmm2,%xmm2
6395	pxor	%xmm3,%xmm3
6396	pxor	%xmm4,%xmm4
6397	pxor	%xmm5,%xmm5
6398	xorl	%eax,%eax
6399	popl	%ebx
6400	popl	%ebp
6401	ret
6402.align	4
6403.L111bad_pointer:
6404	movl	$-1,%eax		/* NULL argument */
6405	popl	%ebx
6406	popl	%ebp
6407	ret
6408.align	4
6409.L115bad_keybits:
6410	pxor	%xmm0,%xmm0
6411	movl	$-2,%eax		/* unsupported key size */
6412	popl	%ebx
6413	popl	%ebp
6414	ret
6415.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
/*
 * int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 * cdecl wrapper: moves the stack arguments into the register convention
 * expected by _aesni_set_encrypt_key (eax=userKey, ecx=bits, edx=key).
 */
6416.globl	aesni_set_encrypt_key
6417.type	aesni_set_encrypt_key,@function
6418.align	16
6419aesni_set_encrypt_key:
6420.L_aesni_set_encrypt_key_begin:
6421	movl	4(%esp),%eax		/* userKey */
6422	movl	8(%esp),%ecx		/* bits */
6423	movl	12(%esp),%edx		/* key schedule */
6424	call	_aesni_set_encrypt_key
6425	ret
6426.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
/*
 * int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                           AES_KEY *key);
 * Builds the encryption schedule, then converts it in place to the
 * "Equivalent Inverse Cipher" schedule: round keys are reversed end to
 * end and every interior key is passed through aesimc (InvMixColumns),
 * as required by aesdec/aesdeclast.  Returns _aesni_set_encrypt_key's
 * status (0 ok, -1 bad pointer, -2 bad key size).
 */
6427.globl	aesni_set_decrypt_key
6428.type	aesni_set_decrypt_key,@function
6429.align	16
6430aesni_set_decrypt_key:
6431.L_aesni_set_decrypt_key_begin:
6432	movl	4(%esp),%eax		/* userKey */
6433	movl	8(%esp),%ecx		/* bits */
6434	movl	12(%esp),%edx		/* key schedule */
6435	call	_aesni_set_encrypt_key	/* leaves rounds in %ecx, status in %eax */
6436	movl	12(%esp),%edx
6437	shll	$4,%ecx			/* ecx = 16*rounds */
6438	testl	%eax,%eax
6439	jnz	.L134dec_key_ret	/* propagate failure */
6440	leal	16(%edx,%ecx,1),%eax	/* eax = last round key */
	/* Swap first and last round keys (no aesimc on the end keys). */
6441	movups	(%edx),%xmm0
6442	movups	(%eax),%xmm1
6443	movups	%xmm0,(%eax)
6444	movups	%xmm1,(%edx)
6445	leal	16(%edx),%edx
6446	leal	-16(%eax),%eax
6447.L135dec_key_inverse:
	/* Walk inward, swapping pairs and applying InvMixColumns. */
6448	movups	(%edx),%xmm0
6449	movups	(%eax),%xmm1
6450.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
6451.byte	102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
6452	leal	16(%edx),%edx
6453	leal	-16(%eax),%eax
6454	movups	%xmm0,16(%eax)
6455	movups	%xmm1,-16(%edx)
6456	cmpl	%edx,%eax
6457	ja	.L135dec_key_inverse
6458	movups	(%edx),%xmm0		/* middle key (odd count): in place */
6459.byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
6460	movups	%xmm0,(%edx)
6461	pxor	%xmm0,%xmm0		/* scrub key material */
6462	pxor	%xmm1,%xmm1
6463	xorl	%eax,%eax		/* success */
6464.L134dec_key_ret:
6465	ret
6466.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
/*
 * Constants for the pshufb-based ("_alt") key-schedule paths:
 *   +0  : 0x0C0F0E0D broadcast -- RotWord+SubWord shuffle mask (128/256)
 *   +16 : 0x04070605 broadcast -- shuffle mask for the 192-bit ladder
 *   +32 : 1 broadcast  -- initial round constant (doubled via pslld)
 *   +48 : 27 (0x1b) broadcast -- round constant after the 0x80 wrap
 * The .byte string is the CRYPTOGAMS ident:
 * "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
 */
6467.align	64
6468.Lkey_const:
6469.long	202313229,202313229,202313229,202313229
6470.long	67569157,67569157,67569157,67569157
6471.long	1,1,1,1
6472.long	27,27,27,27
6473.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
6474.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
6475.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
6476.byte	115,108,46,111,114,103,62,0
6477.comm	OPENSSL_ia32cap_P,16,4	/* shared CPUID capability vector */
6478#endif
6479