/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from e_padlock-x86_64.pl. */
.text

/*
 * padlock_capability
 *
 * Probes CPUID and returns a feature word in %eax.
 *   - CPUID leaf 0 must report the vendor string "CentaurHauls" or
 *     "  Shanghai  " (compared as three little-endian dwords in
 *     %ebx, %edx, %ecx); any other vendor returns 0.
 *   - Leaf 0xC0000000 must report a maximum leaf of at least
 *     0xC0000001, otherwise 0 is returned.
 *   - On success the result is %edx of leaf 0xC0000001 with bit 4
 *     forced to 1.
 *
 * ABI:   System V AMD64
 * In:    nothing
 * Out:   %eax = 0, or the leaf 0xC0000001 %edx word with bit 4 set
 * Clobb: %rcx, %rdx, %r8, flags (%rbx is parked in %r8 and restored)
 */
.globl	padlock_capability
.type	padlock_capability,@function
.align	16
padlock_capability:
	movq	%rbx,%r8		/* cpuid overwrites callee-saved %rbx; park it in %r8 */
	xorl	%eax,%eax		/* leaf 0: vendor identification */
	cpuid
	xorl	%eax,%eax		/* default return value: 0 */
	cmpl	$0x746e6543,%ebx	/* "Cent" */
	jne	.Lzhaoxin
	cmpl	$0x48727561,%edx	/* "aurH" */
	jne	.Lnoluck
	cmpl	$0x736c7561,%ecx	/* "auls" */
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmpl	$0x68532020,%ebx	/* "  Sh" */
	jne	.Lnoluck
	cmpl	$0x68676e61,%edx	/* "angh" */
	jne	.Lnoluck
	cmpl	$0x20206961,%ecx	/* "ai  " */
	jne	.Lnoluck
.LzhaoxinEnd:
	movl	$0xC0000000,%eax	/* query the highest 0xC000xxxx leaf */
	cpuid
	movl	%eax,%edx
	xorl	%eax,%eax		/* return 0 if the feature leaf is missing */
	cmpl	$0xC0000001,%edx
	jb	.Lnoluck
	movl	$0xC0000001,%eax	/* feature flags leaf */
	cpuid
	movl	%edx,%eax		/* result = feature flags from %edx */
	andl	$0xffffffef,%eax	/* clear bit 4 ... */
	orl	$0x10,%eax		/* ... then set it: bit 4 is always reported as 1 */
.Lnoluck:
	movq	%r8,%rbx		/* restore callee-saved %rbx */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_capability,.-padlock_capability

/*
 * padlock_key_bswap
 *
 * Byte-swaps, in place, the first 4*(n+1) 32-bit words of the buffer at
 * %rdi, where n is the 32-bit value stored at byte offset 240 of that
 * same buffer.
 * NOTE(review): the layout looks like an AES key schedule followed by
 * its round count at offset 240 -- confirm against the C declaration.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to the buffer
 * Out:   nothing (the buffer is modified in place)
 * Clobb: %rax, %rdx, %rdi, flags
 */
.globl	padlock_key_bswap
.type	padlock_key_bswap,@function
.align	16
padlock_key_bswap:
	movl	240(%rdi),%edx		/* n = 32-bit value at offset 240 */
	incl	%edx
	shll	$2,%edx			/* word count = (n + 1) * 4 */
.Lbswap_loop:
	movl	(%rdi),%eax
	bswapl	%eax			/* reverse the byte order of one word */
	movl	%eax,(%rdi)
	leaq	4(%rdi),%rdi		/* advance to the next word */
	subl	$1,%edx
	jnz	.Lbswap_loop
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_key_bswap,.-padlock_key_bswap

/*
 * padlock_verify_context
 *
 * Public wrapper around _padlock_verify_ctx: pushes the current flags,
 * passes the context pointer in %rdx and the address of
 * .Lpadlock_saved_context in %rax, then drops the pushed flags image.
 *
 * ABI:   System V AMD64
 * In:    %rdi = context pointer
 * Out:   nothing
 * Clobb: %rax, %rdx, %r8, flags
 */
.globl	padlock_verify_context
.type	padlock_verify_context,@function
.align	16
padlock_verify_context:
	movq	%rdi,%rdx		/* helper expects the context pointer in %rdx */
	pushf				/* flags image the helper reads at 8(%rsp) */
	leaq	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	leaq	8(%rsp),%rsp		/* drop the flags image without touching the flags */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_verify_context,.-padlock_verify_context

/*
 * _padlock_verify_ctx  (file-local helper, private register contract)
 *
 * Records %rdx as the most recently used context in the slot at (%rax).
 * If bit 30 of the caller-pushed flags image is set and the slot held a
 * different pointer, a pushf/popf pair is executed first.
 * NOTE(review): per the PadLock documentation, reloading EFLAGS this way
 * is presumably what forces the engine to reload its key -- confirm.
 *
 * In:    %rax    = address of the saved-context slot
 *        %rdx    = context pointer
 *        8(%rsp) = flags image pushed by the caller right before the call
 * Out:   (%rax) = %rdx
 * Clobb: %r8, flags
 */
.type	_padlock_verify_ctx,@function
.align	16
_padlock_verify_ctx:
	movq	8(%rsp),%r8		/* flags image sits just above the return address */
	btq	$30,%r8
	jnc	.Lverified		/* bit 30 clear: nothing to do */
	cmpq	(%rax),%rdx
	je	.Lverified		/* same context as last time */
	pushf
	popf				/* rewrite the flags register from its own image */
.Lverified:
	movq	%rdx,(%rax)		/* remember this context for the next call */
	.byte	0xf3,0xc3		/* rep ret */
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

/*
 * padlock_reload_key
 *
 * Executes a bare pushf/popf pair and returns.
 * NOTE(review): per the PadLock documentation this is presumably the
 * sequence that makes the engine reload its key on the next xcrypt
 * instruction -- confirm.
 *
 * ABI:   System V AMD64
 * In:    nothing
 * Out:   nothing
 * Clobb: nothing beyond the flags being rewritten from their own image
 */
.globl	padlock_reload_key
.type	padlock_reload_key,@function
.align	16
padlock_reload_key:
	pushf
	popf
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_reload_key,.-padlock_reload_key

/*
 * padlock_aes_block
 *
 * Issues `rep xcryptecb` with a block count of 1.
 *
 * ABI:   System V AMD64
 * In:    %rdi, %rsi = passed through untouched to the instruction
 *                     (presumably output and input block pointers -- confirm)
 *        %rdx       = context pointer; ctx+16 is handed to the instruction
 *                     in %rdx and ctx+32 in %rbx (presumably control word
 *                     and key -- confirm)
 * Out:   whatever the instruction writes through %rdi
 * Clobb: %rcx, %rdx, %r8, plus whatever the instruction updates
 *        (%rbx is parked in %r8 and restored)
 */
.globl	padlock_aes_block
.type	padlock_aes_block,@function
.align	16
padlock_aes_block:
	movq	%rbx,%r8		/* %rbx is callee-saved; park it in %r8 */
	movq	$1,%rcx			/* block count = 1 */
	leaq	32(%rdx),%rbx		/* %rbx = ctx+32 */
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16 */
.byte	0xf3,0x0f,0xa7,0xc8		/* rep xcryptecb */
	movq	%r8,%rbx		/* restore %rbx */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_aes_block,.-padlock_aes_block

/*
 * padlock_xstore
 *
 * Issues a single `xstore` with %edx loaded from the second argument.
 *
 * ABI:   System V AMD64
 * In:    %rdi = passed through untouched to the instruction
 *               (presumably the destination buffer -- confirm)
 *        %esi = value placed in %edx for the instruction
 * Out:   %rax is left as the instruction sets it
 *        (presumably a status word -- confirm)
 * Clobb: %rdx, plus whatever the instruction updates
 */
.globl	padlock_xstore
.type	padlock_xstore,@function
.align	16
padlock_xstore:
	movl	%esi,%edx
.byte	0x0f,0xa7,0xc0			/* xstore */
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_xstore,.-padlock_xstore

/*
 * padlock_sha1_oneshot
 *
 * Copies the 20-byte state at %rdi into a 16-byte aligned stack buffer,
 * runs `rep xsha1` on that copy with %rax = 0, and copies the 20 bytes
 * back to the caller buffer.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to 20 bytes of state (read, then overwritten)
 *        %rsi = passed through untouched to the instruction
 *               (presumably the input data -- confirm)
 *        %rdx = moved to %rcx for the instruction
 *               (presumably the byte length -- confirm)
 * Out:   the 20 bytes at the original %rdi are replaced
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, flags, plus whatever the
 *        instruction updates
 * Stack: 136 bytes of scratch
 */
.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,@function
.align	16
padlock_sha1_oneshot:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer for the copy-back */
	movups	(%rdi),%xmm0		/* state bytes 0..15; caller pointer may be unaligned */
	subq	$128+8,%rsp		/* scratch; 136 + return address keeps %rsp 16-byte aligned for movaps */
	movl	16(%rdi),%eax		/* state bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* the instruction works on the aligned stack copy */
	movl	%eax,16(%rsp)
	xorq	%rax,%rax		/* %rax = 0 */
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy the 20 bytes back to the caller */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

/*
 * padlock_sha1_blocks
 *
 * Copies the 20-byte state at %rdi into a 16-byte aligned stack buffer,
 * runs `rep xsha1` on that copy with %rax = -1, and copies the 20 bytes
 * back to the caller buffer.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to 20 bytes of state (read, then overwritten)
 *        %rsi = passed through untouched to the instruction
 *               (presumably the input data -- confirm)
 *        %rdx = moved to %rcx for the instruction
 *               (presumably a block count -- confirm)
 * Out:   the 20 bytes at the original %rdi are replaced
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, flags, plus whatever the
 *        instruction updates
 * Stack: 136 bytes of scratch
 */
.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,@function
.align	16
padlock_sha1_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer for the copy-back */
	movups	(%rdi),%xmm0		/* state bytes 0..15; caller pointer may be unaligned */
	subq	$128+8,%rsp		/* scratch; 136 + return address keeps %rsp 16-byte aligned for movaps */
	movl	16(%rdi),%eax		/* state bytes 16..19 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* the instruction works on the aligned stack copy */
	movl	%eax,16(%rsp)
	movq	$-1,%rax		/* %rax = -1 */
.byte	0xf3,0x0f,0xa6,0xc8		/* rep xsha1 */
	movaps	(%rsp),%xmm0
	movl	16(%rsp),%eax
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy the 20 bytes back to the caller */
	movl	%eax,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha1_blocks,.-padlock_sha1_blocks

/*
 * padlock_sha256_oneshot
 *
 * Copies the 32-byte state at %rdi into a 16-byte aligned stack buffer,
 * runs `rep xsha256` on that copy with %rax = 0, and copies the 32 bytes
 * back to the caller buffer.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to 32 bytes of state (read, then overwritten)
 *        %rsi = passed through untouched to the instruction
 *               (presumably the input data -- confirm)
 *        %rdx = moved to %rcx for the instruction
 *               (presumably the byte length -- confirm)
 * Out:   the 32 bytes at the original %rdi are replaced
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags, plus whatever the
 *        instruction updates
 * Stack: 136 bytes of scratch
 */
.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,@function
.align	16
padlock_sha256_oneshot:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer for the copy-back */
	movups	(%rdi),%xmm0		/* state bytes 0..15; caller pointer may be unaligned */
	subq	$128+8,%rsp		/* scratch; 136 + return address keeps %rsp 16-byte aligned for movaps */
	movups	16(%rdi),%xmm1		/* state bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* the instruction works on the aligned stack copy */
	movaps	%xmm1,16(%rsp)
	xorq	%rax,%rax		/* %rax = 0 */
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy the 32 bytes back to the caller */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

/*
 * padlock_sha256_blocks
 *
 * Copies the 32-byte state at %rdi into a 16-byte aligned stack buffer,
 * runs `rep xsha256` on that copy with %rax = -1, and copies the 32 bytes
 * back to the caller buffer.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to 32 bytes of state (read, then overwritten)
 *        %rsi = passed through untouched to the instruction
 *               (presumably the input data -- confirm)
 *        %rdx = moved to %rcx for the instruction
 *               (presumably a block count -- confirm)
 * Out:   the 32 bytes at the original %rdi are replaced
 * Clobb: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags, plus whatever the
 *        instruction updates
 * Stack: 136 bytes of scratch
 */
.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,@function
.align	16
padlock_sha256_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer for the copy-back */
	movups	(%rdi),%xmm0		/* state bytes 0..15; caller pointer may be unaligned */
	subq	$128+8,%rsp		/* scratch; 136 + return address keeps %rsp 16-byte aligned for movaps */
	movups	16(%rdi),%xmm1		/* state bytes 16..31 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* the instruction works on the aligned stack copy */
	movaps	%xmm1,16(%rsp)
	movq	$-1,%rax		/* %rax = -1 */
.byte	0xf3,0x0f,0xa6,0xd0		/* rep xsha256 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy the 32 bytes back to the caller */
	movups	%xmm1,16(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha256_blocks,.-padlock_sha256_blocks

/*
 * padlock_sha512_blocks
 *
 * Copies the 64-byte state at %rdi into a 16-byte aligned stack buffer,
 * runs the instruction encoded as f3 0f a6 e0 on that copy, and copies
 * the 64 bytes back to the caller buffer.  This routine does not load
 * %rax before the instruction.
 * NOTE(review): f3 0f a6 e0 is presumably `rep xsha512` -- confirm
 * against the vendor documentation.
 *
 * ABI:   System V AMD64
 * In:    %rdi = pointer to 64 bytes of state (read, then overwritten)
 *        %rsi = passed through untouched to the instruction
 *               (presumably the input data -- confirm)
 *        %rdx = moved to %rcx for the instruction
 *               (presumably a block count -- confirm)
 * Out:   the 64 bytes at the original %rdi are replaced
 * Clobb: %rcx, %rdx, %rdi, %xmm0-%xmm3, flags, plus whatever the
 *        instruction updates
 * Stack: 136 bytes of scratch
 */
.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,@function
.align	16
padlock_sha512_blocks:
	movq	%rdx,%rcx
	movq	%rdi,%rdx		/* keep the caller state pointer for the copy-back */
	movups	(%rdi),%xmm0		/* state bytes 0..15; caller pointer may be unaligned */
	subq	$128+8,%rsp		/* scratch; 136 + return address keeps %rsp 16-byte aligned for movaps */
	movups	16(%rdi),%xmm1		/* state bytes 16..31 */
	movups	32(%rdi),%xmm2		/* state bytes 32..47 */
	movups	48(%rdi),%xmm3		/* state bytes 48..63 */
	movaps	%xmm0,(%rsp)
	movq	%rsp,%rdi		/* the instruction works on the aligned stack copy */
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
.byte	0xf3,0x0f,0xa6,0xe0		/* presumably rep xsha512 */
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	addq	$128+8,%rsp
	movups	%xmm0,(%rdx)		/* copy the 64 bytes back to the caller */
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
/*
 * padlock_ecb_encrypt
 *
 * Runs `rep xcryptecb` over a buffer.  When input or output is not
 * 16-byte aligned (and bit 5 of the control word is clear) the data is
 * processed in chunks of at most 512 bytes and bounced through a scratch
 * area carved from the stack; the scratch area is zeroed before return.
 * A short tail that ends less than 128 bytes before a 4 KiB boundary is
 * first copied to the stack and processed from there.
 * NOTE(review): the 128-byte guard presumably protects against the
 * engine reading past the end of the input across a page boundary --
 * confirm against the generator script.
 *
 * ABI:   System V AMD64
 * In:    %rdi = output pointer
 *        %rsi = input pointer
 *        %rdx = context pointer, must be 16-byte aligned; the control
 *               word is read at ctx+16, and ctx+0 / ctx+32 are handed to
 *               the instruction in %rax / %rbx
 *        %rcx = length in bytes, must be a multiple of 16
 * Out:   %eax = 1 on completion, 0 if the context or length check failed
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags
 *        (%rbx and %rbp are saved and restored)
 */
.globl	padlock_ecb_encrypt
.type	padlock_ecb_encrypt,@function
.align	16
padlock_ecb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx		/* context must be 16-byte aligned */
	jnz	.Lecb_abort
	testq	$15,%rcx		/* length must be a multiple of 16 */
	jnz	.Lecb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image for _padlock_verify_ctx; dropped at .Lecb_exit */
	cld				/* the rep movsq copies below must run forward */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16, the control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* control word bit 5 set: skip the bounce-buffer path */
	jnz	.Lecb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = 1 if the output is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = 1 if the input is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lecb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = -1 if the output is misaligned, else 0 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* scratch size: min(len, 512) if output misaligned, else 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len % 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* carve the scratch area; lea keeps ZF from the andq */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	cmpq	%rbx,%rcx
	ja	.Lecb_loop		/* more than one chunk to go */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no scratch area: the data will be processed at the output pointer */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from the end of that buffer to the next 4 KiB boundary */
	cmpq	$128,%rax
	movq	$-128,%rax
	cmovaeq	%rbx,%rax		/* mask = chunk itself (no-op) if >= 128 bytes remain, else -128 */
	andq	%rax,%rbx		/* near the boundary: round the chunk down to a multiple of 128 */
	jz	.Lecb_unaligned_tail	/* nothing left after rounding: treat it all as a tail */
	jmp	.Lecb_loop
.align	16
.Lecb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len and chunk across the copies */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: work in the scratch area instead */
	testq	$0x0f,%rsi
	jz	.Lecb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the misaligned input chunk to %rdi */
	subq	%rbx,%rdi		/* rewind %rdi to the start of the copy */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* and process that copy in place */
.Lecb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb */
	movq	%r8,%rdi		/* restore the real output pointer and the chunk size */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lecb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from scratch to the misaligned output */
	subq	%rbx,%rdi
.Lecb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp by the chunk, shrink len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* later chunks are 512 bytes; mov keeps ZF from the subq */
	jz	.Lecb_break		/* len == 0: finished */
	cmpq	%rbx,%rcx
	jae	.Lecb_loop		/* at least one full chunk left */
.Lecb_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch area yet: allocate len bytes now */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the remaining input to the scratch area */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lecb_loop
.align	16
.Lecb_break:
	cmpq	%rbp,%rsp
	je	.Lecb_done		/* no scratch area was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lecb_bzero:				/* zero the scratch area, 16 bytes at a time */
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lecb_bzero

.Lecb_done:
	leaq	(%rbp),%rsp		/* release the scratch area */
	jmp	.Lecb_exit

.align	16
.Lecb_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from the end of the input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$128,%rbp
	movq	$128-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = 0 if >= 128 bytes remain, else 127 */
	andq	%rcx,%rbp		/* %rbp = tail length (len & mask) to bounce through the stack */
	subq	%rbp,%rcx
	jz	.Lecb_aligned_tail	/* everything is tail */
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,200		/* rep xcryptecb over the bulk, directly on the caller buffers */
	testq	%rbp,%rbp
	jz	.Lecb_exit		/* no tail */

.Lecb_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lecb_loop		/* run the tail through the common loop; it ends at .Lecb_break */
.Lecb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* drop the flags image pushed before _padlock_verify_ctx */
.Lecb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ecb_encrypt,.-padlock_ecb_encrypt
/*
 * padlock_cbc_encrypt
 *
 * Runs `rep xcryptcbc` over a buffer.  When input or output is not
 * 16-byte aligned (and bit 5 of the control word is clear) the data is
 * processed in chunks of at most 512 bytes and bounced through a scratch
 * area carved from the stack; the scratch area is zeroed before return.
 * A short tail that ends less than 64 bytes before a 4 KiB boundary is
 * first copied to the stack and processed from there.  After every
 * instruction the 16 bytes %rax points at are copied to ctx+0.
 * NOTE(review): ctx+0 is presumably the IV slot and the 64-byte guard
 * presumably protects against reads past the end of the input across a
 * page boundary -- confirm against the generator script.
 *
 * ABI:   System V AMD64
 * In:    %rdi = output pointer
 *        %rsi = input pointer
 *        %rdx = context pointer, must be 16-byte aligned; the control
 *               word is read at ctx+16, and ctx+0 / ctx+32 are handed to
 *               the instruction in %rax / %rbx
 *        %rcx = length in bytes, must be a multiple of 16
 * Out:   %eax = 1 on completion, 0 if the context or length check failed
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags
 *        (%rbx and %rbp are saved and restored)
 */
.globl	padlock_cbc_encrypt
.type	padlock_cbc_encrypt,@function
.align	16
padlock_cbc_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx		/* context must be 16-byte aligned */
	jnz	.Lcbc_abort
	testq	$15,%rcx		/* length must be a multiple of 16 */
	jnz	.Lcbc_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image for _padlock_verify_ctx; dropped at .Lcbc_exit */
	cld				/* the rep movsq copies below must run forward */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16, the control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* control word bit 5 set: skip the bounce-buffer path */
	jnz	.Lcbc_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = 1 if the output is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = 1 if the input is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lcbc_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = -1 if the output is misaligned, else 0 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* scratch size: min(len, 512) if output misaligned, else 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len % 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* carve the scratch area; lea keeps ZF from the andq */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	cmpq	%rbx,%rcx
	ja	.Lcbc_loop		/* more than one chunk to go */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no scratch area: the data will be processed at the output pointer */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from the end of that buffer to the next 4 KiB boundary */
	cmpq	$64,%rax
	movq	$-64,%rax
	cmovaeq	%rbx,%rax		/* mask = chunk itself (no-op) if >= 64 bytes remain, else -64 */
	andq	%rax,%rbx		/* near the boundary: round the chunk down to a multiple of 64 */
	jz	.Lcbc_unaligned_tail	/* nothing left after rounding: treat it all as a tail */
	jmp	.Lcbc_loop
.align	16
.Lcbc_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len and chunk across the copies */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: work in the scratch area instead */
	testq	$0x0f,%rsi
	jz	.Lcbc_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the misaligned input chunk to %rdi */
	subq	%rbx,%rdi		/* rewind %rdi to the start of the copy */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* and process that copy in place */
.Lcbc_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
	movq	%r8,%rdi		/* restore the real output pointer and the chunk size */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcbc_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from scratch to the misaligned output */
	subq	%rbx,%rdi
.Lcbc_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp by the chunk, shrink len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* later chunks are 512 bytes; mov keeps ZF from the subq */
	jz	.Lcbc_break		/* len == 0: finished */
	cmpq	%rbx,%rcx
	jae	.Lcbc_loop		/* at least one full chunk left */
.Lcbc_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch area yet: allocate len bytes now */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the remaining input to the scratch area */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop
.align	16
.Lcbc_break:
	cmpq	%rbp,%rsp
	je	.Lcbc_done		/* no scratch area was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcbc_bzero:				/* zero the scratch area, 16 bytes at a time */
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcbc_bzero

.Lcbc_done:
	leaq	(%rbp),%rsp		/* release the scratch area */
	jmp	.Lcbc_exit

.align	16
.Lcbc_aligned:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from the end of the input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$64,%rbp
	movq	$64-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = 0 if >= 64 bytes remain, else 63 */
	andq	%rcx,%rbp		/* %rbp = tail length (len & mask) to bounce through the stack */
	subq	%rbp,%rcx
	jz	.Lcbc_aligned_tail	/* everything is tail */
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,208		/* rep xcryptcbc over the bulk, directly on the caller buffers */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
	testq	%rbp,%rbp
	jz	.Lcbc_exit		/* no tail */

.Lcbc_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lcbc_loop		/* run the tail through the common loop; it ends at .Lcbc_break */
.Lcbc_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* drop the flags image pushed before _padlock_verify_ctx */
.Lcbc_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cbc_encrypt,.-padlock_cbc_encrypt
/*
 * padlock_cfb_encrypt
 *
 * Runs `rep xcryptcfb` over a buffer.  When input or output is not
 * 16-byte aligned (and bit 5 of the control word is clear) the data is
 * processed in chunks of at most 512 bytes and bounced through a scratch
 * area carved from the stack; the scratch area is zeroed before return.
 * Otherwise the whole buffer is handed to a single instruction.  After
 * every instruction the 16 bytes %rax points at are copied to ctx+0.
 * NOTE(review): ctx+0 is presumably the IV slot -- confirm.
 *
 * ABI:   System V AMD64
 * In:    %rdi = output pointer
 *        %rsi = input pointer
 *        %rdx = context pointer, must be 16-byte aligned; the control
 *               word is read at ctx+16, and ctx+0 / ctx+32 are handed to
 *               the instruction in %rax / %rbx
 *        %rcx = length in bytes, must be a multiple of 16
 * Out:   %eax = 1 on completion, 0 if the context or length check failed
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags
 *        (%rbx and %rbp are saved and restored)
 */
.globl	padlock_cfb_encrypt
.type	padlock_cfb_encrypt,@function
.align	16
padlock_cfb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx		/* context must be 16-byte aligned */
	jnz	.Lcfb_abort
	testq	$15,%rcx		/* length must be a multiple of 16 */
	jnz	.Lcfb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image for _padlock_verify_ctx; dropped at .Lcfb_exit */
	cld				/* the rep movsq copies below must run forward */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16, the control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* control word bit 5 set: skip the bounce-buffer path */
	jnz	.Lcfb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = 1 if the output is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = 1 if the input is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lcfb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = -1 if the output is misaligned, else 0 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* scratch size: min(len, 512) if output misaligned, else 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len % 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* carve the scratch area; lea keeps ZF from the andq */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	jmp	.Lcfb_loop
.align	16
.Lcfb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len and chunk across the copies */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: work in the scratch area instead */
	testq	$0x0f,%rsi
	jz	.Lcfb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the misaligned input chunk to %rdi */
	subq	%rbx,%rdi		/* rewind %rdi to the start of the copy */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* and process that copy in place */
.Lcfb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
	movq	%r8,%rdi		/* restore the real output pointer and the chunk size */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lcfb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from scratch to the misaligned output */
	subq	%rbx,%rdi
.Lcfb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp by the chunk, shrink len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* later chunks are 512 bytes; mov keeps ZF from the subq */
	jnz	.Lcfb_loop		/* more data left */
	cmpq	%rbp,%rsp
	je	.Lcfb_done		/* no scratch area was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lcfb_bzero:				/* zero the scratch area, 16 bytes at a time */
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lcfb_bzero

.Lcfb_done:
	leaq	(%rbp),%rsp		/* release the scratch area */
	jmp	.Lcfb_exit

.align	16
.Lcfb_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,224		/* rep xcryptcfb over the whole buffer in one go */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
.Lcfb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* drop the flags image pushed before _padlock_verify_ctx */
.Lcfb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_cfb_encrypt,.-padlock_cfb_encrypt
/*
 * padlock_ofb_encrypt
 *
 * Runs `rep xcryptofb` over a buffer.  When input or output is not
 * 16-byte aligned (and bit 5 of the control word is clear) the data is
 * processed in chunks of at most 512 bytes and bounced through a scratch
 * area carved from the stack; the scratch area is zeroed before return.
 * Otherwise the whole buffer is handed to a single instruction.  After
 * every instruction the 16 bytes %rax points at are copied to ctx+0.
 * NOTE(review): ctx+0 is presumably the IV slot -- confirm.
 *
 * ABI:   System V AMD64
 * In:    %rdi = output pointer
 *        %rsi = input pointer
 *        %rdx = context pointer, must be 16-byte aligned; the control
 *               word is read at ctx+16, and ctx+0 / ctx+32 are handed to
 *               the instruction in %rax / %rbx
 *        %rcx = length in bytes, must be a multiple of 16
 * Out:   %eax = 1 on completion, 0 if the context or length check failed
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags
 *        (%rbx and %rbp are saved and restored)
 */
.globl	padlock_ofb_encrypt
.type	padlock_ofb_encrypt,@function
.align	16
padlock_ofb_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx		/* context must be 16-byte aligned */
	jnz	.Lofb_abort
	testq	$15,%rcx		/* length must be a multiple of 16 */
	jnz	.Lofb_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image for _padlock_verify_ctx; dropped at .Lofb_exit */
	cld				/* the rep movsq copies below must run forward */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16, the control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* control word bit 5 set: skip the bounce-buffer path */
	jnz	.Lofb_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = 1 if the output is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = 1 if the input is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lofb_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = -1 if the output is misaligned, else 0 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* scratch size: min(len, 512) if output misaligned, else 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx		/* first chunk = len % 512 ... */
	leaq	(%rax,%rbp,1),%rsp	/* carve the scratch area; lea keeps ZF from the andq */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* ... or 512 when len is a multiple of 512 */
	jmp	.Lofb_loop
.align	16
.Lofb_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len and chunk across the copies */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: work in the scratch area instead */
	testq	$0x0f,%rsi
	jz	.Lofb_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the misaligned input chunk to %rdi */
	subq	%rbx,%rdi		/* rewind %rdi to the start of the copy */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* and process that copy in place */
.Lofb_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
	movq	%r8,%rdi		/* restore the real output pointer and the chunk size */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lofb_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from scratch to the misaligned output */
	subq	%rbx,%rdi
.Lofb_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp by the chunk, shrink len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* later chunks are 512 bytes; mov keeps ZF from the subq */
	jnz	.Lofb_loop		/* more data left */
	cmpq	%rbp,%rsp
	je	.Lofb_done		/* no scratch area was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lofb_bzero:				/* zero the scratch area, 16 bytes at a time */
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lofb_bzero

.Lofb_done:
	leaq	(%rbp),%rsp		/* release the scratch area */
	jmp	.Lofb_exit

.align	16
.Lofb_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,232		/* rep xcryptofb over the whole buffer in one go */
	movdqa	(%rax),%xmm0		/* copy the 16 bytes %rax now points at ... */
	movdqa	%xmm0,-16(%rdx)		/* ... back to ctx+0 */
.Lofb_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* drop the flags image pushed before _padlock_verify_ctx */
.Lofb_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ofb_encrypt,.-padlock_ofb_encrypt
/*
 * padlock_ctr32_encrypt
 *
 * Runs `rep xcryptctr` over a buffer.  The last four bytes of the 16-byte
 * block at ctx+0 (i.e. ctx+12) are treated as a big-endian 32-bit
 * counter.  Chunks are sized so that the low bits of the counter do not
 * wrap inside one instruction, and the code adds the carry into the
 * upper 16 bits of the counter itself when the low 16 bits wrap.
 * NOTE(review): this implies the instruction only advances the low 16
 * bits of the counter -- confirm against the PadLock documentation.
 *
 * When input or output is not 16-byte aligned (and bit 5 of the control
 * word is clear) data is processed in chunks of at most 512 bytes and
 * bounced through a scratch area carved from the stack; the scratch area
 * is zeroed before return.  A short tail that ends less than 32 bytes
 * before a 4 KiB boundary is first copied to the stack and processed
 * from there (presumably a guard against reads past the end of the
 * input across a page boundary -- confirm).
 *
 * ABI:   System V AMD64
 * In:    %rdi = output pointer
 *        %rsi = input pointer
 *        %rdx = context pointer, must be 16-byte aligned; the control
 *               word is read at ctx+16, and ctx+0 / ctx+32 are handed to
 *               the instruction in %rax / %rbx
 *        %rcx = length in bytes, must be a multiple of 16
 * Out:   %eax = 1 on completion, 0 if the context or length check failed
 * Clobb: %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0, flags
 *        (%rbx and %rbp are saved and restored)
 */
.globl	padlock_ctr32_encrypt
.type	padlock_ctr32_encrypt,@function
.align	16
padlock_ctr32_encrypt:
	pushq	%rbp
	pushq	%rbx

	xorl	%eax,%eax		/* return value for the abort path */
	testq	$15,%rdx		/* context must be 16-byte aligned */
	jnz	.Lctr32_abort
	testq	$15,%rcx		/* length must be a multiple of 16 */
	jnz	.Lctr32_abort
	leaq	.Lpadlock_saved_context(%rip),%rax
	pushf				/* flags image for _padlock_verify_ctx; dropped at .Lctr32_exit */
	cld				/* the rep movsq copies below must run forward */
	call	_padlock_verify_ctx
	leaq	16(%rdx),%rdx		/* %rdx = ctx+16, the control word */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	testl	$32,(%rdx)		/* control word bit 5 set: skip the bounce-buffer path */
	jnz	.Lctr32_aligned
	testq	$0x0f,%rdi
	setz	%al			/* %al = 1 if the output is 16-byte aligned */
	testq	$0x0f,%rsi
	setz	%bl			/* %bl = 1 if the input is 16-byte aligned */
	testl	%ebx,%eax
	jnz	.Lctr32_aligned		/* both aligned: no bounce buffer needed */
	negq	%rax
	movq	$512,%rbx
	notq	%rax			/* %rax = -1 if the output is misaligned, else 0 */
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	cmpq	%rbx,%rcx
	cmovcq	%rcx,%rbx		/* %rbx = min(len, 512) */
	andq	%rbx,%rax		/* scratch size: min(len, 512) if output misaligned, else 0 */
	movq	%rcx,%rbx
	negq	%rax
	andq	$512-1,%rbx
	leaq	(%rax,%rbp,1),%rsp	/* carve the scratch area */
	movq	$512,%rax
	cmovzq	%rax,%rbx		/* this chunk value is recomputed from the counter just below */
.Lctr32_reenter:
	movl	-4(%rdx),%eax		/* load the counter word at ctx+12 */
	bswapl	%eax			/* big-endian -> native */
	negl	%eax
	andl	$31,%eax		/* blocks until the low 5 bits of the counter wrap */
	movq	$512,%rbx
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* already on a 32-block boundary: allow a full 512 bytes */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes until the wrap) */
	cmovbeq	%rcx,%rbx
	cmpq	%rbx,%rcx
	ja	.Lctr32_loop		/* more than one chunk to go */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no scratch area: the data will be processed at the output pointer */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from the end of that buffer to the next 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = chunk itself (no-op) if >= 32 bytes remain, else -32 */
	andq	%rax,%rbx		/* near the boundary: round the chunk down to a multiple of 32 */
	jz	.Lctr32_unaligned_tail	/* nothing left after rounding: treat it all as a tail */
	jmp	.Lctr32_loop
.align	16
.Lctr32_loop:
	cmpq	%rcx,%rbx
	cmovaq	%rcx,%rbx		/* chunk = min(chunk, len) */
	movq	%rdi,%r8		/* save out, inp, len and chunk across the copies */
	movq	%rsi,%r9
	movq	%rcx,%r10
	movq	%rbx,%rcx
	movq	%rbx,%r11
	testq	$0x0f,%rdi
	cmovnzq	%rsp,%rdi		/* misaligned output: work in the scratch area instead */
	testq	$0x0f,%rsi
	jz	.Lctr32_inp_aligned
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the misaligned input chunk to %rdi */
	subq	%rbx,%rdi		/* rewind %rdi to the start of the copy */
	movq	%rbx,%rcx
	movq	%rdi,%rsi		/* and process that copy in place */
.Lctr32_inp_aligned:
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr */
	movl	-4(%rdx),%eax		/* counter word in memory byte order */
	testl	$0xffff0000,%eax	/* these are the low 16 bits of the big-endian counter */
	jnz	.Lctr32_no_carry
	bswapl	%eax			/* low 16 bits wrapped to zero: ... */
	addl	$0x10000,%eax		/* ... carry into the upper 16 bits */
	bswapl	%eax
	movl	%eax,-4(%rdx)
.Lctr32_no_carry:
	movq	%r8,%rdi		/* restore the real output pointer and the chunk size */
	movq	%r11,%rbx
	testq	$0x0f,%rdi
	jz	.Lctr32_out_aligned
	movq	%rbx,%rcx
	leaq	(%rsp),%rsi
	shrq	$3,%rcx
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the result from scratch to the misaligned output */
	subq	%rbx,%rdi
.Lctr32_out_aligned:
	movq	%r9,%rsi
	movq	%r10,%rcx
	addq	%rbx,%rdi		/* advance out and inp by the chunk, shrink len */
	addq	%rbx,%rsi
	subq	%rbx,%rcx
	movq	$512,%rbx		/* later chunks are 512 bytes; mov keeps ZF from the subq */
	jz	.Lctr32_break		/* len == 0: finished */
	cmpq	%rbx,%rcx
	jae	.Lctr32_loop		/* at least one full chunk left */
	movq	%rcx,%rbx		/* final short chunk: repeat the page-boundary check */
	movq	%rsi,%rax
	cmpq	%rsp,%rbp
	cmoveq	%rdi,%rax		/* no scratch area: the data will be processed at the output pointer */
	addq	%rcx,%rax
	negq	%rax
	andq	$0xfff,%rax		/* bytes from the end of that buffer to the next 4 KiB boundary */
	cmpq	$32,%rax
	movq	$-32,%rax
	cmovaeq	%rbx,%rax		/* mask = chunk itself (no-op) if >= 32 bytes remain, else -32 */
	andq	%rax,%rbx
	jnz	.Lctr32_loop		/* something left after rounding: process it directly */
.Lctr32_unaligned_tail:
	xorl	%eax,%eax
	cmpq	%rsp,%rbp
	cmoveq	%rcx,%rax		/* no scratch area yet: allocate len bytes now */
	movq	%rdi,%r8
	movq	%rcx,%rbx
	subq	%rax,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the remaining input to the scratch area */
	movq	%rsp,%rsi		/* the tail is now read from the stack copy */
	movq	%r8,%rdi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop
.align	16
.Lctr32_break:
	cmpq	%rbp,%rsp
	je	.Lctr32_done		/* no scratch area was used */

	pxor	%xmm0,%xmm0
	leaq	(%rsp),%rax
.Lctr32_bzero:				/* zero the scratch area, 16 bytes at a time */
	movaps	%xmm0,(%rax)
	leaq	16(%rax),%rax
	cmpq	%rax,%rbp
	ja	.Lctr32_bzero

.Lctr32_done:
	leaq	(%rbp),%rsp		/* release the scratch area */
	jmp	.Lctr32_exit

.align	16
.Lctr32_aligned:
	movl	-4(%rdx),%eax		/* load the counter word at ctx+12 */
	bswapl	%eax			/* big-endian -> native */
	negl	%eax
	andl	$0xffff,%eax		/* blocks until the low 16 bits of the counter wrap */
	movq	$1048576,%rbx		/* 0x10000 blocks * 16 bytes */
	shll	$4,%eax			/* blocks -> bytes */
	cmovzq	%rbx,%rax		/* low 16 bits already zero: a full 1 MiB fits */
	cmpq	%rax,%rcx
	cmovaq	%rax,%rbx		/* chunk = min(len, bytes until the wrap) */
	cmovbeq	%rcx,%rbx
	jbe	.Lctr32_aligned_skip	/* the whole buffer fits before the wrap */

.Lctr32_aligned_loop:
	movq	%rcx,%r10		/* save len and chunk */
	movq	%rbx,%rcx
	movq	%rbx,%r11

	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr up to the 16-bit wrap */

	movl	-4(%rdx),%eax		/* the chunk ended exactly on the wrap: ... */
	bswapl	%eax
	addl	$0x10000,%eax		/* ... carry into the upper 16 bits */
	bswapl	%eax
	movl	%eax,-4(%rdx)

	movq	%r10,%rcx		/* restore len and subtract the chunk */
	subq	%r11,%rcx
	movq	$1048576,%rbx		/* mov keeps ZF from the subq */
	jz	.Lctr32_exit		/* len == 0: finished */
	cmpq	%rbx,%rcx
	jae	.Lctr32_aligned_loop	/* at least another full 1 MiB left */

.Lctr32_aligned_skip:
	leaq	(%rsi,%rcx,1),%rbp
	negq	%rbp
	andq	$0xfff,%rbp		/* bytes from the end of the input to the next 4 KiB boundary */
	xorl	%eax,%eax
	cmpq	$32,%rbp
	movq	$32-1,%rbp
	cmovaeq	%rax,%rbp		/* mask = 0 if >= 32 bytes remain, else 31 */
	andq	%rcx,%rbp		/* %rbp = tail length (len & mask) to bounce through the stack */
	subq	%rbp,%rcx
	jz	.Lctr32_aligned_tail	/* everything is tail */
	leaq	-16(%rdx),%rax		/* %rax = ctx+0 */
	leaq	16(%rdx),%rbx		/* %rbx = ctx+32 */
	shrq	$4,%rcx			/* byte count -> 16-byte block count */
.byte	0xf3,0x0f,0xa7,216		/* rep xcryptctr over the bulk, directly on the caller buffers */
	testq	%rbp,%rbp
	jz	.Lctr32_exit		/* no tail */

.Lctr32_aligned_tail:
	movq	%rdi,%r8
	movq	%rbp,%rbx
	movq	%rbp,%rcx
	leaq	(%rsp),%rbp		/* %rbp = stack pointer before the scratch area */
	subq	%rcx,%rsp
	shrq	$3,%rcx
	leaq	(%rsp),%rdi
.byte	0xf3,0x48,0xa5			/* rep movsq: copy the tail input to the stack */
	leaq	(%r8),%rdi
	leaq	(%rsp),%rsi
	movq	%rbx,%rcx
	jmp	.Lctr32_loop		/* run the tail through the common loop; it ends at .Lctr32_break */
.Lctr32_exit:
	movl	$1,%eax
	leaq	8(%rsp),%rsp		/* drop the flags image pushed before _padlock_verify_ctx */
.Lctr32_abort:
	popq	%rbx
	popq	%rbp
	.byte	0xf3,0xc3		/* rep ret */
.size	padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
/*
 * NUL-terminated identification string embedded in .text:
 * "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
.data
.align	8
/*
 * One quadword, initially 0, holding the context pointer most recently
 * stored by _padlock_verify_ctx.  It is a single file-level slot shared
 * by every routine in this file that passes its address to that helper.
 */
.Lpadlock_saved_context:
.quad	0
