# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
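#
# GHASH multiplication for GCM on x86-64, using the PCLMULQDQ (carry-less
# multiply) and AVX instruction sets. All arithmetic is in GF(2^128) modulo
# the GHASH polynomial x^128 + x^7 + x^2 + x + 1 (.L0x1c2_polynomial below);
# blocks are byte-reflected with .Lbswap_mask to match GHASH's bit ordering.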

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern	GFp_ia32cap_P
.hidden GFp_ia32cap_P
.globl	GFp_gcm_init_clmul
.hidden GFp_gcm_init_clmul
.type	GFp_gcm_init_clmul,@function
.align	16
GFp_gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
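# Input (System V ABI): %rdi = Htable output, %rsi = the hash key H.
# Computes the "twisted" key H<<1 mod P, then H^2, H^3 and H^4, storing each
# power together with precomputed Karatsuba values (lo64^hi64) so the bulk
# routines below can issue three PCLMULQDQs per multiplication.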
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


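# <<1 twist: H <<= 1 across all 128 bits (psrlq/pslldq carry the bit between
# quadwords); %xmm5 becomes an all-ones mask iff the top bit of H was set.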
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


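# Magic reduction: if a bit was carried out, H ^= .L0x1c2_polynomial.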
	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


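# Calculate H^2 = H * H. %xmm6 keeps lo64(H)^hi64(H) for the Karatsuba
# middle product.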
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
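# The .byte runs below (and throughout this file) are hand-encoded SSE
# instructions: 102,15,58,68,... is PCLMULQDQ (66 0F 3A 44),
# 102,15,56,0,... is PSHUFB (66 0F 38 00) and 102,15,58,15,... is PALIGNR
# (66 0F 3A 0F), emitted as raw bytes for compatibility with assemblers
# that predate these instructions.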
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
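# Karatsuba post-processing: fold the middle product into the 256-bit
# result (%xmm1:%xmm0).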
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

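# 1st phase of the reduction modulo x^128 + x^7 + x^2 + x + 1.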
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


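# 2nd phase of the reduction; %xmm0 ends up holding the reduced product.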
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
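# Store H (%xmm2), H^2 (%xmm0) and their interleaved Karatsuba values at
# Htable+0, +16 and +32, then multiply by H again for H^3.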
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
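# %xmm0 now holds H^3; save a copy in %xmm5 and multiply by H once more to
# obtain H^4.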
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
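# Store H^3 (%xmm5), H^4 (%xmm0) and their Karatsuba values at Htable+48,
# +64 and +80.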
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
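# 0xf3,0xc3 encodes "rep ret", the two-byte return idiom used at every
# return in this file.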
	.byte	0xf3,0xc3
.cfi_endproc
.size	GFp_gcm_init_clmul,.-GFp_gcm_init_clmul
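# GFp_gcm_gmult_clmul(%rdi = Xi, %rsi = Htable): a single GHASH
# multiplication, Xi = (Xi * H) mod P, byte-swapping Xi on the way in and
# out.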
.globl	GFp_gcm_gmult_clmul
.hidden GFp_gcm_gmult_clmul
.type	GFp_gcm_gmult_clmul,@function
.align	16
GFp_gcm_gmult_clmul:
.cfi_startproc
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	GFp_gcm_gmult_clmul,.-GFp_gcm_gmult_clmul
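# GFp_gcm_ghash_clmul(%rdi = Xi, %rsi = Htable, %rdx = input, %rcx = length
# in bytes): folds the input into Xi. When at least four blocks remain (and
# the CPU check below passes), they are aggregated four at a time with a
# single deferred reduction per group.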
.globl	GFp_gcm_ghash_clmul
.hidden GFp_gcm_ghash_clmul
.type	GFp_gcm_ghash_clmul,@function
.align	32
GFp_gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	leaq	GFp_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

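# Isolate the MOVBE (bit 22) and XSAVE (bit 26) capability bits; MOVBE
# without XSAVE indicates an Atom-class CPU, for which the 4x aggregated
# path below is skipped.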
	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

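# 4x setup: %xmm14/%xmm15 get H^3/H^4, and %rax is seeded with a pshufb
# lookup table (annotated upstream as ((7..0)*0xE0)&0xff, i.e. the low byte
# of the carry-less product i*0xE0 for each 3-bit index i), used together
# with .L7_mask in the reduction inside .Lmod4_loop.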
	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
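# Main 4x loop: Xi (folded into the oldest block of each group) is
# multiplied by H^4 and the three newer blocks by H^3, H^2 and H, with the
# two-phase reduction of the previous group interleaved between the
# PCLMULQDQs.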
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
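# Tail of the 4x path: finish the outstanding multiplications, combine the
# partial products and run one final two-phase reduction.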
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:




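# Two-block aggregation:
#   Xi+2 = [H*(Ii+1 + Xi+1)] mod P
#        = [(H*Ii+1) + (H*Xi+1)] mod P
#        = [(H*Ii+1) + H^2*(Ii + Xi)] mod P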
	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
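# Main 2x loop: the running Xi is multiplied by H^2 and the newer block by
# H, with the reduction of the previous iteration interleaved between the
# PCLMULQDQs.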
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
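# Single trailing block: Xi = (Xi ^ Ii) * H mod P.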
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	GFp_gcm_ghash_clmul,.-GFp_gcm_ghash_clmul
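# GFp_gcm_init_avx(%rdi = Htable, %rsi = H): AVX version of the table
# setup. After the same <<1 twist as the SSE code, the loop below runs four
# times, producing two powers of H per pass, so Htable receives H^1..H^8
# together with their Karatsuba (lo64^hi64) combinations.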
.globl	GFp_gcm_init_avx
.hidden GFp_gcm_init_avx
.type	GFp_gcm_init_avx,@function
.align	32
GFp_gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	GFp_gcm_init_avx,.-GFp_gcm_init_avx
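# GFp_gcm_ghash_avx(%rdi = Xi, %rsi = Htable, %rdx = input, %rcx = length):
# AVX implementation. Inputs of 0x80 bytes or more are hashed eight blocks
# at a time with a single deferred reduction per group; .Lshort_avx handles
# whatever remains one block at a time.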
.globl	GFp_gcm_ghash_avx
.hidden GFp_gcm_ghash_avx
.type	GFp_gcm_ghash_avx,@function
.align	32
GFp_gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
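# 8x loop: the eight blocks of each group are multiplied by H^8..H while
# the previous group's 256-bit result is reduced with two PCLMULQDQs
# against .L0x1c2_polynomial (addressed via %r10).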
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
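# Short path: walk backwards from the end of the input one block at a
# time, accumulating partial products against successive powers of H until
# the remaining length reaches zero.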
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	GFp_gcm_ghash_avx,.-GFp_gcm_ghash_avx
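# Constants: .Lbswap_mask reverses the byte order of a 128-bit block;
# .L0x1c2_polynomial is the GHASH reduction constant for
# x^128 + x^7 + x^2 + x + 1; .L7_mask extracts the low three bits for the
# pshufb-based reduction in the 4x CLMUL path. The trailing .byte string is
# the CRYPTOGAMS attribution ("GHASH for x86_64, CRYPTOGAMS by
# <appro@openssl.org>").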
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif
.section	.note.GNU-stack,"",@progbits