# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
# Build guards: when the compiler reports the memory_sanitizer feature, force
# OPENSSL_NO_ASM so that the whole x86-64 body below is preprocessed away.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

# Everything from here to the matching endif near the end of the file is
# assembled only for x86-64 targets with assembly enabled.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
# Capability words defined outside this file. GFp_gcm_ghash_clmul reads the
# dword at byte offset 4 to choose between its 4-block and 2-block loops.
.extern	GFp_ia32cap_P
.hidden GFp_ia32cap_P
# GFp_gcm_init_clmul
#
# Builds a 96-byte table of key powers with PCLMULQDQ (emitted as .byte so
# that assemblers without the mnemonic still accept the file).
# The C prototype is not visible here; the roles below are read off the code.
#   In:   %rdi  table to fill, written at byte offsets 0..95
#         %rsi  16-byte key block, read once
#   Out (H = key with its 64-bit halves swapped, shifted left by one bit and
#        conditionally XORed with .L0x1c2_polynomial; "X*H" = Karatsuba
#        carry-less product followed by the two-phase shift/XOR reduction):
#         table+0   H
#         table+16  H*H
#         table+32  low qword  = hi^lo of H, high qword = hi^lo of H*H
#         table+48  H*H*H
#         table+64  H*H*H*H
#         table+80  same packing as table+32 for the third and fourth powers
#   Clobbers: %xmm0-%xmm6. No stack and no general-purpose register is written.
#   NOTE(review): presumably called as (Htable, H) from C - confirm.
.globl	GFp_gcm_init_clmul
.hidden GFp_gcm_init_clmul
.type	GFp_gcm_init_clmul,@function
.align	16
GFp_gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2		# swap the two 64-bit halves

# Shift the 128-bit value left by one bit, carrying between the qwords.
	pshufd	$255,%xmm2,%xmm4	# broadcast the top dword
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5		# all-ones when the top bit was set
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2

# Conditionally fold in the polynomial constant (mask computed above).
	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2

# Keep hi^lo of H in xmm6 for the Karatsuba middle term, then square H.
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17	# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,222,0	# pclmulqdq $0x00,%xmm6,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

# Split the middle term across the 256-bit product xmm1:xmm0.
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

# Reduction, first phase (left shifts).
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

# Reduction, second phase (right shifts); result lands in xmm0.
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Store H, H*H and their packed hi^lo halves.
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8	# palignr $8,%xmm3,%xmm4
	movdqu	%xmm4,32(%rdi)
# Multiply by H again: third power.
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17	# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,222,0	# pclmulqdq $0x00,%xmm6,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Keep the third power in xmm5 and multiply by H once more: fourth power.
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17	# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,222,0	# pclmulqdq $0x00,%xmm6,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Store the third and fourth powers and their packed hi^lo halves.
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8	# palignr $8,%xmm3,%xmm4
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_init_clmul,.-GFp_gcm_init_clmul
# GFp_gcm_gmult_clmul
#
# Multiplies the 16-byte accumulator at (%rdi) by the first table entry and
# writes the reduced product back in place.
# The C prototype is not visible here; the roles below are read off the code.
#   In:   %rdi  16-byte accumulator, loaded on entry and stored on exit
#         %rsi  key-power table; only offsets 0 and 32 are read
#   The accumulator is byte-reversed with .Lbswap_mask after the load and
#   again before the store.
#   Clobbers: %xmm0-%xmm5. No stack and no general-purpose register is written.
.globl	GFp_gcm_gmult_clmul
.hidden GFp_gcm_gmult_clmul
.type	GFp_gcm_gmult_clmul,@function
.align	16
GFp_gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
# Karatsuba carry-less multiply: lo*lo, hi*hi, (hi^lo)*(hi^lo).
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17	# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,220,0	# pclmulqdq $0x00,%xmm4,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

# Split the middle term across the 256-bit product xmm1:xmm0.
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

# Reduction, first phase (left shifts).
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

# Reduction, second phase (right shifts); result lands in xmm0.
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197		# pshufb %xmm5,%xmm0
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_gmult_clmul,.-GFp_gcm_gmult_clmul
# GFp_gcm_ghash_clmul
#
# Folds a run of 16-byte input blocks into the 16-byte accumulator at (%rdi),
# using the key-power table at (%rsi). Table offsets 0..95 are read.
# The C prototype is not visible here; the roles below are read off the code.
#   In:   %rdi  accumulator, loaded on entry and stored at .Ldone
#         %rsi  key-power table
#         %rdx  input pointer, advanced as blocks are consumed
#         %rcx  input length in bytes
#   Paths: .Lmod4_loop handles four blocks per iteration, .Lmod_loop two, and
#          .Lodd_tail a single trailing block.
#   NOTE(review): every path consumes whole 16-byte blocks and the first
#   operation on %rcx is an unconditional subtract of 16, so the length looks
#   required to be a nonzero multiple of 16 - confirm against callers.
#   Clobbers: %rax, %rcx, %rdx, %xmm0-%xmm15, flags. No stack use.
.globl	GFp_gcm_ghash_clmul
.hidden GFp_gcm_ghash_clmul
.type	GFp_gcm_ghash_clmul,@function
.align	32
GFp_gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194	# pshufb %xmm10,%xmm0

# From here on %rcx holds the remaining length minus 16.
	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	leaq	GFp_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

# Keep bits 22 and 26 of the capability dword; when only bit 22 is set the
# four-block loop is skipped. NOTE(review): presumably MOVBE without XSAVE,
# as in the OpenSSL capability vector - confirm.
	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
# Eight-entry byte table, used through pshufb by the reduction in .Lmod4_loop.
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15

# Prologue of the four-block path: start the products for blocks 3 and 2.
	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218	# pshufb %xmm10,%xmm3
.byte	102,69,15,56,0,218	# pshufb %xmm10,%xmm11
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0	# pclmulqdq $0x00,%xmm2,%xmm3
.byte	102,15,58,68,234,17	# pclmulqdq $0x11,%xmm2,%xmm5
.byte	102,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm4

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0	# pclmulqdq $0x00,%xmm6,%xmm11
.byte	102,68,15,58,68,238,17	# pclmulqdq $0x11,%xmm6,%xmm13
.byte	102,68,15,58,68,231,16	# pclmulqdq $0x10,%xmm7,%xmm12
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

# Blocks 1 and 0; block 0 is first XORed into the accumulator.
	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218	# pshufb %xmm10,%xmm11
.byte	102,69,15,56,0,194	# pshufb %xmm10,%xmm8
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0	# pclmulqdq $0x00,%xmm14,%xmm11
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17	# pclmulqdq $0x11,%xmm14,%xmm13
.byte	102,68,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm12
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
# Main four-block loop: finishes the previous group (multiply by the value
# loaded from table+64 and reduce) while starting products for the next four.
.Lmod4_loop:
.byte	102,65,15,58,68,199,0	# pclmulqdq $0x00,%xmm15,%xmm0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218	# pshufb %xmm10,%xmm11
.byte	102,65,15,58,68,207,17	# pclmulqdq $0x11,%xmm15,%xmm1
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16	# pclmulqdq $0x10,%xmm7,%xmm8
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218	# pshufb %xmm10,%xmm3
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0	# pclmulqdq $0x00,%xmm2,%xmm11
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17	# pclmulqdq $0x11,%xmm2,%xmm13
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200	# movq %rax,%xmm9

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200	# pshufb %xmm8,%xmm9
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm12
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0	# pclmulqdq $0x00,%xmm6,%xmm3
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17	# pclmulqdq $0x11,%xmm6,%xmm5
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218	# pshufb %xmm10,%xmm11
.byte	102,15,58,68,231,16	# pclmulqdq $0x10,%xmm7,%xmm4
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194	# pshufb %xmm10,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0	# pclmulqdq $0x00,%xmm14,%xmm11
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17	# pclmulqdq $0x11,%xmm14,%xmm13
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm12
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

# Finish the last group of four: final multiply, combine, reduce.
.Ltail4x:
.byte	102,65,15,58,68,199,0	# pclmulqdq $0x00,%xmm15,%xmm0
.byte	102,65,15,58,68,207,17	# pclmulqdq $0x11,%xmm15,%xmm1
.byte	102,68,15,58,68,199,16	# pclmulqdq $0x10,%xmm7,%xmm8
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Undo the 64-byte bias; zero means the input is exhausted.
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
# Two-block path; entered with %rcx equal to the remaining length minus 16.
.Lskip4x:

	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194	# pshufb %xmm10,%xmm8
.byte	102,65,15,56,0,218	# pshufb %xmm10,%xmm3
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0	# pclmulqdq $0x00,%xmm2,%xmm3
.byte	102,15,58,68,234,17	# pclmulqdq $0x11,%xmm2,%xmm5
.byte	102,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm4

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0	# pclmulqdq $0x00,%xmm6,%xmm0
.byte	102,15,58,68,206,17	# pclmulqdq $0x11,%xmm6,%xmm1
.byte	102,15,58,68,231,16	# pclmulqdq $0x10,%xmm7,%xmm4

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202	# pshufb %xmm10,%xmm9
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218	# pshufb %xmm10,%xmm3
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0	# pclmulqdq $0x00,%xmm2,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17	# pclmulqdq $0x11,%xmm2,%xmm5
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0	# pclmulqdq $0x00,%xmm7,%xmm4
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

# Finish the pending pair of blocks.
.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0	# pclmulqdq $0x00,%xmm6,%xmm0
.byte	102,15,58,68,206,17	# pclmulqdq $0x11,%xmm6,%xmm1
.byte	102,15,58,68,231,16	# pclmulqdq $0x10,%xmm7,%xmm4

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Because of the 16-byte bias, zero here means exactly one block is left;
# any other value means the input is exhausted.
	testq	%rcx,%rcx
	jnz	.Ldone

# Single trailing block: XOR into the accumulator, multiply by H, reduce.
.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194	# pshufb %xmm10,%xmm8
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17	# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,223,0	# pclmulqdq $0x00,%xmm7,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Byte-reverse the accumulator back and store it.
.Ldone:
.byte	102,65,15,56,0,194	# pshufb %xmm10,%xmm0
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_ghash_clmul,.-GFp_gcm_ghash_clmul
# GFp_gcm_init_avx
#
# AVX variant of the table setup. Runs four rounds; each round writes 48 bytes
# (an odd key power, the next even power, and a packed pair of hi^lo halves),
# so 192 bytes are written in total.
# The C prototype is not visible here; the roles below are read off the code.
#   In:   %rdi  table to fill (advanced by 48 per round)
#         %rsi  16-byte key block, read once
#   Clobbers: %rdi, %r10, %xmm0-%xmm6, flags. vzeroupper is issued on entry
#             and before returning. No stack use.
.globl	GFp_gcm_init_avx
.hidden GFp_gcm_init_avx
.type	GFp_gcm_init_avx,@function
.align	32
GFp_gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2		# swap the two 64-bit halves

# Shift the 128-bit value left by one bit, carrying between the qwords.
	vpshufd	$255,%xmm2,%xmm4	# broadcast the top dword
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5	# all-ones when the top bit was set
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2

# Conditionally fold in the polynomial constant (mask computed above).
	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

# Keep hi^lo of H in xmm6 for the Karatsuba middle term.
	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10			# round counter
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
# Store the packed hi^lo pair of the previous round, then multiply the even
# power by H to obtain the next odd power.
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
# Save the odd power in xmm5 and multiply by H to obtain the even power.
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
# Store the odd/even pair and prepare their hi^lo halves in xmm3/xmm4.
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

# The last packed pair is stored with xmm3 and xmm4 in the opposite order to
# the in-loop store. GFp_gcm_ghash_avx reads its final entry with a vmovq
# from table offset 184, which picks up the high qword written here.
	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_init_avx,.-GFp_gcm_init_avx
# GFp_gcm_ghash_avx
#
# AVX variant of the bulk hash update: folds 16-byte input blocks into the
# 16-byte accumulator at (%rdi), eight blocks at a time while at least 128
# bytes remain, then up to seven more through .Lshort_avx.
# The C prototype is not visible here; the roles below are read off the code.
#   In:   %rdi  accumulator, loaded on entry and stored before returning
#         %rsi  key-power table; advanced by 64 on entry, which is why every
#               table displacement below is written as N-64. Table offsets
#               0..191 are read.
#         %rdx  input pointer
#         %rcx  input length in bytes
#   Internal: %r10 = address of .L0x1c2_polynomial, %xmm13 = byte-swap mask,
#             %xmm10 = running accumulator.
#   NOTE(review): with a zero length .Lshort_avx would load from -16(%rdx), so
#   the length looks required to be a nonzero multiple of 16 - confirm
#   against callers.
#   Clobbers: %rcx, %rdx, %rsi, %r10, %xmm0-%xmm15, flags. vzeroupper is
#             issued on entry and before returning. No stack use.
.globl	GFp_gcm_ghash_avx
.hidden GFp_gcm_ghash_avx
.type	GFp_gcm_ghash_avx,@function
.align	32
GFp_gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

# First group of eight blocks, taken from the last block (offset 112) down to
# the first; products are accumulated in xmm0/xmm1/xmm2 and xmm3/xmm4/xmm5.
	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

# At least eight more blocks: fold the accumulator into block 0 and loop.
	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
# Steady state: finish the previous group (including its reduction through
# the two vpclmulqdq against (%r10)) while multiplying the next eight blocks.
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

# Restore the true remaining length; the accumulator is already folded into
# xmm15, so skip the XOR at .Ltail_avx.
	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
# Fewer than eight blocks remain. Blocks are taken from the end of the input
# backwards, each paired with the next table entry.
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

# Seed xmm3/xmm4/xmm5 with copies of xmm0/xmm1/xmm2 so that the first
# XOR-accumulate below (or at .Ltail_avx) cancels whatever they held.
	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
# Fold the accumulator into the last pending block, finish its product,
# combine the three partial sums and reduce with the polynomial at (%r10).
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

# Leftover blocks after an eight-block group go through the short path.
	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_ghash_avx,.-GFp_gcm_ghash_avx
# Constants shared by the routines above. They are emitted in the current
# section (no section switch precedes them). Each one is 16 bytes, laid out
# back to back from a 64-byte aligned base, so all three are 16-byte aligned;
# the legacy-SSE movdqa and pand memory operands above rely on that.
.align	64
# Control vector for pshufb/vpshufb: reverses the order of the 16 bytes.
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Polynomial constant: lowest byte 0x01, highest byte 0xc2, zero elsewhere.
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
# Value 7 in the low dword of each qword; used by .Lmod4_loop.
.L7_mask:
.long	7,0,7,0
.align	64

# NUL-terminated ASCII banner:
#   GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif
# Emitted outside the guard so it is present even when the body is compiled
# out; an empty .note.GNU-stack section tells GNU linkers that this object
# does not need an executable stack.
.section	.note.GNU-stack,"",@progbits