1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8section	.text code align=64
9
10EXTERN	GFp_ia32cap_P
;-----------------------------------------------------------------------
; GFp_gcm_init_clmul
;
; Registers as used below (consistent with the Microsoft x64 argument
; order; NOTE(review): confirm the C prototype against the caller):
;   rcx = output buffer; six 16-byte values are stored at offsets 0..80
;   rdx = input; 16 bytes are read with an unaligned load
;
; Stack:    24 bytes, used only to preserve xmm6 across the routine.
; Clobbers: xmm0-xmm5, flags.
;
; Output layout, with V = the adjusted input held in xmm2 and "*" = the
; carry-less multiply-and-fold sequence used throughout this routine:
;    0: V          16: V*V         32: (lo^hi of V),     (lo^hi of V*V)
;   48: V*V*V      64: V*V*V*V     80: (lo^hi of V*V*V), (lo^hi of V*V*V*V)
; NOTE(review): by name this is presumably the GHASH key table setup;
; that interpretation is not derivable from this file alone.
;-----------------------------------------------------------------------
11global	GFp_gcm_init_clmul
12
13ALIGN	16
14GFp_gcm_init_clmul:
15
16$L$_init_clmul:
17$L$SEH_begin_GFp_gcm_init_clmul:
18
	; Hand-encoded prologue: sub rsp,0x18 / movaps [rsp],xmm6
19DB	0x48,0x83,0xec,0x18
20DB	0x0f,0x29,0x34,0x24
	; xmm2 = input with its two 64-bit halves swapped (pshufd imm 78).
21	movdqu	xmm2,XMMWORD[rdx]
22	pshufd	xmm2,xmm2,78
23
24
	; Shift xmm2 left by one bit as a single 128-bit value: psrlq 63 /
	; pslldq 8 carries bit 63 of the low half into the high half.
	; xmm5 becomes an all-ones mask when the top bit was set before the
	; shift (0 > broadcast top dword, signed compare).
25	pshufd	xmm4,xmm2,255
26	movdqa	xmm3,xmm2
27	psllq	xmm2,1
28	pxor	xmm5,xmm5
29	psrlq	xmm3,63
30	pcmpgtd	xmm5,xmm4
31	pslldq	xmm3,8
32	por	xmm2,xmm3
33
34
	; Conditionally xor in the $L$0x1c2_polynomial constant (only when
	; the mask in xmm5 is set).
35	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
36	pxor	xmm2,xmm5
37
38
	; xmm6 = lo^hi of V in both halves; xmm0 = V.
39	pshufd	xmm6,xmm2,78
40	movdqa	xmm0,xmm2
41	pxor	xmm6,xmm2
	; --- xmm0 = xmm0 * V (first product) -----------------------------
	; Karatsuba-style 128x128 carry-less multiply, three PCLMULQDQs:
	;   pclmulqdq xmm0,xmm2,0x00   (lo*lo)
	;   pclmulqdq xmm1,xmm2,0x11   (hi*hi)
	;   pclmulqdq xmm3,xmm6,0x00   ((lo^hi)*(lo^hi))
42	movdqa	xmm1,xmm0
43	pshufd	xmm3,xmm0,78
44	pxor	xmm3,xmm0
45DB	102,15,58,68,194,0
46DB	102,15,58,68,202,17
47DB	102,15,58,68,222,0
48	pxor	xmm3,xmm0
49	pxor	xmm3,xmm1
50
	; Split the middle term across the 256-bit product xmm1:xmm0.
51	movdqa	xmm4,xmm3
52	psrldq	xmm3,8
53	pslldq	xmm4,8
54	pxor	xmm1,xmm3
55	pxor	xmm0,xmm4
56
	; Fold xmm1:xmm0 down to 128 bits in xmm0, first half (left shifts
	; by 5, 1 and 57 combined with xors). Presumably reduction modulo
	; the GHASH polynomial.
57	movdqa	xmm4,xmm0
58	movdqa	xmm3,xmm0
59	psllq	xmm0,5
60	pxor	xmm3,xmm0
61	psllq	xmm0,1
62	pxor	xmm0,xmm3
63	psllq	xmm0,57
64	movdqa	xmm3,xmm0
65	pslldq	xmm0,8
66	psrldq	xmm3,8
67	pxor	xmm0,xmm4
68	pxor	xmm1,xmm3
69
70
	; Fold, second half (right shifts by 1, 5, 1), then merge the high
	; part xmm1 into the result.
71	movdqa	xmm4,xmm0
72	psrlq	xmm0,1
73	pxor	xmm1,xmm4
74	pxor	xmm4,xmm0
75	psrlq	xmm0,5
76	pxor	xmm0,xmm4
77	psrlq	xmm0,1
78	pxor	xmm0,xmm1
	; Store V at +0 and V*V at +16. The DB below is palignr xmm4,xmm3,8,
	; which packs lo^hi of V (low qword) with lo^hi of V*V (high qword)
	; for the store at +32.
79	pshufd	xmm3,xmm2,78
80	pshufd	xmm4,xmm0,78
81	pxor	xmm3,xmm2
82	movdqu	XMMWORD[rcx],xmm2
83	pxor	xmm4,xmm0
84	movdqu	XMMWORD[16+rcx],xmm0
85DB	102,15,58,15,227,8
86	movdqu	XMMWORD[32+rcx],xmm4
	; --- xmm0 = xmm0 * V (second product; same sequence as above) ----
87	movdqa	xmm1,xmm0
88	pshufd	xmm3,xmm0,78
89	pxor	xmm3,xmm0
90DB	102,15,58,68,194,0
91DB	102,15,58,68,202,17
92DB	102,15,58,68,222,0
93	pxor	xmm3,xmm0
94	pxor	xmm3,xmm1
95
96	movdqa	xmm4,xmm3
97	psrldq	xmm3,8
98	pslldq	xmm4,8
99	pxor	xmm1,xmm3
100	pxor	xmm0,xmm4
101
102	movdqa	xmm4,xmm0
103	movdqa	xmm3,xmm0
104	psllq	xmm0,5
105	pxor	xmm3,xmm0
106	psllq	xmm0,1
107	pxor	xmm0,xmm3
108	psllq	xmm0,57
109	movdqa	xmm3,xmm0
110	pslldq	xmm0,8
111	psrldq	xmm3,8
112	pxor	xmm0,xmm4
113	pxor	xmm1,xmm3
114
115
116	movdqa	xmm4,xmm0
117	psrlq	xmm0,1
118	pxor	xmm1,xmm4
119	pxor	xmm4,xmm0
120	psrlq	xmm0,5
121	pxor	xmm0,xmm4
122	psrlq	xmm0,1
123	pxor	xmm0,xmm1
	; Keep the second product in xmm5; it is stored at +48 further down.
	; --- xmm0 = xmm0 * V (third product; same sequence as above) -----
124	movdqa	xmm5,xmm0
125	movdqa	xmm1,xmm0
126	pshufd	xmm3,xmm0,78
127	pxor	xmm3,xmm0
128DB	102,15,58,68,194,0
129DB	102,15,58,68,202,17
130DB	102,15,58,68,222,0
131	pxor	xmm3,xmm0
132	pxor	xmm3,xmm1
133
134	movdqa	xmm4,xmm3
135	psrldq	xmm3,8
136	pslldq	xmm4,8
137	pxor	xmm1,xmm3
138	pxor	xmm0,xmm4
139
140	movdqa	xmm4,xmm0
141	movdqa	xmm3,xmm0
142	psllq	xmm0,5
143	pxor	xmm3,xmm0
144	psllq	xmm0,1
145	pxor	xmm0,xmm3
146	psllq	xmm0,57
147	movdqa	xmm3,xmm0
148	pslldq	xmm0,8
149	psrldq	xmm3,8
150	pxor	xmm0,xmm4
151	pxor	xmm1,xmm3
152
153
154	movdqa	xmm4,xmm0
155	psrlq	xmm0,1
156	pxor	xmm1,xmm4
157	pxor	xmm4,xmm0
158	psrlq	xmm0,5
159	pxor	xmm0,xmm4
160	psrlq	xmm0,1
161	pxor	xmm0,xmm1
	; Store the second and third products at +48/+64 and their packed
	; lo^hi halves at +80 (DB = palignr xmm4,xmm3,8).
162	pshufd	xmm3,xmm5,78
163	pshufd	xmm4,xmm0,78
164	pxor	xmm3,xmm5
165	movdqu	XMMWORD[48+rcx],xmm5
166	pxor	xmm4,xmm0
167	movdqu	XMMWORD[64+rcx],xmm0
168DB	102,15,58,15,227,8
169	movdqu	XMMWORD[80+rcx],xmm4
	; Epilogue: restore xmm6 and release the 24-byte frame.
170	movaps	xmm6,XMMWORD[rsp]
171	lea	rsp,[24+rsp]
172$L$SEH_end_GFp_gcm_init_clmul:
173	DB	0F3h,0C3h		;repret
174
175
;-----------------------------------------------------------------------
; GFp_gcm_gmult_clmul
;
; Registers as used below (consistent with the Microsoft x64 argument
; order; NOTE(review): confirm the C prototype against the caller):
;   rcx = 16-byte value, read at entry and overwritten with the result
;   rdx = table; 16 bytes are read at +0 and 16 bytes at +32
;
; Performs one multiply-and-fold of the byte-reversed value at [rcx] by
; the table entry at [rdx], then stores the byte-reversed result back.
;
; Stack:    none used; no callee-saved XMM register is touched, and
;           this routine has no .pdata entry in this file.
; Clobbers: xmm0-xmm5.
;-----------------------------------------------------------------------
176global	GFp_gcm_gmult_clmul
177
178ALIGN	16
179GFp_gcm_gmult_clmul:
180
181$L$_gmult_clmul:
182	movdqu	xmm0,XMMWORD[rcx]
183	movdqa	xmm5,XMMWORD[$L$bswap_mask]
184	movdqu	xmm2,XMMWORD[rdx]
185	movdqu	xmm4,XMMWORD[32+rdx]
	; pshufb xmm0,xmm5 : reverse the 16 bytes of the input value.
186DB	102,15,56,0,197
	; Karatsuba-style carry-less multiply of xmm0 by xmm2:
	;   pclmulqdq xmm0,xmm2,0x00 ; pclmulqdq xmm1,xmm2,0x11 ;
	;   pclmulqdq xmm3,xmm4,0x00 (xmm3 = lo^hi of xmm0, xmm4 = [32+rdx])
187	movdqa	xmm1,xmm0
188	pshufd	xmm3,xmm0,78
189	pxor	xmm3,xmm0
190DB	102,15,58,68,194,0
191DB	102,15,58,68,202,17
192DB	102,15,58,68,220,0
193	pxor	xmm3,xmm0
194	pxor	xmm3,xmm1
195
	; Split the middle term across the 256-bit product xmm1:xmm0.
196	movdqa	xmm4,xmm3
197	psrldq	xmm3,8
198	pslldq	xmm4,8
199	pxor	xmm1,xmm3
200	pxor	xmm0,xmm4
201
	; Fold xmm1:xmm0 down to 128 bits in xmm0 (shift-and-xor steps;
	; presumably reduction modulo the GHASH polynomial).
202	movdqa	xmm4,xmm0
203	movdqa	xmm3,xmm0
204	psllq	xmm0,5
205	pxor	xmm3,xmm0
206	psllq	xmm0,1
207	pxor	xmm0,xmm3
208	psllq	xmm0,57
209	movdqa	xmm3,xmm0
210	pslldq	xmm0,8
211	psrldq	xmm3,8
212	pxor	xmm0,xmm4
213	pxor	xmm1,xmm3
214
215
216	movdqa	xmm4,xmm0
217	psrlq	xmm0,1
218	pxor	xmm1,xmm4
219	pxor	xmm4,xmm0
220	psrlq	xmm0,5
221	pxor	xmm0,xmm4
222	psrlq	xmm0,1
223	pxor	xmm0,xmm1
	; pshufb xmm0,xmm5 : restore the original byte order, then store.
224DB	102,15,56,0,197
225	movdqu	XMMWORD[rcx],xmm0
226	DB	0F3h,0C3h		;repret
227
228
;-----------------------------------------------------------------------
; GFp_gcm_ghash_clmul
;
; Registers as used below (consistent with the Microsoft x64 argument
; order; NOTE(review): confirm the C prototype against the caller):
;   rcx = 16-byte accumulator, read at entry and rewritten at $L$done
;   rdx = table; entries at +0,+16,+32 (and +48,+64,+80 on the 4x path)
;   r8  = input data pointer (advanced as blocks are consumed)
;   r9  = input length in bytes
; NOTE(review): the length bookkeeping only terminates correctly when r9
; is a non-zero multiple of 16 -- assumed to be guaranteed by callers.
;
; Paths: a 4-blocks-per-iteration loop ($L$mod4_loop), a
; 2-blocks-per-iteration loop ($L$mod_loop) and a single-block tail
; ($L$odd_tail).
;
; Stack:    168 bytes; xmm6-xmm15 are saved in the prologue and restored
;           before returning.
; Clobbers: rax, r8, r9, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
229global	GFp_gcm_ghash_clmul
230
231ALIGN	32
232GFp_gcm_ghash_clmul:
233
234$L$_ghash_clmul:
235	lea	rax,[((-136))+rsp]
236$L$SEH_begin_GFp_gcm_ghash_clmul:
237
	; Hand-encoded prologue (the unwind offsets in .xdata are relative
	; to the SEH_begin label above, i.e. they start after the lea):
	;   lea    rsp,[rax-0x20]        ; rsp -= 168 in total
	;   movaps [rax-0x20],xmm6   ... movaps [rax+0x70],xmm15
238DB	0x48,0x8d,0x60,0xe0
239DB	0x0f,0x29,0x70,0xe0
240DB	0x0f,0x29,0x78,0xf0
241DB	0x44,0x0f,0x29,0x00
242DB	0x44,0x0f,0x29,0x48,0x10
243DB	0x44,0x0f,0x29,0x50,0x20
244DB	0x44,0x0f,0x29,0x58,0x30
245DB	0x44,0x0f,0x29,0x60,0x40
246DB	0x44,0x0f,0x29,0x68,0x50
247DB	0x44,0x0f,0x29,0x70,0x60
248DB	0x44,0x0f,0x29,0x78,0x70
	; xmm10 = byte-reversal mask, kept for the whole routine.
249	movdqa	xmm10,XMMWORD[$L$bswap_mask]
250
	; xmm0 = accumulator (byte-reversed by pshufb xmm0,xmm10 below),
	; xmm2 = table[+0], xmm7 = table[+32].
251	movdqu	xmm0,XMMWORD[rcx]
252	movdqu	xmm2,XMMWORD[rdx]
253	movdqu	xmm7,XMMWORD[32+rdx]
254DB	102,65,15,56,0,194
255
	; From here r9 = length - 16. Exactly one block -> $L$odd_tail.
256	sub	r9,0x10
257	jz	NEAR $L$odd_tail
258
	; xmm6 = table[+16]. Fewer than four blocks in total -> 2x path.
259	movdqu	xmm6,XMMWORD[16+rdx]
260	lea	rax,[GFp_ia32cap_P]
261	mov	eax,DWORD[4+rax]
262	cmp	r9,0x30
263	jb	NEAR $L$skip4x
264
	; 71303168 = bits 22 and 26 of the second capability dword; the 4x
	; path is skipped when bit 22 is set and bit 26 is clear.
	; NOTE(review): presumably MOVBE-without-XSAVE, used as a CPU-family
	; heuristic -- confirm against the generating script.
265	and	eax,71303168
266	cmp	eax,4194304
267	je	NEAR $L$skip4x
268
	; 4x path setup. rax holds an 8-byte lookup table consumed inside
	; the loop (movq xmm9,rax / pshufb xmm9,xmm8); xmm14/xmm15 are
	; table[+48] and table[+64].
269	sub	r9,0x30
270	mov	rax,0xA040608020C0E000
271	movdqu	xmm14,XMMWORD[48+rdx]
272	movdqu	xmm15,XMMWORD[64+rdx]
273
274
275
276
	; Load and byte-reverse the blocks at +48 and +32, and start their
	; multiplies by xmm2 (table[+0]) and xmm6 (table[+16]); the "middle"
	; products use the two halves of xmm7 (table[+32]).
277	movdqu	xmm3,XMMWORD[48+r8]
278	movdqu	xmm11,XMMWORD[32+r8]
279DB	102,65,15,56,0,218
280DB	102,69,15,56,0,218
281	movdqa	xmm5,xmm3
282	pshufd	xmm4,xmm3,78
283	pxor	xmm4,xmm3
284DB	102,15,58,68,218,0
285DB	102,15,58,68,234,17
286DB	102,15,58,68,231,0
287
288	movdqa	xmm13,xmm11
289	pshufd	xmm12,xmm11,78
290	pxor	xmm12,xmm11
291DB	102,68,15,58,68,222,0
292DB	102,68,15,58,68,238,17
293DB	102,68,15,58,68,231,16
294	xorps	xmm3,xmm11
295	xorps	xmm5,xmm13
	; xmm7 switches to table[+80] for the remaining two blocks.
296	movups	xmm7,XMMWORD[80+rdx]
297	xorps	xmm4,xmm12
298
	; Blocks at +16 and +0; the first block is xored into the
	; accumulator xmm0 before being multiplied.
299	movdqu	xmm11,XMMWORD[16+r8]
300	movdqu	xmm8,XMMWORD[r8]
301DB	102,69,15,56,0,218
302DB	102,69,15,56,0,194
303	movdqa	xmm13,xmm11
304	pshufd	xmm12,xmm11,78
305	pxor	xmm0,xmm8
306	pxor	xmm12,xmm11
307DB	102,69,15,58,68,222,0
308	movdqa	xmm1,xmm0
309	pshufd	xmm8,xmm0,78
310	pxor	xmm8,xmm0
311DB	102,69,15,58,68,238,17
312DB	102,68,15,58,68,231,0
313	xorps	xmm3,xmm11
314	xorps	xmm5,xmm13
315
	; Four blocks consumed. Borrow here means fewer than four further
	; blocks remain, so go straight to the 4x tail.
316	lea	r8,[64+r8]
317	sub	r9,0x40
318	jc	NEAR $L$tail4x
319
320	jmp	NEAR $L$mod4_loop
321ALIGN	32
	; Main 4x loop: 64 bytes per iteration. The multiplies for the next
	; four blocks are interleaved with the accumulation and fold of the
	; previous group's product.
322$L$mod4_loop:
323DB	102,65,15,58,68,199,0
324	xorps	xmm4,xmm12
325	movdqu	xmm11,XMMWORD[48+r8]
326DB	102,69,15,56,0,218
327DB	102,65,15,58,68,207,17
328	xorps	xmm0,xmm3
329	movdqu	xmm3,XMMWORD[32+r8]
330	movdqa	xmm13,xmm11
331DB	102,68,15,58,68,199,16
332	pshufd	xmm12,xmm11,78
333	xorps	xmm1,xmm5
334	pxor	xmm12,xmm11
335DB	102,65,15,56,0,218
336	movups	xmm7,XMMWORD[32+rdx]
337	xorps	xmm8,xmm4
338DB	102,68,15,58,68,218,0
339	pshufd	xmm4,xmm3,78
340
341	pxor	xmm8,xmm0
342	movdqa	xmm5,xmm3
343	pxor	xmm8,xmm1
344	pxor	xmm4,xmm3
345	movdqa	xmm9,xmm8
346DB	102,68,15,58,68,234,17
347	pslldq	xmm8,8
348	psrldq	xmm9,8
349	pxor	xmm0,xmm8
350	movdqa	xmm8,XMMWORD[$L$7_mask]
351	pxor	xmm1,xmm9
	; movq xmm9,rax : bring the lookup table into an XMM register.
352DB	102,76,15,110,200
353
	; xmm8 = xmm0 & 7_mask selects table bytes via pshufb xmm9,xmm8.
354	pand	xmm8,xmm0
355DB	102,69,15,56,0,200
356	pxor	xmm9,xmm0
357DB	102,68,15,58,68,231,0
358	psllq	xmm9,57
359	movdqa	xmm8,xmm9
360	pslldq	xmm9,8
361DB	102,15,58,68,222,0
362	psrldq	xmm8,8
363	pxor	xmm0,xmm9
364	pxor	xmm1,xmm8
365	movdqu	xmm8,XMMWORD[r8]
366
367	movdqa	xmm9,xmm0
368	psrlq	xmm0,1
369DB	102,15,58,68,238,17
370	xorps	xmm3,xmm11
371	movdqu	xmm11,XMMWORD[16+r8]
372DB	102,69,15,56,0,218
373DB	102,15,58,68,231,16
374	xorps	xmm5,xmm13
375	movups	xmm7,XMMWORD[80+rdx]
376DB	102,69,15,56,0,194
377	pxor	xmm1,xmm9
378	pxor	xmm9,xmm0
379	psrlq	xmm0,5
380
381	movdqa	xmm13,xmm11
382	pxor	xmm4,xmm12
383	pshufd	xmm12,xmm11,78
384	pxor	xmm0,xmm9
385	pxor	xmm1,xmm8
386	pxor	xmm12,xmm11
387DB	102,69,15,58,68,222,0
388	psrlq	xmm0,1
389	pxor	xmm0,xmm1
390	movdqa	xmm1,xmm0
391DB	102,69,15,58,68,238,17
392	xorps	xmm3,xmm11
393	pshufd	xmm8,xmm0,78
394	pxor	xmm8,xmm0
395
396DB	102,68,15,58,68,231,0
397	xorps	xmm5,xmm13
398
399	lea	r8,[64+r8]
400	sub	r9,0x40
401	jnc	NEAR $L$mod4_loop
402
	; 4x tail: finish the multiply of the accumulator by xmm15
	; (table[+64]), combine with the pending partial products, and fold
	; the 256-bit result down to 128 bits in xmm0.
403$L$tail4x:
404DB	102,65,15,58,68,199,0
405DB	102,65,15,58,68,207,17
406DB	102,68,15,58,68,199,16
407	xorps	xmm4,xmm12
408	xorps	xmm0,xmm3
409	xorps	xmm1,xmm5
410	pxor	xmm1,xmm0
411	pxor	xmm8,xmm4
412
413	pxor	xmm8,xmm1
414	pxor	xmm1,xmm0
415
416	movdqa	xmm9,xmm8
417	psrldq	xmm8,8
418	pslldq	xmm9,8
419	pxor	xmm1,xmm8
420	pxor	xmm0,xmm9
421
422	movdqa	xmm4,xmm0
423	movdqa	xmm3,xmm0
424	psllq	xmm0,5
425	pxor	xmm3,xmm0
426	psllq	xmm0,1
427	pxor	xmm0,xmm3
428	psllq	xmm0,57
429	movdqa	xmm3,xmm0
430	pslldq	xmm0,8
431	psrldq	xmm3,8
432	pxor	xmm0,xmm4
433	pxor	xmm1,xmm3
434
435
436	movdqa	xmm4,xmm0
437	psrlq	xmm0,1
438	pxor	xmm1,xmm4
439	pxor	xmm4,xmm0
440	psrlq	xmm0,5
441	pxor	xmm0,xmm4
442	psrlq	xmm0,1
443	pxor	xmm0,xmm1
	; Undo the last subtraction: r9 = bytes still unprocessed (0..48).
	; Otherwise reload xmm7 = table[+32] and dispatch on the remainder.
444	add	r9,0x40
445	jz	NEAR $L$done
446	movdqu	xmm7,XMMWORD[32+rdx]
447	sub	r9,0x10
448	jz	NEAR $L$odd_tail
	; 2x path entry; r9 = remaining bytes - 16 at this point.
449$L$skip4x:
450
451
452
453
454
	; Load and byte-reverse two blocks; the first is xored into the
	; accumulator, the second starts its multiply by xmm2 (table[+0]).
455	movdqu	xmm8,XMMWORD[r8]
456	movdqu	xmm3,XMMWORD[16+r8]
457DB	102,69,15,56,0,194
458DB	102,65,15,56,0,218
459	pxor	xmm0,xmm8
460
461	movdqa	xmm5,xmm3
462	pshufd	xmm4,xmm3,78
463	pxor	xmm4,xmm3
464DB	102,15,58,68,218,0
465DB	102,15,58,68,234,17
466DB	102,15,58,68,231,0
467
	; jbe: taken when no further full pair follows (r9 <= 0x20 before
	; the subtraction).
468	lea	r8,[32+r8]
469	nop
470	sub	r9,0x20
471	jbe	NEAR $L$even_tail
472	nop
473	jmp	NEAR $L$mod_loop
474
475ALIGN	32
	; Main 2x loop: 32 bytes per iteration. Multiplies the accumulator
	; by xmm6 (table[+16]), folds, and starts the next pair of blocks.
476$L$mod_loop:
477	movdqa	xmm1,xmm0
478	movdqa	xmm8,xmm4
479	pshufd	xmm4,xmm0,78
480	pxor	xmm4,xmm0
481
482DB	102,15,58,68,198,0
483DB	102,15,58,68,206,17
484DB	102,15,58,68,231,16
485
486	pxor	xmm0,xmm3
487	pxor	xmm1,xmm5
488	movdqu	xmm9,XMMWORD[r8]
489	pxor	xmm8,xmm0
490DB	102,69,15,56,0,202
491	movdqu	xmm3,XMMWORD[16+r8]
492
493	pxor	xmm8,xmm1
494	pxor	xmm1,xmm9
495	pxor	xmm4,xmm8
496DB	102,65,15,56,0,218
497	movdqa	xmm8,xmm4
498	psrldq	xmm8,8
499	pslldq	xmm4,8
500	pxor	xmm1,xmm8
501	pxor	xmm0,xmm4
502
503	movdqa	xmm5,xmm3
504
505	movdqa	xmm9,xmm0
506	movdqa	xmm8,xmm0
507	psllq	xmm0,5
508	pxor	xmm8,xmm0
509DB	102,15,58,68,218,0
510	psllq	xmm0,1
511	pxor	xmm0,xmm8
512	psllq	xmm0,57
513	movdqa	xmm8,xmm0
514	pslldq	xmm0,8
515	psrldq	xmm8,8
516	pxor	xmm0,xmm9
517	pshufd	xmm4,xmm5,78
518	pxor	xmm1,xmm8
519	pxor	xmm4,xmm5
520
521	movdqa	xmm9,xmm0
522	psrlq	xmm0,1
523DB	102,15,58,68,234,17
524	pxor	xmm1,xmm9
525	pxor	xmm9,xmm0
526	psrlq	xmm0,5
527	pxor	xmm0,xmm9
528	lea	r8,[32+r8]
529	psrlq	xmm0,1
530DB	102,15,58,68,231,0
531	pxor	xmm0,xmm1
532
533	sub	r9,0x20
534	ja	NEAR $L$mod_loop
535
	; Finish the pending pair: multiply the accumulator by xmm6, add the
	; partial products already computed for the second block, and fold.
536$L$even_tail:
537	movdqa	xmm1,xmm0
538	movdqa	xmm8,xmm4
539	pshufd	xmm4,xmm0,78
540	pxor	xmm4,xmm0
541
542DB	102,15,58,68,198,0
543DB	102,15,58,68,206,17
544DB	102,15,58,68,231,16
545
546	pxor	xmm0,xmm3
547	pxor	xmm1,xmm5
548	pxor	xmm8,xmm0
549	pxor	xmm8,xmm1
550	pxor	xmm4,xmm8
551	movdqa	xmm8,xmm4
552	psrldq	xmm8,8
553	pslldq	xmm4,8
554	pxor	xmm1,xmm8
555	pxor	xmm0,xmm4
556
557	movdqa	xmm4,xmm0
558	movdqa	xmm3,xmm0
559	psllq	xmm0,5
560	pxor	xmm3,xmm0
561	psllq	xmm0,1
562	pxor	xmm0,xmm3
563	psllq	xmm0,57
564	movdqa	xmm3,xmm0
565	pslldq	xmm0,8
566	psrldq	xmm3,8
567	pxor	xmm0,xmm4
568	pxor	xmm1,xmm3
569
570
571	movdqa	xmm4,xmm0
572	psrlq	xmm0,1
573	pxor	xmm1,xmm4
574	pxor	xmm4,xmm0
575	psrlq	xmm0,5
576	pxor	xmm0,xmm4
577	psrlq	xmm0,1
578	pxor	xmm0,xmm1
	; r9 == 0 here means exactly one block is left (r9 is biased by
	; -16); a non-zero (wrapped) value means the input is exhausted.
579	test	r9,r9
580	jnz	NEAR $L$done
581
	; Single remaining block: xor it into the accumulator, multiply by
	; xmm2 (table[+0]) using the low half of xmm7 (table[+32]) for the
	; middle product, and fold.
582$L$odd_tail:
583	movdqu	xmm8,XMMWORD[r8]
584DB	102,69,15,56,0,194
585	pxor	xmm0,xmm8
586	movdqa	xmm1,xmm0
587	pshufd	xmm3,xmm0,78
588	pxor	xmm3,xmm0
589DB	102,15,58,68,194,0
590DB	102,15,58,68,202,17
591DB	102,15,58,68,223,0
592	pxor	xmm3,xmm0
593	pxor	xmm3,xmm1
594
595	movdqa	xmm4,xmm3
596	psrldq	xmm3,8
597	pslldq	xmm4,8
598	pxor	xmm1,xmm3
599	pxor	xmm0,xmm4
600
601	movdqa	xmm4,xmm0
602	movdqa	xmm3,xmm0
603	psllq	xmm0,5
604	pxor	xmm3,xmm0
605	psllq	xmm0,1
606	pxor	xmm0,xmm3
607	psllq	xmm0,57
608	movdqa	xmm3,xmm0
609	pslldq	xmm0,8
610	psrldq	xmm3,8
611	pxor	xmm0,xmm4
612	pxor	xmm1,xmm3
613
614
615	movdqa	xmm4,xmm0
616	psrlq	xmm0,1
617	pxor	xmm1,xmm4
618	pxor	xmm4,xmm0
619	psrlq	xmm0,5
620	pxor	xmm0,xmm4
621	psrlq	xmm0,1
622	pxor	xmm0,xmm1
	; Byte-reverse the accumulator back (pshufb xmm0,xmm10), store it,
	; restore xmm6-xmm15 and release the 168-byte frame.
623$L$done:
624DB	102,65,15,56,0,194
625	movdqu	XMMWORD[rcx],xmm0
626	movaps	xmm6,XMMWORD[rsp]
627	movaps	xmm7,XMMWORD[16+rsp]
628	movaps	xmm8,XMMWORD[32+rsp]
629	movaps	xmm9,XMMWORD[48+rsp]
630	movaps	xmm10,XMMWORD[64+rsp]
631	movaps	xmm11,XMMWORD[80+rsp]
632	movaps	xmm12,XMMWORD[96+rsp]
633	movaps	xmm13,XMMWORD[112+rsp]
634	movaps	xmm14,XMMWORD[128+rsp]
635	movaps	xmm15,XMMWORD[144+rsp]
636	lea	rsp,[168+rsp]
637$L$SEH_end_GFp_gcm_ghash_clmul:
638	DB	0F3h,0C3h		;repret
639
640
;-----------------------------------------------------------------------
; GFp_gcm_init_avx
;
; Registers as used below (consistent with the Microsoft x64 argument
; order; NOTE(review): confirm the C prototype against the caller):
;   rcx = output buffer; 192 bytes are written (four 48-byte groups)
;   rdx = input; 16 bytes are read with an unaligned load
;
; Each 48-byte group holds two consecutive products of the adjusted
; input V (xmm2) followed by their packed lo^hi halves; the loop runs
; four times (r10 counter), so eight successive products are stored.
; NOTE(review): by name this is presumably the AVX GHASH key table
; setup; that interpretation is not derivable from this file alone.
;
; Stack:    24 bytes, used only to preserve xmm6.
; Clobbers: rcx (advanced by 192), r10, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
641global	GFp_gcm_init_avx
642
643ALIGN	32
644GFp_gcm_init_avx:
645
646$L$SEH_begin_GFp_gcm_init_avx:
647
	; Hand-encoded prologue: sub rsp,0x18 / movaps [rsp],xmm6
648DB	0x48,0x83,0xec,0x18
649DB	0x0f,0x29,0x34,0x24
650	vzeroupper
651
	; xmm2 = input with its 64-bit halves swapped.
652	vmovdqu	xmm2,XMMWORD[rdx]
653	vpshufd	xmm2,xmm2,78
654
655
	; 128-bit left shift of xmm2 by one bit; xmm5 = all-ones mask when
	; the top bit was set before the shift.
656	vpshufd	xmm4,xmm2,255
657	vpsrlq	xmm3,xmm2,63
658	vpsllq	xmm2,xmm2,1
659	vpxor	xmm5,xmm5,xmm5
660	vpcmpgtd	xmm5,xmm5,xmm4
661	vpslldq	xmm3,xmm3,8
662	vpor	xmm2,xmm2,xmm3
663
664
	; Conditionally xor in the $L$0x1c2_polynomial constant.
665	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
666	vpxor	xmm2,xmm2,xmm5
667
	; xmm6 = lo^hi of V, xmm0 = V; r10 = number of 48-byte groups.
	; The first pass enters in the middle of the loop body because the
	; first group starts with V itself rather than with a product.
668	vpunpckhqdq	xmm6,xmm2,xmm2
669	vmovdqa	xmm0,xmm2
670	vpxor	xmm6,xmm6,xmm2
671	mov	r10,4
672	jmp	NEAR $L$init_start_avx
673ALIGN	32
674$L$init_loop_avx:
	; Store the packed lo^hi halves of the previous group at its +32
	; slot (rcx has already been advanced by 48).
675	vpalignr	xmm5,xmm4,xmm3,8
676	vmovdqu	XMMWORD[(-16)+rcx],xmm5
	; xmm0 = xmm0 * V: Karatsuba-style carry-less multiply ...
677	vpunpckhqdq	xmm3,xmm0,xmm0
678	vpxor	xmm3,xmm3,xmm0
679	vpclmulqdq	xmm1,xmm0,xmm2,0x11
680	vpclmulqdq	xmm0,xmm0,xmm2,0x00
681	vpclmulqdq	xmm3,xmm3,xmm6,0x00
682	vpxor	xmm4,xmm1,xmm0
683	vpxor	xmm3,xmm3,xmm4
684
	; ... followed by the fold of xmm1:xmm0 down to 128 bits in xmm0.
685	vpslldq	xmm4,xmm3,8
686	vpsrldq	xmm3,xmm3,8
687	vpxor	xmm0,xmm0,xmm4
688	vpxor	xmm1,xmm1,xmm3
689	vpsllq	xmm3,xmm0,57
690	vpsllq	xmm4,xmm0,62
691	vpxor	xmm4,xmm4,xmm3
692	vpsllq	xmm3,xmm0,63
693	vpxor	xmm4,xmm4,xmm3
694	vpslldq	xmm3,xmm4,8
695	vpsrldq	xmm4,xmm4,8
696	vpxor	xmm0,xmm0,xmm3
697	vpxor	xmm1,xmm1,xmm4
698
699	vpsrlq	xmm4,xmm0,1
700	vpxor	xmm1,xmm1,xmm0
701	vpxor	xmm0,xmm0,xmm4
702	vpsrlq	xmm4,xmm4,5
703	vpxor	xmm0,xmm0,xmm4
704	vpsrlq	xmm0,xmm0,1
705	vpxor	xmm0,xmm0,xmm1
706$L$init_start_avx:
	; xmm5 keeps the current value (first entry of the group); xmm0
	; becomes the next product (second entry of the group).
707	vmovdqa	xmm5,xmm0
708	vpunpckhqdq	xmm3,xmm0,xmm0
709	vpxor	xmm3,xmm3,xmm0
710	vpclmulqdq	xmm1,xmm0,xmm2,0x11
711	vpclmulqdq	xmm0,xmm0,xmm2,0x00
712	vpclmulqdq	xmm3,xmm3,xmm6,0x00
713	vpxor	xmm4,xmm1,xmm0
714	vpxor	xmm3,xmm3,xmm4
715
716	vpslldq	xmm4,xmm3,8
717	vpsrldq	xmm3,xmm3,8
718	vpxor	xmm0,xmm0,xmm4
719	vpxor	xmm1,xmm1,xmm3
720	vpsllq	xmm3,xmm0,57
721	vpsllq	xmm4,xmm0,62
722	vpxor	xmm4,xmm4,xmm3
723	vpsllq	xmm3,xmm0,63
724	vpxor	xmm4,xmm4,xmm3
725	vpslldq	xmm3,xmm4,8
726	vpsrldq	xmm4,xmm4,8
727	vpxor	xmm0,xmm0,xmm3
728	vpxor	xmm1,xmm1,xmm4
729
730	vpsrlq	xmm4,xmm0,1
731	vpxor	xmm1,xmm1,xmm0
732	vpxor	xmm0,xmm0,xmm4
733	vpsrlq	xmm4,xmm4,5
734	vpxor	xmm0,xmm0,xmm4
735	vpsrlq	xmm0,xmm0,1
736	vpxor	xmm0,xmm0,xmm1
	; xmm3/xmm4 = lo^hi of the two group entries; store the entries at
	; +0/+16 and advance rcx to the next group.
737	vpshufd	xmm3,xmm5,78
738	vpshufd	xmm4,xmm0,78
739	vpxor	xmm3,xmm3,xmm5
740	vmovdqu	XMMWORD[rcx],xmm5
741	vpxor	xmm4,xmm4,xmm0
742	vmovdqu	XMMWORD[16+rcx],xmm0
743	lea	rcx,[48+rcx]
744	sub	r10,1
745	jnz	NEAR $L$init_loop_avx
746
	; Last group: note the operand order is swapped (xmm3,xmm4) relative
	; to the in-loop vpalignr, so its two halves are stored in the
	; opposite order.
747	vpalignr	xmm5,xmm3,xmm4,8
748	vmovdqu	XMMWORD[(-16)+rcx],xmm5
749
	; Epilogue: restore xmm6 and release the 24-byte frame.
750	vzeroupper
751	movaps	xmm6,XMMWORD[rsp]
752	lea	rsp,[24+rsp]
753$L$SEH_end_GFp_gcm_init_avx:
754	DB	0F3h,0C3h		;repret
755
756
;-----------------------------------------------------------------------
; GFp_gcm_ghash_avx
;
; Registers as used below (consistent with the Microsoft x64 argument
; order; NOTE(review): confirm the C prototype against the caller):
;   rcx = 16-byte accumulator, read at entry and rewritten at the end
;   rdx = table; biased by +64 on entry, so all table offsets below are
;         written as (offset-64); entries up to original offset 191 are
;         read
;   r8  = input data pointer
;   r9  = input length in bytes
; NOTE(review): the length bookkeeping only terminates correctly when r9
; is a non-zero multiple of 16 -- assumed to be guaranteed by callers.
;
; Paths: an 8-blocks-per-iteration loop ($L$oop8x_avx) for inputs of at
; least 128 bytes, and $L$short_avx for the remaining one to seven
; blocks, which walks backwards from the end of the input.
;
; Stack:    168 bytes; xmm6-xmm15 are saved in the prologue and restored
;           before returning.
; Clobbers: rax, rdx, r8, r9, r10, xmm0-xmm5, flags.
;-----------------------------------------------------------------------
757global	GFp_gcm_ghash_avx
758
759ALIGN	32
760GFp_gcm_ghash_avx:
761
762	lea	rax,[((-136))+rsp]
763$L$SEH_begin_GFp_gcm_ghash_avx:
764
	; Hand-encoded prologue (same bytes as in GFp_gcm_ghash_clmul):
	;   lea    rsp,[rax-0x20]        ; rsp -= 168 in total
	;   movaps [rax-0x20],xmm6   ... movaps [rax+0x70],xmm15
765DB	0x48,0x8d,0x60,0xe0
766DB	0x0f,0x29,0x70,0xe0
767DB	0x0f,0x29,0x78,0xf0
768DB	0x44,0x0f,0x29,0x00
769DB	0x44,0x0f,0x29,0x48,0x10
770DB	0x44,0x0f,0x29,0x50,0x20
771DB	0x44,0x0f,0x29,0x58,0x30
772DB	0x44,0x0f,0x29,0x60,0x40
773DB	0x44,0x0f,0x29,0x68,0x50
774DB	0x44,0x0f,0x29,0x70,0x60
775DB	0x44,0x0f,0x29,0x78,0x70
776	vzeroupper
777
	; xmm10 = byte-reversed accumulator, xmm13 = byte-reversal mask,
	; r10 -> $L$0x1c2_polynomial (used by the fold steps).
778	vmovdqu	xmm10,XMMWORD[rcx]
779	lea	r10,[$L$0x1c2_polynomial]
780	lea	rdx,[64+rdx]
781	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
782	vpshufb	xmm10,xmm10,xmm13
783	cmp	r9,0x80
784	jb	NEAR $L$short_avx
785	sub	r9,0x80
786
	; First group of eight blocks, processed from the last block (+112)
	; down to the first (+0). xmm6 walks through the table entries and
	; xmm7 through the packed entries at +32, +80, +128 and +176 (each
	; used twice, once per 64-bit half); partial products alternate
	; between xmm0/1/2 and xmm3/4/5.
787	vmovdqu	xmm14,XMMWORD[112+r8]
788	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
789	vpshufb	xmm14,xmm14,xmm13
790	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
791
792	vpunpckhqdq	xmm9,xmm14,xmm14
793	vmovdqu	xmm15,XMMWORD[96+r8]
794	vpclmulqdq	xmm0,xmm14,xmm6,0x00
795	vpxor	xmm9,xmm9,xmm14
796	vpshufb	xmm15,xmm15,xmm13
797	vpclmulqdq	xmm1,xmm14,xmm6,0x11
798	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
799	vpunpckhqdq	xmm8,xmm15,xmm15
800	vmovdqu	xmm14,XMMWORD[80+r8]
801	vpclmulqdq	xmm2,xmm9,xmm7,0x00
802	vpxor	xmm8,xmm8,xmm15
803
804	vpshufb	xmm14,xmm14,xmm13
805	vpclmulqdq	xmm3,xmm15,xmm6,0x00
806	vpunpckhqdq	xmm9,xmm14,xmm14
807	vpclmulqdq	xmm4,xmm15,xmm6,0x11
808	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
809	vpxor	xmm9,xmm9,xmm14
810	vmovdqu	xmm15,XMMWORD[64+r8]
811	vpclmulqdq	xmm5,xmm8,xmm7,0x10
812	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
813
814	vpshufb	xmm15,xmm15,xmm13
815	vpxor	xmm3,xmm3,xmm0
816	vpclmulqdq	xmm0,xmm14,xmm6,0x00
817	vpxor	xmm4,xmm4,xmm1
818	vpunpckhqdq	xmm8,xmm15,xmm15
819	vpclmulqdq	xmm1,xmm14,xmm6,0x11
820	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
821	vpxor	xmm5,xmm5,xmm2
822	vpclmulqdq	xmm2,xmm9,xmm7,0x00
823	vpxor	xmm8,xmm8,xmm15
824
825	vmovdqu	xmm14,XMMWORD[48+r8]
826	vpxor	xmm0,xmm0,xmm3
827	vpclmulqdq	xmm3,xmm15,xmm6,0x00
828	vpxor	xmm1,xmm1,xmm4
829	vpshufb	xmm14,xmm14,xmm13
830	vpclmulqdq	xmm4,xmm15,xmm6,0x11
831	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
832	vpxor	xmm2,xmm2,xmm5
833	vpunpckhqdq	xmm9,xmm14,xmm14
834	vpclmulqdq	xmm5,xmm8,xmm7,0x10
835	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
836	vpxor	xmm9,xmm9,xmm14
837
838	vmovdqu	xmm15,XMMWORD[32+r8]
839	vpxor	xmm3,xmm3,xmm0
840	vpclmulqdq	xmm0,xmm14,xmm6,0x00
841	vpxor	xmm4,xmm4,xmm1
842	vpshufb	xmm15,xmm15,xmm13
843	vpclmulqdq	xmm1,xmm14,xmm6,0x11
844	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
845	vpxor	xmm5,xmm5,xmm2
846	vpunpckhqdq	xmm8,xmm15,xmm15
847	vpclmulqdq	xmm2,xmm9,xmm7,0x00
848	vpxor	xmm8,xmm8,xmm15
849
850	vmovdqu	xmm14,XMMWORD[16+r8]
851	vpxor	xmm0,xmm0,xmm3
852	vpclmulqdq	xmm3,xmm15,xmm6,0x00
853	vpxor	xmm1,xmm1,xmm4
854	vpshufb	xmm14,xmm14,xmm13
855	vpclmulqdq	xmm4,xmm15,xmm6,0x11
856	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
857	vpxor	xmm2,xmm2,xmm5
858	vpunpckhqdq	xmm9,xmm14,xmm14
859	vpclmulqdq	xmm5,xmm8,xmm7,0x10
860	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
861	vpxor	xmm9,xmm9,xmm14
862
863	vmovdqu	xmm15,XMMWORD[r8]
864	vpxor	xmm3,xmm3,xmm0
865	vpclmulqdq	xmm0,xmm14,xmm6,0x00
866	vpxor	xmm4,xmm4,xmm1
867	vpshufb	xmm15,xmm15,xmm13
868	vpclmulqdq	xmm1,xmm14,xmm6,0x11
869	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
870	vpxor	xmm5,xmm5,xmm2
871	vpclmulqdq	xmm2,xmm9,xmm7,0x10
872
	; Eight blocks loaded. If fewer than eight further blocks remain,
	; finish this group in the tail (which xors in the accumulator).
873	lea	r8,[128+r8]
874	cmp	r9,0x80
875	jb	NEAR $L$tail_avx
876
	; Fold the running accumulator into the first block of the group.
877	vpxor	xmm15,xmm15,xmm10
878	sub	r9,0x80
879	jmp	NEAR $L$oop8x_avx
880
881ALIGN	32
	; Main loop: 128 bytes per iteration. The last multiply of the
	; previous group (block in xmm15), its fold (two carry-less
	; multiplies by the constant at [r10]) and the multiplies for the
	; next eight blocks are interleaved.
882$L$oop8x_avx:
883	vpunpckhqdq	xmm8,xmm15,xmm15
884	vmovdqu	xmm14,XMMWORD[112+r8]
885	vpxor	xmm3,xmm3,xmm0
886	vpxor	xmm8,xmm8,xmm15
887	vpclmulqdq	xmm10,xmm15,xmm6,0x00
888	vpshufb	xmm14,xmm14,xmm13
889	vpxor	xmm4,xmm4,xmm1
890	vpclmulqdq	xmm11,xmm15,xmm6,0x11
891	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
892	vpunpckhqdq	xmm9,xmm14,xmm14
893	vpxor	xmm5,xmm5,xmm2
894	vpclmulqdq	xmm12,xmm8,xmm7,0x00
895	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
896	vpxor	xmm9,xmm9,xmm14
897
898	vmovdqu	xmm15,XMMWORD[96+r8]
899	vpclmulqdq	xmm0,xmm14,xmm6,0x00
900	vpxor	xmm10,xmm10,xmm3
901	vpshufb	xmm15,xmm15,xmm13
902	vpclmulqdq	xmm1,xmm14,xmm6,0x11
903	vxorps	xmm11,xmm11,xmm4
904	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
905	vpunpckhqdq	xmm8,xmm15,xmm15
906	vpclmulqdq	xmm2,xmm9,xmm7,0x00
907	vpxor	xmm12,xmm12,xmm5
908	vxorps	xmm8,xmm8,xmm15
909
910	vmovdqu	xmm14,XMMWORD[80+r8]
911	vpxor	xmm12,xmm12,xmm10
912	vpclmulqdq	xmm3,xmm15,xmm6,0x00
913	vpxor	xmm12,xmm12,xmm11
914	vpslldq	xmm9,xmm12,8
915	vpxor	xmm3,xmm3,xmm0
916	vpclmulqdq	xmm4,xmm15,xmm6,0x11
917	vpsrldq	xmm12,xmm12,8
918	vpxor	xmm10,xmm10,xmm9
919	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
920	vpshufb	xmm14,xmm14,xmm13
921	vxorps	xmm11,xmm11,xmm12
922	vpxor	xmm4,xmm4,xmm1
923	vpunpckhqdq	xmm9,xmm14,xmm14
924	vpclmulqdq	xmm5,xmm8,xmm7,0x10
925	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
926	vpxor	xmm9,xmm9,xmm14
927	vpxor	xmm5,xmm5,xmm2
928
929	vmovdqu	xmm15,XMMWORD[64+r8]
930	vpalignr	xmm12,xmm10,xmm10,8
931	vpclmulqdq	xmm0,xmm14,xmm6,0x00
932	vpshufb	xmm15,xmm15,xmm13
933	vpxor	xmm0,xmm0,xmm3
934	vpclmulqdq	xmm1,xmm14,xmm6,0x11
935	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
936	vpunpckhqdq	xmm8,xmm15,xmm15
937	vpxor	xmm1,xmm1,xmm4
938	vpclmulqdq	xmm2,xmm9,xmm7,0x00
939	vxorps	xmm8,xmm8,xmm15
940	vpxor	xmm2,xmm2,xmm5
941
942	vmovdqu	xmm14,XMMWORD[48+r8]
943	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
944	vpclmulqdq	xmm3,xmm15,xmm6,0x00
945	vpshufb	xmm14,xmm14,xmm13
946	vpxor	xmm3,xmm3,xmm0
947	vpclmulqdq	xmm4,xmm15,xmm6,0x11
948	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
949	vpunpckhqdq	xmm9,xmm14,xmm14
950	vpxor	xmm4,xmm4,xmm1
951	vpclmulqdq	xmm5,xmm8,xmm7,0x10
952	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
953	vpxor	xmm9,xmm9,xmm14
954	vpxor	xmm5,xmm5,xmm2
955
956	vmovdqu	xmm15,XMMWORD[32+r8]
957	vpclmulqdq	xmm0,xmm14,xmm6,0x00
958	vpshufb	xmm15,xmm15,xmm13
959	vpxor	xmm0,xmm0,xmm3
960	vpclmulqdq	xmm1,xmm14,xmm6,0x11
961	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
962	vpunpckhqdq	xmm8,xmm15,xmm15
963	vpxor	xmm1,xmm1,xmm4
964	vpclmulqdq	xmm2,xmm9,xmm7,0x00
965	vpxor	xmm8,xmm8,xmm15
966	vpxor	xmm2,xmm2,xmm5
967	vxorps	xmm10,xmm10,xmm12
968
969	vmovdqu	xmm14,XMMWORD[16+r8]
970	vpalignr	xmm12,xmm10,xmm10,8
971	vpclmulqdq	xmm3,xmm15,xmm6,0x00
972	vpshufb	xmm14,xmm14,xmm13
973	vpxor	xmm3,xmm3,xmm0
974	vpclmulqdq	xmm4,xmm15,xmm6,0x11
975	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
976	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
977	vxorps	xmm12,xmm12,xmm11
978	vpunpckhqdq	xmm9,xmm14,xmm14
979	vpxor	xmm4,xmm4,xmm1
980	vpclmulqdq	xmm5,xmm8,xmm7,0x10
981	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
982	vpxor	xmm9,xmm9,xmm14
983	vpxor	xmm5,xmm5,xmm2
984
	; The folded result of the previous group (xmm12 ^ xmm10) is xored
	; straight into the first block of this group.
985	vmovdqu	xmm15,XMMWORD[r8]
986	vpclmulqdq	xmm0,xmm14,xmm6,0x00
987	vpshufb	xmm15,xmm15,xmm13
988	vpclmulqdq	xmm1,xmm14,xmm6,0x11
989	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
990	vpxor	xmm15,xmm15,xmm12
991	vpclmulqdq	xmm2,xmm9,xmm7,0x10
992	vpxor	xmm15,xmm15,xmm10
993
994	lea	r8,[128+r8]
995	sub	r9,0x80
996	jnc	NEAR $L$oop8x_avx
997
	; Restore r9 to the true remaining byte count. The accumulator is
	; already folded into xmm15, so skip the xor at $L$tail_avx.
998	add	r9,0x80
999	jmp	NEAR $L$tail_no_xor_avx
1000
1001ALIGN	32
	; Short path: one to seven blocks, read backwards from the end of
	; the input (r8 is moved to the end; blocks are at -16, -32, ...).
1002$L$short_avx:
1003	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1004	lea	r8,[r9*1+r8]
1005	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1006	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1007	vpshufb	xmm15,xmm14,xmm13
1008
	; Copying xmm0-2 into xmm3-5 makes the next "vpxor xmm3,xmm3,xmm0"
	; (and the xmm4/xmm5 equivalents) cancel to zero, so whatever is
	; left in xmm0-2 does not leak into the sums.
1009	vmovdqa	xmm3,xmm0
1010	vmovdqa	xmm4,xmm1
1011	vmovdqa	xmm5,xmm2
1012	sub	r9,0x10
1013	jz	NEAR $L$tail_avx
1014
	; Each step below multiplies one block by the next table entry and
	; preloads the following block; vpsrldq xmm7,xmm7,8 moves the high
	; half of the packed entry into the low half for the next step.
1015	vpunpckhqdq	xmm8,xmm15,xmm15
1016	vpxor	xmm3,xmm3,xmm0
1017	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1018	vpxor	xmm8,xmm8,xmm15
1019	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1020	vpxor	xmm4,xmm4,xmm1
1021	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1022	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1023	vpshufb	xmm15,xmm14,xmm13
1024	vpxor	xmm5,xmm5,xmm2
1025	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1026	vpsrldq	xmm7,xmm7,8
1027	sub	r9,0x10
1028	jz	NEAR $L$tail_avx
1029
1030	vpunpckhqdq	xmm8,xmm15,xmm15
1031	vpxor	xmm3,xmm3,xmm0
1032	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1033	vpxor	xmm8,xmm8,xmm15
1034	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1035	vpxor	xmm4,xmm4,xmm1
1036	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1037	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1038	vpshufb	xmm15,xmm14,xmm13
1039	vpxor	xmm5,xmm5,xmm2
1040	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1041	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1042	sub	r9,0x10
1043	jz	NEAR $L$tail_avx
1044
1045	vpunpckhqdq	xmm8,xmm15,xmm15
1046	vpxor	xmm3,xmm3,xmm0
1047	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1048	vpxor	xmm8,xmm8,xmm15
1049	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1050	vpxor	xmm4,xmm4,xmm1
1051	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1052	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1053	vpshufb	xmm15,xmm14,xmm13
1054	vpxor	xmm5,xmm5,xmm2
1055	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1056	vpsrldq	xmm7,xmm7,8
1057	sub	r9,0x10
1058	jz	NEAR $L$tail_avx
1059
1060	vpunpckhqdq	xmm8,xmm15,xmm15
1061	vpxor	xmm3,xmm3,xmm0
1062	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1063	vpxor	xmm8,xmm8,xmm15
1064	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1065	vpxor	xmm4,xmm4,xmm1
1066	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1067	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1068	vpshufb	xmm15,xmm14,xmm13
1069	vpxor	xmm5,xmm5,xmm2
1070	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1071	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1072	sub	r9,0x10
1073	jz	NEAR $L$tail_avx
1074
1075	vpunpckhqdq	xmm8,xmm15,xmm15
1076	vpxor	xmm3,xmm3,xmm0
1077	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1078	vpxor	xmm8,xmm8,xmm15
1079	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1080	vpxor	xmm4,xmm4,xmm1
1081	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1082	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1083	vpshufb	xmm15,xmm14,xmm13
1084	vpxor	xmm5,xmm5,xmm2
1085	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1086	vpsrldq	xmm7,xmm7,8
1087	sub	r9,0x10
1088	jz	NEAR $L$tail_avx
1089
1090	vpunpckhqdq	xmm8,xmm15,xmm15
1091	vpxor	xmm3,xmm3,xmm0
1092	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1093	vpxor	xmm8,xmm8,xmm15
1094	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1095	vpxor	xmm4,xmm4,xmm1
1096	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1097	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1098	vpshufb	xmm15,xmm14,xmm13
1099	vpxor	xmm5,xmm5,xmm2
1100	vpclmulqdq	xmm2,xmm8,xmm7,0x00
	; Seventh block: only the high qword of the packed entry at +176 is
	; needed, loaded directly into the low half of xmm7.
1101	vmovq	xmm7,QWORD[((184-64))+rdx]
1102	sub	r9,0x10
1103	jmp	NEAR $L$tail_avx
1104
1105ALIGN	32
	; Tail: xor the running accumulator into the earliest pending block,
	; do its multiply, sum all partial products and fold to 128 bits.
1106$L$tail_avx:
1107	vpxor	xmm15,xmm15,xmm10
1108$L$tail_no_xor_avx:
1109	vpunpckhqdq	xmm8,xmm15,xmm15
1110	vpxor	xmm3,xmm3,xmm0
1111	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1112	vpxor	xmm8,xmm8,xmm15
1113	vpxor	xmm4,xmm4,xmm1
1114	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1115	vpxor	xmm5,xmm5,xmm2
1116	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1117
1118	vmovdqu	xmm12,XMMWORD[r10]
1119
	; xmm10 = low 128 bits, xmm11 = high 128 bits, xmm5 = middle term.
1120	vpxor	xmm10,xmm3,xmm0
1121	vpxor	xmm11,xmm4,xmm1
1122	vpxor	xmm5,xmm5,xmm2
1123
1124	vpxor	xmm5,xmm5,xmm10
1125	vpxor	xmm5,xmm5,xmm11
1126	vpslldq	xmm9,xmm5,8
1127	vpsrldq	xmm5,xmm5,8
1128	vpxor	xmm10,xmm10,xmm9
1129	vpxor	xmm11,xmm11,xmm5
1130
	; Two fold steps, each a carry-less multiply by the constant in
	; xmm12 combined with a 64-bit rotation of xmm10.
1131	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1132	vpalignr	xmm10,xmm10,xmm10,8
1133	vpxor	xmm10,xmm10,xmm9
1134
1135	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1136	vpalignr	xmm10,xmm10,xmm10,8
1137	vpxor	xmm10,xmm10,xmm11
1138	vpxor	xmm10,xmm10,xmm9
1139
	; Bytes left after the 8x loop are handled by the short path.
1140	cmp	r9,0
1141	jne	NEAR $L$short_avx
1142
	; Byte-reverse the accumulator back, store it, restore xmm6-xmm15
	; and release the 168-byte frame.
1143	vpshufb	xmm10,xmm10,xmm13
1144	vmovdqu	XMMWORD[rcx],xmm10
1145	vzeroupper
1146	movaps	xmm6,XMMWORD[rsp]
1147	movaps	xmm7,XMMWORD[16+rsp]
1148	movaps	xmm8,XMMWORD[32+rsp]
1149	movaps	xmm9,XMMWORD[48+rsp]
1150	movaps	xmm10,XMMWORD[64+rsp]
1151	movaps	xmm11,XMMWORD[80+rsp]
1152	movaps	xmm12,XMMWORD[96+rsp]
1153	movaps	xmm13,XMMWORD[112+rsp]
1154	movaps	xmm14,XMMWORD[128+rsp]
1155	movaps	xmm15,XMMWORD[144+rsp]
1156	lea	rsp,[168+rsp]
1157$L$SEH_end_GFp_gcm_ghash_avx:
1158	DB	0F3h,0C3h		;repret
1159
1160
; Constants shared by the routines above. They are emitted in the .text
; section (no section switch precedes them) and are 64-byte aligned, so
; the aligned movdqa loads of $L$bswap_mask and $L$7_mask are safe.
1161ALIGN	64
; pshufb control that reverses the order of all 16 bytes.
1162$L$bswap_mask:
1163DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit constant: low qword = 1, high qword = 0xc200000000000000.
; Used when adjusting the input in the init routines and for the fold
; steps in GFp_gcm_ghash_avx.
1164$L$0x1c2_polynomial:
1165DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Dwords 7,0,7,0: keeps the low three bits of each 64-bit half (used in
; $L$mod4_loop).
1166$L$7_mask:
1167	DD	7,0,7,0
1168ALIGN	64
1169
; NUL-terminated ASCII identification string:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1170DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1171DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1172DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1173DB	114,103,62,0
1174ALIGN	64
; Win64 function table (.pdata). Each entry is three image-relative
; addresses: function begin, function end, unwind info.
; GFp_gcm_gmult_clmul has no entry; it neither adjusts rsp nor saves any
; register.
1175section	.pdata rdata align=4
1176ALIGN	4
1177	DD	$L$SEH_begin_GFp_gcm_init_clmul wrt ..imagebase
1178	DD	$L$SEH_end_GFp_gcm_init_clmul wrt ..imagebase
1179	DD	$L$SEH_info_GFp_gcm_init_clmul wrt ..imagebase
1180
1181	DD	$L$SEH_begin_GFp_gcm_ghash_clmul wrt ..imagebase
1182	DD	$L$SEH_end_GFp_gcm_ghash_clmul wrt ..imagebase
1183	DD	$L$SEH_info_GFp_gcm_ghash_clmul wrt ..imagebase
; GFp_gcm_init_avx reuses the init_clmul unwind info: both prologues
; start with the same 8 bytes (sub rsp,0x18 / movaps [rsp],xmm6).
1184	DD	$L$SEH_begin_GFp_gcm_init_avx wrt ..imagebase
1185	DD	$L$SEH_end_GFp_gcm_init_avx wrt ..imagebase
1186	DD	$L$SEH_info_GFp_gcm_init_clmul wrt ..imagebase
1187
; GFp_gcm_ghash_avx reuses the ghash_clmul unwind info: the hand-encoded
; prologue bytes of the two routines are identical.
1188	DD	$L$SEH_begin_GFp_gcm_ghash_avx wrt ..imagebase
1189	DD	$L$SEH_end_GFp_gcm_ghash_avx wrt ..imagebase
1190	DD	$L$SEH_info_GFp_gcm_ghash_clmul wrt ..imagebase
; Win64 unwind information (.xdata), hand-encoded UNWIND_INFO records.
; Header bytes: version/flags, prologue size, number of unwind-code
; slots, frame register. Each code slot is (prologue offset, op|info<<4);
; SAVE_XMM128 (op 8) and ALLOC_LARGE (op 1, info 0) take one extra slot
; holding the scaled offset/size. Codes are listed in reverse prologue
; order.
1191section	.xdata rdata align=8
1192ALIGN	8
; Version 1, 8-byte prologue, 3 code slots (padded with one empty slot):
;   offset 8: save xmm6 at [rsp+0]
;   offset 4: allocate 24 bytes (ALLOC_SMALL, info 2 -> 2*8+8)
1193$L$SEH_info_GFp_gcm_init_clmul:
1194DB	0x01,0x08,0x03,0x00
1195DB	0x08,0x68,0x00,0x00
1196DB	0x04,0x22,0x00,0x00
; Version 1, 0x33-byte prologue, 0x16 (22) code slots:
;   xmm15..xmm6 saved at [rsp+144] down to [rsp+0] (third byte * 16),
;   then a 168-byte allocation (0x15 * 8) at prologue offset 4.
1197$L$SEH_info_GFp_gcm_ghash_clmul:
1198DB	0x01,0x33,0x16,0x00
1199DB	0x33,0xf8,0x09,0x00
1200DB	0x2e,0xe8,0x08,0x00
1201DB	0x29,0xd8,0x07,0x00
1202DB	0x24,0xc8,0x06,0x00
1203DB	0x1f,0xb8,0x05,0x00
1204DB	0x1a,0xa8,0x04,0x00
1205DB	0x15,0x98,0x03,0x00
1206DB	0x10,0x88,0x02,0x00
1207DB	0x0c,0x78,0x01,0x00
1208DB	0x08,0x68,0x00,0x00
1209DB	0x04,0x01,0x15,0x00
1210