1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
8section	.text code align=64
9
10EXTERN	GFp_ia32cap_P
11
;-----------------------------------------------------------------------
; Constant pool for the ChaCha20-Poly1305 routines in this file.
; It is emitted inside the .text section and starts on a 64-byte
; boundary. Every entry below is a multiple of 16 bytes long, so each
; label stays 16-byte aligned; the movdqa/paddd/pand/pshufb memory
; operands that reference these labels rely on that.
;-----------------------------------------------------------------------
12chacha20_poly1305_constants:
13
14ALIGN	64
; ASCII "expand 32-byte k", emitted twice (2 x 16 bytes). The SSE code
; loads the first copy as the first 16-byte row of the ChaCha20 state.
; NOTE(review): the second copy is presumably there for 32-byte (AVX2)
; loads; that path is not visible in this chunk.
15$L$chacha20_consts:
16DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
17DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; pshufb control bytes: within each 32-bit lane, destination bytes
; 0,1,2,3 take source bytes 3,0,1,2, i.e. every dword is rotated left
; by 8 bits. Emitted twice (2 x 16 bytes).
18$L$rol8:
19DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
20DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; pshufb control bytes: within each 32-bit lane, destination bytes
; 0,1,2,3 take source bytes 2,3,0,1, i.e. every dword is rotated by
; 16 bits. Emitted twice (2 x 16 bytes).
21$L$rol16:
22DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
23DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords. Because $L$sse_inc follows immediately, a 32-byte
; read starting here yields {0,0,0,0, 1,0,0,0}.
; NOTE(review): presumably the initial per-lane counter offsets for the
; AVX2 path; not referenced in the visible code - confirm.
24$L$avx2_init:
25	DD	0,0,0,0
; Added with paddd to the state row that holds the block counter:
; increments the lowest dword by 1 and leaves the other three alone.
26$L$sse_inc:
27	DD	1,0,0,0
; Same idea for two 128-bit lanes: +2 on the lowest dword of each lane.
; NOTE(review): not referenced in the visible code; presumably used by
; the AVX2 path - confirm.
28$L$avx2_inc:
29	DD	2,0,0,0,2,0,0,0
; AND mask applied (pand) to the freshly derived Poly1305 key material.
; First 16 bytes: clears the top 4 bits of every 32-bit word and the
; low 2 bits of words 1..3 (the Poly1305 "r" clamp).
; Next 16 bytes: all ones, i.e. a 32-byte AND leaves the upper half
; unchanged.
; NOTE(review): the all-ones half is presumably for the "s" part of the
; key in a 32-byte load - confirm.
30$L$clamp:
31	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
32	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
33ALIGN	16
; Table of 16 masks, 16 bytes each. Row i (0-based) has 0xff in its
; first i+1 bytes and 0x00 in the rest, so ANDing a 16-byte value with
; row i keeps only its lowest i+1 bytes.
; NOTE(review): not referenced in the visible code; presumably indexed
; by (length-1)*16 when handling a partial final block - confirm.
34$L$and_masks:
35DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
36DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
37DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
38DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
45DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
46DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
47DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
48DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
49DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
50DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
51
52
53ALIGN	64
;-----------------------------------------------------------------------
; poly_hash_ad_internal
;
; Internal helper (not exported, custom register convention): absorbs a
; byte string into a fresh Poly1305 accumulator.
;
; In:    rcx = pointer to the data
;        r8  = length of the data in bytes
;        rbp = frame base; QWORD[160+rbp] (r0) and QWORD[168+rbp] (r1)
;              hold the 128-bit multiplier r
; Out:   r12:r11:r10 = accumulator h (h2:h1:h0), only partially reduced
;        mod 2^130-5 (no final conditional subtraction is done here)
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are modified on
;        the general (non 13-byte) path
;
; Per 16-byte block m the code computes h = (h + m + 2^128) * r and then
; folds everything above bit 130 back in using 2^130 = 5 (mod 2^130-5).
; A trailing partial block is zero-extended to 16 bytes and gets the same
; 2^128 addend as a full block.
;-----------------------------------------------------------------------
54poly_hash_ad_internal:
55
56
57	xor	r10,r10		; h0 = 0
58	xor	r11,r11		; h1 = 0
59	xor	r12,r12		; h2 = 0
60	cmp	r8,13		; exactly 13 bytes takes the straight-line path below
61	jne	NEAR $L$hash_ad_loop
62$L$poly_fast_tls_ad:
63
; 13-byte input handled as one block without a loop. The second load
; overlaps the first by 3 bytes (bytes 5..12); shifting right by 24
; drops bytes 5..7 and leaves bytes 8..12 zero-extended in r11. Both
; loads stay inside the 13-byte buffer.
64	mov	r10,QWORD[rcx]		; h0 = bytes 0..7
65	mov	r11,QWORD[5+rcx]
66	shr	r11,24			; h1 = bytes 8..12, upper 24 bits zero
67	mov	r12,1			; h2 = 1, the 2^128 term (h was zero, so a plain move suffices)
; ---- t = h * r : 3-limb by 2-limb schoolbook multiply ----
; The product is collected in r9:r15:r14:r13 (t3:t2:t1:t0).
; imul keeps only the low 64 bits of h2*r0 and h2*r1.
; NOTE(review): that assumes h2 is only a few bits wide and r is
; clamped - confirm against the callers.
68	mov	rax,QWORD[((0+160+0))+rbp]	; r0
69	mov	r15,rax
70	mul	r10			; rdx:rax = h0*r0
71	mov	r13,rax			; t0
72	mov	r14,rdx			; t1 (partial)
73	mov	rax,QWORD[((0+160+0))+rbp]
74	mul	r11			; rdx:rax = h1*r0
75	imul	r15,r12			; r15 = h2*r0
76	add	r14,rax
77	adc	r15,rdx			; t2 (partial)
78	mov	rax,QWORD[((8+160+0))+rbp]	; r1
79	mov	r9,rax
80	mul	r10			; rdx:rax = h0*r1
81	add	r14,rax			; t1 complete
82	adc	rdx,0
83	mov	r10,rdx			; r10 reused: high half of h0*r1 plus carry
84	mov	rax,QWORD[((8+160+0))+rbp]
85	mul	r11			; rdx:rax = h1*r1
86	add	r15,rax
87	adc	rdx,0
88	imul	r9,r12			; r9 = h2*r1
89	add	r15,r10			; t2 complete
90	adc	r9,rdx			; t3
; ---- reduce t modulo 2^130-5 ----
; The low 130 bits stay as the new h. With c = t >> 130, the code adds
; 5*c = 4*c + c, where 4*c = (t3:t2) & ~3 and c = (t3:t2) >> 2.
91	mov	r10,r13			; h0 = t0
92	mov	r11,r14			; h1 = t1
93	mov	r12,r15
94	and	r12,3			; h2 = low 2 bits of t2
95	mov	r13,r15
96	and	r13,-4			; low limb of 4*c
97	mov	r14,r9			; high limb of 4*c
98	shrd	r15,r9,2
99	shr	r9,2			; r9:r15 = c
100	add	r15,r13
101	adc	r9,r14			; r9:r15 = 5*c
102	add	r10,r15
103	adc	r11,r9
104	adc	r12,0			; h += 5*c
105
106	DB	0F3h,0C3h		;repret
107$L$hash_ad_loop:
108
; General path: consume full 16-byte blocks while at least 16 bytes remain.
109	cmp	r8,16
110	jb	NEAR $L$hash_ad_tail
111	add	r10,QWORD[((0+0))+rcx]	; h += block (little-endian, 128 bits)
112	adc	r11,QWORD[((8+0))+rcx]
113	adc	r12,1			; ... plus 2^128, plus carry
; t = h * r, same multiply sequence as in the fast path above
114	mov	rax,QWORD[((0+160+0))+rbp]
115	mov	r15,rax
116	mul	r10
117	mov	r13,rax
118	mov	r14,rdx
119	mov	rax,QWORD[((0+160+0))+rbp]
120	mul	r11
121	imul	r15,r12
122	add	r14,rax
123	adc	r15,rdx
124	mov	rax,QWORD[((8+160+0))+rbp]
125	mov	r9,rax
126	mul	r10
127	add	r14,rax
128	adc	rdx,0
129	mov	r10,rdx
130	mov	rax,QWORD[((8+160+0))+rbp]
131	mul	r11
132	add	r15,rax
133	adc	rdx,0
134	imul	r9,r12
135	add	r15,r10
136	adc	r9,rdx
; reduce modulo 2^130-5, same fold as in the fast path above
137	mov	r10,r13
138	mov	r11,r14
139	mov	r12,r15
140	and	r12,3
141	mov	r13,r15
142	and	r13,-4
143	mov	r14,r9
144	shrd	r15,r9,2
145	shr	r9,2
146	add	r15,r13
147	adc	r9,r14
148	add	r10,r15
149	adc	r11,r9
150	adc	r12,0
151
152	lea	rcx,[16+rcx]		; advance input pointer
153	sub	r8,16			; 16 fewer bytes remaining
154	jmp	NEAR $L$hash_ad_loop
155$L$hash_ad_tail:
; Fewer than 16 bytes remain; nothing more to do if that is zero.
156	cmp	r8,0
157	je	NEAR $L$hash_ad_done
158
; Assemble the remaining r8 bytes (1..15) into r14:r13 as a
; little-endian value. rcx is moved one past the last byte and the bytes
; are read from last to first, shifting the 128-bit pair left by 8 each
; time, so the first byte ends up lowest and missing high bytes are zero.
159	xor	r13,r13
160	xor	r14,r14
161	xor	r15,r15
162	add	rcx,r8
163$L$hash_ad_tail_loop:
164	shld	r14,r13,8		; (r14:r13) <<= 8, high limb first
165	shl	r13,8
166	movzx	r15,BYTE[((-1))+rcx]	; next byte, walking backwards
167	xor	r13,r15			; place it in the freed low byte
168	dec	rcx
169	dec	r8
170	jne	NEAR $L$hash_ad_tail_loop	; until all remaining bytes are consumed
171
172	add	r10,r13			; h += zero-extended partial block
173	adc	r11,r14
174	adc	r12,1			; ... plus 2^128, plus carry
; t = h * r, same multiply sequence as in the fast path above
175	mov	rax,QWORD[((0+160+0))+rbp]
176	mov	r15,rax
177	mul	r10
178	mov	r13,rax
179	mov	r14,rdx
180	mov	rax,QWORD[((0+160+0))+rbp]
181	mul	r11
182	imul	r15,r12
183	add	r14,rax
184	adc	r15,rdx
185	mov	rax,QWORD[((8+160+0))+rbp]
186	mov	r9,rax
187	mul	r10
188	add	r14,rax
189	adc	rdx,0
190	mov	r10,rdx
191	mov	rax,QWORD[((8+160+0))+rbp]
192	mul	r11
193	add	r15,rax
194	adc	rdx,0
195	imul	r9,r12
196	add	r15,r10
197	adc	r9,rdx
; reduce modulo 2^130-5, same fold as in the fast path above
198	mov	r10,r13
199	mov	r11,r14
200	mov	r12,r15
201	and	r12,3
202	mov	r13,r15
203	and	r13,-4
204	mov	r14,r9
205	shrd	r15,r9,2
206	shr	r9,2
207	add	r15,r13
208	adc	r9,r14
209	add	r10,r15
210	adc	r11,r9
211	adc	r12,0
212
213
214$L$hash_ad_done:
215	DB	0F3h,0C3h		;repret
216
217
218
219global	GFp_chacha20_poly1305_open
220
221ALIGN	64
222GFp_chacha20_poly1305_open:
223	mov	QWORD[8+rsp],rdi	;WIN64 prologue
224	mov	QWORD[16+rsp],rsi
225	mov	rax,rsp
226$L$SEH_begin_GFp_chacha20_poly1305_open:
227	mov	rdi,rcx
228	mov	rsi,rdx
229	mov	rdx,r8
230	mov	rcx,r9
231	mov	r8,QWORD[40+rsp]
232	mov	r9,QWORD[48+rsp]
233
234
235
236	push	rbp
237
238	push	rbx
239
240	push	r12
241
242	push	r13
243
244	push	r14
245
246	push	r15
247
248
249
250	push	r9
251
252	sub	rsp,288 + 160 + 32
253
254
255	lea	rbp,[32+rsp]
256	and	rbp,-32
257
258	movaps	XMMWORD[(0+0)+rbp],xmm6
259	movaps	XMMWORD[(16+0)+rbp],xmm7
260	movaps	XMMWORD[(32+0)+rbp],xmm8
261	movaps	XMMWORD[(48+0)+rbp],xmm9
262	movaps	XMMWORD[(64+0)+rbp],xmm10
263	movaps	XMMWORD[(80+0)+rbp],xmm11
264	movaps	XMMWORD[(96+0)+rbp],xmm12
265	movaps	XMMWORD[(112+0)+rbp],xmm13
266	movaps	XMMWORD[(128+0)+rbp],xmm14
267	movaps	XMMWORD[(144+0)+rbp],xmm15
268
269	mov	rbx,rdx
270	mov	QWORD[((0+160+32))+rbp],r8
271	mov	QWORD[((8+160+32))+rbp],rbx
272
273	mov	eax,DWORD[((GFp_ia32cap_P+8))]
274	and	eax,288
275	xor	eax,288
276	jz	NEAR chacha20_poly1305_open_avx2
277
278	cmp	rbx,128
279	jbe	NEAR $L$open_sse_128
280
281	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
282	movdqu	xmm4,XMMWORD[r9]
283	movdqu	xmm8,XMMWORD[16+r9]
284	movdqu	xmm12,XMMWORD[32+r9]
285
286	movdqa	xmm7,xmm12
287
288	movdqa	XMMWORD[(160+48)+rbp],xmm4
289	movdqa	XMMWORD[(160+64)+rbp],xmm8
290	movdqa	XMMWORD[(160+96)+rbp],xmm12
291	mov	r10,10
292$L$open_sse_init_rounds:
293	paddd	xmm0,xmm4
294	pxor	xmm12,xmm0
295	pshufb	xmm12,XMMWORD[$L$rol16]
296	paddd	xmm8,xmm12
297	pxor	xmm4,xmm8
298	movdqa	xmm3,xmm4
299	pslld	xmm3,12
300	psrld	xmm4,20
301	pxor	xmm4,xmm3
302	paddd	xmm0,xmm4
303	pxor	xmm12,xmm0
304	pshufb	xmm12,XMMWORD[$L$rol8]
305	paddd	xmm8,xmm12
306	pxor	xmm4,xmm8
307	movdqa	xmm3,xmm4
308	pslld	xmm3,7
309	psrld	xmm4,25
310	pxor	xmm4,xmm3
311DB	102,15,58,15,228,4
312DB	102,69,15,58,15,192,8
313DB	102,69,15,58,15,228,12
314	paddd	xmm0,xmm4
315	pxor	xmm12,xmm0
316	pshufb	xmm12,XMMWORD[$L$rol16]
317	paddd	xmm8,xmm12
318	pxor	xmm4,xmm8
319	movdqa	xmm3,xmm4
320	pslld	xmm3,12
321	psrld	xmm4,20
322	pxor	xmm4,xmm3
323	paddd	xmm0,xmm4
324	pxor	xmm12,xmm0
325	pshufb	xmm12,XMMWORD[$L$rol8]
326	paddd	xmm8,xmm12
327	pxor	xmm4,xmm8
328	movdqa	xmm3,xmm4
329	pslld	xmm3,7
330	psrld	xmm4,25
331	pxor	xmm4,xmm3
332DB	102,15,58,15,228,12
333DB	102,69,15,58,15,192,8
334DB	102,69,15,58,15,228,4
335
336	dec	r10
337	jne	NEAR $L$open_sse_init_rounds
338
339	paddd	xmm0,XMMWORD[$L$chacha20_consts]
340	paddd	xmm4,XMMWORD[((160+48))+rbp]
341
342	pand	xmm0,XMMWORD[$L$clamp]
343	movdqa	XMMWORD[(160+0)+rbp],xmm0
344	movdqa	XMMWORD[(160+16)+rbp],xmm4
345
346	mov	r8,r8
347	call	poly_hash_ad_internal
348$L$open_sse_main_loop:
349	cmp	rbx,16*16
350	jb	NEAR $L$open_sse_tail
351
352	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
353	movdqa	xmm4,XMMWORD[((160+48))+rbp]
354	movdqa	xmm8,XMMWORD[((160+64))+rbp]
355	movdqa	xmm1,xmm0
356	movdqa	xmm5,xmm4
357	movdqa	xmm9,xmm8
358	movdqa	xmm2,xmm0
359	movdqa	xmm6,xmm4
360	movdqa	xmm10,xmm8
361	movdqa	xmm3,xmm0
362	movdqa	xmm7,xmm4
363	movdqa	xmm11,xmm8
364	movdqa	xmm15,XMMWORD[((160+96))+rbp]
365	paddd	xmm15,XMMWORD[$L$sse_inc]
366	movdqa	xmm14,xmm15
367	paddd	xmm14,XMMWORD[$L$sse_inc]
368	movdqa	xmm13,xmm14
369	paddd	xmm13,XMMWORD[$L$sse_inc]
370	movdqa	xmm12,xmm13
371	paddd	xmm12,XMMWORD[$L$sse_inc]
372	movdqa	XMMWORD[(160+96)+rbp],xmm12
373	movdqa	XMMWORD[(160+112)+rbp],xmm13
374	movdqa	XMMWORD[(160+128)+rbp],xmm14
375	movdqa	XMMWORD[(160+144)+rbp],xmm15
376
377
378
379	mov	rcx,4
380	mov	r8,rsi
381$L$open_sse_main_loop_rounds:
382	movdqa	XMMWORD[(160+80)+rbp],xmm8
383	movdqa	xmm8,XMMWORD[$L$rol16]
384	paddd	xmm3,xmm7
385	paddd	xmm2,xmm6
386	paddd	xmm1,xmm5
387	paddd	xmm0,xmm4
388	pxor	xmm15,xmm3
389	pxor	xmm14,xmm2
390	pxor	xmm13,xmm1
391	pxor	xmm12,xmm0
392DB	102,69,15,56,0,248
393DB	102,69,15,56,0,240
394DB	102,69,15,56,0,232
395DB	102,69,15,56,0,224
396	movdqa	xmm8,XMMWORD[((160+80))+rbp]
397	paddd	xmm11,xmm15
398	paddd	xmm10,xmm14
399	paddd	xmm9,xmm13
400	paddd	xmm8,xmm12
401	pxor	xmm7,xmm11
402	add	r10,QWORD[((0+0))+r8]
403	adc	r11,QWORD[((8+0))+r8]
404	adc	r12,1
405
406	lea	r8,[16+r8]
407	pxor	xmm6,xmm10
408	pxor	xmm5,xmm9
409	pxor	xmm4,xmm8
410	movdqa	XMMWORD[(160+80)+rbp],xmm8
411	movdqa	xmm8,xmm7
412	psrld	xmm8,20
413	pslld	xmm7,32-20
414	pxor	xmm7,xmm8
415	movdqa	xmm8,xmm6
416	psrld	xmm8,20
417	pslld	xmm6,32-20
418	pxor	xmm6,xmm8
419	movdqa	xmm8,xmm5
420	psrld	xmm8,20
421	pslld	xmm5,32-20
422	pxor	xmm5,xmm8
423	movdqa	xmm8,xmm4
424	psrld	xmm8,20
425	pslld	xmm4,32-20
426	pxor	xmm4,xmm8
427	mov	rax,QWORD[((0+160+0))+rbp]
428	mov	r15,rax
429	mul	r10
430	mov	r13,rax
431	mov	r14,rdx
432	mov	rax,QWORD[((0+160+0))+rbp]
433	mul	r11
434	imul	r15,r12
435	add	r14,rax
436	adc	r15,rdx
437	movdqa	xmm8,XMMWORD[$L$rol8]
438	paddd	xmm3,xmm7
439	paddd	xmm2,xmm6
440	paddd	xmm1,xmm5
441	paddd	xmm0,xmm4
442	pxor	xmm15,xmm3
443	pxor	xmm14,xmm2
444	pxor	xmm13,xmm1
445	pxor	xmm12,xmm0
446DB	102,69,15,56,0,248
447DB	102,69,15,56,0,240
448DB	102,69,15,56,0,232
449DB	102,69,15,56,0,224
450	movdqa	xmm8,XMMWORD[((160+80))+rbp]
451	paddd	xmm11,xmm15
452	paddd	xmm10,xmm14
453	paddd	xmm9,xmm13
454	paddd	xmm8,xmm12
455	pxor	xmm7,xmm11
456	pxor	xmm6,xmm10
457	mov	rax,QWORD[((8+160+0))+rbp]
458	mov	r9,rax
459	mul	r10
460	add	r14,rax
461	adc	rdx,0
462	mov	r10,rdx
463	mov	rax,QWORD[((8+160+0))+rbp]
464	mul	r11
465	add	r15,rax
466	adc	rdx,0
467	pxor	xmm5,xmm9
468	pxor	xmm4,xmm8
469	movdqa	XMMWORD[(160+80)+rbp],xmm8
470	movdqa	xmm8,xmm7
471	psrld	xmm8,25
472	pslld	xmm7,32-25
473	pxor	xmm7,xmm8
474	movdqa	xmm8,xmm6
475	psrld	xmm8,25
476	pslld	xmm6,32-25
477	pxor	xmm6,xmm8
478	movdqa	xmm8,xmm5
479	psrld	xmm8,25
480	pslld	xmm5,32-25
481	pxor	xmm5,xmm8
482	movdqa	xmm8,xmm4
483	psrld	xmm8,25
484	pslld	xmm4,32-25
485	pxor	xmm4,xmm8
486	movdqa	xmm8,XMMWORD[((160+80))+rbp]
487	imul	r9,r12
488	add	r15,r10
489	adc	r9,rdx
490DB	102,15,58,15,255,4
491DB	102,69,15,58,15,219,8
492DB	102,69,15,58,15,255,12
493DB	102,15,58,15,246,4
494DB	102,69,15,58,15,210,8
495DB	102,69,15,58,15,246,12
496DB	102,15,58,15,237,4
497DB	102,69,15,58,15,201,8
498DB	102,69,15,58,15,237,12
499DB	102,15,58,15,228,4
500DB	102,69,15,58,15,192,8
501DB	102,69,15,58,15,228,12
502	movdqa	XMMWORD[(160+80)+rbp],xmm8
503	movdqa	xmm8,XMMWORD[$L$rol16]
504	paddd	xmm3,xmm7
505	paddd	xmm2,xmm6
506	paddd	xmm1,xmm5
507	paddd	xmm0,xmm4
508	pxor	xmm15,xmm3
509	pxor	xmm14,xmm2
510	mov	r10,r13
511	mov	r11,r14
512	mov	r12,r15
513	and	r12,3
514	mov	r13,r15
515	and	r13,-4
516	mov	r14,r9
517	shrd	r15,r9,2
518	shr	r9,2
519	add	r15,r13
520	adc	r9,r14
521	add	r10,r15
522	adc	r11,r9
523	adc	r12,0
524	pxor	xmm13,xmm1
525	pxor	xmm12,xmm0
526DB	102,69,15,56,0,248
527DB	102,69,15,56,0,240
528DB	102,69,15,56,0,232
529DB	102,69,15,56,0,224
530	movdqa	xmm8,XMMWORD[((160+80))+rbp]
531	paddd	xmm11,xmm15
532	paddd	xmm10,xmm14
533	paddd	xmm9,xmm13
534	paddd	xmm8,xmm12
535	pxor	xmm7,xmm11
536	pxor	xmm6,xmm10
537	pxor	xmm5,xmm9
538	pxor	xmm4,xmm8
539	movdqa	XMMWORD[(160+80)+rbp],xmm8
540	movdqa	xmm8,xmm7
541	psrld	xmm8,20
542	pslld	xmm7,32-20
543	pxor	xmm7,xmm8
544	movdqa	xmm8,xmm6
545	psrld	xmm8,20
546	pslld	xmm6,32-20
547	pxor	xmm6,xmm8
548	movdqa	xmm8,xmm5
549	psrld	xmm8,20
550	pslld	xmm5,32-20
551	pxor	xmm5,xmm8
552	movdqa	xmm8,xmm4
553	psrld	xmm8,20
554	pslld	xmm4,32-20
555	pxor	xmm4,xmm8
556	movdqa	xmm8,XMMWORD[$L$rol8]
557	paddd	xmm3,xmm7
558	paddd	xmm2,xmm6
559	paddd	xmm1,xmm5
560	paddd	xmm0,xmm4
561	pxor	xmm15,xmm3
562	pxor	xmm14,xmm2
563	pxor	xmm13,xmm1
564	pxor	xmm12,xmm0
565DB	102,69,15,56,0,248
566DB	102,69,15,56,0,240
567DB	102,69,15,56,0,232
568DB	102,69,15,56,0,224
569	movdqa	xmm8,XMMWORD[((160+80))+rbp]
570	paddd	xmm11,xmm15
571	paddd	xmm10,xmm14
572	paddd	xmm9,xmm13
573	paddd	xmm8,xmm12
574	pxor	xmm7,xmm11
575	pxor	xmm6,xmm10
576	pxor	xmm5,xmm9
577	pxor	xmm4,xmm8
578	movdqa	XMMWORD[(160+80)+rbp],xmm8
579	movdqa	xmm8,xmm7
580	psrld	xmm8,25
581	pslld	xmm7,32-25
582	pxor	xmm7,xmm8
583	movdqa	xmm8,xmm6
584	psrld	xmm8,25
585	pslld	xmm6,32-25
586	pxor	xmm6,xmm8
587	movdqa	xmm8,xmm5
588	psrld	xmm8,25
589	pslld	xmm5,32-25
590	pxor	xmm5,xmm8
591	movdqa	xmm8,xmm4
592	psrld	xmm8,25
593	pslld	xmm4,32-25
594	pxor	xmm4,xmm8
595	movdqa	xmm8,XMMWORD[((160+80))+rbp]
596DB	102,15,58,15,255,12
597DB	102,69,15,58,15,219,8
598DB	102,69,15,58,15,255,4
599DB	102,15,58,15,246,12
600DB	102,69,15,58,15,210,8
601DB	102,69,15,58,15,246,4
602DB	102,15,58,15,237,12
603DB	102,69,15,58,15,201,8
604DB	102,69,15,58,15,237,4
605DB	102,15,58,15,228,12
606DB	102,69,15,58,15,192,8
607DB	102,69,15,58,15,228,4
608
609	dec	rcx
610	jge	NEAR $L$open_sse_main_loop_rounds
611	add	r10,QWORD[((0+0))+r8]
612	adc	r11,QWORD[((8+0))+r8]
613	adc	r12,1
614	mov	rax,QWORD[((0+160+0))+rbp]
615	mov	r15,rax
616	mul	r10
617	mov	r13,rax
618	mov	r14,rdx
619	mov	rax,QWORD[((0+160+0))+rbp]
620	mul	r11
621	imul	r15,r12
622	add	r14,rax
623	adc	r15,rdx
624	mov	rax,QWORD[((8+160+0))+rbp]
625	mov	r9,rax
626	mul	r10
627	add	r14,rax
628	adc	rdx,0
629	mov	r10,rdx
630	mov	rax,QWORD[((8+160+0))+rbp]
631	mul	r11
632	add	r15,rax
633	adc	rdx,0
634	imul	r9,r12
635	add	r15,r10
636	adc	r9,rdx
637	mov	r10,r13
638	mov	r11,r14
639	mov	r12,r15
640	and	r12,3
641	mov	r13,r15
642	and	r13,-4
643	mov	r14,r9
644	shrd	r15,r9,2
645	shr	r9,2
646	add	r15,r13
647	adc	r9,r14
648	add	r10,r15
649	adc	r11,r9
650	adc	r12,0
651
652	lea	r8,[16+r8]
653	cmp	rcx,-6
654	jg	NEAR $L$open_sse_main_loop_rounds
655	paddd	xmm3,XMMWORD[$L$chacha20_consts]
656	paddd	xmm7,XMMWORD[((160+48))+rbp]
657	paddd	xmm11,XMMWORD[((160+64))+rbp]
658	paddd	xmm15,XMMWORD[((160+144))+rbp]
659	paddd	xmm2,XMMWORD[$L$chacha20_consts]
660	paddd	xmm6,XMMWORD[((160+48))+rbp]
661	paddd	xmm10,XMMWORD[((160+64))+rbp]
662	paddd	xmm14,XMMWORD[((160+128))+rbp]
663	paddd	xmm1,XMMWORD[$L$chacha20_consts]
664	paddd	xmm5,XMMWORD[((160+48))+rbp]
665	paddd	xmm9,XMMWORD[((160+64))+rbp]
666	paddd	xmm13,XMMWORD[((160+112))+rbp]
667	paddd	xmm0,XMMWORD[$L$chacha20_consts]
668	paddd	xmm4,XMMWORD[((160+48))+rbp]
669	paddd	xmm8,XMMWORD[((160+64))+rbp]
670	paddd	xmm12,XMMWORD[((160+96))+rbp]
671	movdqa	XMMWORD[(160+80)+rbp],xmm12
672	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
673	pxor	xmm12,xmm3
674	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
675	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
676	pxor	xmm12,xmm7
677	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
678	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
679	pxor	xmm12,xmm11
680	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
681	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
682	pxor	xmm12,xmm15
683	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
684	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
685	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
686	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
687	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
688	pxor	xmm2,xmm3
689	pxor	xmm6,xmm7
690	pxor	xmm10,xmm11
691	pxor	xmm15,xmm14
692	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
693	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
694	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
695	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
696	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
697	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
698	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
699	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
700	pxor	xmm1,xmm3
701	pxor	xmm5,xmm7
702	pxor	xmm9,xmm11
703	pxor	xmm15,xmm13
704	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
705	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
706	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
707	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
708	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
709	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
710	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
711	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
712	pxor	xmm0,xmm3
713	pxor	xmm4,xmm7
714	pxor	xmm8,xmm11
715	pxor	xmm15,XMMWORD[((160+80))+rbp]
716	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
717	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
718	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
719	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
720
721	lea	rsi,[256+rsi]
722	lea	rdi,[256+rdi]
723	sub	rbx,16*16
724	jmp	NEAR $L$open_sse_main_loop
725$L$open_sse_tail:
726
727	test	rbx,rbx
728	jz	NEAR $L$open_sse_finalize
729	cmp	rbx,12*16
730	ja	NEAR $L$open_sse_tail_256
731	cmp	rbx,8*16
732	ja	NEAR $L$open_sse_tail_192
733	cmp	rbx,4*16
734	ja	NEAR $L$open_sse_tail_128
735	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
736	movdqa	xmm4,XMMWORD[((160+48))+rbp]
737	movdqa	xmm8,XMMWORD[((160+64))+rbp]
738	movdqa	xmm12,XMMWORD[((160+96))+rbp]
739	paddd	xmm12,XMMWORD[$L$sse_inc]
740	movdqa	XMMWORD[(160+96)+rbp],xmm12
741
742	xor	r8,r8
743	mov	rcx,rbx
744	cmp	rcx,16
745	jb	NEAR $L$open_sse_tail_64_rounds
746$L$open_sse_tail_64_rounds_and_x1hash:
747	add	r10,QWORD[((0+0))+r8*1+rsi]
748	adc	r11,QWORD[((8+0))+r8*1+rsi]
749	adc	r12,1
750	mov	rax,QWORD[((0+160+0))+rbp]
751	mov	r15,rax
752	mul	r10
753	mov	r13,rax
754	mov	r14,rdx
755	mov	rax,QWORD[((0+160+0))+rbp]
756	mul	r11
757	imul	r15,r12
758	add	r14,rax
759	adc	r15,rdx
760	mov	rax,QWORD[((8+160+0))+rbp]
761	mov	r9,rax
762	mul	r10
763	add	r14,rax
764	adc	rdx,0
765	mov	r10,rdx
766	mov	rax,QWORD[((8+160+0))+rbp]
767	mul	r11
768	add	r15,rax
769	adc	rdx,0
770	imul	r9,r12
771	add	r15,r10
772	adc	r9,rdx
773	mov	r10,r13
774	mov	r11,r14
775	mov	r12,r15
776	and	r12,3
777	mov	r13,r15
778	and	r13,-4
779	mov	r14,r9
780	shrd	r15,r9,2
781	shr	r9,2
782	add	r15,r13
783	adc	r9,r14
784	add	r10,r15
785	adc	r11,r9
786	adc	r12,0
787
788	sub	rcx,16
789$L$open_sse_tail_64_rounds:
790	add	r8,16
791	paddd	xmm0,xmm4
792	pxor	xmm12,xmm0
793	pshufb	xmm12,XMMWORD[$L$rol16]
794	paddd	xmm8,xmm12
795	pxor	xmm4,xmm8
796	movdqa	xmm3,xmm4
797	pslld	xmm3,12
798	psrld	xmm4,20
799	pxor	xmm4,xmm3
800	paddd	xmm0,xmm4
801	pxor	xmm12,xmm0
802	pshufb	xmm12,XMMWORD[$L$rol8]
803	paddd	xmm8,xmm12
804	pxor	xmm4,xmm8
805	movdqa	xmm3,xmm4
806	pslld	xmm3,7
807	psrld	xmm4,25
808	pxor	xmm4,xmm3
809DB	102,15,58,15,228,4
810DB	102,69,15,58,15,192,8
811DB	102,69,15,58,15,228,12
812	paddd	xmm0,xmm4
813	pxor	xmm12,xmm0
814	pshufb	xmm12,XMMWORD[$L$rol16]
815	paddd	xmm8,xmm12
816	pxor	xmm4,xmm8
817	movdqa	xmm3,xmm4
818	pslld	xmm3,12
819	psrld	xmm4,20
820	pxor	xmm4,xmm3
821	paddd	xmm0,xmm4
822	pxor	xmm12,xmm0
823	pshufb	xmm12,XMMWORD[$L$rol8]
824	paddd	xmm8,xmm12
825	pxor	xmm4,xmm8
826	movdqa	xmm3,xmm4
827	pslld	xmm3,7
828	psrld	xmm4,25
829	pxor	xmm4,xmm3
830DB	102,15,58,15,228,12
831DB	102,69,15,58,15,192,8
832DB	102,69,15,58,15,228,4
833
834	cmp	rcx,16
835	jae	NEAR $L$open_sse_tail_64_rounds_and_x1hash
836	cmp	r8,10*16
837	jne	NEAR $L$open_sse_tail_64_rounds
838	paddd	xmm0,XMMWORD[$L$chacha20_consts]
839	paddd	xmm4,XMMWORD[((160+48))+rbp]
840	paddd	xmm8,XMMWORD[((160+64))+rbp]
841	paddd	xmm12,XMMWORD[((160+96))+rbp]
842
843	jmp	NEAR $L$open_sse_tail_64_dec_loop
844
845$L$open_sse_tail_128:
846	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
847	movdqa	xmm4,XMMWORD[((160+48))+rbp]
848	movdqa	xmm8,XMMWORD[((160+64))+rbp]
849	movdqa	xmm1,xmm0
850	movdqa	xmm5,xmm4
851	movdqa	xmm9,xmm8
852	movdqa	xmm13,XMMWORD[((160+96))+rbp]
853	paddd	xmm13,XMMWORD[$L$sse_inc]
854	movdqa	xmm12,xmm13
855	paddd	xmm12,XMMWORD[$L$sse_inc]
856	movdqa	XMMWORD[(160+96)+rbp],xmm12
857	movdqa	XMMWORD[(160+112)+rbp],xmm13
858
859	mov	rcx,rbx
860	and	rcx,-16
861	xor	r8,r8
862$L$open_sse_tail_128_rounds_and_x1hash:
863	add	r10,QWORD[((0+0))+r8*1+rsi]
864	adc	r11,QWORD[((8+0))+r8*1+rsi]
865	adc	r12,1
866	mov	rax,QWORD[((0+160+0))+rbp]
867	mov	r15,rax
868	mul	r10
869	mov	r13,rax
870	mov	r14,rdx
871	mov	rax,QWORD[((0+160+0))+rbp]
872	mul	r11
873	imul	r15,r12
874	add	r14,rax
875	adc	r15,rdx
876	mov	rax,QWORD[((8+160+0))+rbp]
877	mov	r9,rax
878	mul	r10
879	add	r14,rax
880	adc	rdx,0
881	mov	r10,rdx
882	mov	rax,QWORD[((8+160+0))+rbp]
883	mul	r11
884	add	r15,rax
885	adc	rdx,0
886	imul	r9,r12
887	add	r15,r10
888	adc	r9,rdx
889	mov	r10,r13
890	mov	r11,r14
891	mov	r12,r15
892	and	r12,3
893	mov	r13,r15
894	and	r13,-4
895	mov	r14,r9
896	shrd	r15,r9,2
897	shr	r9,2
898	add	r15,r13
899	adc	r9,r14
900	add	r10,r15
901	adc	r11,r9
902	adc	r12,0
903
904$L$open_sse_tail_128_rounds:
905	add	r8,16
906	paddd	xmm0,xmm4
907	pxor	xmm12,xmm0
908	pshufb	xmm12,XMMWORD[$L$rol16]
909	paddd	xmm8,xmm12
910	pxor	xmm4,xmm8
911	movdqa	xmm3,xmm4
912	pslld	xmm3,12
913	psrld	xmm4,20
914	pxor	xmm4,xmm3
915	paddd	xmm0,xmm4
916	pxor	xmm12,xmm0
917	pshufb	xmm12,XMMWORD[$L$rol8]
918	paddd	xmm8,xmm12
919	pxor	xmm4,xmm8
920	movdqa	xmm3,xmm4
921	pslld	xmm3,7
922	psrld	xmm4,25
923	pxor	xmm4,xmm3
924DB	102,15,58,15,228,4
925DB	102,69,15,58,15,192,8
926DB	102,69,15,58,15,228,12
927	paddd	xmm1,xmm5
928	pxor	xmm13,xmm1
929	pshufb	xmm13,XMMWORD[$L$rol16]
930	paddd	xmm9,xmm13
931	pxor	xmm5,xmm9
932	movdqa	xmm3,xmm5
933	pslld	xmm3,12
934	psrld	xmm5,20
935	pxor	xmm5,xmm3
936	paddd	xmm1,xmm5
937	pxor	xmm13,xmm1
938	pshufb	xmm13,XMMWORD[$L$rol8]
939	paddd	xmm9,xmm13
940	pxor	xmm5,xmm9
941	movdqa	xmm3,xmm5
942	pslld	xmm3,7
943	psrld	xmm5,25
944	pxor	xmm5,xmm3
945DB	102,15,58,15,237,4
946DB	102,69,15,58,15,201,8
947DB	102,69,15,58,15,237,12
948	paddd	xmm0,xmm4
949	pxor	xmm12,xmm0
950	pshufb	xmm12,XMMWORD[$L$rol16]
951	paddd	xmm8,xmm12
952	pxor	xmm4,xmm8
953	movdqa	xmm3,xmm4
954	pslld	xmm3,12
955	psrld	xmm4,20
956	pxor	xmm4,xmm3
957	paddd	xmm0,xmm4
958	pxor	xmm12,xmm0
959	pshufb	xmm12,XMMWORD[$L$rol8]
960	paddd	xmm8,xmm12
961	pxor	xmm4,xmm8
962	movdqa	xmm3,xmm4
963	pslld	xmm3,7
964	psrld	xmm4,25
965	pxor	xmm4,xmm3
966DB	102,15,58,15,228,12
967DB	102,69,15,58,15,192,8
968DB	102,69,15,58,15,228,4
969	paddd	xmm1,xmm5
970	pxor	xmm13,xmm1
971	pshufb	xmm13,XMMWORD[$L$rol16]
972	paddd	xmm9,xmm13
973	pxor	xmm5,xmm9
974	movdqa	xmm3,xmm5
975	pslld	xmm3,12
976	psrld	xmm5,20
977	pxor	xmm5,xmm3
978	paddd	xmm1,xmm5
979	pxor	xmm13,xmm1
980	pshufb	xmm13,XMMWORD[$L$rol8]
981	paddd	xmm9,xmm13
982	pxor	xmm5,xmm9
983	movdqa	xmm3,xmm5
984	pslld	xmm3,7
985	psrld	xmm5,25
986	pxor	xmm5,xmm3
987DB	102,15,58,15,237,12
988DB	102,69,15,58,15,201,8
989DB	102,69,15,58,15,237,4
990
991	cmp	r8,rcx
992	jb	NEAR $L$open_sse_tail_128_rounds_and_x1hash
993	cmp	r8,10*16
994	jne	NEAR $L$open_sse_tail_128_rounds
995	paddd	xmm1,XMMWORD[$L$chacha20_consts]
996	paddd	xmm5,XMMWORD[((160+48))+rbp]
997	paddd	xmm9,XMMWORD[((160+64))+rbp]
998	paddd	xmm13,XMMWORD[((160+112))+rbp]
999	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1000	paddd	xmm4,XMMWORD[((160+48))+rbp]
1001	paddd	xmm8,XMMWORD[((160+64))+rbp]
1002	paddd	xmm12,XMMWORD[((160+96))+rbp]
1003	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1004	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1005	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1006	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1007	pxor	xmm1,xmm3
1008	pxor	xmm5,xmm7
1009	pxor	xmm9,xmm11
1010	pxor	xmm15,xmm13
1011	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
1012	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
1013	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
1014	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1015
1016	sub	rbx,4*16
1017	lea	rsi,[64+rsi]
1018	lea	rdi,[64+rdi]
1019	jmp	NEAR $L$open_sse_tail_64_dec_loop
1020
1021$L$open_sse_tail_192:
1022	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
1023	movdqa	xmm4,XMMWORD[((160+48))+rbp]
1024	movdqa	xmm8,XMMWORD[((160+64))+rbp]
1025	movdqa	xmm1,xmm0
1026	movdqa	xmm5,xmm4
1027	movdqa	xmm9,xmm8
1028	movdqa	xmm2,xmm0
1029	movdqa	xmm6,xmm4
1030	movdqa	xmm10,xmm8
1031	movdqa	xmm14,XMMWORD[((160+96))+rbp]
1032	paddd	xmm14,XMMWORD[$L$sse_inc]
1033	movdqa	xmm13,xmm14
1034	paddd	xmm13,XMMWORD[$L$sse_inc]
1035	movdqa	xmm12,xmm13
1036	paddd	xmm12,XMMWORD[$L$sse_inc]
1037	movdqa	XMMWORD[(160+96)+rbp],xmm12
1038	movdqa	XMMWORD[(160+112)+rbp],xmm13
1039	movdqa	XMMWORD[(160+128)+rbp],xmm14
1040
1041	mov	rcx,rbx
1042	mov	r8,10*16
1043	cmp	rcx,10*16
1044	cmovg	rcx,r8
1045	and	rcx,-16
1046	xor	r8,r8
1047$L$open_sse_tail_192_rounds_and_x1hash:
1048	add	r10,QWORD[((0+0))+r8*1+rsi]
1049	adc	r11,QWORD[((8+0))+r8*1+rsi]
1050	adc	r12,1
1051	mov	rax,QWORD[((0+160+0))+rbp]
1052	mov	r15,rax
1053	mul	r10
1054	mov	r13,rax
1055	mov	r14,rdx
1056	mov	rax,QWORD[((0+160+0))+rbp]
1057	mul	r11
1058	imul	r15,r12
1059	add	r14,rax
1060	adc	r15,rdx
1061	mov	rax,QWORD[((8+160+0))+rbp]
1062	mov	r9,rax
1063	mul	r10
1064	add	r14,rax
1065	adc	rdx,0
1066	mov	r10,rdx
1067	mov	rax,QWORD[((8+160+0))+rbp]
1068	mul	r11
1069	add	r15,rax
1070	adc	rdx,0
1071	imul	r9,r12
1072	add	r15,r10
1073	adc	r9,rdx
1074	mov	r10,r13
1075	mov	r11,r14
1076	mov	r12,r15
1077	and	r12,3
1078	mov	r13,r15
1079	and	r13,-4
1080	mov	r14,r9
1081	shrd	r15,r9,2
1082	shr	r9,2
1083	add	r15,r13
1084	adc	r9,r14
1085	add	r10,r15
1086	adc	r11,r9
1087	adc	r12,0
1088
1089$L$open_sse_tail_192_rounds:
1090	add	r8,16
1091	paddd	xmm0,xmm4
1092	pxor	xmm12,xmm0
1093	pshufb	xmm12,XMMWORD[$L$rol16]
1094	paddd	xmm8,xmm12
1095	pxor	xmm4,xmm8
1096	movdqa	xmm3,xmm4
1097	pslld	xmm3,12
1098	psrld	xmm4,20
1099	pxor	xmm4,xmm3
1100	paddd	xmm0,xmm4
1101	pxor	xmm12,xmm0
1102	pshufb	xmm12,XMMWORD[$L$rol8]
1103	paddd	xmm8,xmm12
1104	pxor	xmm4,xmm8
1105	movdqa	xmm3,xmm4
1106	pslld	xmm3,7
1107	psrld	xmm4,25
1108	pxor	xmm4,xmm3
1109DB	102,15,58,15,228,4
1110DB	102,69,15,58,15,192,8
1111DB	102,69,15,58,15,228,12
1112	paddd	xmm1,xmm5
1113	pxor	xmm13,xmm1
1114	pshufb	xmm13,XMMWORD[$L$rol16]
1115	paddd	xmm9,xmm13
1116	pxor	xmm5,xmm9
1117	movdqa	xmm3,xmm5
1118	pslld	xmm3,12
1119	psrld	xmm5,20
1120	pxor	xmm5,xmm3
1121	paddd	xmm1,xmm5
1122	pxor	xmm13,xmm1
1123	pshufb	xmm13,XMMWORD[$L$rol8]
1124	paddd	xmm9,xmm13
1125	pxor	xmm5,xmm9
1126	movdqa	xmm3,xmm5
1127	pslld	xmm3,7
1128	psrld	xmm5,25
1129	pxor	xmm5,xmm3
1130DB	102,15,58,15,237,4
1131DB	102,69,15,58,15,201,8
1132DB	102,69,15,58,15,237,12
1133	paddd	xmm2,xmm6
1134	pxor	xmm14,xmm2
1135	pshufb	xmm14,XMMWORD[$L$rol16]
1136	paddd	xmm10,xmm14
1137	pxor	xmm6,xmm10
1138	movdqa	xmm3,xmm6
1139	pslld	xmm3,12
1140	psrld	xmm6,20
1141	pxor	xmm6,xmm3
1142	paddd	xmm2,xmm6
1143	pxor	xmm14,xmm2
1144	pshufb	xmm14,XMMWORD[$L$rol8]
1145	paddd	xmm10,xmm14
1146	pxor	xmm6,xmm10
1147	movdqa	xmm3,xmm6
1148	pslld	xmm3,7
1149	psrld	xmm6,25
1150	pxor	xmm6,xmm3
1151DB	102,15,58,15,246,4
1152DB	102,69,15,58,15,210,8
1153DB	102,69,15,58,15,246,12
1154	paddd	xmm0,xmm4
1155	pxor	xmm12,xmm0
1156	pshufb	xmm12,XMMWORD[$L$rol16]
1157	paddd	xmm8,xmm12
1158	pxor	xmm4,xmm8
1159	movdqa	xmm3,xmm4
1160	pslld	xmm3,12
1161	psrld	xmm4,20
1162	pxor	xmm4,xmm3
1163	paddd	xmm0,xmm4
1164	pxor	xmm12,xmm0
1165	pshufb	xmm12,XMMWORD[$L$rol8]
1166	paddd	xmm8,xmm12
1167	pxor	xmm4,xmm8
1168	movdqa	xmm3,xmm4
1169	pslld	xmm3,7
1170	psrld	xmm4,25
1171	pxor	xmm4,xmm3
1172DB	102,15,58,15,228,12
1173DB	102,69,15,58,15,192,8
1174DB	102,69,15,58,15,228,4
1175	paddd	xmm1,xmm5
1176	pxor	xmm13,xmm1
1177	pshufb	xmm13,XMMWORD[$L$rol16]
1178	paddd	xmm9,xmm13
1179	pxor	xmm5,xmm9
1180	movdqa	xmm3,xmm5
1181	pslld	xmm3,12
1182	psrld	xmm5,20
1183	pxor	xmm5,xmm3
1184	paddd	xmm1,xmm5
1185	pxor	xmm13,xmm1
1186	pshufb	xmm13,XMMWORD[$L$rol8]
1187	paddd	xmm9,xmm13
1188	pxor	xmm5,xmm9
1189	movdqa	xmm3,xmm5
1190	pslld	xmm3,7
1191	psrld	xmm5,25
1192	pxor	xmm5,xmm3
1193DB	102,15,58,15,237,12
1194DB	102,69,15,58,15,201,8
1195DB	102,69,15,58,15,237,4
1196	paddd	xmm2,xmm6
1197	pxor	xmm14,xmm2
1198	pshufb	xmm14,XMMWORD[$L$rol16]
1199	paddd	xmm10,xmm14
1200	pxor	xmm6,xmm10
1201	movdqa	xmm3,xmm6
1202	pslld	xmm3,12
1203	psrld	xmm6,20
1204	pxor	xmm6,xmm3
1205	paddd	xmm2,xmm6
1206	pxor	xmm14,xmm2
1207	pshufb	xmm14,XMMWORD[$L$rol8]
1208	paddd	xmm10,xmm14
1209	pxor	xmm6,xmm10
1210	movdqa	xmm3,xmm6
1211	pslld	xmm3,7
1212	psrld	xmm6,25
1213	pxor	xmm6,xmm3
1214DB	102,15,58,15,246,12
1215DB	102,69,15,58,15,210,8
1216DB	102,69,15,58,15,246,4
1217
1218	cmp	r8,rcx
1219	jb	NEAR $L$open_sse_tail_192_rounds_and_x1hash
1220	cmp	r8,10*16
1221	jne	NEAR $L$open_sse_tail_192_rounds
1222	cmp	rbx,11*16
1223	jb	NEAR $L$open_sse_tail_192_finish
1224	add	r10,QWORD[((0+160))+rsi]
1225	adc	r11,QWORD[((8+160))+rsi]
1226	adc	r12,1
1227	mov	rax,QWORD[((0+160+0))+rbp]
1228	mov	r15,rax
1229	mul	r10
1230	mov	r13,rax
1231	mov	r14,rdx
1232	mov	rax,QWORD[((0+160+0))+rbp]
1233	mul	r11
1234	imul	r15,r12
1235	add	r14,rax
1236	adc	r15,rdx
1237	mov	rax,QWORD[((8+160+0))+rbp]
1238	mov	r9,rax
1239	mul	r10
1240	add	r14,rax
1241	adc	rdx,0
1242	mov	r10,rdx
1243	mov	rax,QWORD[((8+160+0))+rbp]
1244	mul	r11
1245	add	r15,rax
1246	adc	rdx,0
1247	imul	r9,r12
1248	add	r15,r10
1249	adc	r9,rdx
1250	mov	r10,r13
1251	mov	r11,r14
1252	mov	r12,r15
1253	and	r12,3
1254	mov	r13,r15
1255	and	r13,-4
1256	mov	r14,r9
1257	shrd	r15,r9,2
1258	shr	r9,2
1259	add	r15,r13
1260	adc	r9,r14
1261	add	r10,r15
1262	adc	r11,r9
1263	adc	r12,0
1264
1265	cmp	rbx,12*16
1266	jb	NEAR $L$open_sse_tail_192_finish
1267	add	r10,QWORD[((0+176))+rsi]
1268	adc	r11,QWORD[((8+176))+rsi]
1269	adc	r12,1
1270	mov	rax,QWORD[((0+160+0))+rbp]
1271	mov	r15,rax
1272	mul	r10
1273	mov	r13,rax
1274	mov	r14,rdx
1275	mov	rax,QWORD[((0+160+0))+rbp]
1276	mul	r11
1277	imul	r15,r12
1278	add	r14,rax
1279	adc	r15,rdx
1280	mov	rax,QWORD[((8+160+0))+rbp]
1281	mov	r9,rax
1282	mul	r10
1283	add	r14,rax
1284	adc	rdx,0
1285	mov	r10,rdx
1286	mov	rax,QWORD[((8+160+0))+rbp]
1287	mul	r11
1288	add	r15,rax
1289	adc	rdx,0
1290	imul	r9,r12
1291	add	r15,r10
1292	adc	r9,rdx
1293	mov	r10,r13
1294	mov	r11,r14
1295	mov	r12,r15
1296	and	r12,3
1297	mov	r13,r15
1298	and	r13,-4
1299	mov	r14,r9
1300	shrd	r15,r9,2
1301	shr	r9,2
1302	add	r15,r13
1303	adc	r9,r14
1304	add	r10,r15
1305	adc	r11,r9
1306	adc	r12,0
1307
1308$L$open_sse_tail_192_finish:
1309	paddd	xmm2,XMMWORD[$L$chacha20_consts]
1310	paddd	xmm6,XMMWORD[((160+48))+rbp]
1311	paddd	xmm10,XMMWORD[((160+64))+rbp]
1312	paddd	xmm14,XMMWORD[((160+128))+rbp]
1313	paddd	xmm1,XMMWORD[$L$chacha20_consts]
1314	paddd	xmm5,XMMWORD[((160+48))+rbp]
1315	paddd	xmm9,XMMWORD[((160+64))+rbp]
1316	paddd	xmm13,XMMWORD[((160+112))+rbp]
1317	paddd	xmm0,XMMWORD[$L$chacha20_consts]
1318	paddd	xmm4,XMMWORD[((160+48))+rbp]
1319	paddd	xmm8,XMMWORD[((160+64))+rbp]
1320	paddd	xmm12,XMMWORD[((160+96))+rbp]
1321	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
1322	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
1323	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
1324	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
1325	pxor	xmm2,xmm3
1326	pxor	xmm6,xmm7
1327	pxor	xmm10,xmm11
1328	pxor	xmm15,xmm14
1329	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
1330	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
1331	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
1332	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
1333	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
1334	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
1335	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
1336	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
1337	pxor	xmm1,xmm3
1338	pxor	xmm5,xmm7
1339	pxor	xmm9,xmm11
1340	pxor	xmm15,xmm13
1341	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
1342	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
1343	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
1344	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
1345
1346	sub	rbx,8*16
1347	lea	rsi,[128+rsi]
1348	lea	rdi,[128+rdi]
1349	jmp	NEAR $L$open_sse_tail_64_dec_loop
1350
; Tail path of the open (decrypt) routine that generates four 64-byte
; keystream blocks at once. NOTE(review): the dispatch condition that selects
; this path is outside this view.
;
; Builds four parallel states, one 16-byte row per register:
;   block 0: xmm0, xmm4, xmm8,  xmm12
;   block 1: xmm1, xmm5, xmm9,  xmm13
;   block 2: xmm2, xmm6, xmm10, xmm14
;   block 3: xmm3, xmm7, xmm11, xmm15
; Rows 0-2 are copies of $L$chacha20_consts and of the rows saved at
; 160+48 and 160+64 off rbp. Row 3 is the row saved at 160+96 with its low
; dword advanced by 1, 2, 3 and 4 ($L$sse_inc is DD 1,0,0,0).
$L$open_sse_tail_256:
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm2,xmm0
	movdqa	xmm6,xmm4
	movdqa	xmm10,xmm8
	movdqa	xmm3,xmm0
	movdqa	xmm7,xmm4
	movdqa	xmm11,xmm8
	; xmm15 = saved row + 1, xmm14 = + 2, xmm13 = + 3, xmm12 = + 4.
	movdqa	xmm15,XMMWORD[((160+96))+rbp]
	paddd	xmm15,XMMWORD[$L$sse_inc]
	movdqa	xmm14,xmm15
	paddd	xmm14,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm14
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	; Save each block's initial fourth row; they are added back after the
	; rounds. The slot at 160+96 now holds the most advanced counter row.
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13
	movdqa	XMMWORD[(160+128)+rbp],xmm14
	movdqa	XMMWORD[(160+144)+rbp],xmm15

	; r8 = byte offset into the input used by the hashing that follows.
	xor	r8,r8
; Loop body: one ChaCha double round (column round, then diagonal round) on
; all four states, interleaved with hashing one 16-byte input block at
; [rsi+r8] into the accumulator r10:r11:r12. r8 steps 0,16,...,144, so the
; loop runs 10 times (20 rounds in total) and hashes the first 160 input bytes.
;
; The multiplier is the 128-bit value at 160+0 off rbp. r13, r14, r15 and r9
; hold the partial product; rax and rdx are used by mul.
; All sixteen xmm registers hold state, so one row register (xmm11 or xmm9)
; is spilled to 160+80 off rbp and used as rotate scratch.
; The DB lines are hand-encoded palignr instructions that rotate the dword
; lanes of rows 1, 2 and 3 (by 4, 8 and 12 bytes to enter the diagonal form,
; by 12, 8 and 4 bytes to leave it).
$L$open_sse_tail_256_rounds_and_x1hash:
	; acc += 16 input bytes; the 1 added into r12 sets bit 128.
	add	r10,QWORD[((0+0))+r8*1+rsi]
	adc	r11,QWORD[((8+0))+r8*1+rsi]
	adc	r12,1
	; Spill xmm11 so it can be used as scratch.
	movdqa	XMMWORD[(160+80)+rbp],xmm11
	; Column quarter round on block 0 (xmm0, xmm4, xmm8, xmm12):
	; rotations by 16 and 8 use pshufb tables, 12 and 7 use shift pairs.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm11,xmm4
	pslld	xmm11,12
	psrld	xmm4,20
	pxor	xmm4,xmm11
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm11,xmm4
	pslld	xmm11,7
	psrld	xmm4,25
	pxor	xmm4,xmm11
	; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; Column quarter round on block 1 (xmm1, xmm5, xmm9, xmm13).
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm11,xmm5
	pslld	xmm11,12
	psrld	xmm5,20
	pxor	xmm5,xmm11
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm11,xmm5
	pslld	xmm11,7
	psrld	xmm5,25
	pxor	xmm5,xmm11
	; palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
	; Column quarter round on block 2 (xmm2, xmm6, xmm10, xmm14).
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm11,xmm6
	pslld	xmm11,12
	psrld	xmm6,20
	pxor	xmm6,xmm11
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm11,xmm6
	pslld	xmm11,7
	psrld	xmm6,25
	pxor	xmm6,xmm11
	; palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
	; Reload block 3's third row.
	movdqa	xmm11,XMMWORD[((160+80))+rbp]
	; Multiply, part 1: acc times the low qword of the multiplier.
	; Result limbs accumulate in r13 (low), r14, r15.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	; Block 3 needs xmm11 as state, so xmm9 becomes the scratch register.
	movdqa	XMMWORD[(160+80)+rbp],xmm9
	; Column quarter round on block 3 (xmm3, xmm7, xmm11, xmm15).
	paddd	xmm3,xmm7
	pxor	xmm15,xmm3
	pshufb	xmm15,XMMWORD[$L$rol16]
	paddd	xmm11,xmm15
	pxor	xmm7,xmm11
	movdqa	xmm9,xmm7
	pslld	xmm9,12
	psrld	xmm7,20
	pxor	xmm7,xmm9
	paddd	xmm3,xmm7
	pxor	xmm15,xmm3
	pshufb	xmm15,XMMWORD[$L$rol8]
	paddd	xmm11,xmm15
	pxor	xmm7,xmm11
	movdqa	xmm9,xmm7
	pslld	xmm9,7
	psrld	xmm7,25
	pxor	xmm7,xmm9
	; palignr xmm7,xmm7,4 / palignr xmm11,xmm11,8 / palignr xmm15,xmm15,12
DB	102,15,58,15,255,4
DB	102,69,15,58,15,219,8
DB	102,69,15,58,15,255,12
	movdqa	xmm9,XMMWORD[((160+80))+rbp]
	; Multiply, part 2: acc times the high qword of the multiplier.
	; r10 is reused to carry the upper half of the first product.
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	movdqa	XMMWORD[(160+80)+rbp],xmm11
	; Diagonal quarter round on block 0.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm11,xmm4
	pslld	xmm11,12
	psrld	xmm4,20
	pxor	xmm4,xmm11
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm11,xmm4
	pslld	xmm11,7
	psrld	xmm4,25
	pxor	xmm4,xmm11
	; palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Diagonal quarter round on block 1.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm11,xmm5
	pslld	xmm11,12
	psrld	xmm5,20
	pxor	xmm5,xmm11
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm11,xmm5
	pslld	xmm11,7
	psrld	xmm5,25
	pxor	xmm5,xmm11
	; palignr xmm5,xmm5,12 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,4
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
	; Multiply, part 3: fold in the top-limb term and the carried half.
	; The full product is now in r13, r14, r15, r9 (low to high).
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Diagonal quarter round on block 2.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm11,xmm6
	pslld	xmm11,12
	psrld	xmm6,20
	pxor	xmm6,xmm11
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm11,xmm6
	pslld	xmm11,7
	psrld	xmm6,25
	pxor	xmm6,xmm11
	; palignr xmm6,xmm6,12 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,4
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4
	movdqa	xmm11,XMMWORD[((160+80))+rbp]
	; Reduce modulo 2^130 - 5: keep the low 130 bits in r10:r11:r12 and add
	; 5 times the bits above them, formed as (high AND -4) + (high shifted
	; right by 2).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
	movdqa	XMMWORD[(160+80)+rbp],xmm9
	; Diagonal quarter round on block 3 (xmm9 as scratch).
	paddd	xmm3,xmm7
	pxor	xmm15,xmm3
	pshufb	xmm15,XMMWORD[$L$rol16]
	paddd	xmm11,xmm15
	pxor	xmm7,xmm11
	movdqa	xmm9,xmm7
	pslld	xmm9,12
	psrld	xmm7,20
	pxor	xmm7,xmm9
	paddd	xmm3,xmm7
	pxor	xmm15,xmm3
	pshufb	xmm15,XMMWORD[$L$rol8]
	paddd	xmm11,xmm15
	pxor	xmm7,xmm11
	movdqa	xmm9,xmm7
	pslld	xmm9,7
	psrld	xmm7,25
	pxor	xmm7,xmm9
	; palignr xmm7,xmm7,12 / palignr xmm11,xmm11,8 / palignr xmm15,xmm15,4
DB	102,15,58,15,255,12
DB	102,69,15,58,15,219,8
DB	102,69,15,58,15,255,4
	movdqa	xmm9,XMMWORD[((160+80))+rbp]

	; Next 16-byte block; stop after ten iterations.
	add	r8,16
	cmp	r8,10*16
	jb	NEAR $L$open_sse_tail_256_rounds_and_x1hash

	; rcx = remaining length rounded down to a multiple of 16; it bounds the
	; hash-only loop that follows.
	mov	rcx,rbx
	and	rcx,-16
; Hash-only loop followed by decryption of 192 bytes.
;
; Continues hashing 16-byte input blocks at [rsi+r8] (r8 enters as 160)
; until r8 reaches rcx, the remaining length rounded down to 16. After this
; loop every whole 16-byte block of the remaining input has been hashed.
; NOTE(review): the body runs once before the bound is tested, so the entry
; condition must guarantee at least 176 remaining bytes - confirm at the
; dispatch site, which is outside this view.
$L$open_sse_tail_256_hash:
	; acc (r10:r11:r12) += 16 input bytes, plus bit 128.
	add	r10,QWORD[((0+0))+r8*1+rsi]
	adc	r11,QWORD[((8+0))+r8*1+rsi]
	adc	r12,1
	; acc *= the 128-bit multiplier at 160+0 off rbp.
	; Product limbs: r13 (low), r14, r15, r9 (high).
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Reduce modulo 2^130 - 5: low 130 bits plus 5 times the bits above.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	add	r8,16
	cmp	r8,rcx
	jb	NEAR $L$open_sse_tail_256_hash
	; Feed-forward: add each block's initial rows back onto its rounds
	; output. The fourth rows come from the slots saved at 160+96..160+144.
	paddd	xmm3,XMMWORD[$L$chacha20_consts]
	paddd	xmm7,XMMWORD[((160+48))+rbp]
	paddd	xmm11,XMMWORD[((160+64))+rbp]
	paddd	xmm15,XMMWORD[((160+144))+rbp]
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm6,XMMWORD[((160+48))+rbp]
	paddd	xmm10,XMMWORD[((160+64))+rbp]
	paddd	xmm14,XMMWORD[((160+128))+rbp]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]
	; No register is free, so park xmm12 and use it as the load temporary
	; while XORing bytes 0..63 with block 3 (the lowest counter).
	movdqa	XMMWORD[(160+80)+rbp],xmm12
	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
	pxor	xmm12,xmm3
	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
	pxor	xmm12,xmm7
	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
	pxor	xmm12,xmm11
	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
	pxor	xmm12,xmm15
	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
	; Bytes 64..127 XOR block 2; xmm3/7/11/15 are now free as temporaries.
	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
	pxor	xmm2,xmm3
	pxor	xmm6,xmm7
	pxor	xmm10,xmm11
	pxor	xmm15,xmm14
	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
	; Bytes 128..191 XOR block 1.
	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
	movdqu	XMMWORD[(48 + 128)+rdi],xmm15

	; Restore block 0's fourth row. Block 0 (xmm0, xmm4, xmm8, xmm12) is
	; left for the 16-byte loop that this code falls through into.
	movdqa	xmm12,XMMWORD[((160+80))+rbp]
	sub	rbx,12*16
	lea	rsi,[192+rsi]
	lea	rdi,[192+rdi]
1705
1706
; Decrypts the remaining input 16 bytes at a time from one keystream block
; held in xmm0, xmm4, xmm8, xmm12. After each step the queue shifts down
; (xmm4 to xmm0, xmm8 to xmm4, xmm12 to xmm8) so xmm0 is always the next
; 16 keystream bytes. Nothing is hashed here; the tail paths that jump or
; fall into this loop have hashed the whole 16-byte blocks beforehand (see
; $L$open_sse_tail_256_hash for the 256-byte path).
; rbx = bytes remaining, rsi = input pointer, rdi = output pointer.
$L$open_sse_tail_64_dec_loop:
	cmp	rbx,16
	jb	NEAR $L$open_sse_tail_16_init
	sub	rbx,16
	movdqu	xmm3,XMMWORD[rsi]
	pxor	xmm0,xmm3
	movdqu	XMMWORD[rdi],xmm0
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
	movdqa	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm8,xmm12
	jmp	NEAR $L$open_sse_tail_64_dec_loop
; Fewer than 16 bytes remain. $L$open_sse_tail_16 takes its keystream from
; xmm1, so move the next 16 keystream bytes there before falling through.
$L$open_sse_tail_16_init:
	movdqa	xmm1,xmm0
1722
1723
; Handles the final partial block (rbx bytes, fewer than 16).
; In: xmm1 = keystream, rsi = input, rdi = output, rbx = byte count,
;     r10:r11:r12 = hash accumulator.
; Skips straight to $L$open_sse_finalize when rbx is zero.
$L$open_sse_tail_16:
	test	rbx,rbx
	jz	NEAR $L$open_sse_finalize



	; Assemble the input bytes into xmm3, zero-padded at the top. The bytes
	; are read last-to-first and shifted in at lane 0, so input byte i ends
	; up in lane i. r8 counts the bytes still to read.
	pxor	xmm3,xmm3
	lea	rsi,[((-1))+rbx*1+rsi]
	mov	r8,rbx
$L$open_sse_tail_16_compose:
	pslldq	xmm3,1
	pinsrb	xmm3,BYTE[rsi],0
	sub	rsi,1
	sub	r8,1
	jnz	NEAR $L$open_sse_tail_16_compose

	; Capture the padded input block for hashing before it is decrypted:
	; r13 = low qword (the DB line encodes movq r13,xmm3), r14 = high qword.
DB	102,73,15,126,221
	pextrq	r14,xmm3,1

	pxor	xmm3,xmm1


	; Write the rbx decrypted bytes one at a time. XMMWORD expands to
	; nothing (see the %define lines at the top of the file).
$L$open_sse_tail_16_extract:
	pextrb	XMMWORD[rdi],xmm3,0
	psrldq	xmm3,1
	add	rdi,1
	sub	rbx,1
	jne	NEAR $L$open_sse_tail_16_extract

	; acc += padded input block, plus bit 128.
	add	r10,r13
	adc	r11,r14
	adc	r12,1
	; acc *= the 128-bit multiplier at 160+0 off rbp.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Reduce modulo 2^130 - 5: low 130 bits plus 5 times the bits above.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
1793
1794
; Completes the Poly1305 computation, writes the 16-byte result and returns.
;
; Steps: hash the 16-byte block stored at 160+32 off rbp, do the final
; reduction below 2^130 - 5, add the 128-bit value at 160+16 off rbp, store
; the low 128 bits through the pointer popped into r9, then undo the frame.
; NOTE(review): in the seal prologue the 160+32 slot receives two length
; qwords; the open prologue is outside this view - presumably it stores the
; same kind of lengths block.
$L$open_sse_finalize:
	; acc += block at 160+32, plus bit 128.
	add	r10,QWORD[((0+160+32))+rbp]
	adc	r11,QWORD[((8+160+32))+rbp]
	adc	r12,1
	; acc *= the 128-bit multiplier at 160+0 off rbp.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Partial reduction: low 130 bits plus 5 times the bits above.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


	; Final reduction without a branch: subtract 2^130 - 5 (limbs -5, -1, 3).
	; A borrow means acc was already below the modulus, so the saved
	; original is restored with cmovc.
	mov	r13,r10
	mov	r14,r11
	mov	r15,r12
	sub	r10,-5
	sbb	r11,-1
	sbb	r12,3
	cmovc	r10,r13
	cmovc	r11,r14
	cmovc	r12,r15

	; Add the 128-bit value at 160+16; any carry out of r11 is discarded.
	add	r10,QWORD[((0+160+16))+rbp]
	adc	r11,QWORD[((8+160+16))+rbp]

	; Restore xmm6-xmm15, which are callee-saved under the Win64 ABI.
	movaps	xmm6,XMMWORD[((0+0))+rbp]
	movaps	xmm7,XMMWORD[((16+0))+rbp]
	movaps	xmm8,XMMWORD[((32+0))+rbp]
	movaps	xmm9,XMMWORD[((48+0))+rbp]
	movaps	xmm10,XMMWORD[((64+0))+rbp]
	movaps	xmm11,XMMWORD[((80+0))+rbp]
	movaps	xmm12,XMMWORD[((96+0))+rbp]
	movaps	xmm13,XMMWORD[((112+0))+rbp]
	movaps	xmm14,XMMWORD[((128+0))+rbp]
	movaps	xmm15,XMMWORD[((144+0))+rbp]


	add	rsp,288 + 160 + 32


	; r9 = the pointer pushed last by the prologue; the 16-byte result is
	; written through it.
	pop	r9

	mov	QWORD[r9],r10
	mov	QWORD[8+r9],r11
	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbx

	pop	rbp

	; Reload rdi and rsi from the caller's home slots above the return
	; address, then return (two-byte rep ret encoding).
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
1885
; Short-input path of the open routine. NOTE(review): the length test that
; selects it is outside this view; the seal routine uses 128 bytes or fewer
; for its counterpart.
;
; Builds three parallel states from the 48 bytes at r9:
;   block 0: xmm0, xmm4, xmm8,  xmm12   (fourth row as loaded from [32+r9])
;   block 1: xmm1, xmm5, xmm9,  xmm13   (low dword + 1)
;   block 2: xmm2, xmm6, xmm10, xmm14   (low dword + 2)
; xmm7, xmm11 and xmm15 keep copies of the second row, the third row and
; block 1's fourth row for the feed-forward after the rounds.
; r10 = number of double rounds still to run (10).
$L$open_sse_128:

	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm1,xmm0
	movdqa	xmm2,xmm0
	movdqu	xmm4,XMMWORD[r9]
	movdqa	xmm5,xmm4
	movdqa	xmm6,xmm4
	movdqu	xmm8,XMMWORD[16+r9]
	movdqa	xmm9,xmm8
	movdqa	xmm10,xmm8
	movdqu	xmm12,XMMWORD[32+r9]
	movdqa	xmm13,xmm12
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm14,xmm13
	paddd	xmm14,XMMWORD[$L$sse_inc]
	movdqa	xmm7,xmm4
	movdqa	xmm11,xmm8
	movdqa	xmm15,xmm13
	mov	r10,10
1906
; Ten double rounds over the three states set up by $L$open_sse_128, with
; xmm3 as rotate scratch, followed by the feed-forward and the hash key setup.
; The DB lines are hand-encoded palignr instructions that rotate the dword
; lanes of rows 1, 2 and 3 between column and diagonal form.
$L$open_sse_128_rounds:
	; Column quarter round on block 0 (xmm0, xmm4, xmm8, xmm12).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; palignr xmm4,xmm4,4 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,12
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; Column quarter round on block 1 (xmm1, xmm5, xmm9, xmm13).
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; palignr xmm5,xmm5,4 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,12
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
	; Column quarter round on block 2 (xmm2, xmm6, xmm10, xmm14).
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; palignr xmm6,xmm6,4 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,12
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
	; Diagonal quarter round on block 0.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
	; palignr xmm4,xmm4,12 / palignr xmm8,xmm8,8 / palignr xmm12,xmm12,4
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
	; Diagonal quarter round on block 1.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
	; palignr xmm5,xmm5,12 / palignr xmm9,xmm9,8 / palignr xmm13,xmm13,4
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
	; Diagonal quarter round on block 2.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
	; palignr xmm6,xmm6,12 / palignr xmm10,xmm10,8 / palignr xmm14,xmm14,4
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4

	dec	r10
	jnz	NEAR $L$open_sse_128_rounds
	; Feed-forward. Only the rows used afterwards are completed: block 0
	; contributes just its first two rows (xmm0, xmm4), so xmm8 and xmm12
	; are left as they are.
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,xmm7
	paddd	xmm5,xmm7
	paddd	xmm6,xmm7
	paddd	xmm9,xmm11
	paddd	xmm10,xmm11
	paddd	xmm13,xmm15
	; xmm15 held block 1's initial fourth row; advance it to block 2's.
	paddd	xmm15,XMMWORD[$L$sse_inc]
	paddd	xmm14,xmm15

	; Block 0's first row, masked with $L$clamp, becomes the hash multiplier
	; at 160+0; its second row is stored at 160+16 and is added in by
	; $L$open_sse_finalize.
	pand	xmm0,XMMWORD[$L$clamp]
	movdqa	XMMWORD[(160+0)+rbp],xmm0
	movdqa	XMMWORD[(160+16)+rbp],xmm4

	; The move below is a no-op as emitted (source and destination are both
	; r8). poly_hash_ad_internal is defined outside this view.
	; NOTE(review): r10 was the round counter until here, so the callee
	; presumably initialises the accumulator r10:r11:r12 - confirm.
	mov	r8,r8
	call	poly_hash_ad_internal
; Hash-then-decrypt loop for the short-input path, 16 bytes per iteration.
; The keystream is a queue of eight registers, consumed from xmm1:
;   xmm1, xmm5, xmm9, xmm13 (block 1), then xmm2, xmm6, xmm10, xmm14 (block 2).
; When fewer than 16 bytes remain, control goes to $L$open_sse_tail_16 with
; the next keystream bytes already in xmm1.
$L$open_sse_128_xor_hash:
	cmp	rbx,16
	jb	NEAR $L$open_sse_tail_16
	sub	rbx,16
	; acc (r10:r11:r12) += 16 input bytes, plus bit 128. The input is read
	; for hashing before it is decrypted.
	add	r10,QWORD[((0+0))+rsi]
	adc	r11,QWORD[((8+0))+rsi]
	adc	r12,1


	; Decrypt the same 16 bytes.
	movdqu	xmm3,XMMWORD[rsi]
	pxor	xmm1,xmm3
	movdqu	XMMWORD[rdi],xmm1
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
	; acc *= the 128-bit multiplier at 160+0 off rbp.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; Reduce modulo 2^130 - 5: low 130 bits plus 5 times the bits above.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


	; Advance the keystream queue by one register.
	movdqa	xmm1,xmm5
	movdqa	xmm5,xmm9
	movdqa	xmm9,xmm13
	movdqa	xmm13,xmm2
	movdqa	xmm2,xmm6
	movdqa	xmm6,xmm10
	movdqa	xmm10,xmm14
	jmp	NEAR $L$open_sse_128_xor_hash
2116$L$SEH_end_GFp_chacha20_poly1305_open:
2117
2118
2119
2120
2121
2122
2123
2124global	GFp_chacha20_poly1305_seal
2125
2126ALIGN	64
; GFp_chacha20_poly1305_seal - Win64 entry point of the seal (encrypt) routine.
;
; Win64 arguments as consumed below (after the shuffle into rdi/rsi/...):
;   rcx     to rdi : output pointer (ciphertext is stored through it)
;   rdx     to rsi : input pointer
;   r8      to rdx : input length in bytes (copied to rbx)
;   r9      to rcx : NOTE(review): presumably the additional-data pointer;
;                    it is consumed by poly_hash_ad_internal, not shown here
;   [40+rsp] to r8 : first length qword hashed at the end; NOTE(review):
;                    presumably the additional-data length - confirm
;   [48+rsp] to r9 : pointer to a data block: 16 bytes at +0, +16 and +32
;                    become state rows 1-3, and the qword at +56 is added to
;                    the second length qword. The same pointer is pushed and
;                    later popped to receive the 16-byte result.
;
; Frame, relative to the 32-byte-aligned rbp:
;   0..159   saved xmm6-xmm15 (callee-saved under Win64)
;   160+0    hash multiplier      160+16   value added at finalisation
;   160+32   two length qwords    160+48   state row 1
;   160+64   state row 2          160+80   scratch spill slot
;   160+96 .. 160+144             fourth state rows of blocks 0..3
GFp_chacha20_poly1305_seal:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	; rax = entry rsp; it is not read again in the visible code.
	mov	rax,rsp
$L$SEH_begin_GFp_chacha20_poly1305_seal:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	push	rbp

	push	rbx

	push	r12

	push	r13

	push	r14

	push	r15



	; Kept on the stack so the epilogue can pop it and store the result.
	push	r9

	sub	rsp,288 + 160 + 32

	; rbp = 32-byte-aligned frame base, which makes the movaps and movdqa
	; accesses below legal.
	lea	rbp,[32+rsp]
	and	rbp,-32

	movaps	XMMWORD[(0+0)+rbp],xmm6
	movaps	XMMWORD[(16+0)+rbp],xmm7
	movaps	XMMWORD[(32+0)+rbp],xmm8
	movaps	XMMWORD[(48+0)+rbp],xmm9
	movaps	XMMWORD[(64+0)+rbp],xmm10
	movaps	XMMWORD[(80+0)+rbp],xmm11
	movaps	XMMWORD[(96+0)+rbp],xmm12
	movaps	XMMWORD[(112+0)+rbp],xmm13
	movaps	XMMWORD[(128+0)+rbp],xmm14
	movaps	XMMWORD[(144+0)+rbp],xmm15

	; Lengths block at 160+32: r8, then input length plus the qword at
	; [56+r9]. rbx is then reset to the plain input length.
	mov	rbx,QWORD[56+r9]
	add	rbx,rdx
	mov	QWORD[((0+160+32))+rbp],r8
	mov	QWORD[((8+160+32))+rbp],rbx
	mov	rbx,rdx

	; Take the AVX2 implementation when bits 5 and 8 (mask 288 = 0x120) of
	; the dword at GFp_ia32cap_P+8 are both set.
	; NOTE(review): presumably the AVX2 and BMI2 feature bits - confirm
	; against the capability vector layout.
	mov	eax,DWORD[((GFp_ia32cap_P+8))]
	and	eax,288
	xor	eax,288
	jz	NEAR chacha20_poly1305_seal_avx2

	; Inputs of 128 bytes or fewer use the dedicated short path.
	cmp	rbx,128
	jbe	NEAR $L$seal_sse_128

	; Build four parallel states:
	;   block 0: xmm0, xmm4, xmm8,  xmm12   block 1: xmm1, xmm5, xmm9,  xmm13
	;   block 2: xmm2, xmm6, xmm10, xmm14   block 3: xmm3, xmm7, xmm11, xmm15
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqu	xmm4,XMMWORD[r9]
	movdqu	xmm8,XMMWORD[16+r9]
	movdqu	xmm12,XMMWORD[32+r9]

	movdqa	xmm1,xmm0
	movdqa	xmm2,xmm0
	movdqa	xmm3,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm6,xmm4
	movdqa	xmm7,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm10,xmm8
	movdqa	xmm11,xmm8
	; Fourth rows: block 3 keeps the row as loaded; blocks 2, 1 and 0 get
	; the low dword advanced by 1, 2 and 3.
	movdqa	xmm15,xmm12
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	xmm14,xmm12
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm12
	paddd	xmm12,XMMWORD[$L$sse_inc]

	; Save the initial rows for the feed-forward after the rounds.
	movdqa	XMMWORD[(160+48)+rbp],xmm4
	movdqa	XMMWORD[(160+64)+rbp],xmm8
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13
	movdqa	XMMWORD[(160+128)+rbp],xmm14
	movdqa	XMMWORD[(160+144)+rbp],xmm15
	; r10 = number of double rounds still to run.
	mov	r10,10
; Ten double rounds over the four states built by the function entry, with
; the four blocks interleaved step by step, followed by the feed-forward,
; hash key setup and encryption of the first 128 bytes.
;
; All sixteen xmm registers hold state, so xmm8 (block 0's third row) is
; repeatedly spilled to 160+80 off rbp and reused: first to hold the pshufb
; table ($L$rol16 or $L$rol8), then as shift scratch.
; DB lines starting 102,69,15,56,0 encode pshufb of xmm15, xmm14, xmm13 and
; xmm12 by xmm8; DB lines containing 58,15 encode palignr lane rotations.
$L$seal_sse_init_rounds:
	; Column round, first half: a += b; d ^= a; d rotated left by 16.
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,XMMWORD[$L$rol16]
	paddd	xmm3,xmm7
	paddd	xmm2,xmm6
	paddd	xmm1,xmm5
	paddd	xmm0,xmm4
	pxor	xmm15,xmm3
	pxor	xmm14,xmm2
	pxor	xmm13,xmm1
	pxor	xmm12,xmm0
DB	102,69,15,56,0,248
DB	102,69,15,56,0,240
DB	102,69,15,56,0,232
DB	102,69,15,56,0,224
	; c += d; b ^= c; b rotated left by 12 (shift right 20, shift left 12).
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	paddd	xmm11,xmm15
	paddd	xmm10,xmm14
	paddd	xmm9,xmm13
	paddd	xmm8,xmm12
	pxor	xmm7,xmm11
	pxor	xmm6,xmm10
	pxor	xmm5,xmm9
	pxor	xmm4,xmm8
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,xmm7
	psrld	xmm8,20
	pslld	xmm7,32-20
	pxor	xmm7,xmm8
	movdqa	xmm8,xmm6
	psrld	xmm8,20
	pslld	xmm6,32-20
	pxor	xmm6,xmm8
	movdqa	xmm8,xmm5
	psrld	xmm8,20
	pslld	xmm5,32-20
	pxor	xmm5,xmm8
	movdqa	xmm8,xmm4
	psrld	xmm8,20
	pslld	xmm4,32-20
	pxor	xmm4,xmm8
	; a += b; d ^= a; d rotated left by 8.
	movdqa	xmm8,XMMWORD[$L$rol8]
	paddd	xmm3,xmm7
	paddd	xmm2,xmm6
	paddd	xmm1,xmm5
	paddd	xmm0,xmm4
	pxor	xmm15,xmm3
	pxor	xmm14,xmm2
	pxor	xmm13,xmm1
	pxor	xmm12,xmm0
DB	102,69,15,56,0,248
DB	102,69,15,56,0,240
DB	102,69,15,56,0,232
DB	102,69,15,56,0,224
	; c += d; b ^= c; b rotated left by 7 (shift right 25, shift left 7).
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	paddd	xmm11,xmm15
	paddd	xmm10,xmm14
	paddd	xmm9,xmm13
	paddd	xmm8,xmm12
	pxor	xmm7,xmm11
	pxor	xmm6,xmm10
	pxor	xmm5,xmm9
	pxor	xmm4,xmm8
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,xmm7
	psrld	xmm8,25
	pslld	xmm7,32-25
	pxor	xmm7,xmm8
	movdqa	xmm8,xmm6
	psrld	xmm8,25
	pslld	xmm6,32-25
	pxor	xmm6,xmm8
	movdqa	xmm8,xmm5
	psrld	xmm8,25
	pslld	xmm5,32-25
	pxor	xmm5,xmm8
	movdqa	xmm8,xmm4
	psrld	xmm8,25
	pslld	xmm4,32-25
	pxor	xmm4,xmm8
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	; Enter diagonal form: palignr rows 1, 2, 3 of each block by 4, 8, 12
	; bytes (blocks 3, 2, 1, 0 in that order).
DB	102,15,58,15,255,4
DB	102,69,15,58,15,219,8
DB	102,69,15,58,15,255,12
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
	; Diagonal round: the same four steps as above.
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,XMMWORD[$L$rol16]
	paddd	xmm3,xmm7
	paddd	xmm2,xmm6
	paddd	xmm1,xmm5
	paddd	xmm0,xmm4
	pxor	xmm15,xmm3
	pxor	xmm14,xmm2
	pxor	xmm13,xmm1
	pxor	xmm12,xmm0
DB	102,69,15,56,0,248
DB	102,69,15,56,0,240
DB	102,69,15,56,0,232
DB	102,69,15,56,0,224
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	paddd	xmm11,xmm15
	paddd	xmm10,xmm14
	paddd	xmm9,xmm13
	paddd	xmm8,xmm12
	pxor	xmm7,xmm11
	pxor	xmm6,xmm10
	pxor	xmm5,xmm9
	pxor	xmm4,xmm8
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,xmm7
	psrld	xmm8,20
	pslld	xmm7,32-20
	pxor	xmm7,xmm8
	movdqa	xmm8,xmm6
	psrld	xmm8,20
	pslld	xmm6,32-20
	pxor	xmm6,xmm8
	movdqa	xmm8,xmm5
	psrld	xmm8,20
	pslld	xmm5,32-20
	pxor	xmm5,xmm8
	movdqa	xmm8,xmm4
	psrld	xmm8,20
	pslld	xmm4,32-20
	pxor	xmm4,xmm8
	movdqa	xmm8,XMMWORD[$L$rol8]
	paddd	xmm3,xmm7
	paddd	xmm2,xmm6
	paddd	xmm1,xmm5
	paddd	xmm0,xmm4
	pxor	xmm15,xmm3
	pxor	xmm14,xmm2
	pxor	xmm13,xmm1
	pxor	xmm12,xmm0
DB	102,69,15,56,0,248
DB	102,69,15,56,0,240
DB	102,69,15,56,0,232
DB	102,69,15,56,0,224
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	paddd	xmm11,xmm15
	paddd	xmm10,xmm14
	paddd	xmm9,xmm13
	paddd	xmm8,xmm12
	pxor	xmm7,xmm11
	pxor	xmm6,xmm10
	pxor	xmm5,xmm9
	pxor	xmm4,xmm8
	movdqa	XMMWORD[(160+80)+rbp],xmm8
	movdqa	xmm8,xmm7
	psrld	xmm8,25
	pslld	xmm7,32-25
	pxor	xmm7,xmm8
	movdqa	xmm8,xmm6
	psrld	xmm8,25
	pslld	xmm6,32-25
	pxor	xmm6,xmm8
	movdqa	xmm8,xmm5
	psrld	xmm8,25
	pslld	xmm5,32-25
	pxor	xmm5,xmm8
	movdqa	xmm8,xmm4
	psrld	xmm8,25
	pslld	xmm4,32-25
	pxor	xmm4,xmm8
	movdqa	xmm8,XMMWORD[((160+80))+rbp]
	; Leave diagonal form: palignr rows 1, 2, 3 by 12, 8, 4 bytes.
DB	102,15,58,15,255,12
DB	102,69,15,58,15,219,8
DB	102,69,15,58,15,255,4
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4

	dec	r10
	jnz	NEAR $L$seal_sse_init_rounds
	; Feed-forward: add each block's initial rows back onto its output.
	paddd	xmm3,XMMWORD[$L$chacha20_consts]
	paddd	xmm7,XMMWORD[((160+48))+rbp]
	paddd	xmm11,XMMWORD[((160+64))+rbp]
	paddd	xmm15,XMMWORD[((160+144))+rbp]
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm6,XMMWORD[((160+48))+rbp]
	paddd	xmm10,XMMWORD[((160+64))+rbp]
	paddd	xmm14,XMMWORD[((160+128))+rbp]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]


	; Block 3 (the unincremented counter row) supplies the hash key: its
	; first row masked with $L$clamp is the multiplier at 160+0, its second
	; row is stored at 160+16.
	pand	xmm3,XMMWORD[$L$clamp]
	movdqa	XMMWORD[(160+0)+rbp],xmm3
	movdqa	XMMWORD[(160+16)+rbp],xmm7

	; The move below is a no-op as emitted (source and destination are both
	; r8). poly_hash_ad_internal is defined outside this view.
	; NOTE(review): r10 was the round counter until here, so the callee
	; presumably initialises the accumulator r10:r11:r12 - confirm.
	mov	r8,r8
	call	poly_hash_ad_internal
	; Encrypt bytes 0..63 with block 2.
	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
	pxor	xmm2,xmm3
	pxor	xmm6,xmm7
	pxor	xmm10,xmm11
	pxor	xmm15,xmm14
	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
	; Encrypt bytes 64..127 with block 1.
	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
	movdqu	XMMWORD[(48 + 64)+rdi],xmm15

	; More than 192 bytes in total: continue at $L$seal_sse_main_init.
	; Otherwise 128 bytes are written but not yet hashed (rcx = 128), and
	; block 0 is still unused. rdi is deliberately not advanced here; rsi is.
	cmp	rbx,12*16
	ja	NEAR $L$seal_sse_main_init
	mov	rcx,8*16
	sub	rbx,8*16
	lea	rsi,[128+rsi]
	jmp	NEAR $L$seal_sse_128_tail_hash
; Reached when more than 192 input bytes were supplied. Encrypts bytes
; 128..191 with block 0, accounts for the 192 bytes written so far and picks
; the next stage from the remaining length in rbx. rdi is not advanced here;
; $L$seal_sse_main_rounds reads its hash input through rdi and advances it
; 16 bytes at a time.
$L$seal_sse_main_init:
	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
	pxor	xmm0,xmm3
	pxor	xmm4,xmm7
	pxor	xmm8,xmm11
	pxor	xmm15,xmm12
	movdqu	XMMWORD[(0 + 128)+rdi],xmm0
	movdqu	XMMWORD[(16 + 128)+rdi],xmm4
	movdqu	XMMWORD[(32 + 128)+rdi],xmm8
	movdqu	XMMWORD[(48 + 128)+rdi],xmm15

	; NOTE(review): this store to rcx is dead - rcx is overwritten with 2
	; three instructions later without being read. It is left as emitted
	; because the file is generated.
	mov	rcx,12*16
	sub	rbx,12*16
	lea	rsi,[192+rsi]
	; rcx and r8 are the loop counters consumed by $L$seal_sse_main_rounds.
	; NOTE(review): the tail paths are outside this view and presumably
	; read them too.
	mov	rcx,2
	mov	r8,8
	; Remaining length: up to 64, up to 128 or up to 192 bytes go to the
	; matching tail path; anything longer falls into the main loop.
	cmp	rbx,4*16
	jbe	NEAR $L$seal_sse_tail_64
	cmp	rbx,8*16
	jbe	NEAR $L$seal_sse_tail_128
	cmp	rbx,12*16
	jbe	NEAR $L$seal_sse_tail_192
2483
; Top of the main seal loop: prepare four fresh states for the next 256
; bytes of keystream.
;   block 0: xmm0, xmm4, xmm8,  xmm12   block 1: xmm1, xmm5, xmm9,  xmm13
;   block 2: xmm2, xmm6, xmm10, xmm14   block 3: xmm3, xmm7, xmm11, xmm15
; Rows 0-2 come from $L$chacha20_consts and the rows saved at 160+48 and
; 160+64 off rbp. The fourth rows continue from the counter row saved at
; 160+96: xmm15 = + 1, xmm14 = + 2, xmm13 = + 3, xmm12 = + 4.
$L$seal_sse_main_loop:
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm2,xmm0
	movdqa	xmm6,xmm4
	movdqa	xmm10,xmm8
	movdqa	xmm3,xmm0
	movdqa	xmm7,xmm4
	movdqa	xmm11,xmm8
	movdqa	xmm15,XMMWORD[((160+96))+rbp]
	paddd	xmm15,XMMWORD[$L$sse_inc]
	movdqa	xmm14,xmm15
	paddd	xmm14,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm14
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	; Save the initial fourth rows for the feed-forward; the slot at 160+96
	; now holds the most advanced counter row for the next pass.
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13
	movdqa	XMMWORD[(160+128)+rbp],xmm14
	movdqa	XMMWORD[(160+144)+rbp],xmm15
2510ALIGN	32
2511$L$seal_sse_main_rounds:
2512	movdqa	XMMWORD[(160+80)+rbp],xmm8
2513	movdqa	xmm8,XMMWORD[$L$rol16]
2514	paddd	xmm3,xmm7
2515	paddd	xmm2,xmm6
2516	paddd	xmm1,xmm5
2517	paddd	xmm0,xmm4
2518	pxor	xmm15,xmm3
2519	pxor	xmm14,xmm2
2520	pxor	xmm13,xmm1
2521	pxor	xmm12,xmm0
2522DB	102,69,15,56,0,248
2523DB	102,69,15,56,0,240
2524DB	102,69,15,56,0,232
2525DB	102,69,15,56,0,224
2526	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2527	paddd	xmm11,xmm15
2528	paddd	xmm10,xmm14
2529	paddd	xmm9,xmm13
2530	paddd	xmm8,xmm12
2531	pxor	xmm7,xmm11
2532	add	r10,QWORD[((0+0))+rdi]
2533	adc	r11,QWORD[((8+0))+rdi]
2534	adc	r12,1
2535	pxor	xmm6,xmm10
2536	pxor	xmm5,xmm9
2537	pxor	xmm4,xmm8
2538	movdqa	XMMWORD[(160+80)+rbp],xmm8
2539	movdqa	xmm8,xmm7
2540	psrld	xmm8,20
2541	pslld	xmm7,32-20
2542	pxor	xmm7,xmm8
2543	movdqa	xmm8,xmm6
2544	psrld	xmm8,20
2545	pslld	xmm6,32-20
2546	pxor	xmm6,xmm8
2547	movdqa	xmm8,xmm5
2548	psrld	xmm8,20
2549	pslld	xmm5,32-20
2550	pxor	xmm5,xmm8
2551	movdqa	xmm8,xmm4
2552	psrld	xmm8,20
2553	pslld	xmm4,32-20
2554	pxor	xmm4,xmm8
2555	mov	rax,QWORD[((0+160+0))+rbp]
2556	mov	r15,rax
2557	mul	r10
2558	mov	r13,rax
2559	mov	r14,rdx
2560	mov	rax,QWORD[((0+160+0))+rbp]
2561	mul	r11
2562	imul	r15,r12
2563	add	r14,rax
2564	adc	r15,rdx
2565	movdqa	xmm8,XMMWORD[$L$rol8]
2566	paddd	xmm3,xmm7
2567	paddd	xmm2,xmm6
2568	paddd	xmm1,xmm5
2569	paddd	xmm0,xmm4
2570	pxor	xmm15,xmm3
2571	pxor	xmm14,xmm2
2572	pxor	xmm13,xmm1
2573	pxor	xmm12,xmm0
2574DB	102,69,15,56,0,248
2575DB	102,69,15,56,0,240
2576DB	102,69,15,56,0,232
2577DB	102,69,15,56,0,224
2578	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2579	paddd	xmm11,xmm15
2580	paddd	xmm10,xmm14
2581	paddd	xmm9,xmm13
2582	paddd	xmm8,xmm12
2583	pxor	xmm7,xmm11
2584	pxor	xmm6,xmm10
2585	mov	rax,QWORD[((8+160+0))+rbp]
2586	mov	r9,rax
2587	mul	r10
2588	add	r14,rax
2589	adc	rdx,0
2590	mov	r10,rdx
2591	mov	rax,QWORD[((8+160+0))+rbp]
2592	mul	r11
2593	add	r15,rax
2594	adc	rdx,0
2595	pxor	xmm5,xmm9
2596	pxor	xmm4,xmm8
2597	movdqa	XMMWORD[(160+80)+rbp],xmm8
2598	movdqa	xmm8,xmm7
2599	psrld	xmm8,25
2600	pslld	xmm7,32-25
2601	pxor	xmm7,xmm8
2602	movdqa	xmm8,xmm6
2603	psrld	xmm8,25
2604	pslld	xmm6,32-25
2605	pxor	xmm6,xmm8
2606	movdqa	xmm8,xmm5
2607	psrld	xmm8,25
2608	pslld	xmm5,32-25
2609	pxor	xmm5,xmm8
2610	movdqa	xmm8,xmm4
2611	psrld	xmm8,25
2612	pslld	xmm4,32-25
2613	pxor	xmm4,xmm8
2614	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2615	imul	r9,r12
2616	add	r15,r10
2617	adc	r9,rdx
2618DB	102,15,58,15,255,4
2619DB	102,69,15,58,15,219,8
2620DB	102,69,15,58,15,255,12
2621DB	102,15,58,15,246,4
2622DB	102,69,15,58,15,210,8
2623DB	102,69,15,58,15,246,12
2624DB	102,15,58,15,237,4
2625DB	102,69,15,58,15,201,8
2626DB	102,69,15,58,15,237,12
2627DB	102,15,58,15,228,4
2628DB	102,69,15,58,15,192,8
2629DB	102,69,15,58,15,228,12
2630	movdqa	XMMWORD[(160+80)+rbp],xmm8
2631	movdqa	xmm8,XMMWORD[$L$rol16]
2632	paddd	xmm3,xmm7
2633	paddd	xmm2,xmm6
2634	paddd	xmm1,xmm5
2635	paddd	xmm0,xmm4
2636	pxor	xmm15,xmm3
2637	pxor	xmm14,xmm2
2638	mov	r10,r13
2639	mov	r11,r14
2640	mov	r12,r15
2641	and	r12,3
2642	mov	r13,r15
2643	and	r13,-4
2644	mov	r14,r9
2645	shrd	r15,r9,2
2646	shr	r9,2
2647	add	r15,r13
2648	adc	r9,r14
2649	add	r10,r15
2650	adc	r11,r9
2651	adc	r12,0
2652	pxor	xmm13,xmm1
2653	pxor	xmm12,xmm0
2654DB	102,69,15,56,0,248
2655DB	102,69,15,56,0,240
2656DB	102,69,15,56,0,232
2657DB	102,69,15,56,0,224
2658	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2659	paddd	xmm11,xmm15
2660	paddd	xmm10,xmm14
2661	paddd	xmm9,xmm13
2662	paddd	xmm8,xmm12
2663	pxor	xmm7,xmm11
2664	pxor	xmm6,xmm10
2665	pxor	xmm5,xmm9
2666	pxor	xmm4,xmm8
2667	movdqa	XMMWORD[(160+80)+rbp],xmm8
2668	movdqa	xmm8,xmm7
2669	psrld	xmm8,20
2670	pslld	xmm7,32-20
2671	pxor	xmm7,xmm8
2672	movdqa	xmm8,xmm6
2673	psrld	xmm8,20
2674	pslld	xmm6,32-20
2675	pxor	xmm6,xmm8
2676	movdqa	xmm8,xmm5
2677	psrld	xmm8,20
2678	pslld	xmm5,32-20
2679	pxor	xmm5,xmm8
2680	movdqa	xmm8,xmm4
2681	psrld	xmm8,20
2682	pslld	xmm4,32-20
2683	pxor	xmm4,xmm8
2684	movdqa	xmm8,XMMWORD[$L$rol8]
2685	paddd	xmm3,xmm7
2686	paddd	xmm2,xmm6
2687	paddd	xmm1,xmm5
2688	paddd	xmm0,xmm4
2689	pxor	xmm15,xmm3
2690	pxor	xmm14,xmm2
2691	pxor	xmm13,xmm1
2692	pxor	xmm12,xmm0
2693DB	102,69,15,56,0,248
2694DB	102,69,15,56,0,240
2695DB	102,69,15,56,0,232
2696DB	102,69,15,56,0,224
2697	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2698	paddd	xmm11,xmm15
2699	paddd	xmm10,xmm14
2700	paddd	xmm9,xmm13
2701	paddd	xmm8,xmm12
2702	pxor	xmm7,xmm11
2703	pxor	xmm6,xmm10
2704	pxor	xmm5,xmm9
2705	pxor	xmm4,xmm8
2706	movdqa	XMMWORD[(160+80)+rbp],xmm8
2707	movdqa	xmm8,xmm7
2708	psrld	xmm8,25
2709	pslld	xmm7,32-25
2710	pxor	xmm7,xmm8
2711	movdqa	xmm8,xmm6
2712	psrld	xmm8,25
2713	pslld	xmm6,32-25
2714	pxor	xmm6,xmm8
2715	movdqa	xmm8,xmm5
2716	psrld	xmm8,25
2717	pslld	xmm5,32-25
2718	pxor	xmm5,xmm8
2719	movdqa	xmm8,xmm4
2720	psrld	xmm8,25
2721	pslld	xmm4,32-25
2722	pxor	xmm4,xmm8
2723	movdqa	xmm8,XMMWORD[((160+80))+rbp]
2724DB	102,15,58,15,255,12
2725DB	102,69,15,58,15,219,8
2726DB	102,69,15,58,15,255,4
2727DB	102,15,58,15,246,12
2728DB	102,69,15,58,15,210,8
2729DB	102,69,15,58,15,246,4
2730DB	102,15,58,15,237,12
2731DB	102,69,15,58,15,201,8
2732DB	102,69,15,58,15,237,4
2733DB	102,15,58,15,228,12
2734DB	102,69,15,58,15,192,8
2735DB	102,69,15,58,15,228,4
2736
2737	lea	rdi,[16+rdi]
2738	dec	r8
2739	jge	NEAR $L$seal_sse_main_rounds
2740	add	r10,QWORD[((0+0))+rdi]
2741	adc	r11,QWORD[((8+0))+rdi]
2742	adc	r12,1
2743	mov	rax,QWORD[((0+160+0))+rbp]
2744	mov	r15,rax
2745	mul	r10
2746	mov	r13,rax
2747	mov	r14,rdx
2748	mov	rax,QWORD[((0+160+0))+rbp]
2749	mul	r11
2750	imul	r15,r12
2751	add	r14,rax
2752	adc	r15,rdx
2753	mov	rax,QWORD[((8+160+0))+rbp]
2754	mov	r9,rax
2755	mul	r10
2756	add	r14,rax
2757	adc	rdx,0
2758	mov	r10,rdx
2759	mov	rax,QWORD[((8+160+0))+rbp]
2760	mul	r11
2761	add	r15,rax
2762	adc	rdx,0
2763	imul	r9,r12
2764	add	r15,r10
2765	adc	r9,rdx
2766	mov	r10,r13
2767	mov	r11,r14
2768	mov	r12,r15
2769	and	r12,3
2770	mov	r13,r15
2771	and	r13,-4
2772	mov	r14,r9
2773	shrd	r15,r9,2
2774	shr	r9,2
2775	add	r15,r13
2776	adc	r9,r14
2777	add	r10,r15
2778	adc	r11,r9
2779	adc	r12,0
2780
2781	lea	rdi,[16+rdi]
2782	dec	rcx
2783	jg	NEAR $L$seal_sse_main_rounds
2784	paddd	xmm3,XMMWORD[$L$chacha20_consts]
2785	paddd	xmm7,XMMWORD[((160+48))+rbp]
2786	paddd	xmm11,XMMWORD[((160+64))+rbp]
2787	paddd	xmm15,XMMWORD[((160+144))+rbp]
2788	paddd	xmm2,XMMWORD[$L$chacha20_consts]
2789	paddd	xmm6,XMMWORD[((160+48))+rbp]
2790	paddd	xmm10,XMMWORD[((160+64))+rbp]
2791	paddd	xmm14,XMMWORD[((160+128))+rbp]
2792	paddd	xmm1,XMMWORD[$L$chacha20_consts]
2793	paddd	xmm5,XMMWORD[((160+48))+rbp]
2794	paddd	xmm9,XMMWORD[((160+64))+rbp]
2795	paddd	xmm13,XMMWORD[((160+112))+rbp]
2796	paddd	xmm0,XMMWORD[$L$chacha20_consts]
2797	paddd	xmm4,XMMWORD[((160+48))+rbp]
2798	paddd	xmm8,XMMWORD[((160+64))+rbp]
2799	paddd	xmm12,XMMWORD[((160+96))+rbp]
2800
2801	movdqa	XMMWORD[(160+80)+rbp],xmm14
2802	movdqa	XMMWORD[(160+80)+rbp],xmm14
2803	movdqu	xmm14,XMMWORD[((0 + 0))+rsi]
2804	pxor	xmm14,xmm3
2805	movdqu	XMMWORD[(0 + 0)+rdi],xmm14
2806	movdqu	xmm14,XMMWORD[((16 + 0))+rsi]
2807	pxor	xmm14,xmm7
2808	movdqu	XMMWORD[(16 + 0)+rdi],xmm14
2809	movdqu	xmm14,XMMWORD[((32 + 0))+rsi]
2810	pxor	xmm14,xmm11
2811	movdqu	XMMWORD[(32 + 0)+rdi],xmm14
2812	movdqu	xmm14,XMMWORD[((48 + 0))+rsi]
2813	pxor	xmm14,xmm15
2814	movdqu	XMMWORD[(48 + 0)+rdi],xmm14
2815
2816	movdqa	xmm14,XMMWORD[((160+80))+rbp]
2817	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
2818	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
2819	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
2820	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
2821	pxor	xmm2,xmm3
2822	pxor	xmm6,xmm7
2823	pxor	xmm10,xmm11
2824	pxor	xmm15,xmm14
2825	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
2826	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
2827	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
2828	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
2829	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
2830	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
2831	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
2832	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
2833	pxor	xmm1,xmm3
2834	pxor	xmm5,xmm7
2835	pxor	xmm9,xmm11
2836	pxor	xmm15,xmm13
2837	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
2838	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
2839	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
2840	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
2841
2842	cmp	rbx,16*16
2843	ja	NEAR $L$seal_sse_main_loop_xor
2844
2845	mov	rcx,12*16
2846	sub	rbx,12*16
2847	lea	rsi,[192+rsi]
2848	jmp	NEAR $L$seal_sse_128_tail_hash
; ---------------------------------------------------------------------------
; $L$seal_sse_main_loop_xor
;
; XORs the 64-byte keystream block held in xmm0/xmm4/xmm8/xmm12 with input
; bytes 192..255 at rsi and stores the result at the same offsets from rdi
; (xmm3/xmm7/xmm11/xmm15 are scratch).  It then consumes 256 bytes of input
; (rsi += 256, rbx -= 256) and dispatches on the remaining length in rbx.
; rdi is not advanced here.
; ---------------------------------------------------------------------------
$L$seal_sse_main_loop_xor:
	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
	pxor	xmm0,xmm3
	pxor	xmm4,xmm7
	pxor	xmm8,xmm11
; The fourth row is XORed into the scratch register, so the result to store
; is in xmm15 (xmm12 itself is left untouched).
	pxor	xmm15,xmm12
	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
	movdqu	XMMWORD[(48 + 192)+rdi],xmm15

	lea	rsi,[256+rsi]
	sub	rbx,16*16
; Loop counters for the next pass (also inherited by the tail paths below).
	mov	rcx,6
	mov	r8,4
; More than 192 bytes left: run another full main-loop pass.
; NOTE(review): jg is a signed branch on what looks like a byte count; it only
; differs from ja for values >= 2^63 - confirm that is acceptable.
	cmp	rbx,12*16
	jg	NEAR $L$seal_sse_main_loop
; Nothing left: go straight to the tail hash with rcx = 0.
	mov	rcx,rbx
	test	rbx,rbx
	je	NEAR $L$seal_sse_128_tail_hash
; 1..192 bytes left: pick the tail routine by size (rcx = 6 again).
	mov	rcx,6
	cmp	rbx,8*16
	ja	NEAR $L$seal_sse_tail_192
	cmp	rbx,4*16
	ja	NEAR $L$seal_sse_tail_128
; Otherwise (1..64 bytes) execution falls through to the code that follows.
2877
; ---------------------------------------------------------------------------
; $L$seal_sse_tail_64
;
; Generates one more 64-byte ChaCha20 block in xmm0/xmm4/xmm8/xmm12 while
; continuing to run Poly1305 over 16-byte blocks read from rdi.
;
; State rows are reloaded from the constants table and from the stack slots at
; [rbp+160+48], [rbp+160+64] and [rbp+160+96]; the last one is bumped by
; $L$sse_inc (dword 0 += 1) and written back before use.
;
; Loop control: rcx and r8 are expected to be set by the code that jumps or
; falls in here.  While rcx counts down to 0 each pass enters at the
; "x2hash" label (two hash steps per double round); afterwards r8 counts down
; and passes enter at "x1hash" (one hash step per double round).
; Exits to $L$seal_sse_128_tail_xor with the finished keystream block.
; ---------------------------------------------------------------------------
$L$seal_sse_tail_64:
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm12,XMMWORD[((160+96))+rbp]
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12

$L$seal_sse_tail_64_rounds_and_x2hash:
; Poly1305 step: acc (r12:r11:r10) += 16 bytes at [rdi] + 2^128 (the adc ..,1),
; then acc *= the 128-bit multiplier at [rbp+160], partially reduced.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Product is r9:r15:r14:r13.  Keep the low 130 bits in r12:r11:r10 and add the
; part above bit 130 back in as 4*hi + hi (= 5*hi).
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
$L$seal_sse_tail_64_rounds_and_x1hash:
; First half of the double round: add / xor / rotate by 16, 12, 8, 7.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12:
; rotate the rows so the next half works on diagonals.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
; Second half of the double round (same operations on the rotated rows).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; palignr with 12 / 8 / 4: undo the row rotation.
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
; Poly1305 step on the next 16 bytes at [rdi] (same sequence as above).
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
	dec	rcx
	jg	NEAR $L$seal_sse_tail_64_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_64_rounds_and_x1hash
; Feed-forward: add the initial state rows back to finish the block.
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]

	jmp	NEAR $L$seal_sse_128_tail_xor
3024
; ---------------------------------------------------------------------------
; $L$seal_sse_tail_128
;
; Generates two 64-byte ChaCha20 blocks side by side
;   lane 0: xmm0/xmm4/xmm8/xmm12      lane 1: xmm1/xmm5/xmm9/xmm13
; while running Poly1305 over 16-byte blocks read from rdi.
;
; Counter rows: lane 1 gets [rbp+160+96] + 1 and lane 0 gets + 2 (dword 0, via
; $L$sse_inc); they are saved to [rbp+160+112] and [rbp+160+96] for the
; feed-forward.  rcx / r8 are expected to be set by the code that jumps here and
; drive the same "x2hash then x1hash" schedule as the other tail loops.
;
; On exit the lane-1 block has been XORed with 64 input bytes at rsi and
; stored at rdi; lane 0 is left in registers and control goes to
; $L$seal_sse_128_tail_hash with rcx = 64, rbx -= 64, rsi += 64.
; ---------------------------------------------------------------------------
$L$seal_sse_tail_128:
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm13,XMMWORD[((160+96))+rbp]
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13

$L$seal_sse_tail_128_rounds_and_x2hash:
; Poly1305 step: acc (r12:r11:r10) += 16 bytes at [rdi] + 2^128, then
; acc *= the 128-bit multiplier at [rbp+160], partially reduced.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
$L$seal_sse_tail_128_rounds_and_x1hash:
; Lane 0, first half of the double round (rotations 16, 12, 8, 7).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
; Lane 1, first half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
; Poly1305 step on the next 16 bytes at [rdi].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
; Lane 0, second half of the double round.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 0.
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
; Lane 1, second half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 1.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4

	lea	rdi,[16+rdi]
	dec	rcx
	jg	NEAR $L$seal_sse_tail_128_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_128_rounds_and_x1hash
; Feed-forward for both lanes.
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR the lane-1 block with 64 input bytes and store them (xmm3/7/11/15 are
; scratch; the fourth row's result lands in xmm15).
	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
	movdqu	XMMWORD[(48 + 0)+rdi],xmm15

; 64 bytes were just written at rdi: ask the tail hash to cover them.
	mov	rcx,4*16
	sub	rbx,4*16
	lea	rsi,[64+rsi]
	jmp	NEAR $L$seal_sse_128_tail_hash
3238
; ---------------------------------------------------------------------------
; $L$seal_sse_tail_192
;
; Generates three 64-byte ChaCha20 blocks side by side
;   lane 0: xmm0/xmm4/xmm8/xmm12
;   lane 1: xmm1/xmm5/xmm9/xmm13
;   lane 2: xmm2/xmm6/xmm10/xmm14
; while running Poly1305 over 16-byte blocks read from rdi.
;
; Counter rows: lane 2 = [rbp+160+96] + 1, lane 1 = + 2, lane 0 = + 3 (dword 0,
; via $L$sse_inc); saved to [rbp+160+128], [rbp+160+112], [rbp+160+96].
; rcx / r8 are expected to be set by the code that jumps here and drive the
; "x2hash then x1hash" schedule.
;
; On exit lanes 2 and 1 have been XORed with 128 input bytes at rsi and stored
; at rdi; lane 0 stays in registers.  rcx = 128, rbx -= 128, rsi += 128, and
; execution falls through to the code that follows this block.
; ---------------------------------------------------------------------------
$L$seal_sse_tail_192:
	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm4,XMMWORD[((160+48))+rbp]
	movdqa	xmm8,XMMWORD[((160+64))+rbp]
	movdqa	xmm1,xmm0
	movdqa	xmm5,xmm4
	movdqa	xmm9,xmm8
	movdqa	xmm2,xmm0
	movdqa	xmm6,xmm4
	movdqa	xmm10,xmm8
	movdqa	xmm14,XMMWORD[((160+96))+rbp]
	paddd	xmm14,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm14
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm12,xmm13
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	XMMWORD[(160+96)+rbp],xmm12
	movdqa	XMMWORD[(160+112)+rbp],xmm13
	movdqa	XMMWORD[(160+128)+rbp],xmm14

$L$seal_sse_tail_192_rounds_and_x2hash:
; Poly1305 step: acc (r12:r11:r10) += 16 bytes at [rdi] + 2^128, then
; acc *= the 128-bit multiplier at [rbp+160], partially reduced.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rdi,[16+rdi]
$L$seal_sse_tail_192_rounds_and_x1hash:
; Lane 0, first half of the double round (rotations 16, 12, 8, 7).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
; Lane 1, first half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
; Lane 2, first half.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
; palignr xmm6,xmm6,4 / xmm10,xmm10,8 / xmm14,xmm14,12.
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
; Poly1305 step on the next 16 bytes at [rdi].
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
; Lane 0, second half of the double round.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 0.
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
; Lane 1, second half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 1.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
; Lane 2, second half.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 2.
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4

	lea	rdi,[16+rdi]
	dec	rcx
	jg	NEAR $L$seal_sse_tail_192_rounds_and_x2hash
	dec	r8
	jge	NEAR $L$seal_sse_tail_192_rounds_and_x1hash
; Feed-forward for all three lanes.
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm6,XMMWORD[((160+48))+rbp]
	paddd	xmm10,XMMWORD[((160+64))+rbp]
	paddd	xmm14,XMMWORD[((160+128))+rbp]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm5,XMMWORD[((160+48))+rbp]
	paddd	xmm9,XMMWORD[((160+64))+rbp]
	paddd	xmm13,XMMWORD[((160+112))+rbp]
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,XMMWORD[((160+48))+rbp]
	paddd	xmm8,XMMWORD[((160+64))+rbp]
	paddd	xmm12,XMMWORD[((160+96))+rbp]
; XOR lane 2 with input bytes 0..63 and store (xmm3/7/11/15 are scratch).
	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
	pxor	xmm2,xmm3
	pxor	xmm6,xmm7
	pxor	xmm10,xmm11
	pxor	xmm15,xmm14
	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
; XOR lane 1 with input bytes 64..127 and store.
	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
	pxor	xmm1,xmm3
	pxor	xmm5,xmm7
	pxor	xmm9,xmm11
	pxor	xmm15,xmm13
	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
	movdqu	XMMWORD[(48 + 64)+rdi],xmm15

; 128 bytes were just written at rdi: rcx tells the following hash loop how
; many bytes to cover.
	mov	rcx,8*16
	sub	rbx,8*16
	lea	rsi,[128+rsi]
3515
; ---------------------------------------------------------------------------
; $L$seal_sse_128_tail_hash
;
; Runs Poly1305 over rcx bytes starting at rdi, 16 bytes per pass.  Each pass
; advances rdi by 16 and decrements rcx by 16; once rcx < 16 control moves to
; $L$seal_sse_128_tail_xor.
; ---------------------------------------------------------------------------
$L$seal_sse_128_tail_hash:
	cmp	rcx,16
	jb	NEAR $L$seal_sse_128_tail_xor
; acc (r12:r11:r10) += 16 bytes at [rdi] + 2^128.
	add	r10,QWORD[((0+0))+rdi]
	adc	r11,QWORD[((8+0))+rdi]
	adc	r12,1
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	sub	rcx,16
	lea	rdi,[16+rdi]
	jmp	NEAR $L$seal_sse_128_tail_hash
3563
; ---------------------------------------------------------------------------
; $L$seal_sse_128_tail_xor
;
; While at least 16 input bytes remain (rbx >= 16): XOR 16 bytes at rsi with
; the keystream in xmm0, store them at rdi, absorb the stored block into
; Poly1305, advance both pointers, and shift the queue of pending keystream
; registers down by one (xmm0 <- xmm4 <- xmm8 <- xmm12 <- xmm1 <- xmm5 <- xmm9
; <- xmm13).  With fewer than 16 bytes left control moves to
; $L$seal_sse_tail_16.
; ---------------------------------------------------------------------------
$L$seal_sse_128_tail_xor:
	cmp	rbx,16
	jb	NEAR $L$seal_sse_tail_16
	sub	rbx,16

	movdqu	xmm3,XMMWORD[rsi]
	pxor	xmm0,xmm3
	movdqu	XMMWORD[rdi],xmm0

; acc (r12:r11:r10) += the 16 bytes just stored + 2^128.  The pointer updates
; use lea, so they do not disturb the carry chain or later flags.
	add	r10,QWORD[rdi]
	adc	r11,QWORD[8+rdi]
	adc	r12,1
	lea	rsi,[16+rsi]
	lea	rdi,[16+rdi]
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

; Shift the keystream queue so xmm0 holds the next unused 16 bytes.
	movdqa	xmm0,xmm4
	movdqa	xmm4,xmm8
	movdqa	xmm8,xmm12
	movdqa	xmm12,xmm1
	movdqa	xmm1,xmm5
	movdqa	xmm5,xmm9
	movdqa	xmm9,xmm13
	jmp	NEAR $L$seal_sse_128_tail_xor
3625
; ---------------------------------------------------------------------------
; $L$seal_sse_tail_16
;
; Handles the final rbx (< 16) input bytes.
;   1. rbx == 0: nothing to encrypt, go to $L$process_blocks_of_extra_in.
;   2. Gather the rbx bytes at rsi into xmm15 (last byte first, so byte 0 ends
;      up in lane 0), XOR with the keystream in xmm0, and write the rbx result
;      bytes to rdi one at a time.
;   3. Load a descriptor pointer from [rsp+288+160+32]; r13 = [r9+48] is used
;      as a byte pointer and r14 = [r9+56] as its byte count (presumably the
;      "extra_in" buffer the surrounding labels refer to - confirm against the
;      function prologue).  If the count is zero, hash the partial block via
;      $L$process_partial_block.
;   4. Otherwise take min(16 - rbx, count) bytes from that buffer to fill the
;      block, update the descriptor's pointer and count, merge the bytes
;      above the rbx result bytes, and absorb the combined block into
;      Poly1305.  Execution then falls through to the code that follows.
; ---------------------------------------------------------------------------
$L$seal_sse_tail_16:
	test	rbx,rbx
	jz	NEAR $L$process_blocks_of_extra_in

	mov	r8,rbx
	mov	rcx,rbx
; Start at the last input byte and walk backwards.
	lea	rsi,[((-1))+rbx*1+rsi]
	pxor	xmm15,xmm15
$L$seal_sse_tail_16_compose:
	pslldq	xmm15,1
	pinsrb	xmm15,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	dec	rcx
	jne	NEAR $L$seal_sse_tail_16_compose

; Encrypt: bytes above rbx now hold raw keystream and are masked off below.
	pxor	xmm15,xmm0

; Emit the rbx result bytes; xmm15 keeps the full value for hashing.
	mov	rcx,rbx
	movdqu	xmm0,xmm15
$L$seal_sse_tail_16_extract:
; XMMWORD is %defined to nothing at the top of the file, so this is a plain
; one-byte store to [rdi].
	pextrb	XMMWORD[rdi],xmm0,0
	psrldq	xmm0,1
	add	rdi,1
	sub	rcx,1
	jnz	NEAR $L$seal_sse_tail_16_extract

	mov	r9,QWORD[((288 + 160 + 32))+rsp]
	mov	r14,QWORD[56+r9]
	mov	r13,QWORD[48+r9]
	test	r14,r14
	jz	NEAR $L$process_partial_block

; r15 = number of bytes to borrow = min(16 - rbx, r14).
; NOTE(review): jge is a signed branch on what looks like a byte count; it only
; differs from jae for values >= 2^63 - confirm that is acceptable.
	mov	r15,16
	sub	r15,rbx
	cmp	r14,r15

	jge	NEAR $L$load_extra_in
	mov	r15,r14

$L$load_extra_in:
; rsi -> last byte to borrow.
	lea	rsi,[((-1))+r15*1+r13]

; Consume r15 bytes from the descriptor.
	add	r13,r15
	sub	r14,r15
	mov	QWORD[48+r9],r13
	mov	QWORD[56+r9],r14

	add	r8,r15

; Gather the borrowed bytes into xmm11, last byte first.
	pxor	xmm11,xmm11
$L$load_extra_load_loop:
	pslldq	xmm11,1
	pinsrb	xmm11,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	sub	r15,1
	jnz	NEAR $L$load_extra_load_loop

; Shift them up by rbx bytes so they sit just above the result bytes.
	mov	r15,rbx

$L$load_extra_shift_loop:
	pslldq	xmm11,1
	sub	r15,1
	jnz	NEAR $L$load_extra_shift_loop

; Keep only the low rbx bytes of xmm15: entry (rbx - 1) of the 16-byte-wide
; $L$and_masks table.
	lea	r15,[$L$and_masks]
	shl	rbx,4
	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]

; Combined block = result bytes | borrowed bytes.
	por	xmm15,xmm11

; Hand-encoded movq r13,xmm15; together with pextrq this moves the block into
; r14:r13 for the Poly1305 add (+ 2^128 via the adc ..,1).
DB	102,77,15,126,253
	pextrq	r14,xmm15,1
	add	r10,r13
	adc	r11,r14
	adc	r12,1
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
3762
3763
; ---------------------------------------------------------------------------
; $L$process_blocks_of_extra_in
;
; Reloads the descriptor pointer from [rsp+288+32+160], then takes
; rsi = [r9+48] (byte pointer) and r8 = [r9+56] (byte count) and absorbs every
; full 16-byte block at rsi into Poly1305.  Any 1..15 trailing bytes are
; gathered into xmm15 (rbx = their count) and execution falls through to the
; code that follows; with no trailing bytes control goes to
; $L$do_length_block.
; ---------------------------------------------------------------------------
$L$process_blocks_of_extra_in:

	mov	r9,QWORD[((288+32+160 ))+rsp]
	mov	rsi,QWORD[48+r9]
	mov	r8,QWORD[56+r9]
	mov	rcx,r8
; r8 = number of whole 16-byte blocks; ZF from this shift feeds the first jz.
	shr	r8,4

$L$process_extra_hash_loop:
; ZF comes from the shr above on entry and from "sub r8,1" on later passes
; (the intervening lea/jmp leave flags alone).
	jz	NEAR process_extra_in_trailer
; acc (r12:r11:r10) += 16 bytes at [rsi] + 2^128.
	add	r10,QWORD[((0+0))+rsi]
	adc	r11,QWORD[((8+0))+rsi]
	adc	r12,1
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rsi,[16+rsi]
	sub	r8,1
	jmp	NEAR $L$process_extra_hash_loop
; NOTE(review): this label has no $L$ prefix, unlike the other local labels in
; this file; left as is because the jz above targets it by this name.
process_extra_in_trailer:
; rcx = count mod 16 = trailing bytes; mov does not touch ZF, so the jz below
; still tests the result of the and.
	and	rcx,15
	mov	rbx,rcx
	jz	NEAR $L$do_length_block
	lea	rsi,[((-1))+rcx*1+rsi]

; Gather the trailing bytes into xmm15, last byte first.  xmm15 is not zeroed
; here; bytes above rbx are cleared by the $L$and_masks mask applied in the
; code that follows.
$L$process_extra_in_trailer_load:
	pslldq	xmm15,1
	pinsrb	xmm15,BYTE[rsi],0
	lea	rsi,[((-1))+rsi]
	sub	rcx,1
	jnz	NEAR $L$process_extra_in_trailer_load
3830
; ---------------------------------------------------------------------------
; $L$process_partial_block
;
; Absorbs a final partial block into Poly1305.  xmm15 holds the data and rbx
; the number of valid bytes (1..15 on the paths visible in this file); the
; remaining bytes are zeroed with entry (rbx - 1) of the $L$and_masks table
; before the block is added with the usual 2^128 bit.  Clobbers rbx (shifted
; left by 4) and r15.  Falls through to the code that follows.
; ---------------------------------------------------------------------------
$L$process_partial_block:

	lea	r15,[$L$and_masks]
	shl	rbx,4
	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
; Hand-encoded movq r13,xmm15 (low qword); pextrq fetches the high qword.
DB	102,77,15,126,253
	pextrq	r14,xmm15,1
	add	r10,r13
	adc	r11,r14
	adc	r12,1
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0
3878
3879
; ---------------------------------------------------------------------------
; $L$do_length_block
;
; Finishes the Poly1305 computation and returns from the function.
;   1. Absorb the 16 bytes stored at [rbp+160+32] (the "length block", going
;      by the label name) with the usual 2^128 bit.
;   2. Fully reduce the accumulator: subtract 2^130 - 5 and keep the original
;      value if that borrows.
;   3. Add the 128-bit value at [rbp+160+16] and write the 16-byte result
;      through the pointer popped from the stack into r9.
;   4. Restore xmm6-xmm15 from [rbp+0..159], release the 288+160+32 byte frame,
;      pop r15/r14/r13/r12/rbx/rbp, reload rdi/rsi (Win64) and return.
; ---------------------------------------------------------------------------
$L$do_length_block:
	add	r10,QWORD[((0+160+32))+rbp]
	adc	r11,QWORD[((8+160+32))+rbp]
	adc	r12,1
; acc *= the 128-bit multiplier at [rbp+160]; product in r9:r15:r14:r13.
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
; Keep the low 130 bits; add the part above bit 130 back as 4*hi + hi.
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

; Final reduction: (-5, -1, 3) as three limbs is 2^130 - 5.  If the subtraction
; borrows (CF set) the accumulator was already below the modulus, so the saved
; copy is restored with cmovc (no data-dependent branch).
	mov	r13,r10
	mov	r14,r11
	mov	r15,r12
	sub	r10,-5
	sbb	r11,-1
	sbb	r12,3
	cmovc	r10,r13
	cmovc	r11,r14
	cmovc	r12,r15

; Add the second 128-bit value stored at [rbp+160+16]; only the low 128 bits
; (r11:r10) are kept.
	add	r10,QWORD[((0+160+16))+rbp]
	adc	r11,QWORD[((8+160+16))+rbp]

; Restore the xmm registers saved at [rbp+0..159].
	movaps	xmm6,XMMWORD[((0+0))+rbp]
	movaps	xmm7,XMMWORD[((16+0))+rbp]
	movaps	xmm8,XMMWORD[((32+0))+rbp]
	movaps	xmm9,XMMWORD[((48+0))+rbp]
	movaps	xmm10,XMMWORD[((64+0))+rbp]
	movaps	xmm11,XMMWORD[((80+0))+rbp]
	movaps	xmm12,XMMWORD[((96+0))+rbp]
	movaps	xmm13,XMMWORD[((112+0))+rbp]
	movaps	xmm14,XMMWORD[((128+0))+rbp]
	movaps	xmm15,XMMWORD[((144+0))+rbp]

	add	rsp,288 + 160 + 32

; The first value popped is used as the destination pointer for the result.
	pop	r9

	mov	QWORD[r9],r10
	mov	QWORD[8+r9],r11
	pop	r15

	pop	r14

	pop	r13

	pop	r12

	pop	rbx

	pop	rbp

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
3970
; ---------------------------------------------------------------------------
; $L$seal_sse_128
;
; Short-input path: computes three ChaCha20 blocks at once, entirely in
; registers, from the 48 bytes of state at [r9], [r9+16] and [r9+32].
;   lane 0: xmm0/xmm4/xmm8/xmm12     counter row = [r9+32] + 1
;   lane 1: xmm1/xmm5/xmm9/xmm13     counter row = [r9+32] + 2
;   lane 2: xmm2/xmm6/xmm10/xmm14    counter row = [r9+32] (unchanged)
; xmm7, xmm11 and xmm15 keep copies of the two key rows and of the lane-0
; counter row for the feed-forward.
;
; After 10 double rounds the first two rows of lane 2 become the Poly1305
; parameters: xmm2 masked with $L$clamp goes to [rbp+160+0] and xmm6 goes to
; [rbp+160+16].  Lanes 0 and 1 are left in registers as keystream, then
; poly_hash_ad_internal is called and control continues at
; $L$seal_sse_128_tail_xor.
; ---------------------------------------------------------------------------
$L$seal_sse_128:

	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
	movdqa	xmm1,xmm0
	movdqa	xmm2,xmm0
	movdqu	xmm4,XMMWORD[r9]
	movdqa	xmm5,xmm4
	movdqa	xmm6,xmm4
	movdqu	xmm8,XMMWORD[16+r9]
	movdqa	xmm9,xmm8
	movdqa	xmm10,xmm8
	movdqu	xmm14,XMMWORD[32+r9]
	movdqa	xmm12,xmm14
	paddd	xmm12,XMMWORD[$L$sse_inc]
	movdqa	xmm13,xmm12
	paddd	xmm13,XMMWORD[$L$sse_inc]
	movdqa	xmm7,xmm4
	movdqa	xmm11,xmm8
	movdqa	xmm15,xmm12
; r10 = double-round counter.
	mov	r10,10

$L$seal_sse_128_rounds:
; Lane 0, first half of the double round (rotations 16, 12, 8, 7).
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; Hand-encoded palignr xmm4,xmm4,4 / xmm8,xmm8,8 / xmm12,xmm12,12.
DB	102,15,58,15,228,4
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,12
; Lane 1, first half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr xmm5,xmm5,4 / xmm9,xmm9,8 / xmm13,xmm13,12.
DB	102,15,58,15,237,4
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,12
; Lane 2, first half.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
; palignr xmm6,xmm6,4 / xmm10,xmm10,8 / xmm14,xmm14,12.
DB	102,15,58,15,246,4
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,12
; Lane 0, second half of the double round.
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol16]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,12
	psrld	xmm4,20
	pxor	xmm4,xmm3
	paddd	xmm0,xmm4
	pxor	xmm12,xmm0
	pshufb	xmm12,XMMWORD[$L$rol8]
	paddd	xmm8,xmm12
	pxor	xmm4,xmm8
	movdqa	xmm3,xmm4
	pslld	xmm3,7
	psrld	xmm4,25
	pxor	xmm4,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 0.
DB	102,15,58,15,228,12
DB	102,69,15,58,15,192,8
DB	102,69,15,58,15,228,4
; Lane 1, second half.
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol16]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,12
	psrld	xmm5,20
	pxor	xmm5,xmm3
	paddd	xmm1,xmm5
	pxor	xmm13,xmm1
	pshufb	xmm13,XMMWORD[$L$rol8]
	paddd	xmm9,xmm13
	pxor	xmm5,xmm9
	movdqa	xmm3,xmm5
	pslld	xmm3,7
	psrld	xmm5,25
	pxor	xmm5,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 1.
DB	102,15,58,15,237,12
DB	102,69,15,58,15,201,8
DB	102,69,15,58,15,237,4
; Lane 2, second half.
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol16]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,12
	psrld	xmm6,20
	pxor	xmm6,xmm3
	paddd	xmm2,xmm6
	pxor	xmm14,xmm2
	pshufb	xmm14,XMMWORD[$L$rol8]
	paddd	xmm10,xmm14
	pxor	xmm6,xmm10
	movdqa	xmm3,xmm6
	pslld	xmm3,7
	psrld	xmm6,25
	pxor	xmm6,xmm3
; palignr with 12 / 8 / 4: undo the row rotation for lane 2.
DB	102,15,58,15,246,12
DB	102,69,15,58,15,210,8
DB	102,69,15,58,15,246,4

	dec	r10
	jnz	NEAR $L$seal_sse_128_rounds
; Feed-forward.  Lane 2 only needs its first two rows (xmm2, xmm6), so xmm10
; and xmm14 are not finalised.
	paddd	xmm0,XMMWORD[$L$chacha20_consts]
	paddd	xmm1,XMMWORD[$L$chacha20_consts]
	paddd	xmm2,XMMWORD[$L$chacha20_consts]
	paddd	xmm4,xmm7
	paddd	xmm5,xmm7
	paddd	xmm6,xmm7
	paddd	xmm8,xmm11
	paddd	xmm9,xmm11
; xmm15 holds the lane-0 counter row; bump it once to get lane 1's.
	paddd	xmm12,xmm15
	paddd	xmm15,XMMWORD[$L$sse_inc]
	paddd	xmm13,xmm15

; Poly1305 parameters from lane 2: clamped multiplier and the second half.
	pand	xmm2,XMMWORD[$L$clamp]
	movdqa	XMMWORD[(160+0)+rbp],xmm2
	movdqa	XMMWORD[(160+16)+rbp],xmm6

; No-op as written (same source and destination register); r8 presumably
; carries an argument for poly_hash_ad_internal - confirm against that routine.
	mov	r8,r8
	call	poly_hash_ad_internal
	jmp	NEAR $L$seal_sse_128_tail_xor
; End-of-function marker label.
$L$SEH_end_GFp_chacha20_poly1305_seal:
4142
4143
4144
4145
4146ALIGN	64
; ---------------------------------------------------------------------------
; chacha20_poly1305_open_avx2 (entry section)
;
; Builds a two-block ChaCha20 state in ymm0/ymm4/ymm8/ymm12: the constants row
; plus the three 16-byte rows at [r9], [r9+16] and [r9+32] broadcast to both
; 128-bit halves.  The 32-byte load at $L$avx2_init spans the adjacent
; $L$avx2_init (0,0,0,0) and $L$sse_inc (1,0,0,0) tables, so the upper half's
; first dword is incremented by 1 and the lower half is unchanged.
;
; Inputs of up to 192 or 320 bytes (rbx) branch to dedicated paths; otherwise
; the initial rows are saved at [rbp+160+64], [rbp+160+96] and [rbp+160+160]
; and r10 is set to 10 for the round loop that follows.
; ---------------------------------------------------------------------------
chacha20_poly1305_open_avx2:

	vzeroupper
	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
	vbroadcasti128	ymm4,XMMWORD[r9]
	vbroadcasti128	ymm8,XMMWORD[16+r9]
	vbroadcasti128	ymm12,XMMWORD[32+r9]
	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
	cmp	rbx,6*32
	jbe	NEAR $L$open_avx2_192
	cmp	rbx,10*32
	jbe	NEAR $L$open_avx2_320

	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
	mov	r10,10
; ---------------------------------------------------------------------------
; $L$open_avx2_init_rounds
;
; Runs r10 double rounds on the two-block state in ymm0/ymm4/ymm8/ymm12 (ymm3
; is scratch), adds the saved initial rows back, and splits the result:
;   - ymm3 = (low half of ymm0, low half of ymm4), i.e. the first 32 bytes of
;     the lower block, masked with $L$clamp and stored at [rbp+160+0] for
;     Poly1305;
;   - ymm0 = (high half of ymm0, high half of ymm4) and
;     ymm4 = (high half of ymm8, high half of ymm12): the upper block's 64
;     bytes, kept in registers.
; It then calls poly_hash_ad_internal.
; ---------------------------------------------------------------------------
$L$open_avx2_init_rounds:
; First half of the double round (rotations 16, 12, 8, 7).
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
; Rotate the rows within each 128-bit half so the next half works on
; diagonals.
	vpalignr	ymm12,ymm12,ymm12,12
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,4
; Second half of the double round.
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpsrld	ymm3,ymm4,20
	vpslld	ymm4,ymm4,12
	vpxor	ymm4,ymm4,ymm3
	vpaddd	ymm0,ymm0,ymm4
	vpxor	ymm12,ymm12,ymm0
	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
	vpaddd	ymm8,ymm8,ymm12
	vpxor	ymm4,ymm4,ymm8
	vpslld	ymm3,ymm4,7
	vpsrld	ymm4,ymm4,25
	vpxor	ymm4,ymm4,ymm3
; Undo the row rotation.
	vpalignr	ymm12,ymm12,ymm12,4
	vpalignr	ymm8,ymm8,ymm8,8
	vpalignr	ymm4,ymm4,ymm4,12

	dec	r10
	jne	NEAR $L$open_avx2_init_rounds
; Feed-forward with the rows saved before the loop.
	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]

; imm 0x02: result low = ymm0 low half, result high = ymm4 low half.
	vperm2i128	ymm3,ymm4,ymm0,0x02

	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
	vmovdqa	YMMWORD[(160+0)+rbp],ymm3

; imm 0x13: result low = second source's high half, result high = first
; source's high half.
	vperm2i128	ymm0,ymm4,ymm0,0x13
	vperm2i128	ymm4,ymm12,ymm8,0x13

; No-op as written (same source and destination register); r8 presumably
; carries an argument for poly_hash_ad_internal - confirm against that routine.
	mov	r8,r8
	call	poly_hash_ad_internal
4232
4233	xor	rcx,rcx
; $L$open_avx2_init_hash - absorbs the first 64 input bytes into the Poly1305
; accumulator (r12:r11:r10), 16 bytes per iteration at [rsi+rcx], then
; decrypts those 64 bytes with the keystream held in ymm0:ymm4.
4234$L$open_avx2_init_hash:
; acc += 16 input bytes, plus 1 in the third limb (the 2^128 pad bit).
4235	add	r10,QWORD[((0+0))+rcx*1+rsi]
4236	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4237	adc	r12,1
; acc *= r, with r = the two qwords at rbp+160 and rbp+168.
; The product is built in r13 (lowest), r14, r15, r9 (highest).
4238	mov	rax,QWORD[((0+160+0))+rbp]
4239	mov	r15,rax
4240	mul	r10
4241	mov	r13,rax
4242	mov	r14,rdx
4243	mov	rax,QWORD[((0+160+0))+rbp]
4244	mul	r11
4245	imul	r15,r12
4246	add	r14,rax
4247	adc	r15,rdx
4248	mov	rax,QWORD[((8+160+0))+rbp]
4249	mov	r9,rax
4250	mul	r10
4251	add	r14,rax
4252	adc	rdx,0
4253	mov	r10,rdx
4254	mov	rax,QWORD[((8+160+0))+rbp]
4255	mul	r11
4256	add	r15,rax
4257	adc	rdx,0
4258	imul	r9,r12
4259	add	r15,r10
4260	adc	r9,rdx
; Partial reduction mod 2^130-5: keep the low 130 bits (r15 & 3 becomes the
; new top limb) and add 5*c, where c is everything above bit 130; 5*c is
; formed as 4*c (r15 & -4, r9) plus c (r9:r15 shifted right by 2).
4261	mov	r10,r13
4262	mov	r11,r14
4263	mov	r12,r15
4264	and	r12,3
4265	mov	r13,r15
4266	and	r13,-4
4267	mov	r14,r9
4268	shrd	r15,r9,2
4269	shr	r9,2
4270	add	r15,r13
4271	adc	r9,r14
4272	add	r10,r15
4273	adc	r11,r9
4274	adc	r12,0
4275
4276	add	rcx,16
4277	cmp	rcx,2*32
4278	jne	NEAR $L$open_avx2_init_hash
4279
; XOR the 64 hashed input bytes with the keystream and write them out.
4280	vpxor	ymm0,ymm0,YMMWORD[rsi]
4281	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
4282
4283	vmovdqu	YMMWORD[rdi],ymm0
4284	vmovdqu	YMMWORD[32+rdi],ymm4
; Advance both pointers and the remaining length past these 64 bytes.
4285	lea	rsi,[64+rsi]
4286	lea	rdi,[64+rdi]
4287	sub	rbx,2*32
; $L$open_avx2_main_loop - top of the bulk loop. While at least 512 bytes
; remain (rbx >= 16*32), sets up four ymm states (eight ChaCha20 blocks):
;   rows A: ymm0..ymm3, rows B: ymm4..ymm7, rows C: ymm8..ymm11,
;   rows D: ymm12..ymm15.
4288$L$open_avx2_main_loop:
4289
4290	cmp	rbx,16*32
4291	jb	NEAR $L$open_avx2_main_loop_done
; Rows A, B, C are identical across all four states.
4292	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4293	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4294	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4295	vmovdqa	ymm1,ymm0
4296	vmovdqa	ymm5,ymm4
4297	vmovdqa	ymm9,ymm8
4298	vmovdqa	ymm2,ymm0
4299	vmovdqa	ymm6,ymm4
4300	vmovdqa	ymm10,ymm8
4301	vmovdqa	ymm3,ymm0
4302	vmovdqa	ymm7,ymm4
4303	vmovdqa	ymm11,ymm8
; Rows D: each state's counters are 2 higher (per lane) than the previous
; one, starting from the counter row saved at rbp+160+160.
4304	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4305	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
4306	vpaddd	ymm14,ymm12,ymm15
4307	vpaddd	ymm13,ymm12,ymm14
4308	vpaddd	ymm12,ymm12,ymm13
; Save each state's initial row D for the feed-forward; the slot at
; rbp+160+160 now holds the highest counters handed out so far.
4309	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
4310	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
4311	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4312	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4313
; rcx = byte offset of the next input block to hash inside the round loop.
4314	xor	rcx,rcx
; $L$open_avx2_main_loop_rounds - one ChaCha20 double round over the four ymm
; states, interleaved with Poly1305 absorption of three 16-byte input blocks
; (at rsi+rcx, +16 and +32). rcx advances by 48 per iteration and the loop
; ends at rcx == 480, i.e. after 10 double rounds and 480 hashed bytes.
; All 16 ymm registers hold state, so ymm8 doubles as scratch: row C of the
; first state is spilled to rbp+160+128 whenever ymm8 holds a rotate mask or
; a shift temporary.
; Poly1305 registers: accumulator r12:r11:r10, key r at rbp+160 / rbp+168,
; product in r13 (lowest), r14, r15, r9 (highest).
4315$L$open_avx2_main_loop_rounds:
; Hash block 1: acc += 16 input bytes plus the 2^128 pad bit.
4316	add	r10,QWORD[((0+0))+rcx*1+rsi]
4317	adc	r11,QWORD[((8+0))+rcx*1+rsi]
4318	adc	r12,1
; Column round, first half: a += b; d ^= a; d = rol(d, 16).
4319	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4320	vmovdqa	ymm8,YMMWORD[$L$rol16]
4321	vpaddd	ymm3,ymm3,ymm7
4322	vpaddd	ymm2,ymm2,ymm6
4323	vpaddd	ymm1,ymm1,ymm5
4324	vpaddd	ymm0,ymm0,ymm4
4325	vpxor	ymm15,ymm15,ymm3
4326	vpxor	ymm14,ymm14,ymm2
4327	vpxor	ymm13,ymm13,ymm1
4328	vpxor	ymm12,ymm12,ymm0
; Hash block 1: acc * r[0] (mulx takes its implicit multiplicand from rdx).
4329	mov	rdx,QWORD[((0+160+0))+rbp]
4330	mov	r15,rdx
4331	mulx	r14,r13,r10
4332	mulx	rdx,rax,r11
4333	imul	r15,r12
4334	add	r14,rax
4335	adc	r15,rdx
4336	vpshufb	ymm15,ymm15,ymm8
4337	vpshufb	ymm14,ymm14,ymm8
4338	vpshufb	ymm13,ymm13,ymm8
4339	vpshufb	ymm12,ymm12,ymm8
; c += d; the first state's row C is re-read from its spill slot.
4340	vpaddd	ymm11,ymm11,ymm15
4341	vpaddd	ymm10,ymm10,ymm14
4342	vpaddd	ymm9,ymm9,ymm13
4343	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4344	vpxor	ymm7,ymm7,ymm11
; Hash block 1: acc * r[1].
4345	mov	rdx,QWORD[((8+160+0))+rbp]
4346	mulx	rax,r10,r10
4347	add	r14,r10
4348	mulx	r9,r11,r11
4349	adc	r15,r11
4350	adc	r9,0
4351	imul	rdx,r12
4352	vpxor	ymm6,ymm6,ymm10
4353	vpxor	ymm5,ymm5,ymm9
4354	vpxor	ymm4,ymm4,ymm8
; b = rol(b, 12) via shift pairs, with ymm8 as the temporary.
4355	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4356	vpsrld	ymm8,ymm7,20
4357	vpslld	ymm7,ymm7,32-20
4358	vpxor	ymm7,ymm7,ymm8
4359	vpsrld	ymm8,ymm6,20
4360	vpslld	ymm6,ymm6,32-20
4361	vpxor	ymm6,ymm6,ymm8
4362	vpsrld	ymm8,ymm5,20
4363	vpslld	ymm5,ymm5,32-20
4364	add	r15,rax
4365	adc	r9,rdx
4366	vpxor	ymm5,ymm5,ymm8
4367	vpsrld	ymm8,ymm4,20
4368	vpslld	ymm4,ymm4,32-20
4369	vpxor	ymm4,ymm4,ymm8
; Column round, second half: a += b; d ^= a; d = rol(d, 8).
4370	vmovdqa	ymm8,YMMWORD[$L$rol8]
4371	vpaddd	ymm3,ymm3,ymm7
4372	vpaddd	ymm2,ymm2,ymm6
4373	vpaddd	ymm1,ymm1,ymm5
4374	vpaddd	ymm0,ymm0,ymm4
4375	vpxor	ymm15,ymm15,ymm3
; Hash block 1: partial reduction mod 2^130-5 (keep the low 130 bits and add
; 5*c, where c is everything above bit 130).
4376	mov	r10,r13
4377	mov	r11,r14
4378	mov	r12,r15
4379	and	r12,3
4380	mov	r13,r15
4381	and	r13,-4
4382	mov	r14,r9
4383	shrd	r15,r9,2
4384	shr	r9,2
4385	add	r15,r13
4386	adc	r9,r14
4387	add	r10,r15
4388	adc	r11,r9
4389	adc	r12,0
4390	vpxor	ymm14,ymm14,ymm2
4391	vpxor	ymm13,ymm13,ymm1
4392	vpxor	ymm12,ymm12,ymm0
4393	vpshufb	ymm15,ymm15,ymm8
4394	vpshufb	ymm14,ymm14,ymm8
4395	vpshufb	ymm13,ymm13,ymm8
4396	vpshufb	ymm12,ymm12,ymm8
4397	vpaddd	ymm11,ymm11,ymm15
4398	vpaddd	ymm10,ymm10,ymm14
; Hash block 2 (input offset rcx+16).
4399	add	r10,QWORD[((0+16))+rcx*1+rsi]
4400	adc	r11,QWORD[((8+16))+rcx*1+rsi]
4401	adc	r12,1
4402	vpaddd	ymm9,ymm9,ymm13
4403	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4404	vpxor	ymm7,ymm7,ymm11
4405	vpxor	ymm6,ymm6,ymm10
4406	vpxor	ymm5,ymm5,ymm9
4407	vpxor	ymm4,ymm4,ymm8
; b = rol(b, 7).
4408	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4409	vpsrld	ymm8,ymm7,25
4410	mov	rdx,QWORD[((0+160+0))+rbp]
4411	mov	r15,rdx
4412	mulx	r14,r13,r10
4413	mulx	rdx,rax,r11
4414	imul	r15,r12
4415	add	r14,rax
4416	adc	r15,rdx
4417	vpslld	ymm7,ymm7,32-25
4418	vpxor	ymm7,ymm7,ymm8
4419	vpsrld	ymm8,ymm6,25
4420	vpslld	ymm6,ymm6,32-25
4421	vpxor	ymm6,ymm6,ymm8
4422	vpsrld	ymm8,ymm5,25
4423	vpslld	ymm5,ymm5,32-25
4424	vpxor	ymm5,ymm5,ymm8
4425	vpsrld	ymm8,ymm4,25
4426	vpslld	ymm4,ymm4,32-25
4427	vpxor	ymm4,ymm4,ymm8
; Rotate rows B/C/D by 4/8/12 bytes per lane to set up the diagonal round.
4428	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4429	vpalignr	ymm7,ymm7,ymm7,4
4430	vpalignr	ymm11,ymm11,ymm11,8
4431	vpalignr	ymm15,ymm15,ymm15,12
4432	vpalignr	ymm6,ymm6,ymm6,4
4433	vpalignr	ymm10,ymm10,ymm10,8
4434	vpalignr	ymm14,ymm14,ymm14,12
4435	mov	rdx,QWORD[((8+160+0))+rbp]
4436	mulx	rax,r10,r10
4437	add	r14,r10
4438	mulx	r9,r11,r11
4439	adc	r15,r11
4440	adc	r9,0
4441	imul	rdx,r12
4442	vpalignr	ymm5,ymm5,ymm5,4
4443	vpalignr	ymm9,ymm9,ymm9,8
4444	vpalignr	ymm13,ymm13,ymm13,12
4445	vpalignr	ymm4,ymm4,ymm4,4
4446	vpalignr	ymm8,ymm8,ymm8,8
4447	vpalignr	ymm12,ymm12,ymm12,12
; Diagonal round, first half (rotate by 16, then by 12).
4448	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4449	vmovdqa	ymm8,YMMWORD[$L$rol16]
4450	vpaddd	ymm3,ymm3,ymm7
4451	vpaddd	ymm2,ymm2,ymm6
4452	vpaddd	ymm1,ymm1,ymm5
4453	vpaddd	ymm0,ymm0,ymm4
4454	vpxor	ymm15,ymm15,ymm3
4455	vpxor	ymm14,ymm14,ymm2
4456	vpxor	ymm13,ymm13,ymm1
4457	vpxor	ymm12,ymm12,ymm0
4458	vpshufb	ymm15,ymm15,ymm8
4459	vpshufb	ymm14,ymm14,ymm8
4460	add	r15,rax
4461	adc	r9,rdx
4462	vpshufb	ymm13,ymm13,ymm8
4463	vpshufb	ymm12,ymm12,ymm8
4464	vpaddd	ymm11,ymm11,ymm15
4465	vpaddd	ymm10,ymm10,ymm14
4466	vpaddd	ymm9,ymm9,ymm13
4467	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4468	vpxor	ymm7,ymm7,ymm11
4469	vpxor	ymm6,ymm6,ymm10
4470	vpxor	ymm5,ymm5,ymm9
; Hash block 2: partial reduction mod 2^130-5.
4471	mov	r10,r13
4472	mov	r11,r14
4473	mov	r12,r15
4474	and	r12,3
4475	mov	r13,r15
4476	and	r13,-4
4477	mov	r14,r9
4478	shrd	r15,r9,2
4479	shr	r9,2
4480	add	r15,r13
4481	adc	r9,r14
4482	add	r10,r15
4483	adc	r11,r9
4484	adc	r12,0
4485	vpxor	ymm4,ymm4,ymm8
4486	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4487	vpsrld	ymm8,ymm7,20
4488	vpslld	ymm7,ymm7,32-20
4489	vpxor	ymm7,ymm7,ymm8
4490	vpsrld	ymm8,ymm6,20
4491	vpslld	ymm6,ymm6,32-20
4492	vpxor	ymm6,ymm6,ymm8
; Hash block 3 (input offset rcx+32).
4493	add	r10,QWORD[((0+32))+rcx*1+rsi]
4494	adc	r11,QWORD[((8+32))+rcx*1+rsi]
4495	adc	r12,1
4496
; Advance the hash offset by the 48 bytes absorbed this iteration; lea leaves
; the flags untouched.
4497	lea	rcx,[48+rcx]
4498	vpsrld	ymm8,ymm5,20
4499	vpslld	ymm5,ymm5,32-20
4500	vpxor	ymm5,ymm5,ymm8
4501	vpsrld	ymm8,ymm4,20
4502	vpslld	ymm4,ymm4,32-20
4503	vpxor	ymm4,ymm4,ymm8
; Diagonal round, second half (rotate by 8, then by 7).
4504	vmovdqa	ymm8,YMMWORD[$L$rol8]
4505	vpaddd	ymm3,ymm3,ymm7
4506	vpaddd	ymm2,ymm2,ymm6
4507	vpaddd	ymm1,ymm1,ymm5
4508	vpaddd	ymm0,ymm0,ymm4
4509	vpxor	ymm15,ymm15,ymm3
4510	vpxor	ymm14,ymm14,ymm2
4511	vpxor	ymm13,ymm13,ymm1
4512	vpxor	ymm12,ymm12,ymm0
4513	vpshufb	ymm15,ymm15,ymm8
4514	vpshufb	ymm14,ymm14,ymm8
4515	vpshufb	ymm13,ymm13,ymm8
4516	mov	rdx,QWORD[((0+160+0))+rbp]
4517	mov	r15,rdx
4518	mulx	r14,r13,r10
4519	mulx	rdx,rax,r11
4520	imul	r15,r12
4521	add	r14,rax
4522	adc	r15,rdx
4523	vpshufb	ymm12,ymm12,ymm8
4524	vpaddd	ymm11,ymm11,ymm15
4525	vpaddd	ymm10,ymm10,ymm14
4526	vpaddd	ymm9,ymm9,ymm13
4527	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
4528	vpxor	ymm7,ymm7,ymm11
4529	vpxor	ymm6,ymm6,ymm10
4530	vpxor	ymm5,ymm5,ymm9
4531	mov	rdx,QWORD[((8+160+0))+rbp]
4532	mulx	rax,r10,r10
4533	add	r14,r10
4534	mulx	r9,r11,r11
4535	adc	r15,r11
4536	adc	r9,0
4537	imul	rdx,r12
4538	vpxor	ymm4,ymm4,ymm8
4539	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
4540	vpsrld	ymm8,ymm7,25
4541	vpslld	ymm7,ymm7,32-25
4542	vpxor	ymm7,ymm7,ymm8
4543	vpsrld	ymm8,ymm6,25
4544	vpslld	ymm6,ymm6,32-25
4545	vpxor	ymm6,ymm6,ymm8
4546	add	r15,rax
4547	adc	r9,rdx
4548	vpsrld	ymm8,ymm5,25
4549	vpslld	ymm5,ymm5,32-25
4550	vpxor	ymm5,ymm5,ymm8
4551	vpsrld	ymm8,ymm4,25
4552	vpslld	ymm4,ymm4,32-25
4553	vpxor	ymm4,ymm4,ymm8
; Rotate rows B/C/D back (12/8/4) so the state is in column order again.
4554	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
4555	vpalignr	ymm7,ymm7,ymm7,12
4556	vpalignr	ymm11,ymm11,ymm11,8
4557	vpalignr	ymm15,ymm15,ymm15,4
4558	vpalignr	ymm6,ymm6,ymm6,12
4559	vpalignr	ymm10,ymm10,ymm10,8
4560	vpalignr	ymm14,ymm14,ymm14,4
4561	vpalignr	ymm5,ymm5,ymm5,12
4562	vpalignr	ymm9,ymm9,ymm9,8
4563	vpalignr	ymm13,ymm13,ymm13,4
4564	vpalignr	ymm4,ymm4,ymm4,12
4565	vpalignr	ymm8,ymm8,ymm8,8
; Hash block 3: partial reduction mod 2^130-5.
4566	mov	r10,r13
4567	mov	r11,r14
4568	mov	r12,r15
4569	and	r12,3
4570	mov	r13,r15
4571	and	r13,-4
4572	mov	r14,r9
4573	shrd	r15,r9,2
4574	shr	r9,2
4575	add	r15,r13
4576	adc	r9,r14
4577	add	r10,r15
4578	adc	r11,r9
4579	adc	r12,0
4580	vpalignr	ymm12,ymm12,ymm12,4
4581
; 10*6*8 = 480: stop after ten iterations of 48 hashed bytes each.
4582	cmp	rcx,10*6*8
4583	jne	NEAR $L$open_avx2_main_loop_rounds
; Output stage of the 512-byte main loop: add the initial rows back into the
; four ymm states (feed-forward), hash the last two 16-byte input blocks of
; this 512-byte chunk (offsets 480 and 496), XOR the keystream with the input
; and store 512 output bytes, then advance rsi/rdi/rbx and loop.
; Within each 128-byte group, the vperm2i128 pairs put the low-lane block
; (selector 0x02) before the high-lane block (selector 0x13).
4584	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
4585	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
4586	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
4587	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
4588	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
4589	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
4590	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
4591	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
4592	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
4593	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
4594	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
4595	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
4596	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4597	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4598	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4599	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4600
; Park ymm0 in the scratch slot so it can serve as a temporary below.
4601	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
; Hash the input block at offset 480 (acc += block plus the 2^128 pad bit).
4602	add	r10,QWORD[((0+480))+rsi]
4603	adc	r11,QWORD[((8+480))+rsi]
4604	adc	r12,1
; Bytes 0..127: keystream of the state held in ymm3/ymm7/ymm11/ymm15.
4605	vperm2i128	ymm0,ymm7,ymm3,0x02
4606	vperm2i128	ymm7,ymm7,ymm3,0x13
4607	vperm2i128	ymm3,ymm15,ymm11,0x02
4608	vperm2i128	ymm11,ymm15,ymm11,0x13
4609	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
4610	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
4611	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
4612	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
4613	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
4614	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
4615	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
4616	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
4617
; Restore ymm0, then multiply the accumulator by r (qwords at rbp+160 and
; rbp+168); the product is built in r13 (lowest), r14, r15, r9 (highest).
4618	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
4619	mov	rax,QWORD[((0+160+0))+rbp]
4620	mov	r15,rax
4621	mul	r10
4622	mov	r13,rax
4623	mov	r14,rdx
4624	mov	rax,QWORD[((0+160+0))+rbp]
4625	mul	r11
4626	imul	r15,r12
4627	add	r14,rax
4628	adc	r15,rdx
4629	mov	rax,QWORD[((8+160+0))+rbp]
4630	mov	r9,rax
4631	mul	r10
4632	add	r14,rax
4633	adc	rdx,0
4634	mov	r10,rdx
4635	mov	rax,QWORD[((8+160+0))+rbp]
4636	mul	r11
4637	add	r15,rax
4638	adc	rdx,0
4639	imul	r9,r12
4640	add	r15,r10
4641	adc	r9,rdx
; Partial reduction mod 2^130-5 (keep the low 130 bits and add 5*c, where c is
; everything above bit 130).
4642	mov	r10,r13
4643	mov	r11,r14
4644	mov	r12,r15
4645	and	r12,3
4646	mov	r13,r15
4647	and	r13,-4
4648	mov	r14,r9
4649	shrd	r15,r9,2
4650	shr	r9,2
4651	add	r15,r13
4652	adc	r9,r14
4653	add	r10,r15
4654	adc	r11,r9
4655	adc	r12,0
; Bytes 128..255: keystream of the state held in ymm2/ymm6/ymm10/ymm14.
4656	vperm2i128	ymm3,ymm6,ymm2,0x02
4657	vperm2i128	ymm6,ymm6,ymm2,0x13
4658	vperm2i128	ymm2,ymm14,ymm10,0x02
4659	vperm2i128	ymm10,ymm14,ymm10,0x13
4660	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
4661	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
4662	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
4663	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
4664	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
4665	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
4666	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
4667	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; Hash the input block at offset 496, the last of this 512-byte chunk.
4668	add	r10,QWORD[((0+480+16))+rsi]
4669	adc	r11,QWORD[((8+480+16))+rsi]
4670	adc	r12,1
; Bytes 256..383: keystream of the state held in ymm1/ymm5/ymm9/ymm13.
4671	vperm2i128	ymm3,ymm5,ymm1,0x02
4672	vperm2i128	ymm5,ymm5,ymm1,0x13
4673	vperm2i128	ymm1,ymm13,ymm9,0x02
4674	vperm2i128	ymm9,ymm13,ymm9,0x13
4675	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
4676	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
4677	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
4678	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
4679	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
4680	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
4681	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
4682	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; Multiply by r and reduce for the block absorbed at offset 496.
4683	mov	rax,QWORD[((0+160+0))+rbp]
4684	mov	r15,rax
4685	mul	r10
4686	mov	r13,rax
4687	mov	r14,rdx
4688	mov	rax,QWORD[((0+160+0))+rbp]
4689	mul	r11
4690	imul	r15,r12
4691	add	r14,rax
4692	adc	r15,rdx
4693	mov	rax,QWORD[((8+160+0))+rbp]
4694	mov	r9,rax
4695	mul	r10
4696	add	r14,rax
4697	adc	rdx,0
4698	mov	r10,rdx
4699	mov	rax,QWORD[((8+160+0))+rbp]
4700	mul	r11
4701	add	r15,rax
4702	adc	rdx,0
4703	imul	r9,r12
4704	add	r15,r10
4705	adc	r9,rdx
4706	mov	r10,r13
4707	mov	r11,r14
4708	mov	r12,r15
4709	and	r12,3
4710	mov	r13,r15
4711	and	r13,-4
4712	mov	r14,r9
4713	shrd	r15,r9,2
4714	shr	r9,2
4715	add	r15,r13
4716	adc	r9,r14
4717	add	r10,r15
4718	adc	r11,r9
4719	adc	r12,0
; Bytes 384..511: keystream of the state held in ymm0/ymm4/ymm8/ymm12.
4720	vperm2i128	ymm3,ymm4,ymm0,0x02
4721	vperm2i128	ymm4,ymm4,ymm0,0x13
4722	vperm2i128	ymm0,ymm12,ymm8,0x02
4723	vperm2i128	ymm8,ymm12,ymm8,0x13
4724	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
4725	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
4726	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
4727	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
4728	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
4729	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
4730	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
4731	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
4732
; Advance past the 512 bytes just processed and try for another full chunk.
4733	lea	rsi,[512+rsi]
4734	lea	rdi,[512+rdi]
4735	sub	rbx,16*32
4736	jmp	NEAR $L$open_avx2_main_loop
; $L$open_avx2_main_loop_done - fewer than 512 bytes remain (rbx < 512).
; Dispatches on the remaining length: 0 -> $L$open_sse_finalize,
; > 384 -> tail_512, > 256 -> tail_384, > 128 -> tail_256; otherwise falls
; through to the <= 128-byte tail set up below.
4737$L$open_avx2_main_loop_done:
; vzeroupper does not modify flags, so the je still tests "rbx == 0".
4738	test	rbx,rbx
4739	vzeroupper
4740	je	NEAR $L$open_sse_finalize
4741
4742	cmp	rbx,12*32
4743	ja	NEAR $L$open_avx2_tail_512
4744	cmp	rbx,8*32
4745	ja	NEAR $L$open_avx2_tail_384
4746	cmp	rbx,4*32
4747	ja	NEAR $L$open_avx2_tail_256
; <= 128-byte tail: one ymm state (two blocks), counters advanced by 2 per
; lane from the saved counter row, which is updated in place.
4748	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4749	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4750	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4751	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4752	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
4753	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4754
; r8 = bytes hashed so far (it also drives the round count below);
; rcx = remaining length rounded down to whole 16-byte blocks.
; With no whole block to hash, enter the round loop past its hashing part.
4755	xor	r8,r8
4756	mov	rcx,rbx
4757	and	rcx,-16
4758	test	rcx,rcx
4759	je	NEAR $L$open_avx2_tail_128_rounds
; $L$open_avx2_tail_128_rounds_and_x1hash - absorbs the 16-byte input block at
; [rsi+r8] into the Poly1305 accumulator (r12:r11:r10), then falls through
; into $L$open_avx2_tail_128_rounds for one ChaCha20 double round.
4760$L$open_avx2_tail_128_rounds_and_x1hash:
; acc += 16 input bytes plus the 2^128 pad bit.
4761	add	r10,QWORD[((0+0))+r8*1+rsi]
4762	adc	r11,QWORD[((8+0))+r8*1+rsi]
4763	adc	r12,1
; acc *= r (qwords at rbp+160 and rbp+168); the product is built in
; r13 (lowest), r14, r15, r9 (highest).
4764	mov	rax,QWORD[((0+160+0))+rbp]
4765	mov	r15,rax
4766	mul	r10
4767	mov	r13,rax
4768	mov	r14,rdx
4769	mov	rax,QWORD[((0+160+0))+rbp]
4770	mul	r11
4771	imul	r15,r12
4772	add	r14,rax
4773	adc	r15,rdx
4774	mov	rax,QWORD[((8+160+0))+rbp]
4775	mov	r9,rax
4776	mul	r10
4777	add	r14,rax
4778	adc	rdx,0
4779	mov	r10,rdx
4780	mov	rax,QWORD[((8+160+0))+rbp]
4781	mul	r11
4782	add	r15,rax
4783	adc	rdx,0
4784	imul	r9,r12
4785	add	r15,r10
4786	adc	r9,rdx
; Partial reduction mod 2^130-5: keep the low 130 bits and add 5*c, where c is
; everything above bit 130 (4*c from the masked limbs, c from the shift).
4787	mov	r10,r13
4788	mov	r11,r14
4789	mov	r12,r15
4790	and	r12,3
4791	mov	r13,r15
4792	and	r13,-4
4793	mov	r14,r9
4794	shrd	r15,r9,2
4795	shr	r9,2
4796	add	r15,r13
4797	adc	r9,r14
4798	add	r10,r15
4799	adc	r11,r9
4800	adc	r12,0
4801
; $L$open_avx2_tail_128_rounds - one ChaCha20 double round on the single ymm
; state (ymm0/ymm4/ymm8/ymm12, two blocks), with ymm3 as rotate scratch.
; r8 advances by 16 per iteration: while r8 < rcx the next iteration starts at
; the hashing label, and the loop ends at r8 == 160 (10 double rounds).
4802$L$open_avx2_tail_128_rounds:
4803	add	r8,16
; Column round.
4804	vpaddd	ymm0,ymm0,ymm4
4805	vpxor	ymm12,ymm12,ymm0
4806	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4807	vpaddd	ymm8,ymm8,ymm12
4808	vpxor	ymm4,ymm4,ymm8
4809	vpsrld	ymm3,ymm4,20
4810	vpslld	ymm4,ymm4,12
4811	vpxor	ymm4,ymm4,ymm3
4812	vpaddd	ymm0,ymm0,ymm4
4813	vpxor	ymm12,ymm12,ymm0
4814	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4815	vpaddd	ymm8,ymm8,ymm12
4816	vpxor	ymm4,ymm4,ymm8
4817	vpslld	ymm3,ymm4,7
4818	vpsrld	ymm4,ymm4,25
4819	vpxor	ymm4,ymm4,ymm3
; Rotate rows so the diagonals become columns.
4820	vpalignr	ymm12,ymm12,ymm12,12
4821	vpalignr	ymm8,ymm8,ymm8,8
4822	vpalignr	ymm4,ymm4,ymm4,4
; Diagonal round.
4823	vpaddd	ymm0,ymm0,ymm4
4824	vpxor	ymm12,ymm12,ymm0
4825	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4826	vpaddd	ymm8,ymm8,ymm12
4827	vpxor	ymm4,ymm4,ymm8
4828	vpsrld	ymm3,ymm4,20
4829	vpslld	ymm4,ymm4,12
4830	vpxor	ymm4,ymm4,ymm3
4831	vpaddd	ymm0,ymm0,ymm4
4832	vpxor	ymm12,ymm12,ymm0
4833	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4834	vpaddd	ymm8,ymm8,ymm12
4835	vpxor	ymm4,ymm4,ymm8
4836	vpslld	ymm3,ymm4,7
4837	vpsrld	ymm4,ymm4,25
4838	vpxor	ymm4,ymm4,ymm3
; Undo the row rotation.
4839	vpalignr	ymm12,ymm12,ymm12,4
4840	vpalignr	ymm8,ymm8,ymm8,8
4841	vpalignr	ymm4,ymm4,ymm4,12
4842
; More whole input blocks to hash: loop via the hashing label.
; Otherwise keep running bare rounds until all 10 are done.
4843	cmp	r8,rcx
4844	jb	NEAR $L$open_avx2_tail_128_rounds_and_x1hash
4845	cmp	r8,160
4846	jne	NEAR $L$open_avx2_tail_128_rounds
; Feed-forward with the saved initial rows.
4847	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
4848	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
4849	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
4850	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; De-interleave the lanes so ymm0, ymm4, ymm8, ymm12 hold 128 consecutive
; keystream bytes (low-lane block first, then high-lane block).
4851	vperm2i128	ymm3,ymm4,ymm0,0x13
4852	vperm2i128	ymm0,ymm4,ymm0,0x02
4853	vperm2i128	ymm4,ymm12,ymm8,0x02
4854	vperm2i128	ymm12,ymm12,ymm8,0x13
4855	vmovdqa	ymm8,ymm3
4856
; $L$open_avx2_tail_128_xor is outside this chunk; presumably it XORs the
; remaining input with this keystream - confirm.
4857	jmp	NEAR $L$open_avx2_tail_128_xor
4858
; $L$open_avx2_tail_256 - tail path for 128 < rbx <= 256 remaining bytes.
; Sets up two ymm states (four blocks): ymm1/5/9/13 get the lower counters
; (+2 per lane) and ymm0/4/8/12 the higher ones (+4 per lane).
4859$L$open_avx2_tail_256:
4860	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
4861	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
4862	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
4863	vmovdqa	ymm1,ymm0
4864	vmovdqa	ymm5,ymm4
4865	vmovdqa	ymm9,ymm8
4866	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
4867	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
4868	vpaddd	ymm12,ymm12,ymm13
; Save both initial counter rows for the feed-forward.
4869	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
4870	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
4871
; rbx is about to be reused as the hash read pointer, so the remaining length
; is parked in the scratch slot at rbp+160+128.
; rcx = min(10, (len - 128) / 16): the number of double-round iterations that
; are paired with one hashed block. r8 counts iterations from 0.
4872	mov	QWORD[((160+128))+rbp],rbx
4873	mov	rcx,rbx
4874	sub	rcx,4*32
4875	shr	rcx,4
4876	mov	r8,10
4877	cmp	rcx,10
4878	cmovg	rcx,r8
4879	mov	rbx,rsi
4880	xor	r8,r8
; $L$open_avx2_tail_256_rounds_and_x1hash - absorbs the 16-byte input block at
; [rbx] into the Poly1305 accumulator (r12:r11:r10), advances rbx by 16, then
; falls through into $L$open_avx2_tail_256_rounds.
4881$L$open_avx2_tail_256_rounds_and_x1hash:
; acc += 16 input bytes plus the 2^128 pad bit.
4882	add	r10,QWORD[((0+0))+rbx]
4883	adc	r11,QWORD[((8+0))+rbx]
4884	adc	r12,1
; acc *= r using mulx (implicit multiplicand in rdx); the product is built in
; r13 (lowest), r14, r15, r9 (highest).
4885	mov	rdx,QWORD[((0+160+0))+rbp]
4886	mov	r15,rdx
4887	mulx	r14,r13,r10
4888	mulx	rdx,rax,r11
4889	imul	r15,r12
4890	add	r14,rax
4891	adc	r15,rdx
4892	mov	rdx,QWORD[((8+160+0))+rbp]
4893	mulx	rax,r10,r10
4894	add	r14,r10
4895	mulx	r9,r11,r11
4896	adc	r15,r11
4897	adc	r9,0
4898	imul	rdx,r12
4899	add	r15,rax
4900	adc	r9,rdx
; Partial reduction mod 2^130-5 (keep the low 130 bits and add 5*c, where c is
; everything above bit 130).
4901	mov	r10,r13
4902	mov	r11,r14
4903	mov	r12,r15
4904	and	r12,3
4905	mov	r13,r15
4906	and	r13,-4
4907	mov	r14,r9
4908	shrd	r15,r9,2
4909	shr	r9,2
4910	add	r15,r13
4911	adc	r9,r14
4912	add	r10,r15
4913	adc	r11,r9
4914	adc	r12,0
4915
4916	lea	rbx,[16+rbx]
; $L$open_avx2_tail_256_rounds - one ChaCha20 double round over the two ymm
; states of the 256-byte tail (ymm0/4/8/12 and ymm1/5/9/13), with ymm3 as
; rotate scratch. r8 counts iterations: while r8 < rcx the next iteration
; starts at the hashing label, and the loop ends at r8 == 10.
4917$L$open_avx2_tail_256_rounds:
; Column round, state ymm0/4/8/12.
4918	vpaddd	ymm0,ymm0,ymm4
4919	vpxor	ymm12,ymm12,ymm0
4920	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4921	vpaddd	ymm8,ymm8,ymm12
4922	vpxor	ymm4,ymm4,ymm8
4923	vpsrld	ymm3,ymm4,20
4924	vpslld	ymm4,ymm4,12
4925	vpxor	ymm4,ymm4,ymm3
4926	vpaddd	ymm0,ymm0,ymm4
4927	vpxor	ymm12,ymm12,ymm0
4928	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4929	vpaddd	ymm8,ymm8,ymm12
4930	vpxor	ymm4,ymm4,ymm8
4931	vpslld	ymm3,ymm4,7
4932	vpsrld	ymm4,ymm4,25
4933	vpxor	ymm4,ymm4,ymm3
4934	vpalignr	ymm12,ymm12,ymm12,12
4935	vpalignr	ymm8,ymm8,ymm8,8
4936	vpalignr	ymm4,ymm4,ymm4,4
; Column round, state ymm1/5/9/13.
4937	vpaddd	ymm1,ymm1,ymm5
4938	vpxor	ymm13,ymm13,ymm1
4939	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4940	vpaddd	ymm9,ymm9,ymm13
4941	vpxor	ymm5,ymm5,ymm9
4942	vpsrld	ymm3,ymm5,20
4943	vpslld	ymm5,ymm5,12
4944	vpxor	ymm5,ymm5,ymm3
4945	vpaddd	ymm1,ymm1,ymm5
4946	vpxor	ymm13,ymm13,ymm1
4947	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4948	vpaddd	ymm9,ymm9,ymm13
4949	vpxor	ymm5,ymm5,ymm9
4950	vpslld	ymm3,ymm5,7
4951	vpsrld	ymm5,ymm5,25
4952	vpxor	ymm5,ymm5,ymm3
4953	vpalignr	ymm13,ymm13,ymm13,12
4954	vpalignr	ymm9,ymm9,ymm9,8
4955	vpalignr	ymm5,ymm5,ymm5,4
4956
4957	inc	r8
; Diagonal round, state ymm0/4/8/12.
4958	vpaddd	ymm0,ymm0,ymm4
4959	vpxor	ymm12,ymm12,ymm0
4960	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
4961	vpaddd	ymm8,ymm8,ymm12
4962	vpxor	ymm4,ymm4,ymm8
4963	vpsrld	ymm3,ymm4,20
4964	vpslld	ymm4,ymm4,12
4965	vpxor	ymm4,ymm4,ymm3
4966	vpaddd	ymm0,ymm0,ymm4
4967	vpxor	ymm12,ymm12,ymm0
4968	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
4969	vpaddd	ymm8,ymm8,ymm12
4970	vpxor	ymm4,ymm4,ymm8
4971	vpslld	ymm3,ymm4,7
4972	vpsrld	ymm4,ymm4,25
4973	vpxor	ymm4,ymm4,ymm3
4974	vpalignr	ymm12,ymm12,ymm12,4
4975	vpalignr	ymm8,ymm8,ymm8,8
4976	vpalignr	ymm4,ymm4,ymm4,12
; Diagonal round, state ymm1/5/9/13.
4977	vpaddd	ymm1,ymm1,ymm5
4978	vpxor	ymm13,ymm13,ymm1
4979	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
4980	vpaddd	ymm9,ymm9,ymm13
4981	vpxor	ymm5,ymm5,ymm9
4982	vpsrld	ymm3,ymm5,20
4983	vpslld	ymm5,ymm5,12
4984	vpxor	ymm5,ymm5,ymm3
4985	vpaddd	ymm1,ymm1,ymm5
4986	vpxor	ymm13,ymm13,ymm1
4987	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
4988	vpaddd	ymm9,ymm9,ymm13
4989	vpxor	ymm5,ymm5,ymm9
4990	vpslld	ymm3,ymm5,7
4991	vpsrld	ymm5,ymm5,25
4992	vpxor	ymm5,ymm5,ymm3
4993	vpalignr	ymm13,ymm13,ymm13,4
4994	vpalignr	ymm9,ymm9,ymm9,8
4995	vpalignr	ymm5,ymm5,ymm5,12
; NOTE(review): the quarter round below works on ymm2/ymm6/ymm10/ymm14. The
; setup at $L$open_avx2_tail_256 does not initialize those registers and the
; code at $L$open_avx2_tail_256_done does not read them, so this looks like
; dead work in this path. $L$open_avx2_tail_128_xor is outside this chunk -
; confirm against the generator script before changing anything.
4996	vpaddd	ymm2,ymm2,ymm6
4997	vpxor	ymm14,ymm14,ymm2
4998	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
4999	vpaddd	ymm10,ymm10,ymm14
5000	vpxor	ymm6,ymm6,ymm10
5001	vpsrld	ymm3,ymm6,20
5002	vpslld	ymm6,ymm6,12
5003	vpxor	ymm6,ymm6,ymm3
5004	vpaddd	ymm2,ymm2,ymm6
5005	vpxor	ymm14,ymm14,ymm2
5006	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5007	vpaddd	ymm10,ymm10,ymm14
5008	vpxor	ymm6,ymm6,ymm10
5009	vpslld	ymm3,ymm6,7
5010	vpsrld	ymm6,ymm6,25
5011	vpxor	ymm6,ymm6,ymm3
5012	vpalignr	ymm14,ymm14,ymm14,4
5013	vpalignr	ymm10,ymm10,ymm10,8
5014	vpalignr	ymm6,ymm6,ymm6,12
5015
5016	cmp	r8,rcx
5017	jb	NEAR $L$open_avx2_tail_256_rounds_and_x1hash
5018	cmp	r8,10
5019	jne	NEAR $L$open_avx2_tail_256_rounds
; Rounds done. r8 = pointer to the next unhashed input byte, rcx = number of
; bytes hashed so far (rbx - rsi), rbx = remaining length restored from the
; scratch slot at rbp+160+128.
5020	mov	r8,rbx
5021	sub	rbx,rsi
5022	mov	rcx,rbx
5023	mov	rbx,QWORD[((160+128))+rbp]
; $L$open_avx2_tail_256_hash - hashes the whole 16-byte input blocks that the
; round loop did not cover. rcx = bytes hashed so far, rbx = remaining length,
; r8 = read pointer. Exits to $L$open_avx2_tail_256_done as soon as another
; 16 bytes would run past rbx (signed compare on these byte counts).
5024$L$open_avx2_tail_256_hash:
5025	add	rcx,16
5026	cmp	rcx,rbx
5027	jg	NEAR $L$open_avx2_tail_256_done
; acc += 16 input bytes plus the 2^128 pad bit.
5028	add	r10,QWORD[((0+0))+r8]
5029	adc	r11,QWORD[((8+0))+r8]
5030	adc	r12,1
; acc *= r (qwords at rbp+160 and rbp+168) using mulx; the product is built in
; r13 (lowest), r14, r15, r9 (highest).
5031	mov	rdx,QWORD[((0+160+0))+rbp]
5032	mov	r15,rdx
5033	mulx	r14,r13,r10
5034	mulx	rdx,rax,r11
5035	imul	r15,r12
5036	add	r14,rax
5037	adc	r15,rdx
5038	mov	rdx,QWORD[((8+160+0))+rbp]
5039	mulx	rax,r10,r10
5040	add	r14,r10
5041	mulx	r9,r11,r11
5042	adc	r15,r11
5043	adc	r9,0
5044	imul	rdx,r12
5045	add	r15,rax
5046	adc	r9,rdx
; Partial reduction mod 2^130-5 (keep the low 130 bits and add 5*c, where c is
; everything above bit 130).
5047	mov	r10,r13
5048	mov	r11,r14
5049	mov	r12,r15
5050	and	r12,3
5051	mov	r13,r15
5052	and	r13,-4
5053	mov	r14,r9
5054	shrd	r15,r9,2
5055	shr	r9,2
5056	add	r15,r13
5057	adc	r9,r14
5058	add	r10,r15
5059	adc	r11,r9
5060	adc	r12,0
5061
5062	lea	r8,[16+r8]
5063	jmp	NEAR $L$open_avx2_tail_256_hash
; $L$open_avx2_tail_256_done - feed-forward for both states of the 256-byte
; tail, decrypts the first 128 bytes with the ymm1/5/9/13 state, and leaves
; the other state's 128 keystream bytes in ymm0, ymm4, ymm8, ymm12 for
; $L$open_avx2_tail_128_xor (outside this chunk).
5064$L$open_avx2_tail_256_done:
5065	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5066	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5067	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5068	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5069	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5070	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5071	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5072	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Bytes 0..127: de-interleave the lanes (low-lane block first), XOR, store.
5073	vperm2i128	ymm3,ymm5,ymm1,0x02
5074	vperm2i128	ymm5,ymm5,ymm1,0x13
5075	vperm2i128	ymm1,ymm13,ymm9,0x02
5076	vperm2i128	ymm9,ymm13,ymm9,0x13
5077	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5078	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
5079	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
5080	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
5081	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5082	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
5083	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
5084	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; Remaining keystream, in byte order: ymm0, ymm4, ymm8, ymm12.
5085	vperm2i128	ymm3,ymm4,ymm0,0x13
5086	vperm2i128	ymm0,ymm4,ymm0,0x02
5087	vperm2i128	ymm4,ymm12,ymm8,0x02
5088	vperm2i128	ymm12,ymm12,ymm8,0x13
5089	vmovdqa	ymm8,ymm3
5090
; Account for the 128 bytes written above.
5091	lea	rsi,[128+rsi]
5092	lea	rdi,[128+rdi]
5093	sub	rbx,4*32
5094	jmp	NEAR $L$open_avx2_tail_128_xor
5095
; $L$open_avx2_tail_384 - tail path for 256 < rbx <= 384 remaining bytes.
; Sets up three ymm states (six blocks): ymm2/6/10/14 get the lowest counters
; (+2 per lane), ymm1/5/9/13 the next (+4), ymm0/4/8/12 the highest (+6).
5096$L$open_avx2_tail_384:
5097	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5098	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5099	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5100	vmovdqa	ymm1,ymm0
5101	vmovdqa	ymm5,ymm4
5102	vmovdqa	ymm9,ymm8
5103	vmovdqa	ymm2,ymm0
5104	vmovdqa	ymm6,ymm4
5105	vmovdqa	ymm10,ymm8
5106	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5107	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
5108	vpaddd	ymm13,ymm12,ymm14
5109	vpaddd	ymm12,ymm12,ymm13
; Save the three initial counter rows for the feed-forward.
5110	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5111	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5112	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5113
; rbx is about to be reused as the hash read pointer, so the remaining length
; is parked in the scratch slot at rbp+160+128.
; rcx = min(10, (len - 256) / 16 + 6): iterations with r8 < rcx re-enter the
; loop at the two-block hashing label, later ones at the one-block label.
; r8 counts iterations from 0.
5114	mov	QWORD[((160+128))+rbp],rbx
5115	mov	rcx,rbx
5116	sub	rcx,8*32
5117	shr	rcx,4
5118	add	rcx,6
5119	mov	r8,10
5120	cmp	rcx,10
5121	cmovg	rcx,r8
5122	mov	rbx,rsi
5123	xor	r8,r8
; $L$open_avx2_tail_384_rounds_and_x2hash - absorbs one extra 16-byte input
; block at [rbx] into the Poly1305 accumulator (r12:r11:r10), advances rbx by
; 16, then falls through into $L$open_avx2_tail_384_rounds_and_x1hash, which
; hashes another block: two blocks in total for this iteration.
5124$L$open_avx2_tail_384_rounds_and_x2hash:
; acc += 16 input bytes plus the 2^128 pad bit.
5125	add	r10,QWORD[((0+0))+rbx]
5126	adc	r11,QWORD[((8+0))+rbx]
5127	adc	r12,1
; acc *= r (qwords at rbp+160 and rbp+168) using mulx; the product is built in
; r13 (lowest), r14, r15, r9 (highest).
5128	mov	rdx,QWORD[((0+160+0))+rbp]
5129	mov	r15,rdx
5130	mulx	r14,r13,r10
5131	mulx	rdx,rax,r11
5132	imul	r15,r12
5133	add	r14,rax
5134	adc	r15,rdx
5135	mov	rdx,QWORD[((8+160+0))+rbp]
5136	mulx	rax,r10,r10
5137	add	r14,r10
5138	mulx	r9,r11,r11
5139	adc	r15,r11
5140	adc	r9,0
5141	imul	rdx,r12
5142	add	r15,rax
5143	adc	r9,rdx
; Partial reduction mod 2^130-5 (keep the low 130 bits and add 5*c, where c is
; everything above bit 130).
5144	mov	r10,r13
5145	mov	r11,r14
5146	mov	r12,r15
5147	and	r12,3
5148	mov	r13,r15
5149	and	r13,-4
5150	mov	r14,r9
5151	shrd	r15,r9,2
5152	shr	r9,2
5153	add	r15,r13
5154	adc	r9,r14
5155	add	r10,r15
5156	adc	r11,r9
5157	adc	r12,0
5158
5159	lea	rbx,[16+rbx]
; $L$open_avx2_tail_384_rounds_and_x1hash - one ChaCha20 double round over the
; three ymm states of the 384-byte tail (ymm2/6/10/14, ymm1/5/9/13 and
; ymm0/4/8/12; ymm3 is rotate scratch), with one 16-byte Poly1305 block
; absorbed from [rbx] between the column and diagonal rounds.
; r8 counts iterations: while r8 < rcx the next iteration starts at the
; two-block hashing label, and the loop ends at r8 == 10.
5160$L$open_avx2_tail_384_rounds_and_x1hash:
; Column round, state ymm2/6/10/14.
5161	vpaddd	ymm2,ymm2,ymm6
5162	vpxor	ymm14,ymm14,ymm2
5163	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5164	vpaddd	ymm10,ymm10,ymm14
5165	vpxor	ymm6,ymm6,ymm10
5166	vpsrld	ymm3,ymm6,20
5167	vpslld	ymm6,ymm6,12
5168	vpxor	ymm6,ymm6,ymm3
5169	vpaddd	ymm2,ymm2,ymm6
5170	vpxor	ymm14,ymm14,ymm2
5171	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5172	vpaddd	ymm10,ymm10,ymm14
5173	vpxor	ymm6,ymm6,ymm10
5174	vpslld	ymm3,ymm6,7
5175	vpsrld	ymm6,ymm6,25
5176	vpxor	ymm6,ymm6,ymm3
5177	vpalignr	ymm14,ymm14,ymm14,12
5178	vpalignr	ymm10,ymm10,ymm10,8
5179	vpalignr	ymm6,ymm6,ymm6,4
; Column round, state ymm1/5/9/13.
5180	vpaddd	ymm1,ymm1,ymm5
5181	vpxor	ymm13,ymm13,ymm1
5182	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5183	vpaddd	ymm9,ymm9,ymm13
5184	vpxor	ymm5,ymm5,ymm9
5185	vpsrld	ymm3,ymm5,20
5186	vpslld	ymm5,ymm5,12
5187	vpxor	ymm5,ymm5,ymm3
5188	vpaddd	ymm1,ymm1,ymm5
5189	vpxor	ymm13,ymm13,ymm1
5190	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5191	vpaddd	ymm9,ymm9,ymm13
5192	vpxor	ymm5,ymm5,ymm9
5193	vpslld	ymm3,ymm5,7
5194	vpsrld	ymm5,ymm5,25
5195	vpxor	ymm5,ymm5,ymm3
5196	vpalignr	ymm13,ymm13,ymm13,12
5197	vpalignr	ymm9,ymm9,ymm9,8
5198	vpalignr	ymm5,ymm5,ymm5,4
; Column round, state ymm0/4/8/12.
5199	vpaddd	ymm0,ymm0,ymm4
5200	vpxor	ymm12,ymm12,ymm0
5201	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5202	vpaddd	ymm8,ymm8,ymm12
5203	vpxor	ymm4,ymm4,ymm8
5204	vpsrld	ymm3,ymm4,20
5205	vpslld	ymm4,ymm4,12
5206	vpxor	ymm4,ymm4,ymm3
5207	vpaddd	ymm0,ymm0,ymm4
5208	vpxor	ymm12,ymm12,ymm0
5209	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5210	vpaddd	ymm8,ymm8,ymm12
5211	vpxor	ymm4,ymm4,ymm8
5212	vpslld	ymm3,ymm4,7
5213	vpsrld	ymm4,ymm4,25
5214	vpxor	ymm4,ymm4,ymm3
5215	vpalignr	ymm12,ymm12,ymm12,12
5216	vpalignr	ymm8,ymm8,ymm8,8
5217	vpalignr	ymm4,ymm4,ymm4,4
; Poly1305: acc += 16 input bytes at [rbx] plus the 2^128 pad bit, acc *= r
; (qwords at rbp+160 and rbp+168), then partial reduction mod 2^130-5.
5218	add	r10,QWORD[((0+0))+rbx]
5219	adc	r11,QWORD[((8+0))+rbx]
5220	adc	r12,1
5221	mov	rax,QWORD[((0+160+0))+rbp]
5222	mov	r15,rax
5223	mul	r10
5224	mov	r13,rax
5225	mov	r14,rdx
5226	mov	rax,QWORD[((0+160+0))+rbp]
5227	mul	r11
5228	imul	r15,r12
5229	add	r14,rax
5230	adc	r15,rdx
5231	mov	rax,QWORD[((8+160+0))+rbp]
5232	mov	r9,rax
5233	mul	r10
5234	add	r14,rax
5235	adc	rdx,0
5236	mov	r10,rdx
5237	mov	rax,QWORD[((8+160+0))+rbp]
5238	mul	r11
5239	add	r15,rax
5240	adc	rdx,0
5241	imul	r9,r12
5242	add	r15,r10
5243	adc	r9,rdx
5244	mov	r10,r13
5245	mov	r11,r14
5246	mov	r12,r15
5247	and	r12,3
5248	mov	r13,r15
5249	and	r13,-4
5250	mov	r14,r9
5251	shrd	r15,r9,2
5252	shr	r9,2
5253	add	r15,r13
5254	adc	r9,r14
5255	add	r10,r15
5256	adc	r11,r9
5257	adc	r12,0
5258
5259	lea	rbx,[16+rbx]
5260	inc	r8
; Diagonal round, state ymm2/6/10/14.
5261	vpaddd	ymm2,ymm2,ymm6
5262	vpxor	ymm14,ymm14,ymm2
5263	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
5264	vpaddd	ymm10,ymm10,ymm14
5265	vpxor	ymm6,ymm6,ymm10
5266	vpsrld	ymm3,ymm6,20
5267	vpslld	ymm6,ymm6,12
5268	vpxor	ymm6,ymm6,ymm3
5269	vpaddd	ymm2,ymm2,ymm6
5270	vpxor	ymm14,ymm14,ymm2
5271	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
5272	vpaddd	ymm10,ymm10,ymm14
5273	vpxor	ymm6,ymm6,ymm10
5274	vpslld	ymm3,ymm6,7
5275	vpsrld	ymm6,ymm6,25
5276	vpxor	ymm6,ymm6,ymm3
5277	vpalignr	ymm14,ymm14,ymm14,4
5278	vpalignr	ymm10,ymm10,ymm10,8
5279	vpalignr	ymm6,ymm6,ymm6,12
; Diagonal round, state ymm1/5/9/13.
5280	vpaddd	ymm1,ymm1,ymm5
5281	vpxor	ymm13,ymm13,ymm1
5282	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5283	vpaddd	ymm9,ymm9,ymm13
5284	vpxor	ymm5,ymm5,ymm9
5285	vpsrld	ymm3,ymm5,20
5286	vpslld	ymm5,ymm5,12
5287	vpxor	ymm5,ymm5,ymm3
5288	vpaddd	ymm1,ymm1,ymm5
5289	vpxor	ymm13,ymm13,ymm1
5290	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5291	vpaddd	ymm9,ymm9,ymm13
5292	vpxor	ymm5,ymm5,ymm9
5293	vpslld	ymm3,ymm5,7
5294	vpsrld	ymm5,ymm5,25
5295	vpxor	ymm5,ymm5,ymm3
5296	vpalignr	ymm13,ymm13,ymm13,4
5297	vpalignr	ymm9,ymm9,ymm9,8
5298	vpalignr	ymm5,ymm5,ymm5,12
; Diagonal round, state ymm0/4/8/12.
5299	vpaddd	ymm0,ymm0,ymm4
5300	vpxor	ymm12,ymm12,ymm0
5301	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5302	vpaddd	ymm8,ymm8,ymm12
5303	vpxor	ymm4,ymm4,ymm8
5304	vpsrld	ymm3,ymm4,20
5305	vpslld	ymm4,ymm4,12
5306	vpxor	ymm4,ymm4,ymm3
5307	vpaddd	ymm0,ymm0,ymm4
5308	vpxor	ymm12,ymm12,ymm0
5309	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5310	vpaddd	ymm8,ymm8,ymm12
5311	vpxor	ymm4,ymm4,ymm8
5312	vpslld	ymm3,ymm4,7
5313	vpsrld	ymm4,ymm4,25
5314	vpxor	ymm4,ymm4,ymm3
5315	vpalignr	ymm12,ymm12,ymm12,4
5316	vpalignr	ymm8,ymm8,ymm8,8
5317	vpalignr	ymm4,ymm4,ymm4,12
5318
5319	cmp	r8,rcx
5320	jb	NEAR $L$open_avx2_tail_384_rounds_and_x2hash
5321	cmp	r8,10
5322	jne	NEAR $L$open_avx2_tail_384_rounds_and_x1hash
; Rounds done. r8 = pointer to the next unhashed input byte, rcx = number of
; bytes hashed so far (rbx - rsi), rbx = remaining length restored from the
; scratch slot at rbp+160+128.
5323	mov	r8,rbx
5324	sub	rbx,rsi
5325	mov	rcx,rbx
5326	mov	rbx,QWORD[((160+128))+rbp]
; $L$open_avx2_384_tail_hash - hashes the whole 16-byte input blocks that the
; round loop did not cover. rcx = bytes hashed so far, rbx = remaining length,
; r8 = read pointer. Exits to $L$open_avx2_384_tail_done as soon as another
; 16 bytes would run past rbx (signed compare on these byte counts).
5327$L$open_avx2_384_tail_hash:
5328	add	rcx,16
5329	cmp	rcx,rbx
5330	jg	NEAR $L$open_avx2_384_tail_done
; acc += 16 input bytes plus the 2^128 pad bit.
5331	add	r10,QWORD[((0+0))+r8]
5332	adc	r11,QWORD[((8+0))+r8]
5333	adc	r12,1
; acc *= r (qwords at rbp+160 and rbp+168) using mulx; the product is built in
; r13 (lowest), r14, r15, r9 (highest).
5334	mov	rdx,QWORD[((0+160+0))+rbp]
5335	mov	r15,rdx
5336	mulx	r14,r13,r10
5337	mulx	rdx,rax,r11
5338	imul	r15,r12
5339	add	r14,rax
5340	adc	r15,rdx
5341	mov	rdx,QWORD[((8+160+0))+rbp]
5342	mulx	rax,r10,r10
5343	add	r14,r10
5344	mulx	r9,r11,r11
5345	adc	r15,r11
5346	adc	r9,0
5347	imul	rdx,r12
5348	add	r15,rax
5349	adc	r9,rdx
; Partial reduction mod 2^130-5 (keep the low 130 bits and add 5*c, where c is
; everything above bit 130).
5350	mov	r10,r13
5351	mov	r11,r14
5352	mov	r12,r15
5353	and	r12,3
5354	mov	r13,r15
5355	and	r13,-4
5356	mov	r14,r9
5357	shrd	r15,r9,2
5358	shr	r9,2
5359	add	r15,r13
5360	adc	r9,r14
5361	add	r10,r15
5362	adc	r11,r9
5363	adc	r12,0
5364
5365	lea	r8,[16+r8]
5366	jmp	NEAR $L$open_avx2_384_tail_hash
; $L$open_avx2_384_tail_done - feed-forward for the three states of the
; 384-byte tail, decrypts the first 256 bytes (ymm2/6/10/14 state, then
; ymm1/5/9/13 state), and leaves the last state's 128 keystream bytes in
; ymm0, ymm4, ymm8, ymm12 for $L$open_avx2_tail_128_xor (outside this chunk).
5367$L$open_avx2_384_tail_done:
5368	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5369	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5370	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5371	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5372	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5373	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5374	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5375	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5376	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5377	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5378	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5379	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Bytes 0..127: de-interleave the lanes (low-lane block first), XOR, store.
5380	vperm2i128	ymm3,ymm6,ymm2,0x02
5381	vperm2i128	ymm6,ymm6,ymm2,0x13
5382	vperm2i128	ymm2,ymm14,ymm10,0x02
5383	vperm2i128	ymm10,ymm14,ymm10,0x13
5384	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
5385	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
5386	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
5387	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
5388	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
5389	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
5390	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
5391	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
; Bytes 128..255.
5392	vperm2i128	ymm3,ymm5,ymm1,0x02
5393	vperm2i128	ymm5,ymm5,ymm1,0x13
5394	vperm2i128	ymm1,ymm13,ymm9,0x02
5395	vperm2i128	ymm9,ymm13,ymm9,0x13
5396	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5397	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
5398	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
5399	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
5400	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5401	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
5402	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
5403	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
; Remaining keystream, in byte order: ymm0, ymm4, ymm8, ymm12.
5404	vperm2i128	ymm3,ymm4,ymm0,0x13
5405	vperm2i128	ymm0,ymm4,ymm0,0x02
5406	vperm2i128	ymm4,ymm12,ymm8,0x02
5407	vperm2i128	ymm12,ymm12,ymm8,0x13
5408	vmovdqa	ymm8,ymm3
5409
; Account for the 256 bytes written above.
5410	lea	rsi,[256+rsi]
5411	lea	rdi,[256+rdi]
5412	sub	rbx,8*32
5413	jmp	NEAR $L$open_avx2_tail_128_xor
5414
; Set-up for the 512-byte tail path: four ChaCha20 states, each holding
; two blocks (one per 128-bit lane).
;   row a: ymm0..ymm3   row b: ymm4..ymm7   row c: ymm8..ymm11   row d: ymm12..ymm15
; Rows a/b/c are identical for every state and are loaded from
; $L$chacha20_consts and the copies saved at [160+64+rbp] / [160+96+rbp].
5415$L$open_avx2_tail_512:
5416	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
5417	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
5418	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
5419	vmovdqa	ymm1,ymm0
5420	vmovdqa	ymm5,ymm4
5421	vmovdqa	ymm9,ymm8
5422	vmovdqa	ymm2,ymm0
5423	vmovdqa	ymm6,ymm4
5424	vmovdqa	ymm10,ymm8
5425	vmovdqa	ymm3,ymm0
5426	vmovdqa	ymm7,ymm4
5427	vmovdqa	ymm11,ymm8
; Row d: start from the value saved at [160+160+rbp] and step by $L$avx2_inc
; (+2 in the first dword of each lane) for ymm15, ymm14, ymm13, ymm12 in turn.
5428	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
5429	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
5430	vpaddd	ymm14,ymm12,ymm15
5431	vpaddd	ymm13,ymm12,ymm14
5432	vpaddd	ymm12,ymm12,ymm13
; Keep the initial d rows; they are added back after the rounds.
5433	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
5434	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
5435	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
5436	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
5437
; rcx counts completed double rounds; r8 is the hashing read pointer,
; starting at the input pointer rsi.
5438	xor	rcx,rcx
5439	mov	r8,rsi
; Loop entry used while rcx < 4: absorbs one extra 16-byte Poly1305 block
; at [r8] (mul-based multiply), then falls through into
; $L$open_avx2_tail_512_rounds_and_x1hash for the double round.
; Accumulator: r10 (low), r11 (mid), r12 (top bits).
5440$L$open_avx2_tail_512_rounds_and_x2hash:
; acc += block; the 1 added into r12 is the 2^128 bit.
5441	add	r10,QWORD[((0+0))+r8]
5442	adc	r11,QWORD[((8+0))+r8]
5443	adc	r12,1
; acc *= the two qwords at [160+rbp] / [168+rbp] (presumably the clamped
; Poly1305 key r). Product limbs: r13 (low), r14, r15, r9 (high).
5444	mov	rax,QWORD[((0+160+0))+rbp]
5445	mov	r15,rax
5446	mul	r10
5447	mov	r13,rax
5448	mov	r14,rdx
5449	mov	rax,QWORD[((0+160+0))+rbp]
5450	mul	r11
5451	imul	r15,r12
5452	add	r14,rax
5453	adc	r15,rdx
5454	mov	rax,QWORD[((8+160+0))+rbp]
5455	mov	r9,rax
5456	mul	r10
5457	add	r14,rax
5458	adc	rdx,0
5459	mov	r10,rdx
5460	mov	rax,QWORD[((8+160+0))+rbp]
5461	mul	r11
5462	add	r15,rax
5463	adc	rdx,0
5464	imul	r9,r12
5465	add	r15,r10
5466	adc	r9,rdx
; Reduce modulo 2^130 - 5: low 130 bits plus 5 * (bits above them),
; with the factor 5 formed as (high & ~3) + (high >> 2).
5467	mov	r10,r13
5468	mov	r11,r14
5469	mov	r12,r15
5470	and	r12,3
5471	mov	r13,r15
5472	and	r13,-4
5473	mov	r14,r9
5474	shrd	r15,r9,2
5475	shr	r9,2
5476	add	r15,r13
5477	adc	r9,r14
5478	add	r10,r15
5479	adc	r11,r9
5480	adc	r12,0
5481
5482	lea	r8,[16+r8]
; One ChaCha20 double round over the four states (rows a: ymm0-3,
; b: ymm4-7, c: ymm8-11, d: ymm12-15), interleaved with two Poly1305
; blocks read from [r8] and [r8+16] (mulx-based multiply); r8 += 32.
; ymm8 doubles as scratch, so row c of state 0 is parked at [160+128+rbp]
; whenever ymm8 holds a shuffle mask or a shift temporary.
; Loop control at the bottom: rcx counts double rounds. While rcx < 4
; the loop re-enters through ..._x2hash (one more block per pass); it stops
; at rcx == 10. That is 4 passes of 3 blocks plus 6 passes of 2 blocks,
; i.e. 24 blocks = 12*32 bytes hashed alongside the rounds.
5483$L$open_avx2_tail_512_rounds_and_x1hash:
; --- column round, step 1: a += b; d ^= a; d = rotl(d,16) via $L$rol16 ---
5484	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5485	vmovdqa	ymm8,YMMWORD[$L$rol16]
5486	vpaddd	ymm3,ymm3,ymm7
5487	vpaddd	ymm2,ymm2,ymm6
5488	vpaddd	ymm1,ymm1,ymm5
5489	vpaddd	ymm0,ymm0,ymm4
5490	vpxor	ymm15,ymm15,ymm3
5491	vpxor	ymm14,ymm14,ymm2
5492	vpxor	ymm13,ymm13,ymm1
5493	vpxor	ymm12,ymm12,ymm0
5494	vpshufb	ymm15,ymm15,ymm8
5495	vpshufb	ymm14,ymm14,ymm8
5496	vpshufb	ymm13,ymm13,ymm8
5497	vpshufb	ymm12,ymm12,ymm8
; --- step 2: c += d (state 0's c comes from the spill slot); b ^= c ---
5498	vpaddd	ymm11,ymm11,ymm15
5499	vpaddd	ymm10,ymm10,ymm14
5500	vpaddd	ymm9,ymm9,ymm13
5501	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5502	vpxor	ymm7,ymm7,ymm11
5503	vpxor	ymm6,ymm6,ymm10
5504	vpxor	ymm5,ymm5,ymm9
5505	vpxor	ymm4,ymm4,ymm8
; b = rotl(b,12), built from a right shift by 20 and a left shift by 12.
5506	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5507	vpsrld	ymm8,ymm7,20
5508	vpslld	ymm7,ymm7,32-20
5509	vpxor	ymm7,ymm7,ymm8
5510	vpsrld	ymm8,ymm6,20
5511	vpslld	ymm6,ymm6,32-20
5512	vpxor	ymm6,ymm6,ymm8
5513	vpsrld	ymm8,ymm5,20
5514	vpslld	ymm5,ymm5,32-20
5515	vpxor	ymm5,ymm5,ymm8
5516	vpsrld	ymm8,ymm4,20
5517	vpslld	ymm4,ymm4,32-20
5518	vpxor	ymm4,ymm4,ymm8
; --- step 3 begins (a += b); the remaining states follow the hash block ---
5519	vmovdqa	ymm8,YMMWORD[$L$rol8]
5520	vpaddd	ymm3,ymm3,ymm7
; Poly1305 block 1 of this pass: acc (r10:r11:r12) += 16 bytes at [r8] plus
; the 2^128 bit, then multiply by the qwords at [160+rbp] / [168+rbp].
5521	add	r10,QWORD[((0+0))+r8]
5522	adc	r11,QWORD[((8+0))+r8]
5523	adc	r12,1
5524	mov	rdx,QWORD[((0+160+0))+rbp]
5525	mov	r15,rdx
5526	mulx	r14,r13,r10
5527	mulx	rdx,rax,r11
5528	imul	r15,r12
5529	add	r14,rax
5530	adc	r15,rdx
5531	mov	rdx,QWORD[((8+160+0))+rbp]
5532	mulx	rax,r10,r10
5533	add	r14,r10
5534	mulx	r9,r11,r11
5535	adc	r15,r11
5536	adc	r9,0
5537	imul	rdx,r12
5538	add	r15,rax
5539	adc	r9,rdx
; Reduce modulo 2^130 - 5 (low 130 bits + 5 * the bits above them).
5540	mov	r10,r13
5541	mov	r11,r14
5542	mov	r12,r15
5543	and	r12,3
5544	mov	r13,r15
5545	and	r13,-4
5546	mov	r14,r9
5547	shrd	r15,r9,2
5548	shr	r9,2
5549	add	r15,r13
5550	adc	r9,r14
5551	add	r10,r15
5552	adc	r11,r9
5553	adc	r12,0
; --- step 3 continued: a += b; d ^= a; d = rotl(d,8) via $L$rol8 ---
5554	vpaddd	ymm2,ymm2,ymm6
5555	vpaddd	ymm1,ymm1,ymm5
5556	vpaddd	ymm0,ymm0,ymm4
5557	vpxor	ymm15,ymm15,ymm3
5558	vpxor	ymm14,ymm14,ymm2
5559	vpxor	ymm13,ymm13,ymm1
5560	vpxor	ymm12,ymm12,ymm0
5561	vpshufb	ymm15,ymm15,ymm8
5562	vpshufb	ymm14,ymm14,ymm8
5563	vpshufb	ymm13,ymm13,ymm8
5564	vpshufb	ymm12,ymm12,ymm8
; --- step 4: c += d; b ^= c; b = rotl(b,7) (shift right 25 / left 7) ---
5565	vpaddd	ymm11,ymm11,ymm15
5566	vpaddd	ymm10,ymm10,ymm14
5567	vpaddd	ymm9,ymm9,ymm13
5568	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5569	vpxor	ymm7,ymm7,ymm11
5570	vpxor	ymm6,ymm6,ymm10
5571	vpxor	ymm5,ymm5,ymm9
5572	vpxor	ymm4,ymm4,ymm8
5573	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5574	vpsrld	ymm8,ymm7,25
5575	vpslld	ymm7,ymm7,32-25
5576	vpxor	ymm7,ymm7,ymm8
5577	vpsrld	ymm8,ymm6,25
5578	vpslld	ymm6,ymm6,32-25
5579	vpxor	ymm6,ymm6,ymm8
5580	vpsrld	ymm8,ymm5,25
5581	vpslld	ymm5,ymm5,32-25
5582	vpxor	ymm5,ymm5,ymm8
5583	vpsrld	ymm8,ymm4,25
5584	vpslld	ymm4,ymm4,32-25
5585	vpxor	ymm4,ymm4,ymm8
; Reload row c of state 0, then rotate rows b/c/d by 4/8/12 bytes within
; each lane so the following steps operate on the diagonals.
5586	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
5587	vpalignr	ymm7,ymm7,ymm7,4
5588	vpalignr	ymm11,ymm11,ymm11,8
5589	vpalignr	ymm15,ymm15,ymm15,12
5590	vpalignr	ymm6,ymm6,ymm6,4
5591	vpalignr	ymm10,ymm10,ymm10,8
5592	vpalignr	ymm14,ymm14,ymm14,12
5593	vpalignr	ymm5,ymm5,ymm5,4
5594	vpalignr	ymm9,ymm9,ymm9,8
5595	vpalignr	ymm13,ymm13,ymm13,12
5596	vpalignr	ymm4,ymm4,ymm4,4
5597	vpalignr	ymm8,ymm8,ymm8,8
5598	vpalignr	ymm12,ymm12,ymm12,12
; --- diagonal round, step 1 begins (a += b for state 3) ---
5599	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5600	vmovdqa	ymm8,YMMWORD[$L$rol16]
5601	vpaddd	ymm3,ymm3,ymm7
; Poly1305 block 2 of this pass: the 16 bytes at [r8+16].
5602	add	r10,QWORD[((0+16))+r8]
5603	adc	r11,QWORD[((8+16))+r8]
5604	adc	r12,1
5605	mov	rdx,QWORD[((0+160+0))+rbp]
5606	mov	r15,rdx
5607	mulx	r14,r13,r10
5608	mulx	rdx,rax,r11
5609	imul	r15,r12
5610	add	r14,rax
5611	adc	r15,rdx
5612	mov	rdx,QWORD[((8+160+0))+rbp]
5613	mulx	rax,r10,r10
5614	add	r14,r10
5615	mulx	r9,r11,r11
5616	adc	r15,r11
5617	adc	r9,0
5618	imul	rdx,r12
5619	add	r15,rax
5620	adc	r9,rdx
; Reduce modulo 2^130 - 5.
5621	mov	r10,r13
5622	mov	r11,r14
5623	mov	r12,r15
5624	and	r12,3
5625	mov	r13,r15
5626	and	r13,-4
5627	mov	r14,r9
5628	shrd	r15,r9,2
5629	shr	r9,2
5630	add	r15,r13
5631	adc	r9,r14
5632	add	r10,r15
5633	adc	r11,r9
5634	adc	r12,0
5635
; Both blocks of this pass are absorbed: advance the hash pointer by 32.
5636	lea	r8,[32+r8]
; --- diagonal step 1 continued: a += b; d ^= a; d = rotl(d,16) ---
5637	vpaddd	ymm2,ymm2,ymm6
5638	vpaddd	ymm1,ymm1,ymm5
5639	vpaddd	ymm0,ymm0,ymm4
5640	vpxor	ymm15,ymm15,ymm3
5641	vpxor	ymm14,ymm14,ymm2
5642	vpxor	ymm13,ymm13,ymm1
5643	vpxor	ymm12,ymm12,ymm0
5644	vpshufb	ymm15,ymm15,ymm8
5645	vpshufb	ymm14,ymm14,ymm8
5646	vpshufb	ymm13,ymm13,ymm8
5647	vpshufb	ymm12,ymm12,ymm8
; --- step 2: c += d; b ^= c; b = rotl(b,12) ---
5648	vpaddd	ymm11,ymm11,ymm15
5649	vpaddd	ymm10,ymm10,ymm14
5650	vpaddd	ymm9,ymm9,ymm13
5651	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5652	vpxor	ymm7,ymm7,ymm11
5653	vpxor	ymm6,ymm6,ymm10
5654	vpxor	ymm5,ymm5,ymm9
5655	vpxor	ymm4,ymm4,ymm8
5656	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5657	vpsrld	ymm8,ymm7,20
5658	vpslld	ymm7,ymm7,32-20
5659	vpxor	ymm7,ymm7,ymm8
5660	vpsrld	ymm8,ymm6,20
5661	vpslld	ymm6,ymm6,32-20
5662	vpxor	ymm6,ymm6,ymm8
5663	vpsrld	ymm8,ymm5,20
5664	vpslld	ymm5,ymm5,32-20
5665	vpxor	ymm5,ymm5,ymm8
5666	vpsrld	ymm8,ymm4,20
5667	vpslld	ymm4,ymm4,32-20
5668	vpxor	ymm4,ymm4,ymm8
; --- step 3: a += b; d ^= a; d = rotl(d,8) ---
5669	vmovdqa	ymm8,YMMWORD[$L$rol8]
5670	vpaddd	ymm3,ymm3,ymm7
5671	vpaddd	ymm2,ymm2,ymm6
5672	vpaddd	ymm1,ymm1,ymm5
5673	vpaddd	ymm0,ymm0,ymm4
5674	vpxor	ymm15,ymm15,ymm3
5675	vpxor	ymm14,ymm14,ymm2
5676	vpxor	ymm13,ymm13,ymm1
5677	vpxor	ymm12,ymm12,ymm0
5678	vpshufb	ymm15,ymm15,ymm8
5679	vpshufb	ymm14,ymm14,ymm8
5680	vpshufb	ymm13,ymm13,ymm8
5681	vpshufb	ymm12,ymm12,ymm8
; --- step 4: c += d; b ^= c; b = rotl(b,7) ---
5682	vpaddd	ymm11,ymm11,ymm15
5683	vpaddd	ymm10,ymm10,ymm14
5684	vpaddd	ymm9,ymm9,ymm13
5685	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
5686	vpxor	ymm7,ymm7,ymm11
5687	vpxor	ymm6,ymm6,ymm10
5688	vpxor	ymm5,ymm5,ymm9
5689	vpxor	ymm4,ymm4,ymm8
5690	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
5691	vpsrld	ymm8,ymm7,25
5692	vpslld	ymm7,ymm7,32-25
5693	vpxor	ymm7,ymm7,ymm8
5694	vpsrld	ymm8,ymm6,25
5695	vpslld	ymm6,ymm6,32-25
5696	vpxor	ymm6,ymm6,ymm8
5697	vpsrld	ymm8,ymm5,25
5698	vpslld	ymm5,ymm5,32-25
5699	vpxor	ymm5,ymm5,ymm8
5700	vpsrld	ymm8,ymm4,25
5701	vpslld	ymm4,ymm4,32-25
5702	vpxor	ymm4,ymm4,ymm8
; Reload row c of state 0 and undo the diagonal rotation (b/c/d by 12/8/4).
5703	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
5704	vpalignr	ymm7,ymm7,ymm7,12
5705	vpalignr	ymm11,ymm11,ymm11,8
5706	vpalignr	ymm15,ymm15,ymm15,4
5707	vpalignr	ymm6,ymm6,ymm6,12
5708	vpalignr	ymm10,ymm10,ymm10,8
5709	vpalignr	ymm14,ymm14,ymm14,4
5710	vpalignr	ymm5,ymm5,ymm5,12
5711	vpalignr	ymm9,ymm9,ymm9,8
5712	vpalignr	ymm13,ymm13,ymm13,4
5713	vpalignr	ymm4,ymm4,ymm4,12
5714	vpalignr	ymm8,ymm8,ymm8,8
5715	vpalignr	ymm12,ymm12,ymm12,4
5716
; Loop control: first four passes go through the x2hash entry, the loop
; ends after the tenth double round.
5717	inc	rcx
5718	cmp	rcx,4
5719	jl	NEAR $L$open_avx2_tail_512_rounds_and_x2hash
5720	cmp	rcx,10
5721	jne	NEAR $L$open_avx2_tail_512_rounds_and_x1hash
; rcx = (rbx - 12*32) rounded down to a multiple of 16: the byte count the
; following $L$open_avx2_tail_512_hash loop still has to absorb.
5722	mov	rcx,rbx
5723	sub	rcx,12*32
5724	and	rcx,-16
; Absorbs the remaining whole 16-byte blocks into the Poly1305 accumulator
; (r10 low, r11 mid, r12 top bits). rcx holds the number of bytes still to
; hash (a multiple of 16) and r8 the read pointer; the loop ends at rcx == 0.
5725$L$open_avx2_tail_512_hash:
5726	test	rcx,rcx
5727	je	NEAR $L$open_avx2_tail_512_done
; acc += 16 bytes at [r8]; the 1 added into r12 is the 2^128 bit.
5728	add	r10,QWORD[((0+0))+r8]
5729	adc	r11,QWORD[((8+0))+r8]
5730	adc	r12,1
; acc *= the two qwords at [160+rbp] / [168+rbp] (presumably the clamped
; Poly1305 key r), using mulx with rdx as the implicit factor.
; Product limbs: r13 (low), r14, r15, r9 (high).
5731	mov	rdx,QWORD[((0+160+0))+rbp]
5732	mov	r15,rdx
5733	mulx	r14,r13,r10
5734	mulx	rdx,rax,r11
5735	imul	r15,r12
5736	add	r14,rax
5737	adc	r15,rdx
5738	mov	rdx,QWORD[((8+160+0))+rbp]
5739	mulx	rax,r10,r10
5740	add	r14,r10
5741	mulx	r9,r11,r11
5742	adc	r15,r11
5743	adc	r9,0
5744	imul	rdx,r12
5745	add	r15,rax
5746	adc	r9,rdx
; Reduce modulo 2^130 - 5: low 130 bits plus 5 * (bits above them),
; with the factor 5 formed as (high & ~3) + (high >> 2).
5747	mov	r10,r13
5748	mov	r11,r14
5749	mov	r12,r15
5750	and	r12,3
5751	mov	r13,r15
5752	and	r13,-4
5753	mov	r14,r9
5754	shrd	r15,r9,2
5755	shr	r9,2
5756	add	r15,r13
5757	adc	r9,r14
5758	add	r10,r15
5759	adc	r11,r9
5760	adc	r12,0
5761
; One block done: advance the pointer and decrease the remaining count.
5762	lea	r8,[16+r8]
5763	sub	rcx,2*8
5764	jmp	NEAR $L$open_avx2_tail_512_hash
; Finish the four ChaCha20 states of the 512-byte tail path.
; The input rows are added back (constants, the rows saved at [160+64+rbp]
; and [160+96+rbp], and each state's own d row saved at [160+256],
; [160+224], [160+192], [160+160]+rbp). States 3, 2 and 1 are XORed with
; 384 bytes of input; state 0 is left in registers for the XOR loop below.
5765$L$open_avx2_tail_512_done:
5766	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
5767	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
5768	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
5769	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
5770	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
5771	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
5772	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
5773	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
5774	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
5775	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
5776	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
5777	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
5778	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
5779	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
5780	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
5781	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
5782
; All sixteen ymm registers are live, so ymm0 is parked at [160+128+rbp]
; and used as the temporary while state 3 is de-interleaved.
; vperm2i128 0x02 pairs the low lanes of two rows, 0x13 the high lanes.
5783	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
5784	vperm2i128	ymm0,ymm7,ymm3,0x02
5785	vperm2i128	ymm7,ymm7,ymm3,0x13
5786	vperm2i128	ymm3,ymm15,ymm11,0x02
5787	vperm2i128	ymm11,ymm15,ymm11,0x13
; State 3 -> bytes 0..127: XOR with input at rsi, store to rdi.
5788	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
5789	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
5790	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
5791	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
5792	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
5793	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
5794	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
5795	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
5796
; Restore ymm0 (row a of state 0); ymm3 is free now and becomes the temporary.
; State 2 -> bytes 128..255.
5797	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
5798	vperm2i128	ymm3,ymm6,ymm2,0x02
5799	vperm2i128	ymm6,ymm6,ymm2,0x13
5800	vperm2i128	ymm2,ymm14,ymm10,0x02
5801	vperm2i128	ymm10,ymm14,ymm10,0x13
5802	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
5803	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
5804	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
5805	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
5806	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
5807	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
5808	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
5809	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
; State 1 -> bytes 256..383.
5810	vperm2i128	ymm3,ymm5,ymm1,0x02
5811	vperm2i128	ymm5,ymm5,ymm1,0x13
5812	vperm2i128	ymm1,ymm13,ymm9,0x02
5813	vperm2i128	ymm9,ymm13,ymm9,0x13
5814	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
5815	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
5816	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
5817	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
5818	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
5819	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
5820	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
5821	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
; State 0 is only put into stream order: ymm0, ymm4, ymm8, ymm12 hold the
; next 4 x 32 bytes of keystream for $L$open_avx2_tail_128_xor.
5822	vperm2i128	ymm3,ymm4,ymm0,0x13
5823	vperm2i128	ymm0,ymm4,ymm0,0x02
5824	vperm2i128	ymm4,ymm12,ymm8,0x02
5825	vperm2i128	ymm12,ymm12,ymm8,0x13
5826	vmovdqa	ymm8,ymm3
5827
; 384 bytes were consumed; execution falls through into the XOR loop.
5828	lea	rsi,[384+rsi]
5829	lea	rdi,[384+rdi]
5830	sub	rbx,12*32
; XOR loop for up to 128 bytes of keystream queued in ymm0, ymm4, ymm8,
; ymm12 (in that order). While at least 32 bytes remain (rbx, unsigned
; compare) it XORs 32 input bytes at rsi with ymm0, stores them at rdi,
; advances both pointers and shifts the queue down by one register.
5831$L$open_avx2_tail_128_xor:
5832	cmp	rbx,32
5833	jb	NEAR $L$open_avx2_tail_32_xor
5834	sub	rbx,32
5835	vpxor	ymm0,ymm0,YMMWORD[rsi]
5836	vmovdqu	YMMWORD[rdi],ymm0
5837	lea	rsi,[32+rsi]
5838	lea	rdi,[32+rdi]
; Queue shift: ymm0 <- ymm4 <- ymm8 <- ymm12.
5839	vmovdqa	ymm0,ymm4
5840	vmovdqa	ymm4,ymm8
5841	vmovdqa	ymm8,ymm12
5842	jmp	NEAR $L$open_avx2_tail_128_xor
; Fewer than 32 bytes remain. xmm1 always leaves this block holding the next
; unused 16 bytes of keystream; it is presumably consumed by
; $L$open_sse_tail_16, which is defined outside this excerpt.
; The vmovdqa between cmp and jb does not modify the flags.
5843$L$open_avx2_tail_32_xor:
5844	cmp	rbx,16
5845	vmovdqa	xmm1,xmm0
5846	jb	NEAR $L$open_avx2_exit
5847	sub	rbx,16
5848
; At least 16 bytes: XOR them with the low lane of ymm0 and store the result.
5849	vpxor	xmm1,xmm0,XMMWORD[rsi]
5850	vmovdqu	XMMWORD[rdi],xmm1
5851	lea	rsi,[16+rsi]
5852	lea	rdi,[16+rdi]
; Copy the high lane of ymm0 into both lanes (0x11), then hand it over in xmm1.
5853	vperm2i128	ymm0,ymm0,ymm0,0x11
5854	vmovdqa	xmm1,xmm0
; Clear the upper ymm halves before continuing in the SSE tail code.
5855$L$open_avx2_exit:
5856	vzeroupper
5857	jmp	NEAR $L$open_sse_tail_16
5858
; Path with two ChaCha20 states, i.e. four 64-byte blocks (two per state,
; one per 128-bit lane).
;   state 0: ymm0/ymm4/ymm8/ymm12      state 1: ymm1/ymm5/ymm9/ymm13
; Copies of the input rows are kept in registers for the final addition:
;   ymm2 = row a, ymm6 = row b, ymm10 = row c, ymm11 = row d of state 0,
;   ymm15 = row d of state 1 (state 0's d row plus $L$avx2_inc).
; ymm3 is the scratch register for the rotations. r10 counts 10 double rounds.
5859$L$open_avx2_192:
5860	vmovdqa	ymm1,ymm0
5861	vmovdqa	ymm2,ymm0
5862	vmovdqa	ymm5,ymm4
5863	vmovdqa	ymm6,ymm4
5864	vmovdqa	ymm9,ymm8
5865	vmovdqa	ymm10,ymm8
5866	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
5867	vmovdqa	ymm11,ymm12
5868	vmovdqa	ymm15,ymm13
5869	mov	r10,10
5870$L$open_avx2_192_rounds:
; Column round, state 0: a+=b; d^=a; d<<<16; c+=d; b^=c; b<<<12;
;                        a+=b; d^=a; d<<<8;  c+=d; b^=c; b<<<7.
5871	vpaddd	ymm0,ymm0,ymm4
5872	vpxor	ymm12,ymm12,ymm0
5873	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5874	vpaddd	ymm8,ymm8,ymm12
5875	vpxor	ymm4,ymm4,ymm8
5876	vpsrld	ymm3,ymm4,20
5877	vpslld	ymm4,ymm4,12
5878	vpxor	ymm4,ymm4,ymm3
5879	vpaddd	ymm0,ymm0,ymm4
5880	vpxor	ymm12,ymm12,ymm0
5881	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5882	vpaddd	ymm8,ymm8,ymm12
5883	vpxor	ymm4,ymm4,ymm8
5884	vpslld	ymm3,ymm4,7
5885	vpsrld	ymm4,ymm4,25
5886	vpxor	ymm4,ymm4,ymm3
; Rotate rows d/c/b by 12/8/4 bytes per lane to line up the diagonals.
5887	vpalignr	ymm12,ymm12,ymm12,12
5888	vpalignr	ymm8,ymm8,ymm8,8
5889	vpalignr	ymm4,ymm4,ymm4,4
; Column round, state 1 (same sequence).
5890	vpaddd	ymm1,ymm1,ymm5
5891	vpxor	ymm13,ymm13,ymm1
5892	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5893	vpaddd	ymm9,ymm9,ymm13
5894	vpxor	ymm5,ymm5,ymm9
5895	vpsrld	ymm3,ymm5,20
5896	vpslld	ymm5,ymm5,12
5897	vpxor	ymm5,ymm5,ymm3
5898	vpaddd	ymm1,ymm1,ymm5
5899	vpxor	ymm13,ymm13,ymm1
5900	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5901	vpaddd	ymm9,ymm9,ymm13
5902	vpxor	ymm5,ymm5,ymm9
5903	vpslld	ymm3,ymm5,7
5904	vpsrld	ymm5,ymm5,25
5905	vpxor	ymm5,ymm5,ymm3
5906	vpalignr	ymm13,ymm13,ymm13,12
5907	vpalignr	ymm9,ymm9,ymm9,8
5908	vpalignr	ymm5,ymm5,ymm5,4
; Diagonal round, state 0.
5909	vpaddd	ymm0,ymm0,ymm4
5910	vpxor	ymm12,ymm12,ymm0
5911	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
5912	vpaddd	ymm8,ymm8,ymm12
5913	vpxor	ymm4,ymm4,ymm8
5914	vpsrld	ymm3,ymm4,20
5915	vpslld	ymm4,ymm4,12
5916	vpxor	ymm4,ymm4,ymm3
5917	vpaddd	ymm0,ymm0,ymm4
5918	vpxor	ymm12,ymm12,ymm0
5919	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
5920	vpaddd	ymm8,ymm8,ymm12
5921	vpxor	ymm4,ymm4,ymm8
5922	vpslld	ymm3,ymm4,7
5923	vpsrld	ymm4,ymm4,25
5924	vpxor	ymm4,ymm4,ymm3
; Undo the diagonal rotation (d/c/b by 4/8/12).
5925	vpalignr	ymm12,ymm12,ymm12,4
5926	vpalignr	ymm8,ymm8,ymm8,8
5927	vpalignr	ymm4,ymm4,ymm4,12
; Diagonal round, state 1.
5928	vpaddd	ymm1,ymm1,ymm5
5929	vpxor	ymm13,ymm13,ymm1
5930	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
5931	vpaddd	ymm9,ymm9,ymm13
5932	vpxor	ymm5,ymm5,ymm9
5933	vpsrld	ymm3,ymm5,20
5934	vpslld	ymm5,ymm5,12
5935	vpxor	ymm5,ymm5,ymm3
5936	vpaddd	ymm1,ymm1,ymm5
5937	vpxor	ymm13,ymm13,ymm1
5938	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
5939	vpaddd	ymm9,ymm9,ymm13
5940	vpxor	ymm5,ymm5,ymm9
5941	vpslld	ymm3,ymm5,7
5942	vpsrld	ymm5,ymm5,25
5943	vpxor	ymm5,ymm5,ymm3
5944	vpalignr	ymm13,ymm13,ymm13,4
5945	vpalignr	ymm9,ymm9,ymm9,8
5946	vpalignr	ymm5,ymm5,ymm5,12
5947
5948	dec	r10
5949	jne	NEAR $L$open_avx2_192_rounds
; Add the saved input rows back to obtain the keystream blocks.
5950	vpaddd	ymm0,ymm0,ymm2
5951	vpaddd	ymm1,ymm1,ymm2
5952	vpaddd	ymm4,ymm4,ymm6
5953	vpaddd	ymm5,ymm5,ymm6
5954	vpaddd	ymm8,ymm8,ymm10
5955	vpaddd	ymm9,ymm9,ymm10
5956	vpaddd	ymm12,ymm12,ymm11
5957	vpaddd	ymm13,ymm13,ymm15
; ymm3 = first 32 bytes (rows a,b) of the low-lane block of state 0.
5958	vperm2i128	ymm3,ymm4,ymm0,0x02
5959
; Mask those 32 bytes with $L$clamp and store them at [160+0+rbp]; this is
; the slot the Poly1305 multiply reads its two key qwords from.
5960	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
5961	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
5962
; Queue the remaining three blocks in stream order for $L$open_avx2_short:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5 (32 bytes each, 192 bytes in total).
; Rows c,d of the low-lane block of state 0 are not extracted.
5963	vperm2i128	ymm0,ymm4,ymm0,0x13
5964	vperm2i128	ymm4,ymm12,ymm8,0x13
5965	vperm2i128	ymm8,ymm5,ymm1,0x02
5966	vperm2i128	ymm12,ymm13,ymm9,0x02
5967	vperm2i128	ymm1,ymm5,ymm1,0x13
5968	vperm2i128	ymm5,ymm13,ymm9,0x13
; Short-input path shared by the 192- and 320-byte variants. Keystream is
; queued 32 bytes per register in the order
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6.
; poly_hash_ad_internal is defined outside this excerpt (presumably it
; absorbs the additional data into the accumulator - confirm there).
; "mov r8,r8" copies a register onto itself and has no effect.
5969$L$open_avx2_short:
5970	mov	r8,r8
5971	call	poly_hash_ad_internal
; Main loop: while at least 32 bytes remain (rbx, unsigned compare), hash
; the two 16-byte input blocks at rsi first, then XOR them with ymm0.
5972$L$open_avx2_short_hash_and_xor_loop:
5973	cmp	rbx,32
5974	jb	NEAR $L$open_avx2_short_tail_32
5975	sub	rbx,32
; Poly1305 block 1: acc (r10:r11:r12) += 16 bytes at [rsi] plus the 2^128 bit.
5976	add	r10,QWORD[((0+0))+rsi]
5977	adc	r11,QWORD[((8+0))+rsi]
5978	adc	r12,1
; acc *= the two qwords at [160+rbp] / [168+rbp]; product limbs are
; r13 (low), r14, r15, r9 (high).
5979	mov	rax,QWORD[((0+160+0))+rbp]
5980	mov	r15,rax
5981	mul	r10
5982	mov	r13,rax
5983	mov	r14,rdx
5984	mov	rax,QWORD[((0+160+0))+rbp]
5985	mul	r11
5986	imul	r15,r12
5987	add	r14,rax
5988	adc	r15,rdx
5989	mov	rax,QWORD[((8+160+0))+rbp]
5990	mov	r9,rax
5991	mul	r10
5992	add	r14,rax
5993	adc	rdx,0
5994	mov	r10,rdx
5995	mov	rax,QWORD[((8+160+0))+rbp]
5996	mul	r11
5997	add	r15,rax
5998	adc	rdx,0
5999	imul	r9,r12
6000	add	r15,r10
6001	adc	r9,rdx
; Reduce modulo 2^130 - 5: low 130 bits plus 5 * (bits above them).
6002	mov	r10,r13
6003	mov	r11,r14
6004	mov	r12,r15
6005	and	r12,3
6006	mov	r13,r15
6007	and	r13,-4
6008	mov	r14,r9
6009	shrd	r15,r9,2
6010	shr	r9,2
6011	add	r15,r13
6012	adc	r9,r14
6013	add	r10,r15
6014	adc	r11,r9
6015	adc	r12,0
; Poly1305 block 2: the 16 bytes at [rsi+16], same multiply and reduction.
6016	add	r10,QWORD[((0+16))+rsi]
6017	adc	r11,QWORD[((8+16))+rsi]
6018	adc	r12,1
6019	mov	rax,QWORD[((0+160+0))+rbp]
6020	mov	r15,rax
6021	mul	r10
6022	mov	r13,rax
6023	mov	r14,rdx
6024	mov	rax,QWORD[((0+160+0))+rbp]
6025	mul	r11
6026	imul	r15,r12
6027	add	r14,rax
6028	adc	r15,rdx
6029	mov	rax,QWORD[((8+160+0))+rbp]
6030	mov	r9,rax
6031	mul	r10
6032	add	r14,rax
6033	adc	rdx,0
6034	mov	r10,rdx
6035	mov	rax,QWORD[((8+160+0))+rbp]
6036	mul	r11
6037	add	r15,rax
6038	adc	rdx,0
6039	imul	r9,r12
6040	add	r15,r10
6041	adc	r9,rdx
6042	mov	r10,r13
6043	mov	r11,r14
6044	mov	r12,r15
6045	and	r12,3
6046	mov	r13,r15
6047	and	r13,-4
6048	mov	r14,r9
6049	shrd	r15,r9,2
6050	shr	r9,2
6051	add	r15,r13
6052	adc	r9,r14
6053	add	r10,r15
6054	adc	r11,r9
6055	adc	r12,0
6056
6057
; XOR the 32 input bytes with the head of the keystream queue and store.
6058	vpxor	ymm0,ymm0,YMMWORD[rsi]
6059	vmovdqu	YMMWORD[rdi],ymm0
6060	lea	rsi,[32+rsi]
6061	lea	rdi,[32+rdi]
6062
; Shift the keystream queue down by one register.
6063	vmovdqa	ymm0,ymm4
6064	vmovdqa	ymm4,ymm8
6065	vmovdqa	ymm8,ymm12
6066	vmovdqa	ymm12,ymm1
6067	vmovdqa	ymm1,ymm5
6068	vmovdqa	ymm5,ymm9
6069	vmovdqa	ymm9,ymm13
6070	vmovdqa	ymm13,ymm2
6071	vmovdqa	ymm2,ymm6
6072	jmp	NEAR $L$open_avx2_short_hash_and_xor_loop
; Fewer than 32 bytes remain on the short path. xmm1 always leaves this block
; holding the next unused 16 bytes of keystream; it is presumably consumed by
; $L$open_sse_tail_16, which is defined outside this excerpt.
; The vmovdqa between cmp and jb does not modify the flags.
6073$L$open_avx2_short_tail_32:
6074	cmp	rbx,16
6075	vmovdqa	xmm1,xmm0
6076	jb	NEAR $L$open_avx2_short_tail_32_exit
6077	sub	rbx,16
; Hash the 16 input bytes at [rsi]: acc (r10:r11:r12) += block plus the
; 2^128 bit, then multiply by the qwords at [160+rbp] / [168+rbp].
6078	add	r10,QWORD[((0+0))+rsi]
6079	adc	r11,QWORD[((8+0))+rsi]
6080	adc	r12,1
6081	mov	rax,QWORD[((0+160+0))+rbp]
6082	mov	r15,rax
6083	mul	r10
6084	mov	r13,rax
6085	mov	r14,rdx
6086	mov	rax,QWORD[((0+160+0))+rbp]
6087	mul	r11
6088	imul	r15,r12
6089	add	r14,rax
6090	adc	r15,rdx
6091	mov	rax,QWORD[((8+160+0))+rbp]
6092	mov	r9,rax
6093	mul	r10
6094	add	r14,rax
6095	adc	rdx,0
6096	mov	r10,rdx
6097	mov	rax,QWORD[((8+160+0))+rbp]
6098	mul	r11
6099	add	r15,rax
6100	adc	rdx,0
6101	imul	r9,r12
6102	add	r15,r10
6103	adc	r9,rdx
; Reduce modulo 2^130 - 5: low 130 bits plus 5 * (bits above them).
6104	mov	r10,r13
6105	mov	r11,r14
6106	mov	r12,r15
6107	and	r12,3
6108	mov	r13,r15
6109	and	r13,-4
6110	mov	r14,r9
6111	shrd	r15,r9,2
6112	shr	r9,2
6113	add	r15,r13
6114	adc	r9,r14
6115	add	r10,r15
6116	adc	r11,r9
6117	adc	r12,0
6118
; XOR the 16 bytes with the low lane of ymm0, store them, advance the
; pointers, and pass the high lane of ymm0 on in xmm1.
6119	vpxor	xmm3,xmm0,XMMWORD[rsi]
6120	vmovdqu	XMMWORD[rdi],xmm3
6121	lea	rsi,[16+rsi]
6122	lea	rdi,[16+rdi]
6123	vextracti128	xmm1,ymm0,1
; Clear the upper ymm halves before continuing in the SSE tail code.
6124$L$open_avx2_short_tail_32_exit:
6125	vzeroupper
6126	jmp	NEAR $L$open_sse_tail_16
6127
; Path with three ChaCha20 states, i.e. six 64-byte blocks (two per state,
; one per 128-bit lane).
;   state 0: ymm0/ymm4/ymm8/ymm12    state 1: ymm1/ymm5/ymm9/ymm13
;   state 2: ymm2/ymm6/ymm10/ymm14
; Saved input rows: ymm7 = row b, ymm11 = row c; the three d rows (each one
; $L$avx2_inc above the previous) go to [160+160], [160+192], [160+224]+rbp.
; Row a is re-added from $L$chacha20_consts. ymm3 is rotation scratch.
; r10 counts 10 double rounds.
6128$L$open_avx2_320:
6129	vmovdqa	ymm1,ymm0
6130	vmovdqa	ymm2,ymm0
6131	vmovdqa	ymm5,ymm4
6132	vmovdqa	ymm6,ymm4
6133	vmovdqa	ymm9,ymm8
6134	vmovdqa	ymm10,ymm8
6135	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
6136	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
6137	vmovdqa	ymm7,ymm4
6138	vmovdqa	ymm11,ymm8
6139	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6140	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6141	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6142	mov	r10,10
6143$L$open_avx2_320_rounds:
; Column round, state 0: a+=b; d^=a; d<<<16; c+=d; b^=c; b<<<12;
;                        a+=b; d^=a; d<<<8;  c+=d; b^=c; b<<<7.
6144	vpaddd	ymm0,ymm0,ymm4
6145	vpxor	ymm12,ymm12,ymm0
6146	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6147	vpaddd	ymm8,ymm8,ymm12
6148	vpxor	ymm4,ymm4,ymm8
6149	vpsrld	ymm3,ymm4,20
6150	vpslld	ymm4,ymm4,12
6151	vpxor	ymm4,ymm4,ymm3
6152	vpaddd	ymm0,ymm0,ymm4
6153	vpxor	ymm12,ymm12,ymm0
6154	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6155	vpaddd	ymm8,ymm8,ymm12
6156	vpxor	ymm4,ymm4,ymm8
6157	vpslld	ymm3,ymm4,7
6158	vpsrld	ymm4,ymm4,25
6159	vpxor	ymm4,ymm4,ymm3
; Rotate rows d/c/b by 12/8/4 bytes per lane to line up the diagonals.
6160	vpalignr	ymm12,ymm12,ymm12,12
6161	vpalignr	ymm8,ymm8,ymm8,8
6162	vpalignr	ymm4,ymm4,ymm4,4
; Column round, state 1.
6163	vpaddd	ymm1,ymm1,ymm5
6164	vpxor	ymm13,ymm13,ymm1
6165	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6166	vpaddd	ymm9,ymm9,ymm13
6167	vpxor	ymm5,ymm5,ymm9
6168	vpsrld	ymm3,ymm5,20
6169	vpslld	ymm5,ymm5,12
6170	vpxor	ymm5,ymm5,ymm3
6171	vpaddd	ymm1,ymm1,ymm5
6172	vpxor	ymm13,ymm13,ymm1
6173	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6174	vpaddd	ymm9,ymm9,ymm13
6175	vpxor	ymm5,ymm5,ymm9
6176	vpslld	ymm3,ymm5,7
6177	vpsrld	ymm5,ymm5,25
6178	vpxor	ymm5,ymm5,ymm3
6179	vpalignr	ymm13,ymm13,ymm13,12
6180	vpalignr	ymm9,ymm9,ymm9,8
6181	vpalignr	ymm5,ymm5,ymm5,4
; Column round, state 2.
6182	vpaddd	ymm2,ymm2,ymm6
6183	vpxor	ymm14,ymm14,ymm2
6184	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6185	vpaddd	ymm10,ymm10,ymm14
6186	vpxor	ymm6,ymm6,ymm10
6187	vpsrld	ymm3,ymm6,20
6188	vpslld	ymm6,ymm6,12
6189	vpxor	ymm6,ymm6,ymm3
6190	vpaddd	ymm2,ymm2,ymm6
6191	vpxor	ymm14,ymm14,ymm2
6192	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6193	vpaddd	ymm10,ymm10,ymm14
6194	vpxor	ymm6,ymm6,ymm10
6195	vpslld	ymm3,ymm6,7
6196	vpsrld	ymm6,ymm6,25
6197	vpxor	ymm6,ymm6,ymm3
6198	vpalignr	ymm14,ymm14,ymm14,12
6199	vpalignr	ymm10,ymm10,ymm10,8
6200	vpalignr	ymm6,ymm6,ymm6,4
; Diagonal round, state 0.
6201	vpaddd	ymm0,ymm0,ymm4
6202	vpxor	ymm12,ymm12,ymm0
6203	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
6204	vpaddd	ymm8,ymm8,ymm12
6205	vpxor	ymm4,ymm4,ymm8
6206	vpsrld	ymm3,ymm4,20
6207	vpslld	ymm4,ymm4,12
6208	vpxor	ymm4,ymm4,ymm3
6209	vpaddd	ymm0,ymm0,ymm4
6210	vpxor	ymm12,ymm12,ymm0
6211	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
6212	vpaddd	ymm8,ymm8,ymm12
6213	vpxor	ymm4,ymm4,ymm8
6214	vpslld	ymm3,ymm4,7
6215	vpsrld	ymm4,ymm4,25
6216	vpxor	ymm4,ymm4,ymm3
; Undo the diagonal rotation (d/c/b by 4/8/12).
6217	vpalignr	ymm12,ymm12,ymm12,4
6218	vpalignr	ymm8,ymm8,ymm8,8
6219	vpalignr	ymm4,ymm4,ymm4,12
; Diagonal round, state 1.
6220	vpaddd	ymm1,ymm1,ymm5
6221	vpxor	ymm13,ymm13,ymm1
6222	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
6223	vpaddd	ymm9,ymm9,ymm13
6224	vpxor	ymm5,ymm5,ymm9
6225	vpsrld	ymm3,ymm5,20
6226	vpslld	ymm5,ymm5,12
6227	vpxor	ymm5,ymm5,ymm3
6228	vpaddd	ymm1,ymm1,ymm5
6229	vpxor	ymm13,ymm13,ymm1
6230	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
6231	vpaddd	ymm9,ymm9,ymm13
6232	vpxor	ymm5,ymm5,ymm9
6233	vpslld	ymm3,ymm5,7
6234	vpsrld	ymm5,ymm5,25
6235	vpxor	ymm5,ymm5,ymm3
6236	vpalignr	ymm13,ymm13,ymm13,4
6237	vpalignr	ymm9,ymm9,ymm9,8
6238	vpalignr	ymm5,ymm5,ymm5,12
; Diagonal round, state 2.
6239	vpaddd	ymm2,ymm2,ymm6
6240	vpxor	ymm14,ymm14,ymm2
6241	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
6242	vpaddd	ymm10,ymm10,ymm14
6243	vpxor	ymm6,ymm6,ymm10
6244	vpsrld	ymm3,ymm6,20
6245	vpslld	ymm6,ymm6,12
6246	vpxor	ymm6,ymm6,ymm3
6247	vpaddd	ymm2,ymm2,ymm6
6248	vpxor	ymm14,ymm14,ymm2
6249	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
6250	vpaddd	ymm10,ymm10,ymm14
6251	vpxor	ymm6,ymm6,ymm10
6252	vpslld	ymm3,ymm6,7
6253	vpsrld	ymm6,ymm6,25
6254	vpxor	ymm6,ymm6,ymm3
6255	vpalignr	ymm14,ymm14,ymm14,4
6256	vpalignr	ymm10,ymm10,ymm10,8
6257	vpalignr	ymm6,ymm6,ymm6,12
6258
6259	dec	r10
6260	jne	NEAR $L$open_avx2_320_rounds
; Add the input rows back to obtain the keystream blocks.
6261	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6262	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6263	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6264	vpaddd	ymm4,ymm4,ymm7
6265	vpaddd	ymm5,ymm5,ymm7
6266	vpaddd	ymm6,ymm6,ymm7
6267	vpaddd	ymm8,ymm8,ymm11
6268	vpaddd	ymm9,ymm9,ymm11
6269	vpaddd	ymm10,ymm10,ymm11
6270	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6271	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6272	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = first 32 bytes (rows a,b) of the low-lane block of state 0.
6273	vperm2i128	ymm3,ymm4,ymm0,0x02
6274
; Mask those 32 bytes with $L$clamp and store them at [160+0+rbp]; this is
; the slot the Poly1305 multiply reads its two key qwords from.
6275	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
6276	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
6277
; Queue the remaining five blocks in stream order for $L$open_avx2_short:
; ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6
; (32 bytes each, 320 bytes in total). Rows c,d of the low-lane block of
; state 0 are not extracted.
6278	vperm2i128	ymm0,ymm4,ymm0,0x13
6279	vperm2i128	ymm4,ymm12,ymm8,0x13
6280	vperm2i128	ymm8,ymm5,ymm1,0x02
6281	vperm2i128	ymm12,ymm13,ymm9,0x02
6282	vperm2i128	ymm1,ymm5,ymm1,0x13
6283	vperm2i128	ymm5,ymm13,ymm9,0x13
6284	vperm2i128	ymm9,ymm6,ymm2,0x02
6285	vperm2i128	ymm13,ymm14,ymm10,0x02
6286	vperm2i128	ymm2,ymm6,ymm2,0x13
6287	vperm2i128	ymm6,ymm14,ymm10,0x13
6288	jmp	NEAR $L$open_avx2_short
6289
6290
6291
6292
6293
6294ALIGN	64
; Entry region of the AVX2 seal path (the rest of the routine continues
; past $L$seal_avx2_init_rounds).
; Register use visible here:
;   r9  - pointer to 48 bytes that become ChaCha20 rows b, c and d
;         (presumably key followed by counter||nonce - confirm with caller)
;   rbx - length, compared unsigned against 6*32 and 10*32
;   rbp - base of the stack scratch area addressed as [160+...+rbp]
6295chacha20_poly1305_seal_avx2:
6296
6297
6298
6299
6300
6301
6302
6303
6304
6305
6306
6307
6308	vzeroupper
; Row a = constants; rows b/c/d = 16-byte chunks at r9, copied into both lanes.
6309	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6310	vbroadcasti128	ymm4,XMMWORD[r9]
6311	vbroadcasti128	ymm8,XMMWORD[16+r9]
6312	vbroadcasti128	ymm12,XMMWORD[32+r9]
; The 32 bytes at $L$avx2_init are dwords 0,0,0,0 followed by 1,0,0,0 (the
; start of $L$sse_inc), so the high lane's first dword is bumped by 1.
6313	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
; Short inputs take the dedicated 192- and 320-byte paths.
6314	cmp	rbx,6*32
6315	jbe	NEAR $L$seal_avx2_192
6316	cmp	rbx,10*32
6317	jbe	NEAR $L$seal_avx2_320
; Otherwise set up four states: rows a in ymm0-3, b in ymm4-7, c in ymm8-11,
; d in ymm12-15. Rows b and c are also saved at [160+64+rbp] / [160+96+rbp].
6318	vmovdqa	ymm1,ymm0
6319	vmovdqa	ymm2,ymm0
6320	vmovdqa	ymm3,ymm0
6321	vmovdqa	ymm5,ymm4
6322	vmovdqa	ymm6,ymm4
6323	vmovdqa	ymm7,ymm4
6324	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
6325	vmovdqa	ymm9,ymm8
6326	vmovdqa	ymm10,ymm8
6327	vmovdqa	ymm11,ymm8
6328	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
; d rows: ymm15 keeps the starting value; ymm14, ymm13, ymm12 each add
; $L$avx2_inc (+2 in the first dword of each lane) to the previous one.
6329	vmovdqa	ymm15,ymm12
6330	vpaddd	ymm14,ymm15,YMMWORD[$L$avx2_inc]
6331	vpaddd	ymm13,ymm14,YMMWORD[$L$avx2_inc]
6332	vpaddd	ymm12,ymm13,YMMWORD[$L$avx2_inc]
; Save the initial d rows for the post-round addition.
6333	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6334	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6335	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6336	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
; r10 = number of double rounds for the loop that follows.
6337	mov	r10,10
6338$L$seal_avx2_init_rounds:
6339	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6340	vmovdqa	ymm8,YMMWORD[$L$rol16]
6341	vpaddd	ymm3,ymm3,ymm7
6342	vpaddd	ymm2,ymm2,ymm6
6343	vpaddd	ymm1,ymm1,ymm5
6344	vpaddd	ymm0,ymm0,ymm4
6345	vpxor	ymm15,ymm15,ymm3
6346	vpxor	ymm14,ymm14,ymm2
6347	vpxor	ymm13,ymm13,ymm1
6348	vpxor	ymm12,ymm12,ymm0
6349	vpshufb	ymm15,ymm15,ymm8
6350	vpshufb	ymm14,ymm14,ymm8
6351	vpshufb	ymm13,ymm13,ymm8
6352	vpshufb	ymm12,ymm12,ymm8
6353	vpaddd	ymm11,ymm11,ymm15
6354	vpaddd	ymm10,ymm10,ymm14
6355	vpaddd	ymm9,ymm9,ymm13
6356	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6357	vpxor	ymm7,ymm7,ymm11
6358	vpxor	ymm6,ymm6,ymm10
6359	vpxor	ymm5,ymm5,ymm9
6360	vpxor	ymm4,ymm4,ymm8
6361	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6362	vpsrld	ymm8,ymm7,20
6363	vpslld	ymm7,ymm7,32-20
6364	vpxor	ymm7,ymm7,ymm8
6365	vpsrld	ymm8,ymm6,20
6366	vpslld	ymm6,ymm6,32-20
6367	vpxor	ymm6,ymm6,ymm8
6368	vpsrld	ymm8,ymm5,20
6369	vpslld	ymm5,ymm5,32-20
6370	vpxor	ymm5,ymm5,ymm8
6371	vpsrld	ymm8,ymm4,20
6372	vpslld	ymm4,ymm4,32-20
6373	vpxor	ymm4,ymm4,ymm8
6374	vmovdqa	ymm8,YMMWORD[$L$rol8]
6375	vpaddd	ymm3,ymm3,ymm7
6376	vpaddd	ymm2,ymm2,ymm6
6377	vpaddd	ymm1,ymm1,ymm5
6378	vpaddd	ymm0,ymm0,ymm4
6379	vpxor	ymm15,ymm15,ymm3
6380	vpxor	ymm14,ymm14,ymm2
6381	vpxor	ymm13,ymm13,ymm1
6382	vpxor	ymm12,ymm12,ymm0
6383	vpshufb	ymm15,ymm15,ymm8
6384	vpshufb	ymm14,ymm14,ymm8
6385	vpshufb	ymm13,ymm13,ymm8
6386	vpshufb	ymm12,ymm12,ymm8
6387	vpaddd	ymm11,ymm11,ymm15
6388	vpaddd	ymm10,ymm10,ymm14
6389	vpaddd	ymm9,ymm9,ymm13
6390	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6391	vpxor	ymm7,ymm7,ymm11
6392	vpxor	ymm6,ymm6,ymm10
6393	vpxor	ymm5,ymm5,ymm9
6394	vpxor	ymm4,ymm4,ymm8
6395	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6396	vpsrld	ymm8,ymm7,25
6397	vpslld	ymm7,ymm7,32-25
6398	vpxor	ymm7,ymm7,ymm8
6399	vpsrld	ymm8,ymm6,25
6400	vpslld	ymm6,ymm6,32-25
6401	vpxor	ymm6,ymm6,ymm8
6402	vpsrld	ymm8,ymm5,25
6403	vpslld	ymm5,ymm5,32-25
6404	vpxor	ymm5,ymm5,ymm8
6405	vpsrld	ymm8,ymm4,25
6406	vpslld	ymm4,ymm4,32-25
6407	vpxor	ymm4,ymm4,ymm8
6408	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6409	vpalignr	ymm7,ymm7,ymm7,4
6410	vpalignr	ymm11,ymm11,ymm11,8
6411	vpalignr	ymm15,ymm15,ymm15,12
6412	vpalignr	ymm6,ymm6,ymm6,4
6413	vpalignr	ymm10,ymm10,ymm10,8
6414	vpalignr	ymm14,ymm14,ymm14,12
6415	vpalignr	ymm5,ymm5,ymm5,4
6416	vpalignr	ymm9,ymm9,ymm9,8
6417	vpalignr	ymm13,ymm13,ymm13,12
6418	vpalignr	ymm4,ymm4,ymm4,4
6419	vpalignr	ymm8,ymm8,ymm8,8
6420	vpalignr	ymm12,ymm12,ymm12,12
6421	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6422	vmovdqa	ymm8,YMMWORD[$L$rol16]
6423	vpaddd	ymm3,ymm3,ymm7
6424	vpaddd	ymm2,ymm2,ymm6
6425	vpaddd	ymm1,ymm1,ymm5
6426	vpaddd	ymm0,ymm0,ymm4
6427	vpxor	ymm15,ymm15,ymm3
6428	vpxor	ymm14,ymm14,ymm2
6429	vpxor	ymm13,ymm13,ymm1
6430	vpxor	ymm12,ymm12,ymm0
6431	vpshufb	ymm15,ymm15,ymm8
6432	vpshufb	ymm14,ymm14,ymm8
6433	vpshufb	ymm13,ymm13,ymm8
6434	vpshufb	ymm12,ymm12,ymm8
6435	vpaddd	ymm11,ymm11,ymm15
6436	vpaddd	ymm10,ymm10,ymm14
6437	vpaddd	ymm9,ymm9,ymm13
6438	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6439	vpxor	ymm7,ymm7,ymm11
6440	vpxor	ymm6,ymm6,ymm10
6441	vpxor	ymm5,ymm5,ymm9
6442	vpxor	ymm4,ymm4,ymm8
6443	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6444	vpsrld	ymm8,ymm7,20
6445	vpslld	ymm7,ymm7,32-20
6446	vpxor	ymm7,ymm7,ymm8
6447	vpsrld	ymm8,ymm6,20
6448	vpslld	ymm6,ymm6,32-20
6449	vpxor	ymm6,ymm6,ymm8
6450	vpsrld	ymm8,ymm5,20
6451	vpslld	ymm5,ymm5,32-20
6452	vpxor	ymm5,ymm5,ymm8
6453	vpsrld	ymm8,ymm4,20
6454	vpslld	ymm4,ymm4,32-20
6455	vpxor	ymm4,ymm4,ymm8
6456	vmovdqa	ymm8,YMMWORD[$L$rol8]
6457	vpaddd	ymm3,ymm3,ymm7
6458	vpaddd	ymm2,ymm2,ymm6
6459	vpaddd	ymm1,ymm1,ymm5
6460	vpaddd	ymm0,ymm0,ymm4
6461	vpxor	ymm15,ymm15,ymm3
6462	vpxor	ymm14,ymm14,ymm2
6463	vpxor	ymm13,ymm13,ymm1
6464	vpxor	ymm12,ymm12,ymm0
6465	vpshufb	ymm15,ymm15,ymm8
6466	vpshufb	ymm14,ymm14,ymm8
6467	vpshufb	ymm13,ymm13,ymm8
6468	vpshufb	ymm12,ymm12,ymm8
6469	vpaddd	ymm11,ymm11,ymm15
6470	vpaddd	ymm10,ymm10,ymm14
6471	vpaddd	ymm9,ymm9,ymm13
6472	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6473	vpxor	ymm7,ymm7,ymm11
6474	vpxor	ymm6,ymm6,ymm10
6475	vpxor	ymm5,ymm5,ymm9
6476	vpxor	ymm4,ymm4,ymm8
6477	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6478	vpsrld	ymm8,ymm7,25
6479	vpslld	ymm7,ymm7,32-25
6480	vpxor	ymm7,ymm7,ymm8
6481	vpsrld	ymm8,ymm6,25
6482	vpslld	ymm6,ymm6,32-25
6483	vpxor	ymm6,ymm6,ymm8
6484	vpsrld	ymm8,ymm5,25
6485	vpslld	ymm5,ymm5,32-25
6486	vpxor	ymm5,ymm5,ymm8
6487	vpsrld	ymm8,ymm4,25
6488	vpslld	ymm4,ymm4,32-25
6489	vpxor	ymm4,ymm4,ymm8
6490	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6491	vpalignr	ymm7,ymm7,ymm7,12
6492	vpalignr	ymm11,ymm11,ymm11,8
6493	vpalignr	ymm15,ymm15,ymm15,4
6494	vpalignr	ymm6,ymm6,ymm6,12
6495	vpalignr	ymm10,ymm10,ymm10,8
6496	vpalignr	ymm14,ymm14,ymm14,4
6497	vpalignr	ymm5,ymm5,ymm5,12
6498	vpalignr	ymm9,ymm9,ymm9,8
6499	vpalignr	ymm13,ymm13,ymm13,4
6500	vpalignr	ymm4,ymm4,ymm4,12
6501	vpalignr	ymm8,ymm8,ymm8,8
6502	vpalignr	ymm12,ymm12,ymm12,4
6503
6504	dec	r10
6505	jnz	NEAR $L$seal_avx2_init_rounds
6506	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
6507	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
6508	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
6509	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
6510	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
6511	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
6512	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
6513	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
6514	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
6515	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
6516	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
6517	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
6518	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
6519	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
6520	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
6521	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
6522
6523	vperm2i128	ymm11,ymm15,ymm11,0x13
6524	vperm2i128	ymm15,ymm7,ymm3,0x02
6525	vperm2i128	ymm3,ymm7,ymm3,0x13
6526	vpand	ymm15,ymm15,YMMWORD[$L$clamp]
6527	vmovdqa	YMMWORD[(160+0)+rbp],ymm15
6528	mov	r8,r8
6529	call	poly_hash_ad_internal
6530
6531	vpxor	ymm3,ymm3,YMMWORD[rsi]
6532	vpxor	ymm11,ymm11,YMMWORD[32+rsi]
6533	vmovdqu	YMMWORD[rdi],ymm3
6534	vmovdqu	YMMWORD[32+rdi],ymm11
6535	vperm2i128	ymm15,ymm6,ymm2,0x02
6536	vperm2i128	ymm6,ymm6,ymm2,0x13
6537	vperm2i128	ymm2,ymm14,ymm10,0x02
6538	vperm2i128	ymm10,ymm14,ymm10,0x13
6539	vpxor	ymm15,ymm15,YMMWORD[((0+64))+rsi]
6540	vpxor	ymm2,ymm2,YMMWORD[((32+64))+rsi]
6541	vpxor	ymm6,ymm6,YMMWORD[((64+64))+rsi]
6542	vpxor	ymm10,ymm10,YMMWORD[((96+64))+rsi]
6543	vmovdqu	YMMWORD[(0+64)+rdi],ymm15
6544	vmovdqu	YMMWORD[(32+64)+rdi],ymm2
6545	vmovdqu	YMMWORD[(64+64)+rdi],ymm6
6546	vmovdqu	YMMWORD[(96+64)+rdi],ymm10
6547	vperm2i128	ymm15,ymm5,ymm1,0x02
6548	vperm2i128	ymm5,ymm5,ymm1,0x13
6549	vperm2i128	ymm1,ymm13,ymm9,0x02
6550	vperm2i128	ymm9,ymm13,ymm9,0x13
6551	vpxor	ymm15,ymm15,YMMWORD[((0+192))+rsi]
6552	vpxor	ymm1,ymm1,YMMWORD[((32+192))+rsi]
6553	vpxor	ymm5,ymm5,YMMWORD[((64+192))+rsi]
6554	vpxor	ymm9,ymm9,YMMWORD[((96+192))+rsi]
6555	vmovdqu	YMMWORD[(0+192)+rdi],ymm15
6556	vmovdqu	YMMWORD[(32+192)+rdi],ymm1
6557	vmovdqu	YMMWORD[(64+192)+rdi],ymm5
6558	vmovdqu	YMMWORD[(96+192)+rdi],ymm9
6559	vperm2i128	ymm15,ymm4,ymm0,0x13
6560	vperm2i128	ymm0,ymm4,ymm0,0x02
6561	vperm2i128	ymm4,ymm12,ymm8,0x02
6562	vperm2i128	ymm12,ymm12,ymm8,0x13
6563	vmovdqa	ymm8,ymm15
6564
6565	lea	rsi,[320+rsi]
6566	sub	rbx,10*32
6567	mov	rcx,10*32
6568	cmp	rbx,4*32
6569	jbe	NEAR $L$seal_avx2_short_hash_remainder
6570	vpxor	ymm0,ymm0,YMMWORD[rsi]
6571	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
6572	vpxor	ymm8,ymm8,YMMWORD[64+rsi]
6573	vpxor	ymm12,ymm12,YMMWORD[96+rsi]
6574	vmovdqu	YMMWORD[320+rdi],ymm0
6575	vmovdqu	YMMWORD[352+rdi],ymm4
6576	vmovdqu	YMMWORD[384+rdi],ymm8
6577	vmovdqu	YMMWORD[416+rdi],ymm12
6578	lea	rsi,[128+rsi]
6579	sub	rbx,4*32
6580	mov	rcx,8
6581	mov	r8,2
6582	cmp	rbx,4*32
6583	jbe	NEAR $L$seal_avx2_tail_128
6584	cmp	rbx,8*32
6585	jbe	NEAR $L$seal_avx2_tail_256
6586	cmp	rbx,12*32
6587	jbe	NEAR $L$seal_avx2_tail_384
6588	cmp	rbx,16*32
6589	jbe	NEAR $L$seal_avx2_tail_512
6590	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6591	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6592	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6593	vmovdqa	ymm1,ymm0
6594	vmovdqa	ymm5,ymm4
6595	vmovdqa	ymm9,ymm8
6596	vmovdqa	ymm2,ymm0
6597	vmovdqa	ymm6,ymm4
6598	vmovdqa	ymm10,ymm8
6599	vmovdqa	ymm3,ymm0
6600	vmovdqa	ymm7,ymm4
6601	vmovdqa	ymm11,ymm8
6602	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6603	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6604	vpaddd	ymm14,ymm12,ymm15
6605	vpaddd	ymm13,ymm12,ymm14
6606	vpaddd	ymm12,ymm12,ymm13
6607	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6608	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6609	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6610	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6611	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6612	vmovdqa	ymm8,YMMWORD[$L$rol16]
6613	vpaddd	ymm3,ymm3,ymm7
6614	vpaddd	ymm2,ymm2,ymm6
6615	vpaddd	ymm1,ymm1,ymm5
6616	vpaddd	ymm0,ymm0,ymm4
6617	vpxor	ymm15,ymm15,ymm3
6618	vpxor	ymm14,ymm14,ymm2
6619	vpxor	ymm13,ymm13,ymm1
6620	vpxor	ymm12,ymm12,ymm0
6621	vpshufb	ymm15,ymm15,ymm8
6622	vpshufb	ymm14,ymm14,ymm8
6623	vpshufb	ymm13,ymm13,ymm8
6624	vpshufb	ymm12,ymm12,ymm8
6625	vpaddd	ymm11,ymm11,ymm15
6626	vpaddd	ymm10,ymm10,ymm14
6627	vpaddd	ymm9,ymm9,ymm13
6628	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6629	vpxor	ymm7,ymm7,ymm11
6630	vpxor	ymm6,ymm6,ymm10
6631	vpxor	ymm5,ymm5,ymm9
6632	vpxor	ymm4,ymm4,ymm8
6633	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6634	vpsrld	ymm8,ymm7,20
6635	vpslld	ymm7,ymm7,32-20
6636	vpxor	ymm7,ymm7,ymm8
6637	vpsrld	ymm8,ymm6,20
6638	vpslld	ymm6,ymm6,32-20
6639	vpxor	ymm6,ymm6,ymm8
6640	vpsrld	ymm8,ymm5,20
6641	vpslld	ymm5,ymm5,32-20
6642	vpxor	ymm5,ymm5,ymm8
6643	vpsrld	ymm8,ymm4,20
6644	vpslld	ymm4,ymm4,32-20
6645	vpxor	ymm4,ymm4,ymm8
6646	vmovdqa	ymm8,YMMWORD[$L$rol8]
6647	vpaddd	ymm3,ymm3,ymm7
6648	vpaddd	ymm2,ymm2,ymm6
6649	vpaddd	ymm1,ymm1,ymm5
6650	vpaddd	ymm0,ymm0,ymm4
6651	vpxor	ymm15,ymm15,ymm3
6652	vpxor	ymm14,ymm14,ymm2
6653	vpxor	ymm13,ymm13,ymm1
6654	vpxor	ymm12,ymm12,ymm0
6655	vpshufb	ymm15,ymm15,ymm8
6656	vpshufb	ymm14,ymm14,ymm8
6657	vpshufb	ymm13,ymm13,ymm8
6658	vpshufb	ymm12,ymm12,ymm8
6659	vpaddd	ymm11,ymm11,ymm15
6660	vpaddd	ymm10,ymm10,ymm14
6661	vpaddd	ymm9,ymm9,ymm13
6662	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6663	vpxor	ymm7,ymm7,ymm11
6664	vpxor	ymm6,ymm6,ymm10
6665	vpxor	ymm5,ymm5,ymm9
6666	vpxor	ymm4,ymm4,ymm8
6667	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6668	vpsrld	ymm8,ymm7,25
6669	vpslld	ymm7,ymm7,32-25
6670	vpxor	ymm7,ymm7,ymm8
6671	vpsrld	ymm8,ymm6,25
6672	vpslld	ymm6,ymm6,32-25
6673	vpxor	ymm6,ymm6,ymm8
6674	vpsrld	ymm8,ymm5,25
6675	vpslld	ymm5,ymm5,32-25
6676	vpxor	ymm5,ymm5,ymm8
6677	vpsrld	ymm8,ymm4,25
6678	vpslld	ymm4,ymm4,32-25
6679	vpxor	ymm4,ymm4,ymm8
6680	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6681	vpalignr	ymm7,ymm7,ymm7,4
6682	vpalignr	ymm11,ymm11,ymm11,8
6683	vpalignr	ymm15,ymm15,ymm15,12
6684	vpalignr	ymm6,ymm6,ymm6,4
6685	vpalignr	ymm10,ymm10,ymm10,8
6686	vpalignr	ymm14,ymm14,ymm14,12
6687	vpalignr	ymm5,ymm5,ymm5,4
6688	vpalignr	ymm9,ymm9,ymm9,8
6689	vpalignr	ymm13,ymm13,ymm13,12
6690	vpalignr	ymm4,ymm4,ymm4,4
6691	vpalignr	ymm8,ymm8,ymm8,8
6692	vpalignr	ymm12,ymm12,ymm12,12
6693	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6694	vmovdqa	ymm8,YMMWORD[$L$rol16]
6695	vpaddd	ymm3,ymm3,ymm7
6696	vpaddd	ymm2,ymm2,ymm6
6697	vpaddd	ymm1,ymm1,ymm5
6698	vpaddd	ymm0,ymm0,ymm4
6699	vpxor	ymm15,ymm15,ymm3
6700	vpxor	ymm14,ymm14,ymm2
6701	vpxor	ymm13,ymm13,ymm1
6702	vpxor	ymm12,ymm12,ymm0
6703	vpshufb	ymm15,ymm15,ymm8
6704	vpshufb	ymm14,ymm14,ymm8
6705	vpshufb	ymm13,ymm13,ymm8
6706	vpshufb	ymm12,ymm12,ymm8
6707	vpaddd	ymm11,ymm11,ymm15
6708	vpaddd	ymm10,ymm10,ymm14
6709	vpaddd	ymm9,ymm9,ymm13
6710	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6711	vpxor	ymm7,ymm7,ymm11
6712	vpxor	ymm6,ymm6,ymm10
6713	vpxor	ymm5,ymm5,ymm9
6714	vpxor	ymm4,ymm4,ymm8
6715	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6716	vpsrld	ymm8,ymm7,20
6717	vpslld	ymm7,ymm7,32-20
6718	vpxor	ymm7,ymm7,ymm8
6719	vpsrld	ymm8,ymm6,20
6720	vpslld	ymm6,ymm6,32-20
6721	vpxor	ymm6,ymm6,ymm8
6722	vpsrld	ymm8,ymm5,20
6723	vpslld	ymm5,ymm5,32-20
6724	vpxor	ymm5,ymm5,ymm8
6725	vpsrld	ymm8,ymm4,20
6726	vpslld	ymm4,ymm4,32-20
6727	vpxor	ymm4,ymm4,ymm8
6728	vmovdqa	ymm8,YMMWORD[$L$rol8]
6729	vpaddd	ymm3,ymm3,ymm7
6730	vpaddd	ymm2,ymm2,ymm6
6731	vpaddd	ymm1,ymm1,ymm5
6732	vpaddd	ymm0,ymm0,ymm4
6733	vpxor	ymm15,ymm15,ymm3
6734	vpxor	ymm14,ymm14,ymm2
6735	vpxor	ymm13,ymm13,ymm1
6736	vpxor	ymm12,ymm12,ymm0
6737	vpshufb	ymm15,ymm15,ymm8
6738	vpshufb	ymm14,ymm14,ymm8
6739	vpshufb	ymm13,ymm13,ymm8
6740	vpshufb	ymm12,ymm12,ymm8
6741	vpaddd	ymm11,ymm11,ymm15
6742	vpaddd	ymm10,ymm10,ymm14
6743	vpaddd	ymm9,ymm9,ymm13
6744	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6745	vpxor	ymm7,ymm7,ymm11
6746	vpxor	ymm6,ymm6,ymm10
6747	vpxor	ymm5,ymm5,ymm9
6748	vpxor	ymm4,ymm4,ymm8
6749	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6750	vpsrld	ymm8,ymm7,25
6751	vpslld	ymm7,ymm7,32-25
6752	vpxor	ymm7,ymm7,ymm8
6753	vpsrld	ymm8,ymm6,25
6754	vpslld	ymm6,ymm6,32-25
6755	vpxor	ymm6,ymm6,ymm8
6756	vpsrld	ymm8,ymm5,25
6757	vpslld	ymm5,ymm5,32-25
6758	vpxor	ymm5,ymm5,ymm8
6759	vpsrld	ymm8,ymm4,25
6760	vpslld	ymm4,ymm4,32-25
6761	vpxor	ymm4,ymm4,ymm8
6762	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
6763	vpalignr	ymm7,ymm7,ymm7,12
6764	vpalignr	ymm11,ymm11,ymm11,8
6765	vpalignr	ymm15,ymm15,ymm15,4
6766	vpalignr	ymm6,ymm6,ymm6,12
6767	vpalignr	ymm10,ymm10,ymm10,8
6768	vpalignr	ymm14,ymm14,ymm14,4
6769	vpalignr	ymm5,ymm5,ymm5,12
6770	vpalignr	ymm9,ymm9,ymm9,8
6771	vpalignr	ymm13,ymm13,ymm13,4
6772	vpalignr	ymm4,ymm4,ymm4,12
6773	vpalignr	ymm8,ymm8,ymm8,8
6774	vpalignr	ymm12,ymm12,ymm12,4
6775	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6776	vmovdqa	ymm8,YMMWORD[$L$rol16]
6777	vpaddd	ymm3,ymm3,ymm7
6778	vpaddd	ymm2,ymm2,ymm6
6779	vpaddd	ymm1,ymm1,ymm5
6780	vpaddd	ymm0,ymm0,ymm4
6781	vpxor	ymm15,ymm15,ymm3
6782	vpxor	ymm14,ymm14,ymm2
6783	vpxor	ymm13,ymm13,ymm1
6784	vpxor	ymm12,ymm12,ymm0
6785	vpshufb	ymm15,ymm15,ymm8
6786	vpshufb	ymm14,ymm14,ymm8
6787	vpshufb	ymm13,ymm13,ymm8
6788	vpshufb	ymm12,ymm12,ymm8
6789	vpaddd	ymm11,ymm11,ymm15
6790	vpaddd	ymm10,ymm10,ymm14
6791	vpaddd	ymm9,ymm9,ymm13
6792	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6793	vpxor	ymm7,ymm7,ymm11
6794	vpxor	ymm6,ymm6,ymm10
6795	vpxor	ymm5,ymm5,ymm9
6796	vpxor	ymm4,ymm4,ymm8
6797	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6798	vpsrld	ymm8,ymm7,20
6799	vpslld	ymm7,ymm7,32-20
6800	vpxor	ymm7,ymm7,ymm8
6801	vpsrld	ymm8,ymm6,20
6802	vpslld	ymm6,ymm6,32-20
6803	vpxor	ymm6,ymm6,ymm8
6804	vpsrld	ymm8,ymm5,20
6805	vpslld	ymm5,ymm5,32-20
6806	vpxor	ymm5,ymm5,ymm8
6807	vpsrld	ymm8,ymm4,20
6808	vpslld	ymm4,ymm4,32-20
6809	vpxor	ymm4,ymm4,ymm8
6810	vmovdqa	ymm8,YMMWORD[$L$rol8]
6811	vpaddd	ymm3,ymm3,ymm7
6812	vpaddd	ymm2,ymm2,ymm6
6813	vpaddd	ymm1,ymm1,ymm5
6814	vpaddd	ymm0,ymm0,ymm4
6815	vpxor	ymm15,ymm15,ymm3
6816
6817	sub	rdi,16
6818	mov	rcx,9
6819	jmp	NEAR $L$seal_avx2_main_loop_rounds_entry
; Top of the 512-byte bulk loop: rebuild four copies of the ChaCha20 state.
; ymm0-ymm3 take the constants row; ymm4-ymm7 and ymm8-ymm11 take the rows
; kept at (160+64)+rbp and (160+96)+rbp.
6820ALIGN	32
6821$L$seal_avx2_main_loop:
6822	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
6823	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
6824	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
6825	vmovdqa	ymm1,ymm0
6826	vmovdqa	ymm5,ymm4
6827	vmovdqa	ymm9,ymm8
6828	vmovdqa	ymm2,ymm0
6829	vmovdqa	ymm6,ymm4
6830	vmovdqa	ymm10,ymm8
6831	vmovdqa	ymm3,ymm0
6832	vmovdqa	ymm7,ymm4
6833	vmovdqa	ymm11,ymm8
; Counter rows: start from the row saved at (160+160)+rbp and add $L$avx2_inc
; (2 in the first dword of each 128-bit lane) once per copy, giving
; ymm15 < ymm14 < ymm13 < ymm12. Each row is saved (slots 160+256, +224,
; +192, +160) because the finalize step after the rounds adds it back in;
; slot 160+160 ends up holding the highest counter for the next iteration.
6834	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
6835	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
6836	vpaddd	ymm14,ymm12,ymm15
6837	vpaddd	ymm13,ymm12,ymm14
6838	vpaddd	ymm12,ymm12,ymm13
6839	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
6840	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
6841	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
6842	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
6843
; rcx = number of passes through the rounds loop below (each pass performs
; two quarter-round sweeps over all four state copies).
6844	mov	rcx,10
; First part of the rounds loop body. Vector work (the first quarter-round
; sweep over the four state copies) is interleaved with one scalar hash step
; on the 16 bytes at rdi+0. The hash accumulator lives in r12:r11:r10 and the
; multiplier is the pair of qwords at (160+0)+rbp and (160+8)+rbp.
6845ALIGN	32
6846$L$seal_avx2_main_loop_rounds:
; acc += 16 bytes at rdi, plus 1 added into the third limb (bit 128).
6847	add	r10,QWORD[((0+0))+rdi]
6848	adc	r11,QWORD[((8+0))+rdi]
6849	adc	r12,1
; ymm8 is needed as a scratch/mask register, so its state row is spilled to
; the slot at (160+128)+rbp and re-read from there when required.
6850	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6851	vmovdqa	ymm8,YMMWORD[$L$rol16]
6852	vpaddd	ymm3,ymm3,ymm7
6853	vpaddd	ymm2,ymm2,ymm6
6854	vpaddd	ymm1,ymm1,ymm5
6855	vpaddd	ymm0,ymm0,ymm4
6856	vpxor	ymm15,ymm15,ymm3
6857	vpxor	ymm14,ymm14,ymm2
6858	vpxor	ymm13,ymm13,ymm1
6859	vpxor	ymm12,ymm12,ymm0
; Multiply step, low multiplier qword: r14:r13 = m0*acc0, then add m0*acc1
; and m0*acc2 (imul) into r15:r14.
6860	mov	rdx,QWORD[((0+160+0))+rbp]
6861	mov	r15,rdx
6862	mulx	r14,r13,r10
6863	mulx	rdx,rax,r11
6864	imul	r15,r12
6865	add	r14,rax
6866	adc	r15,rdx
; Rotate-left-by-16 of the fourth rows via byte shuffle with the $L$rol16 mask.
6867	vpshufb	ymm15,ymm15,ymm8
6868	vpshufb	ymm14,ymm14,ymm8
6869	vpshufb	ymm13,ymm13,ymm8
6870	vpshufb	ymm12,ymm12,ymm8
6871	vpaddd	ymm11,ymm11,ymm15
6872	vpaddd	ymm10,ymm10,ymm14
6873	vpaddd	ymm9,ymm9,ymm13
6874	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6875	vpxor	ymm7,ymm7,ymm11
; Multiply step, high multiplier qword: partial products with acc0, acc1 and
; acc2 are accumulated into r14, r15 and r9.
6876	mov	rdx,QWORD[((8+160+0))+rbp]
6877	mulx	rax,r10,r10
6878	add	r14,r10
6879	mulx	r9,r11,r11
6880	adc	r15,r11
6881	adc	r9,0
6882	imul	rdx,r12
6883	vpxor	ymm6,ymm6,ymm10
6884	vpxor	ymm5,ymm5,ymm9
6885	vpxor	ymm4,ymm4,ymm8
6886	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate-left-by-12 of the second rows: (x >> 20) ^ (x << 12), ymm8 as scratch.
6887	vpsrld	ymm8,ymm7,20
6888	vpslld	ymm7,ymm7,32-20
6889	vpxor	ymm7,ymm7,ymm8
6890	vpsrld	ymm8,ymm6,20
6891	vpslld	ymm6,ymm6,32-20
6892	vpxor	ymm6,ymm6,ymm8
6893	vpsrld	ymm8,ymm5,20
6894	vpslld	ymm5,ymm5,32-20
6895	add	r15,rax
6896	adc	r9,rdx
6897	vpxor	ymm5,ymm5,ymm8
6898	vpsrld	ymm8,ymm4,20
6899	vpslld	ymm4,ymm4,32-20
6900	vpxor	ymm4,ymm4,ymm8
6901	vmovdqa	ymm8,YMMWORD[$L$rol8]
6902	vpaddd	ymm3,ymm3,ymm7
6903	vpaddd	ymm2,ymm2,ymm6
6904	vpaddd	ymm1,ymm1,ymm5
6905	vpaddd	ymm0,ymm0,ymm4
6906	vpxor	ymm15,ymm15,ymm3
; Reduction of the product r9:r15:r14:r13. The low 130 bits are kept
; (r13, r14, r15 & 3). The bits above 2^130 are folded back in times 5:
; (r15 & -4, r9) is 4x that high part and the shrd/shr pair yields 1x.
; This is a partial reduction modulo 2^130 - 5.
6907	mov	r10,r13
6908	mov	r11,r14
6909	mov	r12,r15
6910	and	r12,3
6911	mov	r13,r15
6912	and	r13,-4
6913	mov	r14,r9
6914	shrd	r15,r9,2
6915	shr	r9,2
6916	add	r15,r13
6917	adc	r9,r14
6918	add	r10,r15
6919	adc	r11,r9
6920	adc	r12,0
6921
; Mid-round entry point and remainder of the rounds loop body. Besides the
; fall-through from the code above, the code ahead of the main loop jumps
; here (after "sub rdi,16" / "mov rcx,9") having done the same vector work up
; to this point without hashing. On both paths ymm8 holds the $L$rol8 mask and
; ymm15 has already been XORed with ymm3.
6922$L$seal_avx2_main_loop_rounds_entry:
6923	vpxor	ymm14,ymm14,ymm2
6924	vpxor	ymm13,ymm13,ymm1
6925	vpxor	ymm12,ymm12,ymm0
; Rotate-left-by-8 of the fourth rows via byte shuffle.
6926	vpshufb	ymm15,ymm15,ymm8
6927	vpshufb	ymm14,ymm14,ymm8
6928	vpshufb	ymm13,ymm13,ymm8
6929	vpshufb	ymm12,ymm12,ymm8
6930	vpaddd	ymm11,ymm11,ymm15
6931	vpaddd	ymm10,ymm10,ymm14
; Second hash step of this pass: acc += 16 bytes at rdi+16, plus 1 at bit 128.
6932	add	r10,QWORD[((0+16))+rdi]
6933	adc	r11,QWORD[((8+16))+rdi]
6934	adc	r12,1
6935	vpaddd	ymm9,ymm9,ymm13
6936	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
6937	vpxor	ymm7,ymm7,ymm11
6938	vpxor	ymm6,ymm6,ymm10
6939	vpxor	ymm5,ymm5,ymm9
6940	vpxor	ymm4,ymm4,ymm8
6941	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
; Rotate-left-by-7 of the second rows: (x >> 25) ^ (x << 7).
6942	vpsrld	ymm8,ymm7,25
6943	mov	rdx,QWORD[((0+160+0))+rbp]
6944	mov	r15,rdx
6945	mulx	r14,r13,r10
6946	mulx	rdx,rax,r11
6947	imul	r15,r12
6948	add	r14,rax
6949	adc	r15,rdx
6950	vpslld	ymm7,ymm7,32-25
6951	vpxor	ymm7,ymm7,ymm8
6952	vpsrld	ymm8,ymm6,25
6953	vpslld	ymm6,ymm6,32-25
6954	vpxor	ymm6,ymm6,ymm8
6955	vpsrld	ymm8,ymm5,25
6956	vpslld	ymm5,ymm5,32-25
6957	vpxor	ymm5,ymm5,ymm8
6958	vpsrld	ymm8,ymm4,25
6959	vpslld	ymm4,ymm4,32-25
6960	vpxor	ymm4,ymm4,ymm8
6961	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; Rotate rows within each 128-bit lane (second rows by 4 bytes, third by 8,
; fourth by 12) so the next sweep operates on the diagonals.
6962	vpalignr	ymm7,ymm7,ymm7,4
6963	vpalignr	ymm11,ymm11,ymm11,8
6964	vpalignr	ymm15,ymm15,ymm15,12
6965	vpalignr	ymm6,ymm6,ymm6,4
6966	vpalignr	ymm10,ymm10,ymm10,8
6967	vpalignr	ymm14,ymm14,ymm14,12
6968	mov	rdx,QWORD[((8+160+0))+rbp]
6969	mulx	rax,r10,r10
6970	add	r14,r10
6971	mulx	r9,r11,r11
6972	adc	r15,r11
6973	adc	r9,0
6974	imul	rdx,r12
6975	vpalignr	ymm5,ymm5,ymm5,4
6976	vpalignr	ymm9,ymm9,ymm9,8
6977	vpalignr	ymm13,ymm13,ymm13,12
6978	vpalignr	ymm4,ymm4,ymm4,4
6979	vpalignr	ymm8,ymm8,ymm8,8
6980	vpalignr	ymm12,ymm12,ymm12,12
; Second quarter-round sweep (rotations 16, 12, 8, 7 again).
6981	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
6982	vmovdqa	ymm8,YMMWORD[$L$rol16]
6983	vpaddd	ymm3,ymm3,ymm7
6984	vpaddd	ymm2,ymm2,ymm6
6985	vpaddd	ymm1,ymm1,ymm5
6986	vpaddd	ymm0,ymm0,ymm4
6987	vpxor	ymm15,ymm15,ymm3
6988	vpxor	ymm14,ymm14,ymm2
6989	vpxor	ymm13,ymm13,ymm1
6990	vpxor	ymm12,ymm12,ymm0
6991	vpshufb	ymm15,ymm15,ymm8
6992	vpshufb	ymm14,ymm14,ymm8
6993	add	r15,rax
6994	adc	r9,rdx
6995	vpshufb	ymm13,ymm13,ymm8
6996	vpshufb	ymm12,ymm12,ymm8
6997	vpaddd	ymm11,ymm11,ymm15
6998	vpaddd	ymm10,ymm10,ymm14
6999	vpaddd	ymm9,ymm9,ymm13
7000	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7001	vpxor	ymm7,ymm7,ymm11
7002	vpxor	ymm6,ymm6,ymm10
7003	vpxor	ymm5,ymm5,ymm9
; Reduce the product: keep the low 130 bits, add back 5 * (bits above 2^130).
7004	mov	r10,r13
7005	mov	r11,r14
7006	mov	r12,r15
7007	and	r12,3
7008	mov	r13,r15
7009	and	r13,-4
7010	mov	r14,r9
7011	shrd	r15,r9,2
7012	shr	r9,2
7013	add	r15,r13
7014	adc	r9,r14
7015	add	r10,r15
7016	adc	r11,r9
7017	adc	r12,0
7018	vpxor	ymm4,ymm4,ymm8
7019	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7020	vpsrld	ymm8,ymm7,20
7021	vpslld	ymm7,ymm7,32-20
7022	vpxor	ymm7,ymm7,ymm8
7023	vpsrld	ymm8,ymm6,20
7024	vpslld	ymm6,ymm6,32-20
7025	vpxor	ymm6,ymm6,ymm8
; Third hash step of this pass: acc += 16 bytes at rdi+32, plus 1 at bit 128.
7026	add	r10,QWORD[((0+32))+rdi]
7027	adc	r11,QWORD[((8+32))+rdi]
7028	adc	r12,1
7029
; 48 bytes have now been absorbed in this pass; advance the hash pointer.
7030	lea	rdi,[48+rdi]
7031	vpsrld	ymm8,ymm5,20
7032	vpslld	ymm5,ymm5,32-20
7033	vpxor	ymm5,ymm5,ymm8
7034	vpsrld	ymm8,ymm4,20
7035	vpslld	ymm4,ymm4,32-20
7036	vpxor	ymm4,ymm4,ymm8
7037	vmovdqa	ymm8,YMMWORD[$L$rol8]
7038	vpaddd	ymm3,ymm3,ymm7
7039	vpaddd	ymm2,ymm2,ymm6
7040	vpaddd	ymm1,ymm1,ymm5
7041	vpaddd	ymm0,ymm0,ymm4
7042	vpxor	ymm15,ymm15,ymm3
7043	vpxor	ymm14,ymm14,ymm2
7044	vpxor	ymm13,ymm13,ymm1
7045	vpxor	ymm12,ymm12,ymm0
7046	vpshufb	ymm15,ymm15,ymm8
7047	vpshufb	ymm14,ymm14,ymm8
7048	vpshufb	ymm13,ymm13,ymm8
7049	mov	rdx,QWORD[((0+160+0))+rbp]
7050	mov	r15,rdx
7051	mulx	r14,r13,r10
7052	mulx	rdx,rax,r11
7053	imul	r15,r12
7054	add	r14,rax
7055	adc	r15,rdx
7056	vpshufb	ymm12,ymm12,ymm8
7057	vpaddd	ymm11,ymm11,ymm15
7058	vpaddd	ymm10,ymm10,ymm14
7059	vpaddd	ymm9,ymm9,ymm13
7060	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
7061	vpxor	ymm7,ymm7,ymm11
7062	vpxor	ymm6,ymm6,ymm10
7063	vpxor	ymm5,ymm5,ymm9
7064	mov	rdx,QWORD[((8+160+0))+rbp]
7065	mulx	rax,r10,r10
7066	add	r14,r10
7067	mulx	r9,r11,r11
7068	adc	r15,r11
7069	adc	r9,0
7070	imul	rdx,r12
7071	vpxor	ymm4,ymm4,ymm8
7072	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
7073	vpsrld	ymm8,ymm7,25
7074	vpslld	ymm7,ymm7,32-25
7075	vpxor	ymm7,ymm7,ymm8
7076	vpsrld	ymm8,ymm6,25
7077	vpslld	ymm6,ymm6,32-25
7078	vpxor	ymm6,ymm6,ymm8
7079	add	r15,rax
7080	adc	r9,rdx
7081	vpsrld	ymm8,ymm5,25
7082	vpslld	ymm5,ymm5,32-25
7083	vpxor	ymm5,ymm5,ymm8
7084	vpsrld	ymm8,ymm4,25
7085	vpslld	ymm4,ymm4,32-25
7086	vpxor	ymm4,ymm4,ymm8
7087	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
; Undo the lane rotation (12 / 8 / 4 bytes) so rows are column-aligned again.
7088	vpalignr	ymm7,ymm7,ymm7,12
7089	vpalignr	ymm11,ymm11,ymm11,8
7090	vpalignr	ymm15,ymm15,ymm15,4
7091	vpalignr	ymm6,ymm6,ymm6,12
7092	vpalignr	ymm10,ymm10,ymm10,8
7093	vpalignr	ymm14,ymm14,ymm14,4
7094	vpalignr	ymm5,ymm5,ymm5,12
7095	vpalignr	ymm9,ymm9,ymm9,8
7096	vpalignr	ymm13,ymm13,ymm13,4
7097	vpalignr	ymm4,ymm4,ymm4,12
7098	vpalignr	ymm8,ymm8,ymm8,8
7099	mov	r10,r13
7100	mov	r11,r14
7101	mov	r12,r15
7102	and	r12,3
7103	mov	r13,r15
7104	and	r13,-4
7105	mov	r14,r9
7106	shrd	r15,r9,2
7107	shr	r9,2
7108	add	r15,r13
7109	adc	r9,r14
7110	add	r10,r15
7111	adc	r11,r9
7112	adc	r12,0
7113	vpalignr	ymm12,ymm12,ymm12,4
7114
; Repeat until rcx passes are done. On the fall-through path ymm8 again holds
; its state row rather than a mask.
7115	dec	rcx
7116	jne	NEAR $L$seal_avx2_main_loop_rounds
; After the rounds: add the pre-round state back into each of the four
; copies (constants, the rows at 160+64 / 160+96, and the per-copy counter
; rows saved at 160+256, +224, +192, +160 off rbp).
7117	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
7118	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
7119	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
7120	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
7121	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
7122	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
7123	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
7124	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
7125	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7126	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7127	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7128	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7129	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7130	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7131	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7132	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7133
; ymm0 is spilled so it can serve as scratch for the lane permutes below; it
; is reloaded once the first 128 bytes have been stored.
7134	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
; Two more hash steps (16 bytes at rdi+0 and rdi+16). Together with the
; 10 x 48 bytes absorbed in the rounds loop this makes 512 bytes per
; main-loop iteration.
7135	add	r10,QWORD[((0+0))+rdi]
7136	adc	r11,QWORD[((8+0))+rdi]
7137	adc	r12,1
7138	mov	rdx,QWORD[((0+160+0))+rbp]
7139	mov	r15,rdx
7140	mulx	r14,r13,r10
7141	mulx	rdx,rax,r11
7142	imul	r15,r12
7143	add	r14,rax
7144	adc	r15,rdx
7145	mov	rdx,QWORD[((8+160+0))+rbp]
7146	mulx	rax,r10,r10
7147	add	r14,r10
7148	mulx	r9,r11,r11
7149	adc	r15,r11
7150	adc	r9,0
7151	imul	rdx,r12
7152	add	r15,rax
7153	adc	r9,rdx
7154	mov	r10,r13
7155	mov	r11,r14
7156	mov	r12,r15
7157	and	r12,3
7158	mov	r13,r15
7159	and	r13,-4
7160	mov	r14,r9
7161	shrd	r15,r9,2
7162	shr	r9,2
7163	add	r15,r13
7164	adc	r9,r14
7165	add	r10,r15
7166	adc	r11,r9
7167	adc	r12,0
7168	add	r10,QWORD[((0+16))+rdi]
7169	adc	r11,QWORD[((8+16))+rdi]
7170	adc	r12,1
7171	mov	rdx,QWORD[((0+160+0))+rbp]
7172	mov	r15,rdx
7173	mulx	r14,r13,r10
7174	mulx	rdx,rax,r11
7175	imul	r15,r12
7176	add	r14,rax
7177	adc	r15,rdx
7178	mov	rdx,QWORD[((8+160+0))+rbp]
7179	mulx	rax,r10,r10
7180	add	r14,r10
7181	mulx	r9,r11,r11
7182	adc	r15,r11
7183	adc	r9,0
7184	imul	rdx,r12
7185	add	r15,rax
7186	adc	r9,rdx
7187	mov	r10,r13
7188	mov	r11,r14
7189	mov	r12,r15
7190	and	r12,3
7191	mov	r13,r15
7192	and	r13,-4
7193	mov	r14,r9
7194	shrd	r15,r9,2
7195	shr	r9,2
7196	add	r15,r13
7197	adc	r9,r14
7198	add	r10,r15
7199	adc	r11,r9
7200	adc	r12,0
7201
; rdi now points at the destination for this iteration's 512 bytes. The hash
; steps above read from the region just behind it.
7202	lea	rdi,[32+rdi]
; Each ymm row holds one 128-bit lane for each of two blocks. vperm2i128 with
; 0x02 gathers the low lanes of its two sources and 0x13 the high lanes,
; producing 32-byte pieces in output order. These are XORed with the input at
; rsi and stored to rdi, 128 bytes per state copy.
7203	vperm2i128	ymm0,ymm7,ymm3,0x02
7204	vperm2i128	ymm7,ymm7,ymm3,0x13
7205	vperm2i128	ymm3,ymm15,ymm11,0x02
7206	vperm2i128	ymm11,ymm15,ymm11,0x13
7207	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
7208	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
7209	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
7210	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
7211	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
7212	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
7213	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
7214	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
7215
; Restore the spilled ymm0 row; ymm3 takes over as the scratch register.
7216	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
7217	vperm2i128	ymm3,ymm6,ymm2,0x02
7218	vperm2i128	ymm6,ymm6,ymm2,0x13
7219	vperm2i128	ymm2,ymm14,ymm10,0x02
7220	vperm2i128	ymm10,ymm14,ymm10,0x13
7221	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
7222	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
7223	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
7224	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
7225	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
7226	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
7227	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
7228	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
7229	vperm2i128	ymm3,ymm5,ymm1,0x02
7230	vperm2i128	ymm5,ymm5,ymm1,0x13
7231	vperm2i128	ymm1,ymm13,ymm9,0x02
7232	vperm2i128	ymm9,ymm13,ymm9,0x13
7233	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
7234	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
7235	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
7236	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
7237	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
7238	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
7239	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
7240	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
7241	vperm2i128	ymm3,ymm4,ymm0,0x02
7242	vperm2i128	ymm4,ymm4,ymm0,0x13
7243	vperm2i128	ymm0,ymm12,ymm8,0x02
7244	vperm2i128	ymm8,ymm12,ymm8,0x13
7245	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
7246	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
7247	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
7248	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
7249	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
7250	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
7251	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
7252	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
7253
; Consume 512 input bytes; rbx is the remaining byte count. Loop while more
; than 512 bytes remain. rdi is deliberately not advanced here: the next
; iteration hashes the bytes just stored as it runs.
; NOTE(review): jg is a signed comparison; it matches the unsigned test only
; while rbx < 2^63 - presumably always true for a buffer length; confirm.
7254	lea	rsi,[512+rsi]
7255	sub	rbx,16*32
7256	cmp	rbx,16*32
7257	jg	NEAR $L$seal_avx2_main_loop
7258
; Main loop exit (at most 512 bytes of input remain). Absorb 32 of the bytes
; written by the last iteration (two 16-byte hash steps at rdi+0 and rdi+16),
; then pick a tail routine by remaining length.
7259	add	r10,QWORD[((0+0))+rdi]
7260	adc	r11,QWORD[((8+0))+rdi]
7261	adc	r12,1
7262	mov	rdx,QWORD[((0+160+0))+rbp]
7263	mov	r15,rdx
7264	mulx	r14,r13,r10
7265	mulx	rdx,rax,r11
7266	imul	r15,r12
7267	add	r14,rax
7268	adc	r15,rdx
7269	mov	rdx,QWORD[((8+160+0))+rbp]
7270	mulx	rax,r10,r10
7271	add	r14,r10
7272	mulx	r9,r11,r11
7273	adc	r15,r11
7274	adc	r9,0
7275	imul	rdx,r12
7276	add	r15,rax
7277	adc	r9,rdx
; Reduce: keep the low 130 bits, add back 5 * (bits above 2^130).
7278	mov	r10,r13
7279	mov	r11,r14
7280	mov	r12,r15
7281	and	r12,3
7282	mov	r13,r15
7283	and	r13,-4
7284	mov	r14,r9
7285	shrd	r15,r9,2
7286	shr	r9,2
7287	add	r15,r13
7288	adc	r9,r14
7289	add	r10,r15
7290	adc	r11,r9
7291	adc	r12,0
7292	add	r10,QWORD[((0+16))+rdi]
7293	adc	r11,QWORD[((8+16))+rdi]
7294	adc	r12,1
7295	mov	rdx,QWORD[((0+160+0))+rbp]
7296	mov	r15,rdx
7297	mulx	r14,r13,r10
7298	mulx	rdx,rax,r11
7299	imul	r15,r12
7300	add	r14,rax
7301	adc	r15,rdx
7302	mov	rdx,QWORD[((8+160+0))+rbp]
7303	mulx	rax,r10,r10
7304	add	r14,r10
7305	mulx	r9,r11,r11
7306	adc	r15,r11
7307	adc	r9,0
7308	imul	rdx,r12
7309	add	r15,rax
7310	adc	r9,rdx
7311	mov	r10,r13
7312	mov	r11,r14
7313	mov	r12,r15
7314	and	r12,3
7315	mov	r13,r15
7316	and	r13,-4
7317	mov	r14,r9
7318	shrd	r15,r9,2
7319	shr	r9,2
7320	add	r15,r13
7321	adc	r9,r14
7322	add	r10,r15
7323	adc	r11,r9
7324	adc	r12,0
7325
; Tail loop counters: rcx = 10 passes that each absorb 48 bytes, r8 = 0
; passes that absorb 32. With the 32 bytes hashed above that covers the
; 512 bytes written by the last main-loop iteration.
7326	lea	rdi,[32+rdi]
7327	mov	rcx,10
7328	xor	r8,r8
7329
; Dispatch on remaining bytes (unsigned): >384 -> tail_512, >256 -> tail_384,
; >128 -> tail_256, otherwise fall through into tail_128.
7330	cmp	rbx,12*32
7331	ja	NEAR $L$seal_avx2_tail_512
7332	cmp	rbx,8*32
7333	ja	NEAR $L$seal_avx2_tail_384
7334	cmp	rbx,4*32
7335	ja	NEAR $L$seal_avx2_tail_256
7336
; Tail for at most 128 remaining bytes: set up a single state copy in
; ymm0/ymm4/ymm8/ymm12. The counter row is the saved row at (160+160)+rbp
; advanced by $L$avx2_inc and written back for the finalize step.
; On entry rcx and r8 hold the pass counts used by the loop that follows.
7337$L$seal_avx2_tail_128:
7338	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7339	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7340	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7341	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7342	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7343	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7344
; Extra hash step for passes that absorb three 16-byte blocks: acc
; (r12:r11:r10) += 16 bytes at rdi plus 1 at bit 128, then multiply by the
; qword pair at (160+0)+rbp / (160+8)+rbp and reduce. Falls through into the
; two-hash pass body.
7345$L$seal_avx2_tail_128_rounds_and_3xhash:
7346	add	r10,QWORD[((0+0))+rdi]
7347	adc	r11,QWORD[((8+0))+rdi]
7348	adc	r12,1
7349	mov	rdx,QWORD[((0+160+0))+rbp]
7350	mov	r15,rdx
7351	mulx	r14,r13,r10
7352	mulx	rdx,rax,r11
7353	imul	r15,r12
7354	add	r14,rax
7355	adc	r15,rdx
7356	mov	rdx,QWORD[((8+160+0))+rbp]
7357	mulx	rax,r10,r10
7358	add	r14,r10
7359	mulx	r9,r11,r11
7360	adc	r15,r11
7361	adc	r9,0
7362	imul	rdx,r12
7363	add	r15,rax
7364	adc	r9,rdx
; Reduce: keep the low 130 bits, add back 5 * (bits above 2^130).
7365	mov	r10,r13
7366	mov	r11,r14
7367	mov	r12,r15
7368	and	r12,3
7369	mov	r13,r15
7370	and	r13,-4
7371	mov	r14,r9
7372	shrd	r15,r9,2
7373	shr	r9,2
7374	add	r15,r13
7375	adc	r9,r14
7376	add	r10,r15
7377	adc	r11,r9
7378	adc	r12,0
7379
7380	lea	rdi,[16+rdi]
; One pass of the single-copy tail: two quarter-round sweeps over
; ymm0/ymm4/ymm8/ymm12 (ymm3 is scratch), each followed by one 16-byte hash
; step (rdi+0, then rdi+16), after which rdi advances by 32.
7381$L$seal_avx2_tail_128_rounds_and_2xhash:
; First sweep: rotations by 16 and 8 use byte shuffles, 12 and 7 use shifts.
7382	vpaddd	ymm0,ymm0,ymm4
7383	vpxor	ymm12,ymm12,ymm0
7384	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7385	vpaddd	ymm8,ymm8,ymm12
7386	vpxor	ymm4,ymm4,ymm8
7387	vpsrld	ymm3,ymm4,20
7388	vpslld	ymm4,ymm4,12
7389	vpxor	ymm4,ymm4,ymm3
7390	vpaddd	ymm0,ymm0,ymm4
7391	vpxor	ymm12,ymm12,ymm0
7392	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7393	vpaddd	ymm8,ymm8,ymm12
7394	vpxor	ymm4,ymm4,ymm8
7395	vpslld	ymm3,ymm4,7
7396	vpsrld	ymm4,ymm4,25
7397	vpxor	ymm4,ymm4,ymm3
; Rotate rows within each 128-bit lane so the next sweep works on diagonals.
7398	vpalignr	ymm12,ymm12,ymm12,12
7399	vpalignr	ymm8,ymm8,ymm8,8
7400	vpalignr	ymm4,ymm4,ymm4,4
7401	add	r10,QWORD[((0+0))+rdi]
7402	adc	r11,QWORD[((8+0))+rdi]
7403	adc	r12,1
7404	mov	rdx,QWORD[((0+160+0))+rbp]
7405	mov	r15,rdx
7406	mulx	r14,r13,r10
7407	mulx	rdx,rax,r11
7408	imul	r15,r12
7409	add	r14,rax
7410	adc	r15,rdx
7411	mov	rdx,QWORD[((8+160+0))+rbp]
7412	mulx	rax,r10,r10
7413	add	r14,r10
7414	mulx	r9,r11,r11
7415	adc	r15,r11
7416	adc	r9,0
7417	imul	rdx,r12
7418	add	r15,rax
7419	adc	r9,rdx
7420	mov	r10,r13
7421	mov	r11,r14
7422	mov	r12,r15
7423	and	r12,3
7424	mov	r13,r15
7425	and	r13,-4
7426	mov	r14,r9
7427	shrd	r15,r9,2
7428	shr	r9,2
7429	add	r15,r13
7430	adc	r9,r14
7431	add	r10,r15
7432	adc	r11,r9
7433	adc	r12,0
; Second sweep, followed by the inverse lane rotation.
7434	vpaddd	ymm0,ymm0,ymm4
7435	vpxor	ymm12,ymm12,ymm0
7436	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7437	vpaddd	ymm8,ymm8,ymm12
7438	vpxor	ymm4,ymm4,ymm8
7439	vpsrld	ymm3,ymm4,20
7440	vpslld	ymm4,ymm4,12
7441	vpxor	ymm4,ymm4,ymm3
7442	vpaddd	ymm0,ymm0,ymm4
7443	vpxor	ymm12,ymm12,ymm0
7444	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7445	vpaddd	ymm8,ymm8,ymm12
7446	vpxor	ymm4,ymm4,ymm8
7447	vpslld	ymm3,ymm4,7
7448	vpsrld	ymm4,ymm4,25
7449	vpxor	ymm4,ymm4,ymm3
7450	vpalignr	ymm12,ymm12,ymm12,4
7451	vpalignr	ymm8,ymm8,ymm8,8
7452	vpalignr	ymm4,ymm4,ymm4,12
7453	add	r10,QWORD[((0+16))+rdi]
7454	adc	r11,QWORD[((8+16))+rdi]
7455	adc	r12,1
7456	mov	rdx,QWORD[((0+160+0))+rbp]
7457	mov	r15,rdx
7458	mulx	r14,r13,r10
7459	mulx	rdx,rax,r11
7460	imul	r15,r12
7461	add	r14,rax
7462	adc	r15,rdx
7463	mov	rdx,QWORD[((8+160+0))+rbp]
7464	mulx	rax,r10,r10
7465	add	r14,r10
7466	mulx	r9,r11,r11
7467	adc	r15,r11
7468	adc	r9,0
7469	imul	rdx,r12
7470	add	r15,rax
7471	adc	r9,rdx
7472	mov	r10,r13
7473	mov	r11,r14
7474	mov	r12,r15
7475	and	r12,3
7476	mov	r13,r15
7477	and	r13,-4
7478	mov	r14,r9
7479	shrd	r15,r9,2
7480	shr	r9,2
7481	add	r15,r13
7482	adc	r9,r14
7483	add	r10,r15
7484	adc	r11,r9
7485	adc	r12,0
7486
; Loop control uses signed jumps on purpose: while rcx stays positive after
; the decrement the next pass includes the extra (third) hash step. Once rcx
; is used up (it keeps decrementing below zero), r8 counts the remaining
; two-hash passes. The callers visible in this file set rcx + r8 = 10.
7487	lea	rdi,[32+rdi]
7488	dec	rcx
7489	jg	NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
7490	dec	r8
7491	jge	NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
; Add the pre-round state back in, then permute 128-bit lanes so that
; ymm0, ymm4, ymm8, ymm12 hold the 128 keystream bytes in output order
; (0x02 gathers low lanes, 0x13 high lanes; ymm3 is a temporary).
7492	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7493	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7494	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7495	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
7496	vperm2i128	ymm3,ymm4,ymm0,0x13
7497	vperm2i128	ymm0,ymm4,ymm0,0x02
7498	vperm2i128	ymm4,ymm12,ymm8,0x02
7499	vperm2i128	ymm12,ymm12,ymm8,0x13
7500	vmovdqa	ymm8,ymm3
7501
; The keystream is left in registers; presumably $L$seal_avx2_short_loop
; (not visible here) applies it to the remaining input - confirm there.
7502	jmp	NEAR $L$seal_avx2_short_loop
7503
; Tail for 129..256 remaining bytes: set up two state copies
; (ymm0/4/8/12 and ymm1/5/9/13). Counter rows are the saved row at
; (160+160)+rbp advanced by one and by two $L$avx2_inc steps; both are saved
; (slots 160+192 and 160+160) for the finalize step.
; On entry rcx and r8 hold the pass counts used by the loop that follows.
7504$L$seal_avx2_tail_256:
7505	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7506	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7507	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7508	vmovdqa	ymm1,ymm0
7509	vmovdqa	ymm5,ymm4
7510	vmovdqa	ymm9,ymm8
7511	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7512	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]
7513	vpaddd	ymm12,ymm12,ymm13
7514	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7515	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7516
; Extra hash step for passes that absorb three 16-byte blocks. Same
; arithmetic as the mulx-based steps elsewhere in this routine, but written
; with one-operand mul (result in rdx:rax): acc (r12:r11:r10) += 16 bytes at
; rdi plus 1 at bit 128, multiply by the qword pair at (160+0)+rbp /
; (160+8)+rbp, then reduce. Falls through into the two-hash pass body.
7517$L$seal_avx2_tail_256_rounds_and_3xhash:
7518	add	r10,QWORD[((0+0))+rdi]
7519	adc	r11,QWORD[((8+0))+rdi]
7520	adc	r12,1
; Low multiplier qword times acc0, acc1, acc2 -> r13, r14, r15.
7521	mov	rax,QWORD[((0+160+0))+rbp]
7522	mov	r15,rax
7523	mul	r10
7524	mov	r13,rax
7525	mov	r14,rdx
7526	mov	rax,QWORD[((0+160+0))+rbp]
7527	mul	r11
7528	imul	r15,r12
7529	add	r14,rax
7530	adc	r15,rdx
; High multiplier qword times acc0, acc1, acc2, accumulated into r14, r15, r9
; (r10 is reused to carry the high half of the acc0 product).
7531	mov	rax,QWORD[((8+160+0))+rbp]
7532	mov	r9,rax
7533	mul	r10
7534	add	r14,rax
7535	adc	rdx,0
7536	mov	r10,rdx
7537	mov	rax,QWORD[((8+160+0))+rbp]
7538	mul	r11
7539	add	r15,rax
7540	adc	rdx,0
7541	imul	r9,r12
7542	add	r15,r10
7543	adc	r9,rdx
; Reduce: keep the low 130 bits, add back 5 * (bits above 2^130).
7544	mov	r10,r13
7545	mov	r11,r14
7546	mov	r12,r15
7547	and	r12,3
7548	mov	r13,r15
7549	and	r13,-4
7550	mov	r14,r9
7551	shrd	r15,r9,2
7552	shr	r9,2
7553	add	r15,r13
7554	adc	r9,r14
7555	add	r10,r15
7556	adc	r11,r9
7557	adc	r12,0
7558
7559	lea	rdi,[16+rdi]
; One pass of the two-copy tail: two quarter-round sweeps over both state
; copies (ymm0/4/8/12 and ymm1/5/9/13, ymm3 scratch), each followed by one
; 16-byte hash step (rdi+0, then rdi+16), after which rdi advances by 32.
7560$L$seal_avx2_tail_256_rounds_and_2xhash:
; First sweep, copy 0 (rotations 16, 12, 8, 7), then lane rotation.
7561	vpaddd	ymm0,ymm0,ymm4
7562	vpxor	ymm12,ymm12,ymm0
7563	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7564	vpaddd	ymm8,ymm8,ymm12
7565	vpxor	ymm4,ymm4,ymm8
7566	vpsrld	ymm3,ymm4,20
7567	vpslld	ymm4,ymm4,12
7568	vpxor	ymm4,ymm4,ymm3
7569	vpaddd	ymm0,ymm0,ymm4
7570	vpxor	ymm12,ymm12,ymm0
7571	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7572	vpaddd	ymm8,ymm8,ymm12
7573	vpxor	ymm4,ymm4,ymm8
7574	vpslld	ymm3,ymm4,7
7575	vpsrld	ymm4,ymm4,25
7576	vpxor	ymm4,ymm4,ymm3
7577	vpalignr	ymm12,ymm12,ymm12,12
7578	vpalignr	ymm8,ymm8,ymm8,8
7579	vpalignr	ymm4,ymm4,ymm4,4
; First sweep, copy 1.
7580	vpaddd	ymm1,ymm1,ymm5
7581	vpxor	ymm13,ymm13,ymm1
7582	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7583	vpaddd	ymm9,ymm9,ymm13
7584	vpxor	ymm5,ymm5,ymm9
7585	vpsrld	ymm3,ymm5,20
7586	vpslld	ymm5,ymm5,12
7587	vpxor	ymm5,ymm5,ymm3
7588	vpaddd	ymm1,ymm1,ymm5
7589	vpxor	ymm13,ymm13,ymm1
7590	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7591	vpaddd	ymm9,ymm9,ymm13
7592	vpxor	ymm5,ymm5,ymm9
7593	vpslld	ymm3,ymm5,7
7594	vpsrld	ymm5,ymm5,25
7595	vpxor	ymm5,ymm5,ymm3
7596	vpalignr	ymm13,ymm13,ymm13,12
7597	vpalignr	ymm9,ymm9,ymm9,8
7598	vpalignr	ymm5,ymm5,ymm5,4
; Hash step on the 16 bytes at rdi+0 (one-operand mul variant).
7599	add	r10,QWORD[((0+0))+rdi]
7600	adc	r11,QWORD[((8+0))+rdi]
7601	adc	r12,1
7602	mov	rax,QWORD[((0+160+0))+rbp]
7603	mov	r15,rax
7604	mul	r10
7605	mov	r13,rax
7606	mov	r14,rdx
7607	mov	rax,QWORD[((0+160+0))+rbp]
7608	mul	r11
7609	imul	r15,r12
7610	add	r14,rax
7611	adc	r15,rdx
7612	mov	rax,QWORD[((8+160+0))+rbp]
7613	mov	r9,rax
7614	mul	r10
7615	add	r14,rax
7616	adc	rdx,0
7617	mov	r10,rdx
7618	mov	rax,QWORD[((8+160+0))+rbp]
7619	mul	r11
7620	add	r15,rax
7621	adc	rdx,0
7622	imul	r9,r12
7623	add	r15,r10
7624	adc	r9,rdx
7625	mov	r10,r13
7626	mov	r11,r14
7627	mov	r12,r15
7628	and	r12,3
7629	mov	r13,r15
7630	and	r13,-4
7631	mov	r14,r9
7632	shrd	r15,r9,2
7633	shr	r9,2
7634	add	r15,r13
7635	adc	r9,r14
7636	add	r10,r15
7637	adc	r11,r9
7638	adc	r12,0
; Second sweep, copy 0, then the inverse lane rotation.
7639	vpaddd	ymm0,ymm0,ymm4
7640	vpxor	ymm12,ymm12,ymm0
7641	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7642	vpaddd	ymm8,ymm8,ymm12
7643	vpxor	ymm4,ymm4,ymm8
7644	vpsrld	ymm3,ymm4,20
7645	vpslld	ymm4,ymm4,12
7646	vpxor	ymm4,ymm4,ymm3
7647	vpaddd	ymm0,ymm0,ymm4
7648	vpxor	ymm12,ymm12,ymm0
7649	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7650	vpaddd	ymm8,ymm8,ymm12
7651	vpxor	ymm4,ymm4,ymm8
7652	vpslld	ymm3,ymm4,7
7653	vpsrld	ymm4,ymm4,25
7654	vpxor	ymm4,ymm4,ymm3
7655	vpalignr	ymm12,ymm12,ymm12,4
7656	vpalignr	ymm8,ymm8,ymm8,8
7657	vpalignr	ymm4,ymm4,ymm4,12
; Second sweep, copy 1.
7658	vpaddd	ymm1,ymm1,ymm5
7659	vpxor	ymm13,ymm13,ymm1
7660	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7661	vpaddd	ymm9,ymm9,ymm13
7662	vpxor	ymm5,ymm5,ymm9
7663	vpsrld	ymm3,ymm5,20
7664	vpslld	ymm5,ymm5,12
7665	vpxor	ymm5,ymm5,ymm3
7666	vpaddd	ymm1,ymm1,ymm5
7667	vpxor	ymm13,ymm13,ymm1
7668	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7669	vpaddd	ymm9,ymm9,ymm13
7670	vpxor	ymm5,ymm5,ymm9
7671	vpslld	ymm3,ymm5,7
7672	vpsrld	ymm5,ymm5,25
7673	vpxor	ymm5,ymm5,ymm3
7674	vpalignr	ymm13,ymm13,ymm13,4
7675	vpalignr	ymm9,ymm9,ymm9,8
7676	vpalignr	ymm5,ymm5,ymm5,12
; Hash step on the 16 bytes at rdi+16.
7677	add	r10,QWORD[((0+16))+rdi]
7678	adc	r11,QWORD[((8+16))+rdi]
7679	adc	r12,1
7680	mov	rax,QWORD[((0+160+0))+rbp]
7681	mov	r15,rax
7682	mul	r10
7683	mov	r13,rax
7684	mov	r14,rdx
7685	mov	rax,QWORD[((0+160+0))+rbp]
7686	mul	r11
7687	imul	r15,r12
7688	add	r14,rax
7689	adc	r15,rdx
7690	mov	rax,QWORD[((8+160+0))+rbp]
7691	mov	r9,rax
7692	mul	r10
7693	add	r14,rax
7694	adc	rdx,0
7695	mov	r10,rdx
7696	mov	rax,QWORD[((8+160+0))+rbp]
7697	mul	r11
7698	add	r15,rax
7699	adc	rdx,0
7700	imul	r9,r12
7701	add	r15,r10
7702	adc	r9,rdx
7703	mov	r10,r13
7704	mov	r11,r14
7705	mov	r12,r15
7706	and	r12,3
7707	mov	r13,r15
7708	and	r13,-4
7709	mov	r14,r9
7710	shrd	r15,r9,2
7711	shr	r9,2
7712	add	r15,r13
7713	adc	r9,r14
7714	add	r10,r15
7715	adc	r11,r9
7716	adc	r12,0
7717
; Loop control uses signed jumps on purpose: while rcx stays positive after
; the decrement the next pass includes the extra (third) hash step. Once rcx
; is used up, r8 counts the remaining two-hash passes.
7718	lea	rdi,[32+rdi]
7719	dec	rcx
7720	jg	NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
7721	dec	r8
7722	jge	NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
; Add the pre-round state back into both copies.
7723	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
7724	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
7725	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
7726	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
7727	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
7728	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
7729	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
7730	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
; Copy 1 (the lower counter): permute lanes into output order, XOR with
; 128 input bytes at rsi and store to rdi.
7731	vperm2i128	ymm3,ymm5,ymm1,0x02
7732	vperm2i128	ymm5,ymm5,ymm1,0x13
7733	vperm2i128	ymm1,ymm13,ymm9,0x02
7734	vperm2i128	ymm9,ymm13,ymm9,0x13
7735	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
7736	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
7737	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
7738	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
7739	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
7740	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
7741	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
7742	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
; Copy 0: permuted so ymm0, ymm4, ymm8, ymm12 hold the next 128 keystream
; bytes in output order, left in registers for the code jumped to below.
7743	vperm2i128	ymm3,ymm4,ymm0,0x13
7744	vperm2i128	ymm0,ymm4,ymm0,0x02
7745	vperm2i128	ymm4,ymm12,ymm8,0x02
7746	vperm2i128	ymm12,ymm12,ymm8,0x13
7747	vmovdqa	ymm8,ymm3
7748
; rcx = 128, the number of bytes just stored (the earlier jump to this label
; in this file passes 10*32 after storing 320 bytes). Presumably the target
; hashes that many bytes at rdi before continuing - confirm at
; $L$seal_avx2_short_hash_remainder. rsi and rbx account for the 128 input
; bytes consumed.
7749	mov	rcx,4*32
7750	lea	rsi,[128+rsi]
7751	sub	rbx,4*32
7752	jmp	NEAR $L$seal_avx2_short_hash_remainder
7753
7754$L$seal_avx2_tail_384:
7755	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
7756	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
7757	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
7758	vmovdqa	ymm1,ymm0
7759	vmovdqa	ymm5,ymm4
7760	vmovdqa	ymm9,ymm8
7761	vmovdqa	ymm2,ymm0
7762	vmovdqa	ymm6,ymm4
7763	vmovdqa	ymm10,ymm8
7764	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
7765	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
7766	vpaddd	ymm13,ymm12,ymm14
7767	vpaddd	ymm12,ymm12,ymm13
7768	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
7769	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
7770	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
7771
; Loop entry that hashes one additional 16-byte block before falling into
; the "2xhash" body.  The accumulator is r12:r11:r10; the multiplier is the
; pair of qwords at rbp+160 and rbp+168.  rdi is advanced by 16 afterwards.
7772$L$seal_avx2_tail_384_rounds_and_3xhash:
7773	add	r10,QWORD[((0+0))+rdi]	; acc += 16 bytes at [rdi] ...
7774	adc	r11,QWORD[((8+0))+rdi]
7775	adc	r12,1	; ... plus the 2^128 padding bit (and carry)
7776	mov	rax,QWORD[((0+160+0))+rbp]	; low multiplier qword
7777	mov	r15,rax
7778	mul	r10	; rdx:rax = mult_lo * acc0
7779	mov	r13,rax
7780	mov	r14,rdx
7781	mov	rax,QWORD[((0+160+0))+rbp]
7782	mul	r11	; rdx:rax = mult_lo * acc1
7783	imul	r15,r12	; mult_lo * acc2 (low 64 bits only)
7784	add	r14,rax
7785	adc	r15,rdx
7786	mov	rax,QWORD[((8+160+0))+rbp]	; high multiplier qword
7787	mov	r9,rax
7788	mul	r10	; rdx:rax = mult_hi * acc0
7789	add	r14,rax
7790	adc	rdx,0
7791	mov	r10,rdx
7792	mov	rax,QWORD[((8+160+0))+rbp]
7793	mul	r11	; rdx:rax = mult_hi * acc1
7794	add	r15,rax
7795	adc	rdx,0
7796	imul	r9,r12	; mult_hi * acc2 (low 64 bits only)
7797	add	r15,r10
7798	adc	r9,rdx	; product limbs are now r13, r14, r15, r9
7799	mov	r10,r13	; reduction: keep the low 130 bits in r12:r11:r10 ...
7800	mov	r11,r14
7801	mov	r12,r15
7802	and	r12,3
7803	mov	r13,r15	; ... and fold the rest back in as (hi & ~3) + (hi >> 2)
7804	and	r13,-4
7805	mov	r14,r9
7806	shrd	r15,r9,2
7807	shr	r9,2
7808	add	r15,r13
7809	adc	r9,r14
7810	add	r10,r15
7811	adc	r11,r9
7812	adc	r12,0
7813
7814	lea	rdi,[16+rdi]	; step past the block just hashed (lea leaves flags alone)
; Main loop body of the 384-byte tail: one ChaCha20 double round on the three
; states (ymm0/4/8/12, ymm1/5/9/13, ymm2/6/10/14; ymm3 is scratch)
; interleaved with two 16-byte hash blocks read from [rdi] and [rdi+16].
; After the loop the initial state is added back, 256 bytes are XORed from
; rsi to rdi, and the remaining 128 bytes of keystream are left in
; ymm0, ymm4, ymm8, ymm12 for $L$seal_avx2_short_hash_remainder.
7815$L$seal_avx2_tail_384_rounds_and_2xhash:
7816	vpaddd	ymm0,ymm0,ymm4	; state 0, first half of the double round
7817	vpxor	ymm12,ymm12,ymm0
7818	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]	; rotate each dword left by 16
7819	vpaddd	ymm8,ymm8,ymm12
7820	vpxor	ymm4,ymm4,ymm8
7821	vpsrld	ymm3,ymm4,20	; rotate left by 12 via shift pair + xor
7822	vpslld	ymm4,ymm4,12
7823	vpxor	ymm4,ymm4,ymm3
7824	vpaddd	ymm0,ymm0,ymm4
7825	vpxor	ymm12,ymm12,ymm0
7826	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]	; rotate each dword left by 8
7827	vpaddd	ymm8,ymm8,ymm12
7828	vpxor	ymm4,ymm4,ymm8
7829	vpslld	ymm3,ymm4,7	; rotate left by 7
7830	vpsrld	ymm4,ymm4,25
7831	vpxor	ymm4,ymm4,ymm3
7832	vpalignr	ymm12,ymm12,ymm12,12	; rotate rows 1-3 within each lane for the second half
7833	vpalignr	ymm8,ymm8,ymm8,8
7834	vpalignr	ymm4,ymm4,ymm4,4
7835	vpaddd	ymm1,ymm1,ymm5	; state 1, first half of the double round
7836	vpxor	ymm13,ymm13,ymm1
7837	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7838	vpaddd	ymm9,ymm9,ymm13
7839	vpxor	ymm5,ymm5,ymm9
7840	vpsrld	ymm3,ymm5,20
7841	vpslld	ymm5,ymm5,12
7842	vpxor	ymm5,ymm5,ymm3
7843	vpaddd	ymm1,ymm1,ymm5
7844	vpxor	ymm13,ymm13,ymm1
7845	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7846	vpaddd	ymm9,ymm9,ymm13
7847	vpxor	ymm5,ymm5,ymm9
7848	vpslld	ymm3,ymm5,7
7849	vpsrld	ymm5,ymm5,25
7850	vpxor	ymm5,ymm5,ymm3
7851	vpalignr	ymm13,ymm13,ymm13,12
7852	vpalignr	ymm9,ymm9,ymm9,8
7853	vpalignr	ymm5,ymm5,ymm5,4
7854	add	r10,QWORD[((0+0))+rdi]	; hash block 1: acc += 16 bytes at [rdi] + 2^128
7855	adc	r11,QWORD[((8+0))+rdi]
7856	adc	r12,1
7857	mov	rax,QWORD[((0+160+0))+rbp]	; multiply acc by the qwords at rbp+160 / rbp+168
7858	mov	r15,rax
7859	mul	r10
7860	mov	r13,rax
7861	mov	r14,rdx
7862	mov	rax,QWORD[((0+160+0))+rbp]
7863	mul	r11
7864	imul	r15,r12
7865	add	r14,rax
7866	adc	r15,rdx
7867	mov	rax,QWORD[((8+160+0))+rbp]
7868	mov	r9,rax
7869	mul	r10
7870	add	r14,rax
7871	adc	rdx,0
7872	mov	r10,rdx
7873	mov	rax,QWORD[((8+160+0))+rbp]
7874	mul	r11
7875	add	r15,rax
7876	adc	rdx,0
7877	imul	r9,r12
7878	add	r15,r10
7879	adc	r9,rdx
7880	mov	r10,r13	; reduction: low 130 bits stay, the rest is folded back
7881	mov	r11,r14
7882	mov	r12,r15
7883	and	r12,3
7884	mov	r13,r15
7885	and	r13,-4
7886	mov	r14,r9
7887	shrd	r15,r9,2
7888	shr	r9,2
7889	add	r15,r13
7890	adc	r9,r14
7891	add	r10,r15
7892	adc	r11,r9
7893	adc	r12,0
7894	vpaddd	ymm2,ymm2,ymm6	; state 2, first half of the double round
7895	vpxor	ymm14,ymm14,ymm2
7896	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
7897	vpaddd	ymm10,ymm10,ymm14
7898	vpxor	ymm6,ymm6,ymm10
7899	vpsrld	ymm3,ymm6,20
7900	vpslld	ymm6,ymm6,12
7901	vpxor	ymm6,ymm6,ymm3
7902	vpaddd	ymm2,ymm2,ymm6
7903	vpxor	ymm14,ymm14,ymm2
7904	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
7905	vpaddd	ymm10,ymm10,ymm14
7906	vpxor	ymm6,ymm6,ymm10
7907	vpslld	ymm3,ymm6,7
7908	vpsrld	ymm6,ymm6,25
7909	vpxor	ymm6,ymm6,ymm3
7910	vpalignr	ymm14,ymm14,ymm14,12
7911	vpalignr	ymm10,ymm10,ymm10,8
7912	vpalignr	ymm6,ymm6,ymm6,4
7913	vpaddd	ymm0,ymm0,ymm4	; state 0, second half of the double round
7914	vpxor	ymm12,ymm12,ymm0
7915	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
7916	vpaddd	ymm8,ymm8,ymm12
7917	vpxor	ymm4,ymm4,ymm8
7918	vpsrld	ymm3,ymm4,20
7919	vpslld	ymm4,ymm4,12
7920	vpxor	ymm4,ymm4,ymm3
7921	vpaddd	ymm0,ymm0,ymm4
7922	vpxor	ymm12,ymm12,ymm0
7923	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
7924	vpaddd	ymm8,ymm8,ymm12
7925	vpxor	ymm4,ymm4,ymm8
7926	vpslld	ymm3,ymm4,7
7927	vpsrld	ymm4,ymm4,25
7928	vpxor	ymm4,ymm4,ymm3
7929	vpalignr	ymm12,ymm12,ymm12,4	; undo the row rotation applied after the first half
7930	vpalignr	ymm8,ymm8,ymm8,8
7931	vpalignr	ymm4,ymm4,ymm4,12
7932	add	r10,QWORD[((0+16))+rdi]	; hash block 2: acc += 16 bytes at [rdi+16] + 2^128
7933	adc	r11,QWORD[((8+16))+rdi]
7934	adc	r12,1
7935	mov	rax,QWORD[((0+160+0))+rbp]
7936	mov	r15,rax
7937	mul	r10
7938	mov	r13,rax
7939	mov	r14,rdx
7940	mov	rax,QWORD[((0+160+0))+rbp]
7941	mul	r11
7942	imul	r15,r12
7943	add	r14,rax
7944	adc	r15,rdx
7945	mov	rax,QWORD[((8+160+0))+rbp]
7946	mov	r9,rax
7947	mul	r10
7948	add	r14,rax
7949	adc	rdx,0
7950	mov	r10,rdx
7951	mov	rax,QWORD[((8+160+0))+rbp]
7952	mul	r11
7953	add	r15,rax
7954	adc	rdx,0
7955	imul	r9,r12
7956	add	r15,r10
7957	adc	r9,rdx
7958	mov	r10,r13
7959	mov	r11,r14
7960	mov	r12,r15
7961	and	r12,3
7962	mov	r13,r15
7963	and	r13,-4
7964	mov	r14,r9
7965	shrd	r15,r9,2
7966	shr	r9,2
7967	add	r15,r13
7968	adc	r9,r14
7969	add	r10,r15
7970	adc	r11,r9
7971	adc	r12,0
7972	vpaddd	ymm1,ymm1,ymm5	; state 1, second half of the double round
7973	vpxor	ymm13,ymm13,ymm1
7974	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
7975	vpaddd	ymm9,ymm9,ymm13
7976	vpxor	ymm5,ymm5,ymm9
7977	vpsrld	ymm3,ymm5,20
7978	vpslld	ymm5,ymm5,12
7979	vpxor	ymm5,ymm5,ymm3
7980	vpaddd	ymm1,ymm1,ymm5
7981	vpxor	ymm13,ymm13,ymm1
7982	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
7983	vpaddd	ymm9,ymm9,ymm13
7984	vpxor	ymm5,ymm5,ymm9
7985	vpslld	ymm3,ymm5,7
7986	vpsrld	ymm5,ymm5,25
7987	vpxor	ymm5,ymm5,ymm3
7988	vpalignr	ymm13,ymm13,ymm13,4
7989	vpalignr	ymm9,ymm9,ymm9,8
7990	vpalignr	ymm5,ymm5,ymm5,12
7991	vpaddd	ymm2,ymm2,ymm6	; state 2, second half of the double round
7992	vpxor	ymm14,ymm14,ymm2
7993	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
7994	vpaddd	ymm10,ymm10,ymm14
7995	vpxor	ymm6,ymm6,ymm10
7996	vpsrld	ymm3,ymm6,20
7997	vpslld	ymm6,ymm6,12
7998	vpxor	ymm6,ymm6,ymm3
7999	vpaddd	ymm2,ymm2,ymm6
8000	vpxor	ymm14,ymm14,ymm2
8001	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8002	vpaddd	ymm10,ymm10,ymm14
8003	vpxor	ymm6,ymm6,ymm10
8004	vpslld	ymm3,ymm6,7
8005	vpsrld	ymm6,ymm6,25
8006	vpxor	ymm6,ymm6,ymm3
8007	vpalignr	ymm14,ymm14,ymm14,4
8008	vpalignr	ymm10,ymm10,ymm10,8
8009	vpalignr	ymm6,ymm6,ymm6,12
8010
8011	lea	rdi,[32+rdi]	; step past the two blocks hashed in this iteration
8012	dec	rcx	; while the rcx counter is still positive, take the 3-hash entry
8013	jg	NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
8014	dec	r8	; then keep iterating from this label until r8 drops below zero
8015	jge	NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
8016	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]	; add the initial state back: state 2
8017	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8018	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8019	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8020	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]	; state 1
8021	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8022	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8023	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8024	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]	; state 0
8025	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8026	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8027	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8028	vperm2i128	ymm3,ymm6,ymm2,0x02	; 0x02 pairs the low lanes, 0x13 the high lanes (state 2)
8029	vperm2i128	ymm6,ymm6,ymm2,0x13
8030	vperm2i128	ymm2,ymm14,ymm10,0x02
8031	vperm2i128	ymm10,ymm14,ymm10,0x13
8032	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]	; XOR keystream with 128 input bytes at rsi
8033	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
8034	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
8035	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
8036	vmovdqu	YMMWORD[(0+0)+rdi],ymm3	; store output bytes 0..127
8037	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
8038	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
8039	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
8040	vperm2i128	ymm3,ymm5,ymm1,0x02	; same lane regrouping for state 1
8041	vperm2i128	ymm5,ymm5,ymm1,0x13
8042	vperm2i128	ymm1,ymm13,ymm9,0x02
8043	vperm2i128	ymm9,ymm13,ymm9,0x13
8044	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8045	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
8046	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
8047	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
8048	vmovdqu	YMMWORD[(0+128)+rdi],ymm3	; store output bytes 128..255
8049	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
8050	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
8051	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
8052	vperm2i128	ymm3,ymm4,ymm0,0x13	; state 0 is only regrouped; it stays in registers,
8053	vperm2i128	ymm0,ymm4,ymm0,0x02	; ordered ymm0, ymm4, ymm8, ymm12
8054	vperm2i128	ymm4,ymm12,ymm8,0x02
8055	vperm2i128	ymm12,ymm12,ymm8,0x13
8056	vmovdqa	ymm8,ymm3
8057
8058	mov	rcx,8*32	; 256 bytes were just written at rdi and are not hashed yet
8059	lea	rsi,[256+rsi]	; input pointer moves past the 256 consumed bytes
8060	sub	rbx,8*32	; rbx (remaining length) shrinks by the same amount
8061	jmp	NEAR $L$seal_avx2_short_hash_remainder
8062
; Tail path, "512" variant: build four ChaCha20 states (two blocks per ymm
; register).  Rows 0-2 live in ymm0-3 / ymm4-7 / ymm8-11 and the counter rows
; in ymm12-15.  Each counter row is also written to the frame so it can be
; added back after the rounds.
8063$L$seal_avx2_tail_512:
8064	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; row 0 = "expand 32-byte k" constant
8065	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]	; row 1 from frame slot rbp+160+64
8066	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]	; row 2 from frame slot rbp+160+96
8067	vmovdqa	ymm1,ymm0	; replicate rows 0-2 into the other three states
8068	vmovdqa	ymm5,ymm4
8069	vmovdqa	ymm9,ymm8
8070	vmovdqa	ymm2,ymm0
8071	vmovdqa	ymm6,ymm4
8072	vmovdqa	ymm10,ymm8
8073	vmovdqa	ymm3,ymm0
8074	vmovdqa	ymm7,ymm4
8075	vmovdqa	ymm11,ymm8
8076	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]	; per-lane increment (+2 in the first dword)
8077	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]	; ymm15 = stored counter row + inc
8078	vpaddd	ymm14,ymm12,ymm15	; each following state is one more inc ahead
8079	vpaddd	ymm13,ymm12,ymm14
8080	vpaddd	ymm12,ymm12,ymm13
8081	vmovdqa	YMMWORD[(160+256)+rbp],ymm15	; remember each initial counter row for the final add
8082	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
8083	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8084	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
8085
; Loop entry that hashes one additional 16-byte block (using mulx) before
; falling into the "2xhash" body.  The accumulator is r12:r11:r10; the
; multiplier is the pair of qwords at rbp+160 and rbp+168.  rdi is advanced
; by 16 afterwards.
8086$L$seal_avx2_tail_512_rounds_and_3xhash:
8087	add	r10,QWORD[((0+0))+rdi]	; acc += 16 bytes at [rdi] ...
8088	adc	r11,QWORD[((8+0))+rdi]
8089	adc	r12,1	; ... plus the 2^128 padding bit (and carry)
8090	mov	rdx,QWORD[((0+160+0))+rbp]	; rdx = low multiplier qword (implicit mulx operand)
8091	mov	r15,rdx
8092	mulx	r14,r13,r10	; r14:r13 = mult_lo * acc0
8093	mulx	rdx,rax,r11	; rdx:rax = mult_lo * acc1
8094	imul	r15,r12	; mult_lo * acc2 (low 64 bits only)
8095	add	r14,rax
8096	adc	r15,rdx
8097	mov	rdx,QWORD[((8+160+0))+rbp]	; rdx = high multiplier qword
8098	mulx	rax,r10,r10	; rax:r10 = mult_hi * acc0
8099	add	r14,r10
8100	mulx	r9,r11,r11	; r9:r11 = mult_hi * acc1
8101	adc	r15,r11
8102	adc	r9,0
8103	imul	rdx,r12	; mult_hi * acc2 (low 64 bits only)
8104	add	r15,rax
8105	adc	r9,rdx	; product limbs are now r13, r14, r15, r9
8106	mov	r10,r13	; reduction: keep the low 130 bits in r12:r11:r10 ...
8107	mov	r11,r14
8108	mov	r12,r15
8109	and	r12,3
8110	mov	r13,r15	; ... and fold the rest back in as (hi & ~3) + (hi >> 2)
8111	and	r13,-4
8112	mov	r14,r9
8113	shrd	r15,r9,2
8114	shr	r9,2
8115	add	r15,r13
8116	adc	r9,r14
8117	add	r10,r15
8118	adc	r11,r9
8119	adc	r12,0
8120
8121	lea	rdi,[16+rdi]	; step past the block just hashed
; Main loop body of the 512-byte tail: one ChaCha20 double round on four
; states at once, interleaved with two mulx-based hash blocks read from
; [rdi] and [rdi+16].  All sixteen ymm registers hold state, so row 2 of
; state 0 (ymm8) is spilled to the frame slot at rbp+160+128 whenever ymm8 is
; needed as scratch (rotation tables and shift temporaries).
; After the loop the initial state is added back, 384 bytes are XORed from
; rsi to rdi, and the remaining 128 bytes of keystream are left in
; ymm0, ymm4, ymm8, ymm12 for $L$seal_avx2_short_hash_remainder.
8122$L$seal_avx2_tail_512_rounds_and_2xhash:
8123	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; spill row 2 of state 0
8124	vmovdqa	ymm8,YMMWORD[$L$rol16]	; ymm8 = rotate-left-16 shuffle table
8125	vpaddd	ymm3,ymm3,ymm7	; first half of the double round, all four states
8126	vpaddd	ymm2,ymm2,ymm6
8127	vpaddd	ymm1,ymm1,ymm5
8128	vpaddd	ymm0,ymm0,ymm4
8129	vpxor	ymm15,ymm15,ymm3
8130	vpxor	ymm14,ymm14,ymm2
8131	vpxor	ymm13,ymm13,ymm1
8132	vpxor	ymm12,ymm12,ymm0
8133	vpshufb	ymm15,ymm15,ymm8
8134	vpshufb	ymm14,ymm14,ymm8
8135	vpshufb	ymm13,ymm13,ymm8
8136	vpshufb	ymm12,ymm12,ymm8
8137	vpaddd	ymm11,ymm11,ymm15
8138	vpaddd	ymm10,ymm10,ymm14
8139	vpaddd	ymm9,ymm9,ymm13
8140	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]	; row 2 of state 0 is read back from the spill slot
8141	vpxor	ymm7,ymm7,ymm11
8142	vpxor	ymm6,ymm6,ymm10
8143	add	r10,QWORD[((0+0))+rdi]	; hash block 1: acc += 16 bytes at [rdi] + 2^128
8144	adc	r11,QWORD[((8+0))+rdi]
8145	adc	r12,1
8146	vpxor	ymm5,ymm5,ymm9
8147	vpxor	ymm4,ymm4,ymm8
8148	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; spill again; ymm8 becomes the shift temporary
8149	vpsrld	ymm8,ymm7,20	; rotate left by 12 via shift pair + xor
8150	vpslld	ymm7,ymm7,32-20
8151	vpxor	ymm7,ymm7,ymm8
8152	vpsrld	ymm8,ymm6,20
8153	vpslld	ymm6,ymm6,32-20
8154	vpxor	ymm6,ymm6,ymm8
8155	vpsrld	ymm8,ymm5,20
8156	vpslld	ymm5,ymm5,32-20
8157	vpxor	ymm5,ymm5,ymm8
8158	vpsrld	ymm8,ymm4,20
8159	vpslld	ymm4,ymm4,32-20
8160	vpxor	ymm4,ymm4,ymm8
8161	vmovdqa	ymm8,YMMWORD[$L$rol8]	; ymm8 = rotate-left-8 shuffle table
8162	vpaddd	ymm3,ymm3,ymm7
8163	vpaddd	ymm2,ymm2,ymm6
8164	vpaddd	ymm1,ymm1,ymm5
8165	vpaddd	ymm0,ymm0,ymm4
8166	mov	rdx,QWORD[((0+160+0))+rbp]	; hash block 1: multiply by the low multiplier qword
8167	mov	r15,rdx
8168	mulx	r14,r13,r10
8169	mulx	rdx,rax,r11
8170	imul	r15,r12
8171	add	r14,rax
8172	adc	r15,rdx
8173	vpxor	ymm15,ymm15,ymm3
8174	vpxor	ymm14,ymm14,ymm2
8175	vpxor	ymm13,ymm13,ymm1
8176	vpxor	ymm12,ymm12,ymm0
8177	vpshufb	ymm15,ymm15,ymm8
8178	vpshufb	ymm14,ymm14,ymm8
8179	vpshufb	ymm13,ymm13,ymm8
8180	vpshufb	ymm12,ymm12,ymm8
8181	vpaddd	ymm11,ymm11,ymm15
8182	vpaddd	ymm10,ymm10,ymm14
8183	vpaddd	ymm9,ymm9,ymm13
8184	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8185	vpxor	ymm7,ymm7,ymm11
8186	vpxor	ymm6,ymm6,ymm10
8187	vpxor	ymm5,ymm5,ymm9
8188	vpxor	ymm4,ymm4,ymm8
8189	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8190	vpsrld	ymm8,ymm7,25	; rotate left by 7
8191	vpslld	ymm7,ymm7,32-25
8192	vpxor	ymm7,ymm7,ymm8
8193	mov	rdx,QWORD[((8+160+0))+rbp]	; hash block 1: multiply by the high multiplier qword
8194	mulx	rax,r10,r10
8195	add	r14,r10
8196	mulx	r9,r11,r11
8197	adc	r15,r11
8198	adc	r9,0
8199	imul	rdx,r12
8200	vpsrld	ymm8,ymm6,25
8201	vpslld	ymm6,ymm6,32-25
8202	vpxor	ymm6,ymm6,ymm8
8203	vpsrld	ymm8,ymm5,25
8204	vpslld	ymm5,ymm5,32-25
8205	vpxor	ymm5,ymm5,ymm8
8206	vpsrld	ymm8,ymm4,25
8207	vpslld	ymm4,ymm4,32-25
8208	vpxor	ymm4,ymm4,ymm8
8209	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]	; bring row 2 of state 0 back for the row rotation
8210	vpalignr	ymm7,ymm7,ymm7,4	; rotate rows 1-3 within each lane for the second half
8211	vpalignr	ymm11,ymm11,ymm11,8
8212	vpalignr	ymm15,ymm15,ymm15,12
8213	vpalignr	ymm6,ymm6,ymm6,4
8214	vpalignr	ymm10,ymm10,ymm10,8
8215	vpalignr	ymm14,ymm14,ymm14,12
8216	vpalignr	ymm5,ymm5,ymm5,4
8217	vpalignr	ymm9,ymm9,ymm9,8
8218	vpalignr	ymm13,ymm13,ymm13,12
8219	vpalignr	ymm4,ymm4,ymm4,4
8220	add	r15,rax	; hash block 1: finish the product (limbs r13, r14, r15, r9)
8221	adc	r9,rdx
8222	vpalignr	ymm8,ymm8,ymm8,8
8223	vpalignr	ymm12,ymm12,ymm12,12
8224	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; spill row 2 of state 0 for the second half
8225	vmovdqa	ymm8,YMMWORD[$L$rol16]
8226	vpaddd	ymm3,ymm3,ymm7	; second half of the double round, all four states
8227	vpaddd	ymm2,ymm2,ymm6
8228	vpaddd	ymm1,ymm1,ymm5
8229	vpaddd	ymm0,ymm0,ymm4
8230	vpxor	ymm15,ymm15,ymm3
8231	vpxor	ymm14,ymm14,ymm2
8232	vpxor	ymm13,ymm13,ymm1
8233	vpxor	ymm12,ymm12,ymm0
8234	vpshufb	ymm15,ymm15,ymm8
8235	vpshufb	ymm14,ymm14,ymm8
8236	vpshufb	ymm13,ymm13,ymm8
8237	vpshufb	ymm12,ymm12,ymm8
8238	vpaddd	ymm11,ymm11,ymm15
8239	vpaddd	ymm10,ymm10,ymm14
8240	vpaddd	ymm9,ymm9,ymm13
8241	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8242	mov	r10,r13	; hash block 1: reduction back into r12:r11:r10
8243	mov	r11,r14
8244	mov	r12,r15
8245	and	r12,3
8246	mov	r13,r15
8247	and	r13,-4
8248	mov	r14,r9
8249	shrd	r15,r9,2
8250	shr	r9,2
8251	add	r15,r13
8252	adc	r9,r14
8253	add	r10,r15
8254	adc	r11,r9
8255	adc	r12,0
8256	vpxor	ymm7,ymm7,ymm11
8257	vpxor	ymm6,ymm6,ymm10
8258	vpxor	ymm5,ymm5,ymm9
8259	vpxor	ymm4,ymm4,ymm8
8260	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8261	vpsrld	ymm8,ymm7,20
8262	vpslld	ymm7,ymm7,32-20
8263	vpxor	ymm7,ymm7,ymm8
8264	vpsrld	ymm8,ymm6,20
8265	vpslld	ymm6,ymm6,32-20
8266	vpxor	ymm6,ymm6,ymm8
8267	vpsrld	ymm8,ymm5,20
8268	vpslld	ymm5,ymm5,32-20
8269	vpxor	ymm5,ymm5,ymm8
8270	vpsrld	ymm8,ymm4,20
8271	vpslld	ymm4,ymm4,32-20
8272	vpxor	ymm4,ymm4,ymm8
8273	vmovdqa	ymm8,YMMWORD[$L$rol8]
8274	vpaddd	ymm3,ymm3,ymm7
8275	vpaddd	ymm2,ymm2,ymm6
8276	add	r10,QWORD[((0+16))+rdi]	; hash block 2: acc += 16 bytes at [rdi+16] + 2^128
8277	adc	r11,QWORD[((8+16))+rdi]
8278	adc	r12,1
8279	vpaddd	ymm1,ymm1,ymm5
8280	vpaddd	ymm0,ymm0,ymm4
8281	vpxor	ymm15,ymm15,ymm3
8282	vpxor	ymm14,ymm14,ymm2
8283	vpxor	ymm13,ymm13,ymm1
8284	vpxor	ymm12,ymm12,ymm0
8285	vpshufb	ymm15,ymm15,ymm8
8286	vpshufb	ymm14,ymm14,ymm8
8287	vpshufb	ymm13,ymm13,ymm8
8288	vpshufb	ymm12,ymm12,ymm8
8289	vpaddd	ymm11,ymm11,ymm15
8290	vpaddd	ymm10,ymm10,ymm14
8291	vpaddd	ymm9,ymm9,ymm13
8292	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
8293	vpxor	ymm7,ymm7,ymm11
8294	vpxor	ymm6,ymm6,ymm10
8295	vpxor	ymm5,ymm5,ymm9
8296	vpxor	ymm4,ymm4,ymm8
8297	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
8298	vpsrld	ymm8,ymm7,25
8299	mov	rdx,QWORD[((0+160+0))+rbp]	; hash block 2: multiply by the low multiplier qword
8300	mov	r15,rdx
8301	mulx	r14,r13,r10
8302	mulx	rdx,rax,r11
8303	imul	r15,r12
8304	add	r14,rax
8305	adc	r15,rdx
8306	vpslld	ymm7,ymm7,32-25
8307	vpxor	ymm7,ymm7,ymm8
8308	vpsrld	ymm8,ymm6,25
8309	vpslld	ymm6,ymm6,32-25
8310	vpxor	ymm6,ymm6,ymm8
8311	vpsrld	ymm8,ymm5,25
8312	vpslld	ymm5,ymm5,32-25
8313	vpxor	ymm5,ymm5,ymm8
8314	vpsrld	ymm8,ymm4,25
8315	vpslld	ymm4,ymm4,32-25
8316	vpxor	ymm4,ymm4,ymm8
8317	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]	; row 2 of state 0 stays live in ymm8 from here
8318	vpalignr	ymm7,ymm7,ymm7,12	; undo the row rotation applied after the first half
8319	vpalignr	ymm11,ymm11,ymm11,8
8320	vpalignr	ymm15,ymm15,ymm15,4
8321	vpalignr	ymm6,ymm6,ymm6,12
8322	vpalignr	ymm10,ymm10,ymm10,8
8323	vpalignr	ymm14,ymm14,ymm14,4
8324	vpalignr	ymm5,ymm5,ymm5,12
8325	vpalignr	ymm9,ymm9,ymm9,8
8326	mov	rdx,QWORD[((8+160+0))+rbp]	; hash block 2: multiply by the high multiplier qword
8327	mulx	rax,r10,r10
8328	add	r14,r10
8329	mulx	r9,r11,r11
8330	adc	r15,r11
8331	adc	r9,0
8332	imul	rdx,r12
8333	vpalignr	ymm13,ymm13,ymm13,4
8334	vpalignr	ymm4,ymm4,ymm4,12
8335	vpalignr	ymm8,ymm8,ymm8,8
8336	vpalignr	ymm12,ymm12,ymm12,4
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347
8348
8349
8350
8351
8352
8353	add	r15,rax	; hash block 2: finish the product (limbs r13, r14, r15, r9)
8354	adc	r9,rdx
8355
8356
8357
8358
8359
8360
8361
8362
8363
8364
8365
8366
8367
8368
8369
8370
8371
8372
8373
8374
8375	mov	r10,r13	; hash block 2: reduction back into r12:r11:r10
8376	mov	r11,r14
8377	mov	r12,r15
8378	and	r12,3
8379	mov	r13,r15
8380	and	r13,-4
8381	mov	r14,r9
8382	shrd	r15,r9,2
8383	shr	r9,2
8384	add	r15,r13
8385	adc	r9,r14
8386	add	r10,r15
8387	adc	r11,r9
8388	adc	r12,0
8389
8390	lea	rdi,[32+rdi]	; step past the two blocks hashed in this iteration
8391	dec	rcx	; while the rcx counter is still positive, take the 3-hash entry
8392	jg	NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
8393	dec	r8	; then keep iterating from this label until r8 drops below zero
8394	jge	NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
8395	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]	; add the initial state back: state 3
8396	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
8397	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
8398	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
8399	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]	; state 2
8400	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
8401	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
8402	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8403	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]	; state 1
8404	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
8405	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
8406	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8407	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]	; state 0
8408	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
8409	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
8410	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
8411
8412	vmovdqa	YMMWORD[(160+128)+rbp],ymm0	; park ymm0 so it can serve as the shuffle temporary
8413	vperm2i128	ymm0,ymm7,ymm3,0x02	; 0x02 pairs the low lanes, 0x13 the high lanes (state 3)
8414	vperm2i128	ymm7,ymm7,ymm3,0x13
8415	vperm2i128	ymm3,ymm15,ymm11,0x02
8416	vperm2i128	ymm11,ymm15,ymm11,0x13
8417	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]	; XOR keystream with 128 input bytes at rsi
8418	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
8419	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
8420	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
8421	vmovdqu	YMMWORD[(0+0)+rdi],ymm0	; store output bytes 0..127
8422	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
8423	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
8424	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
8425
8426	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]	; restore row 0 of state 0
8427	vperm2i128	ymm3,ymm6,ymm2,0x02	; same lane regrouping for state 2
8428	vperm2i128	ymm6,ymm6,ymm2,0x13
8429	vperm2i128	ymm2,ymm14,ymm10,0x02
8430	vperm2i128	ymm10,ymm14,ymm10,0x13
8431	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
8432	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
8433	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
8434	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
8435	vmovdqu	YMMWORD[(0+128)+rdi],ymm3	; store output bytes 128..255
8436	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
8437	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
8438	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
8439	vperm2i128	ymm3,ymm5,ymm1,0x02	; same lane regrouping for state 1
8440	vperm2i128	ymm5,ymm5,ymm1,0x13
8441	vperm2i128	ymm1,ymm13,ymm9,0x02
8442	vperm2i128	ymm9,ymm13,ymm9,0x13
8443	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
8444	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
8445	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
8446	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
8447	vmovdqu	YMMWORD[(0+256)+rdi],ymm3	; store output bytes 256..383
8448	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
8449	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
8450	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
8451	vperm2i128	ymm3,ymm4,ymm0,0x13	; state 0 is only regrouped; it stays in registers,
8452	vperm2i128	ymm0,ymm4,ymm0,0x02	; ordered ymm0, ymm4, ymm8, ymm12
8453	vperm2i128	ymm4,ymm12,ymm8,0x02
8454	vperm2i128	ymm12,ymm12,ymm8,0x13
8455	vmovdqa	ymm8,ymm3
8456
8457	mov	rcx,12*32	; 384 bytes were just written at rdi and are not hashed yet
8458	lea	rsi,[384+rsi]	; input pointer moves past the 384 consumed bytes
8459	sub	rbx,12*32	; rbx (remaining length) shrinks by the same amount
8460	jmp	NEAR $L$seal_avx2_short_hash_remainder
8461
; "320" path setup.  On entry ymm0/ymm4/ymm8/ymm12 already hold one state
; (they are only read here; they are loaded outside this chunk).  The state
; is triplicated into ymm1/5/9/13 and ymm2/6/10/14 with the counter row
; advanced by $L$avx2_inc each time.  Rows 1 and 2 are kept in ymm7/ymm11 and
; the three counter rows in the frame for the final add-back.
8462$L$seal_avx2_320:
8463	vmovdqa	ymm1,ymm0
8464	vmovdqa	ymm2,ymm0
8465	vmovdqa	ymm5,ymm4
8466	vmovdqa	ymm6,ymm4
8467	vmovdqa	ymm9,ymm8
8468	vmovdqa	ymm10,ymm8
8469	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]	; second state's counter row
8470	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]	; third state's counter row
8471	vmovdqa	ymm7,ymm4	; keep initial row 1 (the rounds do not touch ymm7)
8472	vmovdqa	ymm11,ymm8	; keep initial row 2 (the rounds do not touch ymm11)
8473	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; keep the three initial counter rows
8474	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
8475	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
8476	mov	r10,10	; r10 = number of double rounds to run
; Ten ChaCha20 double rounds over three states (ymm0/4/8/12, ymm1/5/9/13,
; ymm2/6/10/14; ymm3 is scratch), counted down in r10.  Afterwards the
; initial state is added back, the first 32 keystream bytes are masked with
; $L$clamp and stored at rbp+160 (the slot the hash code reads its multiplier
; from), and the remaining 320 bytes are regrouped into the register order
; ymm0, 4, 8, 12, 1, 5, 9, 13, 2, 6 before jumping to $L$seal_avx2_short.
8477$L$seal_avx2_320_rounds:
8478	vpaddd	ymm0,ymm0,ymm4	; state 0, first half of the double round
8479	vpxor	ymm12,ymm12,ymm0
8480	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]	; rotate each dword left by 16
8481	vpaddd	ymm8,ymm8,ymm12
8482	vpxor	ymm4,ymm4,ymm8
8483	vpsrld	ymm3,ymm4,20	; rotate left by 12
8484	vpslld	ymm4,ymm4,12
8485	vpxor	ymm4,ymm4,ymm3
8486	vpaddd	ymm0,ymm0,ymm4
8487	vpxor	ymm12,ymm12,ymm0
8488	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]	; rotate each dword left by 8
8489	vpaddd	ymm8,ymm8,ymm12
8490	vpxor	ymm4,ymm4,ymm8
8491	vpslld	ymm3,ymm4,7	; rotate left by 7
8492	vpsrld	ymm4,ymm4,25
8493	vpxor	ymm4,ymm4,ymm3
8494	vpalignr	ymm12,ymm12,ymm12,12	; rotate rows 1-3 within each lane for the second half
8495	vpalignr	ymm8,ymm8,ymm8,8
8496	vpalignr	ymm4,ymm4,ymm4,4
8497	vpaddd	ymm1,ymm1,ymm5	; state 1, first half
8498	vpxor	ymm13,ymm13,ymm1
8499	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8500	vpaddd	ymm9,ymm9,ymm13
8501	vpxor	ymm5,ymm5,ymm9
8502	vpsrld	ymm3,ymm5,20
8503	vpslld	ymm5,ymm5,12
8504	vpxor	ymm5,ymm5,ymm3
8505	vpaddd	ymm1,ymm1,ymm5
8506	vpxor	ymm13,ymm13,ymm1
8507	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8508	vpaddd	ymm9,ymm9,ymm13
8509	vpxor	ymm5,ymm5,ymm9
8510	vpslld	ymm3,ymm5,7
8511	vpsrld	ymm5,ymm5,25
8512	vpxor	ymm5,ymm5,ymm3
8513	vpalignr	ymm13,ymm13,ymm13,12
8514	vpalignr	ymm9,ymm9,ymm9,8
8515	vpalignr	ymm5,ymm5,ymm5,4
8516	vpaddd	ymm2,ymm2,ymm6	; state 2, first half
8517	vpxor	ymm14,ymm14,ymm2
8518	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8519	vpaddd	ymm10,ymm10,ymm14
8520	vpxor	ymm6,ymm6,ymm10
8521	vpsrld	ymm3,ymm6,20
8522	vpslld	ymm6,ymm6,12
8523	vpxor	ymm6,ymm6,ymm3
8524	vpaddd	ymm2,ymm2,ymm6
8525	vpxor	ymm14,ymm14,ymm2
8526	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8527	vpaddd	ymm10,ymm10,ymm14
8528	vpxor	ymm6,ymm6,ymm10
8529	vpslld	ymm3,ymm6,7
8530	vpsrld	ymm6,ymm6,25
8531	vpxor	ymm6,ymm6,ymm3
8532	vpalignr	ymm14,ymm14,ymm14,12
8533	vpalignr	ymm10,ymm10,ymm10,8
8534	vpalignr	ymm6,ymm6,ymm6,4
8535	vpaddd	ymm0,ymm0,ymm4	; state 0, second half
8536	vpxor	ymm12,ymm12,ymm0
8537	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8538	vpaddd	ymm8,ymm8,ymm12
8539	vpxor	ymm4,ymm4,ymm8
8540	vpsrld	ymm3,ymm4,20
8541	vpslld	ymm4,ymm4,12
8542	vpxor	ymm4,ymm4,ymm3
8543	vpaddd	ymm0,ymm0,ymm4
8544	vpxor	ymm12,ymm12,ymm0
8545	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8546	vpaddd	ymm8,ymm8,ymm12
8547	vpxor	ymm4,ymm4,ymm8
8548	vpslld	ymm3,ymm4,7
8549	vpsrld	ymm4,ymm4,25
8550	vpxor	ymm4,ymm4,ymm3
8551	vpalignr	ymm12,ymm12,ymm12,4	; undo the row rotation
8552	vpalignr	ymm8,ymm8,ymm8,8
8553	vpalignr	ymm4,ymm4,ymm4,12
8554	vpaddd	ymm1,ymm1,ymm5	; state 1, second half
8555	vpxor	ymm13,ymm13,ymm1
8556	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8557	vpaddd	ymm9,ymm9,ymm13
8558	vpxor	ymm5,ymm5,ymm9
8559	vpsrld	ymm3,ymm5,20
8560	vpslld	ymm5,ymm5,12
8561	vpxor	ymm5,ymm5,ymm3
8562	vpaddd	ymm1,ymm1,ymm5
8563	vpxor	ymm13,ymm13,ymm1
8564	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8565	vpaddd	ymm9,ymm9,ymm13
8566	vpxor	ymm5,ymm5,ymm9
8567	vpslld	ymm3,ymm5,7
8568	vpsrld	ymm5,ymm5,25
8569	vpxor	ymm5,ymm5,ymm3
8570	vpalignr	ymm13,ymm13,ymm13,4
8571	vpalignr	ymm9,ymm9,ymm9,8
8572	vpalignr	ymm5,ymm5,ymm5,12
8573	vpaddd	ymm2,ymm2,ymm6	; state 2, second half
8574	vpxor	ymm14,ymm14,ymm2
8575	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
8576	vpaddd	ymm10,ymm10,ymm14
8577	vpxor	ymm6,ymm6,ymm10
8578	vpsrld	ymm3,ymm6,20
8579	vpslld	ymm6,ymm6,12
8580	vpxor	ymm6,ymm6,ymm3
8581	vpaddd	ymm2,ymm2,ymm6
8582	vpxor	ymm14,ymm14,ymm2
8583	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
8584	vpaddd	ymm10,ymm10,ymm14
8585	vpxor	ymm6,ymm6,ymm10
8586	vpslld	ymm3,ymm6,7
8587	vpsrld	ymm6,ymm6,25
8588	vpxor	ymm6,ymm6,ymm3
8589	vpalignr	ymm14,ymm14,ymm14,4
8590	vpalignr	ymm10,ymm10,ymm10,8
8591	vpalignr	ymm6,ymm6,ymm6,12
8592
8593	dec	r10	; one double round done
8594	jne	NEAR $L$seal_avx2_320_rounds
8595	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]	; add the initial state back: row 0
8596	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8597	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8598	vpaddd	ymm4,ymm4,ymm7	; row 1 (saved in ymm7 before the rounds)
8599	vpaddd	ymm5,ymm5,ymm7
8600	vpaddd	ymm6,ymm6,ymm7
8601	vpaddd	ymm8,ymm8,ymm11	; row 2 (saved in ymm11 before the rounds)
8602	vpaddd	ymm9,ymm9,ymm11
8603	vpaddd	ymm10,ymm10,ymm11
8604	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]	; counter rows from the frame
8605	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
8606	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
8607	vperm2i128	ymm3,ymm4,ymm0,0x02	; ymm3 = low lanes of rows 0 and 1 of state 0
8608
8609	vpand	ymm3,ymm3,YMMWORD[$L$clamp]	; mask the first 16 bytes; $L$clamp keeps the upper 16 intact
8610	vmovdqa	YMMWORD[(160+0)+rbp],ymm3	; store the 32 bytes at rbp+160
8611
8612	vperm2i128	ymm0,ymm4,ymm0,0x13	; regroup the rest: high lanes of state 0 ...
8613	vperm2i128	ymm4,ymm12,ymm8,0x13
8614	vperm2i128	ymm8,ymm5,ymm1,0x02	; ... then both lanes of state 1 ...
8615	vperm2i128	ymm12,ymm13,ymm9,0x02
8616	vperm2i128	ymm1,ymm5,ymm1,0x13
8617	vperm2i128	ymm5,ymm13,ymm9,0x13
8618	vperm2i128	ymm9,ymm6,ymm2,0x02	; ... and both lanes of state 2
8619	vperm2i128	ymm13,ymm14,ymm10,0x02
8620	vperm2i128	ymm2,ymm6,ymm2,0x13
8621	vperm2i128	ymm6,ymm14,ymm10,0x13
8622	jmp	NEAR $L$seal_avx2_short
8623
; "192" path setup.  On entry ymm0/ymm4/ymm8/ymm12 already hold one state
; (they are only read here; they are loaded outside this chunk).  A second
; state is made in ymm1/5/9/13 with the counter row advanced by
; $L$avx2_inc.  ymm2/ymm6/ymm10 and ymm11/ymm15 keep copies of the initial
; rows for the final add-back; the rounds below do not write to them.
8624$L$seal_avx2_192:
8625	vmovdqa	ymm1,ymm0
8626	vmovdqa	ymm2,ymm0	; saved initial row 0
8627	vmovdqa	ymm5,ymm4
8628	vmovdqa	ymm6,ymm4	; saved initial row 1
8629	vmovdqa	ymm9,ymm8
8630	vmovdqa	ymm10,ymm8	; saved initial row 2
8631	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]	; second state's counter row
8632	vmovdqa	ymm11,ymm12	; saved counter row of state 0
8633	vmovdqa	ymm15,ymm13	; saved counter row of state 1
8634	mov	r10,10	; r10 = number of double rounds to run
; Ten ChaCha20 double rounds over two states (ymm0/4/8/12 and ymm1/5/9/13;
; ymm3 is scratch), counted down in r10.  Afterwards the saved initial rows
; (ymm2, ymm6, ymm10, ymm11, ymm15) are added back, the first 32 keystream
; bytes are masked with $L$clamp and stored at rbp+160, and the remaining
; 192 bytes are regrouped into the register order ymm0, 4, 8, 12, 1, 5.
; Execution then falls through into $L$seal_avx2_short.
8635$L$seal_avx2_192_rounds:
8636	vpaddd	ymm0,ymm0,ymm4	; state 0, first half of the double round
8637	vpxor	ymm12,ymm12,ymm0
8638	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]	; rotate each dword left by 16
8639	vpaddd	ymm8,ymm8,ymm12
8640	vpxor	ymm4,ymm4,ymm8
8641	vpsrld	ymm3,ymm4,20	; rotate left by 12
8642	vpslld	ymm4,ymm4,12
8643	vpxor	ymm4,ymm4,ymm3
8644	vpaddd	ymm0,ymm0,ymm4
8645	vpxor	ymm12,ymm12,ymm0
8646	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]	; rotate each dword left by 8
8647	vpaddd	ymm8,ymm8,ymm12
8648	vpxor	ymm4,ymm4,ymm8
8649	vpslld	ymm3,ymm4,7	; rotate left by 7
8650	vpsrld	ymm4,ymm4,25
8651	vpxor	ymm4,ymm4,ymm3
8652	vpalignr	ymm12,ymm12,ymm12,12	; rotate rows 1-3 within each lane for the second half
8653	vpalignr	ymm8,ymm8,ymm8,8
8654	vpalignr	ymm4,ymm4,ymm4,4
8655	vpaddd	ymm1,ymm1,ymm5	; state 1, first half
8656	vpxor	ymm13,ymm13,ymm1
8657	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8658	vpaddd	ymm9,ymm9,ymm13
8659	vpxor	ymm5,ymm5,ymm9
8660	vpsrld	ymm3,ymm5,20
8661	vpslld	ymm5,ymm5,12
8662	vpxor	ymm5,ymm5,ymm3
8663	vpaddd	ymm1,ymm1,ymm5
8664	vpxor	ymm13,ymm13,ymm1
8665	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8666	vpaddd	ymm9,ymm9,ymm13
8667	vpxor	ymm5,ymm5,ymm9
8668	vpslld	ymm3,ymm5,7
8669	vpsrld	ymm5,ymm5,25
8670	vpxor	ymm5,ymm5,ymm3
8671	vpalignr	ymm13,ymm13,ymm13,12
8672	vpalignr	ymm9,ymm9,ymm9,8
8673	vpalignr	ymm5,ymm5,ymm5,4
8674	vpaddd	ymm0,ymm0,ymm4	; state 0, second half
8675	vpxor	ymm12,ymm12,ymm0
8676	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
8677	vpaddd	ymm8,ymm8,ymm12
8678	vpxor	ymm4,ymm4,ymm8
8679	vpsrld	ymm3,ymm4,20
8680	vpslld	ymm4,ymm4,12
8681	vpxor	ymm4,ymm4,ymm3
8682	vpaddd	ymm0,ymm0,ymm4
8683	vpxor	ymm12,ymm12,ymm0
8684	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
8685	vpaddd	ymm8,ymm8,ymm12
8686	vpxor	ymm4,ymm4,ymm8
8687	vpslld	ymm3,ymm4,7
8688	vpsrld	ymm4,ymm4,25
8689	vpxor	ymm4,ymm4,ymm3
8690	vpalignr	ymm12,ymm12,ymm12,4	; undo the row rotation
8691	vpalignr	ymm8,ymm8,ymm8,8
8692	vpalignr	ymm4,ymm4,ymm4,12
8693	vpaddd	ymm1,ymm1,ymm5	; state 1, second half
8694	vpxor	ymm13,ymm13,ymm1
8695	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
8696	vpaddd	ymm9,ymm9,ymm13
8697	vpxor	ymm5,ymm5,ymm9
8698	vpsrld	ymm3,ymm5,20
8699	vpslld	ymm5,ymm5,12
8700	vpxor	ymm5,ymm5,ymm3
8701	vpaddd	ymm1,ymm1,ymm5
8702	vpxor	ymm13,ymm13,ymm1
8703	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
8704	vpaddd	ymm9,ymm9,ymm13
8705	vpxor	ymm5,ymm5,ymm9
8706	vpslld	ymm3,ymm5,7
8707	vpsrld	ymm5,ymm5,25
8708	vpxor	ymm5,ymm5,ymm3
8709	vpalignr	ymm13,ymm13,ymm13,4
8710	vpalignr	ymm9,ymm9,ymm9,8
8711	vpalignr	ymm5,ymm5,ymm5,12
8712
8713	dec	r10	; one double round done
8714	jne	NEAR $L$seal_avx2_192_rounds
8715	vpaddd	ymm0,ymm0,ymm2	; add the saved initial rows back
8716	vpaddd	ymm1,ymm1,ymm2
8717	vpaddd	ymm4,ymm4,ymm6
8718	vpaddd	ymm5,ymm5,ymm6
8719	vpaddd	ymm8,ymm8,ymm10
8720	vpaddd	ymm9,ymm9,ymm10
8721	vpaddd	ymm12,ymm12,ymm11
8722	vpaddd	ymm13,ymm13,ymm15
8723	vperm2i128	ymm3,ymm4,ymm0,0x02	; ymm3 = low lanes of rows 0 and 1 of state 0
8724
8725	vpand	ymm3,ymm3,YMMWORD[$L$clamp]	; mask the first 16 bytes; $L$clamp keeps the upper 16 intact
8726	vmovdqa	YMMWORD[(160+0)+rbp],ymm3	; store the 32 bytes at rbp+160
8727
8728	vperm2i128	ymm0,ymm4,ymm0,0x13	; regroup the rest: high lanes of state 0 ...
8729	vperm2i128	ymm4,ymm12,ymm8,0x13
8730	vperm2i128	ymm8,ymm5,ymm1,0x02	; ... then both lanes of state 1
8731	vperm2i128	ymm12,ymm13,ymm9,0x02
8732	vperm2i128	ymm1,ymm5,ymm1,0x13
8733	vperm2i128	ymm5,ymm13,ymm9,0x13
; Common entry for the short paths: call the helper, then start with nothing
; pending in rcx.
; NOTE(review): poly_hash_ad_internal is defined outside this chunk;
; presumably it absorbs the additional data into the r10/r11/r12 accumulator
; with r8 as its length -- confirm against its definition.
8734$L$seal_avx2_short:
8735	mov	r8,r8	; register moved onto itself: no architectural effect
8736	call	poly_hash_ad_internal
8737	xor	rcx,rcx	; rcx = 0: no already-written output waiting to be hashed
; Hash, 16 bytes at a time, the rcx bytes that sit at rdi, advancing rdi and
; decreasing rcx as it goes.  The accumulator is r12:r11:r10 and the
; multiplier is the pair of qwords at rbp+160 / rbp+168.  Control moves to
; $L$seal_avx2_short_loop once fewer than 16 bytes remain in rcx.
8738$L$seal_avx2_short_hash_remainder:
8739	cmp	rcx,16
8740	jb	NEAR $L$seal_avx2_short_loop	; unsigned compare: done when rcx < 16
8741	add	r10,QWORD[((0+0))+rdi]	; acc += 16 bytes at [rdi] ...
8742	adc	r11,QWORD[((8+0))+rdi]
8743	adc	r12,1	; ... plus the 2^128 padding bit (and carry)
8744	mov	rax,QWORD[((0+160+0))+rbp]	; low multiplier qword
8745	mov	r15,rax
8746	mul	r10
8747	mov	r13,rax
8748	mov	r14,rdx
8749	mov	rax,QWORD[((0+160+0))+rbp]
8750	mul	r11
8751	imul	r15,r12
8752	add	r14,rax
8753	adc	r15,rdx
8754	mov	rax,QWORD[((8+160+0))+rbp]	; high multiplier qword
8755	mov	r9,rax
8756	mul	r10
8757	add	r14,rax
8758	adc	rdx,0
8759	mov	r10,rdx
8760	mov	rax,QWORD[((8+160+0))+rbp]
8761	mul	r11
8762	add	r15,rax
8763	adc	rdx,0
8764	imul	r9,r12
8765	add	r15,r10
8766	adc	r9,rdx	; product limbs are now r13, r14, r15, r9
8767	mov	r10,r13	; reduction: keep the low 130 bits in r12:r11:r10 ...
8768	mov	r11,r14
8769	mov	r12,r15
8770	and	r12,3
8771	mov	r13,r15	; ... and fold the rest back in as (hi & ~3) + (hi >> 2)
8772	and	r13,-4
8773	mov	r14,r9
8774	shrd	r15,r9,2
8775	shr	r9,2
8776	add	r15,r13
8777	adc	r9,r14
8778	add	r10,r15
8779	adc	r11,r9
8780	adc	r12,0
8781
8782	sub	rcx,16	; 16 fewer bytes pending
8783	add	rdi,16	; move to the next 16-byte block
8784	jmp	NEAR $L$seal_avx2_short_hash_remainder
; Process 32 bytes per iteration while rbx (bytes left) is at least 32:
; XOR 32 input bytes at rsi with the keystream in ymm0, store them at rdi,
; hash the two 16-byte blocks just written, then shift the queue of keystream
; registers down by one (ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <-
; ymm9 <- ymm13 <- ymm2 <- ymm6).
8785$L$seal_avx2_short_loop:
8786	cmp	rbx,32
8787	jb	NEAR $L$seal_avx2_short_tail	; fewer than 32 bytes left
8788	sub	rbx,32
8789
8790	vpxor	ymm0,ymm0,YMMWORD[rsi]	; keystream XOR input
8791	vmovdqu	YMMWORD[rdi],ymm0	; write 32 output bytes
8792	lea	rsi,[32+rsi]
8793
8794	add	r10,QWORD[((0+0))+rdi]	; hash the first 16 output bytes (+ 2^128 bit)
8795	adc	r11,QWORD[((8+0))+rdi]
8796	adc	r12,1
8797	mov	rax,QWORD[((0+160+0))+rbp]
8798	mov	r15,rax
8799	mul	r10
8800	mov	r13,rax
8801	mov	r14,rdx
8802	mov	rax,QWORD[((0+160+0))+rbp]
8803	mul	r11
8804	imul	r15,r12
8805	add	r14,rax
8806	adc	r15,rdx
8807	mov	rax,QWORD[((8+160+0))+rbp]
8808	mov	r9,rax
8809	mul	r10
8810	add	r14,rax
8811	adc	rdx,0
8812	mov	r10,rdx
8813	mov	rax,QWORD[((8+160+0))+rbp]
8814	mul	r11
8815	add	r15,rax
8816	adc	rdx,0
8817	imul	r9,r12
8818	add	r15,r10
8819	adc	r9,rdx
8820	mov	r10,r13	; reduction back into r12:r11:r10
8821	mov	r11,r14
8822	mov	r12,r15
8823	and	r12,3
8824	mov	r13,r15
8825	and	r13,-4
8826	mov	r14,r9
8827	shrd	r15,r9,2
8828	shr	r9,2
8829	add	r15,r13
8830	adc	r9,r14
8831	add	r10,r15
8832	adc	r11,r9
8833	adc	r12,0
8834	add	r10,QWORD[((0+16))+rdi]	; hash the second 16 output bytes (+ 2^128 bit)
8835	adc	r11,QWORD[((8+16))+rdi]
8836	adc	r12,1
8837	mov	rax,QWORD[((0+160+0))+rbp]
8838	mov	r15,rax
8839	mul	r10
8840	mov	r13,rax
8841	mov	r14,rdx
8842	mov	rax,QWORD[((0+160+0))+rbp]
8843	mul	r11
8844	imul	r15,r12
8845	add	r14,rax
8846	adc	r15,rdx
8847	mov	rax,QWORD[((8+160+0))+rbp]
8848	mov	r9,rax
8849	mul	r10
8850	add	r14,rax
8851	adc	rdx,0
8852	mov	r10,rdx
8853	mov	rax,QWORD[((8+160+0))+rbp]
8854	mul	r11
8855	add	r15,rax
8856	adc	rdx,0
8857	imul	r9,r12
8858	add	r15,r10
8859	adc	r9,rdx
8860	mov	r10,r13	; reduction back into r12:r11:r10
8861	mov	r11,r14
8862	mov	r12,r15
8863	and	r12,3
8864	mov	r13,r15
8865	and	r13,-4
8866	mov	r14,r9
8867	shrd	r15,r9,2
8868	shr	r9,2
8869	add	r15,r13
8870	adc	r9,r14
8871	add	r10,r15
8872	adc	r11,r9
8873	adc	r12,0
8874
8875	lea	rdi,[32+rdi]	; output pointer moves past the 32 bytes
8876
8877	vmovdqa	ymm0,ymm4	; shift the keystream queue: next 32 bytes into ymm0
8878	vmovdqa	ymm4,ymm8
8879	vmovdqa	ymm8,ymm12
8880	vmovdqa	ymm12,ymm1
8881	vmovdqa	ymm1,ymm5
8882	vmovdqa	ymm5,ymm9
8883	vmovdqa	ymm9,ymm13
8884	vmovdqa	ymm13,ymm2
8885	vmovdqa	ymm2,ymm6
8886	jmp	NEAR $L$seal_avx2_short_loop
; Fewer than 32 bytes remain in rbx.  If at least 16 remain, XOR one 16-byte
; block using the low lane of ymm0, store and hash it, then move the high
; lane of ymm0 into xmm0 so that xmm0 holds the next 16 keystream bytes.
; Otherwise go straight to $L$seal_avx2_exit.
8887$L$seal_avx2_short_tail:
8888	cmp	rbx,16
8889	jb	NEAR $L$seal_avx2_exit	; fewer than 16 bytes left
8890	sub	rbx,16
8891	vpxor	xmm3,xmm0,XMMWORD[rsi]	; keystream (low lane) XOR input
8892	vmovdqu	XMMWORD[rdi],xmm3	; write 16 output bytes
8893	lea	rsi,[16+rsi]
8894	add	r10,QWORD[((0+0))+rdi]	; hash the 16 bytes just written (+ 2^128 bit)
8895	adc	r11,QWORD[((8+0))+rdi]
8896	adc	r12,1
8897	mov	rax,QWORD[((0+160+0))+rbp]
8898	mov	r15,rax
8899	mul	r10
8900	mov	r13,rax
8901	mov	r14,rdx
8902	mov	rax,QWORD[((0+160+0))+rbp]
8903	mul	r11
8904	imul	r15,r12
8905	add	r14,rax
8906	adc	r15,rdx
8907	mov	rax,QWORD[((8+160+0))+rbp]
8908	mov	r9,rax
8909	mul	r10
8910	add	r14,rax
8911	adc	rdx,0
8912	mov	r10,rdx
8913	mov	rax,QWORD[((8+160+0))+rbp]
8914	mul	r11
8915	add	r15,rax
8916	adc	rdx,0
8917	imul	r9,r12
8918	add	r15,r10
8919	adc	r9,rdx
8920	mov	r10,r13	; reduction back into r12:r11:r10
8921	mov	r11,r14
8922	mov	r12,r15
8923	and	r12,3
8924	mov	r13,r15
8925	and	r13,-4
8926	mov	r14,r9
8927	shrd	r15,r9,2
8928	shr	r9,2
8929	add	r15,r13
8930	adc	r9,r14
8931	add	r10,r15
8932	adc	r11,r9
8933	adc	r12,0
8934
8935	lea	rdi,[16+rdi]	; output pointer moves past the 16 bytes
8936	vextracti128	xmm0,ymm0,1	; xmm0 = high 128-bit lane of ymm0
; Leave the AVX2 code: clear the upper halves of the ymm registers and
; continue at $L$seal_sse_tail_16 (defined outside this chunk).
8937$L$seal_avx2_exit:
8938	vzeroupper	; zero the upper ymm lanes before continuing at the non-AVX2 label
8939	jmp	NEAR $L$seal_sse_tail_16
8940
8941
8942