1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Assembler setup. `default rel` makes bare [symbol] memory operands
; RIP-relative. XMMWORD/YMMWORD/ZMMWORD are defined as empty macros, so an
; operand written as XMMWORD[rcx] assembles as a plain [rcx].
4default	rel
5%define XMMWORD
6%define YMMWORD
7%define ZMMWORD
; No other section directive appears before .pdata near the end of this file,
; so the code and the constant tables below are all emitted into .text.
8section	.text code align=64
9
10EXTERN	GFp_ia32cap_P
11global	GFp_aes_hw_encrypt
12
; GFp_aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
; Registers as used below (the first three Win64 integer argument registers):
;   rcx = pointer to the 16-byte input block
;   rdx = pointer to the 16-byte output block
;   r8  = pointer to the key schedule; the loop count is the dword at [r8+240]
; Clobbers eax, r8 and flags; xmm0-xmm2 are zeroed before returning.
; AES-NI instructions are emitted as raw bytes:
;   DB 102,15,56,220,209 = aesenc     xmm2,xmm1
;   DB 102,15,56,221,209 = aesenclast xmm2,xmm1
13ALIGN	16
14GFp_aes_hw_encrypt:
15
16	movups	xmm2,XMMWORD[rcx]
17	mov	eax,DWORD[240+r8]
18	movups	xmm0,XMMWORD[r8]
19	movups	xmm1,XMMWORD[16+r8]
20	lea	r8,[32+r8]
	; Initial whitening: block ^= round key 0.
21	xorps	xmm2,xmm0
	; eax iterations of aesenc; each iteration also fetches the next round key.
	; jnz consumes the flags of `dec eax` (movups/lea leave flags untouched).
22$L$oop_enc1_1:
23DB	102,15,56,220,209
24	dec	eax
25	movups	xmm1,XMMWORD[r8]
26	lea	r8,[16+r8]
27	jnz	NEAR $L$oop_enc1_1
	; Final round with the key loaded by the last loop iteration.
28DB	102,15,56,221,209
	; Scrub key material and the result from the XMM registers after storing it.
29	pxor	xmm0,xmm0
30	pxor	xmm1,xmm1
31	movups	XMMWORD[rdx],xmm2
32	pxor	xmm2,xmm2
33	DB	0F3h,0C3h		;repret
34
35
36
; _aesni_encrypt2: run two blocks through the AES rounds in parallel.
; In:  xmm2,xmm3 = blocks, rcx = key schedule, eax = loop count (it is
;      multiplied by 16 below and used as a byte offset into the schedule).
; Out: xmm2,xmm3 = results.
; Clobbers rax, rcx, xmm0, xmm1, flags.
; Raw-byte opcodes: 102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast.
;   M = 209/217 -> xmm2/xmm3 with key in xmm1
;   M = 208/216 -> xmm2/xmm3 with key in xmm0
37ALIGN	16
38_aesni_encrypt2:
39
40	movups	xmm0,XMMWORD[rcx]
41	shl	eax,4
42	movups	xmm1,XMMWORD[16+rcx]
	; Whitening with round key 0.
43	xorps	xmm2,xmm0
44	xorps	xmm3,xmm0
45	movups	xmm0,XMMWORD[32+rcx]
	; rcx = key + 32 + 16*count; rax becomes a negative offset from that point
	; and counts upward, so the loop ends when `add rax,32` produces zero.
46	lea	rcx,[32+rax*1+rcx]
47	neg	rax
48	add	rax,16
49
	; Two rounds per iteration, alternating the key register (xmm1, then xmm0).
50$L$enc_loop2:
51DB	102,15,56,220,209
52DB	102,15,56,220,217
53	movups	xmm1,XMMWORD[rax*1+rcx]
54	add	rax,32
55DB	102,15,56,220,208
56DB	102,15,56,220,216
57	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	; Flags are still those of `add rax,32`; movups does not modify them.
58	jnz	NEAR $L$enc_loop2
59
	; Last aesenc (key in xmm1) followed by aesenclast (key in xmm0).
60DB	102,15,56,220,209
61DB	102,15,56,220,217
62DB	102,15,56,221,208
63DB	102,15,56,221,216
64	DB	0F3h,0C3h		;repret
65
66
67
; _aesni_encrypt3: run three blocks through the AES rounds in parallel.
; In:  xmm2-xmm4 = blocks, rcx = key schedule, eax = loop count (multiplied
;      by 16 below and used as a byte offset into the schedule).
; Out: xmm2-xmm4 = results.
; Clobbers rax, rcx, xmm0, xmm1, flags.
; Raw-byte opcodes: 102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast.
;   M = 209/217/225 -> xmm2/xmm3/xmm4 with key in xmm1
;   M = 208/216/224 -> xmm2/xmm3/xmm4 with key in xmm0
68ALIGN	16
69_aesni_encrypt3:
70
71	movups	xmm0,XMMWORD[rcx]
72	shl	eax,4
73	movups	xmm1,XMMWORD[16+rcx]
	; Whitening with round key 0.
74	xorps	xmm2,xmm0
75	xorps	xmm3,xmm0
76	xorps	xmm4,xmm0
77	movups	xmm0,XMMWORD[32+rcx]
	; rcx = key + 32 + 16*count; rax is a negative offset that counts up to 0.
78	lea	rcx,[32+rax*1+rcx]
79	neg	rax
80	add	rax,16
81
	; Two rounds per iteration, alternating the key register (xmm1, then xmm0).
82$L$enc_loop3:
83DB	102,15,56,220,209
84DB	102,15,56,220,217
85DB	102,15,56,220,225
86	movups	xmm1,XMMWORD[rax*1+rcx]
87	add	rax,32
88DB	102,15,56,220,208
89DB	102,15,56,220,216
90DB	102,15,56,220,224
91	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	; Flags are still those of `add rax,32`.
92	jnz	NEAR $L$enc_loop3
93
	; Last aesenc (key in xmm1) followed by aesenclast (key in xmm0).
94DB	102,15,56,220,209
95DB	102,15,56,220,217
96DB	102,15,56,220,225
97DB	102,15,56,221,208
98DB	102,15,56,221,216
99DB	102,15,56,221,224
100	DB	0F3h,0C3h		;repret
101
102
103
; _aesni_encrypt4: run four blocks through the AES rounds in parallel.
; In:  xmm2-xmm5 = blocks, rcx = key schedule, eax = loop count (multiplied
;      by 16 below and used as a byte offset into the schedule).
; Out: xmm2-xmm5 = results.
; Clobbers rax, rcx, xmm0, xmm1, flags.
; Raw-byte opcodes: 102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast.
;   M = 209/217/225/233 -> xmm2..xmm5 with key in xmm1
;   M = 208/216/224/232 -> xmm2..xmm5 with key in xmm0
104ALIGN	16
105_aesni_encrypt4:
106
107	movups	xmm0,XMMWORD[rcx]
108	shl	eax,4
109	movups	xmm1,XMMWORD[16+rcx]
	; Whitening with round key 0.
110	xorps	xmm2,xmm0
111	xorps	xmm3,xmm0
112	xorps	xmm4,xmm0
113	xorps	xmm5,xmm0
114	movups	xmm0,XMMWORD[32+rcx]
	; rcx = key + 32 + 16*count; rax is a negative offset that counts up to 0.
115	lea	rcx,[32+rax*1+rcx]
116	neg	rax
	; 0F 1F 00 is a 3-byte NOP (nop dword [rax]); it only pads the code stream.
117DB	0x0f,0x1f,0x00
118	add	rax,16
119
	; Two rounds per iteration, alternating the key register (xmm1, then xmm0).
120$L$enc_loop4:
121DB	102,15,56,220,209
122DB	102,15,56,220,217
123DB	102,15,56,220,225
124DB	102,15,56,220,233
125	movups	xmm1,XMMWORD[rax*1+rcx]
126	add	rax,32
127DB	102,15,56,220,208
128DB	102,15,56,220,216
129DB	102,15,56,220,224
130DB	102,15,56,220,232
131	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	; Flags are still those of `add rax,32`.
132	jnz	NEAR $L$enc_loop4
133
	; Last aesenc (key in xmm1) followed by aesenclast (key in xmm0).
134DB	102,15,56,220,209
135DB	102,15,56,220,217
136DB	102,15,56,220,225
137DB	102,15,56,220,233
138DB	102,15,56,221,208
139DB	102,15,56,221,216
140DB	102,15,56,221,224
141DB	102,15,56,221,232
142	DB	0F3h,0C3h		;repret
143
144
145
; _aesni_encrypt6: run six blocks through the AES rounds in parallel.
; In:  xmm2-xmm7 = blocks, rcx = key schedule, eax = loop count (multiplied
;      by 16 below and used as a byte offset into the schedule).
; Out: xmm2-xmm7 = results.
; Clobbers rax, rcx, xmm0, xmm1, flags.
; Raw-byte opcodes: 102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast.
;   M = 209/217/225/233/241/249 -> xmm2..xmm7 with key in xmm1
;   M = 208/216/224/232/240/248 -> xmm2..xmm7 with key in xmm0
; $L$enc_loop6 is also a `call` target from the 6-block loop of
; GFp_aes_hw_ctr32_encrypt_blocks, which sets up rax/rcx/xmm0/xmm1 itself;
; keep the register contract at that label unchanged.
146ALIGN	16
147_aesni_encrypt6:
148
149	movups	xmm0,XMMWORD[rcx]
150	shl	eax,4
151	movups	xmm1,XMMWORD[16+rcx]
	; Whitening with round key 0 is interleaved with the first aesenc (key in
	; xmm1) of the blocks that are already whitened.
152	xorps	xmm2,xmm0
153	pxor	xmm3,xmm0
154	pxor	xmm4,xmm0
155DB	102,15,56,220,209
	; rcx = key + 32 + 16*count; rax is a negative offset that counts up to 0.
156	lea	rcx,[32+rax*1+rcx]
157	neg	rax
158DB	102,15,56,220,217
159	pxor	xmm5,xmm0
160	pxor	xmm6,xmm0
161DB	102,15,56,220,225
162	pxor	xmm7,xmm0
163	movups	xmm0,XMMWORD[rax*1+rcx]
164	add	rax,16
	; xmm2-xmm4 already had their first round above, so enter the loop at the
	; point where xmm5-xmm7 get theirs.
165	jmp	NEAR $L$enc_loop6_enter
166ALIGN	16
167$L$enc_loop6:
168DB	102,15,56,220,209
169DB	102,15,56,220,217
170DB	102,15,56,220,225
171$L$enc_loop6_enter:
172DB	102,15,56,220,233
173DB	102,15,56,220,241
174DB	102,15,56,220,249
175	movups	xmm1,XMMWORD[rax*1+rcx]
176	add	rax,32
177DB	102,15,56,220,208
178DB	102,15,56,220,216
179DB	102,15,56,220,224
180DB	102,15,56,220,232
181DB	102,15,56,220,240
182DB	102,15,56,220,248
183	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	; Flags are still those of `add rax,32`.
184	jnz	NEAR $L$enc_loop6
185
	; Last aesenc (key in xmm1) followed by aesenclast (key in xmm0).
186DB	102,15,56,220,209
187DB	102,15,56,220,217
188DB	102,15,56,220,225
189DB	102,15,56,220,233
190DB	102,15,56,220,241
191DB	102,15,56,220,249
192DB	102,15,56,221,208
193DB	102,15,56,221,216
194DB	102,15,56,221,224
195DB	102,15,56,221,232
196DB	102,15,56,221,240
197DB	102,15,56,221,248
198	DB	0F3h,0C3h		;repret
199
200
201
; _aesni_encrypt8: run eight blocks through the AES rounds in parallel.
; In:  xmm2-xmm9 = blocks, rcx = key schedule, eax = loop count (multiplied
;      by 16 below and used as a byte offset into the schedule).
; Out: xmm2-xmm9 = results.
; Clobbers rax, rcx, xmm0, xmm1, flags.
; Raw-byte opcodes: 102,15,56,220,M = aesenc, 102,15,56,221,M = aesenclast.
;   M = 209/217/225/233/241/249 -> xmm2..xmm7 with key in xmm1
;   M = 208/216/224/232/240/248 -> xmm2..xmm7 with key in xmm0
;   With the extra prefix byte 68 (REX.R): 102,68,15,56,220,193/201 =
;   aesenc xmm8/xmm9,xmm1 and ...,192/200 = aesenc xmm8/xmm9,xmm0.
; $L$enc_loop8_enter is also a `call` target from the tail code of
; GFp_aes_hw_ctr32_encrypt_blocks; keep the register contract at that label
; unchanged.
202ALIGN	16
203_aesni_encrypt8:
204
205	movups	xmm0,XMMWORD[rcx]
206	shl	eax,4
207	movups	xmm1,XMMWORD[16+rcx]
	; Whitening with round key 0, interleaved with the first aesenc of
	; xmm2/xmm3.
208	xorps	xmm2,xmm0
209	xorps	xmm3,xmm0
210	pxor	xmm4,xmm0
211	pxor	xmm5,xmm0
212	pxor	xmm6,xmm0
	; rcx = key + 32 + 16*count; rax is a negative offset that counts up to 0.
213	lea	rcx,[32+rax*1+rcx]
214	neg	rax
215DB	102,15,56,220,209
216	pxor	xmm7,xmm0
217	pxor	xmm8,xmm0
218DB	102,15,56,220,217
219	pxor	xmm9,xmm0
220	movups	xmm0,XMMWORD[rax*1+rcx]
221	add	rax,16
	; xmm2/xmm3 already had their first round, so join the loop after them.
222	jmp	NEAR $L$enc_loop8_inner
223ALIGN	16
224$L$enc_loop8:
225DB	102,15,56,220,209
226DB	102,15,56,220,217
227$L$enc_loop8_inner:
228DB	102,15,56,220,225
229DB	102,15,56,220,233
230DB	102,15,56,220,241
231DB	102,15,56,220,249
232DB	102,68,15,56,220,193
233DB	102,68,15,56,220,201
234$L$enc_loop8_enter:
235	movups	xmm1,XMMWORD[rax*1+rcx]
236	add	rax,32
237DB	102,15,56,220,208
238DB	102,15,56,220,216
239DB	102,15,56,220,224
240DB	102,15,56,220,232
241DB	102,15,56,220,240
242DB	102,15,56,220,248
243DB	102,68,15,56,220,192
244DB	102,68,15,56,220,200
245	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
	; Flags are still those of `add rax,32`.
246	jnz	NEAR $L$enc_loop8
247
	; Last aesenc (key in xmm1) followed by aesenclast (key in xmm0).
248DB	102,15,56,220,209
249DB	102,15,56,220,217
250DB	102,15,56,220,225
251DB	102,15,56,220,233
252DB	102,15,56,220,241
253DB	102,15,56,220,249
254DB	102,68,15,56,220,193
255DB	102,68,15,56,220,201
256DB	102,15,56,221,208
257DB	102,15,56,221,216
258DB	102,15,56,221,224
259DB	102,15,56,221,232
260DB	102,15,56,221,240
261DB	102,15,56,221,248
262DB	102,68,15,56,221,192
263DB	102,68,15,56,221,200
264	DB	0F3h,0C3h		;repret
265
266
; GFp_aes_hw_ctr32_encrypt_blocks
;
; AES counter-mode encryption of whole 16-byte blocks: each input block is
; XORed with the encryption of a counter block. The counter is the
; big-endian 32-bit word in bytes 12..15 of the 16-byte block at ivec; only
; that word is incremented (32-bit arithmetic), and ivec itself is only read.
;
; Win64 entry registers, as remapped by the prologue below:
;   rcx      -> rdi : input pointer
;   rdx      -> rsi : output pointer
;   r8       -> rdx : number of 16-byte blocks
;   r9       -> rcx : key schedule (loop count read from [rcx+240])
;   [rsp+40] -> r8  : pointer to the 16-byte counter block
;
; The caller's rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded in the
; epilogue. The bulk path additionally saves/restores rbp and xmm6-xmm15 and
; zeroes xmm0-xmm5 and its stack scratch before returning.
;
; NOTE(review): a block count of 0 is not special-cased. It takes the bulk
; path and falls into $L$ctr32_loop3, which still reads 16 bytes from the
; input and writes 16 bytes to the output. Callers must pass at least 1.
;
; Raw-byte opcodes used below:
;   102,15,56,220,M / 102,15,56,221,M      aesenc / aesenclast (xmm2..xmm7;
;                                          key in xmm1 or xmm0)
;   102,68,15,56,220,M                     aesenc xmm8/xmm9,xmm1|xmm0
;   102,65,15,56,221,M                     aesenclast xmm2..xmm7,xmm10..xmm15
;   102,15,58,34,M,3                       pinsrd xmmN,r32,3
;   0x0f,0x38,0xf1,0x44,0x24,D             movbe [rsp+D],eax
;   0x66,0x90                              2-byte NOP
267global	GFp_aes_hw_ctr32_encrypt_blocks
268
269ALIGN	16
270GFp_aes_hw_ctr32_encrypt_blocks:
271	mov	QWORD[8+rsp],rdi	;WIN64 prologue
272	mov	QWORD[16+rsp],rsi
	; rax = rsp at entry; ctr_xts_se_handler reads this value back from the
	; saved context when a fault occurs before $L$ctr32_body.
273	mov	rax,rsp
274$L$SEH_begin_GFp_aes_hw_ctr32_encrypt_blocks:
275	mov	rdi,rcx
276	mov	rsi,rdx
277	mov	rdx,r8
278	mov	rcx,r9
279	mov	r8,QWORD[40+rsp]
280
281
282
283	cmp	rdx,1
284	jne	NEAR $L$ctr32_bulk
285
286
287
	; Exactly one block: encrypt the counter block inline (no stack frame),
	; XOR it with the input block and store. edx is reused as the loop counter.
288	movups	xmm2,XMMWORD[r8]
289	movups	xmm3,XMMWORD[rdi]
290	mov	edx,DWORD[240+rcx]
291	movups	xmm0,XMMWORD[rcx]
292	movups	xmm1,XMMWORD[16+rcx]
293	lea	rcx,[32+rcx]
294	xorps	xmm2,xmm0
295$L$oop_enc1_2:
296DB	102,15,56,220,209
297	dec	edx
298	movups	xmm1,XMMWORD[rcx]
299	lea	rcx,[16+rcx]
300	jnz	NEAR $L$oop_enc1_2
301DB	102,15,56,221,209
302	pxor	xmm0,xmm0
303	pxor	xmm1,xmm1
304	xorps	xmm2,xmm3
305	pxor	xmm3,xmm3
306	movups	XMMWORD[rsi],xmm2
307	xorps	xmm2,xmm2
308	jmp	NEAR $L$ctr32_epilogue
309
310ALIGN	16
311$L$ctr32_bulk:
	; Frame setup. r11 keeps the entry rsp as the frame anchor: rbp is pushed
	; to [r11-8] and xmm6-xmm15 go to [r11-168 .. r11-9]. rsp is then rounded
	; down to 16 so the movdqa accesses to the counter slots are aligned.
312	lea	r11,[rsp]
313
314	push	rbp
315
316	sub	rsp,288
317	and	rsp,-16
318	movaps	XMMWORD[(-168)+r11],xmm6
319	movaps	XMMWORD[(-152)+r11],xmm7
320	movaps	XMMWORD[(-136)+r11],xmm8
321	movaps	XMMWORD[(-120)+r11],xmm9
322	movaps	XMMWORD[(-104)+r11],xmm10
323	movaps	XMMWORD[(-88)+r11],xmm11
324	movaps	XMMWORD[(-72)+r11],xmm12
325	movaps	XMMWORD[(-56)+r11],xmm13
326	movaps	XMMWORD[(-40)+r11],xmm14
327	movaps	XMMWORD[(-24)+r11],xmm15
328$L$ctr32_body:
329
330
331
332
	; Build eight counter blocks, each already XORed with round key 0, in the
	; stack slots [rsp+0], [rsp+16], ... [rsp+112].
	;   r8d = counter word in host byte order (after bswap)
	;   ebp = dword 3 of round key 0; it is XORed into every byte-swapped
	;         counter so the stored dword matches the pre-whitened block.
	; The block count is parked in r10 while rdx/edx is used as scratch.
333	movdqu	xmm2,XMMWORD[r8]
334	movdqu	xmm0,XMMWORD[rcx]
335	mov	r8d,DWORD[12+r8]
336	pxor	xmm2,xmm0
337	mov	ebp,DWORD[12+rcx]
338	movdqa	XMMWORD[rsp],xmm2
339	bswap	r8d
340	movdqa	xmm3,xmm2
341	movdqa	xmm4,xmm2
342	movdqa	xmm5,xmm2
343	movdqa	XMMWORD[64+rsp],xmm2
344	movdqa	XMMWORD[80+rsp],xmm2
345	movdqa	XMMWORD[96+rsp],xmm2
346	mov	r10,rdx
347	movdqa	XMMWORD[112+rsp],xmm2
348
	; Counters +1, +2, +3 are inserted into dword 3 of xmm3, xmm4, xmm5 with
	; pinsrd; counters +4..+7 are patched directly into the stack slots.
349	lea	rax,[1+r8]
350	lea	rdx,[2+r8]
351	bswap	eax
352	bswap	edx
353	xor	eax,ebp
354	xor	edx,ebp
355DB	102,15,58,34,216,3
356	lea	rax,[3+r8]
357	movdqa	XMMWORD[16+rsp],xmm3
358DB	102,15,58,34,226,3
359	bswap	eax
	; Block count back into rdx; r10 becomes scratch from here on.
360	mov	rdx,r10
361	lea	r10,[4+r8]
362	movdqa	XMMWORD[32+rsp],xmm4
363	xor	eax,ebp
364	bswap	r10d
365DB	102,15,58,34,232,3
366	xor	r10d,ebp
367	movdqa	XMMWORD[48+rsp],xmm5
368	lea	r9,[5+r8]
369	mov	DWORD[((64+12))+rsp],r10d
370	bswap	r9d
371	lea	r10,[6+r8]
	; eax = loop count from the key schedule.
372	mov	eax,DWORD[240+rcx]
373	xor	r9d,ebp
374	bswap	r10d
375	mov	DWORD[((80+12))+rsp],r9d
376	xor	r10d,ebp
377	lea	r9,[7+r8]
378	mov	DWORD[((96+12))+rsp],r10d
379	bswap	r9d
	; r10d = second dword of GFp_ia32cap_P masked with 0x04400000 (bits 22 and
	; 26). NOTE(review): presumably the MOVBE and XSAVE CPUID feature bits -
	; confirm against the definition of GFp_ia32cap_P.
380	lea	r10,[GFp_ia32cap_P]
381	mov	r10d,DWORD[4+r10]
382	xor	r9d,ebp
383	and	r10d,71303168
384	mov	DWORD[((112+12))+rsp],r9d
385
386	movups	xmm1,XMMWORD[16+rcx]
387
388	movdqa	xmm6,XMMWORD[64+rsp]
389	movdqa	xmm7,XMMWORD[80+rsp]
390
	; Fewer than 8 blocks: straight to the tail code.
391	cmp	rdx,8
392	jb	NEAR $L$ctr32_tail
393
	; Masked value == 0x00400000 (bit 22 set, bit 26 clear) selects the
	; 6-blocks-per-iteration loop, which relies on movbe; everything else uses
	; the 8-blocks-per-iteration loop. rdx is pre-decremented by the stride so
	; the loops can test the borrow of `sub rdx,N`.
394	sub	rdx,6
395	cmp	r10d,4194304
396	je	NEAR $L$ctr32_6x
397
	; 8x loop addresses round keys relative to key+128.
398	lea	rcx,[128+rcx]
399	sub	rdx,2
400	jmp	NEAR $L$ctr32_loop8
401
402ALIGN	16
403$L$ctr32_6x:
	; Setup for the 6x loop: rcx = key + 32 + 16*count, r10 = 48 - 16*count,
	; so [rcx+r10-48] is round key 2 and [rcx+r10-64] is round key 1.
	; ebp is byte-swapped because the counters are now stored with movbe:
	; movbe(ctr ^ bswap(k)) == bswap(ctr) ^ k.
404	shl	eax,4
405	mov	r10d,48
406	bswap	ebp
407	lea	rcx,[32+rax*1+rcx]
408	sub	r10,rax
409	jmp	NEAR $L$ctr32_loop6
410
411ALIGN	16
412$L$ctr32_loop6:
	; One pass = 6 blocks (xmm2-xmm7). While the first rounds run, the next six
	; counter dwords are written into the stack slots at offsets 12, 28, ... 92.
413	add	r8d,6
414	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]
415DB	102,15,56,220,209
416	mov	eax,r8d
417	xor	eax,ebp
418DB	102,15,56,220,217
419DB	0x0f,0x38,0xf1,0x44,0x24,12
420	lea	eax,[1+r8]
421DB	102,15,56,220,225
422	xor	eax,ebp
423DB	0x0f,0x38,0xf1,0x44,0x24,28
424DB	102,15,56,220,233
425	lea	eax,[2+r8]
426	xor	eax,ebp
427DB	102,15,56,220,241
428DB	0x0f,0x38,0xf1,0x44,0x24,44
429	lea	eax,[3+r8]
430DB	102,15,56,220,249
431	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]
432	xor	eax,ebp
433
434DB	102,15,56,220,208
435DB	0x0f,0x38,0xf1,0x44,0x24,60
436	lea	eax,[4+r8]
437DB	102,15,56,220,216
438	xor	eax,ebp
439DB	0x0f,0x38,0xf1,0x44,0x24,76
440DB	102,15,56,220,224
441	lea	eax,[5+r8]
442	xor	eax,ebp
443DB	102,15,56,220,232
444DB	0x0f,0x38,0xf1,0x44,0x24,92
	; rax = r10 is the running key offset expected by $L$enc_loop6.
445	mov	rax,r10
446DB	102,15,56,220,240
447DB	102,15,56,220,248
448	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]
449
	; Remaining rounds are done by the shared loop inside _aesni_encrypt6.
450	call	$L$enc_loop6
451
	; XOR keystream with six input blocks, store, and reload xmm2-xmm7 with
	; the next counter blocks from the stack; xmm1 = round key 1 again.
452	movdqu	xmm8,XMMWORD[rdi]
453	movdqu	xmm9,XMMWORD[16+rdi]
454	movdqu	xmm10,XMMWORD[32+rdi]
455	movdqu	xmm11,XMMWORD[48+rdi]
456	movdqu	xmm12,XMMWORD[64+rdi]
457	movdqu	xmm13,XMMWORD[80+rdi]
458	lea	rdi,[96+rdi]
459	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]
460	pxor	xmm8,xmm2
461	movaps	xmm2,XMMWORD[rsp]
462	pxor	xmm9,xmm3
463	movaps	xmm3,XMMWORD[16+rsp]
464	pxor	xmm10,xmm4
465	movaps	xmm4,XMMWORD[32+rsp]
466	pxor	xmm11,xmm5
467	movaps	xmm5,XMMWORD[48+rsp]
468	pxor	xmm12,xmm6
469	movaps	xmm6,XMMWORD[64+rsp]
470	pxor	xmm13,xmm7
471	movaps	xmm7,XMMWORD[80+rsp]
472	movdqu	XMMWORD[rsi],xmm8
473	movdqu	XMMWORD[16+rsi],xmm9
474	movdqu	XMMWORD[32+rsi],xmm10
475	movdqu	XMMWORD[48+rsi],xmm11
476	movdqu	XMMWORD[64+rsi],xmm12
477	movdqu	XMMWORD[80+rsi],xmm13
478	lea	rsi,[96+rsi]
479
	; Loop while at least 6 blocks remain (no borrow from the subtraction).
480	sub	rdx,6
481	jnc	NEAR $L$ctr32_loop6
482
483	add	rdx,6
484	jz	NEAR $L$ctr32_done
485
	; Undo the 6x setup for the tail: eax = loop count again, rcx = key.
486	lea	eax,[((-48))+r10]
487	lea	rcx,[((-80))+r10*1+rcx]
488	neg	eax
489	shr	eax,4
490	jmp	NEAR $L$ctr32_tail
491
492ALIGN	32
493$L$ctr32_loop8:
	; One pass = 8 blocks (xmm2-xmm9). While the rounds run, r9d builds the
	; next eight counter dwords (r8d+0 .. r8d+7 after the add below) and stores
	; them into dword 3 of the stack slots. rcx is biased by +128 in this loop,
	; hence the (N-128) displacements when fetching round keys.
494	add	r8d,8
495	movdqa	xmm8,XMMWORD[96+rsp]
496DB	102,15,56,220,209
497	mov	r9d,r8d
498	movdqa	xmm9,XMMWORD[112+rsp]
499DB	102,15,56,220,217
500	bswap	r9d
501	movups	xmm0,XMMWORD[((32-128))+rcx]
502DB	102,15,56,220,225
503	xor	r9d,ebp
504	nop
505DB	102,15,56,220,233
506	mov	DWORD[((0+12))+rsp],r9d
507	lea	r9,[1+r8]
508DB	102,15,56,220,241
509DB	102,15,56,220,249
510DB	102,68,15,56,220,193
511DB	102,68,15,56,220,201
512	movups	xmm1,XMMWORD[((48-128))+rcx]
513	bswap	r9d
514DB	102,15,56,220,208
515DB	102,15,56,220,216
516	xor	r9d,ebp
517DB	0x66,0x90
518DB	102,15,56,220,224
519DB	102,15,56,220,232
520	mov	DWORD[((16+12))+rsp],r9d
521	lea	r9,[2+r8]
522DB	102,15,56,220,240
523DB	102,15,56,220,248
524DB	102,68,15,56,220,192
525DB	102,68,15,56,220,200
526	movups	xmm0,XMMWORD[((64-128))+rcx]
527	bswap	r9d
528DB	102,15,56,220,209
529DB	102,15,56,220,217
530	xor	r9d,ebp
531DB	0x66,0x90
532DB	102,15,56,220,225
533DB	102,15,56,220,233
534	mov	DWORD[((32+12))+rsp],r9d
535	lea	r9,[3+r8]
536DB	102,15,56,220,241
537DB	102,15,56,220,249
538DB	102,68,15,56,220,193
539DB	102,68,15,56,220,201
540	movups	xmm1,XMMWORD[((80-128))+rcx]
541	bswap	r9d
542DB	102,15,56,220,208
543DB	102,15,56,220,216
544	xor	r9d,ebp
545DB	0x66,0x90
546DB	102,15,56,220,224
547DB	102,15,56,220,232
548	mov	DWORD[((48+12))+rsp],r9d
549	lea	r9,[4+r8]
550DB	102,15,56,220,240
551DB	102,15,56,220,248
552DB	102,68,15,56,220,192
553DB	102,68,15,56,220,200
554	movups	xmm0,XMMWORD[((96-128))+rcx]
555	bswap	r9d
556DB	102,15,56,220,209
557DB	102,15,56,220,217
558	xor	r9d,ebp
559DB	0x66,0x90
560DB	102,15,56,220,225
561DB	102,15,56,220,233
562	mov	DWORD[((64+12))+rsp],r9d
563	lea	r9,[5+r8]
564DB	102,15,56,220,241
565DB	102,15,56,220,249
566DB	102,68,15,56,220,193
567DB	102,68,15,56,220,201
568	movups	xmm1,XMMWORD[((112-128))+rcx]
569	bswap	r9d
570DB	102,15,56,220,208
571DB	102,15,56,220,216
572	xor	r9d,ebp
573DB	0x66,0x90
574DB	102,15,56,220,224
575DB	102,15,56,220,232
576	mov	DWORD[((80+12))+rsp],r9d
577	lea	r9,[6+r8]
578DB	102,15,56,220,240
579DB	102,15,56,220,248
580DB	102,68,15,56,220,192
581DB	102,68,15,56,220,200
582	movups	xmm0,XMMWORD[((128-128))+rcx]
583	bswap	r9d
584DB	102,15,56,220,209
585DB	102,15,56,220,217
586	xor	r9d,ebp
587DB	0x66,0x90
588DB	102,15,56,220,225
589DB	102,15,56,220,233
590	mov	DWORD[((96+12))+rsp],r9d
591	lea	r9,[7+r8]
592DB	102,15,56,220,241
593DB	102,15,56,220,249
594DB	102,68,15,56,220,193
595DB	102,68,15,56,220,201
596	movups	xmm1,XMMWORD[((144-128))+rcx]
597	bswap	r9d
598DB	102,15,56,220,208
599DB	102,15,56,220,216
600DB	102,15,56,220,224
601	xor	r9d,ebp
	; Start fetching the input for this pass (first block into xmm10).
602	movdqu	xmm10,XMMWORD[rdi]
603DB	102,15,56,220,232
604	mov	DWORD[((112+12))+rsp],r9d
	; eax is the loop count read from [key+240]. Below 11 the remaining work
	; is done in $L$ctr32_enc_done; otherwise four more rounds run first.
	; The flags of this cmp are consumed by the jb further down (the
	; instructions in between do not modify flags).
605	cmp	eax,11
606DB	102,15,56,220,240
607DB	102,15,56,220,248
608DB	102,68,15,56,220,192
609DB	102,68,15,56,220,200
610	movups	xmm0,XMMWORD[((160-128))+rcx]
611
612	jb	NEAR $L$ctr32_enc_done
613
614DB	102,15,56,220,209
615DB	102,15,56,220,217
616DB	102,15,56,220,225
617DB	102,15,56,220,233
618DB	102,15,56,220,241
619DB	102,15,56,220,249
620DB	102,68,15,56,220,193
621DB	102,68,15,56,220,201
622	movups	xmm1,XMMWORD[((176-128))+rcx]
623
624DB	102,15,56,220,208
625DB	102,15,56,220,216
626DB	102,15,56,220,224
627DB	102,15,56,220,232
628DB	102,15,56,220,240
629DB	102,15,56,220,248
630DB	102,68,15,56,220,192
631DB	102,68,15,56,220,200
632	movups	xmm0,XMMWORD[((192-128))+rcx]
633
634
635
636DB	102,15,56,220,209
637DB	102,15,56,220,217
638DB	102,15,56,220,225
639DB	102,15,56,220,233
640DB	102,15,56,220,241
641DB	102,15,56,220,249
642DB	102,68,15,56,220,193
643DB	102,68,15,56,220,201
644	movups	xmm1,XMMWORD[((208-128))+rcx]
645
646DB	102,15,56,220,208
647DB	102,15,56,220,216
648DB	102,15,56,220,224
649DB	102,15,56,220,232
650DB	102,15,56,220,240
651DB	102,15,56,220,248
652DB	102,68,15,56,220,192
653DB	102,68,15,56,220,200
654	movups	xmm0,XMMWORD[((224-128))+rcx]
655	jmp	NEAR $L$ctr32_enc_done
656
657ALIGN	16
658$L$ctr32_enc_done:
	; Here xmm1 holds the key for the last aesenc and xmm0 the final round
	; key. Each input block is first XORed with xmm0 and then passed as the
	; key operand of aesenclast, so the final key addition and the XOR with
	; the input happen in a single instruction.
659	movdqu	xmm11,XMMWORD[16+rdi]
660	pxor	xmm10,xmm0
661	movdqu	xmm12,XMMWORD[32+rdi]
662	pxor	xmm11,xmm0
663	movdqu	xmm13,XMMWORD[48+rdi]
664	pxor	xmm12,xmm0
665	movdqu	xmm14,XMMWORD[64+rdi]
666	pxor	xmm13,xmm0
667	movdqu	xmm15,XMMWORD[80+rdi]
668	pxor	xmm14,xmm0
669	pxor	xmm15,xmm0
670DB	102,15,56,220,209
671DB	102,15,56,220,217
672DB	102,15,56,220,225
673DB	102,15,56,220,233
674DB	102,15,56,220,241
675DB	102,15,56,220,249
676DB	102,68,15,56,220,193
677DB	102,68,15,56,220,201
	; xmm1 is reused for input block 6; rdi advances past the 8 blocks.
678	movdqu	xmm1,XMMWORD[96+rdi]
679	lea	rdi,[128+rdi]
680
	; aesenclast for all eight blocks, interleaved with reloading xmm11-xmm15
	; and xmm0 with the next counter blocks from the stack.
681DB	102,65,15,56,221,210
682	pxor	xmm1,xmm0
683	movdqu	xmm10,XMMWORD[((112-128))+rdi]
684DB	102,65,15,56,221,219
685	pxor	xmm10,xmm0
686	movdqa	xmm11,XMMWORD[rsp]
687DB	102,65,15,56,221,228
688DB	102,65,15,56,221,237
689	movdqa	xmm12,XMMWORD[16+rsp]
690	movdqa	xmm13,XMMWORD[32+rsp]
691DB	102,65,15,56,221,246
692DB	102,65,15,56,221,255
693	movdqa	xmm14,XMMWORD[48+rsp]
694	movdqa	xmm15,XMMWORD[64+rsp]
	; 102,68,15,56,221,193 = aesenclast xmm8,xmm1
695DB	102,68,15,56,221,193
696	movdqa	xmm0,XMMWORD[80+rsp]
	; xmm1 = round key 1 again (key+16, with rcx biased by +128).
697	movups	xmm1,XMMWORD[((16-128))+rcx]
	; 102,69,15,56,221,202 = aesenclast xmm9,xmm10
698DB	102,69,15,56,221,202
699
	; Store the eight results and move the next counter blocks into xmm2-xmm7.
700	movups	XMMWORD[rsi],xmm2
701	movdqa	xmm2,xmm11
702	movups	XMMWORD[16+rsi],xmm3
703	movdqa	xmm3,xmm12
704	movups	XMMWORD[32+rsi],xmm4
705	movdqa	xmm4,xmm13
706	movups	XMMWORD[48+rsi],xmm5
707	movdqa	xmm5,xmm14
708	movups	XMMWORD[64+rsi],xmm6
709	movdqa	xmm6,xmm15
710	movups	XMMWORD[80+rsi],xmm7
711	movdqa	xmm7,xmm0
712	movups	XMMWORD[96+rsi],xmm8
713	movups	XMMWORD[112+rsi],xmm9
714	lea	rsi,[128+rsi]
715
	; Loop while at least 8 blocks remain (no borrow from the subtraction).
716	sub	rdx,8
717	jnc	NEAR $L$ctr32_loop8
718
719	add	rdx,8
720	jz	NEAR $L$ctr32_done
	; Remove the +128 bias so rcx points at the key schedule again.
721	lea	rcx,[((-128))+rcx]
722
723$L$ctr32_tail:
724
725
	; Tail: rdx = remaining blocks (fewer than 8), rcx = key schedule,
	; eax = loop count, xmm1 = round key 1, xmm2-xmm7 = pre-whitened counter
	; blocks. Fewer than 4 -> $L$ctr32_loop3, exactly 4 -> $L$ctr32_loop4,
	; 5..7 fall through and use the 8-way round loop.
726	lea	rcx,[16+rcx]
727	cmp	rdx,4
728	jb	NEAR $L$ctr32_loop3
729	je	NEAR $L$ctr32_loop4
730
731
	; 5..7 blocks: xmm8 = seventh counter block from the stack, xmm9 = 0 (a
	; dummy lane). The first round is done here, then the shared loop inside
	; _aesni_encrypt8 is entered at $L$enc_loop8_enter.
732	shl	eax,4
733	movdqa	xmm8,XMMWORD[96+rsp]
734	pxor	xmm9,xmm9
735
736	movups	xmm0,XMMWORD[16+rcx]
737DB	102,15,56,220,209
738DB	102,15,56,220,217
739	lea	rcx,[((32-16))+rax*1+rcx]
740	neg	rax
741DB	102,15,56,220,225
742	add	rax,16
743	movups	xmm10,XMMWORD[rdi]
744DB	102,15,56,220,233
745DB	102,15,56,220,241
746	movups	xmm11,XMMWORD[16+rdi]
747	movups	xmm12,XMMWORD[32+rdi]
748DB	102,15,56,220,249
749DB	102,68,15,56,220,193
750
751	call	$L$enc_loop8_enter
752
	; Five blocks are always written; the sixth and seventh depend on rdx.
753	movdqu	xmm13,XMMWORD[48+rdi]
754	pxor	xmm2,xmm10
755	movdqu	xmm10,XMMWORD[64+rdi]
756	pxor	xmm3,xmm11
757	movdqu	XMMWORD[rsi],xmm2
758	pxor	xmm4,xmm12
759	movdqu	XMMWORD[16+rsi],xmm3
760	pxor	xmm5,xmm13
761	movdqu	XMMWORD[32+rsi],xmm4
762	pxor	xmm6,xmm10
763	movdqu	XMMWORD[48+rsi],xmm5
764	movdqu	XMMWORD[64+rsi],xmm6
765	cmp	rdx,6
766	jb	NEAR $L$ctr32_done
767
768	movups	xmm11,XMMWORD[80+rdi]
769	xorps	xmm7,xmm11
770	movups	XMMWORD[80+rsi],xmm7
	; Still the flags of `cmp rdx,6`: equal means exactly six blocks.
771	je	NEAR $L$ctr32_done
772
773	movups	xmm12,XMMWORD[96+rdi]
774	xorps	xmm8,xmm12
775	movups	XMMWORD[96+rsi],xmm8
776	jmp	NEAR $L$ctr32_done
777
778ALIGN	32
779$L$ctr32_loop4:
	; Exactly four blocks: one round per iteration on xmm2-xmm5, eax
	; iterations, then aesenclast with the key loaded by the last iteration.
	; jnz uses the flags of `dec eax`.
780DB	102,15,56,220,209
781	lea	rcx,[16+rcx]
782	dec	eax
783DB	102,15,56,220,217
784DB	102,15,56,220,225
785DB	102,15,56,220,233
786	movups	xmm1,XMMWORD[rcx]
787	jnz	NEAR $L$ctr32_loop4
788DB	102,15,56,221,209
789DB	102,15,56,221,217
790	movups	xmm10,XMMWORD[rdi]
791	movups	xmm11,XMMWORD[16+rdi]
792DB	102,15,56,221,225
793DB	102,15,56,221,233
794	movups	xmm12,XMMWORD[32+rdi]
795	movups	xmm13,XMMWORD[48+rdi]
796
797	xorps	xmm2,xmm10
798	movups	XMMWORD[rsi],xmm2
799	xorps	xmm3,xmm11
800	movups	XMMWORD[16+rsi],xmm3
801	pxor	xmm4,xmm12
802	movdqu	XMMWORD[32+rsi],xmm4
803	pxor	xmm5,xmm13
804	movdqu	XMMWORD[48+rsi],xmm5
805	jmp	NEAR $L$ctr32_done
806
807ALIGN	32
808$L$ctr32_loop3:
	; Fewer than four blocks: three lanes (xmm2-xmm4) are always encrypted and
	; the first block is always read and written; the second and third are
	; written only when rdx is at least 2 / greater than 2.
809DB	102,15,56,220,209
810	lea	rcx,[16+rcx]
811	dec	eax
812DB	102,15,56,220,217
813DB	102,15,56,220,225
814	movups	xmm1,XMMWORD[rcx]
815	jnz	NEAR $L$ctr32_loop3
816DB	102,15,56,221,209
817DB	102,15,56,221,217
818DB	102,15,56,221,225
819
820	movups	xmm10,XMMWORD[rdi]
821	xorps	xmm2,xmm10
822	movups	XMMWORD[rsi],xmm2
823	cmp	rdx,2
824	jb	NEAR $L$ctr32_done
825
826	movups	xmm11,XMMWORD[16+rdi]
827	xorps	xmm3,xmm11
828	movups	XMMWORD[16+rsi],xmm3
	; Still the flags of `cmp rdx,2`: equal means exactly two blocks.
829	je	NEAR $L$ctr32_done
830
831	movups	xmm12,XMMWORD[32+rdi]
832	xorps	xmm4,xmm12
833	movups	XMMWORD[32+rsi],xmm4
834
835$L$ctr32_done:
	; Cleanup: zero xmm0-xmm5 and ebp, restore xmm6-xmm15 while overwriting
	; their save slots with zero, wipe the eight counter slots on the stack,
	; then restore rbp and the entry rsp from r11.
836	xorps	xmm0,xmm0
837	xor	ebp,ebp
838	pxor	xmm1,xmm1
839	pxor	xmm2,xmm2
840	pxor	xmm3,xmm3
841	pxor	xmm4,xmm4
842	pxor	xmm5,xmm5
843	movaps	xmm6,XMMWORD[((-168))+r11]
844	movaps	XMMWORD[(-168)+r11],xmm0
845	movaps	xmm7,XMMWORD[((-152))+r11]
846	movaps	XMMWORD[(-152)+r11],xmm0
847	movaps	xmm8,XMMWORD[((-136))+r11]
848	movaps	XMMWORD[(-136)+r11],xmm0
849	movaps	xmm9,XMMWORD[((-120))+r11]
850	movaps	XMMWORD[(-120)+r11],xmm0
851	movaps	xmm10,XMMWORD[((-104))+r11]
852	movaps	XMMWORD[(-104)+r11],xmm0
853	movaps	xmm11,XMMWORD[((-88))+r11]
854	movaps	XMMWORD[(-88)+r11],xmm0
855	movaps	xmm12,XMMWORD[((-72))+r11]
856	movaps	XMMWORD[(-72)+r11],xmm0
857	movaps	xmm13,XMMWORD[((-56))+r11]
858	movaps	XMMWORD[(-56)+r11],xmm0
859	movaps	xmm14,XMMWORD[((-40))+r11]
860	movaps	XMMWORD[(-40)+r11],xmm0
861	movaps	xmm15,XMMWORD[((-24))+r11]
862	movaps	XMMWORD[(-24)+r11],xmm0
863	movaps	XMMWORD[rsp],xmm0
864	movaps	XMMWORD[16+rsp],xmm0
865	movaps	XMMWORD[32+rsp],xmm0
866	movaps	XMMWORD[48+rsp],xmm0
867	movaps	XMMWORD[64+rsp],xmm0
868	movaps	XMMWORD[80+rsp],xmm0
869	movaps	XMMWORD[96+rsp],xmm0
870	movaps	XMMWORD[112+rsp],xmm0
871	mov	rbp,QWORD[((-8))+r11]
872
873	lea	rsp,[r11]
874
875$L$ctr32_epilogue:
	; rsp is the entry value on both paths that reach this label, so the
	; caller's rdi/rsi are reloaded from where the prologue stored them.
876	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
877	mov	rsi,QWORD[16+rsp]
878	DB	0F3h,0C3h		;repret
879
880$L$SEH_end_GFp_aes_hw_ctr32_encrypt_blocks:
; GFp_aes_hw_set_encrypt_key: expand a 128- or 256-bit user key into an AES
; encryption key schedule.
; Registers as used below (the first three Win64 integer argument registers):
;   rcx = pointer to the user key
;   edx = key size in bits; only 128 and 256 are accepted
;   r8  = pointer to the output key schedule
; Returns in rax/eax: 0 on success, -1 if rcx or r8 is null, -2 for any other
; key size. On success the round keys are written from [r8] upward and the
; value 9 (128-bit) or 13 (256-bit) is stored at [r8+240]; the encryption
; routines in this file read that dword as their loop count.
; xmm0-xmm5 are zeroed on every exit path.
;
; Two expansion strategies exist per key size. r10d is the second dword of
; GFp_ia32cap_P masked with 0x10000800 (bits 28 and 11); when the masked value
; equals 0x10000000 the "_alt" variants (pshufb + aesenclast) are used,
; otherwise the aeskeygenassist variants.
; NOTE(review): the meaning of those two capability bits is not visible in
; this file - confirm against the definition of GFp_ia32cap_P.
;
; Raw-byte opcodes used below:
;   102,15,58,223,200,I   aeskeygenassist xmm1,xmm0,I
;   102,15,58,223,202,I   aeskeygenassist xmm1,xmm2,I
;   102,15,56,0,197/213   pshufb xmm0/xmm2,xmm5
;   102,15,56,221,196     aesenclast xmm0,xmm4
;   102,15,56,221,212     aesenclast xmm2,xmm4
;   102,15,56,221,211     aesenclast xmm2,xmm3
881global	GFp_aes_hw_set_encrypt_key
882
883ALIGN	16
884GFp_aes_hw_set_encrypt_key:
885__aesni_set_encrypt_key:
886
	; 48 83 EC 08 = sub rsp,8, emitted as raw bytes. The unwind data at
	; $L$SEH_info_GFp_key describes this 4-byte prolog.
887DB	0x48,0x83,0xEC,0x08
888
889	mov	rax,-1
890	test	rcx,rcx
891	jz	NEAR $L$enc_key_ret
892	test	r8,r8
893	jz	NEAR $L$enc_key_ret
894
895	movups	xmm0,XMMWORD[rcx]
	; xmm4 must start out zero for the shufps-based helpers.
896	xorps	xmm4,xmm4
897	lea	r10,[GFp_ia32cap_P]
898	mov	r10d,DWORD[4+r10]
899	and	r10d,268437504
	; rax = write cursor into the schedule (this overwrites the -1 above).
900	lea	rax,[16+r8]
901	cmp	edx,256
902	je	NEAR $L$14rounds
903
904	cmp	edx,128
905	jne	NEAR $L$bad_keybits
906
907$L$10rounds:
908	mov	edx,9
909	cmp	r10d,268435456
910	je	NEAR $L$10rounds_alt
911
	; aeskeygenassist path: the immediates are the round constants
	; 1,2,4,...,128,27,54. The first call uses the _cold entry, which does not
	; store; each later call first stores the previous round key at [rax] and
	; advances rax by 16.
912	movups	XMMWORD[r8],xmm0
913DB	102,15,58,223,200,1
914	call	$L$key_expansion_128_cold
915DB	102,15,58,223,200,2
916	call	$L$key_expansion_128
917DB	102,15,58,223,200,4
918	call	$L$key_expansion_128
919DB	102,15,58,223,200,8
920	call	$L$key_expansion_128
921DB	102,15,58,223,200,16
922	call	$L$key_expansion_128
923DB	102,15,58,223,200,32
924	call	$L$key_expansion_128
925DB	102,15,58,223,200,64
926	call	$L$key_expansion_128
927DB	102,15,58,223,200,128
928	call	$L$key_expansion_128
929DB	102,15,58,223,200,27
930	call	$L$key_expansion_128
931DB	102,15,58,223,200,54
932	call	$L$key_expansion_128
	; rax = r8+160 here: store the last round key, then the count at r8+240.
933	movups	XMMWORD[rax],xmm0
934	mov	DWORD[80+rax],edx
935	xor	eax,eax
936	jmp	NEAR $L$enc_key_ret
937
938ALIGN	16
939$L$10rounds_alt:
	; Alternative 128-bit path: xmm5 = byte-shuffle mask ($L$key_rotate),
	; xmm4 = round constant (doubled each iteration by pslld), xmm2 = copy of
	; the previous round key, r10d = iteration count for the first 8 keys.
940	movdqa	xmm5,XMMWORD[$L$key_rotate]
941	mov	r10d,8
942	movdqa	xmm4,XMMWORD[$L$key_rcon1]
943	movdqa	xmm2,xmm0
944	movdqu	XMMWORD[r8],xmm0
945	jmp	NEAR $L$oop_key128
946
947ALIGN	16
948$L$oop_key128:
949DB	102,15,56,0,197
950DB	102,15,56,221,196
951	pslld	xmm4,1
952	lea	rax,[16+rax]
953
	; xmm2 = running XOR of the previous key's dwords (three shifted copies).
954	movdqa	xmm3,xmm2
955	pslldq	xmm2,4
956	pxor	xmm3,xmm2
957	pslldq	xmm2,4
958	pxor	xmm3,xmm2
959	pslldq	xmm2,4
960	pxor	xmm2,xmm3
961
962	pxor	xmm0,xmm2
963	movdqu	XMMWORD[(-16)+rax],xmm0
964	movdqa	xmm2,xmm0
965
966	dec	r10d
967	jnz	NEAR $L$oop_key128
968
	; The last two round keys use the constant from $L$key_rcon1b (0x1b) and
	; its doubled value instead of continuing the doubling sequence.
969	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
970
971DB	102,15,56,0,197
972DB	102,15,56,221,196
973	pslld	xmm4,1
974
975	movdqa	xmm3,xmm2
976	pslldq	xmm2,4
977	pxor	xmm3,xmm2
978	pslldq	xmm2,4
979	pxor	xmm3,xmm2
980	pslldq	xmm2,4
981	pxor	xmm2,xmm3
982
983	pxor	xmm0,xmm2
984	movdqu	XMMWORD[rax],xmm0
985
986	movdqa	xmm2,xmm0
987DB	102,15,56,0,197
988DB	102,15,56,221,196
989
990	movdqa	xmm3,xmm2
991	pslldq	xmm2,4
992	pxor	xmm3,xmm2
993	pslldq	xmm2,4
994	pxor	xmm3,xmm2
995	pslldq	xmm2,4
996	pxor	xmm2,xmm3
997
998	pxor	xmm0,xmm2
999	movdqu	XMMWORD[16+rax],xmm0
1000
	; rax = r8+144 here, so this writes the count to r8+240.
1001	mov	DWORD[96+rax],edx
1002	xor	eax,eax
1003	jmp	NEAR $L$enc_key_ret
1004
1005
1006
1007ALIGN	16
1008$L$14rounds:
	; 256-bit key: xmm0 = first 16 key bytes, xmm2 = second 16 key bytes;
	; rax = r8+32 is the write cursor after the two raw key halves.
1009	movups	xmm2,XMMWORD[16+rcx]
1010	mov	edx,13
1011	lea	rax,[16+rax]
1012	cmp	r10d,268435456
1013	je	NEAR $L$14rounds_alt
1014
	; aeskeygenassist path: the 256a helpers update xmm0 and the 256b helper
	; updates xmm2; each non-cold call first stores the half produced by the
	; previous call at [rax] and advances rax by 16.
1015	movups	XMMWORD[r8],xmm0
1016	movups	XMMWORD[16+r8],xmm2
1017DB	102,15,58,223,202,1
1018	call	$L$key_expansion_256a_cold
1019DB	102,15,58,223,200,1
1020	call	$L$key_expansion_256b
1021DB	102,15,58,223,202,2
1022	call	$L$key_expansion_256a
1023DB	102,15,58,223,200,2
1024	call	$L$key_expansion_256b
1025DB	102,15,58,223,202,4
1026	call	$L$key_expansion_256a
1027DB	102,15,58,223,200,4
1028	call	$L$key_expansion_256b
1029DB	102,15,58,223,202,8
1030	call	$L$key_expansion_256a
1031DB	102,15,58,223,200,8
1032	call	$L$key_expansion_256b
1033DB	102,15,58,223,202,16
1034	call	$L$key_expansion_256a
1035DB	102,15,58,223,200,16
1036	call	$L$key_expansion_256b
1037DB	102,15,58,223,202,32
1038	call	$L$key_expansion_256a
1039DB	102,15,58,223,200,32
1040	call	$L$key_expansion_256b
1041DB	102,15,58,223,202,64
1042	call	$L$key_expansion_256a
	; rax = r8+224 here: store the last round key, then the count at r8+240.
1043	movups	XMMWORD[rax],xmm0
1044	mov	DWORD[16+rax],edx
1045	xor	rax,rax
1046	jmp	NEAR $L$enc_key_ret
1047
1048ALIGN	16
1049$L$14rounds_alt:
	; Alternative 256-bit path: xmm5 = byte-shuffle mask, xmm4 = round
	; constant, xmm0/xmm1 = the two most recent 16-byte schedule entries,
	; r10d = 7 iterations (the last one exits after its first half).
1050	movdqa	xmm5,XMMWORD[$L$key_rotate]
1051	movdqa	xmm4,XMMWORD[$L$key_rcon1]
1052	mov	r10d,7
1053	movdqu	XMMWORD[r8],xmm0
1054	movdqa	xmm1,xmm2
1055	movdqu	XMMWORD[16+r8],xmm2
1056	jmp	NEAR $L$oop_key256
1057
1058ALIGN	16
1059$L$oop_key256:
	; First half of the iteration: pshufb + aesenclast with the round constant.
1060DB	102,15,56,0,213
1061DB	102,15,56,221,212
1062
1063	movdqa	xmm3,xmm0
1064	pslldq	xmm0,4
1065	pxor	xmm3,xmm0
1066	pslldq	xmm0,4
1067	pxor	xmm3,xmm0
1068	pslldq	xmm0,4
1069	pxor	xmm0,xmm3
1070	pslld	xmm4,1
1071
1072	pxor	xmm0,xmm2
1073	movdqu	XMMWORD[rax],xmm0
1074
1075	dec	r10d
1076	jz	NEAR $L$done_key256
1077
	; Second half: broadcast dword 3 of the new entry and run it through
	; aesenclast with an all-zero key (xmm3 = 0), i.e. without a round
	; constant or byte rotation.
1078	pshufd	xmm2,xmm0,0xff
1079	pxor	xmm3,xmm3
1080DB	102,15,56,221,211
1081
1082	movdqa	xmm3,xmm1
1083	pslldq	xmm1,4
1084	pxor	xmm3,xmm1
1085	pslldq	xmm1,4
1086	pxor	xmm3,xmm1
1087	pslldq	xmm1,4
1088	pxor	xmm1,xmm3
1089
1090	pxor	xmm2,xmm1
1091	movdqu	XMMWORD[16+rax],xmm2
1092	lea	rax,[32+rax]
1093	movdqa	xmm1,xmm2
1094
1095	jmp	NEAR $L$oop_key256
1096
1097$L$done_key256:
	; rax = r8+224 here, so this writes the count to r8+240.
1098	mov	DWORD[16+rax],edx
1099	xor	eax,eax
1100	jmp	NEAR $L$enc_key_ret
1101
1102ALIGN	16
1103$L$bad_keybits:
1104	mov	rax,-2
1105$L$enc_key_ret:
	; Common exit: scrub key material from xmm0-xmm5, undo the 8-byte stack
	; adjustment made at entry, return with the status in rax.
1106	pxor	xmm0,xmm0
1107	pxor	xmm1,xmm1
1108	pxor	xmm2,xmm2
1109	pxor	xmm3,xmm3
1110	pxor	xmm4,xmm4
1111	pxor	xmm5,xmm5
1112	add	rsp,8
1113
1114	DB	0F3h,0C3h		;repret
1115
1116$L$SEH_end_GFp_set_encrypt_key:
1117
; $L$key_expansion_128 / $L$key_expansion_128_cold: one step of the 128-bit
; key expansion, called from GFp_aes_hw_set_encrypt_key.
; In:  xmm0 = previous round key, xmm1 = aeskeygenassist output for it,
;      xmm4 = scratch carried between calls (the caller zeroes it once),
;      rax  = schedule write cursor.
; Out: xmm0 = updated round key. Clobbers xmm1, xmm4.
; The normal entry first stores xmm0 at [rax] and advances rax by 16; the
; _cold entry skips the store (used for the first step).
1118ALIGN	16
1119$L$key_expansion_128:
1120	movups	XMMWORD[rax],xmm0
1121	lea	rax,[16+rax]
1122$L$key_expansion_128_cold:
	; The two shufps/xorps pairs fold shifted copies of xmm0 into itself via
	; xmm4; then dword 3 of xmm1 is broadcast and XORed in.
1123	shufps	xmm4,xmm0,16
1124	xorps	xmm0,xmm4
1125	shufps	xmm4,xmm0,140
1126	xorps	xmm0,xmm4
1127	shufps	xmm1,xmm1,255
1128	xorps	xmm0,xmm1
1129	DB	0F3h,0C3h		;repret
1130
; $L$key_expansion_192a / _192a_cold / _192b_warm / $L$key_expansion_192b:
; helper steps operating on xmm0 and xmm2 with xmm1 as the key-assist input
; and xmm3, xmm4, xmm5 as scratch; rax is the schedule write cursor.
; NOTE(review): no call site for any of these labels appears in this file
; (GFp_aes_hw_set_encrypt_key only accepts 128 and 256 bits), so this code
; looks unreferenced here - confirm before relying on it.
1131ALIGN	16
1132$L$key_expansion_192a:
	; Normal entry: store xmm0 at [rax] and advance by 16 before the update.
1133	movups	XMMWORD[rax],xmm0
1134	lea	rax,[16+rax]
1135$L$key_expansion_192a_cold:
	; Keep a copy of xmm2 in xmm5; $L$key_expansion_192b reads xmm5.
1136	movaps	xmm5,xmm2
1137$L$key_expansion_192b_warm:
	; Update xmm0 (via xmm4 and broadcast dword 1 of xmm1), then xmm2 (via a
	; 4-byte shifted copy and broadcast dword 3 of the new xmm0).
1138	shufps	xmm4,xmm0,16
1139	movdqa	xmm3,xmm2
1140	xorps	xmm0,xmm4
1141	shufps	xmm4,xmm0,140
1142	pslldq	xmm3,4
1143	xorps	xmm0,xmm4
1144	pshufd	xmm1,xmm1,85
1145	pxor	xmm2,xmm3
1146	pxor	xmm0,xmm1
1147	pshufd	xmm3,xmm0,255
1148	pxor	xmm2,xmm3
1149	DB	0F3h,0C3h		;repret
1150
1151ALIGN	16
1152$L$key_expansion_192b:
	; Stores 32 bytes assembled from xmm5, xmm0 and xmm2 at [rax], advances
	; rax by 32, then continues at the shared update code above.
1153	movaps	xmm3,xmm0
1154	shufps	xmm5,xmm0,68
1155	movups	XMMWORD[rax],xmm5
1156	shufps	xmm3,xmm2,78
1157	movups	XMMWORD[16+rax],xmm3
1158	lea	rax,[32+rax]
1159	jmp	NEAR $L$key_expansion_192b_warm
1160
; $L$key_expansion_256a / $L$key_expansion_256a_cold: 256-bit key expansion
; step that updates xmm0, called from GFp_aes_hw_set_encrypt_key.
; In:  xmm0 = schedule entry to update, xmm1 = aeskeygenassist output,
;      xmm2 = most recent other half, xmm4 = scratch carried between calls,
;      rax  = schedule write cursor.
; Out: xmm0 = updated entry. Clobbers xmm1, xmm4.
; The normal entry first stores xmm2 at [rax] and advances rax by 16; the
; _cold entry skips the store (used for the first step).
1161ALIGN	16
1162$L$key_expansion_256a:
1163	movups	XMMWORD[rax],xmm2
1164	lea	rax,[16+rax]
1165$L$key_expansion_256a_cold:
	; Fold shifted copies of xmm0 into itself via xmm4, then XOR in the
	; broadcast dword 3 of xmm1.
1166	shufps	xmm4,xmm0,16
1167	xorps	xmm0,xmm4
1168	shufps	xmm4,xmm0,140
1169	xorps	xmm0,xmm4
1170	shufps	xmm1,xmm1,255
1171	xorps	xmm0,xmm1
1172	DB	0F3h,0C3h		;repret
1173
; $L$key_expansion_256b: 256-bit key expansion step that updates xmm2,
; called from GFp_aes_hw_set_encrypt_key.
; In:  xmm2 = schedule entry to update, xmm1 = aeskeygenassist output,
;      xmm0 = entry produced by the preceding 256a step, xmm4 = scratch
;      carried between calls, rax = schedule write cursor.
; Out: xmm2 = updated entry. Clobbers xmm1, xmm4.
; Always stores xmm0 at [rax] and advances rax by 16 before the update.
1174ALIGN	16
1175$L$key_expansion_256b:
1176	movups	XMMWORD[rax],xmm0
1177	lea	rax,[16+rax]
1178
	; Same folding as the 256a step but on xmm2, and dword 2 (not dword 3) of
	; xmm1 is broadcast and XORed in.
1179	shufps	xmm4,xmm2,16
1180	xorps	xmm2,xmm4
1181	shufps	xmm4,xmm2,140
1182	xorps	xmm2,xmm4
1183	shufps	xmm1,xmm1,170
1184	xorps	xmm2,xmm1
1185	DB	0F3h,0C3h		;repret
1186
1187
; Constant tables (emitted into .text). The block starts on a 64-byte
; boundary and every table is 16 bytes long, so each label is 16-byte
; aligned; the movdqa loads in GFp_aes_hw_set_encrypt_key depend on that.
; Only $L$key_rotate, $L$key_rcon1 and $L$key_rcon1b are referenced by code
; in this file; the other labels have no visible user here.
1188ALIGN	64
; Byte indices 15..0: a pshufb-style mask that reverses 16 bytes.
1189$L$bswap_mask:
1190DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1191$L$increment32:
1192	DD	6,6,6,0
1193$L$increment64:
1194	DD	1,0,0,0
1195$L$increment1:
1196DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
; Shuffle mask loaded into xmm5 by the "_alt" key expansion paths: every
; dword selects source bytes 13,14,15,12.
1197$L$key_rotate:
1198	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
1199$L$key_rotate192:
1200	DD	0x04070605,0x04070605,0x04070605,0x04070605
; Round constants for the "_alt" key expansion paths: the starting value 1
; and the value 0x1b used for the last two 128-bit round keys.
1201$L$key_rcon1:
1202	DD	1,1,1,1
1203$L$key_rcon1b:
1204	DD	0x1b,0x1b,0x1b,0x1b
1205
; NUL-terminated ASCII string:
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
1206DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
1207DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
1208DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
1209DB	115,108,46,111,114,103,62,0
1210ALIGN	64
1211EXTERN	__imp_RtlVirtualUnwind
1212
; ctr_xts_se_handler: Win64 language-specific exception handler registered for
; GFp_aes_hw_ctr32_encrypt_blocks through $L$SEH_info_GFp_ctr32.
; As used below, r8 points to the CONTEXT record and r9 to the
; DISPATCHER_CONTEXT. The field names in the comments follow the documented
; Windows x64 layouts of those structures (CONTEXT: Rax=120, Rsp=152,
; Rbp=160, Rsi=168, Rdi=176, R11=208, Rip=248, Xmm6=512; DISPATCHER_CONTEXT:
; ControlPc=0, ImageBase=8, FunctionEntry=16, EstablisherFrame=24,
; ContextRecord=40, HandlerData=56).
; The handler rewrites the context so it looks as if the function had not
; been entered (rsp, rdi, rsi and, inside the body, rbp and xmm6-xmm15), then
; calls RtlVirtualUnwind and returns 1 in eax (ExceptionContinueSearch).
1213ALIGN	16
1214ctr_xts_se_handler:
1215	push	rsi
1216	push	rdi
1217	push	rbx
1218	push	rbp
1219	push	r12
1220	push	r13
1221	push	r14
1222	push	r15
1223	pushfq
	; 64 bytes: 32 bytes of shadow space plus four stack arguments for the
	; RtlVirtualUnwind call below.
1224	sub	rsp,64
1225
	; rax = context->Rax (the function copies its entry rsp into rax),
	; rbx = context->Rip.
1226	mov	rax,QWORD[120+r8]
1227	mov	rbx,QWORD[248+r8]
1228
	; rsi = image base, r11 = handler data: two image-relative addresses
	; (body start, epilogue start) emitted after the handler in .xdata.
1229	mov	rsi,QWORD[8+r9]
1230	mov	r11,QWORD[56+r9]
1231
	; Rip before the body label: nothing but rdi/rsi was saved yet, and the
	; entry rsp is still in rax.
1232	mov	r10d,DWORD[r11]
1233	lea	r10,[r10*1+rsi]
1234	cmp	rbx,r10
1235	jb	NEAR $L$common_seh_tail
1236
	; Rip at or after the epilogue label: rsp has already been restored, so
	; use context->Rsp as the frame.
1237	mov	rax,QWORD[152+r8]
1238
1239	mov	r10d,DWORD[4+r11]
1240	lea	r10,[r10*1+rsi]
1241	cmp	rbx,r10
1242	jae	NEAR $L$common_seh_tail
1243
	; Inside the body: context->R11 holds the entry rsp. Copy the ten saved
	; XMM registers (20 qwords from [frame-168]) into context->Xmm6.. and
	; restore rbp from [frame-8].
1244	mov	rax,QWORD[208+r8]
1245
1246	lea	rsi,[((-168))+rax]
1247	lea	rdi,[512+r8]
1248	mov	ecx,20
	; Bytes FC F3 48 A5 = cld ; rep movsq
1249	DD	0xa548f3fc
1250
1251	mov	rbp,QWORD[((-8))+rax]
1252	mov	QWORD[160+r8],rbp
1253
1254
1255$L$common_seh_tail:
	; rax = the function's entry rsp. Reload the rdi/rsi values the prologue
	; stored at [rsp+8]/[rsp+16] and patch Rsp, Rsi, Rdi in the context.
1256	mov	rdi,QWORD[8+rax]
1257	mov	rsi,QWORD[16+rax]
1258	mov	QWORD[152+r8],rax
1259	mov	QWORD[168+r8],rsi
1260	mov	QWORD[176+r8],rdi
1261
	; Copy the patched CONTEXT (154 qwords) into the dispatcher's
	; ContextRecord.
1262	mov	rdi,QWORD[40+r9]
1263	mov	rsi,r8
1264	mov	ecx,154
	; Bytes FC F3 48 A5 = cld ; rep movsq
1265	DD	0xa548f3fc
1266
	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord,
	; &HandlerData, &EstablisherFrame, NULL): the first four arguments go in
	; rcx, rdx, r8, r9 and the rest on the stack at [rsp+32..56].
1267	mov	rsi,r9
1268	xor	rcx,rcx
1269	mov	rdx,QWORD[8+rsi]
1270	mov	r8,QWORD[rsi]
1271	mov	r9,QWORD[16+rsi]
1272	mov	r10,QWORD[40+rsi]
1273	lea	r11,[56+rsi]
1274	lea	r12,[24+rsi]
1275	mov	QWORD[32+rsp],r10
1276	mov	QWORD[40+rsp],r11
1277	mov	QWORD[48+rsp],r12
1278	mov	QWORD[56+rsp],rcx
1279	call	QWORD[__imp_RtlVirtualUnwind]
1280
1281	mov	eax,1
1282	add	rsp,64
1283	popfq
1284	pop	r15
1285	pop	r14
1286	pop	r13
1287	pop	r12
1288	pop	rbp
1289	pop	rbx
1290	pop	rdi
1291	pop	rsi
1292	DB	0F3h,0C3h		;repret
1293
1294
; Win64 unwind tables. .pdata holds one (begin, end, unwind-info) triple of
; image-relative addresses per function; .xdata holds the unwind info the
; triples point to.
1295section	.pdata rdata align=4
1296ALIGN	4
1297	DD	$L$SEH_begin_GFp_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
1298	DD	$L$SEH_end_GFp_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
1299	DD	$L$SEH_info_GFp_ctr32 wrt ..imagebase
1300	DD	GFp_aes_hw_set_encrypt_key wrt ..imagebase
1301	DD	$L$SEH_end_GFp_set_encrypt_key wrt ..imagebase
1302	DD	$L$SEH_info_GFp_key wrt ..imagebase
1303section	.xdata rdata align=8
1304ALIGN	8
; CTR function: first byte 9 = unwind version 1 with the exception-handler
; flag, no unwind codes; followed by the handler address and the two
; addresses (body start, epilogue start) that ctr_xts_se_handler reads as its
; handler data.
1305$L$SEH_info_GFp_ctr32:
1306DB	9,0,0,0
1307	DD	ctr_xts_se_handler wrt ..imagebase
1308	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
; Key-setup function: version 1, no handler, 4-byte prolog, one unwind code
; at prolog offset 4 describing the 8-byte stack allocation (`sub rsp,8`).
1309$L$SEH_info_GFp_key:
1310DB	0x01,0x04,0x01,0x00
1311DB	0x04,0x02,0x00,0x00
1312