1%ifidn __OUTPUT_FORMAT__,obj
; Select the code-section declaration that matches the assembler's output
; format: "obj", "win32", or anything else (the %else branch).
2section	code	use32 class=code align=64
3%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): `$@feat.00` looks like the COFF feature-flag symbol that marks
; the object as SAFESEH-compatible -- confirm against the linker documentation.
4$@feat.00 equ 1
5section	.text	code align=64
6%else
7section	.text	code
8%endif
9align	64
;-----------------------------------------------------------------------
; L$_vpaes_consts: 49 rows of 16 bytes, followed by an identification
; string.  Every routine in this file addresses the table through
; ebp = L$_vpaes_consts+0x30 (set up position-independently by the public
; entry points), so the "ebp+N" offsets below are the displacements that
; appear in the code.  Rows are read with movdqa, hence the alignment.
;-----------------------------------------------------------------------
10L$_vpaes_consts:
; ebp-48, ebp-32: lookup pair used by the *_entry sections of both cores and
; by the key-schedule round (ebp-48 is what __vpaes_preheat keeps in xmm7).
11dd	218628480,235210255,168496130,67568393
12dd	252381056,17041926,33884169,51187212
; ebp-16: 0x0F0F0F0F x4, the low-nibble mask (__vpaes_preheat keeps it in xmm6).
13dd	252645135,252645135,252645135,252645135
; ebp+0, ebp+16: lo/hi-nibble table pair for the input transform in
; __vpaes_encrypt_core and for the first __vpaes_schedule_transform call.
14dd	1512730624,3266504856,1377990664,3401244816
15dd	830229760,1275146365,2969422977,3447763452
; ebp+32 .. ebp+80: table pairs used inside L$001enc_loop (ebp+32/ebp+48 are
; also used at the end of the key-schedule round).
16dd	3411033600,2979783055,338359620,2782886510
17dd	4209124096,907596821,221174255,1006095553
18dd	191964160,3799684038,3164090317,1589111125
19dd	182528256,1777043520,2877432650,3265356744
; ebp+96, ebp+112: table pair used after the encrypt loop exits.
20dd	1874708224,3503451415,3305285752,363511674
21dd	1606117888,3487855781,1093350906,2384367825
; ebp+128 .. ebp+176: four byte-permutation rows, selected in the encrypt loop
; as [ecx+ebx-64] with ebx = ebp+192.  ebp+128 is also the permutation applied
; in __vpaes_schedule_mangle; ebp+176 is loaded into xmm5 by the decrypt core.
22dd	197121,67569157,134941193,202313229
23dd	67569157,134941193,202313229,197121
24dd	134941193,202313229,197121,67569157
25dd	202313229,197121,67569157,134941193
; ebp+192 .. ebp+240: four byte-permutation rows, selected in the encrypt loop
; as [ecx+ebx].
26dd	33619971,100992007,168364043,235736079
27dd	235736079,33619971,100992007,168364043
28dd	168364043,235736079,33619971,100992007
29dd	100992007,168364043,235736079,33619971
; ebp+256 .. ebp+304: four byte-permutation rows indexed by ecx (0/16/32/48);
; used for the final output shuffle of both cores and by the key schedule.
30dd	50462976,117835012,185207048,252579084
31dd	252314880,51251460,117574920,184942860
32dd	184682752,252054788,50987272,118359308
33dd	118099200,185467140,251790600,50727180
; ebp+320: initial value of the rotating constant that __vpaes_schedule_core
; copies to the stack and __vpaes_schedule_round consumes one byte at a time.
34dd	2946363062,528716217,1300004225,1881839624
; ebp+336: 0x5B in every byte; XORed into key-schedule values.
35dd	1532713819,1532713819,1532713819,1532713819
; ebp+352, ebp+368: transform pair for the last round key (edi == 0 path).
36dd	3602276352,4288629033,3737020424,4153884961
37dd	1354558464,32357713,2958822624,3775749553
; ebp+384, ebp+400: transform pair for the last round key (edi != 0 path).
38dd	1201988352,132424512,1572796698,503232858
39dd	2213177600,1597421020,4103937655,675398315
; ebp+416 .. ebp+528: four table pairs walked through esi in
; L$014schedule_mangle_dec.
40dd	2749646592,4273543773,1511898873,121693092
41dd	3040248576,1103263732,2871565598,1608280554
42dd	2236667136,2588920351,482954393,64377734
43dd	3069987328,291237287,2117370568,3650299247
44dd	533321216,3573750986,2572112006,1401264716
45dd	1339849704,2721158661,548607111,3445553514
46dd	2128193280,3054596040,2183486460,1257083700
47dd	655635200,1165381986,3923443150,2344132524
; ebp+544 .. ebp+720: tables for __vpaes_decrypt_core, addressed there as
; [ebx-64] .. [ebx+112] with ebx = ebp+608.  The first pair is the input
; transform, the next four pairs are used inside L$003dec_loop, and the last
; pair is used after the loop exits.
48dd	190078720,256924420,290342170,357187870
49dd	1610966272,2263057382,4103205268,309794674
50dd	2592527872,2233205587,1335446729,3402964816
51dd	3973531904,3225098121,3002836325,1918774430
52dd	3870401024,2102906079,2284471353,4117666579
53dd	617007872,1021508343,366931923,691083277
54dd	2528395776,3491914898,2968704004,1613121270
55dd	3445188352,3247741094,844474987,4093578302
56dd	651481088,1190302358,1689581232,574775300
57dd	4289380608,206939853,2555985458,2489840491
58dd	2130264064,327674451,3566485037,3349835193
59dd	2470714624,316102159,3636825756,3393945945
; NUL-terminated ASCII identification string:
; "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
60db	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
61db	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
62db	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
63db	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
64db	118,101,114,115,105,116,121,41,0
65align	64
66align	16
;-----------------------------------------------------------------------
; __vpaes_preheat
; Turns the link-time offset in ebp into a run-time pointer and loads the
; two constants the cores expect to find in registers.
; In:    ebp   = L$_vpaes_consts+0x30 minus the address of the caller's
;                "pic_point" label (the instruction after the call)
;        [esp] = return address, i.e. that same pic_point at run time
; Out:   ebp   = run-time address of L$_vpaes_consts+0x30
;        xmm7  = 16 bytes at [ebp-48] (first row of the constant table)
;        xmm6  = 16 bytes at [ebp-16] (0x0F in every byte)
; Clobb: flags
;-----------------------------------------------------------------------
67__vpaes_preheat:
68	add	ebp,DWORD [esp]	; offset + return address = absolute table pointer
69	movdqa	xmm7,[ebp-48]
70	movdqa	xmm6,[ebp-16]
71	ret
72align	16
;-----------------------------------------------------------------------
; __vpaes_encrypt_core
; Transforms one 16-byte block held in xmm0 using the key schedule at edx.
; In:    xmm0 = input block
;        edx  = key schedule; a DWORD iteration count is read from [240+edx]
;               and 16 bytes of key material are read per stage from [edx]
;        ebp  = run-time address of L$_vpaes_consts+0x30
;        xmm6 = 0x0F byte mask, xmm7 = table row at [ebp-48]
;               (both as left by __vpaes_preheat)
; Out:   xmm0 = output block
; Clobb: eax, ebx, ecx, edx (advanced past the keys consumed), xmm1-xmm5,
;        flags.  xmm6/xmm7 are only read.  The stack is not touched.
; The `db 102,15,56,0,..` sequences are hand-encoded PSHUFB instructions
; (66 0F 38 00 /r); each one is decoded in the comment beside it.
;-----------------------------------------------------------------------
73__vpaes_encrypt_core:
74	mov	ecx,16	; ecx = row selector into the tables at ebx (steps 16,32,48,0,...)
75	mov	eax,DWORD [240+edx]	; loop counter
76	movdqa	xmm1,xmm6
77	movdqa	xmm2,[ebp]
78	pandn	xmm1,xmm0	; xmm1 = high nibbles of the input (not yet shifted down)
79	pand	xmm0,xmm6	; xmm0 = low nibbles of the input
80	movdqu	xmm5,[edx]	; first 16 bytes of key material
81db	102,15,56,0,208	; pshufb xmm2,xmm0
82	movdqa	xmm0,[16+ebp]
83	pxor	xmm2,xmm5
84	psrld	xmm1,4	; move the high nibbles into the low nibble position
; This `add` is the last flag-setting instruction before the jump below, so it
; is what the first `jnz` at the end of L$000enc_entry tests.
85	add	edx,16
86db	102,15,56,0,193	; pshufb xmm0,xmm1
87	lea	ebx,[192+ebp]	; ebx = base for the ecx-indexed permutation rows
88	pxor	xmm0,xmm2
89	jmp	NEAR L$000enc_entry
90align	16
; Loop body: combines the lookups prepared by L$000enc_entry (indices in
; xmm2/xmm3, key material in xmm5) with the tables at ebp+32..ebp+80 and the
; ecx-selected permutation rows.
91L$001enc_loop:
92	movdqa	xmm4,[32+ebp]
93	movdqa	xmm0,[48+ebp]
94db	102,15,56,0,226	; pshufb xmm4,xmm2
95db	102,15,56,0,195	; pshufb xmm0,xmm3
96	pxor	xmm4,xmm5	; fold in the key material loaded at the end of enc_entry
97	movdqa	xmm5,[64+ebp]
98	pxor	xmm0,xmm4
99	movdqa	xmm1,[ecx*1+ebx-64]
100db	102,15,56,0,234	; pshufb xmm5,xmm2
101	movdqa	xmm2,[80+ebp]
102	movdqa	xmm4,[ecx*1+ebx]
103db	102,15,56,0,211	; pshufb xmm2,xmm3
104	movdqa	xmm3,xmm0
105	pxor	xmm2,xmm5
106db	102,15,56,0,193	; pshufb xmm0,xmm1
107	add	edx,16	; advance to the next 16 bytes of key material
108	pxor	xmm0,xmm2
109db	102,15,56,0,220	; pshufb xmm3,xmm4
110	add	ecx,16
111	pxor	xmm3,xmm0
112db	102,15,56,0,193	; pshufb xmm0,xmm1
113	and	ecx,48	; keep the row selector in {0,16,32,48}
; ZF from this `sub` must survive until the `jnz` at the end of enc_entry;
; only SSE instructions, which leave EFLAGS alone, sit in between.
114	sub	eax,1
115	pxor	xmm0,xmm3
; Entry section: splits xmm0 into nibbles and performs the chained lookups
; through the rows at ebp-32 (xmm5) and ebp-48 (xmm7), leaving the resulting
; index vectors in xmm2 and xmm3 and the next key material in xmm5.
116L$000enc_entry:
117	movdqa	xmm1,xmm6
118	movdqa	xmm5,[ebp-32]
119	pandn	xmm1,xmm0
120	psrld	xmm1,4	; xmm1 = high nibbles
121	pand	xmm0,xmm6	; xmm0 = low nibbles
122db	102,15,56,0,232	; pshufb xmm5,xmm0
123	movdqa	xmm3,xmm7
124	pxor	xmm0,xmm1
125db	102,15,56,0,217	; pshufb xmm3,xmm1
126	movdqa	xmm4,xmm7
127	pxor	xmm3,xmm5
128db	102,15,56,0,224	; pshufb xmm4,xmm0
129	movdqa	xmm2,xmm7
130	pxor	xmm4,xmm5
131db	102,15,56,0,211	; pshufb xmm2,xmm3
132	movdqa	xmm3,xmm7
133	pxor	xmm2,xmm0
134db	102,15,56,0,220	; pshufb xmm3,xmm4
135	movdqu	xmm5,[edx]	; next 16 bytes of key material
136	pxor	xmm3,xmm1
137	jnz	NEAR L$001enc_loop	; tests ZF from `sub eax,1` (or `add edx,16` on first entry)
; Final stage: uses the table pair at ebp+96/ebp+112 and finishes with the
; ecx-selected byte permutation from [64+ecx+ebx] (= ebp+256+ecx).
138	movdqa	xmm4,[96+ebp]
139	movdqa	xmm0,[112+ebp]
140db	102,15,56,0,226	; pshufb xmm4,xmm2
141	pxor	xmm4,xmm5
142db	102,15,56,0,195	; pshufb xmm0,xmm3
143	movdqa	xmm1,[64+ecx*1+ebx]
144	pxor	xmm0,xmm4
145db	102,15,56,0,193	; pshufb xmm0,xmm1
146	ret
147align	16
;-----------------------------------------------------------------------
; __vpaes_decrypt_core
; Counterpart of __vpaes_encrypt_core that uses the tables at ebp+544 and up.
; In:    xmm0 = input block
;        edx  = key schedule; a DWORD iteration count is read from [240+edx]
;               and 16 bytes of key material are read per stage from [edx]
;        ebp  = run-time address of L$_vpaes_consts+0x30
;        xmm6 = 0x0F byte mask, xmm7 = table row at [ebp-48]
;               (both as left by __vpaes_preheat)
; Out:   xmm0 = output block
; Clobb: eax, ebx, ecx, edx (advanced past the keys consumed), xmm1-xmm5,
;        flags.  xmm6/xmm7 are only read.  The stack is not touched.
; `db 102,15,56,0,..` is a hand-encoded PSHUFB (66 0F 38 00 /r) and
; `db 102,15,58,15,..` a hand-encoded PALIGNR (66 0F 3A 0F /r ib).
;-----------------------------------------------------------------------
148__vpaes_decrypt_core:
149	lea	ebx,[608+ebp]	; ebx = base for the decrypt tables ([ebx-64]..[ebx+112])
150	mov	eax,DWORD [240+edx]	; loop counter
151	movdqa	xmm1,xmm6
152	movdqa	xmm2,[ebx-64]
153	pandn	xmm1,xmm0
154	mov	ecx,eax
155	psrld	xmm1,4	; xmm1 = high nibbles of the input
156	movdqu	xmm5,[edx]	; first 16 bytes of key material
157	shl	ecx,4
158	pand	xmm0,xmm6	; xmm0 = low nibbles of the input
159db	102,15,56,0,208	; pshufb xmm2,xmm0
160	movdqa	xmm0,[ebx-48]
161	xor	ecx,48
162db	102,15,56,0,193	; pshufb xmm0,xmm1
163	and	ecx,48	; ecx = ((count*16) ^ 48) & 48: selects one of four rows
164	pxor	xmm2,xmm5
165	movdqa	xmm5,[176+ebp]	; byte permutation that is rotated once per loop iteration
166	pxor	xmm0,xmm2
; This `add` is the last flag-setting instruction before the jump below (`lea`
; leaves EFLAGS alone), so it is what the first `jnz` in dec_entry tests.
167	add	edx,16
168	lea	ecx,[ecx*1+ebx-352]	; ecx = address of the selected row at ebp+256+ecx
169	jmp	NEAR L$002dec_entry
170align	16
; Loop body: on entry xmm0 holds the key material loaded at the end of
; L$002dec_entry and xmm2/xmm3 hold the index vectors.  Four table pairs are
; applied in turn; after each of the first three the accumulator is permuted
; by xmm5.
171L$003dec_loop:
172	movdqa	xmm4,[ebx-32]
173	movdqa	xmm1,[ebx-16]
174db	102,15,56,0,226	; pshufb xmm4,xmm2
175db	102,15,56,0,203	; pshufb xmm1,xmm3
176	pxor	xmm0,xmm4
177	movdqa	xmm4,[ebx]
178	pxor	xmm0,xmm1
179	movdqa	xmm1,[16+ebx]
180db	102,15,56,0,226	; pshufb xmm4,xmm2
181db	102,15,56,0,197	; pshufb xmm0,xmm5
182db	102,15,56,0,203	; pshufb xmm1,xmm3
183	pxor	xmm0,xmm4
184	movdqa	xmm4,[32+ebx]
185	pxor	xmm0,xmm1
186	movdqa	xmm1,[48+ebx]
187db	102,15,56,0,226	; pshufb xmm4,xmm2
188db	102,15,56,0,197	; pshufb xmm0,xmm5
189db	102,15,56,0,203	; pshufb xmm1,xmm3
190	pxor	xmm0,xmm4
191	movdqa	xmm4,[64+ebx]
192	pxor	xmm0,xmm1
193	movdqa	xmm1,[80+ebx]
194db	102,15,56,0,226	; pshufb xmm4,xmm2
195db	102,15,56,0,197	; pshufb xmm0,xmm5
196db	102,15,56,0,203	; pshufb xmm1,xmm3
197	pxor	xmm0,xmm4
198	add	edx,16	; advance to the next 16 bytes of key material
199db	102,15,58,15,237,12	; palignr xmm5,xmm5,12 (rotate the permutation row)
200	pxor	xmm0,xmm1
; ZF from this `sub` must survive until the `jnz` at the end of dec_entry;
; only SSE instructions, which leave EFLAGS alone, sit in between.
201	sub	eax,1
; Entry section: same nibble split and chained lookups as the encrypt core's
; entry, but the next key material is loaded into xmm0 rather than xmm5.
202L$002dec_entry:
203	movdqa	xmm1,xmm6
204	movdqa	xmm2,[ebp-32]
205	pandn	xmm1,xmm0
206	pand	xmm0,xmm6	; xmm0 = low nibbles
207	psrld	xmm1,4	; xmm1 = high nibbles
208db	102,15,56,0,208	; pshufb xmm2,xmm0
209	movdqa	xmm3,xmm7
210	pxor	xmm0,xmm1
211db	102,15,56,0,217	; pshufb xmm3,xmm1
212	movdqa	xmm4,xmm7
213	pxor	xmm3,xmm2
214db	102,15,56,0,224	; pshufb xmm4,xmm0
215	pxor	xmm4,xmm2
216	movdqa	xmm2,xmm7
217db	102,15,56,0,211	; pshufb xmm2,xmm3
218	movdqa	xmm3,xmm7
219	pxor	xmm2,xmm0
220db	102,15,56,0,220	; pshufb xmm3,xmm4
221	movdqu	xmm0,[edx]	; next 16 bytes of key material
222	pxor	xmm3,xmm1
223	jnz	NEAR L$003dec_loop	; tests ZF from `sub eax,1` (or `add edx,16` on first entry)
; Final stage: table pair at [96+ebx]/[112+ebx], then the byte permutation
; whose address was precomputed in ecx.
224	movdqa	xmm4,[96+ebx]
225db	102,15,56,0,226	; pshufb xmm4,xmm2
226	pxor	xmm4,xmm0
227	movdqa	xmm0,[112+ebx]
228	movdqa	xmm2,[ecx]
229db	102,15,56,0,195	; pshufb xmm0,xmm3
230	pxor	xmm0,xmm4
231db	102,15,56,0,194	; pshufb xmm0,xmm2
232	ret
233align	16
;-----------------------------------------------------------------------
; __vpaes_schedule_core
; Expands a user key into the key schedule at edx.  Shared by
; _vpaes_set_encrypt_key and _vpaes_set_decrypt_key.
; In:    esi   = user key bytes
;        eax   = key size selector: > 192 takes the "256" path, == 192 the
;                "192" path, anything else the "128" path
;        edx   = where the first round key is written; later keys are
;                written at ascending addresses when edi == 0 and at
;                descending addresses when edi != 0
;        ecx   = byte offset (0/16/32/48) of the current row at ebp+256
;        edi   = 0 for an encryption schedule, non-zero for decryption
;        ebp   = L$_vpaes_consts+0x30 minus the caller's pic_point
;        [esp] = return address (the caller's pic_point)
; Stack: the caller's esp must be 16-byte aligned before the call.  Seen
;        from this routine (4 bytes deeper) [4+esp] is a 16-byte slot for
;        the rotating constant and [20+esp] is a 16-byte spill slot for
;        xmm7.
; Out:   the key schedule in memory; xmm0-xmm7 are all zeroed before return.
; Clobb: eax, ebx, ecx, edx, esi (in the edi != 0 path, via
;        __vpaes_schedule_mangle), ebp, flags
;-----------------------------------------------------------------------
234__vpaes_schedule_core:
235	add	ebp,DWORD [esp]	; offset + return address = absolute table pointer
236	movdqu	xmm0,[esi]	; first 16 bytes of the user key
237	movdqa	xmm2,[320+ebp]
238	movdqa	xmm3,xmm0	; keep an untransformed copy for the decrypt path
239	lea	ebx,[ebp]	; transform tables = rows at ebp+0 / ebp+16
240	movdqa	[4+esp],xmm2	; stack slot read and updated by __vpaes_schedule_round
241	call	__vpaes_schedule_transform
242	movdqa	xmm7,xmm0	; xmm7 = running key state for the round function
243	test	edi,edi
244	jnz	NEAR L$004schedule_am_decrypting
245	movdqu	[edx],xmm0	; encrypt: first round key is the transformed key
246	jmp	NEAR L$005schedule_go
247L$004schedule_am_decrypting:
; Decrypt: store the untransformed key, byte-permuted by the ecx-selected row.
248	movdqa	xmm1,[256+ecx*1+ebp]
249db	102,15,56,0,217	; pshufb xmm3,xmm1
250	movdqu	[edx],xmm3
251	xor	ecx,48
252L$005schedule_go:
253	cmp	eax,192
254	ja	NEAR L$006schedule_256
255	je	NEAR L$007schedule_192
; "128" path: 10 rounds; every result except the last is written by
; __vpaes_schedule_mangle, the last by L$010schedule_mangle_last.
256L$008schedule_128:
257	mov	eax,10
258L$009loop_schedule_128:
259	call	__vpaes_schedule_round
260	dec	eax
261	jz	NEAR L$010schedule_mangle_last
262	call	__vpaes_schedule_mangle
263	jmp	NEAR L$009loop_schedule_128
264align	16
; "192" path: user-key bytes 8..23 are transformed and their upper half is
; kept in xmm6 (low half zeroed).  Each of the 4 loop iterations runs two
; rounds and writes three round keys, except the last iteration, which leaves
; after its second round.
265L$007schedule_192:
266	movdqu	xmm0,[8+esi]
267	call	__vpaes_schedule_transform
268	movdqa	xmm6,xmm0
269	pxor	xmm4,xmm4
270	movhlps	xmm6,xmm4	; clear the low 8 bytes of xmm6
271	mov	eax,4
272L$011loop_schedule_192:
273	call	__vpaes_schedule_round
274db	102,15,58,15,198,8	; palignr xmm0,xmm6,8
275	call	__vpaes_schedule_mangle
276	call	__vpaes_schedule_192_smear
277	call	__vpaes_schedule_mangle
278	call	__vpaes_schedule_round
279	dec	eax
280	jz	NEAR L$010schedule_mangle_last
281	call	__vpaes_schedule_mangle
282	call	__vpaes_schedule_192_smear
283	jmp	NEAR L$011loop_schedule_192
284align	16
; "256" path: user-key bytes 16..31 are transformed; 7 iterations, each
; writing the current value, running a full round, and (except on the last
; iteration) writing again and running a "low round" on the saved copy.
285L$006schedule_256:
286	movdqu	xmm0,[16+esi]
287	call	__vpaes_schedule_transform
288	mov	eax,7
289L$012loop_schedule_256:
290	call	__vpaes_schedule_mangle
291	movdqa	xmm6,xmm0	; remember this value for the low round below
292	call	__vpaes_schedule_round
293	dec	eax
294	jz	NEAR L$010schedule_mangle_last
295	call	__vpaes_schedule_mangle
296	pshufd	xmm0,xmm0,255	; broadcast the top dword of xmm0
297	movdqa	[20+esp],xmm7	; spill xmm7 while the low round runs on xmm6
298	movdqa	xmm7,xmm6
299	call	L$_vpaes_schedule_low_round
300	movdqa	xmm7,[20+esp]
301	jmp	NEAR L$012loop_schedule_256
302align	16
; Writes the final round key.  Encrypt (edi == 0): permute by the
; ecx-selected row, use the transform pair at ebp+352, and store 16 bytes
; above the previous key.  Decrypt: use the pair at ebp+384 and store 16 bytes
; below the previous key.
303L$010schedule_mangle_last:
304	lea	ebx,[384+ebp]
305	test	edi,edi
306	jnz	NEAR L$013schedule_mangle_last_dec
307	movdqa	xmm1,[256+ecx*1+ebp]
308db	102,15,56,0,193	; pshufb xmm0,xmm1
309	lea	ebx,[352+ebp]
310	add	edx,32	; net +16 once the shared `add edx,-16` below has run
311L$013schedule_mangle_last_dec:
312	add	edx,-16
313	pxor	xmm0,[336+ebp]	; XOR with the 0x5B constant row
314	call	__vpaes_schedule_transform
315	movdqu	[edx],xmm0
; Clear every XMM register before returning (presumably so that no
; key-derived data is left behind in registers).
316	pxor	xmm0,xmm0
317	pxor	xmm1,xmm1
318	pxor	xmm2,xmm2
319	pxor	xmm3,xmm3
320	pxor	xmm4,xmm4
321	pxor	xmm5,xmm5
322	pxor	xmm6,xmm6
323	pxor	xmm7,xmm7
324	ret
325align	16
;-----------------------------------------------------------------------
; __vpaes_schedule_192_smear
; Helper for the "192" key-schedule path.
; In:    xmm6 = partial key state, xmm7 = current key state
; Out:   xmm0 = xmm6 ^ shuffle(xmm6) ^ shuffle(xmm7) as computed below
;        xmm6 = the same value with its low 8 bytes cleared
; Clobb: xmm1 (left as zero)
;-----------------------------------------------------------------------
326__vpaes_schedule_192_smear:
327	pshufd	xmm1,xmm6,128	; xmm1 dwords = {xmm6[0], xmm6[0], xmm6[0], xmm6[2]}
328	pshufd	xmm0,xmm7,254	; xmm0 dwords = {xmm7[2], xmm7[3], xmm7[3], xmm7[3]}
329	pxor	xmm6,xmm1
330	pxor	xmm1,xmm1
331	pxor	xmm6,xmm0
332	movdqa	xmm0,xmm6
333	movhlps	xmm6,xmm1	; low 8 bytes of xmm6 = 0, high 8 bytes unchanged
334	ret
335align	16
;-----------------------------------------------------------------------
; __vpaes_schedule_round / L$_vpaes_schedule_low_round
; One step of the key schedule.  The full round first folds one byte of the
; rotating constant into xmm7 and rotates xmm0; it then falls through into
; the "low round", which __vpaes_schedule_core also calls directly.
; In:    xmm0   = input value, xmm7 = running key state
;        ebp    = run-time address of L$_vpaes_consts+0x30
;        [8+esp] (full round only) = the 16-byte rotating constant; this is
;                the slot __vpaes_schedule_core addresses as [4+esp]
; Out:   xmm0 = xmm7 = new key state; the stack slot holds the rotated
;        constant
; Clobb: xmm1-xmm5
; `db 102,15,58,15,..` is a hand-encoded PALIGNR, `db 102,15,56,0,..` a
; hand-encoded PSHUFB.
;-----------------------------------------------------------------------
336__vpaes_schedule_round:
337	movdqa	xmm2,[8+esp]
338	pxor	xmm1,xmm1
339db	102,15,58,15,202,15	; palignr xmm1,xmm2,15: xmm1 = top byte of the constant, rest 0
340db	102,15,58,15,210,15	; palignr xmm2,xmm2,15: rotate the constant by one byte
341	pxor	xmm7,xmm1
342	pshufd	xmm0,xmm0,255	; broadcast the top dword of xmm0
343db	102,15,58,15,192,1	; palignr xmm0,xmm0,1: rotate xmm0 by one byte
344	movdqa	[8+esp],xmm2	; store the rotated constant for the next round
345L$_vpaes_schedule_low_round:
; Spread xmm7 across itself: x ^= x << 4 bytes, then x ^= x << 8 bytes, then
; XOR with the 0x5B constant row.
346	movdqa	xmm1,xmm7
347	pslldq	xmm7,4
348	pxor	xmm7,xmm1
349	movdqa	xmm1,xmm7
350	pslldq	xmm7,8
351	pxor	xmm7,xmm1
352	pxor	xmm7,[336+ebp]
; Nibble split of xmm0 followed by the same chained lookups as
; L$000enc_entry, using the mask at ebp-16 and the rows at ebp-48 / ebp-32
; loaded into xmm4 / xmm5 / xmm2 here instead of xmm6 / xmm7.
353	movdqa	xmm4,[ebp-16]
354	movdqa	xmm5,[ebp-48]
355	movdqa	xmm1,xmm4
356	pandn	xmm1,xmm0
357	psrld	xmm1,4	; xmm1 = high nibbles
358	pand	xmm0,xmm4	; xmm0 = low nibbles
359	movdqa	xmm2,[ebp-32]
360db	102,15,56,0,208	; pshufb xmm2,xmm0
361	pxor	xmm0,xmm1
362	movdqa	xmm3,xmm5
363db	102,15,56,0,217	; pshufb xmm3,xmm1
364	pxor	xmm3,xmm2
365	movdqa	xmm4,xmm5
366db	102,15,56,0,224	; pshufb xmm4,xmm0
367	pxor	xmm4,xmm2
368	movdqa	xmm2,xmm5
369db	102,15,56,0,211	; pshufb xmm2,xmm3
370	pxor	xmm2,xmm0
371	movdqa	xmm3,xmm5
372db	102,15,56,0,220	; pshufb xmm3,xmm4
373	pxor	xmm3,xmm1
; Output lookups through the table pair at ebp+32 / ebp+48, then combine with
; the spread key state.
374	movdqa	xmm4,[32+ebp]
375db	102,15,56,0,226	; pshufb xmm4,xmm2
376	movdqa	xmm0,[48+ebp]
377db	102,15,56,0,195	; pshufb xmm0,xmm3
378	pxor	xmm0,xmm4
379	pxor	xmm0,xmm7
380	movdqa	xmm7,xmm0	; result is returned in both xmm0 and xmm7
381	ret
382align	16
;-----------------------------------------------------------------------
; __vpaes_schedule_transform
; Applies a byte-wise transform given as two 16-entry nibble tables.
; In:    xmm0 = input
;        ebx  = table pair: [ebx] is indexed by each byte's low nibble,
;               [16+ebx] by its high nibble
;        ebp  = run-time address of L$_vpaes_consts+0x30 (for the 0x0F mask)
; Out:   xmm0 = lookup([ebx], low nibbles) ^ lookup([16+ebx], high nibbles)
; Clobb: xmm1, xmm2
;-----------------------------------------------------------------------
383__vpaes_schedule_transform:
384	movdqa	xmm2,[ebp-16]
385	movdqa	xmm1,xmm2
386	pandn	xmm1,xmm0
387	psrld	xmm1,4	; xmm1 = high nibbles
388	pand	xmm0,xmm2	; xmm0 = low nibbles
389	movdqa	xmm2,[ebx]
390db	102,15,56,0,208	; pshufb xmm2,xmm0
391	movdqa	xmm0,[16+ebx]
392db	102,15,56,0,193	; pshufb xmm0,xmm1
393	pxor	xmm0,xmm2
394	ret
395align	16
;-----------------------------------------------------------------------
; __vpaes_schedule_mangle
; Converts the key-schedule value in xmm0 into its stored form and writes it
; to the next slot of the schedule.
; In:    xmm0 = value to store (left unmodified)
;        edx  = address of the previously written round key
;        ecx  = byte offset (0/16/32/48) of the current row at ebp+256
;        edi  = 0 for an encryption schedule, non-zero for decryption
;        ebp  = run-time address of L$_vpaes_consts+0x30
; Out:   edx  = edx+16 (edi == 0) or edx-16 (edi != 0); 16 bytes written there
;        ecx  = (ecx - 16) & 48
; Clobb: xmm1-xmm5, flags, and esi in the edi != 0 path
; `db 102,15,56,0,..` is a hand-encoded PSHUFB.
;-----------------------------------------------------------------------
396__vpaes_schedule_mangle:
397	movdqa	xmm4,xmm0	; work on a copy so xmm0 survives
398	movdqa	xmm5,[128+ebp]	; byte permutation applied repeatedly below
399	test	edi,edi
400	jnz	NEAR L$014schedule_mangle_dec
; Encrypt path: with x = xmm0 ^ 0x5B.. and P = the permutation in xmm5, the
; stored value is P(x) ^ P(P(x)) ^ P(P(P(x))).
401	add	edx,16
402	pxor	xmm4,[336+ebp]
403db	102,15,56,0,229	; pshufb xmm4,xmm5
404	movdqa	xmm3,xmm4
405db	102,15,56,0,229	; pshufb xmm4,xmm5
406	pxor	xmm3,xmm4
407db	102,15,56,0,229	; pshufb xmm4,xmm5
408	pxor	xmm3,xmm4
409	jmp	NEAR L$015schedule_mangle_both
410align	16
; Decrypt path: split the value into nibbles and run it through the four
; table pairs at esi = ebp+416; the accumulator in xmm3 is permuted by xmm5
; after each of the first three pairs.
411L$014schedule_mangle_dec:
412	movdqa	xmm2,[ebp-16]
413	lea	esi,[416+ebp]	; overwrites esi; the callers in this file no longer need the user-key pointer here
414	movdqa	xmm1,xmm2
415	pandn	xmm1,xmm4
416	psrld	xmm1,4	; xmm1 = high nibbles
417	pand	xmm4,xmm2	; xmm4 = low nibbles
418	movdqa	xmm2,[esi]
419db	102,15,56,0,212	; pshufb xmm2,xmm4
420	movdqa	xmm3,[16+esi]
421db	102,15,56,0,217	; pshufb xmm3,xmm1
422	pxor	xmm3,xmm2
423db	102,15,56,0,221	; pshufb xmm3,xmm5
424	movdqa	xmm2,[32+esi]
425db	102,15,56,0,212	; pshufb xmm2,xmm4
426	pxor	xmm2,xmm3
427	movdqa	xmm3,[48+esi]
428db	102,15,56,0,217	; pshufb xmm3,xmm1
429	pxor	xmm3,xmm2
430db	102,15,56,0,221	; pshufb xmm3,xmm5
431	movdqa	xmm2,[64+esi]
432db	102,15,56,0,212	; pshufb xmm2,xmm4
433	pxor	xmm2,xmm3
434	movdqa	xmm3,[80+esi]
435db	102,15,56,0,217	; pshufb xmm3,xmm1
436	pxor	xmm3,xmm2
437db	102,15,56,0,221	; pshufb xmm3,xmm5
438	movdqa	xmm2,[96+esi]
439db	102,15,56,0,212	; pshufb xmm2,xmm4
440	pxor	xmm2,xmm3
441	movdqa	xmm3,[112+esi]
442db	102,15,56,0,217	; pshufb xmm3,xmm1
443	pxor	xmm3,xmm2
444	add	edx,-16
; Common tail: apply the ecx-selected byte permutation, step ecx to the
; previous row (wrapping within 0..48), and store the result.
445L$015schedule_mangle_both:
446	movdqa	xmm1,[256+ecx*1+ebp]
447db	102,15,56,0,217	; pshufb xmm3,xmm1
448	add	ecx,-16
449	and	ecx,48
450	movdqu	[edx],xmm3
451	ret
452global	_vpaes_set_encrypt_key
453align	16
;-----------------------------------------------------------------------
; _vpaes_set_encrypt_key(arg1, arg2, arg3)
; Public entry point; arguments are read from the stack.
; In:    arg1 [esp+4]  = pointer to the user key bytes
;        arg2 [esp+8]  = key size in bits (compared against 192 by
;                        __vpaes_schedule_core)
;        arg3 [esp+12] = pointer to the key schedule to fill; the DWORD at
;                        offset 240 receives (bits >> 5) + 5
; Out:   eax = 0
; Saves and restores ebp, ebx, esi, edi.  Builds a 16-byte-aligned scratch
; frame below the saved registers and keeps the original esp at [48+esp].
;-----------------------------------------------------------------------
454_vpaes_set_encrypt_key:
455L$_vpaes_set_encrypt_key_begin:
456	push	ebp
457	push	ebx
458	push	esi
459	push	edi
; After four pushes the arguments sit at [20+esp], [24+esp] and [28+esp].
; All three are read before esp is switched to the aligned frame.
460	mov	esi,DWORD [20+esp]
461	lea	ebx,[esp-56]
462	mov	eax,DWORD [24+esp]
463	and	ebx,-16	; round the new frame down to a 16-byte boundary
464	mov	edx,DWORD [28+esp]
465	xchg	ebx,esp	; esp = aligned frame, ebx = original esp
466	mov	DWORD [48+esp],ebx	; remember the original esp for the epilogue
467	mov	ebx,eax
468	shr	ebx,5
469	add	ebx,5
470	mov	DWORD [240+edx],ebx	; the field the cores read as their loop counter
471	mov	ecx,48	; starting row offset for the schedule's byte permutation
472	mov	edi,0	; 0 = build an encryption schedule
; ebp = distance from L$016pic_point to the table; __vpaes_schedule_core adds
; its return address (which is L$016pic_point) to get the absolute pointer.
473	lea	ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
474	call	__vpaes_schedule_core
475L$016pic_point:
476	mov	esp,DWORD [48+esp]	; drop the aligned frame
477	xor	eax,eax
478	pop	edi
479	pop	esi
480	pop	ebx
481	pop	ebp
482	ret
483global	_vpaes_set_decrypt_key
484align	16
;-----------------------------------------------------------------------
; _vpaes_set_decrypt_key(arg1, arg2, arg3)
; Public entry point; arguments are read from the stack.
; In:    arg1 [esp+4]  = pointer to the user key bytes
;        arg2 [esp+8]  = key size in bits (compared against 192 by
;                        __vpaes_schedule_core)
;        arg3 [esp+12] = pointer to the key schedule to fill; the DWORD at
;                        offset 240 receives (bits >> 5) + 5
; Out:   eax = 0
; Differs from _vpaes_set_encrypt_key in that the schedule is written from
; its highest slot downwards (edi = 1) and the starting row offset in ecx
; depends on the key size.
; Saves and restores ebp, ebx, esi, edi.  Builds a 16-byte-aligned scratch
; frame below the saved registers and keeps the original esp at [48+esp].
;-----------------------------------------------------------------------
485_vpaes_set_decrypt_key:
486L$_vpaes_set_decrypt_key_begin:
487	push	ebp
488	push	ebx
489	push	esi
490	push	edi
; After four pushes the arguments sit at [20+esp], [24+esp] and [28+esp].
; All three are read before esp is switched to the aligned frame.
491	mov	esi,DWORD [20+esp]
492	lea	ebx,[esp-56]
493	mov	eax,DWORD [24+esp]
494	and	ebx,-16	; round the new frame down to a 16-byte boundary
495	mov	edx,DWORD [28+esp]
496	xchg	ebx,esp	; esp = aligned frame, ebx = original esp
497	mov	DWORD [48+esp],ebx	; remember the original esp for the epilogue
498	mov	ebx,eax
499	shr	ebx,5
500	add	ebx,5
501	mov	DWORD [240+edx],ebx	; the field the cores read as their loop counter
502	shl	ebx,4
503	lea	edx,[16+ebx*1+edx]	; edx = schedule + 16*count + 16: start at the top slot
504	mov	edi,1	; non-zero = build a decryption schedule
505	mov	ecx,eax
506	shr	ecx,1
507	and	ecx,32
508	xor	ecx,32	; ecx = 32 when bit 6 of the key size is clear, otherwise 0
; ebp = distance from L$017pic_point to the table; __vpaes_schedule_core adds
; its return address (which is L$017pic_point) to get the absolute pointer.
509	lea	ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
510	call	__vpaes_schedule_core
511L$017pic_point:
512	mov	esp,DWORD [48+esp]	; drop the aligned frame
513	xor	eax,eax
514	pop	edi
515	pop	esi
516	pop	ebx
517	pop	ebp
518	ret
519global	_vpaes_encrypt
520align	16
;-----------------------------------------------------------------------
; _vpaes_encrypt(arg1, arg2, arg3)
; Public entry point: runs __vpaes_encrypt_core on a single 16-byte block.
; In:    arg1 [esp+4]  = pointer to the 16 input bytes (unaligned load)
;        arg2 [esp+8]  = pointer to the 16 output bytes (unaligned store)
;        arg3 [esp+12] = pointer to the key schedule
; Out:   16 bytes written through arg2; eax is not set to a defined value.
; Saves and restores ebp, ebx, esi, edi.  Builds a 16-byte-aligned frame
; below the saved registers and keeps the original esp at [48+esp].
;-----------------------------------------------------------------------
521_vpaes_encrypt:
522L$_vpaes_encrypt_begin:
523	push	ebp
524	push	ebx
525	push	esi
526	push	edi
; ebp = distance from L$018pic_point to the table; __vpaes_preheat adds its
; return address (which is L$018pic_point) and loads xmm6/xmm7.
527	lea	ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
528	call	__vpaes_preheat
529L$018pic_point:
; After four pushes the arguments sit at [20+esp], [24+esp] and [28+esp].
; All three are read before esp is switched to the aligned frame.
530	mov	esi,DWORD [20+esp]
531	lea	ebx,[esp-56]
532	mov	edi,DWORD [24+esp]
533	and	ebx,-16	; round the new frame down to a 16-byte boundary
534	mov	edx,DWORD [28+esp]
535	xchg	ebx,esp	; esp = aligned frame, ebx = original esp
536	mov	DWORD [48+esp],ebx	; remember the original esp for the epilogue
537	movdqu	xmm0,[esi]
538	call	__vpaes_encrypt_core
539	movdqu	[edi],xmm0
540	mov	esp,DWORD [48+esp]	; drop the aligned frame
541	pop	edi
542	pop	esi
543	pop	ebx
544	pop	ebp
545	ret
546global	_vpaes_decrypt
547align	16
;-----------------------------------------------------------------------
; _vpaes_decrypt(arg1, arg2, arg3)
; Public entry point: runs __vpaes_decrypt_core on a single 16-byte block.
; In:    arg1 [esp+4]  = pointer to the 16 input bytes (unaligned load)
;        arg2 [esp+8]  = pointer to the 16 output bytes (unaligned store)
;        arg3 [esp+12] = pointer to the key schedule
; Out:   16 bytes written through arg2; eax is not set to a defined value.
; Saves and restores ebp, ebx, esi, edi.  Builds a 16-byte-aligned frame
; below the saved registers and keeps the original esp at [48+esp].
;-----------------------------------------------------------------------
548_vpaes_decrypt:
549L$_vpaes_decrypt_begin:
550	push	ebp
551	push	ebx
552	push	esi
553	push	edi
; ebp = distance from L$019pic_point to the table; __vpaes_preheat adds its
; return address (which is L$019pic_point) and loads xmm6/xmm7.
554	lea	ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
555	call	__vpaes_preheat
556L$019pic_point:
; After four pushes the arguments sit at [20+esp], [24+esp] and [28+esp].
; All three are read before esp is switched to the aligned frame.
557	mov	esi,DWORD [20+esp]
558	lea	ebx,[esp-56]
559	mov	edi,DWORD [24+esp]
560	and	ebx,-16	; round the new frame down to a 16-byte boundary
561	mov	edx,DWORD [28+esp]
562	xchg	ebx,esp	; esp = aligned frame, ebx = original esp
563	mov	DWORD [48+esp],ebx	; remember the original esp for the epilogue
564	movdqu	xmm0,[esi]
565	call	__vpaes_decrypt_core
566	movdqu	[edi],xmm0
567	mov	esp,DWORD [48+esp]	; drop the aligned frame
568	pop	edi
569	pop	esi
570	pop	ebx
571	pop	ebp
572	ret
573global	_vpaes_cbc_encrypt
574align	16
;-----------------------------------------------------------------------
; _vpaes_cbc_encrypt(arg1, arg2, arg3, arg4, arg5, arg6)
; Public entry point: chained (CBC-style) processing of whole 16-byte blocks.
; In:    arg1 [esp+4]  = input pointer
;        arg2 [esp+8]  = output pointer
;        arg3 [esp+12] = length in bytes
;        arg4 [esp+16] = key schedule pointer
;        arg5 [esp+20] = pointer to the 16-byte chaining value (IV); read at
;                        the start and overwritten with the final chaining
;                        value at the end
;        arg6 [esp+24] = direction: 0 takes the decrypt loop, non-zero the
;                        encrypt loop
; Behaviour visible in the code:
;   * length < 16: returns immediately; nothing is written, the IV included.
;   * otherwise floor(length/16) blocks are processed; trailing bytes beyond
;     the last whole block are ignored.
; Aligned frame layout (esp after the xchg):
;   [esp]    = output - input (so the output address is esi + this delta)
;   [4+esp]  = key schedule pointer (reloaded because the cores advance edx)
;   [8+esp]  = IV pointer
;   [16+esp] = decrypt only: chaining value for the current block
;   [32+esp] = decrypt only: copy of the current input block
;   [48+esp] = original esp
; Saves and restores ebp, ebx, esi, edi.
;-----------------------------------------------------------------------
575_vpaes_cbc_encrypt:
576L$_vpaes_cbc_encrypt_begin:
577	push	ebp
578	push	ebx
579	push	esi
580	push	edi
581	mov	esi,DWORD [20+esp]
582	mov	edi,DWORD [24+esp]
583	mov	eax,DWORD [28+esp]
584	mov	edx,DWORD [32+esp]
585	sub	eax,16	; eax = length - 16; borrow means fewer than 16 bytes
586	jc	NEAR L$020cbc_abort
587	lea	ebx,[esp-56]
588	mov	ebp,DWORD [36+esp]
589	and	ebx,-16	; round the new frame down to a 16-byte boundary
590	mov	ecx,DWORD [40+esp]
591	xchg	ebx,esp	; esp = aligned frame, ebx = original esp
592	movdqu	xmm1,[ebp]	; xmm1 = chaining value, carried across iterations
593	sub	edi,esi	; edi = output - input
594	mov	DWORD [48+esp],ebx
595	mov	DWORD [esp],edi
596	mov	DWORD [4+esp],edx
597	mov	DWORD [8+esp],ebp
598	mov	edi,eax	; edi = remaining length minus 16; loops run until it borrows
; ebp = distance from L$021pic_point to the table; __vpaes_preheat adds its
; return address (which is L$021pic_point) and loads xmm6/xmm7.
599	lea	ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
600	call	__vpaes_preheat
601L$021pic_point:
602	cmp	ecx,0
603	je	NEAR L$022cbc_dec_loop
604	jmp	NEAR L$023cbc_enc_loop
605align	16
; Encrypt direction: out = core(in ^ chain); chain = out.
606L$023cbc_enc_loop:
607	movdqu	xmm0,[esi]
608	pxor	xmm0,xmm1
609	call	__vpaes_encrypt_core
610	mov	ebx,DWORD [esp]	; ebx = output - input
611	mov	edx,DWORD [4+esp]	; rewind edx to the start of the key schedule
612	movdqa	xmm1,xmm0
613	movdqu	[esi*1+ebx],xmm0
614	lea	esi,[16+esi]
615	sub	edi,16
616	jnc	NEAR L$023cbc_enc_loop	; continue while at least one more whole block remains
617	jmp	NEAR L$024cbc_done
618align	16
; Decrypt direction: out = core(in) ^ chain; chain = in.  The input block is
; copied to the stack before the core runs and reloaded afterwards, so the
; chain update does not read the input buffer again.
619L$022cbc_dec_loop:
620	movdqu	xmm0,[esi]
621	movdqa	[16+esp],xmm1
622	movdqa	[32+esp],xmm0
623	call	__vpaes_decrypt_core
624	mov	ebx,DWORD [esp]	; ebx = output - input
625	mov	edx,DWORD [4+esp]	; rewind edx to the start of the key schedule
626	pxor	xmm0,[16+esp]
627	movdqa	xmm1,[32+esp]
628	movdqu	[esi*1+ebx],xmm0
629	lea	esi,[16+esi]
630	sub	edi,16
631	jnc	NEAR L$022cbc_dec_loop	; continue while at least one more whole block remains
632L$024cbc_done:
633	mov	ebx,DWORD [8+esp]	; ebx = IV pointer; fetched before the frame is dropped
634	mov	esp,DWORD [48+esp]
635	movdqu	[ebx],xmm1	; write the final chaining value back to the caller's IV
636L$020cbc_abort:
637	pop	edi
638	pop	esi
639	pop	ebx
640	pop	ebp
641	ret
642