1; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp"
2; 1 "<built-in>" 1
3; 1 "<built-in>" 3
4; 340 "<built-in>" 3
5; 1 "<command line>" 1
6; 1 "<built-in>" 2
7; 1 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
8OPTION	DOTNAME
9
10; 1 "./crypto/x86_arch.h" 1
11
12
13; 16 "./crypto/x86_arch.h"
14
15
16
17
18
19
20
21
22
23; 40 "./crypto/x86_arch.h"
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69; 3 "crypto/aes/bsaes-masm-x86_64.S.tmp" 2
70.text$	SEGMENT ALIGN(64) 'CODE'
71
72EXTERN	asm_AES_encrypt:NEAR
73EXTERN	asm_AES_decrypt:NEAR
74
75
76ALIGN	64
; ---------------------------------------------------------------------
; _bsaes_encrypt8 (private)
; Processes eight 128-bit values in parallel, held in
; xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6.
;
; In (as read by the code below):
;   rax  = pointer to 16-byte-aligned key material (movdqa loads):
;          16 bytes XORed into every input first, then 128 bytes consumed
;          per pass through $L$enc_loop, then 16 bytes XORed in at the end.
;          This is the layout written by _bsaes_key_convert in this file.
;   r10d = iteration count (decremented once before the loop is entered
;          and once per pass; the loop exits when it goes negative).
; Out:
;   results are left in xmm15, xmm0, xmm3, xmm5, xmm2, xmm6, xmm1, xmm4
;   (the order in which bsaes_ctr32_encrypt_blocks in this file uses them).
; Clobbers: rax, r10d, r11, xmm7-xmm14, flags.
;
; The "DB 102,[68,]15,56,0,xx" lines are hand-encoded pshufb instructions
; (66 [REX] 0F 38 00 /r); in this routine each one is "pshufb xmmN,xmm7"
; for N = 15, 0, 1, 2, 3, 4, 5, 6 in that order.
; NOTE(review): the boolean network is presumably the bit-sliced AES
; round function (SubBytes / ShiftRows / MixColumns) - confirm against
; the generator script; the constant tables around $L$BS0 are not visible here.
; ---------------------------------------------------------------------
77_bsaes_encrypt8	PROC PRIVATE
78	lea	r11,QWORD PTR[$L$BS0]
79
; XOR the first 16 bytes of key material into all eight inputs and
; byte-shuffle each input with the mask at [80+r11].
80	movdqa	xmm8,XMMWORD PTR[rax]
81	lea	rax,QWORD PTR[16+rax]
82	movdqa	xmm7,XMMWORD PTR[80+r11]
83	pxor	xmm15,xmm8
84	pxor	xmm0,xmm8
85DB	102,68,15,56,0,255
86	pxor	xmm1,xmm8
87DB	102,15,56,0,199
88	pxor	xmm2,xmm8
89DB	102,15,56,0,207
90	pxor	xmm3,xmm8
91DB	102,15,56,0,215
92	pxor	xmm4,xmm8
93DB	102,15,56,0,223
94	pxor	xmm5,xmm8
95DB	102,15,56,0,231
96	pxor	xmm6,xmm8
97DB	102,15,56,0,239
98DB	102,15,56,0,247
; Alternate entry point: bsaes_ctr32_encrypt_blocks calls in here after
; doing the key XOR / shuffle itself and after loading r11 with $L$BS0.
99_bsaes_encrypt8_bitslice::
; Bit-interleave the eight registers: three rounds of
; "t = ((a shr n) xor b) and mask; b ^= t; a ^= t shl n" with n = 1, 2, 4,
; using the masks at [r11], [16+r11] and [32+r11].
100	movdqa	xmm7,XMMWORD PTR[r11]
101	movdqa	xmm8,XMMWORD PTR[16+r11]
102	movdqa	xmm9,xmm5
103	psrlq	xmm5,1
104	movdqa	xmm10,xmm3
105	psrlq	xmm3,1
106	pxor	xmm5,xmm6
107	pxor	xmm3,xmm4
108	pand	xmm5,xmm7
109	pand	xmm3,xmm7
110	pxor	xmm6,xmm5
111	psllq	xmm5,1
112	pxor	xmm4,xmm3
113	psllq	xmm3,1
114	pxor	xmm5,xmm9
115	pxor	xmm3,xmm10
116	movdqa	xmm9,xmm1
117	psrlq	xmm1,1
118	movdqa	xmm10,xmm15
119	psrlq	xmm15,1
120	pxor	xmm1,xmm2
121	pxor	xmm15,xmm0
122	pand	xmm1,xmm7
123	pand	xmm15,xmm7
124	pxor	xmm2,xmm1
125	psllq	xmm1,1
126	pxor	xmm0,xmm15
127	psllq	xmm15,1
128	pxor	xmm1,xmm9
129	pxor	xmm15,xmm10
; Shift-by-2 stage (mask in xmm8); the shift-by-4 mask is preloaded into xmm7.
130	movdqa	xmm7,XMMWORD PTR[32+r11]
131	movdqa	xmm9,xmm4
132	psrlq	xmm4,2
133	movdqa	xmm10,xmm3
134	psrlq	xmm3,2
135	pxor	xmm4,xmm6
136	pxor	xmm3,xmm5
137	pand	xmm4,xmm8
138	pand	xmm3,xmm8
139	pxor	xmm6,xmm4
140	psllq	xmm4,2
141	pxor	xmm5,xmm3
142	psllq	xmm3,2
143	pxor	xmm4,xmm9
144	pxor	xmm3,xmm10
145	movdqa	xmm9,xmm0
146	psrlq	xmm0,2
147	movdqa	xmm10,xmm15
148	psrlq	xmm15,2
149	pxor	xmm0,xmm2
150	pxor	xmm15,xmm1
151	pand	xmm0,xmm8
152	pand	xmm15,xmm8
153	pxor	xmm2,xmm0
154	psllq	xmm0,2
155	pxor	xmm1,xmm15
156	psllq	xmm15,2
157	pxor	xmm0,xmm9
158	pxor	xmm15,xmm10
; Shift-by-4 stage (mask in xmm7).
159	movdqa	xmm9,xmm2
160	psrlq	xmm2,4
161	movdqa	xmm10,xmm1
162	psrlq	xmm1,4
163	pxor	xmm2,xmm6
164	pxor	xmm1,xmm5
165	pand	xmm2,xmm7
166	pand	xmm1,xmm7
167	pxor	xmm6,xmm2
168	psllq	xmm2,4
169	pxor	xmm5,xmm1
170	psllq	xmm1,4
171	pxor	xmm2,xmm9
172	pxor	xmm1,xmm10
173	movdqa	xmm9,xmm0
174	psrlq	xmm0,4
175	movdqa	xmm10,xmm15
176	psrlq	xmm15,4
177	pxor	xmm0,xmm4
178	pxor	xmm15,xmm3
179	pand	xmm0,xmm7
180	pand	xmm15,xmm7
181	pxor	xmm4,xmm0
182	psllq	xmm0,4
183	pxor	xmm3,xmm15
184	psllq	xmm15,4
185	pxor	xmm0,xmm9
186	pxor	xmm15,xmm10
; The first pass skips the key XOR / shuffle at $L$enc_loop because the
; equivalent step was already done above; enter directly at $L$enc_sbox.
187	dec	r10d
188	jmp	$L$enc_sbox
189ALIGN	16
; Top of the per-iteration loop: XOR 128 bytes of key material from [rax]
; into the eight registers, shuffle each with xmm7, and advance rax by 128.
190$L$enc_loop::
191	pxor	xmm15,XMMWORD PTR[rax]
192	pxor	xmm0,XMMWORD PTR[16+rax]
193DB	102,68,15,56,0,255
194	pxor	xmm1,XMMWORD PTR[32+rax]
195DB	102,15,56,0,199
196	pxor	xmm2,XMMWORD PTR[48+rax]
197DB	102,15,56,0,207
198	pxor	xmm3,XMMWORD PTR[64+rax]
199DB	102,15,56,0,215
200	pxor	xmm4,XMMWORD PTR[80+rax]
201DB	102,15,56,0,223
202	pxor	xmm5,XMMWORD PTR[96+rax]
203DB	102,15,56,0,231
204	pxor	xmm6,XMMWORD PTR[112+rax]
205DB	102,15,56,0,239
206	lea	rax,QWORD PTR[128+rax]
207DB	102,15,56,0,247
; Pure register-to-register boolean network (pxor/pand/por/movdqa only)
; over xmm0-xmm6 and xmm15, with xmm7-xmm14 as temporaries.
; NOTE(review): presumably the bit-sliced S-box circuit - confirm.
208$L$enc_sbox::
209	pxor	xmm4,xmm5
210	pxor	xmm1,xmm0
211	pxor	xmm2,xmm15
212	pxor	xmm5,xmm1
213	pxor	xmm4,xmm15
214
215	pxor	xmm5,xmm2
216	pxor	xmm2,xmm6
217	pxor	xmm6,xmm4
218	pxor	xmm2,xmm3
219	pxor	xmm3,xmm4
220	pxor	xmm2,xmm0
221
222	pxor	xmm1,xmm6
223	pxor	xmm0,xmm4
224	movdqa	xmm10,xmm6
225	movdqa	xmm9,xmm0
226	movdqa	xmm8,xmm4
227	movdqa	xmm12,xmm1
228	movdqa	xmm11,xmm5
229
230	pxor	xmm10,xmm3
231	pxor	xmm9,xmm1
232	pxor	xmm8,xmm2
233	movdqa	xmm13,xmm10
234	pxor	xmm12,xmm3
235	movdqa	xmm7,xmm9
236	pxor	xmm11,xmm15
237	movdqa	xmm14,xmm10
238
239	por	xmm9,xmm8
240	por	xmm10,xmm11
241	pxor	xmm14,xmm7
242	pand	xmm13,xmm11
243	pxor	xmm11,xmm8
244	pand	xmm7,xmm8
245	pand	xmm14,xmm11
246	movdqa	xmm11,xmm2
247	pxor	xmm11,xmm15
248	pand	xmm12,xmm11
249	pxor	xmm10,xmm12
250	pxor	xmm9,xmm12
251	movdqa	xmm12,xmm6
252	movdqa	xmm11,xmm4
253	pxor	xmm12,xmm0
254	pxor	xmm11,xmm5
255	movdqa	xmm8,xmm12
256	pand	xmm12,xmm11
257	por	xmm8,xmm11
258	pxor	xmm7,xmm12
259	pxor	xmm10,xmm14
260	pxor	xmm9,xmm13
261	pxor	xmm8,xmm14
262	movdqa	xmm11,xmm1
263	pxor	xmm7,xmm13
264	movdqa	xmm12,xmm3
265	pxor	xmm8,xmm13
266	movdqa	xmm13,xmm0
267	pand	xmm11,xmm2
268	movdqa	xmm14,xmm6
269	pand	xmm12,xmm15
270	pand	xmm13,xmm4
271	por	xmm14,xmm5
272	pxor	xmm10,xmm11
273	pxor	xmm9,xmm12
274	pxor	xmm8,xmm13
275	pxor	xmm7,xmm14
276
277
278
279
280
281	movdqa	xmm11,xmm10
282	pand	xmm10,xmm8
283	pxor	xmm11,xmm9
284
285	movdqa	xmm13,xmm7
286	movdqa	xmm14,xmm11
287	pxor	xmm13,xmm10
288	pand	xmm14,xmm13
289
290	movdqa	xmm12,xmm8
291	pxor	xmm14,xmm9
292	pxor	xmm12,xmm7
293
294	pxor	xmm10,xmm9
295
296	pand	xmm12,xmm10
297
298	movdqa	xmm9,xmm13
299	pxor	xmm12,xmm7
300
301	pxor	xmm9,xmm12
302	pxor	xmm8,xmm12
303
304	pand	xmm9,xmm7
305
306	pxor	xmm13,xmm9
307	pxor	xmm8,xmm9
308
309	pand	xmm13,xmm14
310
311	pxor	xmm13,xmm11
312	movdqa	xmm11,xmm5
313	movdqa	xmm7,xmm4
314	movdqa	xmm9,xmm14
315	pxor	xmm9,xmm13
316	pand	xmm9,xmm5
317	pxor	xmm5,xmm4
318	pand	xmm4,xmm14
319	pand	xmm5,xmm13
320	pxor	xmm5,xmm4
321	pxor	xmm4,xmm9
322	pxor	xmm11,xmm15
323	pxor	xmm7,xmm2
324	pxor	xmm14,xmm12
325	pxor	xmm13,xmm8
326	movdqa	xmm10,xmm14
327	movdqa	xmm9,xmm12
328	pxor	xmm10,xmm13
329	pxor	xmm9,xmm8
330	pand	xmm10,xmm11
331	pand	xmm9,xmm15
332	pxor	xmm11,xmm7
333	pxor	xmm15,xmm2
334	pand	xmm7,xmm14
335	pand	xmm2,xmm12
336	pand	xmm11,xmm13
337	pand	xmm15,xmm8
338	pxor	xmm7,xmm11
339	pxor	xmm15,xmm2
340	pxor	xmm11,xmm10
341	pxor	xmm2,xmm9
342	pxor	xmm5,xmm11
343	pxor	xmm15,xmm11
344	pxor	xmm4,xmm7
345	pxor	xmm2,xmm7
346
347	movdqa	xmm11,xmm6
348	movdqa	xmm7,xmm0
349	pxor	xmm11,xmm3
350	pxor	xmm7,xmm1
351	movdqa	xmm10,xmm14
352	movdqa	xmm9,xmm12
353	pxor	xmm10,xmm13
354	pxor	xmm9,xmm8
355	pand	xmm10,xmm11
356	pand	xmm9,xmm3
357	pxor	xmm11,xmm7
358	pxor	xmm3,xmm1
359	pand	xmm7,xmm14
360	pand	xmm1,xmm12
361	pand	xmm11,xmm13
362	pand	xmm3,xmm8
363	pxor	xmm7,xmm11
364	pxor	xmm3,xmm1
365	pxor	xmm11,xmm10
366	pxor	xmm1,xmm9
367	pxor	xmm14,xmm12
368	pxor	xmm13,xmm8
369	movdqa	xmm10,xmm14
370	pxor	xmm10,xmm13
371	pand	xmm10,xmm6
372	pxor	xmm6,xmm0
373	pand	xmm0,xmm14
374	pand	xmm6,xmm13
375	pxor	xmm6,xmm0
376	pxor	xmm0,xmm10
377	pxor	xmm6,xmm11
378	pxor	xmm3,xmm11
379	pxor	xmm0,xmm7
380	pxor	xmm1,xmm7
381	pxor	xmm6,xmm15
382	pxor	xmm0,xmm5
383	pxor	xmm3,xmm6
384	pxor	xmm5,xmm15
385	pxor	xmm15,xmm0
386
387	pxor	xmm0,xmm4
388	pxor	xmm4,xmm1
389	pxor	xmm1,xmm2
390	pxor	xmm2,xmm4
391	pxor	xmm3,xmm4
392
393	pxor	xmm5,xmm2
; Loop control: leave once the counter has gone negative. The flags set by
; this dec are also what the jnz at the bottom of the loop tests, because
; none of the SSE instructions in between modify EFLAGS.
394	dec	r10d
395	jl	$L$enc_done
; Dword rotations (pshufd 93h / 4Eh) combined with XORs across registers.
; NOTE(review): presumably the bit-sliced MixColumns step - confirm.
396	pshufd	xmm7,xmm15,093h
397	pshufd	xmm8,xmm0,093h
398	pxor	xmm15,xmm7
399	pshufd	xmm9,xmm3,093h
400	pxor	xmm0,xmm8
401	pshufd	xmm10,xmm5,093h
402	pxor	xmm3,xmm9
403	pshufd	xmm11,xmm2,093h
404	pxor	xmm5,xmm10
405	pshufd	xmm12,xmm6,093h
406	pxor	xmm2,xmm11
407	pshufd	xmm13,xmm1,093h
408	pxor	xmm6,xmm12
409	pshufd	xmm14,xmm4,093h
410	pxor	xmm1,xmm13
411	pxor	xmm4,xmm14
412
413	pxor	xmm8,xmm15
414	pxor	xmm7,xmm4
415	pxor	xmm8,xmm4
416	pshufd	xmm15,xmm15,04Eh
417	pxor	xmm9,xmm0
418	pshufd	xmm0,xmm0,04Eh
419	pxor	xmm12,xmm2
420	pxor	xmm15,xmm7
421	pxor	xmm13,xmm6
422	pxor	xmm0,xmm8
423	pxor	xmm11,xmm5
424	pshufd	xmm7,xmm2,04Eh
425	pxor	xmm14,xmm1
426	pshufd	xmm8,xmm6,04Eh
427	pxor	xmm10,xmm3
428	pshufd	xmm2,xmm5,04Eh
429	pxor	xmm10,xmm4
430	pshufd	xmm6,xmm4,04Eh
431	pxor	xmm11,xmm4
432	pshufd	xmm5,xmm1,04Eh
433	pxor	xmm7,xmm11
434	pshufd	xmm1,xmm3,04Eh
435	pxor	xmm8,xmm12
436	pxor	xmm2,xmm10
437	pxor	xmm6,xmm14
438	pxor	xmm5,xmm13
439	movdqa	xmm3,xmm7
440	pxor	xmm1,xmm9
441	movdqa	xmm4,xmm8
; Choose the shuffle mask for the next pass: [48+r11] while the counter is
; still non-zero, [64+r11] for the pass entered with the counter at zero
; (the last pass before $L$enc_done is reached).
442	movdqa	xmm7,XMMWORD PTR[48+r11]
443	jnz	$L$enc_loop
444	movdqa	xmm7,XMMWORD PTR[64+r11]
445	jmp	$L$enc_loop
446ALIGN	16
; Exit path: run the same 1/2/4 mask-and-shift exchange as at entry (on the
; register order the loop leaves behind), then XOR in the final 16 bytes of
; key material at [rax].
447$L$enc_done::
448	movdqa	xmm7,XMMWORD PTR[r11]
449	movdqa	xmm8,XMMWORD PTR[16+r11]
450	movdqa	xmm9,xmm1
451	psrlq	xmm1,1
452	movdqa	xmm10,xmm2
453	psrlq	xmm2,1
454	pxor	xmm1,xmm4
455	pxor	xmm2,xmm6
456	pand	xmm1,xmm7
457	pand	xmm2,xmm7
458	pxor	xmm4,xmm1
459	psllq	xmm1,1
460	pxor	xmm6,xmm2
461	psllq	xmm2,1
462	pxor	xmm1,xmm9
463	pxor	xmm2,xmm10
464	movdqa	xmm9,xmm3
465	psrlq	xmm3,1
466	movdqa	xmm10,xmm15
467	psrlq	xmm15,1
468	pxor	xmm3,xmm5
469	pxor	xmm15,xmm0
470	pand	xmm3,xmm7
471	pand	xmm15,xmm7
472	pxor	xmm5,xmm3
473	psllq	xmm3,1
474	pxor	xmm0,xmm15
475	psllq	xmm15,1
476	pxor	xmm3,xmm9
477	pxor	xmm15,xmm10
478	movdqa	xmm7,XMMWORD PTR[32+r11]
479	movdqa	xmm9,xmm6
480	psrlq	xmm6,2
481	movdqa	xmm10,xmm2
482	psrlq	xmm2,2
483	pxor	xmm6,xmm4
484	pxor	xmm2,xmm1
485	pand	xmm6,xmm8
486	pand	xmm2,xmm8
487	pxor	xmm4,xmm6
488	psllq	xmm6,2
489	pxor	xmm1,xmm2
490	psllq	xmm2,2
491	pxor	xmm6,xmm9
492	pxor	xmm2,xmm10
493	movdqa	xmm9,xmm0
494	psrlq	xmm0,2
495	movdqa	xmm10,xmm15
496	psrlq	xmm15,2
497	pxor	xmm0,xmm5
498	pxor	xmm15,xmm3
499	pand	xmm0,xmm8
500	pand	xmm15,xmm8
501	pxor	xmm5,xmm0
502	psllq	xmm0,2
503	pxor	xmm3,xmm15
504	psllq	xmm15,2
505	pxor	xmm0,xmm9
506	pxor	xmm15,xmm10
507	movdqa	xmm9,xmm5
508	psrlq	xmm5,4
509	movdqa	xmm10,xmm3
510	psrlq	xmm3,4
511	pxor	xmm5,xmm4
512	pxor	xmm3,xmm1
513	pand	xmm5,xmm7
514	pand	xmm3,xmm7
515	pxor	xmm4,xmm5
516	psllq	xmm5,4
517	pxor	xmm1,xmm3
518	psllq	xmm3,4
519	pxor	xmm5,xmm9
520	pxor	xmm3,xmm10
521	movdqa	xmm9,xmm0
522	psrlq	xmm0,4
523	movdqa	xmm10,xmm15
524	psrlq	xmm15,4
525	pxor	xmm0,xmm6
526	pxor	xmm15,xmm2
527	pand	xmm0,xmm7
528	pand	xmm15,xmm7
529	pxor	xmm6,xmm0
530	psllq	xmm0,4
531	pxor	xmm2,xmm15
532	psllq	xmm15,4
533	pxor	xmm0,xmm9
534	pxor	xmm15,xmm10
; Final key XOR into all eight result registers.
535	movdqa	xmm7,XMMWORD PTR[rax]
536	pxor	xmm3,xmm7
537	pxor	xmm5,xmm7
538	pxor	xmm2,xmm7
539	pxor	xmm6,xmm7
540	pxor	xmm1,xmm7
541	pxor	xmm4,xmm7
542	pxor	xmm15,xmm7
543	pxor	xmm0,xmm7
; F3 C3 is the two-byte "rep ret" return encoding.
544	DB	0F3h,0C3h		;repret
545_bsaes_encrypt8	ENDP
546
547
548ALIGN	64
; ---------------------------------------------------------------------
; _bsaes_decrypt8 (private)
; Counterpart of _bsaes_encrypt8: processes eight 128-bit values held in
; xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6.
;
; In (as read by the code below):
;   rax  = pointer to 16-byte-aligned key material (movdqa loads):
;          16 bytes XORed into every input first, 128 bytes consumed per
;          pass through $L$dec_loop, then 16 bytes XORed in at the end.
;   r10d = iteration count (decremented once before the loop and once per
;          pass; the loop exits when it goes negative).
; Out:
;   results are left in xmm15, xmm0, xmm5, xmm3, xmm1, xmm6, xmm2, xmm4
;   (the order in which bsaes_cbc_encrypt in this file consumes them).
; Clobbers: rax, r10d, r11, xmm7-xmm14, flags.
;
; The "DB 102,[68,]15,56,0,xx" lines are hand-encoded
; "pshufb xmmN,xmm7" instructions for N = 15, 0, 1, ..., 6.
; Shuffle masks are taken from negative offsets relative to $L$BS0
; (-48 at entry, -16 inside the loop, -32 for the last pass).
; NOTE(review): the boolean network is presumably the bit-sliced inverse
; AES round function - confirm against the generator script.
; ---------------------------------------------------------------------
549_bsaes_decrypt8	PROC PRIVATE
550	lea	r11,QWORD PTR[$L$BS0]
551
; XOR the first 16 bytes of key material into all inputs and shuffle each
; input with the mask at [-48+r11].
552	movdqa	xmm8,XMMWORD PTR[rax]
553	lea	rax,QWORD PTR[16+rax]
554	movdqa	xmm7,XMMWORD PTR[((-48))+r11]
555	pxor	xmm15,xmm8
556	pxor	xmm0,xmm8
557DB	102,68,15,56,0,255
558	pxor	xmm1,xmm8
559DB	102,15,56,0,199
560	pxor	xmm2,xmm8
561DB	102,15,56,0,207
562	pxor	xmm3,xmm8
563DB	102,15,56,0,215
564	pxor	xmm4,xmm8
565DB	102,15,56,0,223
566	pxor	xmm5,xmm8
567DB	102,15,56,0,231
568	pxor	xmm6,xmm8
569DB	102,15,56,0,239
570DB	102,15,56,0,247
; Bit-interleave the eight registers: three rounds of
; "t = ((a shr n) xor b) and mask; b ^= t; a ^= t shl n" with n = 1, 2, 4,
; using the masks at [r11], [16+r11] and [32+r11].
571	movdqa	xmm7,XMMWORD PTR[r11]
572	movdqa	xmm8,XMMWORD PTR[16+r11]
573	movdqa	xmm9,xmm5
574	psrlq	xmm5,1
575	movdqa	xmm10,xmm3
576	psrlq	xmm3,1
577	pxor	xmm5,xmm6
578	pxor	xmm3,xmm4
579	pand	xmm5,xmm7
580	pand	xmm3,xmm7
581	pxor	xmm6,xmm5
582	psllq	xmm5,1
583	pxor	xmm4,xmm3
584	psllq	xmm3,1
585	pxor	xmm5,xmm9
586	pxor	xmm3,xmm10
587	movdqa	xmm9,xmm1
588	psrlq	xmm1,1
589	movdqa	xmm10,xmm15
590	psrlq	xmm15,1
591	pxor	xmm1,xmm2
592	pxor	xmm15,xmm0
593	pand	xmm1,xmm7
594	pand	xmm15,xmm7
595	pxor	xmm2,xmm1
596	psllq	xmm1,1
597	pxor	xmm0,xmm15
598	psllq	xmm15,1
599	pxor	xmm1,xmm9
600	pxor	xmm15,xmm10
601	movdqa	xmm7,XMMWORD PTR[32+r11]
602	movdqa	xmm9,xmm4
603	psrlq	xmm4,2
604	movdqa	xmm10,xmm3
605	psrlq	xmm3,2
606	pxor	xmm4,xmm6
607	pxor	xmm3,xmm5
608	pand	xmm4,xmm8
609	pand	xmm3,xmm8
610	pxor	xmm6,xmm4
611	psllq	xmm4,2
612	pxor	xmm5,xmm3
613	psllq	xmm3,2
614	pxor	xmm4,xmm9
615	pxor	xmm3,xmm10
616	movdqa	xmm9,xmm0
617	psrlq	xmm0,2
618	movdqa	xmm10,xmm15
619	psrlq	xmm15,2
620	pxor	xmm0,xmm2
621	pxor	xmm15,xmm1
622	pand	xmm0,xmm8
623	pand	xmm15,xmm8
624	pxor	xmm2,xmm0
625	psllq	xmm0,2
626	pxor	xmm1,xmm15
627	psllq	xmm15,2
628	pxor	xmm0,xmm9
629	pxor	xmm15,xmm10
630	movdqa	xmm9,xmm2
631	psrlq	xmm2,4
632	movdqa	xmm10,xmm1
633	psrlq	xmm1,4
634	pxor	xmm2,xmm6
635	pxor	xmm1,xmm5
636	pand	xmm2,xmm7
637	pand	xmm1,xmm7
638	pxor	xmm6,xmm2
639	psllq	xmm2,4
640	pxor	xmm5,xmm1
641	psllq	xmm1,4
642	pxor	xmm2,xmm9
643	pxor	xmm1,xmm10
644	movdqa	xmm9,xmm0
645	psrlq	xmm0,4
646	movdqa	xmm10,xmm15
647	psrlq	xmm15,4
648	pxor	xmm0,xmm4
649	pxor	xmm15,xmm3
650	pand	xmm0,xmm7
651	pand	xmm15,xmm7
652	pxor	xmm4,xmm0
653	psllq	xmm0,4
654	pxor	xmm3,xmm15
655	psllq	xmm15,4
656	pxor	xmm0,xmm9
657	pxor	xmm15,xmm10
; The first pass skips the key XOR / shuffle at $L$dec_loop because the
; equivalent step was already done above; enter directly at $L$dec_sbox.
658	dec	r10d
659	jmp	$L$dec_sbox
660ALIGN	16
; Top of the per-iteration loop: XOR 128 bytes of key material from [rax]
; into the eight registers, shuffle each with xmm7, and advance rax by 128.
661$L$dec_loop::
662	pxor	xmm15,XMMWORD PTR[rax]
663	pxor	xmm0,XMMWORD PTR[16+rax]
664DB	102,68,15,56,0,255
665	pxor	xmm1,XMMWORD PTR[32+rax]
666DB	102,15,56,0,199
667	pxor	xmm2,XMMWORD PTR[48+rax]
668DB	102,15,56,0,207
669	pxor	xmm3,XMMWORD PTR[64+rax]
670DB	102,15,56,0,215
671	pxor	xmm4,XMMWORD PTR[80+rax]
672DB	102,15,56,0,223
673	pxor	xmm5,XMMWORD PTR[96+rax]
674DB	102,15,56,0,231
675	pxor	xmm6,XMMWORD PTR[112+rax]
676DB	102,15,56,0,239
677	lea	rax,QWORD PTR[128+rax]
678DB	102,15,56,0,247
; Pure register-to-register boolean network (pxor/pand/por/movdqa only)
; over xmm0-xmm6 and xmm15, with xmm7-xmm14 as temporaries.
; NOTE(review): presumably the bit-sliced inverse S-box circuit - confirm.
679$L$dec_sbox::
680	pxor	xmm2,xmm3
681
682	pxor	xmm3,xmm6
683	pxor	xmm1,xmm6
684	pxor	xmm5,xmm3
685	pxor	xmm6,xmm5
686	pxor	xmm0,xmm6
687
688	pxor	xmm15,xmm0
689	pxor	xmm1,xmm4
690	pxor	xmm2,xmm15
691	pxor	xmm4,xmm15
692	pxor	xmm0,xmm2
693	movdqa	xmm10,xmm2
694	movdqa	xmm9,xmm6
695	movdqa	xmm8,xmm0
696	movdqa	xmm12,xmm3
697	movdqa	xmm11,xmm4
698
699	pxor	xmm10,xmm15
700	pxor	xmm9,xmm3
701	pxor	xmm8,xmm5
702	movdqa	xmm13,xmm10
703	pxor	xmm12,xmm15
704	movdqa	xmm7,xmm9
705	pxor	xmm11,xmm1
706	movdqa	xmm14,xmm10
707
708	por	xmm9,xmm8
709	por	xmm10,xmm11
710	pxor	xmm14,xmm7
711	pand	xmm13,xmm11
712	pxor	xmm11,xmm8
713	pand	xmm7,xmm8
714	pand	xmm14,xmm11
715	movdqa	xmm11,xmm5
716	pxor	xmm11,xmm1
717	pand	xmm12,xmm11
718	pxor	xmm10,xmm12
719	pxor	xmm9,xmm12
720	movdqa	xmm12,xmm2
721	movdqa	xmm11,xmm0
722	pxor	xmm12,xmm6
723	pxor	xmm11,xmm4
724	movdqa	xmm8,xmm12
725	pand	xmm12,xmm11
726	por	xmm8,xmm11
727	pxor	xmm7,xmm12
728	pxor	xmm10,xmm14
729	pxor	xmm9,xmm13
730	pxor	xmm8,xmm14
731	movdqa	xmm11,xmm3
732	pxor	xmm7,xmm13
733	movdqa	xmm12,xmm15
734	pxor	xmm8,xmm13
735	movdqa	xmm13,xmm6
736	pand	xmm11,xmm5
737	movdqa	xmm14,xmm2
738	pand	xmm12,xmm1
739	pand	xmm13,xmm0
740	por	xmm14,xmm4
741	pxor	xmm10,xmm11
742	pxor	xmm9,xmm12
743	pxor	xmm8,xmm13
744	pxor	xmm7,xmm14
745
746
747
748
749
750	movdqa	xmm11,xmm10
751	pand	xmm10,xmm8
752	pxor	xmm11,xmm9
753
754	movdqa	xmm13,xmm7
755	movdqa	xmm14,xmm11
756	pxor	xmm13,xmm10
757	pand	xmm14,xmm13
758
759	movdqa	xmm12,xmm8
760	pxor	xmm14,xmm9
761	pxor	xmm12,xmm7
762
763	pxor	xmm10,xmm9
764
765	pand	xmm12,xmm10
766
767	movdqa	xmm9,xmm13
768	pxor	xmm12,xmm7
769
770	pxor	xmm9,xmm12
771	pxor	xmm8,xmm12
772
773	pand	xmm9,xmm7
774
775	pxor	xmm13,xmm9
776	pxor	xmm8,xmm9
777
778	pand	xmm13,xmm14
779
780	pxor	xmm13,xmm11
781	movdqa	xmm11,xmm4
782	movdqa	xmm7,xmm0
783	movdqa	xmm9,xmm14
784	pxor	xmm9,xmm13
785	pand	xmm9,xmm4
786	pxor	xmm4,xmm0
787	pand	xmm0,xmm14
788	pand	xmm4,xmm13
789	pxor	xmm4,xmm0
790	pxor	xmm0,xmm9
791	pxor	xmm11,xmm1
792	pxor	xmm7,xmm5
793	pxor	xmm14,xmm12
794	pxor	xmm13,xmm8
795	movdqa	xmm10,xmm14
796	movdqa	xmm9,xmm12
797	pxor	xmm10,xmm13
798	pxor	xmm9,xmm8
799	pand	xmm10,xmm11
800	pand	xmm9,xmm1
801	pxor	xmm11,xmm7
802	pxor	xmm1,xmm5
803	pand	xmm7,xmm14
804	pand	xmm5,xmm12
805	pand	xmm11,xmm13
806	pand	xmm1,xmm8
807	pxor	xmm7,xmm11
808	pxor	xmm1,xmm5
809	pxor	xmm11,xmm10
810	pxor	xmm5,xmm9
811	pxor	xmm4,xmm11
812	pxor	xmm1,xmm11
813	pxor	xmm0,xmm7
814	pxor	xmm5,xmm7
815
816	movdqa	xmm11,xmm2
817	movdqa	xmm7,xmm6
818	pxor	xmm11,xmm15
819	pxor	xmm7,xmm3
820	movdqa	xmm10,xmm14
821	movdqa	xmm9,xmm12
822	pxor	xmm10,xmm13
823	pxor	xmm9,xmm8
824	pand	xmm10,xmm11
825	pand	xmm9,xmm15
826	pxor	xmm11,xmm7
827	pxor	xmm15,xmm3
828	pand	xmm7,xmm14
829	pand	xmm3,xmm12
830	pand	xmm11,xmm13
831	pand	xmm15,xmm8
832	pxor	xmm7,xmm11
833	pxor	xmm15,xmm3
834	pxor	xmm11,xmm10
835	pxor	xmm3,xmm9
836	pxor	xmm14,xmm12
837	pxor	xmm13,xmm8
838	movdqa	xmm10,xmm14
839	pxor	xmm10,xmm13
840	pand	xmm10,xmm2
841	pxor	xmm2,xmm6
842	pand	xmm6,xmm14
843	pand	xmm2,xmm13
844	pxor	xmm2,xmm6
845	pxor	xmm6,xmm10
846	pxor	xmm2,xmm11
847	pxor	xmm15,xmm11
848	pxor	xmm6,xmm7
849	pxor	xmm3,xmm7
850	pxor	xmm0,xmm6
851	pxor	xmm5,xmm4
852
853	pxor	xmm3,xmm0
854	pxor	xmm1,xmm6
855	pxor	xmm4,xmm6
856	pxor	xmm3,xmm1
857	pxor	xmm6,xmm15
858	pxor	xmm3,xmm4
859	pxor	xmm2,xmm5
860	pxor	xmm5,xmm0
861	pxor	xmm2,xmm3
862
863	pxor	xmm3,xmm15
864	pxor	xmm6,xmm2
; Loop control: leave once the counter has gone negative. The flags set by
; this dec are also what the jnz at the bottom of the loop tests, because
; none of the SSE instructions in between modify EFLAGS.
865	dec	r10d
866	jl	$L$dec_done
867
; Dword swaps/rotations (pshufd 4Eh, then 93h / 4Eh) combined with XORs
; across registers.
; NOTE(review): presumably the bit-sliced InvMixColumns step - confirm.
868	pshufd	xmm7,xmm15,04Eh
869	pshufd	xmm13,xmm2,04Eh
870	pxor	xmm7,xmm15
871	pshufd	xmm14,xmm4,04Eh
872	pxor	xmm13,xmm2
873	pshufd	xmm8,xmm0,04Eh
874	pxor	xmm14,xmm4
875	pshufd	xmm9,xmm5,04Eh
876	pxor	xmm8,xmm0
877	pshufd	xmm10,xmm3,04Eh
878	pxor	xmm9,xmm5
879	pxor	xmm15,xmm13
880	pxor	xmm0,xmm13
881	pshufd	xmm11,xmm1,04Eh
882	pxor	xmm10,xmm3
883	pxor	xmm5,xmm7
884	pxor	xmm3,xmm8
885	pshufd	xmm12,xmm6,04Eh
886	pxor	xmm11,xmm1
887	pxor	xmm0,xmm14
888	pxor	xmm1,xmm9
889	pxor	xmm12,xmm6
890
891	pxor	xmm5,xmm14
892	pxor	xmm3,xmm13
893	pxor	xmm1,xmm13
894	pxor	xmm6,xmm10
895	pxor	xmm2,xmm11
896	pxor	xmm1,xmm14
897	pxor	xmm6,xmm14
898	pxor	xmm4,xmm12
899	pshufd	xmm7,xmm15,093h
900	pshufd	xmm8,xmm0,093h
901	pxor	xmm15,xmm7
902	pshufd	xmm9,xmm5,093h
903	pxor	xmm0,xmm8
904	pshufd	xmm10,xmm3,093h
905	pxor	xmm5,xmm9
906	pshufd	xmm11,xmm1,093h
907	pxor	xmm3,xmm10
908	pshufd	xmm12,xmm6,093h
909	pxor	xmm1,xmm11
910	pshufd	xmm13,xmm2,093h
911	pxor	xmm6,xmm12
912	pshufd	xmm14,xmm4,093h
913	pxor	xmm2,xmm13
914	pxor	xmm4,xmm14
915
916	pxor	xmm8,xmm15
917	pxor	xmm7,xmm4
918	pxor	xmm8,xmm4
919	pshufd	xmm15,xmm15,04Eh
920	pxor	xmm9,xmm0
921	pshufd	xmm0,xmm0,04Eh
922	pxor	xmm12,xmm1
923	pxor	xmm15,xmm7
924	pxor	xmm13,xmm6
925	pxor	xmm0,xmm8
926	pxor	xmm11,xmm3
927	pshufd	xmm7,xmm1,04Eh
928	pxor	xmm14,xmm2
929	pshufd	xmm8,xmm6,04Eh
930	pxor	xmm10,xmm5
931	pshufd	xmm1,xmm3,04Eh
932	pxor	xmm10,xmm4
933	pshufd	xmm6,xmm4,04Eh
934	pxor	xmm11,xmm4
935	pshufd	xmm3,xmm2,04Eh
936	pxor	xmm7,xmm11
937	pshufd	xmm2,xmm5,04Eh
938	pxor	xmm8,xmm12
939	pxor	xmm10,xmm1
940	pxor	xmm6,xmm14
941	pxor	xmm13,xmm3
942	movdqa	xmm3,xmm7
943	pxor	xmm2,xmm9
944	movdqa	xmm5,xmm13
945	movdqa	xmm4,xmm8
946	movdqa	xmm1,xmm2
947	movdqa	xmm2,xmm10
; Choose the shuffle mask for the next pass: [-16+r11] while the counter is
; still non-zero, [-32+r11] for the pass entered with the counter at zero
; (the last pass before $L$dec_done is reached).
948	movdqa	xmm7,XMMWORD PTR[((-16))+r11]
949	jnz	$L$dec_loop
950	movdqa	xmm7,XMMWORD PTR[((-32))+r11]
951	jmp	$L$dec_loop
952ALIGN	16
; Exit path: run the 1/2/4 mask-and-shift exchange again (on the register
; order the loop leaves behind), then XOR in the final 16 bytes of key
; material at [rax].
953$L$dec_done::
954	movdqa	xmm7,XMMWORD PTR[r11]
955	movdqa	xmm8,XMMWORD PTR[16+r11]
956	movdqa	xmm9,xmm2
957	psrlq	xmm2,1
958	movdqa	xmm10,xmm1
959	psrlq	xmm1,1
960	pxor	xmm2,xmm4
961	pxor	xmm1,xmm6
962	pand	xmm2,xmm7
963	pand	xmm1,xmm7
964	pxor	xmm4,xmm2
965	psllq	xmm2,1
966	pxor	xmm6,xmm1
967	psllq	xmm1,1
968	pxor	xmm2,xmm9
969	pxor	xmm1,xmm10
970	movdqa	xmm9,xmm5
971	psrlq	xmm5,1
972	movdqa	xmm10,xmm15
973	psrlq	xmm15,1
974	pxor	xmm5,xmm3
975	pxor	xmm15,xmm0
976	pand	xmm5,xmm7
977	pand	xmm15,xmm7
978	pxor	xmm3,xmm5
979	psllq	xmm5,1
980	pxor	xmm0,xmm15
981	psllq	xmm15,1
982	pxor	xmm5,xmm9
983	pxor	xmm15,xmm10
984	movdqa	xmm7,XMMWORD PTR[32+r11]
985	movdqa	xmm9,xmm6
986	psrlq	xmm6,2
987	movdqa	xmm10,xmm1
988	psrlq	xmm1,2
989	pxor	xmm6,xmm4
990	pxor	xmm1,xmm2
991	pand	xmm6,xmm8
992	pand	xmm1,xmm8
993	pxor	xmm4,xmm6
994	psllq	xmm6,2
995	pxor	xmm2,xmm1
996	psllq	xmm1,2
997	pxor	xmm6,xmm9
998	pxor	xmm1,xmm10
999	movdqa	xmm9,xmm0
1000	psrlq	xmm0,2
1001	movdqa	xmm10,xmm15
1002	psrlq	xmm15,2
1003	pxor	xmm0,xmm3
1004	pxor	xmm15,xmm5
1005	pand	xmm0,xmm8
1006	pand	xmm15,xmm8
1007	pxor	xmm3,xmm0
1008	psllq	xmm0,2
1009	pxor	xmm5,xmm15
1010	psllq	xmm15,2
1011	pxor	xmm0,xmm9
1012	pxor	xmm15,xmm10
1013	movdqa	xmm9,xmm3
1014	psrlq	xmm3,4
1015	movdqa	xmm10,xmm5
1016	psrlq	xmm5,4
1017	pxor	xmm3,xmm4
1018	pxor	xmm5,xmm2
1019	pand	xmm3,xmm7
1020	pand	xmm5,xmm7
1021	pxor	xmm4,xmm3
1022	psllq	xmm3,4
1023	pxor	xmm2,xmm5
1024	psllq	xmm5,4
1025	pxor	xmm3,xmm9
1026	pxor	xmm5,xmm10
1027	movdqa	xmm9,xmm0
1028	psrlq	xmm0,4
1029	movdqa	xmm10,xmm15
1030	psrlq	xmm15,4
1031	pxor	xmm0,xmm6
1032	pxor	xmm15,xmm1
1033	pand	xmm0,xmm7
1034	pand	xmm15,xmm7
1035	pxor	xmm6,xmm0
1036	psllq	xmm0,4
1037	pxor	xmm1,xmm15
1038	psllq	xmm15,4
1039	pxor	xmm0,xmm9
1040	pxor	xmm15,xmm10
; Final key XOR into all eight result registers.
1041	movdqa	xmm7,XMMWORD PTR[rax]
1042	pxor	xmm5,xmm7
1043	pxor	xmm3,xmm7
1044	pxor	xmm1,xmm7
1045	pxor	xmm6,xmm7
1046	pxor	xmm2,xmm7
1047	pxor	xmm4,xmm7
1048	pxor	xmm15,xmm7
1049	pxor	xmm0,xmm7
; F3 C3 is the two-byte "rep ret" return encoding.
1050	DB	0F3h,0C3h		;repret
1051_bsaes_decrypt8	ENDP
1052
1053ALIGN	16
; ---------------------------------------------------------------------
; _bsaes_key_convert (private)
; Expands a conventional key schedule into the layout consumed by
; _bsaes_encrypt8 / _bsaes_decrypt8.
;
; In:
;   rcx  = source round keys, 16 bytes each (read with unaligned loads)
;   rax  = destination buffer, 16-byte aligned (written with movdqa)
;   r10d = count; the loop below runs r10d-1 times, so r10d must be at
;          least 2 (a value of 1 would wrap the counter)
; Writes:
;   [rax]           the first source key, copied unchanged (16 bytes)
;   then, per loop pass, eight 16-byte vectors (128 bytes) derived from
;   the next source key.
; Out:
;   rax  = address just past the last vector written
;   rcx  = address of the last source key
;   xmm6 = that last source key, loaded but NOT converted or stored
;   xmm7 = the 16 bytes at [80+$L$masks]
;   Both callers in this file combine xmm6/xmm7 and store the result
;   themselves after the call.
; Clobbers: xmm0-xmm15, r10d, r11, flags.
; ---------------------------------------------------------------------
1054_bsaes_key_convert	PROC PRIVATE
1055	lea	r11,QWORD PTR[$L$masks]
1056	movdqu	xmm7,XMMWORD PTR[rcx]
1057	lea	rcx,QWORD PTR[16+rcx]
; xmm0-xmm3 = four bit-select masks, xmm4 = byte-shuffle mask,
; xmm5 = all ones (used below to invert selected outputs).
; NOTE(review): the table contents at $L$masks are outside this view;
; presumably each of xmm0-xmm3 selects one bit of every byte - confirm.
1058	movdqa	xmm0,XMMWORD PTR[r11]
1059	movdqa	xmm1,XMMWORD PTR[16+r11]
1060	movdqa	xmm2,XMMWORD PTR[32+r11]
1061	movdqa	xmm3,XMMWORD PTR[48+r11]
1062	movdqa	xmm4,XMMWORD PTR[64+r11]
1063	pcmpeqd	xmm5,xmm5
1064
; Preload the second source key and emit the first one verbatim.
1065	movdqu	xmm6,XMMWORD PTR[rcx]
1066	movdqa	XMMWORD PTR[rax],xmm7
1067	lea	rax,QWORD PTR[16+rax]
1068	dec	r10d
1069	jmp	$L$key_loop
1070ALIGN	16
1071$L$key_loop::
; Hand-encoded "pshufb xmm6,xmm4": permute the bytes of the current key.
1072DB	102,15,56,0,244
1073
; For each mask m: out = pcmpeqb(key and m, m), i.e. a byte becomes 0FFh
; where every bit of m is set in the key byte and 00h otherwise.
1074	movdqa	xmm8,xmm0
1075	movdqa	xmm9,xmm1
1076
1077	pand	xmm8,xmm6
1078	pand	xmm9,xmm6
1079	movdqa	xmm10,xmm2
1080	pcmpeqb	xmm8,xmm0
; After its first use each mask is shifted left by 4 so the same four
; registers also provide the masks for outputs 4-7 (xmm12-xmm15).
1081	psllq	xmm0,4
1082	movdqa	xmm11,xmm3
1083	pcmpeqb	xmm9,xmm1
1084	psllq	xmm1,4
1085
1086	pand	xmm10,xmm6
1087	pand	xmm11,xmm6
1088	movdqa	xmm12,xmm0
1089	pcmpeqb	xmm10,xmm2
1090	psllq	xmm2,4
1091	movdqa	xmm13,xmm1
1092	pcmpeqb	xmm11,xmm3
1093	psllq	xmm3,4
1094
1095	movdqa	xmm14,xmm2
1096	movdqa	xmm15,xmm3
; Outputs 0 and 1 are inverted (XOR with all ones) before being stored.
; NOTE(review): outputs 0, 1, 5 and 6 are the inverted ones; presumably
; this folds a fixed constant into the key - confirm with the generator.
1097	pxor	xmm8,xmm5
1098	pxor	xmm9,xmm5
1099
1100	pand	xmm12,xmm6
1101	pand	xmm13,xmm6
1102	movdqa	XMMWORD PTR[rax],xmm8
1103	pcmpeqb	xmm12,xmm0
; Shift each mask back right by 4 so it is restored for the next pass.
1104	psrlq	xmm0,4
1105	movdqa	XMMWORD PTR[16+rax],xmm9
1106	pcmpeqb	xmm13,xmm1
1107	psrlq	xmm1,4
1108	lea	rcx,QWORD PTR[16+rcx]
1109
1110	pand	xmm14,xmm6
1111	pand	xmm15,xmm6
1112	movdqa	XMMWORD PTR[32+rax],xmm10
1113	pcmpeqb	xmm14,xmm2
1114	psrlq	xmm2,4
1115	movdqa	XMMWORD PTR[48+rax],xmm11
1116	pcmpeqb	xmm15,xmm3
1117	psrlq	xmm3,4
; Load the next source key; on the final pass this is the last key, which
; is returned to the caller in xmm6.
1118	movdqu	xmm6,XMMWORD PTR[rcx]
1119
; Outputs 5 and 6 are inverted as well.
1120	pxor	xmm13,xmm5
1121	pxor	xmm14,xmm5
1122	movdqa	XMMWORD PTR[64+rax],xmm12
1123	movdqa	XMMWORD PTR[80+rax],xmm13
1124	movdqa	XMMWORD PTR[96+rax],xmm14
1125	movdqa	XMMWORD PTR[112+rax],xmm15
1126	lea	rax,QWORD PTR[128+rax]
1127	dec	r10d
1128	jnz	$L$key_loop
1129
; Return the constant at [80+r11] in xmm7 for the caller's final fix-up.
1130	movdqa	xmm7,XMMWORD PTR[80+r11]
1131
; F3 C3 is the two-byte "rep ret" return encoding.
1132	DB	0F3h,0C3h		;repret
1133_bsaes_key_convert	ENDP
1134EXTERN	asm_AES_cbc_encrypt:NEAR
1135PUBLIC	bsaes_cbc_encrypt
1136
1137ALIGN	16
; ---------------------------------------------------------------------
; bsaes_cbc_encrypt (public)
; CBC-mode decryption, eight blocks at a time, via _bsaes_decrypt8.
;
; Arguments as the code reads them (consistent with the Microsoft x64
; convention: rcx, rdx, r8, r9, then stack slots above the return address):
;   rcx            = input pointer
;   rdx            = output pointer
;   r8             = length in bytes (only whole 16-byte blocks are
;                    processed; the count is r8 shifted right by 4)
;   r9             = key structure; the DWORD at offset 240 is used as
;                    the round count and [r9] as the round keys
;   [40+rsp] entry = pointer to the 16-byte IV (read at start, updated
;                    with the last input block before returning)
;   [48+rsp] entry = DWORD flag
; If the flag is non-zero or the length is below 128 bytes, control is
; tail-jumped to asm_AES_cbc_encrypt with all arguments untouched.
;
; Stack frame (rbp = frame base once set up):
;   [rbp+32]        16-byte scratch slot (saved IV / single-block output)
;   [rbp+64..223]   saved xmm6-xmm15
;   below rbp       converted key schedule, rounds*128-96 bytes, zeroed
;                   again before returning
; NOTE(review): rax = entry rsp and the $L$cbc_dec_prologue / _body /
; _epilogue labels are presumably consumed by unwind data located outside
; this view - confirm.
; ---------------------------------------------------------------------
1138bsaes_cbc_encrypt	PROC PUBLIC
1139	mov	r11d,DWORD PTR[48+rsp]
1140	cmp	r11d,0
1141	jne	asm_AES_cbc_encrypt
1142	cmp	r8,128
1143	jb	asm_AES_cbc_encrypt
1144
1145	mov	rax,rsp
1146$L$cbc_dec_prologue::
1147	push	rbp
1148	push	rbx
1149	push	r12
1150	push	r13
1151	push	r14
1152	push	r15
; Six pushes (48 bytes) plus 72 put rsp 120 bytes below its entry value, so
; [160+rsp] is the entry-time [40+rsp] slot: the IV pointer.
1153	lea	rsp,QWORD PTR[((-72))+rsp]
1154	mov	r10,QWORD PTR[160+rsp]
1155	lea	rsp,QWORD PTR[((-160))+rsp]
; Save xmm6-xmm15 (aligned stores, so rsp is expected to be 16-byte
; aligned here).
1156	movaps	XMMWORD PTR[64+rsp],xmm6
1157	movaps	XMMWORD PTR[80+rsp],xmm7
1158	movaps	XMMWORD PTR[96+rsp],xmm8
1159	movaps	XMMWORD PTR[112+rsp],xmm9
1160	movaps	XMMWORD PTR[128+rsp],xmm10
1161	movaps	XMMWORD PTR[144+rsp],xmm11
1162	movaps	XMMWORD PTR[160+rsp],xmm12
1163	movaps	XMMWORD PTR[176+rsp],xmm13
1164	movaps	XMMWORD PTR[192+rsp],xmm14
1165	movaps	XMMWORD PTR[208+rsp],xmm15
1166$L$cbc_dec_body::
; Register roles for the rest of the routine:
;   r12 = input cursor, r13 = output cursor, r14 = blocks remaining,
;   r15 = key structure, rbx = IV pointer, edx = round count,
;   xmm14 = current chaining value.
1167	mov	rbp,rsp
1168	mov	eax,DWORD PTR[240+r9]
1169	mov	r12,rcx
1170	mov	r13,rdx
1171	mov	r14,r8
1172	mov	r15,r9
1173	mov	rbx,r10
1174	shr	r14,4
1175
; Reserve rounds*128-96 bytes below the frame for the converted key.
1176	mov	edx,eax
1177	shl	rax,7
1178	sub	rax,96
1179	sub	rsp,rax
1180
1181	mov	rax,rsp
1182	mov	rcx,r15
1183	mov	r10d,edx
1184	call	_bsaes_key_convert
; Fix-up after conversion: the first key slot is XORed with the constant
; returned in xmm7, and the last source key (xmm6) is appended unchanged
; at the end of the converted schedule.
1185	pxor	xmm7,XMMWORD PTR[rsp]
1186	movdqa	XMMWORD PTR[rax],xmm6
1187	movdqa	XMMWORD PTR[rsp],xmm7
1188
; xmm14 = IV. The length check at entry guarantees at least eight blocks,
; so the loop is entered unconditionally.
1189	movdqu	xmm14,XMMWORD PTR[rbx]
1190	sub	r14,8
1191$L$cbc_dec_loop::
; Load eight input blocks; park the chaining value at [32+rbp] because
; _bsaes_decrypt8 overwrites xmm14.
1192	movdqu	xmm15,XMMWORD PTR[r12]
1193	movdqu	xmm0,XMMWORD PTR[16+r12]
1194	movdqu	xmm1,XMMWORD PTR[32+r12]
1195	movdqu	xmm2,XMMWORD PTR[48+r12]
1196	movdqu	xmm3,XMMWORD PTR[64+r12]
1197	movdqu	xmm4,XMMWORD PTR[80+r12]
1198	mov	rax,rsp
1199	movdqu	xmm5,XMMWORD PTR[96+r12]
1200	mov	r10d,edx
1201	movdqu	xmm6,XMMWORD PTR[112+r12]
1202	movdqa	XMMWORD PTR[32+rbp],xmm14
1203
1204	call	_bsaes_decrypt8
1205
; CBC chaining: XOR output 0 with the saved chaining value and outputs 1-7
; with input blocks 0-6 (reloaded from memory). Outputs are in registers
; xmm15, xmm0, xmm5, xmm3, xmm1, xmm6, xmm2, xmm4. Input block 7 becomes
; the next chaining value in xmm14.
1206	pxor	xmm15,XMMWORD PTR[32+rbp]
1207	movdqu	xmm7,XMMWORD PTR[r12]
1208	movdqu	xmm8,XMMWORD PTR[16+r12]
1209	pxor	xmm0,xmm7
1210	movdqu	xmm9,XMMWORD PTR[32+r12]
1211	pxor	xmm5,xmm8
1212	movdqu	xmm10,XMMWORD PTR[48+r12]
1213	pxor	xmm3,xmm9
1214	movdqu	xmm11,XMMWORD PTR[64+r12]
1215	pxor	xmm1,xmm10
1216	movdqu	xmm12,XMMWORD PTR[80+r12]
1217	pxor	xmm6,xmm11
1218	movdqu	xmm13,XMMWORD PTR[96+r12]
1219	pxor	xmm2,xmm12
1220	movdqu	xmm14,XMMWORD PTR[112+r12]
1221	pxor	xmm4,xmm13
1222	movdqu	XMMWORD PTR[r13],xmm15
1223	lea	r12,QWORD PTR[128+r12]
1224	movdqu	XMMWORD PTR[16+r13],xmm0
1225	movdqu	XMMWORD PTR[32+r13],xmm5
1226	movdqu	XMMWORD PTR[48+r13],xmm3
1227	movdqu	XMMWORD PTR[64+r13],xmm1
1228	movdqu	XMMWORD PTR[80+r13],xmm6
1229	movdqu	XMMWORD PTR[96+r13],xmm2
1230	movdqu	XMMWORD PTR[112+r13],xmm4
1231	lea	r13,QWORD PTR[128+r13]
; Continue while at least eight more blocks remain (no borrow).
1232	sub	r14,8
1233	jnc	$L$cbc_dec_loop
1234
; Tail: r14 = 0..7 blocks left after undoing the last subtraction.
1235	add	r14,8
1236	jz	$L$cbc_dec_done
1237
; Dispatch on the remaining count. Each cmp feeds both a jb and a later
; je; the movdqu in between does not modify flags. Seven blocks falls
; through to the code after the last je.
1238	movdqu	xmm15,XMMWORD PTR[r12]
1239	mov	rax,rsp
1240	mov	r10d,edx
1241	cmp	r14,2
1242	jb	$L$cbc_dec_one
1243	movdqu	xmm0,XMMWORD PTR[16+r12]
1244	je	$L$cbc_dec_two
1245	movdqu	xmm1,XMMWORD PTR[32+r12]
1246	cmp	r14,4
1247	jb	$L$cbc_dec_three
1248	movdqu	xmm2,XMMWORD PTR[48+r12]
1249	je	$L$cbc_dec_four
1250	movdqu	xmm3,XMMWORD PTR[64+r12]
1251	cmp	r14,6
1252	jb	$L$cbc_dec_five
1253	movdqu	xmm4,XMMWORD PTR[80+r12]
1254	je	$L$cbc_dec_six
; Seven remaining blocks.
1255	movdqu	xmm5,XMMWORD PTR[96+r12]
1256	movdqa	XMMWORD PTR[32+rbp],xmm14
1257	call	_bsaes_decrypt8
1258	pxor	xmm15,XMMWORD PTR[32+rbp]
1259	movdqu	xmm7,XMMWORD PTR[r12]
1260	movdqu	xmm8,XMMWORD PTR[16+r12]
1261	pxor	xmm0,xmm7
1262	movdqu	xmm9,XMMWORD PTR[32+r12]
1263	pxor	xmm5,xmm8
1264	movdqu	xmm10,XMMWORD PTR[48+r12]
1265	pxor	xmm3,xmm9
1266	movdqu	xmm11,XMMWORD PTR[64+r12]
1267	pxor	xmm1,xmm10
1268	movdqu	xmm12,XMMWORD PTR[80+r12]
1269	pxor	xmm6,xmm11
1270	movdqu	xmm14,XMMWORD PTR[96+r12]
1271	pxor	xmm2,xmm12
1272	movdqu	XMMWORD PTR[r13],xmm15
1273	movdqu	XMMWORD PTR[16+r13],xmm0
1274	movdqu	XMMWORD PTR[32+r13],xmm5
1275	movdqu	XMMWORD PTR[48+r13],xmm3
1276	movdqu	XMMWORD PTR[64+r13],xmm1
1277	movdqu	XMMWORD PTR[80+r13],xmm6
1278	movdqu	XMMWORD PTR[96+r13],xmm2
1279	jmp	$L$cbc_dec_done
1280ALIGN	16
; Six remaining blocks. In every tail case the last input block is loaded
; into xmm14 so it can be written back as the new IV.
1281$L$cbc_dec_six::
1282	movdqa	XMMWORD PTR[32+rbp],xmm14
1283	call	_bsaes_decrypt8
1284	pxor	xmm15,XMMWORD PTR[32+rbp]
1285	movdqu	xmm7,XMMWORD PTR[r12]
1286	movdqu	xmm8,XMMWORD PTR[16+r12]
1287	pxor	xmm0,xmm7
1288	movdqu	xmm9,XMMWORD PTR[32+r12]
1289	pxor	xmm5,xmm8
1290	movdqu	xmm10,XMMWORD PTR[48+r12]
1291	pxor	xmm3,xmm9
1292	movdqu	xmm11,XMMWORD PTR[64+r12]
1293	pxor	xmm1,xmm10
1294	movdqu	xmm14,XMMWORD PTR[80+r12]
1295	pxor	xmm6,xmm11
1296	movdqu	XMMWORD PTR[r13],xmm15
1297	movdqu	XMMWORD PTR[16+r13],xmm0
1298	movdqu	XMMWORD PTR[32+r13],xmm5
1299	movdqu	XMMWORD PTR[48+r13],xmm3
1300	movdqu	XMMWORD PTR[64+r13],xmm1
1301	movdqu	XMMWORD PTR[80+r13],xmm6
1302	jmp	$L$cbc_dec_done
1303ALIGN	16
; Five remaining blocks.
1304$L$cbc_dec_five::
1305	movdqa	XMMWORD PTR[32+rbp],xmm14
1306	call	_bsaes_decrypt8
1307	pxor	xmm15,XMMWORD PTR[32+rbp]
1308	movdqu	xmm7,XMMWORD PTR[r12]
1309	movdqu	xmm8,XMMWORD PTR[16+r12]
1310	pxor	xmm0,xmm7
1311	movdqu	xmm9,XMMWORD PTR[32+r12]
1312	pxor	xmm5,xmm8
1313	movdqu	xmm10,XMMWORD PTR[48+r12]
1314	pxor	xmm3,xmm9
1315	movdqu	xmm14,XMMWORD PTR[64+r12]
1316	pxor	xmm1,xmm10
1317	movdqu	XMMWORD PTR[r13],xmm15
1318	movdqu	XMMWORD PTR[16+r13],xmm0
1319	movdqu	XMMWORD PTR[32+r13],xmm5
1320	movdqu	XMMWORD PTR[48+r13],xmm3
1321	movdqu	XMMWORD PTR[64+r13],xmm1
1322	jmp	$L$cbc_dec_done
1323ALIGN	16
; Four remaining blocks.
1324$L$cbc_dec_four::
1325	movdqa	XMMWORD PTR[32+rbp],xmm14
1326	call	_bsaes_decrypt8
1327	pxor	xmm15,XMMWORD PTR[32+rbp]
1328	movdqu	xmm7,XMMWORD PTR[r12]
1329	movdqu	xmm8,XMMWORD PTR[16+r12]
1330	pxor	xmm0,xmm7
1331	movdqu	xmm9,XMMWORD PTR[32+r12]
1332	pxor	xmm5,xmm8
1333	movdqu	xmm14,XMMWORD PTR[48+r12]
1334	pxor	xmm3,xmm9
1335	movdqu	XMMWORD PTR[r13],xmm15
1336	movdqu	XMMWORD PTR[16+r13],xmm0
1337	movdqu	XMMWORD PTR[32+r13],xmm5
1338	movdqu	XMMWORD PTR[48+r13],xmm3
1339	jmp	$L$cbc_dec_done
1340ALIGN	16
; Three remaining blocks.
1341$L$cbc_dec_three::
1342	movdqa	XMMWORD PTR[32+rbp],xmm14
1343	call	_bsaes_decrypt8
1344	pxor	xmm15,XMMWORD PTR[32+rbp]
1345	movdqu	xmm7,XMMWORD PTR[r12]
1346	movdqu	xmm8,XMMWORD PTR[16+r12]
1347	pxor	xmm0,xmm7
1348	movdqu	xmm14,XMMWORD PTR[32+r12]
1349	pxor	xmm5,xmm8
1350	movdqu	XMMWORD PTR[r13],xmm15
1351	movdqu	XMMWORD PTR[16+r13],xmm0
1352	movdqu	XMMWORD PTR[32+r13],xmm5
1353	jmp	$L$cbc_dec_done
1354ALIGN	16
; Two remaining blocks.
1355$L$cbc_dec_two::
1356	movdqa	XMMWORD PTR[32+rbp],xmm14
1357	call	_bsaes_decrypt8
1358	pxor	xmm15,XMMWORD PTR[32+rbp]
1359	movdqu	xmm7,XMMWORD PTR[r12]
1360	movdqu	xmm14,XMMWORD PTR[16+r12]
1361	pxor	xmm0,xmm7
1362	movdqu	XMMWORD PTR[r13],xmm15
1363	movdqu	XMMWORD PTR[16+r13],xmm0
1364	jmp	$L$cbc_dec_done
1365ALIGN	16
; One remaining block: decrypt it with the external asm_AES_decrypt
; (rcx = input block, rdx = scratch at [32+rbp], r8 = original key), then
; XOR the chaining value in xmm14 with the result. The input block loaded
; into xmm15 earlier becomes the new IV.
; Relies on asm_AES_decrypt preserving xmm14 and xmm15.
1366$L$cbc_dec_one::
1367	lea	rcx,QWORD PTR[r12]
1368	lea	rdx,QWORD PTR[32+rbp]
1369	lea	r8,QWORD PTR[r15]
1370	call	asm_AES_decrypt
1371	pxor	xmm14,XMMWORD PTR[32+rbp]
1372	movdqu	XMMWORD PTR[r13],xmm14
1373	movdqa	xmm14,xmm15
1374
1375$L$cbc_dec_done::
; Write the updated IV back, then wipe the converted key schedule
; (everything from rsp up to rbp) 32 bytes per pass.
1376	movdqu	XMMWORD PTR[rbx],xmm14
1377	lea	rax,QWORD PTR[rsp]
1378	pxor	xmm0,xmm0
1379$L$cbc_dec_bzero::
1380	movdqa	XMMWORD PTR[rax],xmm0
1381	movdqa	XMMWORD PTR[16+rax],xmm0
1382	lea	rax,QWORD PTR[32+rax]
1383	cmp	rbp,rax
1384	ja	$L$cbc_dec_bzero
1385
; Epilogue: drop the key area, restore xmm6-xmm15, then reload the saved
; general-purpose registers from the six push slots (rbp via rax).
1386	lea	rsp,QWORD PTR[rbp]
1387	movaps	xmm6,XMMWORD PTR[64+rbp]
1388	movaps	xmm7,XMMWORD PTR[80+rbp]
1389	movaps	xmm8,XMMWORD PTR[96+rbp]
1390	movaps	xmm9,XMMWORD PTR[112+rbp]
1391	movaps	xmm10,XMMWORD PTR[128+rbp]
1392	movaps	xmm11,XMMWORD PTR[144+rbp]
1393	movaps	xmm12,XMMWORD PTR[160+rbp]
1394	movaps	xmm13,XMMWORD PTR[176+rbp]
1395	movaps	xmm14,XMMWORD PTR[192+rbp]
1396	movaps	xmm15,XMMWORD PTR[208+rbp]
1397	lea	rsp,QWORD PTR[160+rbp]
1398	mov	r15,QWORD PTR[72+rsp]
1399	mov	r14,QWORD PTR[80+rsp]
1400	mov	r13,QWORD PTR[88+rsp]
1401	mov	r12,QWORD PTR[96+rsp]
1402	mov	rbx,QWORD PTR[104+rsp]
1403	mov	rax,QWORD PTR[112+rsp]
1404	lea	rsp,QWORD PTR[120+rsp]
1405	mov	rbp,rax
1406$L$cbc_dec_epilogue::
; F3 C3 is the two-byte "rep ret" return encoding.
1407	DB	0F3h,0C3h		;repret
1408bsaes_cbc_encrypt	ENDP
1409
1410PUBLIC	bsaes_ctr32_encrypt_blocks
1411
1412ALIGN	16
1413bsaes_ctr32_encrypt_blocks	PROC PUBLIC
1414	mov	rax,rsp
1415$L$ctr_enc_prologue::
1416	push	rbp
1417	push	rbx
1418	push	r12
1419	push	r13
1420	push	r14
1421	push	r15
1422	lea	rsp,QWORD PTR[((-72))+rsp]
1423	mov	r10,QWORD PTR[160+rsp]
1424	lea	rsp,QWORD PTR[((-160))+rsp]
1425	movaps	XMMWORD PTR[64+rsp],xmm6
1426	movaps	XMMWORD PTR[80+rsp],xmm7
1427	movaps	XMMWORD PTR[96+rsp],xmm8
1428	movaps	XMMWORD PTR[112+rsp],xmm9
1429	movaps	XMMWORD PTR[128+rsp],xmm10
1430	movaps	XMMWORD PTR[144+rsp],xmm11
1431	movaps	XMMWORD PTR[160+rsp],xmm12
1432	movaps	XMMWORD PTR[176+rsp],xmm13
1433	movaps	XMMWORD PTR[192+rsp],xmm14
1434	movaps	XMMWORD PTR[208+rsp],xmm15
1435$L$ctr_enc_body::
1436	mov	rbp,rsp
1437	movdqu	xmm0,XMMWORD PTR[r10]
1438	mov	eax,DWORD PTR[240+r9]
1439	mov	r12,rcx
1440	mov	r13,rdx
1441	mov	r14,r8
1442	mov	r15,r9
1443	movdqa	XMMWORD PTR[32+rbp],xmm0
1444	cmp	r8,8
1445	jb	$L$ctr_enc_short
1446
1447	mov	ebx,eax
1448	shl	rax,7
1449	sub	rax,96
1450	sub	rsp,rax
1451
1452	mov	rax,rsp
1453	mov	rcx,r15
1454	mov	r10d,ebx
1455	call	_bsaes_key_convert
1456	pxor	xmm7,xmm6
1457	movdqa	XMMWORD PTR[rax],xmm7
1458
1459	movdqa	xmm8,XMMWORD PTR[rsp]
1460	lea	r11,QWORD PTR[$L$ADD1]
1461	movdqa	xmm15,XMMWORD PTR[32+rbp]
1462	movdqa	xmm7,XMMWORD PTR[((-32))+r11]
1463DB	102,68,15,56,0,199
1464DB	102,68,15,56,0,255
1465	movdqa	XMMWORD PTR[rsp],xmm8
1466	jmp	$L$ctr_enc_loop
1467ALIGN	16
1468$L$ctr_enc_loop::
1469	movdqa	XMMWORD PTR[32+rbp],xmm15
1470	movdqa	xmm0,xmm15
1471	movdqa	xmm1,xmm15
1472	paddd	xmm0,XMMWORD PTR[r11]
1473	movdqa	xmm2,xmm15
1474	paddd	xmm1,XMMWORD PTR[16+r11]
1475	movdqa	xmm3,xmm15
1476	paddd	xmm2,XMMWORD PTR[32+r11]
1477	movdqa	xmm4,xmm15
1478	paddd	xmm3,XMMWORD PTR[48+r11]
1479	movdqa	xmm5,xmm15
1480	paddd	xmm4,XMMWORD PTR[64+r11]
1481	movdqa	xmm6,xmm15
1482	paddd	xmm5,XMMWORD PTR[80+r11]
1483	paddd	xmm6,XMMWORD PTR[96+r11]
1484
1485
1486
1487	movdqa	xmm8,XMMWORD PTR[rsp]
1488	lea	rax,QWORD PTR[16+rsp]
1489	movdqa	xmm7,XMMWORD PTR[((-16))+r11]
1490	pxor	xmm15,xmm8
1491	pxor	xmm0,xmm8
1492DB	102,68,15,56,0,255
1493	pxor	xmm1,xmm8
1494DB	102,15,56,0,199
1495	pxor	xmm2,xmm8
1496DB	102,15,56,0,207
1497	pxor	xmm3,xmm8
1498DB	102,15,56,0,215
1499	pxor	xmm4,xmm8
1500DB	102,15,56,0,223
1501	pxor	xmm5,xmm8
1502DB	102,15,56,0,231
1503	pxor	xmm6,xmm8
1504DB	102,15,56,0,239
1505	lea	r11,QWORD PTR[$L$BS0]
1506DB	102,15,56,0,247
1507	mov	r10d,ebx
1508
1509	call	_bsaes_encrypt8_bitslice
1510
1511	sub	r14,8
1512	jc	$L$ctr_enc_loop_done
1513
1514	movdqu	xmm7,XMMWORD PTR[r12]
1515	movdqu	xmm8,XMMWORD PTR[16+r12]
1516	movdqu	xmm9,XMMWORD PTR[32+r12]
1517	movdqu	xmm10,XMMWORD PTR[48+r12]
1518	movdqu	xmm11,XMMWORD PTR[64+r12]
1519	movdqu	xmm12,XMMWORD PTR[80+r12]
1520	movdqu	xmm13,XMMWORD PTR[96+r12]
1521	movdqu	xmm14,XMMWORD PTR[112+r12]
1522	lea	r12,QWORD PTR[128+r12]
1523	pxor	xmm7,xmm15
1524	movdqa	xmm15,XMMWORD PTR[32+rbp]
1525	pxor	xmm0,xmm8
1526	movdqu	XMMWORD PTR[r13],xmm7
1527	pxor	xmm3,xmm9
1528	movdqu	XMMWORD PTR[16+r13],xmm0
1529	pxor	xmm5,xmm10
1530	movdqu	XMMWORD PTR[32+r13],xmm3
1531	pxor	xmm2,xmm11
1532	movdqu	XMMWORD PTR[48+r13],xmm5
1533	pxor	xmm6,xmm12
1534	movdqu	XMMWORD PTR[64+r13],xmm2
1535	pxor	xmm1,xmm13
1536	movdqu	XMMWORD PTR[80+r13],xmm6
1537	pxor	xmm4,xmm14
1538	movdqu	XMMWORD PTR[96+r13],xmm1
1539	lea	r11,QWORD PTR[$L$ADD1]
1540	movdqu	XMMWORD PTR[112+r13],xmm4
1541	lea	r13,QWORD PTR[128+r13]
1542	paddd	xmm15,XMMWORD PTR[112+r11]
1543	jnz	$L$ctr_enc_loop
1544
1545	jmp	$L$ctr_enc_done
1546ALIGN	16
1547$L$ctr_enc_loop_done::
1548	add	r14,8
1549	movdqu	xmm7,XMMWORD PTR[r12]
1550	pxor	xmm15,xmm7
1551	movdqu	XMMWORD PTR[r13],xmm15
1552	cmp	r14,2
1553	jb	$L$ctr_enc_done
1554	movdqu	xmm8,XMMWORD PTR[16+r12]
1555	pxor	xmm0,xmm8
1556	movdqu	XMMWORD PTR[16+r13],xmm0
1557	je	$L$ctr_enc_done
1558	movdqu	xmm9,XMMWORD PTR[32+r12]
1559	pxor	xmm3,xmm9
1560	movdqu	XMMWORD PTR[32+r13],xmm3
1561	cmp	r14,4
1562	jb	$L$ctr_enc_done
1563	movdqu	xmm10,XMMWORD PTR[48+r12]
1564	pxor	xmm5,xmm10
1565	movdqu	XMMWORD PTR[48+r13],xmm5
1566	je	$L$ctr_enc_done
1567	movdqu	xmm11,XMMWORD PTR[64+r12]
1568	pxor	xmm2,xmm11
1569	movdqu	XMMWORD PTR[64+r13],xmm2
1570	cmp	r14,6
1571	jb	$L$ctr_enc_done
1572	movdqu	xmm12,XMMWORD PTR[80+r12]
1573	pxor	xmm6,xmm12
1574	movdqu	XMMWORD PTR[80+r13],xmm6
1575	je	$L$ctr_enc_done
1576	movdqu	xmm13,XMMWORD PTR[96+r12]
1577	pxor	xmm1,xmm13
1578	movdqu	XMMWORD PTR[96+r13],xmm1
1579	jmp	$L$ctr_enc_done
1580
1581ALIGN	16
1582$L$ctr_enc_short::
1583	lea	rcx,QWORD PTR[32+rbp]
1584	lea	rdx,QWORD PTR[48+rbp]
1585	lea	r8,QWORD PTR[r15]
1586	call	asm_AES_encrypt
1587	movdqu	xmm0,XMMWORD PTR[r12]
1588	lea	r12,QWORD PTR[16+r12]
1589	mov	eax,DWORD PTR[44+rbp]
1590	bswap	eax
1591	pxor	xmm0,XMMWORD PTR[48+rbp]
1592	inc	eax
1593	movdqu	XMMWORD PTR[r13],xmm0
1594	bswap	eax
1595	lea	r13,QWORD PTR[16+r13]
1596	mov	DWORD PTR[44+rsp],eax
1597	dec	r14
1598	jnz	$L$ctr_enc_short
1599
1600$L$ctr_enc_done::
1601	lea	rax,QWORD PTR[rsp]
1602	pxor	xmm0,xmm0
1603$L$ctr_enc_bzero::
1604	movdqa	XMMWORD PTR[rax],xmm0
1605	movdqa	XMMWORD PTR[16+rax],xmm0
1606	lea	rax,QWORD PTR[32+rax]
1607	cmp	rbp,rax
1608	ja	$L$ctr_enc_bzero
1609
1610	lea	rsp,QWORD PTR[rbp]
1611	movaps	xmm6,XMMWORD PTR[64+rbp]
1612	movaps	xmm7,XMMWORD PTR[80+rbp]
1613	movaps	xmm8,XMMWORD PTR[96+rbp]
1614	movaps	xmm9,XMMWORD PTR[112+rbp]
1615	movaps	xmm10,XMMWORD PTR[128+rbp]
1616	movaps	xmm11,XMMWORD PTR[144+rbp]
1617	movaps	xmm12,XMMWORD PTR[160+rbp]
1618	movaps	xmm13,XMMWORD PTR[176+rbp]
1619	movaps	xmm14,XMMWORD PTR[192+rbp]
1620	movaps	xmm15,XMMWORD PTR[208+rbp]
1621	lea	rsp,QWORD PTR[160+rbp]
1622	mov	r15,QWORD PTR[72+rsp]
1623	mov	r14,QWORD PTR[80+rsp]
1624	mov	r13,QWORD PTR[88+rsp]
1625	mov	r12,QWORD PTR[96+rsp]
1626	mov	rbx,QWORD PTR[104+rsp]
1627	mov	rax,QWORD PTR[112+rsp]
1628	lea	rsp,QWORD PTR[120+rsp]
1629	mov	rbp,rax
1630$L$ctr_enc_epilogue::
1631	DB	0F3h,0C3h		;repret
1632bsaes_ctr32_encrypt_blocks	ENDP
PUBLIC	bsaes_xts_encrypt

;-----------------------------------------------------------------------
; bsaes_xts_encrypt -- XTS-mode encryption. Full groups of blocks go
; through the 8-way bit-sliced core _bsaes_encrypt8; a single leftover
; block and the ciphertext-stealing block go through asm_AES_encrypt.
;
; Arguments as consumed below (Win64 positions):
;   rcx           input pointer                      -> r12
;   rdx           output pointer                     -> r13
;   r8            length in bytes                    -> r14 (copy in rbx)
;   r9            data key schedule                  -> r15
;                 (dword at +240 is read as the round count)
;   [entry rsp+40] key schedule used for the initial tweak   -> r10
;   [entry rsp+48] 16-byte block encrypted to form the tweak -> r11
;
; Frame (rbp = rsp at $L$xts_enc_body):
;   [rbp+32]        16-byte scratch block (initial tweak, then the
;                   in/out buffer for single-block asm_AES_encrypt calls)
;   [rbp+64..223]   saved xmm6-xmm15
;   [rbp+232..279]  saved r15,r14,r13,r12,rbx,rbp
;   below rbp       bit-sliced key schedule, then 128 bytes holding the
;                   eight tweaks of the current batch at [rsp+0..127]
;
; Register roles in the main body:
;   xmm6  running tweak         xmm12 $L$xts_magic
;   xmm14 sign mask of tweak    xmm13 carry/reduction term
;   edx   round count (copied to r10d before every core call)
;
; NOTE(review): no check for a length below 16 is visible here; the
; stealing code reads/writes out[-16..], so callers presumably guarantee
; at least one full block -- confirm against the C caller.
;-----------------------------------------------------------------------
ALIGN	16
bsaes_xts_encrypt	PROC PUBLIC
	; rax = entry rsp. se_handler takes the stack pointer from the
	; context's Rax when a fault hits before $L$xts_enc_body.
	mov	rax,rsp
$L$xts_enc_prologue::
	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,QWORD PTR[((-72))+rsp]
	; rsp is now entry-120, so these are entry+40 and entry+48:
	; the 5th and 6th arguments.
	mov	r10,QWORD PTR[160+rsp]
	mov	r11,QWORD PTR[168+rsp]
	lea	rsp,QWORD PTR[((-160))+rsp]
	; xmm6-xmm15 are callee-saved in the Win64 ABI; spill all ten.
	movaps	XMMWORD PTR[64+rsp],xmm6
	movaps	XMMWORD PTR[80+rsp],xmm7
	movaps	XMMWORD PTR[96+rsp],xmm8
	movaps	XMMWORD PTR[112+rsp],xmm9
	movaps	XMMWORD PTR[128+rsp],xmm10
	movaps	XMMWORD PTR[144+rsp],xmm11
	movaps	XMMWORD PTR[160+rsp],xmm12
	movaps	XMMWORD PTR[176+rsp],xmm13
	movaps	XMMWORD PTR[192+rsp],xmm14
	movaps	XMMWORD PTR[208+rsp],xmm15
$L$xts_enc_body::
	; rbp anchors the frame; rsp moves further down below.
	mov	rbp,rsp
	mov	r12,rcx
	mov	r13,rdx
	mov	r14,r8
	mov	r15,r9

	; Initial tweak: encrypt the block at r11 with key r10 into [rbp+32].
	lea	rcx,QWORD PTR[r11]
	lea	rdx,QWORD PTR[32+rbp]
	lea	r8,QWORD PTR[r10]
	call	asm_AES_encrypt

	; eax = round count of the data key; rbx keeps the unrounded length
	; so the tail size (len & 15) is still available at $L$xts_enc_done.
	mov	eax,DWORD PTR[240+r15]
	mov	rbx,r14

	; Reserve rounds*128 - 96 bytes for the converted key schedule.
	mov	edx,eax
	shl	rax,7
	sub	rax,96
	sub	rsp,rax

	; _bsaes_key_convert (defined earlier in the file): rax = destination,
	; rcx = source key, r10d = rounds.
	mov	rax,rsp
	mov	rcx,r15
	mov	r10d,edx
	call	_bsaes_key_convert
	; NOTE(review): presumably the last-round-key fix-up expected by
	; _bsaes_encrypt8; confirm against _bsaes_key_convert's outputs.
	pxor	xmm7,xmm6
	movdqa	XMMWORD PTR[rax],xmm7

	; r14 = bytes in whole blocks; carve 128 bytes for eight tweaks.
	and	r14,-16
	sub	rsp,080h
	movdqa	xmm6,XMMWORD PTR[32+rbp]

	; xmm14 = per-dword sign mask of the tweak (0 > x), xmm12 = magic.
	pxor	xmm14,xmm14
	movdqa	xmm12,XMMWORD PTR[$L$xts_magic]
	pcmpgtd	xmm14,xmm6

	; Fewer than 8 whole blocks -> short path.
	sub	r14,080h
	jc	$L$xts_enc_short
	jmp	$L$xts_enc_loop

ALIGN	16
$L$xts_enc_loop::
	; --- tweak 0 -> xmm15 and [rsp+0] -------------------------------
	; One tweak step = multiply by x in GF(2^128):
	;   pshufd 13h moves sign(dword3) to dword0 and sign(dword1) to
	;   dword2; pand with magic {87h,0,1,0} turns them into the
	;   reduction constant and the carry into the high qword;
	;   paddq doubles both qwords; pxor applies carry and reduction.
	; The pcmpgtd in the middle samples dwords 1 and 3 after the shift;
	; the final pxor only alters dwords 0 and 2, so the mask is already
	; that of the finished next tweak.
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm15,xmm6
	movdqa	XMMWORD PTR[rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	; --- tweak 1 -> xmm0 and [rsp+16]; start loading input -----------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm0,xmm6
	movdqa	XMMWORD PTR[16+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm7,XMMWORD PTR[r12]
	; --- tweak 2 -> xmm1 and [rsp+32] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm1,xmm6
	movdqa	XMMWORD PTR[32+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm8,XMMWORD PTR[16+r12]
	; block 0 ^= tweak 0 (input XORs trail the tweak generation)
	pxor	xmm15,xmm7
	; --- tweak 3 -> xmm2 and [rsp+48] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm2,xmm6
	movdqa	XMMWORD PTR[48+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm9,XMMWORD PTR[32+r12]
	pxor	xmm0,xmm8
	; --- tweak 4 -> xmm3 and [rsp+64] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm3,xmm6
	movdqa	XMMWORD PTR[64+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm10,XMMWORD PTR[48+r12]
	pxor	xmm1,xmm9
	; --- tweak 5 -> xmm4 and [rsp+80] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm4,xmm6
	movdqa	XMMWORD PTR[80+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm11,XMMWORD PTR[64+r12]
	pxor	xmm2,xmm10
	; --- tweak 6 -> xmm5 and [rsp+96] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm5,xmm6
	movdqa	XMMWORD PTR[96+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	; From here xmm12-xmm14 are reused as input registers; the magic
	; constant and sign mask are rebuilt after the core call.
	movdqu	xmm12,XMMWORD PTR[80+r12]
	pxor	xmm3,xmm11
	movdqu	xmm13,XMMWORD PTR[96+r12]
	pxor	xmm4,xmm12
	movdqu	xmm14,XMMWORD PTR[112+r12]
	lea	r12,QWORD PTR[128+r12]
	; tweak 7 stays in xmm6 and is saved to [rsp+112]; block 7 is
	; XORed into xmm6 itself.
	movdqa	XMMWORD PTR[112+rsp],xmm6
	pxor	xmm5,xmm13
	; rax = key schedule (just above the tweak area), r10d = rounds.
	lea	rax,QWORD PTR[128+rsp]
	pxor	xmm6,xmm14
	mov	r10d,edx

	call	_bsaes_encrypt8

	; The core returns blocks 0..7 in xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,
	; xmm1,xmm4. XOR each with its saved tweak and store.
	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm5,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm3
	pxor	xmm2,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm5
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm2
	pxor	xmm1,XMMWORD PTR[96+rsp]
	movdqu	XMMWORD PTR[80+r13],xmm6
	pxor	xmm4,XMMWORD PTR[112+rsp]
	movdqu	XMMWORD PTR[96+r13],xmm1
	movdqu	XMMWORD PTR[112+r13],xmm4
	lea	r13,QWORD PTR[128+r13]

	; Resume from tweak 7: reload it and the magic constant, then
	; advance one step so xmm6 is tweak 0 of the next batch and xmm14
	; is its sign mask.
	movdqa	xmm6,XMMWORD PTR[112+rsp]
	pxor	xmm14,xmm14
	movdqa	xmm12,XMMWORD PTR[$L$xts_magic]
	pcmpgtd	xmm14,xmm6
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13

	sub	r14,080h
	jnc	$L$xts_enc_loop

$L$xts_enc_short::
	; r14 went negative; restore the count of remaining whole-block
	; bytes (0..112). Zero means only a possible partial tail is left.
	add	r14,080h
	jz	$L$xts_enc_done
	; Same tweak pipeline as the main loop, but after each input load
	; the remaining length is compared and control leaves to the
	; matching $L$xts_enc_N once all N blocks are loaded. The trailing
	; XORs not yet executed are completed at the start of each target.
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm15,xmm6
	movdqa	XMMWORD PTR[rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm0,xmm6
	movdqa	XMMWORD PTR[16+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm7,XMMWORD PTR[r12]
	cmp	r14,16
	je	$L$xts_enc_1
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm1,xmm6
	movdqa	XMMWORD PTR[32+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm8,XMMWORD PTR[16+r12]
	cmp	r14,32
	je	$L$xts_enc_2
	pxor	xmm15,xmm7
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm2,xmm6
	movdqa	XMMWORD PTR[48+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm9,XMMWORD PTR[32+r12]
	cmp	r14,48
	je	$L$xts_enc_3
	pxor	xmm0,xmm8
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm3,xmm6
	movdqa	XMMWORD PTR[64+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm10,XMMWORD PTR[48+r12]
	cmp	r14,64
	je	$L$xts_enc_4
	pxor	xmm1,xmm9
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm4,xmm6
	movdqa	XMMWORD PTR[80+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm11,XMMWORD PTR[64+r12]
	cmp	r14,80
	je	$L$xts_enc_5
	pxor	xmm2,xmm10
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm5,xmm6
	movdqa	XMMWORD PTR[96+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm12,XMMWORD PTR[80+r12]
	cmp	r14,96
	je	$L$xts_enc_6
	; Seven blocks remain (r14 == 112). The eighth tweak is saved so it
	; can serve as the stealing tweak afterwards.
	pxor	xmm3,xmm11
	movdqu	xmm13,XMMWORD PTR[96+r12]
	pxor	xmm4,xmm12
	movdqa	XMMWORD PTR[112+rsp],xmm6
	lea	r12,QWORD PTR[112+r12]
	pxor	xmm5,xmm13
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	; Only the first seven core outputs are meaningful here.
	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm5,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm3
	pxor	xmm2,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm5
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm2
	pxor	xmm1,XMMWORD PTR[96+rsp]
	movdqu	XMMWORD PTR[80+r13],xmm6
	movdqu	XMMWORD PTR[96+r13],xmm1
	lea	r13,QWORD PTR[112+r13]

	; xmm6 = first unused tweak, needed if a partial tail follows.
	movdqa	xmm6,XMMWORD PTR[112+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_6::
	; Six blocks: finish the two pending input XORs, run the core,
	; store six outputs, next tweak is slot 6.
	pxor	xmm3,xmm11
	lea	r12,QWORD PTR[96+r12]
	pxor	xmm4,xmm12
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm5,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm3
	pxor	xmm2,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm5
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm2
	movdqu	XMMWORD PTR[80+r13],xmm6
	lea	r13,QWORD PTR[96+r13]

	movdqa	xmm6,XMMWORD PTR[96+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_5::
	; Five blocks; next tweak is slot 5.
	pxor	xmm2,xmm10
	lea	r12,QWORD PTR[80+r12]
	pxor	xmm3,xmm11
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm5,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm3
	pxor	xmm2,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm5
	movdqu	XMMWORD PTR[64+r13],xmm2
	lea	r13,QWORD PTR[80+r13]

	movdqa	xmm6,XMMWORD PTR[80+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_4::
	; Four blocks; next tweak is slot 4.
	pxor	xmm1,xmm9
	lea	r12,QWORD PTR[64+r12]
	pxor	xmm2,xmm10
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm5,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm3
	movdqu	XMMWORD PTR[48+r13],xmm5
	lea	r13,QWORD PTR[64+r13]

	movdqa	xmm6,XMMWORD PTR[64+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_3::
	; Three blocks; next tweak is slot 3.
	pxor	xmm0,xmm8
	lea	r12,QWORD PTR[48+r12]
	pxor	xmm1,xmm9
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm3,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	movdqu	XMMWORD PTR[32+r13],xmm3
	lea	r13,QWORD PTR[48+r13]

	movdqa	xmm6,XMMWORD PTR[48+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_2::
	; Two blocks; next tweak is slot 2.
	pxor	xmm15,xmm7
	lea	r12,QWORD PTR[32+r12]
	pxor	xmm0,xmm8
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_encrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	movdqu	XMMWORD PTR[16+r13],xmm0
	lea	r13,QWORD PTR[32+r13]

	movdqa	xmm6,XMMWORD PTR[32+rsp]
	jmp	$L$xts_enc_done
ALIGN	16
$L$xts_enc_1::
	; One block: skip the bit-sliced core and encrypt in place in the
	; [rbp+32] scratch block with asm_AES_encrypt (rcx = in, rdx = out,
	; r8 = key). xmm15 still holds the tweak after the call; it is one
	; of the registers this file treats as callee-saved.
	pxor	xmm7,xmm15
	lea	r12,QWORD PTR[16+r12]
	movdqa	XMMWORD PTR[32+rbp],xmm7
	lea	rcx,QWORD PTR[32+rbp]
	lea	rdx,QWORD PTR[32+rbp]
	lea	r8,QWORD PTR[r15]
	call	asm_AES_encrypt
	pxor	xmm15,XMMWORD PTR[32+rbp]





	movdqu	XMMWORD PTR[r13],xmm15
	lea	r13,QWORD PTR[16+r13]

	; Next tweak was already written to slot 1 before the branch.
	movdqa	xmm6,XMMWORD PTR[16+rsp]

$L$xts_enc_done::
	; ebx = len & 15 = size of the partial tail; none -> finished.
	and	ebx,15
	jz	$L$xts_enc_ret
	mov	rdx,r13

$L$xts_enc_steal::
	; Ciphertext stealing, one byte per iteration: the tail input byte
	; replaces byte i of the last ciphertext block (out[-16+i]) and the
	; displaced ciphertext byte becomes tail output byte out[i].
	movzx	eax,BYTE PTR[r12]
	movzx	ecx,BYTE PTR[((-16))+rdx]
	lea	r12,QWORD PTR[1+r12]
	mov	BYTE PTR[((-16))+rdx],al
	mov	BYTE PTR[rdx],cl
	lea	rdx,QWORD PTR[1+rdx]
	sub	ebx,1
	jnz	$L$xts_enc_steal

	; Re-encrypt the patched block at out-16 with the next tweak (xmm6)
	; and write it back in place.
	movdqu	xmm15,XMMWORD PTR[((-16))+r13]
	lea	rcx,QWORD PTR[32+rbp]
	pxor	xmm15,xmm6
	lea	rdx,QWORD PTR[32+rbp]
	movdqa	XMMWORD PTR[32+rbp],xmm15
	lea	r8,QWORD PTR[r15]
	call	asm_AES_encrypt
	pxor	xmm6,XMMWORD PTR[32+rbp]
	movdqu	XMMWORD PTR[(-16)+r13],xmm6

$L$xts_enc_ret::
	; Zero everything between rsp and rbp (tweak slots and converted
	; key schedule), 32 bytes per iteration, before releasing it.
	lea	rax,QWORD PTR[rsp]
	pxor	xmm0,xmm0
$L$xts_enc_bzero::
	movdqa	XMMWORD PTR[rax],xmm0
	movdqa	XMMWORD PTR[16+rax],xmm0
	lea	rax,QWORD PTR[32+rax]
	cmp	rbp,rax
	ja	$L$xts_enc_bzero

	; Unwind the frame: restore xmm6-xmm15, then the six pushed GPRs.
	lea	rsp,QWORD PTR[rbp]
	movaps	xmm6,XMMWORD PTR[64+rbp]
	movaps	xmm7,XMMWORD PTR[80+rbp]
	movaps	xmm8,XMMWORD PTR[96+rbp]
	movaps	xmm9,XMMWORD PTR[112+rbp]
	movaps	xmm10,XMMWORD PTR[128+rbp]
	movaps	xmm11,XMMWORD PTR[144+rbp]
	movaps	xmm12,XMMWORD PTR[160+rbp]
	movaps	xmm13,XMMWORD PTR[176+rbp]
	movaps	xmm14,XMMWORD PTR[192+rbp]
	movaps	xmm15,XMMWORD PTR[208+rbp]
	lea	rsp,QWORD PTR[160+rbp]
	mov	r15,QWORD PTR[72+rsp]
	mov	r14,QWORD PTR[80+rsp]
	mov	r13,QWORD PTR[88+rsp]
	mov	r12,QWORD PTR[96+rsp]
	mov	rbx,QWORD PTR[104+rsp]
	; Saved rbp goes through rax so rbp is written only after rsp is
	; back at its entry value.
	mov	rax,QWORD PTR[112+rsp]
	lea	rsp,QWORD PTR[120+rsp]
	mov	rbp,rax
$L$xts_enc_epilogue::
	DB	0F3h,0C3h		;repret
bsaes_xts_encrypt	ENDP
2109
PUBLIC	bsaes_xts_decrypt

;-----------------------------------------------------------------------
; bsaes_xts_decrypt -- XTS-mode decryption. Full groups of blocks go
; through the 8-way bit-sliced core _bsaes_decrypt8; a single leftover
; block and the two ciphertext-stealing blocks go through
; asm_AES_decrypt. The initial tweak is still produced with
; asm_AES_encrypt.
;
; Arguments as consumed below (Win64 positions):
;   rcx           input pointer                      -> r12
;   rdx           output pointer                     -> r13
;   r8            length in bytes                    -> r14 (copy in rbx)
;   r9            data key schedule                  -> r15
;                 (dword at +240 is read as the round count)
;   [entry rsp+40] key schedule used for the initial tweak   -> r10
;   [entry rsp+48] 16-byte block encrypted to form the tweak -> r11
;
; Frame (rbp = rsp at $L$xts_dec_body):
;   [rbp+32]        16-byte scratch block
;   [rbp+64..223]   saved xmm6-xmm15
;   [rbp+232..279]  saved r15,r14,r13,r12,rbx,rbp
;   below rbp       bit-sliced key schedule, then 128 bytes holding the
;                   eight tweaks of the current batch at [rsp+0..127]
;
; Register roles in the main body:
;   xmm6  running tweak         xmm12 $L$xts_magic
;   xmm14 sign mask of tweak    xmm13 carry/reduction term
;   edx   round count (copied to r10d before every core call)
;
; When the length is not a multiple of 16, the last whole block is held
; back from the bulk path and processed together with the tail at
; $L$xts_dec_done, using the two final tweaks in swapped order.
;
; NOTE(review): no check for a length below 16 is visible here;
; callers presumably guarantee at least one full block -- confirm.
;-----------------------------------------------------------------------
ALIGN	16
bsaes_xts_decrypt	PROC PUBLIC
	; rax = entry rsp. se_handler takes the stack pointer from the
	; context's Rax when a fault hits before $L$xts_dec_body.
	mov	rax,rsp
$L$xts_dec_prologue::
	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,QWORD PTR[((-72))+rsp]
	; rsp is now entry-120, so these are entry+40 and entry+48:
	; the 5th and 6th arguments.
	mov	r10,QWORD PTR[160+rsp]
	mov	r11,QWORD PTR[168+rsp]
	lea	rsp,QWORD PTR[((-160))+rsp]
	; xmm6-xmm15 are callee-saved in the Win64 ABI; spill all ten.
	movaps	XMMWORD PTR[64+rsp],xmm6
	movaps	XMMWORD PTR[80+rsp],xmm7
	movaps	XMMWORD PTR[96+rsp],xmm8
	movaps	XMMWORD PTR[112+rsp],xmm9
	movaps	XMMWORD PTR[128+rsp],xmm10
	movaps	XMMWORD PTR[144+rsp],xmm11
	movaps	XMMWORD PTR[160+rsp],xmm12
	movaps	XMMWORD PTR[176+rsp],xmm13
	movaps	XMMWORD PTR[192+rsp],xmm14
	movaps	XMMWORD PTR[208+rsp],xmm15
$L$xts_dec_body::
	; rbp anchors the frame; rsp moves further down below.
	mov	rbp,rsp
	mov	r12,rcx
	mov	r13,rdx
	mov	r14,r8
	mov	r15,r9

	; Initial tweak: encrypt (not decrypt) the block at r11 with key
	; r10 into [rbp+32].
	lea	rcx,QWORD PTR[r11]
	lea	rdx,QWORD PTR[32+rbp]
	lea	r8,QWORD PTR[r10]
	call	asm_AES_encrypt

	; eax = round count of the data key; rbx keeps the unrounded length.
	mov	eax,DWORD PTR[240+r15]
	mov	rbx,r14

	; Reserve rounds*128 - 96 bytes for the converted key schedule.
	mov	edx,eax
	shl	rax,7
	sub	rax,96
	sub	rsp,rax

	; _bsaes_key_convert (defined earlier in the file): rax = destination,
	; rcx = source key, r10d = rounds.
	mov	rax,rsp
	mov	rcx,r15
	mov	r10d,edx
	call	_bsaes_key_convert
	; Differs from the encrypt routine: xmm7 is folded into the first
	; schedule slot at [rsp] and xmm6 is stored at the returned rax.
	; NOTE(review): presumably the round-0 fix-up and last-round key
	; needed by _bsaes_decrypt8; confirm against _bsaes_key_convert.
	pxor	xmm7,XMMWORD PTR[rsp]
	movdqa	XMMWORD PTR[rax],xmm6
	movdqa	XMMWORD PTR[rsp],xmm7

	; r14 = bytes in whole blocks, minus 16 when a partial tail exists
	; (rax = 16 if len & 15 != 0, else 0), so the last whole block is
	; left for the stealing code.
	xor	eax,eax
	and	r14,-16
	test	ebx,15
	setnz	al
	shl	rax,4
	sub	r14,rax

	; Carve 128 bytes for eight tweaks and load the initial tweak.
	sub	rsp,080h
	movdqa	xmm6,XMMWORD PTR[32+rbp]

	; xmm14 = per-dword sign mask of the tweak (0 > x), xmm12 = magic.
	pxor	xmm14,xmm14
	movdqa	xmm12,XMMWORD PTR[$L$xts_magic]
	pcmpgtd	xmm14,xmm6

	; Fewer than 8 blocks for the bulk path -> short path.
	sub	r14,080h
	jc	$L$xts_dec_short
	jmp	$L$xts_dec_loop

ALIGN	16
$L$xts_dec_loop::
	; --- tweak 0 -> xmm15 and [rsp+0] -------------------------------
	; One tweak step = multiply by x in GF(2^128):
	;   pshufd 13h moves sign(dword3) to dword0 and sign(dword1) to
	;   dword2; pand with magic {87h,0,1,0} turns them into the
	;   reduction constant and the carry into the high qword;
	;   paddq doubles both qwords; pxor applies carry and reduction.
	; The pcmpgtd in the middle samples dwords 1 and 3 after the shift;
	; the final pxor only alters dwords 0 and 2, so the mask is already
	; that of the finished next tweak.
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm15,xmm6
	movdqa	XMMWORD PTR[rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	; --- tweak 1 -> xmm0 and [rsp+16]; start loading input -----------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm0,xmm6
	movdqa	XMMWORD PTR[16+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm7,XMMWORD PTR[r12]
	; --- tweak 2 -> xmm1 and [rsp+32] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm1,xmm6
	movdqa	XMMWORD PTR[32+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm8,XMMWORD PTR[16+r12]
	; block 0 ^= tweak 0 (input XORs trail the tweak generation)
	pxor	xmm15,xmm7
	; --- tweak 3 -> xmm2 and [rsp+48] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm2,xmm6
	movdqa	XMMWORD PTR[48+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm9,XMMWORD PTR[32+r12]
	pxor	xmm0,xmm8
	; --- tweak 4 -> xmm3 and [rsp+64] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm3,xmm6
	movdqa	XMMWORD PTR[64+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm10,XMMWORD PTR[48+r12]
	pxor	xmm1,xmm9
	; --- tweak 5 -> xmm4 and [rsp+80] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm4,xmm6
	movdqa	XMMWORD PTR[80+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm11,XMMWORD PTR[64+r12]
	pxor	xmm2,xmm10
	; --- tweak 6 -> xmm5 and [rsp+96] -------------------------------
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm5,xmm6
	movdqa	XMMWORD PTR[96+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	; From here xmm12-xmm14 are reused as input registers; the magic
	; constant and sign mask are rebuilt after the core call.
	movdqu	xmm12,XMMWORD PTR[80+r12]
	pxor	xmm3,xmm11
	movdqu	xmm13,XMMWORD PTR[96+r12]
	pxor	xmm4,xmm12
	movdqu	xmm14,XMMWORD PTR[112+r12]
	lea	r12,QWORD PTR[128+r12]
	; tweak 7 stays in xmm6 and is saved to [rsp+112]; block 7 is
	; XORed into xmm6 itself.
	movdqa	XMMWORD PTR[112+rsp],xmm6
	pxor	xmm5,xmm13
	; rax = key schedule (just above the tweak area), r10d = rounds.
	lea	rax,QWORD PTR[128+rsp]
	pxor	xmm6,xmm14
	mov	r10d,edx

	call	_bsaes_decrypt8

	; The decrypt core returns blocks 0..7 in xmm15,xmm0,xmm5,xmm3,
	; xmm1,xmm6,xmm2,xmm4 (a different order from the encrypt core).
	; XOR each with its saved tweak and store.
	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm3,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm5
	pxor	xmm1,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm3
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm1
	pxor	xmm2,XMMWORD PTR[96+rsp]
	movdqu	XMMWORD PTR[80+r13],xmm6
	pxor	xmm4,XMMWORD PTR[112+rsp]
	movdqu	XMMWORD PTR[96+r13],xmm2
	movdqu	XMMWORD PTR[112+r13],xmm4
	lea	r13,QWORD PTR[128+r13]

	; Resume from tweak 7: reload it and the magic constant, then
	; advance one step so xmm6 is tweak 0 of the next batch and xmm14
	; is its sign mask.
	movdqa	xmm6,XMMWORD PTR[112+rsp]
	pxor	xmm14,xmm14
	movdqa	xmm12,XMMWORD PTR[$L$xts_magic]
	pcmpgtd	xmm14,xmm6
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13

	sub	r14,080h
	jnc	$L$xts_dec_loop

$L$xts_dec_short::
	; r14 went negative; restore the count of remaining bulk bytes
	; (0..112). Zero means only the stealing pair, if any, is left.
	add	r14,080h
	jz	$L$xts_dec_done
	; Same tweak pipeline as the main loop, but after each input load
	; the remaining length is compared and control leaves to the
	; matching $L$xts_dec_N once all N blocks are loaded. The trailing
	; XORs not yet executed are completed at the start of each target.
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm15,xmm6
	movdqa	XMMWORD PTR[rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm0,xmm6
	movdqa	XMMWORD PTR[16+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm7,XMMWORD PTR[r12]
	cmp	r14,16
	je	$L$xts_dec_1
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm1,xmm6
	movdqa	XMMWORD PTR[32+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm8,XMMWORD PTR[16+r12]
	cmp	r14,32
	je	$L$xts_dec_2
	pxor	xmm15,xmm7
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm2,xmm6
	movdqa	XMMWORD PTR[48+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm9,XMMWORD PTR[32+r12]
	cmp	r14,48
	je	$L$xts_dec_3
	pxor	xmm0,xmm8
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm3,xmm6
	movdqa	XMMWORD PTR[64+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm10,XMMWORD PTR[48+r12]
	cmp	r14,64
	je	$L$xts_dec_4
	pxor	xmm1,xmm9
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm4,xmm6
	movdqa	XMMWORD PTR[80+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm11,XMMWORD PTR[64+r12]
	cmp	r14,80
	je	$L$xts_dec_5
	pxor	xmm2,xmm10
	pshufd	xmm13,xmm14,013h
	pxor	xmm14,xmm14
	movdqa	xmm5,xmm6
	movdqa	XMMWORD PTR[96+rsp],xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	pcmpgtd	xmm14,xmm6
	pxor	xmm6,xmm13
	movdqu	xmm12,XMMWORD PTR[80+r12]
	cmp	r14,96
	je	$L$xts_dec_6
	; Seven blocks remain (r14 == 112). The eighth tweak is saved so it
	; is available to the stealing code afterwards.
	pxor	xmm3,xmm11
	movdqu	xmm13,XMMWORD PTR[96+r12]
	pxor	xmm4,xmm12
	movdqa	XMMWORD PTR[112+rsp],xmm6
	lea	r12,QWORD PTR[112+r12]
	pxor	xmm5,xmm13
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	; Only the first seven core outputs are meaningful here.
	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm3,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm5
	pxor	xmm1,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm3
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm1
	pxor	xmm2,XMMWORD PTR[96+rsp]
	movdqu	XMMWORD PTR[80+r13],xmm6
	movdqu	XMMWORD PTR[96+r13],xmm2
	lea	r13,QWORD PTR[112+r13]

	; xmm6 = first unused tweak.
	movdqa	xmm6,XMMWORD PTR[112+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_6::
	; Six blocks: finish the two pending input XORs, run the core,
	; store six outputs, next tweak is slot 6.
	pxor	xmm3,xmm11
	lea	r12,QWORD PTR[96+r12]
	pxor	xmm4,xmm12
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm3,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm5
	pxor	xmm1,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm3
	pxor	xmm6,XMMWORD PTR[80+rsp]
	movdqu	XMMWORD PTR[64+r13],xmm1
	movdqu	XMMWORD PTR[80+r13],xmm6
	lea	r13,QWORD PTR[96+r13]

	movdqa	xmm6,XMMWORD PTR[96+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_5::
	; Five blocks; next tweak is slot 5.
	pxor	xmm2,xmm10
	lea	r12,QWORD PTR[80+r12]
	pxor	xmm3,xmm11
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm3,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm5
	pxor	xmm1,XMMWORD PTR[64+rsp]
	movdqu	XMMWORD PTR[48+r13],xmm3
	movdqu	XMMWORD PTR[64+r13],xmm1
	lea	r13,QWORD PTR[80+r13]

	movdqa	xmm6,XMMWORD PTR[80+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_4::
	; Four blocks; next tweak is slot 4.
	pxor	xmm1,xmm9
	lea	r12,QWORD PTR[64+r12]
	pxor	xmm2,xmm10
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	pxor	xmm3,XMMWORD PTR[48+rsp]
	movdqu	XMMWORD PTR[32+r13],xmm5
	movdqu	XMMWORD PTR[48+r13],xmm3
	lea	r13,QWORD PTR[64+r13]

	movdqa	xmm6,XMMWORD PTR[64+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_3::
	; Three blocks; next tweak is slot 3.
	pxor	xmm0,xmm8
	lea	r12,QWORD PTR[48+r12]
	pxor	xmm1,xmm9
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	pxor	xmm5,XMMWORD PTR[32+rsp]
	movdqu	XMMWORD PTR[16+r13],xmm0
	movdqu	XMMWORD PTR[32+r13],xmm5
	lea	r13,QWORD PTR[48+r13]

	movdqa	xmm6,XMMWORD PTR[48+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_2::
	; Two blocks; next tweak is slot 2.
	pxor	xmm15,xmm7
	lea	r12,QWORD PTR[32+r12]
	pxor	xmm0,xmm8
	lea	rax,QWORD PTR[128+rsp]
	mov	r10d,edx

	call	_bsaes_decrypt8

	pxor	xmm15,XMMWORD PTR[rsp]
	pxor	xmm0,XMMWORD PTR[16+rsp]
	movdqu	XMMWORD PTR[r13],xmm15
	movdqu	XMMWORD PTR[16+r13],xmm0
	lea	r13,QWORD PTR[32+r13]

	movdqa	xmm6,XMMWORD PTR[32+rsp]
	jmp	$L$xts_dec_done
ALIGN	16
$L$xts_dec_1::
	; One block: skip the bit-sliced core and decrypt in place in the
	; [rbp+32] scratch block with asm_AES_decrypt (rcx = in, rdx = out,
	; r8 = key). xmm15 still holds the tweak after the call.
	pxor	xmm7,xmm15
	lea	r12,QWORD PTR[16+r12]
	movdqa	XMMWORD PTR[32+rbp],xmm7
	lea	rcx,QWORD PTR[32+rbp]
	lea	rdx,QWORD PTR[32+rbp]
	lea	r8,QWORD PTR[r15]
	call	asm_AES_decrypt
	pxor	xmm15,XMMWORD PTR[32+rbp]





	movdqu	XMMWORD PTR[r13],xmm15
	lea	r13,QWORD PTR[16+r13]

	; Next tweak was already written to slot 1 before the branch.
	movdqa	xmm6,XMMWORD PTR[16+rsp]

$L$xts_dec_done::
	; ebx = len & 15 = size of the partial tail; none -> finished.
	and	ebx,15
	jz	$L$xts_dec_ret

	; Stealing on decrypt uses the last two tweaks in swapped order:
	; xmm5 keeps the current tweak T, xmm6 advances to T*x.
	pxor	xmm14,xmm14
	movdqa	xmm12,XMMWORD PTR[$L$xts_magic]
	pcmpgtd	xmm14,xmm6
	pshufd	xmm13,xmm14,013h
	movdqa	xmm5,xmm6
	paddq	xmm6,xmm6
	pand	xmm13,xmm12
	movdqu	xmm15,XMMWORD PTR[r12]
	pxor	xmm6,xmm13

	; Decrypt the held-back whole block at [r12] with T*x and park the
	; result at [r13]. xmm5 and xmm6 are relied on to survive the
	; asm_AES_decrypt call.
	; NOTE(review): xmm5 is volatile in the Win64 ABI; this presumably
	; works because asm_AES_decrypt does not touch XMM registers --
	; confirm against its implementation.
	lea	rcx,QWORD PTR[32+rbp]
	pxor	xmm15,xmm6
	lea	rdx,QWORD PTR[32+rbp]
	movdqa	XMMWORD PTR[32+rbp],xmm15
	lea	r8,QWORD PTR[r15]
	call	asm_AES_decrypt
	pxor	xmm6,XMMWORD PTR[32+rbp]
	mov	rdx,r13
	movdqu	XMMWORD PTR[r13],xmm6

$L$xts_dec_steal::
	; One byte per iteration: tail input byte in[16+i] replaces out[i],
	; and the displaced byte becomes tail output out[16+i].
	movzx	eax,BYTE PTR[16+r12]
	movzx	ecx,BYTE PTR[rdx]
	lea	r12,QWORD PTR[1+r12]
	mov	BYTE PTR[rdx],al
	mov	BYTE PTR[16+rdx],cl
	lea	rdx,QWORD PTR[1+rdx]
	sub	ebx,1
	jnz	$L$xts_dec_steal

	; Decrypt the patched block at [r13] with the earlier tweak T
	; (xmm5) and write it back in place.
	movdqu	xmm15,XMMWORD PTR[r13]
	lea	rcx,QWORD PTR[32+rbp]
	pxor	xmm15,xmm5
	lea	rdx,QWORD PTR[32+rbp]
	movdqa	XMMWORD PTR[32+rbp],xmm15
	lea	r8,QWORD PTR[r15]
	call	asm_AES_decrypt
	pxor	xmm5,XMMWORD PTR[32+rbp]
	movdqu	XMMWORD PTR[r13],xmm5

$L$xts_dec_ret::
	; Zero everything between rsp and rbp (tweak slots and converted
	; key schedule), 32 bytes per iteration, before releasing it.
	lea	rax,QWORD PTR[rsp]
	pxor	xmm0,xmm0
$L$xts_dec_bzero::
	movdqa	XMMWORD PTR[rax],xmm0
	movdqa	XMMWORD PTR[16+rax],xmm0
	lea	rax,QWORD PTR[32+rax]
	cmp	rbp,rax
	ja	$L$xts_dec_bzero

	; Unwind the frame: restore xmm6-xmm15, then the six pushed GPRs.
	lea	rsp,QWORD PTR[rbp]
	movaps	xmm6,XMMWORD PTR[64+rbp]
	movaps	xmm7,XMMWORD PTR[80+rbp]
	movaps	xmm8,XMMWORD PTR[96+rbp]
	movaps	xmm9,XMMWORD PTR[112+rbp]
	movaps	xmm10,XMMWORD PTR[128+rbp]
	movaps	xmm11,XMMWORD PTR[144+rbp]
	movaps	xmm12,XMMWORD PTR[160+rbp]
	movaps	xmm13,XMMWORD PTR[176+rbp]
	movaps	xmm14,XMMWORD PTR[192+rbp]
	movaps	xmm15,XMMWORD PTR[208+rbp]
	lea	rsp,QWORD PTR[160+rbp]
	mov	r15,QWORD PTR[72+rsp]
	mov	r14,QWORD PTR[80+rsp]
	mov	r13,QWORD PTR[88+rsp]
	mov	r12,QWORD PTR[96+rsp]
	mov	rbx,QWORD PTR[104+rsp]
	; Saved rbp goes through rax so rbp is written only after rsp is
	; back at its entry value.
	mov	rax,QWORD PTR[112+rsp]
	lea	rsp,QWORD PTR[120+rsp]
	mov	rbp,rax
$L$xts_dec_epilogue::
	DB	0F3h,0C3h		;repret
bsaes_xts_decrypt	ENDP
2612
;-----------------------------------------------------------------------
; _bsaes_const -- 16-byte constants shared by the routines in this file.
;
; ORDER MATTERS: code does not reference every label directly. It takes
; the address of $L$BS0 or $L$ADD1 and reaches the neighbouring entries
; by fixed displacement, e.g. [80+r11] from $L$BS0 is $L$M0SR,
; [-32+r11] / [-16+r11] from $L$ADD1 are $L$SWPUP / $L$SWPUPM0SR, and
; [16+r11]..[112+r11] from $L$ADD1 are $L$ADD2..$L$ADD8. Do not insert,
; remove or reorder entries.
;-----------------------------------------------------------------------
ALIGN	64
_bsaes_const::
; NOTE(review): the next three are presumably byte-shuffle masks for the
; decrypt path (names suggest InvShiftRows variants); their users are
; outside this view.
$L$M0ISR::
	DQ	00a0e0206070b0f03h,00004080c0d010509h
$L$ISRM0::
	DQ	001040b0e0205080fh,00306090c00070a0dh
$L$ISR::
	DQ	00504070602010003h,00f0e0d0c080b0a09h
; Bit masks for the bit-slice transposition: alternating 1-, 2- and
; 4-bit groups. _bsaes_encrypt8_bitslice pairs $L$BS0 with shifts by 1
; and $L$BS1 with shifts by 2.
$L$BS0::
	DQ	05555555555555555h,05555555555555555h
$L$BS1::
	DQ	03333333333333333h,03333333333333333h
$L$BS2::
	DQ	00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
; Byte-shuffle masks. $L$M0SR is the one _bsaes_encrypt8 applies to its
; eight input blocks (loaded as [80+r11] from $L$BS0).
; NOTE(review): $L$SR / $L$SRM0 presumably ShiftRows variants used
; inside the round loop -- not visible here.
$L$SR::
	DQ	00504070600030201h,00f0e0d0c0a09080bh
$L$SRM0::
	DQ	00304090e00050a0fh,001060b0c0207080dh
$L$M0SR::
	DQ	00a0e02060f03070bh,00004080c05090d01h
; CTR helpers: $L$SWPUP is a byte-shuffle mask that reverses the bytes of
; the top dword and leaves the rest in place; $L$SWPUPM0SR is the mask
; the CTR loop applies to the counter blocks in place of $L$M0SR.
$L$SWPUP::
	DQ	00706050403020100h,00c0d0e0f0b0a0908h
$L$SWPUPM0SR::
	DQ	00a0d02060c03070bh,00004080f05090e01h
; Counter increments 1..8, added with paddd to the top dword only.
$L$ADD1::
	DQ	00000000000000000h,00000000100000000h
$L$ADD2::
	DQ	00000000000000000h,00000000200000000h
$L$ADD3::
	DQ	00000000000000000h,00000000300000000h
$L$ADD4::
	DQ	00000000000000000h,00000000400000000h
$L$ADD5::
	DQ	00000000000000000h,00000000500000000h
$L$ADD6::
	DQ	00000000000000000h,00000000600000000h
$L$ADD7::
	DQ	00000000000000000h,00000000700000000h
$L$ADD8::
	DQ	00000000000000000h,00000000800000000h
; XTS tweak update mask {87h,0,1,0}: dword 0 carries the GF(2^128)
; reduction constant 87h, dword 2 carries the bit that propagates the
; carry from the low qword into the high qword.
$L$xts_magic::
	DD	087h,0,1,0
; NOTE(review): $L$masks, $L$M0 and $L$63 are presumably used by
; _bsaes_key_convert (single-bit byte masks, a byte permutation and the
; byte 63h replicated); that routine is outside this view.
$L$masks::
	DQ	00101010101010101h,00101010101010101h
	DQ	00202020202020202h,00202020202020202h
	DQ	00404040404040404h,00404040404040404h
	DQ	00808080808080808h,00808080808080808h
$L$M0::
	DQ	002060a0e03070b0fh,00004080c0105090dh
$L$63::
	DQ	06363636363636363h,06363636363636363h
; NUL-terminated attribution string, UTF-8 encoded:
; "Bit-sliced AES for x86_64/SSSE3, Emilia K<C3 A4>sper, Peter Schwabe,
;  Andy Polyakov"
DB	66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
DB	111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
DB	32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
DB	32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
DB	65,110,100,121,32,80,111,108,121,97,107,111,118,0
ALIGN	64
2670
2671EXTERN	__imp_RtlVirtualUnwind:NEAR
2672
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by the
; public routines registered in .pdata/.xdata at the end of this file.
;
; It reconstructs the caller's register state from this file's custom
; frame layout (no unwind codes are emitted), then lets
; RtlVirtualUnwind continue the unwind.
;
; In (as used below):
;   r8 = CONTEXT record of the faulting frame
;   r9 = DISPATCHER_CONTEXT (ImageBase at +8, ContextRecord at +40,
;        HandlerData at +56, ...)
; Out: eax = 1.
;
; HandlerData is the pair of image-relative labels stored after the
; handler RVA in .xdata: [0] = body label, [1] = epilogue label.
;
; The CONTEXT offsets below (120 Rax, 144 Rbx, 152 Rsp, 160 Rbp,
; 216..240 R12..R15, 248 Rip, 512 Xmm6) follow the Win64 CONTEXT
; layout -- verify against winnt.h when changing anything here.
;-----------------------------------------------------------------------
ALIGN	16
se_handler	PROC PRIVATE
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	; 32 bytes of shadow space plus four stack arguments for the
	; RtlVirtualUnwind call below.
	sub	rsp,64

	; rax = context Rax, rbx = context Rip. The guarded routines do
	; "mov rax,rsp" as their first instruction, so while still in the
	; prologue Rax is the stack pointer at entry.
	mov	rax,QWORD PTR[120+r8]
	mov	rbx,QWORD PTR[248+r8]

	; rsi = image base, r11 = HandlerData.
	mov	rsi,QWORD PTR[8+r9]
	mov	r11,QWORD PTR[56+r9]

	; Rip below the body label -> fault inside the prologue. Callee-
	; saved registers have not been modified yet, so only Rsp needs
	; fixing (from Rax).
	mov	r10d,DWORD PTR[r11]
	lea	r10,QWORD PTR[r10*1+rsi]
	cmp	rbx,r10
	jb	$L$in_prologue

	; Otherwise start from the context Rsp.
	mov	rax,QWORD PTR[152+r8]

	; Rip at or past the epilogue label -> everything is already
	; restored and Rsp is correct as is.
	mov	r10d,DWORD PTR[4+r11]
	lea	r10,QWORD PTR[r10*1+rsi]
	cmp	rbx,r10
	jae	$L$in_prologue

	; In the body: context Rbp is the frame base set by "mov rbp,rsp".
	mov	rax,QWORD PTR[160+r8]

	; Copy the ten saved XMM registers (20 qwords) from frame+64 into
	; the context starting at offset 512.
	lea	rsi,QWORD PTR[64+rax]
	lea	rdi,QWORD PTR[512+r8]
	mov	ecx,20
	; Encoded bytes FC F3 48 A5 = cld ; rep movsq
	DD	0a548f3fch
	; Skip the 160-byte XMM area, mirroring the routines' epilogue.
	lea	rax,QWORD PTR[160+rax]

	; Fetch the six pushed registers at the same offsets the epilogue
	; uses, then step over them: rax ends up as the rsp at entry.
	mov	rbp,QWORD PTR[112+rax]
	mov	rbx,QWORD PTR[104+rax]
	mov	r12,QWORD PTR[96+rax]
	mov	r13,QWORD PTR[88+rax]
	mov	r14,QWORD PTR[80+rax]
	mov	r15,QWORD PTR[72+rax]
	lea	rax,QWORD PTR[120+rax]
	; Write them into the context (Rbx, Rbp, R12-R15).
	mov	QWORD PTR[144+r8],rbx
	mov	QWORD PTR[160+r8],rbp
	mov	QWORD PTR[216+r8],r12
	mov	QWORD PTR[224+r8],r13
	mov	QWORD PTR[232+r8],r14
	mov	QWORD PTR[240+r8],r15

$L$in_prologue::
	; context Rsp = recovered stack pointer.
	mov	QWORD PTR[152+r8],rax

	; Copy the patched CONTEXT (154 qwords) over the dispatcher's
	; ContextRecord.
	mov	rdi,QWORD PTR[40+r9]
	mov	rsi,r8
	mov	ecx,154
	; Encoded bytes FC F3 48 A5 = cld ; rep movsq
	DD	0a548f3fch

	; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame,
	;                  NULL)
	; First four in rcx/rdx/r8/r9, the rest at [rsp+32..56].
	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD PTR[8+rsi]
	mov	r8,QWORD PTR[rsi]
	mov	r9,QWORD PTR[16+rsi]
	mov	r10,QWORD PTR[40+rsi]
	lea	r11,QWORD PTR[56+rsi]
	lea	r12,QWORD PTR[24+rsi]
	mov	QWORD PTR[32+rsp],r10
	mov	QWORD PTR[40+rsp],r11
	mov	QWORD PTR[48+rsp],r12
	mov	QWORD PTR[56+rsp],rcx
	call	QWORD PTR[__imp_RtlVirtualUnwind]

	; Return 1 (ExceptionContinueSearch in the Win64 handler protocol).
	mov	eax,1
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
se_handler	ENDP
2761
2762.text$	ENDS
; Function table (.pdata): one three-dword entry per public routine,
; all image-relative: start of covered range, end of covered range,
; address of the matching unwind-info record in .xdata.
; In the two XTS routines the *_prologue label sits after the initial
; "mov rax,rsp" and the *_epilogue label sits on the final ret, so
; those two instructions lie outside the covered range. The CBC and CTR
; labels are defined earlier in the file.
.pdata	SEGMENT READONLY ALIGN(4)
ALIGN	4
	; bsaes_cbc_encrypt (decrypt path)
	DD	imagerel $L$cbc_dec_prologue
	DD	imagerel $L$cbc_dec_epilogue
	DD	imagerel $L$cbc_dec_info

	; bsaes_ctr32_encrypt_blocks
	DD	imagerel $L$ctr_enc_prologue
	DD	imagerel $L$ctr_enc_epilogue
	DD	imagerel $L$ctr_enc_info

	; bsaes_xts_encrypt
	DD	imagerel $L$xts_enc_prologue
	DD	imagerel $L$xts_enc_epilogue
	DD	imagerel $L$xts_enc_info

	; bsaes_xts_decrypt
	DD	imagerel $L$xts_dec_prologue
	DD	imagerel $L$xts_dec_epilogue
	DD	imagerel $L$xts_dec_info

.pdata	ENDS
; Unwind-info records (.xdata), one per .pdata entry. Each record is:
;   DB 9,0,0,0   header byte 9 = version 1 with the exception-handler
;                flag set; prolog size, unwind-code count and frame
;                register are all zero (no unwind codes are used)
;   DD handler   image-relative address of se_handler
;   DD body,epi  handler data consumed by se_handler as HandlerData[0]
;                and HandlerData[1]: the label where the frame is fully
;                set up and the label of the final ret
.xdata	SEGMENT READONLY ALIGN(8)
ALIGN	8
$L$cbc_dec_info::
DB	9,0,0,0
	DD	imagerel se_handler
	DD	imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue
$L$ctr_enc_info::
DB	9,0,0,0
	DD	imagerel se_handler
	DD	imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
$L$xts_enc_info::
DB	9,0,0,0
	DD	imagerel se_handler
	DD	imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
$L$xts_dec_info::
DB	9,0,0,0
	DD	imagerel se_handler
	DD	imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue

.xdata	ENDS
2802END
2803
2804