1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
10%ifdef __YASM_VERSION_ID__
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically includes .00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
23;extern	_GFp_ia32cap_P
; _GFp_aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
; Stack arguments (32-bit, relative to esp at entry):
;   [esp+4]  = pointer to the 16-byte input block
;   [esp+8]  = pointer to the 16-byte output block
;   [esp+12] = pointer to the key schedule; the round-loop count is read
;              from offset 240 of that structure
; Uses eax, ecx, edx and xmm0-xmm2. All three xmm registers are zeroed
; before returning so no key or data material is left in them.
24global	_GFp_aes_hw_encrypt
25align	16
26_GFp_aes_hw_encrypt:
27L$_GFp_aes_hw_encrypt_begin:
28	mov	eax,DWORD [4+esp]	; eax = input pointer
29	mov	edx,DWORD [12+esp]	; edx = key schedule pointer
30	movups	xmm2,[eax]	; xmm2 = input block
31	mov	ecx,DWORD [240+edx]	; ecx = loop count stored at key+240
32	mov	eax,DWORD [8+esp]	; eax = output pointer (input pointer no longer needed)
33	movups	xmm0,[edx]	; round key 0
34	movups	xmm1,[16+edx]	; round key 1
35	lea	edx,[32+edx]	; edx -> round key 2
36	xorps	xmm2,xmm0	; initial round-key XOR
37L$000enc1_loop_1:
38db	102,15,56,220,209	; aesenc xmm2,xmm1
39	dec	ecx
40	movups	xmm1,[edx]	; fetch next round key; movups/lea leave the flags from dec untouched
41	lea	edx,[16+edx]
42	jnz	NEAR L$000enc1_loop_1
43db	102,15,56,221,209	; aesenclast xmm2,xmm1
44	pxor	xmm0,xmm0	; wipe round keys from registers
45	pxor	xmm1,xmm1
46	movups	[eax],xmm2	; store the encrypted block
47	pxor	xmm2,xmm2	; wipe the result register
48	ret
; __aesni_encrypt2: encrypt two blocks in parallel (internal helper,
; register arguments).
; In:  edx = key schedule pointer, ecx = round-loop count,
;      xmm2, xmm3 = the two input blocks
; Out: xmm2, xmm3 = encrypted blocks
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; The loop consumes two round keys per iteration. ecx is turned into a
; negative byte offset that counts up to zero, and edx is moved past the
; keys so that [ecx+edx] walks the schedule.
49align	16
50__aesni_encrypt2:
51	movups	xmm0,[edx]	; round key 0
52	shl	ecx,4	; ecx = count * 16 (bytes of round keys)
53	movups	xmm1,[16+edx]	; round key 1
54	xorps	xmm2,xmm0	; initial round-key XOR on both blocks
55	pxor	xmm3,xmm0
56	movups	xmm0,[32+edx]	; round key 2
57	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*count
58	neg	ecx
59	add	ecx,16	; ecx = 16 - 16*count (negative index)
60L$001enc2_loop:
61db	102,15,56,220,209	; aesenc xmm2,xmm1
62db	102,15,56,220,217	; aesenc xmm3,xmm1
63	movups	xmm1,[ecx*1+edx]
64	add	ecx,32	; sets ZF for the jnz below; nothing in between alters flags
65db	102,15,56,220,208	; aesenc xmm2,xmm0
66db	102,15,56,220,216	; aesenc xmm3,xmm0
67	movups	xmm0,[ecx*1+edx-16]
68	jnz	NEAR L$001enc2_loop
69db	102,15,56,220,209	; aesenc xmm2,xmm1
70db	102,15,56,220,217	; aesenc xmm3,xmm1
71db	102,15,56,221,208	; aesenclast xmm2,xmm0
72db	102,15,56,221,216	; aesenclast xmm3,xmm0
73	ret
; __aesni_encrypt3: encrypt three blocks in parallel (internal helper,
; register arguments).
; In:  edx = key schedule pointer, ecx = round-loop count,
;      xmm2, xmm3, xmm4 = input blocks
; Out: xmm2, xmm3, xmm4 = encrypted blocks
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; Same two-round-keys-per-iteration loop shape as __aesni_encrypt2.
74align	16
75__aesni_encrypt3:
76	movups	xmm0,[edx]	; round key 0
77	shl	ecx,4	; ecx = count * 16
78	movups	xmm1,[16+edx]	; round key 1
79	xorps	xmm2,xmm0	; initial round-key XOR on all three blocks
80	pxor	xmm3,xmm0
81	pxor	xmm4,xmm0
82	movups	xmm0,[32+edx]	; round key 2
83	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*count
84	neg	ecx
85	add	ecx,16	; ecx = 16 - 16*count (negative index)
86L$002enc3_loop:
87db	102,15,56,220,209	; aesenc xmm2,xmm1
88db	102,15,56,220,217	; aesenc xmm3,xmm1
89db	102,15,56,220,225	; aesenc xmm4,xmm1
90	movups	xmm1,[ecx*1+edx]
91	add	ecx,32	; sets ZF for the jnz below
92db	102,15,56,220,208	; aesenc xmm2,xmm0
93db	102,15,56,220,216	; aesenc xmm3,xmm0
94db	102,15,56,220,224	; aesenc xmm4,xmm0
95	movups	xmm0,[ecx*1+edx-16]
96	jnz	NEAR L$002enc3_loop
97db	102,15,56,220,209	; aesenc xmm2,xmm1
98db	102,15,56,220,217	; aesenc xmm3,xmm1
99db	102,15,56,220,225	; aesenc xmm4,xmm1
100db	102,15,56,221,208	; aesenclast xmm2,xmm0
101db	102,15,56,221,216	; aesenclast xmm3,xmm0
102db	102,15,56,221,224	; aesenclast xmm4,xmm0
103	ret
; __aesni_encrypt4: encrypt four blocks in parallel (internal helper,
; register arguments).
; In:  edx = key schedule pointer, ecx = round-loop count,
;      xmm2..xmm5 = input blocks
; Out: xmm2..xmm5 = encrypted blocks
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; Same two-round-keys-per-iteration loop shape as the 2- and 3-block helpers.
104align	16
105__aesni_encrypt4:
106	movups	xmm0,[edx]	; round key 0
107	movups	xmm1,[16+edx]	; round key 1
108	shl	ecx,4	; ecx = count * 16
109	xorps	xmm2,xmm0	; initial round-key XOR on all four blocks
110	pxor	xmm3,xmm0
111	pxor	xmm4,xmm0
112	pxor	xmm5,xmm0
113	movups	xmm0,[32+edx]	; round key 2
114	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*count
115	neg	ecx
116db	15,31,64,0	; 4-byte nop (0F 1F 40 00); presumably padding to place the loop label
117	add	ecx,16	; ecx = 16 - 16*count (negative index)
118L$003enc4_loop:
119db	102,15,56,220,209	; aesenc xmm2,xmm1
120db	102,15,56,220,217	; aesenc xmm3,xmm1
121db	102,15,56,220,225	; aesenc xmm4,xmm1
122db	102,15,56,220,233	; aesenc xmm5,xmm1
123	movups	xmm1,[ecx*1+edx]
124	add	ecx,32	; sets ZF for the jnz below
125db	102,15,56,220,208	; aesenc xmm2,xmm0
126db	102,15,56,220,216	; aesenc xmm3,xmm0
127db	102,15,56,220,224	; aesenc xmm4,xmm0
128db	102,15,56,220,232	; aesenc xmm5,xmm0
129	movups	xmm0,[ecx*1+edx-16]
130	jnz	NEAR L$003enc4_loop
131db	102,15,56,220,209	; aesenc xmm2,xmm1
132db	102,15,56,220,217	; aesenc xmm3,xmm1
133db	102,15,56,220,225	; aesenc xmm4,xmm1
134db	102,15,56,220,233	; aesenc xmm5,xmm1
135db	102,15,56,221,208	; aesenclast xmm2,xmm0
136db	102,15,56,221,216	; aesenclast xmm3,xmm0
137db	102,15,56,221,224	; aesenclast xmm4,xmm0
138db	102,15,56,221,232	; aesenclast xmm5,xmm0
139	ret
; __aesni_encrypt6: encrypt six blocks in parallel (internal helper,
; register arguments).
; In:  edx = key schedule pointer, ecx = round-loop count,
;      xmm2..xmm7 = input blocks
; Out: xmm2..xmm7 = encrypted blocks
; Clobbers: ecx, edx, xmm0, xmm1, flags.
; The first round (round key 1) is started while the initial round-key
; XOR is still being applied to the later lanes, then control jumps into
; the middle of the loop body to finish that round.
; L$_aesni_encrypt6_enter is a second entry point: the CTR routine in this
; file calls it after issuing the first round itself. At that label the
; caller must already have edx = key + 32 + 16*count, ecx = 16 - 16*count
; and xmm0 = round key 2.
140align	16
141__aesni_encrypt6:
142	movups	xmm0,[edx]	; round key 0
143	shl	ecx,4	; ecx = count * 16
144	movups	xmm1,[16+edx]	; round key 1
145	xorps	xmm2,xmm0
146	pxor	xmm3,xmm0
147	pxor	xmm4,xmm0
148db	102,15,56,220,209	; aesenc xmm2,xmm1
149	pxor	xmm5,xmm0
150	pxor	xmm6,xmm0
151db	102,15,56,220,217	; aesenc xmm3,xmm1
152	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*count
153	neg	ecx
154db	102,15,56,220,225	; aesenc xmm4,xmm1
155	pxor	xmm7,xmm0
156	movups	xmm0,[ecx*1+edx]	; round key 2 (edx - 16*count = key + 32)
157	add	ecx,16	; ecx = 16 - 16*count (negative index)
158	jmp	NEAR L$004_aesni_encrypt6_inner	; lanes xmm2-xmm4 already did round 1; finish xmm5-xmm7
159align	16
160L$005enc6_loop:
161db	102,15,56,220,209	; aesenc xmm2,xmm1
162db	102,15,56,220,217	; aesenc xmm3,xmm1
163db	102,15,56,220,225	; aesenc xmm4,xmm1
164L$004_aesni_encrypt6_inner:
165db	102,15,56,220,233	; aesenc xmm5,xmm1
166db	102,15,56,220,241	; aesenc xmm6,xmm1
167db	102,15,56,220,249	; aesenc xmm7,xmm1
168L$_aesni_encrypt6_enter:
169	movups	xmm1,[ecx*1+edx]
170	add	ecx,32	; sets ZF for the jnz below
171db	102,15,56,220,208	; aesenc xmm2,xmm0
172db	102,15,56,220,216	; aesenc xmm3,xmm0
173db	102,15,56,220,224	; aesenc xmm4,xmm0
174db	102,15,56,220,232	; aesenc xmm5,xmm0
175db	102,15,56,220,240	; aesenc xmm6,xmm0
176db	102,15,56,220,248	; aesenc xmm7,xmm0
177	movups	xmm0,[ecx*1+edx-16]
178	jnz	NEAR L$005enc6_loop
179db	102,15,56,220,209	; aesenc xmm2,xmm1
180db	102,15,56,220,217	; aesenc xmm3,xmm1
181db	102,15,56,220,225	; aesenc xmm4,xmm1
182db	102,15,56,220,233	; aesenc xmm5,xmm1
183db	102,15,56,220,241	; aesenc xmm6,xmm1
184db	102,15,56,220,249	; aesenc xmm7,xmm1
185db	102,15,56,221,208	; aesenclast xmm2,xmm0
186db	102,15,56,221,216	; aesenclast xmm3,xmm0
187db	102,15,56,221,224	; aesenclast xmm4,xmm0
188db	102,15,56,221,232	; aesenclast xmm5,xmm0
189db	102,15,56,221,240	; aesenclast xmm6,xmm0
190db	102,15,56,221,248	; aesenclast xmm7,xmm0
191	ret
; _GFp_aes_hw_ctr32_encrypt_blocks: AES-CTR over whole 16-byte blocks.
; Only the last dword of the counter block is treated as the counter; it
; is handled as a byte-swapped (big-endian) 32-bit value.
; Stack arguments (read after the four register pushes below):
;   arg1 -> esi = input pointer
;   arg2 -> edi = output pointer
;   arg3 -> eax = number of 16-byte blocks
;   arg4 -> edx = key schedule pointer (round-loop count at offset 240)
;   arg5 -> ebx = pointer to the 16-byte counter block
; No store to the counter block appears in this routine.
; Saves/restores ebp, ebx, esi, edi; xmm0-xmm7 are zeroed on exit.
;
; 16-byte-aligned scratch frame:
;   [esp+0]   byte-reversal mask for pshufb (bytes 0F,0E,...,00)
;   [esp+16]  dwords {6,6,6,0}: counter increment per 6-block iteration
;   [esp+32]  counter block with last dword zeroed, XORed with round key 0
;   [esp+48]  counters n, n+1, n+2   (host byte order, 4th dword zero)
;   [esp+64]  counters n+3, n+4, n+5 (host byte order, 4th dword zero)
;   [esp+80]  caller's esp, restored at L$009ctr32_ret
;
; NOTE(review): a block count of 0 is not special-cased. It falls through
; "cmp eax,6 / jb" and "cmp eax,2 / jb" into L$010ctr32_one and processes
; one block. Presumably callers never pass 0 - confirm.
192global	_GFp_aes_hw_ctr32_encrypt_blocks
193align	16
194_GFp_aes_hw_ctr32_encrypt_blocks:
195L$_GFp_aes_hw_ctr32_encrypt_blocks_begin:
196	push	ebp
197	push	ebx
198	push	esi
199	push	edi
200	mov	esi,DWORD [20+esp]	; esi = input
201	mov	edi,DWORD [24+esp]	; edi = output
202	mov	eax,DWORD [28+esp]	; eax = block count
203	mov	edx,DWORD [32+esp]	; edx = key schedule
204	mov	ebx,DWORD [36+esp]	; ebx = counter block pointer
205	mov	ebp,esp	; remember the caller's esp
206	sub	esp,88
207	and	esp,-16	; align the frame so movdqa can be used on it
208	mov	DWORD [80+esp],ebp
209	cmp	eax,1
210	je	NEAR L$006ctr32_one_shortcut	; single block: use the counter block as-is
211	movdqu	xmm7,[ebx]	; xmm7 = counter block
212	mov	DWORD [esp],202182159	; 0x0C0D0E0F \
213	mov	DWORD [4+esp],134810123	; 0x08090A0B  | 16-byte reversal mask
214	mov	DWORD [8+esp],67438087	; 0x04050607  |
215	mov	DWORD [12+esp],66051	; 0x00010203 /
216	mov	ecx,6
217	xor	ebp,ebp
218	mov	DWORD [16+esp],ecx	; increment vector {6,6,6,0}
219	mov	DWORD [20+esp],ecx
220	mov	DWORD [24+esp],ecx
221	mov	DWORD [28+esp],ebp
222db	102,15,58,22,251,3	; pextrd ebx,xmm7,3 - ebx = last dword of the counter block
223db	102,15,58,34,253,3	; pinsrd xmm7,ebp,3 - zero that dword in xmm7 (ebp == 0)
224	mov	ecx,DWORD [240+edx]	; ecx = round-loop count
225	bswap	ebx	; counter to host byte order
226	pxor	xmm0,xmm0
227	pxor	xmm1,xmm1
228	movdqa	xmm2,[esp]	; xmm2 = byte-reversal mask
229db	102,15,58,34,195,0	; pinsrd xmm0,ebx,0 - counter n
230	lea	ebp,[3+ebx]
231db	102,15,58,34,205,0	; pinsrd xmm1,ebp,0 - counter n+3
232	inc	ebx
233db	102,15,58,34,195,1	; pinsrd xmm0,ebx,1 - counter n+1
234	inc	ebp
235db	102,15,58,34,205,1	; pinsrd xmm1,ebp,1 - counter n+4
236	inc	ebx
237db	102,15,58,34,195,2	; pinsrd xmm0,ebx,2 - counter n+2
238	inc	ebp
239db	102,15,58,34,205,2	; pinsrd xmm1,ebp,2 - counter n+5
240	movdqa	[48+esp],xmm0	; save host-order counters n..n+2
241db	102,15,56,0,194	; pshufb xmm0,xmm2 - reverse all 16 bytes
242	movdqu	xmm6,[edx]	; xmm6 = round key 0
243	movdqa	[64+esp],xmm1	; save host-order counters n+3..n+5
244db	102,15,56,0,202	; pshufb xmm1,xmm2
245	pshufd	xmm2,xmm0,192	; lane 0: its byte-swapped counter in the top dword
246	pshufd	xmm3,xmm0,128	; lane 1
247	cmp	eax,6
248	jb	NEAR L$007ctr32_tail	; fewer than 6 blocks: tail only
249	pxor	xmm7,xmm6	; fold round key 0 into the fixed part of the counter block
250	shl	ecx,4	; ecx = count * 16
251	mov	ebx,16
252	movdqa	[32+esp],xmm7
253	mov	ebp,edx	; ebp = key schedule base for the loop
254	sub	ebx,ecx	; ebx = 16 - 16*count, reloaded into ecx every iteration
255	lea	edx,[32+ecx*1+edx]	; edx = key + 32 + 16*count, as L$_aesni_encrypt6_enter expects
256	sub	eax,6	; eax = blocks left after this batch
257	jmp	NEAR L$008ctr32_loop6	; skip the alignment padding
258align	16
259L$008ctr32_loop6:
260	pshufd	xmm4,xmm0,64	; lane 2
261	movdqa	xmm0,[32+esp]	; fixed part of the counter block ^ round key 0
262	pshufd	xmm5,xmm1,192	; lane 3
263	pxor	xmm2,xmm0	; combine each lane's counter with the fixed part
264	pshufd	xmm6,xmm1,128	; lane 4
265	pxor	xmm3,xmm0
266	pshufd	xmm7,xmm1,64	; lane 5
267	movups	xmm1,[16+ebp]	; round key 1
268	pxor	xmm4,xmm0
269	pxor	xmm5,xmm0
270db	102,15,56,220,209	; aesenc xmm2,xmm1
271	pxor	xmm6,xmm0
272	pxor	xmm7,xmm0
273db	102,15,56,220,217	; aesenc xmm3,xmm1
274	movups	xmm0,[32+ebp]	; round key 2
275	mov	ecx,ebx	; ecx = 16 - 16*count
276db	102,15,56,220,225	; aesenc xmm4,xmm1
277db	102,15,56,220,233	; aesenc xmm5,xmm1
278db	102,15,56,220,241	; aesenc xmm6,xmm1
279db	102,15,56,220,249	; aesenc xmm7,xmm1
280	call	L$_aesni_encrypt6_enter	; remaining rounds; first round was issued above
; XOR the six keystream blocks with the input and store them, interleaved
; with preparing the counters for the next iteration.
281	movups	xmm1,[esi]
282	movups	xmm0,[16+esi]
283	xorps	xmm2,xmm1
284	movups	xmm1,[32+esi]
285	xorps	xmm3,xmm0
286	movups	[edi],xmm2
287	movdqa	xmm0,[16+esp]	; {6,6,6,0}
288	xorps	xmm4,xmm1
289	movdqa	xmm1,[64+esp]	; host-order counters n+3..n+5
290	movups	[16+edi],xmm3
291	movups	[32+edi],xmm4
292	paddd	xmm1,xmm0	; advance the upper three counters by 6
293	paddd	xmm0,[48+esp]	; advance the lower three counters by 6
294	movdqa	xmm2,[esp]	; byte-reversal mask
295	movups	xmm3,[48+esi]
296	movups	xmm4,[64+esi]
297	xorps	xmm5,xmm3
298	movups	xmm3,[80+esi]
299	lea	esi,[96+esi]	; input += 6 blocks
300	movdqa	[48+esp],xmm0
301db	102,15,56,0,194	; pshufb xmm0,xmm2
302	xorps	xmm6,xmm4
303	movups	[48+edi],xmm5
304	xorps	xmm7,xmm3
305	movdqa	[64+esp],xmm1
306db	102,15,56,0,202	; pshufb xmm1,xmm2
307	movups	[64+edi],xmm6
308	pshufd	xmm2,xmm0,192	; lane 0 for the next batch / tail
309	movups	[80+edi],xmm7
310	lea	edi,[96+edi]	; output += 6 blocks
311	pshufd	xmm3,xmm0,128	; lane 1 for the next batch / tail
312	sub	eax,6
313	jnc	NEAR L$008ctr32_loop6	; no borrow: at least 6 more blocks were available
314	add	eax,6	; eax = leftover blocks (0..5)
315	jz	NEAR L$009ctr32_ret
316	movdqu	xmm7,[ebp]	; round key 0
317	mov	edx,ebp	; edx = key schedule base again
318	pxor	xmm7,[32+esp]	; undo the round-key-0 fold: xmm7 = counter block with last dword zero
319	mov	ecx,DWORD [240+ebp]	; reload the round-loop count
; Tail: 1..5 blocks. On entry xmm7 = counter block with last dword zero,
; xmm2/xmm3 = lanes 0/1, xmm0/xmm1 = byte-reversed counter vectors,
; edx = key schedule, ecx = round-loop count.
320L$007ctr32_tail:
321	por	xmm2,xmm7
322	cmp	eax,2
323	jb	NEAR L$010ctr32_one
324	pshufd	xmm4,xmm0,64	; neither pshufd nor por alters the flags from cmp
325	por	xmm3,xmm7
326	je	NEAR L$011ctr32_two
327	pshufd	xmm5,xmm1,192
328	por	xmm4,xmm7
329	cmp	eax,4
330	jb	NEAR L$012ctr32_three
331	pshufd	xmm6,xmm1,128
332	por	xmm5,xmm7
333	je	NEAR L$013ctr32_four
334	por	xmm6,xmm7
335	call	__aesni_encrypt6	; five blocks: the sixth lane (xmm7) is computed but never stored
336	movups	xmm1,[esi]
337	movups	xmm0,[16+esi]
338	xorps	xmm2,xmm1
339	movups	xmm1,[32+esi]
340	xorps	xmm3,xmm0
341	movups	xmm0,[48+esi]
342	xorps	xmm4,xmm1
343	movups	xmm1,[64+esi]
344	xorps	xmm5,xmm0
345	movups	[edi],xmm2
346	xorps	xmm6,xmm1
347	movups	[16+edi],xmm3
348	movups	[32+edi],xmm4
349	movups	[48+edi],xmm5
350	movups	[64+edi],xmm6
351	jmp	NEAR L$009ctr32_ret
352align	16
353L$006ctr32_one_shortcut:
354	movups	xmm2,[ebx]	; the counter block itself is the only block to encrypt
355	mov	ecx,DWORD [240+edx]
356L$010ctr32_one:
; Inline single-block encryption of xmm2 (same loop as _GFp_aes_hw_encrypt).
357	movups	xmm0,[edx]
358	movups	xmm1,[16+edx]
359	lea	edx,[32+edx]
360	xorps	xmm2,xmm0
361L$014enc1_loop_2:
362db	102,15,56,220,209	; aesenc xmm2,xmm1
363	dec	ecx
364	movups	xmm1,[edx]
365	lea	edx,[16+edx]
366	jnz	NEAR L$014enc1_loop_2
367db	102,15,56,221,209	; aesenclast xmm2,xmm1
368	movups	xmm6,[esi]
369	xorps	xmm6,xmm2	; output = input ^ keystream
370	movups	[edi],xmm6
371	jmp	NEAR L$009ctr32_ret
372align	16
373L$011ctr32_two:
374	call	__aesni_encrypt2
375	movups	xmm5,[esi]
376	movups	xmm6,[16+esi]
377	xorps	xmm2,xmm5
378	xorps	xmm3,xmm6
379	movups	[edi],xmm2
380	movups	[16+edi],xmm3
381	jmp	NEAR L$009ctr32_ret
382align	16
383L$012ctr32_three:
384	call	__aesni_encrypt3
385	movups	xmm5,[esi]
386	movups	xmm6,[16+esi]
387	xorps	xmm2,xmm5
388	movups	xmm7,[32+esi]
389	xorps	xmm3,xmm6
390	movups	[edi],xmm2
391	xorps	xmm4,xmm7
392	movups	[16+edi],xmm3
393	movups	[32+edi],xmm4
394	jmp	NEAR L$009ctr32_ret
395align	16
396L$013ctr32_four:
397	call	__aesni_encrypt4
398	movups	xmm6,[esi]
399	movups	xmm7,[16+esi]
400	movups	xmm1,[32+esi]
401	xorps	xmm2,xmm6
402	movups	xmm0,[48+esi]
403	xorps	xmm3,xmm7
404	movups	[edi],xmm2
405	xorps	xmm4,xmm1
406	movups	[16+edi],xmm3
407	xorps	xmm5,xmm0
408	movups	[32+edi],xmm4
409	movups	[48+edi],xmm5
; Common exit: zero every xmm register and the three frame slots that held
; counter/key-derived data, then restore the caller's stack and registers.
410L$009ctr32_ret:
411	pxor	xmm0,xmm0
412	pxor	xmm1,xmm1
413	pxor	xmm2,xmm2
414	pxor	xmm3,xmm3
415	pxor	xmm4,xmm4
416	movdqa	[32+esp],xmm0	; wipe counter block ^ round key 0
417	pxor	xmm5,xmm5
418	movdqa	[48+esp],xmm0	; wipe saved counters
419	pxor	xmm6,xmm6
420	movdqa	[64+esp],xmm0
421	pxor	xmm7,xmm7
422	mov	esp,DWORD [80+esp]	; back to the caller's esp saved at entry
423	pop	edi
424	pop	esi
425	pop	ebx
426	pop	ebp
427	ret
; __aesni_set_encrypt_key: expand a 128- or 256-bit AES key (internal
; helper, register arguments).
; In:  eax = pointer to the user key bytes
;      ecx = key size in bits (only 128 and 256 are accepted)
;      edx = pointer to the key schedule to fill
; Out: eax = 0 on success, -1 if eax or edx is null, -2 for any other
;      key size. On success the round keys are stored from offset 0 and
;      the round-loop count (9 for 128-bit, 13 for 256-bit) at offset 240.
; Saves/restores ebp and ebx. Clobbers ecx, edx, flags and xmm0-xmm5
; (zeroed on the success path).
;
; Two implementations exist per key size. The default uses
; aeskeygenassist. The "_alt" one uses pshufb + aesenclast with the
; constants at L$key_const, and is chosen when, in the dword at
; _GFp_ia32cap_P+4, bit 28 is set and bit 11 is clear.
; NOTE(review): in OpenSSL's ia32cap layout those bits are presumably
; AVX and XOP - confirm against the capability-vector documentation.
428align	16
429__aesni_set_encrypt_key:
430	push	ebp
431	push	ebx
432	test	eax,eax
433	jz	NEAR L$015bad_pointer
434	test	edx,edx
435	jz	NEAR L$015bad_pointer
436	call	L$016pic	; call/pop pair: obtain the current address for position-independent access
437L$016pic:
438	pop	ebx
439	lea	ebx,[(L$key_const-L$016pic)+ebx]	; ebx = address of L$key_const
440	lea	ebp,[_GFp_ia32cap_P]
441	movups	xmm0,[eax]	; xmm0 = first 16 bytes of the user key
442	xorps	xmm4,xmm4	; xmm4 must start at zero for the shufps-based expansion
443	mov	ebp,DWORD [4+ebp]	; second dword of the capability vector
444	lea	edx,[16+edx]
445	and	ebp,268437504	; 0x10000800: keep bits 28 and 11
446	cmp	ecx,256
447	je	NEAR L$01714rounds
448	cmp	ecx,128
449	jne	NEAR L$018bad_keybits
450align	16
; ---- 128-bit key, aeskeygenassist path ----
451L$01910rounds:
452	cmp	ebp,268435456	; 0x10000000: bit 28 set and bit 11 clear?
453	je	NEAR L$02010rounds_alt
454	mov	ecx,9	; round-loop count to store at offset 240
455	movups	[edx-16],xmm0	; round key 0 = the user key
456db	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1
457	call	L$021key_128_cold	; first step: nothing to store yet
458db	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,2
459	call	L$022key_128
460db	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,4
461	call	L$022key_128
462db	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,8
463	call	L$022key_128
464db	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,16
465	call	L$022key_128
466db	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,32
467	call	L$022key_128
468db	102,15,58,223,200,64	; aeskeygenassist xmm1,xmm0,64
469	call	L$022key_128
470db	102,15,58,223,200,128	; aeskeygenassist xmm1,xmm0,128
471	call	L$022key_128
472db	102,15,58,223,200,27	; aeskeygenassist xmm1,xmm0,27
473	call	L$022key_128
474db	102,15,58,223,200,54	; aeskeygenassist xmm1,xmm0,54
475	call	L$022key_128
476	movups	[edx],xmm0	; last round key (schedule offset 160)
477	mov	DWORD [80+edx],ecx	; edx = schedule+160 here, so this writes offset 240
478	jmp	NEAR L$023good_key
479align	16
; key_128: store the previous round key, then derive the next one in xmm0
; from xmm0 and the aeskeygenassist result in xmm1. key_128_cold skips the
; store.
480L$022key_128:
481	movups	[edx],xmm0
482	lea	edx,[16+edx]
483L$021key_128_cold:
484	shufps	xmm4,xmm0,16
485	xorps	xmm0,xmm4
486	shufps	xmm4,xmm0,140
487	xorps	xmm0,xmm4
488	shufps	xmm1,xmm1,255	; broadcast the top dword of the keygenassist result
489	xorps	xmm0,xmm1
490	ret
491align	16
; ---- 128-bit key, pshufb/aesenclast path ----
492L$02010rounds_alt:
493	movdqa	xmm5,[ebx]	; pshufb mask (first row of L$key_const)
494	mov	ecx,8	; eight loop iterations; the last two rounds are unrolled below
495	movdqa	xmm4,[32+ebx]	; round constant vector {1,1,1,1}
496	movdqa	xmm2,xmm0
497	movdqu	[edx-16],xmm0	; round key 0 = the user key
498L$024loop_key128:
499db	102,15,56,0,197	; pshufb xmm0,xmm5
500db	102,15,56,221,196	; aesenclast xmm0,xmm4
501	pslld	xmm4,1	; double the round constant for the next iteration
502	lea	edx,[16+edx]
503	movdqa	xmm3,xmm2
504	pslldq	xmm2,4
505	pxor	xmm3,xmm2
506	pslldq	xmm2,4
507	pxor	xmm3,xmm2
508	pslldq	xmm2,4
509	pxor	xmm2,xmm3	; xmm2 = running XOR of the previous key's dwords
510	pxor	xmm0,xmm2
511	movdqu	[edx-16],xmm0	; store the new round key
512	movdqa	xmm2,xmm0
513	dec	ecx
514	jnz	NEAR L$024loop_key128
515	movdqa	xmm4,[48+ebx]	; round constant vector {27,27,27,27}
516db	102,15,56,0,197	; pshufb xmm0,xmm5
517db	102,15,56,221,196	; aesenclast xmm0,xmm4
518	pslld	xmm4,1	; 27 -> 54 for the final round
519	movdqa	xmm3,xmm2
520	pslldq	xmm2,4
521	pxor	xmm3,xmm2
522	pslldq	xmm2,4
523	pxor	xmm3,xmm2
524	pslldq	xmm2,4
525	pxor	xmm2,xmm3
526	pxor	xmm0,xmm2
527	movdqu	[edx],xmm0
528	movdqa	xmm2,xmm0
529db	102,15,56,0,197	; pshufb xmm0,xmm5
530db	102,15,56,221,196	; aesenclast xmm0,xmm4
531	movdqa	xmm3,xmm2
532	pslldq	xmm2,4
533	pxor	xmm3,xmm2
534	pslldq	xmm2,4
535	pxor	xmm3,xmm2
536	pslldq	xmm2,4
537	pxor	xmm2,xmm3
538	pxor	xmm0,xmm2
539	movdqu	[16+edx],xmm0	; last round key
540	mov	ecx,9
541	mov	DWORD [96+edx],ecx	; edx = schedule+144 here, so this writes offset 240
542	jmp	NEAR L$023good_key
543align	16
; ---- 256-bit key, aeskeygenassist path ----
544L$01714rounds:
545	movups	xmm2,[16+eax]	; xmm2 = second 16 bytes of the user key
546	lea	edx,[16+edx]	; edx = schedule + 32
547	cmp	ebp,268435456	; 0x10000000: bit 28 set and bit 11 clear?
548	je	NEAR L$02514rounds_alt
549	mov	ecx,13	; round-loop count to store at offset 240
550	movups	[edx-32],xmm0	; round key 0
551	movups	[edx-16],xmm2	; round key 1
552db	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,1
553	call	L$026key_256a_cold
554db	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1
555	call	L$027key_256b
556db	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,2
557	call	L$028key_256a
558db	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,2
559	call	L$027key_256b
560db	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,4
561	call	L$028key_256a
562db	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,4
563	call	L$027key_256b
564db	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,8
565	call	L$028key_256a
566db	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,8
567	call	L$027key_256b
568db	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,16
569	call	L$028key_256a
570db	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,16
571	call	L$027key_256b
572db	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,32
573	call	L$028key_256a
574db	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,32
575	call	L$027key_256b
576db	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,64
577	call	L$028key_256a
578	movups	[edx],xmm0	; last round key (schedule offset 224)
579	mov	DWORD [16+edx],ecx	; writes offset 240
580	xor	eax,eax	; L$023good_key zeroes eax again
581	jmp	NEAR L$023good_key
582align	16
; key_256a: store xmm2, then derive the next even-numbered round key in
; xmm0 (uses dword 3 of the keygenassist result). key_256a_cold skips the
; store.
583L$028key_256a:
584	movups	[edx],xmm2
585	lea	edx,[16+edx]
586L$026key_256a_cold:
587	shufps	xmm4,xmm0,16
588	xorps	xmm0,xmm4
589	shufps	xmm4,xmm0,140
590	xorps	xmm0,xmm4
591	shufps	xmm1,xmm1,255	; broadcast dword 3 of the keygenassist result
592	xorps	xmm0,xmm1
593	ret
594align	16
; key_256b: store xmm0, then derive the next odd-numbered round key in
; xmm2 (uses dword 2 of the keygenassist result).
595L$027key_256b:
596	movups	[edx],xmm0
597	lea	edx,[16+edx]
598	shufps	xmm4,xmm2,16
599	xorps	xmm2,xmm4
600	shufps	xmm4,xmm2,140
601	xorps	xmm2,xmm4
602	shufps	xmm1,xmm1,170	; broadcast dword 2 of the keygenassist result
603	xorps	xmm2,xmm1
604	ret
605align	16
; ---- 256-bit key, pshufb/aesenclast path ----
; Each iteration produces two round keys, except the seventh, which stops
; after the first.
606L$02514rounds_alt:
607	movdqa	xmm5,[ebx]	; pshufb mask (first row of L$key_const)
608	movdqa	xmm4,[32+ebx]	; round constant vector {1,1,1,1}
609	mov	ecx,7
610	movdqu	[edx-32],xmm0	; round key 0
611	movdqa	xmm1,xmm2
612	movdqu	[edx-16],xmm2	; round key 1
613L$029loop_key256:
614db	102,15,56,0,213	; pshufb xmm2,xmm5
615db	102,15,56,221,212	; aesenclast xmm2,xmm4
616	movdqa	xmm3,xmm0
617	pslldq	xmm0,4
618	pxor	xmm3,xmm0
619	pslldq	xmm0,4
620	pxor	xmm3,xmm0
621	pslldq	xmm0,4
622	pxor	xmm0,xmm3
623	pslld	xmm4,1	; double the round constant
624	pxor	xmm0,xmm2
625	movdqu	[edx],xmm0	; even-numbered round key
626	dec	ecx
627	jz	NEAR L$030done_key256
628	pshufd	xmm2,xmm0,255	; broadcast the top dword of the new key
629	pxor	xmm3,xmm3
630db	102,15,56,221,211	; aesenclast xmm2,xmm3 (zero round key)
631	movdqa	xmm3,xmm1
632	pslldq	xmm1,4
633	pxor	xmm3,xmm1
634	pslldq	xmm1,4
635	pxor	xmm3,xmm1
636	pslldq	xmm1,4
637	pxor	xmm1,xmm3
638	pxor	xmm2,xmm1
639	movdqu	[16+edx],xmm2	; odd-numbered round key
640	lea	edx,[32+edx]
641	movdqa	xmm1,xmm2
642	jmp	NEAR L$029loop_key256
643L$030done_key256:
644	mov	ecx,13
645	mov	DWORD [16+edx],ecx	; edx = schedule+224 here, so this writes offset 240
; Success exit shared by all paths: wipe key material from xmm0-xmm5 and
; return 0.
646L$023good_key:
647	pxor	xmm0,xmm0
648	pxor	xmm1,xmm1
649	pxor	xmm2,xmm2
650	pxor	xmm3,xmm3
651	pxor	xmm4,xmm4
652	pxor	xmm5,xmm5
653	xor	eax,eax
654	pop	ebx
655	pop	ebp
656	ret
657align	4
; Null key or schedule pointer: return -1 (no key bytes were loaded yet).
658L$015bad_pointer:
659	mov	eax,-1
660	pop	ebx
661	pop	ebp
662	ret
663align	4
; Unsupported key size: wipe the loaded key bytes from xmm0 and return -2.
664L$018bad_keybits:
665	pxor	xmm0,xmm0
666	mov	eax,-2
667	pop	ebx
668	pop	ebp
669	ret
; _GFp_aes_hw_set_encrypt_key: exported wrapper around
; __aesni_set_encrypt_key. Moves the three stack arguments into the
; registers that helper expects and returns its result in eax unchanged.
;   [esp+4]  -> eax = pointer to the user key bytes
;   [esp+8]  -> ecx = key size in bits
;   [esp+12] -> edx = pointer to the key schedule to fill
670global	_GFp_aes_hw_set_encrypt_key
671align	16
672_GFp_aes_hw_set_encrypt_key:
673L$_GFp_aes_hw_set_encrypt_key_begin:
674	mov	eax,DWORD [4+esp]
675	mov	ecx,DWORD [8+esp]
676	mov	edx,DWORD [12+esp]
677	call	__aesni_set_encrypt_key
678	ret
; L$key_const: constants for the pshufb/aesenclast key-expansion paths,
; addressed relative to ebx in __aesni_set_encrypt_key.
;   +0   0x0C0F0E0D x4  pshufb mask loaded into xmm5
;   +16  0x04070605 x4  not referenced by the code visible in this file
;   +32  1 x4           initial round constant
;   +48  27 x4          round constant used for the last two 128-bit rounds
; The trailing bytes are a NUL-terminated ASCII attribution string:
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
; The 64-byte alignment lets the movdqa loads from these rows succeed.
679align	64
680L$key_const:
681dd	202313229,202313229,202313229,202313229
682dd	67569157,67569157,67569157,67569157
683dd	1,1,1,1
684dd	27,27,27,27
685db	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
686db	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
687db	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
688db	115,108,46,111,114,103,62,0
; Declares _GFp_ia32cap_P as a 16-byte common symbol. Its second dword is
; read by __aesni_set_encrypt_key to pick a key-expansion path.
; NOTE(review): presumably the CPU capability vector filled in elsewhere -
; confirm where it is initialised.
689segment	.bss
690common	_GFp_ia32cap_P 16
691