1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; When BORINGSSL_PREFIX is defined, include the prefix-symbol definitions
; before any global label is declared.
; NOTE(review): the include file is not visible here; presumably it renames
; the exported symbols -- confirm.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Select the code section according to the assembler's output format:
;   obj   -> "code" section, 32-bit, 64-byte aligned
;   win32 -> ".text", 64-byte aligned (plus the @feat.00 handling below)
;   other -> ".text" with no explicit alignment
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
; Yasm older than 1.1.0 is rejected outright.
10%ifdef __YASM_VERSION_ID__
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically includes .00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
; Any other assembler: define @feat.00 = 1 explicitly (the page linked above
; covers this symbol in the context of SafeSEH).
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
;-----------------------------------------------------------------------
; _GFp_gcm_init_clmul
; 32-bit x86, arguments on the stack, no registers saved.
;   [esp+4] -> edx : destination; three 16-byte values are stored at
;                    offsets 0, 16 and 32
;   [esp+8] -> eax : source; 16 bytes are read
; Touches eax, ecx, edx and xmm0-xmm5.
; The "db" sequences are hand-encoded instructions; each is decoded in the
; comment directly above it.
; NOTE(review): presumably the destination is the precomputed key table
; and the source is the hash key H -- confirm against the C prototype.
;-----------------------------------------------------------------------
23global	_GFp_gcm_init_clmul
24align	16
25_GFp_gcm_init_clmul:
26L$_GFp_gcm_init_clmul_begin:
27	mov	edx,DWORD [4+esp]
28	mov	eax,DWORD [8+esp]
; Position-independent address of L$bswap: call/pop yields the run-time
; address of L$000pic, and the lea adds the link-time label distance.
29	call	L$000pic
30L$000pic:
31	pop	ecx
32	lea	ecx,[(L$bswap-L$000pic)+ecx]
; Load the 16 source bytes and swap the two 64-bit halves (pshufd 78).
33	movdqu	xmm2,[eax]
34	pshufd	xmm2,xmm2,78
; xmm4 = top dword of xmm2 broadcast to all four lanes (pshufd 255).
35	pshufd	xmm4,xmm2,255
; Shift the 128-bit value in xmm2 left by one bit: each qword is shifted,
; and the bit shifted out of the low qword is OR-ed into the high qword.
; Interleaved with that, xmm5 becomes an all-ones mask when the top dword
; (as a signed value) is negative, i.e. when the original top bit was set.
36	movdqa	xmm3,xmm2
37	psllq	xmm2,1
38	pxor	xmm5,xmm5
39	psrlq	xmm3,63
40	pcmpgtd	xmm5,xmm4
41	pslldq	xmm3,8
42	por	xmm2,xmm3
; Conditionally XOR in the 16-byte constant stored at L$bswap+16 (applied
; only when the mask is set).  The memory operand needs 16-byte alignment.
43	pand	xmm5,[16+ecx]
44	pxor	xmm2,xmm5
; Carry-less square of xmm2 using three 64x64 multiplies (Karatsuba):
;   xmm0 = lo*lo, xmm1 = hi*hi, xmm3 = (hi^lo)*(hi^lo)
45	movdqa	xmm0,xmm2
46	movdqa	xmm1,xmm0
47	pshufd	xmm3,xmm0,78
48	pshufd	xmm4,xmm2,78
49	pxor	xmm3,xmm0
50	pxor	xmm4,xmm2
; pclmulqdq xmm0,xmm2,0x00
51db	102,15,58,68,194,0
; pclmulqdq xmm1,xmm2,0x11
52db	102,15,58,68,202,17
; pclmulqdq xmm3,xmm4,0x00
53db	102,15,58,68,220,0
; Middle term = xmm3 ^ xmm0 ^ xmm1; split it across the 256-bit product
; held in xmm1 (high half) : xmm0 (low half).
54	xorps	xmm3,xmm0
55	xorps	xmm3,xmm1
56	movdqa	xmm4,xmm3
57	psrldq	xmm3,8
58	pslldq	xmm4,8
59	pxor	xmm1,xmm3
60	pxor	xmm0,xmm4
; Fold the 256-bit product xmm1:xmm0 down to 128 bits in xmm0 with two
; shift-and-XOR phases (left shifts by 5, 1, 57; right shifts by 1, 5, 1).
; NOTE(review): presumably reduction modulo the GHASH polynomial -- confirm.
61	movdqa	xmm4,xmm0
62	movdqa	xmm3,xmm0
63	psllq	xmm0,5
64	pxor	xmm3,xmm0
65	psllq	xmm0,1
66	pxor	xmm0,xmm3
67	psllq	xmm0,57
68	movdqa	xmm3,xmm0
69	pslldq	xmm0,8
70	psrldq	xmm3,8
71	pxor	xmm0,xmm4
72	pxor	xmm1,xmm3
73	movdqa	xmm4,xmm0
74	psrlq	xmm0,1
75	pxor	xmm1,xmm4
76	pxor	xmm4,xmm0
77	psrlq	xmm0,5
78	pxor	xmm0,xmm4
79	psrlq	xmm0,1
80	pxor	xmm0,xmm1
; Store the results:
;   [edx+0]  = xmm2 (shifted/adjusted input)
;   [edx+16] = xmm0 (its reduced square)
;   [edx+32] = { hi^lo of xmm2 (low qword), hi^lo of xmm0 (high qword) }
81	pshufd	xmm3,xmm2,78
82	pshufd	xmm4,xmm0,78
83	pxor	xmm3,xmm2
84	movdqu	[edx],xmm2
85	pxor	xmm4,xmm0
86	movdqu	[16+edx],xmm0
; palignr xmm4,xmm3,8
87db	102,15,58,15,227,8
88	movdqu	[32+edx],xmm4
89	ret
;-----------------------------------------------------------------------
; _GFp_gcm_gmult_clmul
; 32-bit x86, arguments on the stack, no registers saved.
;   [esp+4] -> eax : 16-byte value, read and then overwritten in place
;   [esp+8] -> edx : table; 16 bytes are read at offset 0 and at offset 32
; Touches eax, ecx, edx and xmm0-xmm5.
; The "db" sequences are hand-encoded instructions; each is decoded in the
; comment directly above it.
; NOTE(review): the table layout matches what _GFp_gcm_init_clmul stores
; (value at +0, hi^lo halves at +32); presumably this multiplies the
; 16-byte value by the hash key -- confirm against the C prototype.
;-----------------------------------------------------------------------
90global	_GFp_gcm_gmult_clmul
91align	16
92_GFp_gcm_gmult_clmul:
93L$_GFp_gcm_gmult_clmul_begin:
94	mov	eax,DWORD [4+esp]
95	mov	edx,DWORD [8+esp]
; Position-independent address of L$bswap in ecx.
96	call	L$001pic
97L$001pic:
98	pop	ecx
99	lea	ecx,[(L$bswap-L$001pic)+ecx]
; xmm0 = value, xmm5 = byte-reversal mask (aligned load), xmm2 = table[0].
100	movdqu	xmm0,[eax]
101	movdqa	xmm5,[ecx]
102	movups	xmm2,[edx]
; pshufb xmm0,xmm5  -- reverse the byte order of xmm0
103db	102,15,56,0,197
; xmm4 = table[32] (the precomputed hi^lo halves).
104	movups	xmm4,[32+edx]
; Carry-less multiply xmm0 * xmm2 with three 64x64 multiplies (Karatsuba):
;   xmm0 = lo*lo, xmm1 = hi*hi, xmm3 = (hi^lo of xmm0) * low qword of xmm4
105	movdqa	xmm1,xmm0
106	pshufd	xmm3,xmm0,78
107	pxor	xmm3,xmm0
; pclmulqdq xmm0,xmm2,0x00
108db	102,15,58,68,194,0
; pclmulqdq xmm1,xmm2,0x11
109db	102,15,58,68,202,17
; pclmulqdq xmm3,xmm4,0x00
110db	102,15,58,68,220,0
; Middle term = xmm3 ^ xmm0 ^ xmm1; split it across xmm1:xmm0.
111	xorps	xmm3,xmm0
112	xorps	xmm3,xmm1
113	movdqa	xmm4,xmm3
114	psrldq	xmm3,8
115	pslldq	xmm4,8
116	pxor	xmm1,xmm3
117	pxor	xmm0,xmm4
; Fold the 256-bit product xmm1:xmm0 down to 128 bits in xmm0 (same
; shift-and-XOR sequence as used in the other routines of this file).
118	movdqa	xmm4,xmm0
119	movdqa	xmm3,xmm0
120	psllq	xmm0,5
121	pxor	xmm3,xmm0
122	psllq	xmm0,1
123	pxor	xmm0,xmm3
124	psllq	xmm0,57
125	movdqa	xmm3,xmm0
126	pslldq	xmm0,8
127	psrldq	xmm3,8
128	pxor	xmm0,xmm4
129	pxor	xmm1,xmm3
130	movdqa	xmm4,xmm0
131	psrlq	xmm0,1
132	pxor	xmm1,xmm4
133	pxor	xmm4,xmm0
134	psrlq	xmm0,5
135	pxor	xmm0,xmm4
136	psrlq	xmm0,1
137	pxor	xmm0,xmm1
; pshufb xmm0,xmm5  -- restore the original byte order
138db	102,15,56,0,197
; Write the result back over the input value.
139	movdqu	[eax],xmm0
140	ret
;-----------------------------------------------------------------------
; _GFp_gcm_ghash_clmul
; 32-bit x86, arguments on the stack.  ebp, ebx, esi and edi are pushed on
; entry and popped before returning (only ebx and esi are modified).
;   [esp+20] -> eax : 16-byte accumulator, read at entry, written at exit
;   [esp+24] -> edx : table; 16-byte entries at offsets 0, 16 and 32 are read
;   [esp+28] -> esi : input pointer, advanced as blocks are consumed
;   [esp+32] -> ebx : remaining byte count
; Also touches ecx and xmm0-xmm7.
; Input is consumed in 16-byte blocks, two per main-loop iteration.
; The "db" sequences are hand-encoded instructions; each is decoded in the
; comment directly above it.
; NOTE(review): a byte count of 0 is not special-cased (the first
; "sub ebx,16" wraps and the two-block path is taken); presumably callers
; always pass a non-zero multiple of 16 -- confirm.
;-----------------------------------------------------------------------
141global	_GFp_gcm_ghash_clmul
142align	16
143_GFp_gcm_ghash_clmul:
144L$_GFp_gcm_ghash_clmul_begin:
145	push	ebp
146	push	ebx
147	push	esi
148	push	edi
; Four pushes plus the return address put the first argument at [esp+20].
149	mov	eax,DWORD [20+esp]
150	mov	edx,DWORD [24+esp]
151	mov	esi,DWORD [28+esp]
152	mov	ebx,DWORD [32+esp]
; Position-independent address of L$bswap in ecx.
153	call	L$002pic
154L$002pic:
155	pop	ecx
156	lea	ecx,[(L$bswap-L$002pic)+ecx]
; xmm0 = accumulator, xmm5 = byte-reversal mask, xmm2 = table[0].
157	movdqu	xmm0,[eax]
158	movdqa	xmm5,[ecx]
159	movdqu	xmm2,[edx]
; pshufb xmm0,xmm5  -- reverse the byte order of the accumulator
160db	102,15,56,0,197
; Exactly one block (count == 16): take the single-block path.
161	sub	ebx,16
162	jz	NEAR L$003odd_tail
; Two or more blocks: load the first pair and byte-reverse both.
163	movdqu	xmm3,[esi]
164	movdqu	xmm6,[16+esi]
; pshufb xmm3,xmm5
165db	102,15,56,0,221
; pshufb xmm6,xmm5
166db	102,15,56,0,245
; xmm5 is reused for table[32]; the mask is reloaded from [ecx] later.
167	movdqu	xmm5,[32+edx]
; Fold the first block into the accumulator, and begin multiplying the
; second block (xmm6) by table[0] (xmm2):
;   xmm6 = lo*lo, xmm7 = hi*hi, xmm3 = (hi^lo of block) * low qword of xmm5
168	pxor	xmm0,xmm3
169	pshufd	xmm3,xmm6,78
170	movdqa	xmm7,xmm6
171	pxor	xmm3,xmm6
172	lea	esi,[32+esi]
; pclmulqdq xmm6,xmm2,0x00
173db	102,15,58,68,242,0
; pclmulqdq xmm7,xmm2,0x11
174db	102,15,58,68,250,17
; pclmulqdq xmm3,xmm5,0x00
175db	102,15,58,68,221,0
; xmm2 = table[16], the multiplier for the accumulator.
176	movups	xmm2,[16+edx]
177	nop
; If no further full pair follows, finish in the even tail.
178	sub	ebx,32
179	jbe	NEAR L$004even_tail
180	jmp	NEAR L$005mod_loop
181align	32
; Main loop: each iteration consumes 32 more input bytes.  It multiplies
; the accumulator by table[16], combines that with the pending product of
; the previous second block (xmm6/xmm7/xmm3), reduces, and -- interleaved
; with the reduction -- loads the next pair and starts its multiply.
182L$005mod_loop:
183	pshufd	xmm4,xmm0,78
184	movdqa	xmm1,xmm0
185	pxor	xmm4,xmm0
186	nop
; pclmulqdq xmm0,xmm2,0x00
187db	102,15,58,68,194,0
; pclmulqdq xmm1,xmm2,0x11
188db	102,15,58,68,202,17
; pclmulqdq xmm4,xmm5,0x10  -- low qword of xmm4 * high qword of xmm5
189db	102,15,58,68,229,16
; Reload table[0] and the byte-reversal mask for the next pair, and merge
; the two partial products (low halves into xmm0, high halves into xmm1).
190	movups	xmm2,[edx]
191	xorps	xmm0,xmm6
192	movdqa	xmm5,[ecx]
193	xorps	xmm1,xmm7
194	movdqu	xmm7,[esi]
195	pxor	xmm3,xmm0
196	movdqu	xmm6,[16+esi]
197	pxor	xmm3,xmm1
; pshufb xmm7,xmm5  -- byte-reverse the next first block
198db	102,15,56,0,253
; Combined middle term, split across xmm1:xmm0.
199	pxor	xmm4,xmm3
200	movdqa	xmm3,xmm4
201	psrldq	xmm4,8
202	pslldq	xmm3,8
203	pxor	xmm1,xmm4
204	pxor	xmm0,xmm3
; pshufb xmm6,xmm5  -- byte-reverse the next second block
205db	102,15,56,0,245
; XOR the next first block into the high half of the unreduced product.
206	pxor	xmm1,xmm7
207	movdqa	xmm7,xmm6
; First reduction phase (left shifts by 5, 1, 57).
208	movdqa	xmm4,xmm0
209	movdqa	xmm3,xmm0
210	psllq	xmm0,5
211	pxor	xmm3,xmm0
212	psllq	xmm0,1
213	pxor	xmm0,xmm3
; pclmulqdq xmm6,xmm2,0x00  -- next second block: lo*lo
214db	102,15,58,68,242,0
215	movups	xmm5,[32+edx]
216	psllq	xmm0,57
217	movdqa	xmm3,xmm0
218	pslldq	xmm0,8
219	psrldq	xmm3,8
220	pxor	xmm0,xmm4
221	pxor	xmm1,xmm3
; Second reduction phase (right shifts by 1, 5, 1), interleaved with the
; preparation of hi^lo for the next second block.
222	pshufd	xmm3,xmm7,78
223	movdqa	xmm4,xmm0
224	psrlq	xmm0,1
225	pxor	xmm3,xmm7
226	pxor	xmm1,xmm4
; pclmulqdq xmm7,xmm2,0x11  -- next second block: hi*hi
227db	102,15,58,68,250,17
228	movups	xmm2,[16+edx]
229	pxor	xmm4,xmm0
230	psrlq	xmm0,5
231	pxor	xmm0,xmm4
232	psrlq	xmm0,1
233	pxor	xmm0,xmm1
; pclmulqdq xmm3,xmm5,0x00  -- next second block: middle term
234db	102,15,58,68,221,0
235	lea	esi,[32+esi]
236	sub	ebx,32
237	ja	NEAR L$005mod_loop
; Even tail: finish the last pending pair (same multiply/merge/reduce as
; the loop body, without loading further input).
238L$004even_tail:
239	pshufd	xmm4,xmm0,78
240	movdqa	xmm1,xmm0
241	pxor	xmm4,xmm0
; pclmulqdq xmm0,xmm2,0x00
242db	102,15,58,68,194,0
; pclmulqdq xmm1,xmm2,0x11
243db	102,15,58,68,202,17
; pclmulqdq xmm4,xmm5,0x10
244db	102,15,58,68,229,16
; Restore the byte-reversal mask in xmm5 for the final pshufb.
245	movdqa	xmm5,[ecx]
246	xorps	xmm0,xmm6
247	xorps	xmm1,xmm7
248	pxor	xmm3,xmm0
249	pxor	xmm3,xmm1
250	pxor	xmm4,xmm3
251	movdqa	xmm3,xmm4
252	psrldq	xmm4,8
253	pslldq	xmm3,8
254	pxor	xmm1,xmm4
255	pxor	xmm0,xmm3
; Fold xmm1:xmm0 down to 128 bits in xmm0.
256	movdqa	xmm4,xmm0
257	movdqa	xmm3,xmm0
258	psllq	xmm0,5
259	pxor	xmm3,xmm0
260	psllq	xmm0,1
261	pxor	xmm0,xmm3
262	psllq	xmm0,57
263	movdqa	xmm3,xmm0
264	pslldq	xmm0,8
265	psrldq	xmm3,8
266	pxor	xmm0,xmm4
267	pxor	xmm1,xmm3
268	movdqa	xmm4,xmm0
269	psrlq	xmm0,1
270	pxor	xmm1,xmm4
271	pxor	xmm4,xmm0
272	psrlq	xmm0,5
273	pxor	xmm0,xmm4
274	psrlq	xmm0,1
275	pxor	xmm0,xmm1
; ebx == 0 here means one more 16-byte block remains; any other value
; means the input is exhausted.
276	test	ebx,ebx
277	jnz	NEAR L$006done
; The single-block path multiplies by table[0].
278	movups	xmm2,[edx]
; Odd tail: process one final block: byte-reverse it, XOR it into the
; accumulator, multiply by xmm2 (table[0]) and reduce.  hi^lo of xmm2 is
; computed here instead of being read from table[32].
279L$003odd_tail:
280	movdqu	xmm3,[esi]
; pshufb xmm3,xmm5
281db	102,15,56,0,221
282	pxor	xmm0,xmm3
283	movdqa	xmm1,xmm0
284	pshufd	xmm3,xmm0,78
285	pshufd	xmm4,xmm2,78
286	pxor	xmm3,xmm0
287	pxor	xmm4,xmm2
; pclmulqdq xmm0,xmm2,0x00
288db	102,15,58,68,194,0
; pclmulqdq xmm1,xmm2,0x11
289db	102,15,58,68,202,17
; pclmulqdq xmm3,xmm4,0x00
290db	102,15,58,68,220,0
; Middle term = xmm3 ^ xmm0 ^ xmm1; split it across xmm1:xmm0.
291	xorps	xmm3,xmm0
292	xorps	xmm3,xmm1
293	movdqa	xmm4,xmm3
294	psrldq	xmm3,8
295	pslldq	xmm4,8
296	pxor	xmm1,xmm3
297	pxor	xmm0,xmm4
; Fold xmm1:xmm0 down to 128 bits in xmm0.
298	movdqa	xmm4,xmm0
299	movdqa	xmm3,xmm0
300	psllq	xmm0,5
301	pxor	xmm3,xmm0
302	psllq	xmm0,1
303	pxor	xmm0,xmm3
304	psllq	xmm0,57
305	movdqa	xmm3,xmm0
306	pslldq	xmm0,8
307	psrldq	xmm3,8
308	pxor	xmm0,xmm4
309	pxor	xmm1,xmm3
310	movdqa	xmm4,xmm0
311	psrlq	xmm0,1
312	pxor	xmm1,xmm4
313	pxor	xmm4,xmm0
314	psrlq	xmm0,5
315	pxor	xmm0,xmm4
316	psrlq	xmm0,1
317	pxor	xmm0,xmm1
; Common exit: restore the byte order, store the accumulator, and pop the
; saved registers in reverse push order.
318L$006done:
; pshufb xmm0,xmm5
319db	102,15,56,0,197
320	movdqu	[eax],xmm0
321	pop	edi
322	pop	esi
323	pop	ebx
324	pop	ebp
325	ret
; Constant data, emitted in the same section as the code so the routines
; above can address it as (L$bswap - L$NNNpic) + run-time address.
; The 64-byte alignment covers the 16-byte-aligned accesses the code makes
; (movdqa xmm5,[ecx] and pand xmm5,[16+ecx]).
326align	64
327L$bswap:
; +0:  pshufb mask 15..0 -- reverses the byte order of a 16-byte vector.
328db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; +16: constant with byte 0 = 1 and byte 15 = 194 (0xC2); XOR-ed in
;      conditionally by _GFp_gcm_init_clmul.
; NOTE(review): presumably encodes the GHASH reduction polynomial -- confirm.
329db	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
330align	64
; 256-entry table of 16-bit words.  Nothing in the code visible in this file
; references it.
; NOTE(review): presumably a leftover from a table-driven (non-CLMUL)
; code path of the generator -- confirm before removing.
331L$007rem_8bit:
332dw	0,450,900,582,1800,1738,1164,1358
333dw	3600,4050,3476,3158,2328,2266,2716,2910
334dw	7200,7650,8100,7782,6952,6890,6316,6510
335dw	4656,5106,4532,4214,5432,5370,5820,6014
336dw	14400,14722,15300,14854,16200,16010,15564,15630
337dw	13904,14226,13780,13334,12632,12442,13020,13086
338dw	9312,9634,10212,9766,9064,8874,8428,8494
339dw	10864,11186,10740,10294,11640,11450,12028,12094
340dw	28800,28994,29444,29382,30600,30282,29708,30158
341dw	32400,32594,32020,31958,31128,30810,31260,31710
342dw	27808,28002,28452,28390,27560,27242,26668,27118
343dw	25264,25458,24884,24822,26040,25722,26172,26622
344dw	18624,18690,19268,19078,20424,19978,19532,19854
345dw	18128,18194,17748,17558,16856,16410,16988,17310
346dw	21728,21794,22372,22182,21480,21034,20588,20910
347dw	23280,23346,22900,22710,24056,23610,24188,24510
348dw	57600,57538,57988,58182,58888,59338,58764,58446
349dw	61200,61138,60564,60758,59416,59866,60316,59998
350dw	64800,64738,65188,65382,64040,64490,63916,63598
351dw	62256,62194,61620,61814,62520,62970,63420,63102
352dw	55616,55426,56004,56070,56904,57226,56780,56334
353dw	55120,54930,54484,54550,53336,53658,54236,53790
354dw	50528,50338,50916,50982,49768,50090,49644,49198
355dw	52080,51890,51444,51510,52344,52666,53244,52798
356dw	37248,36930,37380,37830,38536,38730,38156,38094
357dw	40848,40530,39956,40406,39064,39258,39708,39646
358dw	36256,35938,36388,36838,35496,35690,35116,35054
359dw	33712,33394,32820,33270,33976,34170,34620,34558
360dw	43456,43010,43588,43910,44744,44810,44364,44174
361dw	42960,42514,42068,42390,41176,41242,41820,41630
362dw	46560,46114,46692,47014,45800,45866,45420,45230
363dw	48112,47666,47220,47542,48376,48442,49020,48830
; NUL-terminated ASCII tag:
;   "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
364db	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
365db	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
366db	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
367db	0
368