1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol-prefixing support: only pulled in when BORINGSSL_PREFIX
; is defined by the build.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Select the code section according to the assembler's output format:
; "obj", "win32", or anything else (fallback at the final %else).
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
10%ifdef __YASM_VERSION_ID__
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically defines @feat.00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
; Not Yasm: define @feat.00 ourselves (see the safeseh link above for what
; this symbol means to the linker).
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
; _GFp_ia32cap_P is not imported with "extern"; it is declared as a
; "common" symbol in the .bss segment at the end of this file instead.
23;extern	_GFp_ia32cap_P
24global	_GFp_bn_mul_mont
25align	16
;-----------------------------------------------------------------------
; _GFp_bn_mul_mont: word-by-word Montgomery multiplication for 32-bit x86,
; implemented with MMX registers and pmuludq (32x32->64 multiplies).
;
; Stack arguments as they are used below (offsets relative to esp on entry):
;   [esp+4]   arg1  destination array; the result words are written here
;   [esp+8]   arg2  multiplicand array, called "a" in the comments below
;   [esp+12]  arg3  multiplier array "b"; one word is consumed per outer pass
;   [esp+16]  arg4  modulus array "n" (multiplied by m, subtracted in the tail)
;   [esp+20]  arg5  pointer; only the first dword it points to is read ("n0")
;   [esp+24]  arg6  word count "num"
; NOTE(review): the names a/b/n/n0/num are inferred from how each value is
; used here -- confirm against the C prototype.
;
; Returns eax = 1. ebp/ebx/esi/edi are pushed on entry and popped on exit;
; eax, ecx, edx, the flags and mm0-mm7 are overwritten. emms is executed
; before the final subtract/select tail.
;
; Private frame carved out below the caller's stack (esp = frame base):
;   [esp+4..16]  copies of arg1..arg4        [esp+20]  the n0 value
;   [esp+24]     esp as it was after the four pushes (restored at exit)
;   [esp+32...]  temporary vector tp[0..num]
;
; NOTE(review): no check on num is visible in this listing, and the
; first-pass loop always executes at least once before the final-word step,
; so very small num is not handled here -- confirm the minimum the callers
; guarantee.
;-----------------------------------------------------------------------
26_GFp_bn_mul_mont:
27L$_GFp_bn_mul_mont_begin:
28	push	ebp
29	push	ebx
30	push	esi
31	push	edi
32	xor	eax,eax	; NOTE(review): eax is overwritten by "mov eax,ebp" below before it is read
33	mov	edi,DWORD [40+esp]	; edi = arg6 (num); 4 pushes + return address put arg1 at [20+esp]
34	lea	esi,[20+esp]	; esi = address of the arg1 slot
35	lea	edx,[24+esp]	; edx = address of the arg2 slot
36	add	edi,2
37	neg	edi	; edi = -(num+2)
38	lea	ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(num+2): tentative frame base
39	neg	edi	; edi = num+2 again
40	mov	eax,ebp
41	sub	eax,edx
42	and	eax,2047
43	sub	ebp,eax	; lower ebp so that ebp and edx agree modulo 2048
44	xor	edx,ebp
45	and	edx,2048
46	xor	edx,2048	; edx = 2048 if bit 11 of the two addresses is equal, else 0
47	sub	ebp,edx	; ...in which case drop another 2048 so they differ in bit 11
48	and	ebp,-64	; round the frame base down to a 64-byte boundary
49	mov	eax,esp
50	sub	eax,ebp
51	and	eax,-4096	; eax = distance to the new frame, rounded down to whole 4096-byte steps
52	mov	edx,esp	; edx = current esp, stored in the frame below and restored at exit
53	lea	esp,[eax*1+ebp]	; start at ebp + k*4096 ...
54	mov	eax,DWORD [esp]	; ... and read one dword there (touches that page)
55	cmp	esp,ebp
56	ja	NEAR L$000page_walk
57	jmp	NEAR L$001page_walk_done
58align	16
; Step esp down 4096 bytes at a time, reading one dword per step, until esp
; is no longer above the frame base in ebp.
59L$000page_walk:
60	lea	esp,[esp-4096]
61	mov	eax,DWORD [esp]
62	cmp	esp,ebp
63	ja	NEAR L$000page_walk
64L$001page_walk_done:
; Copy the arguments from the caller's stack (via esi) into the new frame.
65	mov	eax,DWORD [esi]	; arg1 (destination)
66	mov	ebx,DWORD [4+esi]	; arg2 (a)
67	mov	ecx,DWORD [8+esi]	; arg3 (b)
68	mov	ebp,DWORD [12+esi]	; arg4 (n)
69	mov	esi,DWORD [16+esi]	; arg5 (pointer)
70	mov	esi,DWORD [esi]	; esi = first dword at arg5 (n0)
71	mov	DWORD [4+esp],eax
72	mov	DWORD [8+esp],ebx
73	mov	DWORD [12+esp],ecx
74	mov	DWORD [16+esp],ebp
75	mov	DWORD [20+esp],esi
76	lea	ebx,[edi-3]	; ebx = num-1 (edi still holds num+2)
77	mov	DWORD [24+esp],edx	; save the pre-frame esp for the epilogue
78	lea	eax,[_GFp_ia32cap_P]
79	bt	DWORD [eax],26	; CF = bit 26 of the first dword. NOTE(review): nothing consumes CF before the xor instructions below overwrite the flags -- presumably left over from a removed fallback path; confirm
80	mov	eax,-1
81	movd	mm7,eax	; mm7 = 0x00000000FFFFFFFF: mask selecting the low dword
82	mov	esi,DWORD [8+esp]	; esi = a
83	mov	edi,DWORD [12+esp]	; edi = b
84	mov	ebp,DWORD [16+esp]	; ebp = n
85	xor	edx,edx	; edx = i = 0 (outer index into b)
86	xor	ecx,ecx	; ecx = j = 0 (inner index into a / n)
; ---- First pass (i = 0): tp = (a*b[0] + n*m) >> 32, tp not yet read ----
87	movd	mm4,DWORD [edi]	; mm4 = b[0]
88	movd	mm5,DWORD [esi]	; mm5 = a[0]
89	movd	mm3,DWORD [ebp]	; mm3 = n[0]
90	pmuludq	mm5,mm4	; mm5 = a[0]*b[0] (64-bit product)
91	movq	mm2,mm5	; mm2 = a*b accumulator
92	movq	mm0,mm5
93	pand	mm0,mm7	; mm0 = low dword of the product
94	pmuludq	mm5,[20+esp]	; mm5 = lo32(product)*n0; later pmuludq use only its low dword ("m")
95	pmuludq	mm3,mm5	; mm3 = n[0]*m
96	paddq	mm3,mm0	; + low dword of a[0]*b[0]
97	movd	mm1,DWORD [4+ebp]	; preload n[1]
98	movd	mm0,DWORD [4+esi]	; preload a[1]
99	psrlq	mm2,32	; mm2 = carry of the a*b chain
100	psrlq	mm3,32	; mm3 = carry of the n*m chain
101	inc	ecx	; j = 1
102align	16
103L$0021st:
104	pmuludq	mm0,mm4	; a[j]*b[0]
105	pmuludq	mm1,mm5	; n[j]*m
106	paddq	mm2,mm0
107	paddq	mm3,mm1
108	movq	mm0,mm2
109	pand	mm0,mm7	; low dword of the a*b accumulator
110	movd	mm1,DWORD [4+ecx*4+ebp]	; preload n[j+1]
111	paddq	mm3,mm0
112	movd	mm0,DWORD [4+ecx*4+esi]	; preload a[j+1]
113	psrlq	mm2,32
114	movd	DWORD [28+ecx*4+esp],mm3	; tp[j-1] = low dword (tp starts at [32+esp])
115	psrlq	mm3,32
116	lea	ecx,[1+ecx]	; j++
117	cmp	ecx,ebx
118	jl	NEAR L$0021st	; loop while j < num-1 (signed compare)
; Last word of the first pass: uses the a[j]/n[j] preloaded by the loop.
119	pmuludq	mm0,mm4
120	pmuludq	mm1,mm5
121	paddq	mm2,mm0
122	paddq	mm3,mm1
123	movq	mm0,mm2
124	pand	mm0,mm7
125	paddq	mm3,mm0
126	movd	DWORD [28+ecx*4+esp],mm3	; tp[j-1]
127	psrlq	mm2,32
128	psrlq	mm3,32
129	paddq	mm3,mm2	; combine the two carry chains
130	movq	[32+ebx*4+esp],mm3	; 64-bit store: tp[num-1] and tp[num]
131	inc	edx	; i = 1
; ---- Remaining passes: tp = (tp + a*b[i] + n*m) >> 32 ----
132L$003outer:
133	xor	ecx,ecx	; j = 0
134	movd	mm4,DWORD [edx*4+edi]	; mm4 = b[i]
135	movd	mm5,DWORD [esi]	; mm5 = a[0]
136	movd	mm6,DWORD [32+esp]	; mm6 = tp[0]
137	movd	mm3,DWORD [ebp]	; mm3 = n[0]
138	pmuludq	mm5,mm4	; a[0]*b[i]
139	paddq	mm5,mm6	; + tp[0]
140	movq	mm0,mm5
141	movq	mm2,mm5
142	pand	mm0,mm7	; mm0 = low dword of the sum
143	pmuludq	mm5,[20+esp]	; mm5 = lo32(sum)*n0; low dword is m for this pass
144	pmuludq	mm3,mm5	; n[0]*m
145	paddq	mm3,mm0
146	movd	mm6,DWORD [36+esp]	; mm6 = tp[1]
147	movd	mm1,DWORD [4+ebp]	; preload n[1]
148	movd	mm0,DWORD [4+esi]	; preload a[1]
149	psrlq	mm2,32
150	psrlq	mm3,32
151	paddq	mm2,mm6	; a*b carry + tp[1]
152	inc	ecx	; j = 1
153	dec	ebx	; ebx = num-2: remaining count for the inner loop
154L$004inner:
155	pmuludq	mm0,mm4	; a[j]*b[i]
156	pmuludq	mm1,mm5	; n[j]*m
157	paddq	mm2,mm0
158	paddq	mm3,mm1
159	movq	mm0,mm2
160	movd	mm6,DWORD [36+ecx*4+esp]	; mm6 = tp[j+1]
161	pand	mm0,mm7
162	movd	mm1,DWORD [4+ecx*4+ebp]	; preload n[j+1]
163	paddq	mm3,mm0
164	movd	mm0,DWORD [4+ecx*4+esi]	; preload a[j+1]
165	psrlq	mm2,32
166	movd	DWORD [28+ecx*4+esp],mm3	; tp[j-1] = low dword
167	psrlq	mm3,32
168	paddq	mm2,mm6	; fold tp[j+1] into the a*b chain
169	dec	ebx	; sets ZF for the jnz below
170	lea	ecx,[1+ecx]	; j++ (lea leaves the flags from dec untouched)
171	jnz	NEAR L$004inner
172	mov	ebx,ecx	; ebx = num-1 again (j ended at num-1)
; Last word of this pass.
173	pmuludq	mm0,mm4
174	pmuludq	mm1,mm5
175	paddq	mm2,mm0
176	paddq	mm3,mm1
177	movq	mm0,mm2
178	pand	mm0,mm7
179	paddq	mm3,mm0
180	movd	DWORD [28+ecx*4+esp],mm3	; tp[num-2]
181	psrlq	mm2,32
182	psrlq	mm3,32
183	movd	mm6,DWORD [36+ebx*4+esp]	; mm6 = tp[num]
184	paddq	mm3,mm2
185	paddq	mm3,mm6
186	movq	[32+ebx*4+esp],mm3	; 64-bit store: tp[num-1] and tp[num]
187	lea	edx,[1+edx]	; i++
188	cmp	edx,ebx
189	jle	NEAR L$003outer	; loop while i <= num-1
190	emms	; leave MMX state before the integer-only tail
191align	16
; ---- Tail: dest = tp - n, then pick tp or tp - n by mask ----
192L$005common_tail:
193	mov	ebp,DWORD [16+esp]	; ebp = n
194	mov	edi,DWORD [4+esp]	; edi = destination
195	lea	esi,[32+esp]	; esi = &tp[0]
196	mov	eax,DWORD [esi]	; eax = tp[0]
197	mov	ecx,ebx	; ecx = num-1: loop counter
198	xor	edx,edx	; j = 0, and CF = 0 for the first sbb
199align	16
200L$006sub:
201	sbb	eax,DWORD [edx*4+ebp]	; tp[j] - n[j] - borrow
202	mov	DWORD [edx*4+edi],eax	; dest[j] = difference
203	dec	ecx	; dec leaves CF (the running borrow) intact
204	mov	eax,DWORD [4+edx*4+esi]	; next tp word (tp[num] after the last pass)
205	lea	edx,[1+edx]	; j++ without touching flags
206	jge	NEAR L$006sub	; flags are from dec: repeat until ecx goes negative (num passes)
207	sbb	eax,0	; eax = tp[num] - final borrow
208	mov	edx,-1
209	xor	edx,eax	; edx = ~eax. NOTE(review): the select below assumes eax is 0 or all-ones here -- confirm the bound on tp
210	jmp	NEAR L$007copy
211align	16
; For each word, from index num-1 down to 0:
;   dest[k] = (tp[k] & eax) | (dest[k] & edx)
; i.e. keep tp when the subtraction borrowed, otherwise keep tp - n. The
; choice is made with masks; there is no branch on eax/edx.
212L$007copy:
213	mov	esi,DWORD [32+ebx*4+esp]	; esi = tp[k]
214	mov	ebp,DWORD [ebx*4+edi]	; ebp = dest[k] (tp[k] - n[k] from the sub loop)
215	mov	DWORD [32+ebx*4+esp],ecx	; overwrite the temporary word; ecx is -1 here, left by the sub loop
216	and	esi,eax
217	and	ebp,edx
218	or	ebp,esi
219	mov	DWORD [ebx*4+edi],ebp
220	dec	ebx
221	jge	NEAR L$007copy
222	mov	esp,DWORD [24+esp]	; restore the esp saved before the frame was carved out
223	mov	eax,1	; return value
224	pop	edi
225	pop	esi
226	pop	ebx
227	pop	ebp
228	ret
; NUL-terminated ASCII identification string emitted after the code:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
229db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
230db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
231db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
232db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
233db	111,114,103,62,0
; 16-byte common symbol; _GFp_bn_mul_mont reads bit 26 of its first dword.
234segment	.bss
235common	_GFp_ia32cap_P 16
236