1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol prefixing: when BORINGSSL_PREFIX is defined, pull in the
; NASM include file named below before any symbol is declared.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Pick the code-section declaration according to the NASM output format.
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): $@feat.00 is presumably the COFF "@feat.00" feature symbol
; (value 1 conventionally marks the object as SAFESEH-aware) - confirm.
10$@feat.00 equ 1
11section	.text	code align=64
12%else
13section	.text	code
14%endif
; The extern declaration stays commented out; this file declares
; _GFp_ia32cap_P itself as a common symbol in its .bss segment.
15;extern	_GFp_ia32cap_P
;-----------------------------------------------------------------------
; _GFp_bn_mul_mont
;
; 32-bit x86 routine working on 32-bit words with MMX registers
; (pmuludq / paddq / psrlq). It reads six dword stack arguments, found at
; [20+esp]..[40+esp] after the four pushes below; called arg0..arg5 here:
;   arg0  pointer; the result words are stored through it
;   arg1  pointer; words read by the inner loops (index ecx)
;   arg2  pointer; one word read per outer pass (index edx)
;   arg3  pointer; words multiplied by the per-pass factor, later subtracted
;   arg4  pointer to a single dword that is loaded once
;   arg5  word count
; NOTE(review): this presumably corresponds to the C prototype
;   int bn_mul_mont(rp, ap, bp, np, n0, num) - confirm against the header.
; NOTE(review): no lower bound is enforced on arg5 here and every loop
;   below tests its condition after the body; callers presumably guarantee
;   a minimum count - TODO confirm.
;
; Returns eax = 1. ebp/ebx/esi/edi are pushed on entry and popped before
; ret; eax, ecx, edx, flags and mm0-mm7 are overwritten (emms is executed
; before the tail).
;
; Scratch frame, relative to esp once the page walk has finished:
;   [ 4+esp] arg0     [ 8+esp] arg1     [12+esp] arg2     [16+esp] arg3
;   [20+esp] dword loaded through arg4
;   [24+esp] esp value to restore before the pops
;   [32+esp] first word of the temporary vector ("tp" in the comments)
;-----------------------------------------------------------------------
16global	_GFp_bn_mul_mont
17align	16
18_GFp_bn_mul_mont:
19L$_GFp_bn_mul_mont_begin:
; Save the four registers that are restored by the pops before ret.
20	push	ebp
21	push	ebx
22	push	esi
23	push	edi
24	xor	eax,eax
25	mov	edi,DWORD [40+esp]	; edi = arg5 (word count)
26	lea	esi,[20+esp]	; esi = address of the arg0 slot (argument block)
27	lea	edx,[24+esp]	; edx = address of the arg1 slot, alignment reference below
; Size the scratch frame: ebp = esp - 32 - 4*(count+2).
28	add	edi,2
29	neg	edi
30	lea	ebp,[edi*4+esp-32]
31	neg	edi	; edi = count+2 again
; Lower ebp so that it is congruent to edx modulo 2048 ...
32	mov	eax,ebp
33	sub	eax,edx
34	and	eax,2047
35	sub	ebp,eax
; ... then, if ebp and edx agree in bit 11, lower ebp by another 2048 so
; that they differ in that bit.
; NOTE(review): presumably done to limit cache aliasing between the frame
; and the caller's data - confirm against the generating Perl script.
36	xor	edx,ebp
37	and	edx,2048
38	xor	edx,2048
39	sub	ebp,edx
40	and	ebp,-64	; round the frame base down to a 64-byte boundary
; Move esp down to ebp in 4096-byte steps, reading one dword per step.
41	mov	eax,esp
42	sub	eax,ebp
43	and	eax,-4096	; eax = (esp - ebp) rounded down to a multiple of 4096
44	mov	edx,esp	; edx = esp after the pushes; stored at [24+esp] below
45	lea	esp,[eax*1+ebp]
46	mov	eax,DWORD [esp]	; read at the first step
47	cmp	esp,ebp
48	ja	NEAR L$000page_walk	; unsigned: keep stepping while esp is above ebp
49	jmp	NEAR L$001page_walk_done
50align	16
51L$000page_walk:
52	lea	esp,[esp-4096]
53	mov	eax,DWORD [esp]	; read one dword at the new esp
54	cmp	esp,ebp
55	ja	NEAR L$000page_walk
56L$001page_walk_done:
; Copy the arguments from the caller's block (esi) into the new frame.
57	mov	eax,DWORD [esi]	; arg0
58	mov	ebx,DWORD [4+esi]	; arg1
59	mov	ecx,DWORD [8+esi]	; arg2
60	mov	ebp,DWORD [12+esi]	; arg3
61	mov	esi,DWORD [16+esi]	; arg4 (a pointer)
62	mov	esi,DWORD [esi]	; esi = dword that arg4 points to
63	mov	DWORD [4+esp],eax
64	mov	DWORD [8+esp],ebx
65	mov	DWORD [12+esp],ecx
66	mov	DWORD [16+esp],ebp
67	mov	DWORD [20+esp],esi
68	lea	ebx,[edi-3]	; ebx = (count+2)-3 = count-1, index of the last word
69	mov	DWORD [24+esp],edx	; remember the esp to restore on exit
; Test bit 26 of the first dword of _GFp_ia32cap_P.
; NOTE(review): nothing in this routine branches on the resulting carry
; flag before xor edx,edx overwrites it, so the MMX code below always runs.
70	lea	eax,[_GFp_ia32cap_P]
71	bt	DWORD [eax],26
72	mov	eax,-1
73	movd	mm7,eax	; mm7 = 0x00000000FFFFFFFF, mask for the low 32 bits
74	mov	esi,DWORD [8+esp]	; esi = arg1
75	mov	edi,DWORD [12+esp]	; edi = arg2
76	mov	ebp,DWORD [16+esp]	; ebp = arg3
77	xor	edx,edx	; edx = outer index, starts at 0
78	xor	ecx,ecx	; ecx = inner index, starts at 0
; First pass (outer index 0): nothing is read from tp yet.
79	movd	mm4,DWORD [edi]	; mm4 = arg2[0]
80	movd	mm5,DWORD [esi]	; mm5 = arg1[0]
81	movd	mm3,DWORD [ebp]	; mm3 = arg3[0]
82	pmuludq	mm5,mm4	; mm5 = arg1[0] * arg2[0] (64-bit product)
83	movq	mm2,mm5
84	movq	mm0,mm5
85	pand	mm0,mm7	; mm0 = low word of that product
86	pmuludq	mm5,[20+esp]	; mm5 = low word * dword at [20+esp]; its low word is the per-pass factor
87	pmuludq	mm3,mm5	; mm3 = arg3[0] * factor
88	paddq	mm3,mm0
89	movd	mm1,DWORD [4+ebp]	; preload arg3[1]
90	movd	mm0,DWORD [4+esi]	; preload arg1[1]
91	psrlq	mm2,32	; mm2 = carry of the arg1*arg2 chain
92	psrlq	mm3,32	; mm3 = carry of the arg3*factor chain
93	inc	ecx
94align	16
95L$0021st:
; Each iteration handles word ecx, stores tp[ecx-1] and preloads word ecx+1.
96	pmuludq	mm0,mm4
97	pmuludq	mm1,mm5
98	paddq	mm2,mm0
99	paddq	mm3,mm1
100	movq	mm0,mm2
101	pand	mm0,mm7	; low word of the arg1*arg2 chain
102	movd	mm1,DWORD [4+ecx*4+ebp]	; next arg3 word
103	paddq	mm3,mm0
104	movd	mm0,DWORD [4+ecx*4+esi]	; next arg1 word
105	psrlq	mm2,32
106	movd	DWORD [28+ecx*4+esp],mm3	; tp[ecx-1] = low word of mm3
107	psrlq	mm3,32
108	lea	ecx,[1+ecx]
109	cmp	ecx,ebx
110	jl	NEAR L$0021st	; signed: repeat while ecx < count-1
; Last word of the first pass: the same step without preloading.
111	pmuludq	mm0,mm4
112	pmuludq	mm1,mm5
113	paddq	mm2,mm0
114	paddq	mm3,mm1
115	movq	mm0,mm2
116	pand	mm0,mm7
117	paddq	mm3,mm0
118	movd	DWORD [28+ecx*4+esp],mm3	; tp[ecx-1]
119	psrlq	mm2,32
120	psrlq	mm3,32
121	paddq	mm3,mm2	; add the two remaining carries together
122	movq	[32+ebx*4+esp],mm3	; 8-byte store: tp[ebx] and tp[ebx+1]
123	inc	edx	; outer index = 1
124L$003outer:
; Later passes: like the first pass, but the current tp words are added in.
125	xor	ecx,ecx
126	movd	mm4,DWORD [edx*4+edi]	; mm4 = arg2[edx]
127	movd	mm5,DWORD [esi]	; mm5 = arg1[0]
128	movd	mm6,DWORD [32+esp]	; mm6 = tp[0]
129	movd	mm3,DWORD [ebp]	; mm3 = arg3[0]
130	pmuludq	mm5,mm4
131	paddq	mm5,mm6	; mm5 = arg1[0]*arg2[edx] + tp[0]
132	movq	mm0,mm5
133	movq	mm2,mm5
134	pand	mm0,mm7	; low word of that sum
135	pmuludq	mm5,[20+esp]	; low word of mm5 becomes this pass's factor
136	pmuludq	mm3,mm5
137	paddq	mm3,mm0
138	movd	mm6,DWORD [36+esp]	; mm6 = tp[1]
139	movd	mm1,DWORD [4+ebp]	; preload arg3[1]
140	movd	mm0,DWORD [4+esi]	; preload arg1[1]
141	psrlq	mm2,32
142	psrlq	mm3,32
143	paddq	mm2,mm6	; fold tp[1] into the arg1*arg2 chain
144	inc	ecx
145	dec	ebx	; ebx now counts the inner-loop iterations down to zero
146L$004inner:
147	pmuludq	mm0,mm4
148	pmuludq	mm1,mm5
149	paddq	mm2,mm0
150	paddq	mm3,mm1
151	movq	mm0,mm2
152	movd	mm6,DWORD [36+ecx*4+esp]	; mm6 = tp[ecx+1]
153	pand	mm0,mm7
154	movd	mm1,DWORD [4+ecx*4+ebp]	; next arg3 word
155	paddq	mm3,mm0
156	movd	mm0,DWORD [4+ecx*4+esi]	; next arg1 word
157	psrlq	mm2,32
158	movd	DWORD [28+ecx*4+esp],mm3	; tp[ecx-1]
159	psrlq	mm3,32
160	paddq	mm2,mm6
161	dec	ebx	; sets ZF for the jnz below
162	lea	ecx,[1+ecx]	; lea leaves the flags from dec untouched
163	jnz	NEAR L$004inner
164	mov	ebx,ecx	; ebx = ecx, replacing the exhausted countdown
; Last word of this pass.
165	pmuludq	mm0,mm4
166	pmuludq	mm1,mm5
167	paddq	mm2,mm0
168	paddq	mm3,mm1
169	movq	mm0,mm2
170	pand	mm0,mm7
171	paddq	mm3,mm0
172	movd	DWORD [28+ecx*4+esp],mm3	; tp[ecx-1]
173	psrlq	mm2,32
174	psrlq	mm3,32
175	movd	mm6,DWORD [36+ebx*4+esp]	; mm6 = tp[ebx+1], top word from the previous pass
176	paddq	mm3,mm2
177	paddq	mm3,mm6
178	movq	[32+ebx*4+esp],mm3	; 8-byte store: tp[ebx] and tp[ebx+1]
179	lea	edx,[1+edx]
180	cmp	edx,ebx
181	jle	NEAR L$003outer	; signed: repeat while outer index <= ebx
182	emms	; leave MMX state before the integer-only tail
183align	16
184L$005common_tail:
; Store tp - arg3 through arg0 word by word, then select between that
; difference and tp itself without branching on the data.
185	mov	ebp,DWORD [16+esp]	; ebp = arg3
186	mov	edi,DWORD [4+esp]	; edi = arg0
187	lea	esi,[32+esp]	; esi = &tp[0]
188	mov	eax,DWORD [esi]	; eax = tp[0]
189	mov	ecx,ebx	; ecx = loop countdown, starts at ebx
190	xor	edx,edx	; edx = 0; also clears CF for the first sbb
191align	16
192L$006sub:
193	sbb	eax,DWORD [edx*4+ebp]	; eax = tp[edx] - arg3[edx] - borrow
194	mov	DWORD [edx*4+edi],eax	; arg0[edx] = difference word
195	dec	ecx	; dec does not modify CF, so the borrow survives
196	mov	eax,DWORD [4+edx*4+esi]	; eax = tp[edx+1]
197	lea	edx,[1+edx]	; lea does not modify flags
198	jge	NEAR L$006sub	; flags from dec: loop until ecx goes negative
199	sbb	eax,0	; eax = word above the last subtracted tp word, minus the final borrow
200	mov	edx,-1
201	xor	edx,eax	; edx = bitwise NOT of eax
202	jmp	NEAR L$007copy
203align	16
204L$007copy:
; eax masks the tp word and edx = ~eax masks the word already stored
; through arg0; the OR of the two is written back through arg0.
; NOTE(review): this acts as a select only if eax is 0 or all-ones here -
; confirm that the top tp word can only be 0 or 1 at this point.
205	mov	esi,DWORD [32+ebx*4+esp]	; esi = tp[ebx]
206	mov	ebp,DWORD [ebx*4+edi]	; ebp = arg0[ebx]
207	mov	DWORD [32+ebx*4+esp],ecx	; overwrite tp[ebx] with ecx (-1 after the loop above)
208	and	esi,eax
209	and	ebp,edx
210	or	ebp,esi
211	mov	DWORD [ebx*4+edi],ebp
212	dec	ebx
213	jge	NEAR L$007copy	; walk ebx down to and including 0
214	mov	esp,DWORD [24+esp]	; restore the esp saved before the page walk
215	mov	eax,1	; return value
216	pop	edi
217	pop	esi
218	pop	ebx
219	pop	ebp
220	ret
; NUL-terminated ASCII identification string, emitted after the ret:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
221db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
222db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
223db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
224db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
225db	111,114,103,62,0
; _GFp_ia32cap_P, whose first dword is bit-tested in _GFp_bn_mul_mont, is
; declared here as a common symbol of size 16.
226segment	.bss
227common	_GFp_ia32cap_P 16
228