xref: /freebsd/sys/crypto/openssl/i386/x86-mont.S (revision 06c3fb27)
1/* Do not modify. This file is auto-generated from x86-mont.pl. */
2#ifdef PIC
/*
 * bn_mul_mont -- PIC build.
 *
 * cdecl entry point taking six stack arguments.  In the comments of this
 * block they are called, in order, r, a, b, n, n0p and num.
 * NOTE(review): presumably OpenSSL's rp/ap/bp/np/n0/num -- confirm against
 * the C prototype.
 *
 * When num < 4 (signed compare) the function jumps straight to the
 * epilogue with %eax = 0.  Otherwise this block carves a new stack frame
 * and leaves it laid out as follows, with %ebx = num-1:
 *     4(%esp)  r         8(%esp)  a        12(%esp)  b
 *    16(%esp)  n        20(%esp)  *n0p     24(%esp)  %esp as it was after
 *                                                    the four pushes
 *    32(%esp)  start of the work area (space for num+2 words is reserved)
 * Slots 0(%esp) and 28(%esp) are reserved but not written here.
 */
3.text
4.globl	bn_mul_mont
5.type	bn_mul_mont,@function
6.align	16
7bn_mul_mont:
8.L_bn_mul_mont_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
9	#ifdef __CET__
10
11.byte	243,15,30,251
12	#endif
13
/* Save the callee-saved registers; the arguments now start at 20(%esp). */
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
/* %eax = 0 is the return value of the early exit taken when num < 4. */
18	xorl	%eax,%eax
19	movl	40(%esp),%edi
20	cmpl	$4,%edi
21	jl	.L000just_leave
/* %esi = address of the first argument slot, %edx = address of the second. */
22	leal	20(%esp),%esi
23	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2); %edi is left holding num+2. */
24	addl	$2,%edi
25	negl	%edi
26	leal	-32(%esp,%edi,4),%ebp
27	negl	%edi
/*
 * Lower %ebp until it is congruent to %edx modulo 2048, lower it by a
 * further 2048 if bit 11 of the two addresses is then equal, and finally
 * round it down to a multiple of 64.
 * NOTE(review): presumably this keeps the frame from aliasing the argument
 * block in the cache -- confirm against the generator script.
 */
28	movl	%ebp,%eax
29	subl	%edx,%eax
30	andl	$2047,%eax
31	subl	%eax,%ebp
32	xorl	%ebp,%edx
33	andl	$2048,%edx
34	xorl	$2048,%edx
35	subl	%edx,%ebp
36	andl	$-64,%ebp
/*
 * Move %esp down to the new frame one 4096-byte step at a time, reading
 * one word at every step (the loaded value is discarded).  %edx keeps the
 * old %esp.  The first step lands on %ebp plus a whole number of 4096-byte
 * units; the loop then descends until %esp == %ebp.
 */
37	movl	%esp,%eax
38	subl	%ebp,%eax
39	andl	$-4096,%eax
40	movl	%esp,%edx
41	leal	(%ebp,%eax,1),%esp
42	movl	(%esp),%eax
43	cmpl	%ebp,%esp
44	ja	.L001page_walk
45	jmp	.L002page_walk_done
46.align	16
47.L001page_walk:
48	leal	-4096(%esp),%esp
49	movl	(%esp),%eax
50	cmpl	%ebp,%esp
51	ja	.L001page_walk
52.L002page_walk_done:
/* Copy r, a, b, n and the word that n0p points to into the new frame. */
53	movl	(%esi),%eax
54	movl	4(%esi),%ebx
55	movl	8(%esi),%ecx
56	movl	12(%esi),%ebp
57	movl	16(%esi),%esi
58	movl	(%esi),%esi
59	movl	%eax,4(%esp)
60	movl	%ebx,8(%esp)
61	movl	%ecx,12(%esp)
62	movl	%ebp,16(%esp)
63	movl	%esi,20(%esp)
/* %ebx = (num+2) - 3 = num-1; 24(%esp) = old %esp, restored in the epilogue. */
64	leal	-3(%edi),%ebx
65	movl	%edx,24(%esp)
/*
 * Position-independent address of OPENSSL_ia32cap_P: call/pop yields the
 * address of .L003PIC_me_up, to which the link-time distance is added.
 * Bit 26 of the first word selects the SSE2 code that follows; when it is
 * clear, branch to the integer-only code.
 */
66	call	.L003PIC_me_up
67.L003PIC_me_up:
68	popl	%eax
69	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
70	btl	$26,(%eax)
71	jnc	.L004non_sse2
/*
 * SSE2 path (reached by fall-through when the capability bit is set).
 * Shorthand: a[], b[], n[] are the arrays at 8/12/16(%esp), n0 is the word
 * at 20(%esp), t[] is the work area starting at 32(%esp).
 * Register roles:
 *   %esi = a, %edi = b, %ebp = n, %ebx = num-1,
 *   %edx = outer index i, %ecx = inner index j,
 *   %mm7 = 0x00000000ffffffff (low-dword mask), %mm4 = b[i],
 *   %mm5 = per-row multiplier m (only its low dword is used by pmuludq),
 *   %mm2 = running sum of the a[j]*b[i] products,
 *   %mm3 = running sum of the n[j]*m products plus the low dwords of %mm2.
 */
72	movl	$-1,%eax
73	movd	%eax,%mm7
74	movl	8(%esp),%esi
75	movl	12(%esp),%edi
76	movl	16(%esp),%ebp
77	xorl	%edx,%edx
78	xorl	%ecx,%ecx
/*
 * Row i = 0, word j = 0: %mm5 = a[0]*b[0]; m = low32(a[0]*b[0]) * n0;
 * %mm3 = n[0]*m + low32(a[0]*b[0]).  Only the upper halves of both sums
 * are carried forward, and a[1]/n[1] are preloaded for the loop.
 */
79	movd	(%edi),%mm4
80	movd	(%esi),%mm5
81	movd	(%ebp),%mm3
82	pmuludq	%mm4,%mm5
83	movq	%mm5,%mm2
84	movq	%mm5,%mm0
85	pand	%mm7,%mm0
86	pmuludq	20(%esp),%mm5
87	pmuludq	%mm5,%mm3
88	paddq	%mm0,%mm3
89	movd	4(%ebp),%mm1
90	movd	4(%esi),%mm0
91	psrlq	$32,%mm2
92	psrlq	$32,%mm3
93	incl	%ecx
94.align	16
/*
 * First-row loop, j = 1 .. num-2: accumulate a[j]*b[0] and n[j]*m, store
 * the low dword of the combined sum to t[j-1] and preload a[j+1]/n[j+1].
 */
95.L0051st:
96	pmuludq	%mm4,%mm0
97	pmuludq	%mm5,%mm1
98	paddq	%mm0,%mm2
99	paddq	%mm1,%mm3
100	movq	%mm2,%mm0
101	pand	%mm7,%mm0
102	movd	4(%ebp,%ecx,4),%mm1
103	paddq	%mm0,%mm3
104	movd	4(%esi,%ecx,4),%mm0
105	psrlq	$32,%mm2
106	movd	%mm3,28(%esp,%ecx,4)
107	psrlq	$32,%mm3
108	leal	1(%ecx),%ecx
109	cmpl	%ebx,%ecx
110	jl	.L0051st
/*
 * Last word of the first row (j = num-1): store t[num-2], then write the
 * sum of the two remaining upper halves as a 64-bit value over
 * t[num-1] and t[num].
 */
111	pmuludq	%mm4,%mm0
112	pmuludq	%mm5,%mm1
113	paddq	%mm0,%mm2
114	paddq	%mm1,%mm3
115	movq	%mm2,%mm0
116	pand	%mm7,%mm0
117	paddq	%mm0,%mm3
118	movd	%mm3,28(%esp,%ecx,4)
119	psrlq	$32,%mm2
120	psrlq	$32,%mm3
121	paddq	%mm2,%mm3
122	movq	%mm3,32(%esp,%ebx,4)
123	incl	%edx
/*
 * Outer loop over i = 1 .. num-1.  Same computation as the first row, but
 * every product sum also absorbs the previous contents of t[]:
 * %mm5 = a[0]*b[i] + t[0], m = low32(%mm5) * n0, and t[j+1] (in %mm6) is
 * added to the a[j]*b[i] chain.
 */
124.L006outer:
125	xorl	%ecx,%ecx
126	movd	(%edi,%edx,4),%mm4
127	movd	(%esi),%mm5
128	movd	32(%esp),%mm6
129	movd	(%ebp),%mm3
130	pmuludq	%mm4,%mm5
131	paddq	%mm6,%mm5
132	movq	%mm5,%mm0
133	movq	%mm5,%mm2
134	pand	%mm7,%mm0
135	pmuludq	20(%esp),%mm5
136	pmuludq	%mm5,%mm3
137	paddq	%mm0,%mm3
138	movd	36(%esp),%mm6
139	movd	4(%ebp),%mm1
140	movd	4(%esi),%mm0
141	psrlq	$32,%mm2
142	psrlq	$32,%mm3
143	paddq	%mm6,%mm2
144	incl	%ecx
/*
 * %ebx becomes the down-counter num-2 for the inner loop.  Inside the loop
 * the leal that advances %ecx leaves the flags from decl intact for jnz.
 */
145	decl	%ebx
146.L007inner:
147	pmuludq	%mm4,%mm0
148	pmuludq	%mm5,%mm1
149	paddq	%mm0,%mm2
150	paddq	%mm1,%mm3
151	movq	%mm2,%mm0
152	movd	36(%esp,%ecx,4),%mm6
153	pand	%mm7,%mm0
154	movd	4(%ebp,%ecx,4),%mm1
155	paddq	%mm0,%mm3
156	movd	4(%esi,%ecx,4),%mm0
157	psrlq	$32,%mm2
158	movd	%mm3,28(%esp,%ecx,4)
159	psrlq	$32,%mm3
160	paddq	%mm6,%mm2
161	decl	%ebx
162	leal	1(%ecx),%ecx
163	jnz	.L007inner
/*
 * %ecx is now num-1; copy it back into %ebx.  Finish the row: store
 * t[num-2], then add both upper halves and the old t[num] and write the
 * 64-bit result over t[num-1] and t[num].
 */
164	movl	%ecx,%ebx
165	pmuludq	%mm4,%mm0
166	pmuludq	%mm5,%mm1
167	paddq	%mm0,%mm2
168	paddq	%mm1,%mm3
169	movq	%mm2,%mm0
170	pand	%mm7,%mm0
171	paddq	%mm0,%mm3
172	movd	%mm3,28(%esp,%ecx,4)
173	psrlq	$32,%mm2
174	psrlq	$32,%mm3
175	movd	36(%esp,%ebx,4),%mm6
176	paddq	%mm2,%mm3
177	paddq	%mm6,%mm3
178	movq	%mm3,32(%esp,%ebx,4)
/* Next i; repeat while i <= num-1. */
179	leal	1(%edx),%edx
180	cmpl	%ebx,%edx
181	jle	.L006outer
/* Leave MMX state before going back to integer code. */
182	emms
183	jmp	.L008common_tail
/*
 * Integer-only path.  Shorthand: a[], b[], n[] are the arrays at
 * 8/12/16(%esp), n0 is the word at 20(%esp), t[] is the work area starting
 * at 32(%esp).  On entry %ebx = num-1.
 *
 * First choose between the general multiply below and the squaring code:
 * %ebp = (num & 1) | (a - b) is zero only when the two input pointers are
 * equal and num is even.  The movl that loads b[0] into %edi does not
 * change the flags, so jz still tests the result of orl.  %eax is set to
 * b + 4*num, the address just past the last word of b[].
 */
184.align	16
185.L004non_sse2:
186	movl	8(%esp),%esi
187	leal	1(%ebx),%ebp
188	movl	12(%esp),%edi
189	xorl	%ecx,%ecx
190	movl	%esi,%edx
191	andl	$1,%ebp
192	subl	%edi,%edx
193	leal	4(%edi,%ebx,4),%eax
194	orl	%edx,%ebp
195	movl	(%edi),%edi
196	jz	.L009bn_sqr_mont
/* 28(%esp) = end-of-b[] address, used later to detect the last row. */
197	movl	%eax,28(%esp)
198	movl	(%esi),%eax
199	xorl	%edx,%edx
200.align	16
/*
 * First row: t[j] = low(a[j]*b[0] + carry) for j = 0 .. num-2, with the
 * carry kept in %edx and the next a[] word preloaded into %eax.
 */
201.L010mull:
202	movl	%edx,%ebp
203	mull	%edi
204	addl	%eax,%ebp
205	leal	1(%ecx),%ecx
206	adcl	$0,%edx
207	movl	(%esi,%ecx,4),%eax
208	cmpl	%ebx,%ecx
209	movl	%ebp,28(%esp,%ecx,4)
210	jl	.L010mull
/*
 * Last product of the first row: t[num-1], t[num] = product + carry and
 * t[num+1] = 0.  In between, switch %esi to n[] and compute the row
 * multiplier m = n0 * t[0] (low 32 bits) in %edi.  Then start the
 * reduction with n[0]*m + t[0]: only the carry in %edx is kept, because
 * %eax is reloaded with n[1] right after the addl (movl leaves CF alone).
 */
211	movl	%edx,%ebp
212	mull	%edi
213	movl	20(%esp),%edi
214	addl	%ebp,%eax
215	movl	16(%esp),%esi
216	adcl	$0,%edx
217	imull	32(%esp),%edi
218	movl	%eax,32(%esp,%ebx,4)
219	xorl	%ecx,%ecx
220	movl	%edx,36(%esp,%ebx,4)
221	movl	%ecx,40(%esp,%ebx,4)
222	movl	(%esi),%eax
223	mull	%edi
224	addl	32(%esp),%eax
225	movl	4(%esi),%eax
226	adcl	$0,%edx
227	incl	%ecx
228	jmp	.L0112ndmadd
229.align	16
/*
 * Multiply-accumulate for rows i >= 1: t[j] = low(t[j] + a[j]*b[i] + carry)
 * for j = 0 .. num-2 (%edi = b[i], %esi = a, carry in %edx).
 */
230.L0121stmadd:
231	movl	%edx,%ebp
232	mull	%edi
233	addl	32(%esp,%ecx,4),%ebp
234	leal	1(%ecx),%ecx
235	adcl	$0,%edx
236	addl	%eax,%ebp
237	movl	(%esi,%ecx,4),%eax
238	adcl	$0,%edx
239	cmpl	%ebx,%ecx
240	movl	%ebp,28(%esp,%ecx,4)
241	jl	.L0121stmadd
/*
 * Top of the row: fold a[num-1]*b[i] into t[num-1], add the carry into
 * t[num] and record the carry out of that addition in t[num+1].
 * Interleaved with this, %esi is switched to n[], m = n0 * t[0] is
 * recomputed into %edi, and the reduction is started with n[0]*m + t[0]
 * (again only the carry in %edx survives).
 */
242	movl	%edx,%ebp
243	mull	%edi
244	addl	32(%esp,%ebx,4),%eax
245	movl	20(%esp),%edi
246	adcl	$0,%edx
247	movl	16(%esp),%esi
248	addl	%eax,%ebp
249	adcl	$0,%edx
250	imull	32(%esp),%edi
251	xorl	%ecx,%ecx
252	addl	36(%esp,%ebx,4),%edx
253	movl	%ebp,32(%esp,%ebx,4)
254	adcl	$0,%ecx
255	movl	(%esi),%eax
256	movl	%edx,36(%esp,%ebx,4)
257	movl	%ecx,40(%esp,%ebx,4)
258	mull	%edi
259	addl	32(%esp),%eax
260	movl	4(%esi),%eax
261	adcl	$0,%edx
262	movl	$1,%ecx
263.align	16
/*
 * Reduction: t[j-1] = low(t[j] + n[j]*m + carry) for j = 1 .. num-2, i.e.
 * the sum is written back one word lower than it was read.
 */
264.L0112ndmadd:
265	movl	%edx,%ebp
266	mull	%edi
267	addl	32(%esp,%ecx,4),%ebp
268	leal	1(%ecx),%ecx
269	adcl	$0,%edx
270	addl	%eax,%ebp
271	movl	(%esi,%ecx,4),%eax
272	adcl	$0,%edx
273	cmpl	%ebx,%ecx
274	movl	%ebp,24(%esp,%ecx,4)
275	jl	.L0112ndmadd
/*
 * Last reduction word goes to t[num-2]; then t[num-1] = carry + t[num]
 * and t[num] = t[num+1] + carry-out.  The b[] cursor kept in 12(%esp) is
 * advanced by one word and compared with the end address in 28(%esp);
 * leal and movl sit between the add/adc/cmp/je pairs because they do not
 * disturb the flags.
 */
276	movl	%edx,%ebp
277	mull	%edi
278	addl	32(%esp,%ebx,4),%ebp
279	adcl	$0,%edx
280	addl	%eax,%ebp
281	adcl	$0,%edx
282	movl	%ebp,28(%esp,%ebx,4)
283	xorl	%eax,%eax
284	movl	12(%esp),%ecx
285	addl	36(%esp,%ebx,4),%edx
286	adcl	40(%esp,%ebx,4),%eax
287	leal	4(%ecx),%ecx
288	movl	%edx,32(%esp,%ebx,4)
289	cmpl	28(%esp),%ecx
290	movl	%eax,36(%esp,%ebx,4)
291	je	.L008common_tail
/* More rows remain: load the next b[] word and restart with j = 0, carry = 0. */
292	movl	(%ecx),%edi
293	movl	8(%esp),%esi
294	movl	%ecx,12(%esp)
295	xorl	%ecx,%ecx
296	xorl	%edx,%edx
297	movl	(%esi),%eax
298	jmp	.L0121stmadd
/*
 * Squaring path, entered from the integer-only code when both input
 * pointers are equal and num is even.  Shorthand: a[] is the input array
 * (%esi on entry), n[] the array at 16(%esp), n0 the word at 20(%esp),
 * t[] the work area at 32(%esp).  On entry %edi = a[0], %ecx = 0 and
 * %ebx = num-1.
 *
 * Frame slots reused here: 0(%esp) holds num-1 and 12(%esp) holds the
 * current row index i (it starts at 0).
 *
 * First row: t[0] = low(a[0]*a[0]).  The high word is halved, with its low
 * bit parked in %ebx, so that it can take part in the doubling performed
 * by the loop below.
 */
299.align	16
300.L009bn_sqr_mont:
301	movl	%ebx,(%esp)
302	movl	%ecx,12(%esp)
303	movl	%edi,%eax
304	mull	%edi
305	movl	%eax,32(%esp)
306	movl	%edx,%ebx
307	shrl	$1,%edx
308	andl	$1,%ebx
309	incl	%ecx
310.align	16
/*
 * For j = 1 .. num-2: form a[j]*a[0] + carry, then store twice its low
 * word plus the bit carried in %ebx to t[j].  leal performs the doubling
 * without touching flags; shrl $31 extracts the bit that the doubling
 * pushes out, which becomes the new %ebx.
 */
311.L013sqr:
312	movl	(%esi,%ecx,4),%eax
313	movl	%edx,%ebp
314	mull	%edi
315	addl	%ebp,%eax
316	leal	1(%ecx),%ecx
317	adcl	$0,%edx
318	leal	(%ebx,%eax,2),%ebp
319	shrl	$31,%eax
320	cmpl	(%esp),%ecx
321	movl	%eax,%ebx
322	movl	%ebp,28(%esp,%ecx,4)
323	jl	.L013sqr
/*
 * Last word of the first row (j = num-1): the doubled low word goes to
 * t[num-1], the doubled high word to t[num] and the final shifted-out bit
 * to t[num+1].  Interleaved: %esi is switched to n[], m = n0 * t[0] is
 * computed into %edi, and the reduction is started with n[0]*m + t[0]
 * (only the carry in %edx is kept; %eax is reloaded with n[1]).
 * %ebx is restored to num-1.
 */
324	movl	(%esi,%ecx,4),%eax
325	movl	%edx,%ebp
326	mull	%edi
327	addl	%ebp,%eax
328	movl	20(%esp),%edi
329	adcl	$0,%edx
330	movl	16(%esp),%esi
331	leal	(%ebx,%eax,2),%ebp
332	imull	32(%esp),%edi
333	shrl	$31,%eax
334	movl	%ebp,32(%esp,%ecx,4)
335	leal	(%eax,%edx,2),%ebp
336	movl	(%esi),%eax
337	shrl	$31,%edx
338	movl	%ebp,36(%esp,%ecx,4)
339	movl	%edx,40(%esp,%ecx,4)
340	mull	%edi
341	addl	32(%esp),%eax
342	movl	%ecx,%ebx
343	adcl	$0,%edx
344	movl	4(%esi),%eax
345	movl	$1,%ecx
346.align	16
/*
 * Reduction, two words per iteration: t[j-1] = low(t[j] + n[j]*m + carry)
 * and the same for j+1, with j advancing by 2 from 1 until it reaches
 * num-1.
 * NOTE(review): the step of 2 is presumably why this path is only taken
 * for even num -- confirm against the generator script.
 */
347.L0143rdmadd:
348	movl	%edx,%ebp
349	mull	%edi
350	addl	32(%esp,%ecx,4),%ebp
351	adcl	$0,%edx
352	addl	%eax,%ebp
353	movl	4(%esi,%ecx,4),%eax
354	adcl	$0,%edx
355	movl	%ebp,28(%esp,%ecx,4)
356	movl	%edx,%ebp
357	mull	%edi
358	addl	36(%esp,%ecx,4),%ebp
359	leal	2(%ecx),%ecx
360	adcl	$0,%edx
361	addl	%eax,%ebp
362	movl	(%esi,%ecx,4),%eax
363	adcl	$0,%edx
364	cmpl	%ebx,%ecx
365	movl	%ebp,24(%esp,%ecx,4)
366	jl	.L0143rdmadd
/*
 * Last reduction word goes to t[num-2]; then t[num-1] = carry + t[num]
 * and t[num] = t[num+1] + carry-out.  If the row index in 12(%esp) has
 * reached num-1 all rows are done.
 */
367	movl	%edx,%ebp
368	mull	%edi
369	addl	32(%esp,%ebx,4),%ebp
370	adcl	$0,%edx
371	addl	%eax,%ebp
372	adcl	$0,%edx
373	movl	%ebp,28(%esp,%ebx,4)
374	movl	12(%esp),%ecx
375	xorl	%eax,%eax
376	movl	8(%esp),%esi
377	addl	36(%esp,%ebx,4),%edx
378	adcl	40(%esp,%ebx,4),%eax
379	movl	%edx,32(%esp,%ebx,4)
380	cmpl	%ebx,%ecx
381	movl	%eax,36(%esp,%ebx,4)
382	je	.L008common_tail
/*
 * Next row: i is incremented and saved, %edi = a[i], and the low word of
 * a[i]*a[i] is added into t[i] (carry and high word stay in %edx).  If i
 * is already num-1 there are no cross products left, so go to the row
 * tail with %ebp = 0; the leal that bumps %ecx preserves the cmpl flags.
 */
383	movl	4(%esi,%ecx,4),%edi
384	leal	1(%ecx),%ecx
385	movl	%edi,%eax
386	movl	%ecx,12(%esp)
387	mull	%edi
388	addl	32(%esp,%ecx,4),%eax
389	adcl	$0,%edx
390	movl	%eax,32(%esp,%ecx,4)
391	xorl	%ebp,%ebp
392	cmpl	%ebx,%ecx
393	leal	1(%ecx),%ecx
394	je	.L015sqrlast
/* Halve the pending high word, keeping its low bit in %ebx (as in row 0). */
395	movl	%edx,%ebx
396	shrl	$1,%edx
397	andl	$1,%ebx
398.align	16
/*
 * For j = i+1 .. num-1: t[j] = low(t[j] + 2*low(a[j]*a[i] + carry) + %ebx).
 * %eax collects the bit shifted out by the doubling plus the carries of
 * the two additions and becomes the next %ebx.
 */
399.L016sqradd:
400	movl	(%esi,%ecx,4),%eax
401	movl	%edx,%ebp
402	mull	%edi
403	addl	%ebp,%eax
404	leal	(%eax,%eax,1),%ebp
405	adcl	$0,%edx
406	shrl	$31,%eax
407	addl	32(%esp,%ecx,4),%ebp
408	leal	1(%ecx),%ecx
409	adcl	$0,%eax
410	addl	%ebx,%ebp
411	adcl	$0,%eax
412	cmpl	(%esp),%ecx
413	movl	%ebp,28(%esp,%ecx,4)
414	movl	%eax,%ebx
415	jle	.L016sqradd
/* Double the final high word and add %ebx: low part in %edx, overflow in %ebp. */
416	movl	%edx,%ebp
417	addl	%edx,%edx
418	shrl	$31,%ebp
419	addl	%ebx,%edx
420	adcl	$0,%ebp
/*
 * Row tail (%ecx = num here): add %edx into t[num] and store the carry
 * word to t[num+1]; recompute m = n0 * t[0], switch %esi to n[], start the
 * reduction with n[0]*m + t[0] and re-enter the reduction loop with
 * %ebx = num-1, j = 1 and %eax = n[1].
 */
421.L015sqrlast:
422	movl	20(%esp),%edi
423	movl	16(%esp),%esi
424	imull	32(%esp),%edi
425	addl	32(%esp,%ecx,4),%edx
426	movl	(%esi),%eax
427	adcl	$0,%ebp
428	movl	%edx,32(%esp,%ecx,4)
429	movl	%ebp,36(%esp,%ecx,4)
430	mull	%edi
431	addl	32(%esp),%eax
432	leal	-1(%ecx),%ebx
433	adcl	$0,%edx
434	movl	$1,%ecx
435	movl	4(%esi),%eax
436	jmp	.L0143rdmadd
/*
 * Common tail for every multiplication path.  On entry %ebx = num-1 and
 * the work area t[] at 32(%esp) holds num+1 result words.
 * %ebp = n (16(%esp)), %edi = r (4(%esp)), %esi = t.
 * The xorl that zeroes the index %edx also clears CF for the first sbbl.
 */
437.align	16
438.L008common_tail:
439	movl	16(%esp),%ebp
440	movl	4(%esp),%edi
441	leal	32(%esp),%esi
442	movl	(%esi),%eax
443	movl	%ebx,%ecx
444	xorl	%edx,%edx
445.align	16
/*
 * r[j] = t[j] - n[j] - borrow for j = 0 .. num-1.  decl, movl and leal do
 * not modify CF, so the borrow chain survives the loop bookkeeping; jge
 * tests the sign produced by decl, and %ecx ends up as -1.
 */
446.L017sub:
447	sbbl	(%ebp,%edx,4),%eax
448	movl	%eax,(%edi,%edx,4)
449	decl	%ecx
450	movl	4(%esi,%edx,4),%eax
451	leal	1(%edx),%edx
452	jge	.L017sub
/*
 * %eax = t[num] - final borrow, used as a select mask, and %edx = ~%eax.
 * NOTE(review): the mask is all-ones or zero only if t[num] is 0 or 1 --
 * presumably guaranteed by the algorithm; confirm.
 */
453	sbbl	$0,%eax
454	movl	$-1,%edx
455	xorl	%eax,%edx
456	jmp	.L018copy
457.align	16
/*
 * Branch-free select, from j = num-1 down to 0:
 *     r[j] = (t[j] & %eax) | (r[j] & %edx)
 * so r[] keeps the subtracted value when the mask is zero and receives
 * the unsubtracted t[] when the mask is all-ones.  Each t[j] is
 * overwritten with %ecx (-1 after the loop above) once it has been read.
 */
458.L018copy:
459	movl	32(%esp,%ebx,4),%esi
460	movl	(%edi,%ebx,4),%ebp
461	movl	%ecx,32(%esp,%ebx,4)
462	andl	%eax,%esi
463	andl	%edx,%ebp
464	orl	%esi,%ebp
465	movl	%ebp,(%edi,%ebx,4)
466	decl	%ebx
467	jge	.L018copy
/* Restore the %esp saved at 24(%esp) and report success with %eax = 1. */
468	movl	24(%esp),%esp
469	movl	$1,%eax
/* Shared epilogue; the early exit arrives here with %eax = 0. */
470.L000just_leave:
471	popl	%edi
472	popl	%esi
473	popl	%ebx
474	popl	%ebp
475	ret
476.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
477.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
478.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
479.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
480.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
481.byte	111,114,103,62,0
/* Common symbol for the capability vector tested at function entry: 16 bytes, alignment 4. */
482.comm	OPENSSL_ia32cap_P,16,4
483
/*
 * ELF note in .note.gnu.property: name size (1f-0f), descriptor size
 * (4f-1f), note type 5, owner name "GNU", then one property record of
 * type 0xc0000002 whose data is 3f-2f = 4 bytes long and holds the value 3.
 * NOTE(review): per the x86 psABI these should be NT_GNU_PROPERTY_TYPE_0,
 * GNU_PROPERTY_X86_FEATURE_1_AND and IBT|SHSTK respectively -- confirm.
 */
484	.section ".note.gnu.property", "a"
485	.p2align 2
486	.long 1f - 0f
487	.long 4f - 1f
488	.long 5
4890:
490	.asciz "GNU"
4911:
492	.p2align 2
493	.long 0xc0000002
494	.long 3f - 2f
4952:
496	.long 3
4973:
498	.p2align 2
4994:
500#else
/*
 * bn_mul_mont -- non-PIC build.
 *
 * cdecl entry point taking six stack arguments.  In the comments of this
 * block they are called, in order, r, a, b, n, n0p and num.
 * NOTE(review): presumably OpenSSL's rp/ap/bp/np/n0/num -- confirm against
 * the C prototype.
 *
 * When num < 4 (signed compare) the function jumps straight to the
 * epilogue with %eax = 0.  Otherwise this block carves a new stack frame
 * and leaves it laid out as follows, with %ebx = num-1:
 *     4(%esp)  r         8(%esp)  a        12(%esp)  b
 *    16(%esp)  n        20(%esp)  *n0p     24(%esp)  %esp as it was after
 *                                                    the four pushes
 *    32(%esp)  start of the work area (space for num+2 words is reserved)
 * Slots 0(%esp) and 28(%esp) are reserved but not written here.
 */
501.text
502.globl	bn_mul_mont
503.type	bn_mul_mont,@function
504.align	16
505bn_mul_mont:
506.L_bn_mul_mont_begin:
/* Bytes F3 0F 1E FB encode endbr32; emitted only when __CET__ is defined. */
507	#ifdef __CET__
508
509.byte	243,15,30,251
510	#endif
511
/* Save the callee-saved registers; the arguments now start at 20(%esp). */
512	pushl	%ebp
513	pushl	%ebx
514	pushl	%esi
515	pushl	%edi
/* %eax = 0 is the return value of the early exit taken when num < 4. */
516	xorl	%eax,%eax
517	movl	40(%esp),%edi
518	cmpl	$4,%edi
519	jl	.L000just_leave
/* %esi = address of the first argument slot, %edx = address of the second. */
520	leal	20(%esp),%esi
521	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(num+2); %edi is left holding num+2. */
522	addl	$2,%edi
523	negl	%edi
524	leal	-32(%esp,%edi,4),%ebp
525	negl	%edi
/*
 * Lower %ebp until it is congruent to %edx modulo 2048, lower it by a
 * further 2048 if bit 11 of the two addresses is then equal, and finally
 * round it down to a multiple of 64.
 * NOTE(review): presumably this keeps the frame from aliasing the argument
 * block in the cache -- confirm against the generator script.
 */
526	movl	%ebp,%eax
527	subl	%edx,%eax
528	andl	$2047,%eax
529	subl	%eax,%ebp
530	xorl	%ebp,%edx
531	andl	$2048,%edx
532	xorl	$2048,%edx
533	subl	%edx,%ebp
534	andl	$-64,%ebp
/*
 * Move %esp down to the new frame one 4096-byte step at a time, reading
 * one word at every step (the loaded value is discarded).  %edx keeps the
 * old %esp.  The first step lands on %ebp plus a whole number of 4096-byte
 * units; the loop then descends until %esp == %ebp.
 */
535	movl	%esp,%eax
536	subl	%ebp,%eax
537	andl	$-4096,%eax
538	movl	%esp,%edx
539	leal	(%ebp,%eax,1),%esp
540	movl	(%esp),%eax
541	cmpl	%ebp,%esp
542	ja	.L001page_walk
543	jmp	.L002page_walk_done
544.align	16
545.L001page_walk:
546	leal	-4096(%esp),%esp
547	movl	(%esp),%eax
548	cmpl	%ebp,%esp
549	ja	.L001page_walk
550.L002page_walk_done:
/* Copy r, a, b, n and the word that n0p points to into the new frame. */
551	movl	(%esi),%eax
552	movl	4(%esi),%ebx
553	movl	8(%esi),%ecx
554	movl	12(%esi),%ebp
555	movl	16(%esi),%esi
556	movl	(%esi),%esi
557	movl	%eax,4(%esp)
558	movl	%ebx,8(%esp)
559	movl	%ecx,12(%esp)
560	movl	%ebp,16(%esp)
561	movl	%esi,20(%esp)
/* %ebx = (num+2) - 3 = num-1; 24(%esp) = old %esp, restored in the epilogue. */
562	leal	-3(%edi),%ebx
563	movl	%edx,24(%esp)
/*
 * Absolute address of OPENSSL_ia32cap_P (no PIC indirection in this
 * build).  Bit 26 of the first word selects the SSE2 code that follows;
 * when it is clear, branch to the integer-only code.
 */
564	leal	OPENSSL_ia32cap_P,%eax
565	btl	$26,(%eax)
566	jnc	.L003non_sse2
/*
 * SSE2 path (reached by fall-through when the capability bit is set).
 * Shorthand: a[], b[], n[] are the arrays at 8/12/16(%esp), n0 is the word
 * at 20(%esp), t[] is the work area starting at 32(%esp).
 * Register roles:
 *   %esi = a, %edi = b, %ebp = n, %ebx = num-1,
 *   %edx = outer index i, %ecx = inner index j,
 *   %mm7 = 0x00000000ffffffff (low-dword mask), %mm4 = b[i],
 *   %mm5 = per-row multiplier m (only its low dword is used by pmuludq),
 *   %mm2 = running sum of the a[j]*b[i] products,
 *   %mm3 = running sum of the n[j]*m products plus the low dwords of %mm2.
 */
567	movl	$-1,%eax
568	movd	%eax,%mm7
569	movl	8(%esp),%esi
570	movl	12(%esp),%edi
571	movl	16(%esp),%ebp
572	xorl	%edx,%edx
573	xorl	%ecx,%ecx
/*
 * Row i = 0, word j = 0: %mm5 = a[0]*b[0]; m = low32(a[0]*b[0]) * n0;
 * %mm3 = n[0]*m + low32(a[0]*b[0]).  Only the upper halves of both sums
 * are carried forward, and a[1]/n[1] are preloaded for the loop.
 */
574	movd	(%edi),%mm4
575	movd	(%esi),%mm5
576	movd	(%ebp),%mm3
577	pmuludq	%mm4,%mm5
578	movq	%mm5,%mm2
579	movq	%mm5,%mm0
580	pand	%mm7,%mm0
581	pmuludq	20(%esp),%mm5
582	pmuludq	%mm5,%mm3
583	paddq	%mm0,%mm3
584	movd	4(%ebp),%mm1
585	movd	4(%esi),%mm0
586	psrlq	$32,%mm2
587	psrlq	$32,%mm3
588	incl	%ecx
589.align	16
/*
 * First-row loop, j = 1 .. num-2: accumulate a[j]*b[0] and n[j]*m, store
 * the low dword of the combined sum to t[j-1] and preload a[j+1]/n[j+1].
 */
590.L0041st:
591	pmuludq	%mm4,%mm0
592	pmuludq	%mm5,%mm1
593	paddq	%mm0,%mm2
594	paddq	%mm1,%mm3
595	movq	%mm2,%mm0
596	pand	%mm7,%mm0
597	movd	4(%ebp,%ecx,4),%mm1
598	paddq	%mm0,%mm3
599	movd	4(%esi,%ecx,4),%mm0
600	psrlq	$32,%mm2
601	movd	%mm3,28(%esp,%ecx,4)
602	psrlq	$32,%mm3
603	leal	1(%ecx),%ecx
604	cmpl	%ebx,%ecx
605	jl	.L0041st
/*
 * Last word of the first row (j = num-1): store t[num-2], then write the
 * sum of the two remaining upper halves as a 64-bit value over
 * t[num-1] and t[num].
 */
606	pmuludq	%mm4,%mm0
607	pmuludq	%mm5,%mm1
608	paddq	%mm0,%mm2
609	paddq	%mm1,%mm3
610	movq	%mm2,%mm0
611	pand	%mm7,%mm0
612	paddq	%mm0,%mm3
613	movd	%mm3,28(%esp,%ecx,4)
614	psrlq	$32,%mm2
615	psrlq	$32,%mm3
616	paddq	%mm2,%mm3
617	movq	%mm3,32(%esp,%ebx,4)
618	incl	%edx
/*
 * Outer loop over i = 1 .. num-1.  Same computation as the first row, but
 * every product sum also absorbs the previous contents of t[]:
 * %mm5 = a[0]*b[i] + t[0], m = low32(%mm5) * n0, and t[j+1] (in %mm6) is
 * added to the a[j]*b[i] chain.
 */
619.L005outer:
620	xorl	%ecx,%ecx
621	movd	(%edi,%edx,4),%mm4
622	movd	(%esi),%mm5
623	movd	32(%esp),%mm6
624	movd	(%ebp),%mm3
625	pmuludq	%mm4,%mm5
626	paddq	%mm6,%mm5
627	movq	%mm5,%mm0
628	movq	%mm5,%mm2
629	pand	%mm7,%mm0
630	pmuludq	20(%esp),%mm5
631	pmuludq	%mm5,%mm3
632	paddq	%mm0,%mm3
633	movd	36(%esp),%mm6
634	movd	4(%ebp),%mm1
635	movd	4(%esi),%mm0
636	psrlq	$32,%mm2
637	psrlq	$32,%mm3
638	paddq	%mm6,%mm2
639	incl	%ecx
/*
 * %ebx becomes the down-counter num-2 for the inner loop.  Inside the loop
 * the leal that advances %ecx leaves the flags from decl intact for jnz.
 */
640	decl	%ebx
641.L006inner:
642	pmuludq	%mm4,%mm0
643	pmuludq	%mm5,%mm1
644	paddq	%mm0,%mm2
645	paddq	%mm1,%mm3
646	movq	%mm2,%mm0
647	movd	36(%esp,%ecx,4),%mm6
648	pand	%mm7,%mm0
649	movd	4(%ebp,%ecx,4),%mm1
650	paddq	%mm0,%mm3
651	movd	4(%esi,%ecx,4),%mm0
652	psrlq	$32,%mm2
653	movd	%mm3,28(%esp,%ecx,4)
654	psrlq	$32,%mm3
655	paddq	%mm6,%mm2
656	decl	%ebx
657	leal	1(%ecx),%ecx
658	jnz	.L006inner
/*
 * %ecx is now num-1; copy it back into %ebx.  Finish the row: store
 * t[num-2], then add both upper halves and the old t[num] and write the
 * 64-bit result over t[num-1] and t[num].
 */
659	movl	%ecx,%ebx
660	pmuludq	%mm4,%mm0
661	pmuludq	%mm5,%mm1
662	paddq	%mm0,%mm2
663	paddq	%mm1,%mm3
664	movq	%mm2,%mm0
665	pand	%mm7,%mm0
666	paddq	%mm0,%mm3
667	movd	%mm3,28(%esp,%ecx,4)
668	psrlq	$32,%mm2
669	psrlq	$32,%mm3
670	movd	36(%esp,%ebx,4),%mm6
671	paddq	%mm2,%mm3
672	paddq	%mm6,%mm3
673	movq	%mm3,32(%esp,%ebx,4)
/* Next i; repeat while i <= num-1. */
674	leal	1(%edx),%edx
675	cmpl	%ebx,%edx
676	jle	.L005outer
/* Leave MMX state before going back to integer code. */
677	emms
678	jmp	.L007common_tail
/*
 * Integer-only path.  Shorthand: a[], b[], n[] are the arrays at
 * 8/12/16(%esp), n0 is the word at 20(%esp), t[] is the work area starting
 * at 32(%esp).  On entry %ebx = num-1.
 *
 * First choose between the general multiply below and the squaring code:
 * %ebp = (num & 1) | (a - b) is zero only when the two input pointers are
 * equal and num is even.  The movl that loads b[0] into %edi does not
 * change the flags, so jz still tests the result of orl.  %eax is set to
 * b + 4*num, the address just past the last word of b[].
 */
679.align	16
680.L003non_sse2:
681	movl	8(%esp),%esi
682	leal	1(%ebx),%ebp
683	movl	12(%esp),%edi
684	xorl	%ecx,%ecx
685	movl	%esi,%edx
686	andl	$1,%ebp
687	subl	%edi,%edx
688	leal	4(%edi,%ebx,4),%eax
689	orl	%edx,%ebp
690	movl	(%edi),%edi
691	jz	.L008bn_sqr_mont
/* 28(%esp) = end-of-b[] address, used later to detect the last row. */
692	movl	%eax,28(%esp)
693	movl	(%esi),%eax
694	xorl	%edx,%edx
695.align	16
/*
 * First row: t[j] = low(a[j]*b[0] + carry) for j = 0 .. num-2, with the
 * carry kept in %edx and the next a[] word preloaded into %eax.
 */
696.L009mull:
697	movl	%edx,%ebp
698	mull	%edi
699	addl	%eax,%ebp
700	leal	1(%ecx),%ecx
701	adcl	$0,%edx
702	movl	(%esi,%ecx,4),%eax
703	cmpl	%ebx,%ecx
704	movl	%ebp,28(%esp,%ecx,4)
705	jl	.L009mull
/*
 * Last product of the first row: t[num-1], t[num] = product + carry and
 * t[num+1] = 0.  In between, switch %esi to n[] and compute the row
 * multiplier m = n0 * t[0] (low 32 bits) in %edi.  Then start the
 * reduction with n[0]*m + t[0]: only the carry in %edx is kept, because
 * %eax is reloaded with n[1] right after the addl (movl leaves CF alone).
 */
706	movl	%edx,%ebp
707	mull	%edi
708	movl	20(%esp),%edi
709	addl	%ebp,%eax
710	movl	16(%esp),%esi
711	adcl	$0,%edx
712	imull	32(%esp),%edi
713	movl	%eax,32(%esp,%ebx,4)
714	xorl	%ecx,%ecx
715	movl	%edx,36(%esp,%ebx,4)
716	movl	%ecx,40(%esp,%ebx,4)
717	movl	(%esi),%eax
718	mull	%edi
719	addl	32(%esp),%eax
720	movl	4(%esi),%eax
721	adcl	$0,%edx
722	incl	%ecx
723	jmp	.L0102ndmadd
724.align	16
/*
 * Multiply-accumulate for rows i >= 1: t[j] = low(t[j] + a[j]*b[i] + carry)
 * for j = 0 .. num-2 (%edi = b[i], %esi = a, carry in %edx).
 */
725.L0111stmadd:
726	movl	%edx,%ebp
727	mull	%edi
728	addl	32(%esp,%ecx,4),%ebp
729	leal	1(%ecx),%ecx
730	adcl	$0,%edx
731	addl	%eax,%ebp
732	movl	(%esi,%ecx,4),%eax
733	adcl	$0,%edx
734	cmpl	%ebx,%ecx
735	movl	%ebp,28(%esp,%ecx,4)
736	jl	.L0111stmadd
/*
 * Top of the row: fold a[num-1]*b[i] into t[num-1], add the carry into
 * t[num] and record the carry out of that addition in t[num+1].
 * Interleaved with this, %esi is switched to n[], m = n0 * t[0] is
 * recomputed into %edi, and the reduction is started with n[0]*m + t[0]
 * (again only the carry in %edx survives).
 */
737	movl	%edx,%ebp
738	mull	%edi
739	addl	32(%esp,%ebx,4),%eax
740	movl	20(%esp),%edi
741	adcl	$0,%edx
742	movl	16(%esp),%esi
743	addl	%eax,%ebp
744	adcl	$0,%edx
745	imull	32(%esp),%edi
746	xorl	%ecx,%ecx
747	addl	36(%esp,%ebx,4),%edx
748	movl	%ebp,32(%esp,%ebx,4)
749	adcl	$0,%ecx
750	movl	(%esi),%eax
751	movl	%edx,36(%esp,%ebx,4)
752	movl	%ecx,40(%esp,%ebx,4)
753	mull	%edi
754	addl	32(%esp),%eax
755	movl	4(%esi),%eax
756	adcl	$0,%edx
757	movl	$1,%ecx
758.align	16
/*
 * Reduction: t[j-1] = low(t[j] + n[j]*m + carry) for j = 1 .. num-2, i.e.
 * the sum is written back one word lower than it was read.
 */
759.L0102ndmadd:
760	movl	%edx,%ebp
761	mull	%edi
762	addl	32(%esp,%ecx,4),%ebp
763	leal	1(%ecx),%ecx
764	adcl	$0,%edx
765	addl	%eax,%ebp
766	movl	(%esi,%ecx,4),%eax
767	adcl	$0,%edx
768	cmpl	%ebx,%ecx
769	movl	%ebp,24(%esp,%ecx,4)
770	jl	.L0102ndmadd
/*
 * Last reduction word goes to t[num-2]; then t[num-1] = carry + t[num]
 * and t[num] = t[num+1] + carry-out.  The b[] cursor kept in 12(%esp) is
 * advanced by one word and compared with the end address in 28(%esp);
 * leal and movl sit between the add/adc/cmp/je pairs because they do not
 * disturb the flags.
 */
771	movl	%edx,%ebp
772	mull	%edi
773	addl	32(%esp,%ebx,4),%ebp
774	adcl	$0,%edx
775	addl	%eax,%ebp
776	adcl	$0,%edx
777	movl	%ebp,28(%esp,%ebx,4)
778	xorl	%eax,%eax
779	movl	12(%esp),%ecx
780	addl	36(%esp,%ebx,4),%edx
781	adcl	40(%esp,%ebx,4),%eax
782	leal	4(%ecx),%ecx
783	movl	%edx,32(%esp,%ebx,4)
784	cmpl	28(%esp),%ecx
785	movl	%eax,36(%esp,%ebx,4)
786	je	.L007common_tail
/* More rows remain: load the next b[] word and restart with j = 0, carry = 0. */
787	movl	(%ecx),%edi
788	movl	8(%esp),%esi
789	movl	%ecx,12(%esp)
790	xorl	%ecx,%ecx
791	xorl	%edx,%edx
792	movl	(%esi),%eax
793	jmp	.L0111stmadd
/*
 * Squaring path, entered from the integer-only code when both input
 * pointers are equal and num is even.  Shorthand: a[] is the input array
 * (%esi on entry), n[] the array at 16(%esp), n0 the word at 20(%esp),
 * t[] the work area at 32(%esp).  On entry %edi = a[0], %ecx = 0 and
 * %ebx = num-1.
 *
 * Frame slots reused here: 0(%esp) holds num-1 and 12(%esp) holds the
 * current row index i (it starts at 0).
 *
 * First row: t[0] = low(a[0]*a[0]).  The high word is halved, with its low
 * bit parked in %ebx, so that it can take part in the doubling performed
 * by the loop below.
 */
794.align	16
795.L008bn_sqr_mont:
796	movl	%ebx,(%esp)
797	movl	%ecx,12(%esp)
798	movl	%edi,%eax
799	mull	%edi
800	movl	%eax,32(%esp)
801	movl	%edx,%ebx
802	shrl	$1,%edx
803	andl	$1,%ebx
804	incl	%ecx
805.align	16
/*
 * For j = 1 .. num-2: form a[j]*a[0] + carry, then store twice its low
 * word plus the bit carried in %ebx to t[j].  leal performs the doubling
 * without touching flags; shrl $31 extracts the bit that the doubling
 * pushes out, which becomes the new %ebx.
 */
806.L012sqr:
807	movl	(%esi,%ecx,4),%eax
808	movl	%edx,%ebp
809	mull	%edi
810	addl	%ebp,%eax
811	leal	1(%ecx),%ecx
812	adcl	$0,%edx
813	leal	(%ebx,%eax,2),%ebp
814	shrl	$31,%eax
815	cmpl	(%esp),%ecx
816	movl	%eax,%ebx
817	movl	%ebp,28(%esp,%ecx,4)
818	jl	.L012sqr
/*
 * Last word of the first row (j = num-1): the doubled low word goes to
 * t[num-1], the doubled high word to t[num] and the final shifted-out bit
 * to t[num+1].  Interleaved: %esi is switched to n[], m = n0 * t[0] is
 * computed into %edi, and the reduction is started with n[0]*m + t[0]
 * (only the carry in %edx is kept; %eax is reloaded with n[1]).
 * %ebx is restored to num-1.
 */
819	movl	(%esi,%ecx,4),%eax
820	movl	%edx,%ebp
821	mull	%edi
822	addl	%ebp,%eax
823	movl	20(%esp),%edi
824	adcl	$0,%edx
825	movl	16(%esp),%esi
826	leal	(%ebx,%eax,2),%ebp
827	imull	32(%esp),%edi
828	shrl	$31,%eax
829	movl	%ebp,32(%esp,%ecx,4)
830	leal	(%eax,%edx,2),%ebp
831	movl	(%esi),%eax
832	shrl	$31,%edx
833	movl	%ebp,36(%esp,%ecx,4)
834	movl	%edx,40(%esp,%ecx,4)
835	mull	%edi
836	addl	32(%esp),%eax
837	movl	%ecx,%ebx
838	adcl	$0,%edx
839	movl	4(%esi),%eax
840	movl	$1,%ecx
841.align	16
/*
 * Reduction, two words per iteration: t[j-1] = low(t[j] + n[j]*m + carry)
 * and the same for j+1, with j advancing by 2 from 1 until it reaches
 * num-1.
 * NOTE(review): the step of 2 is presumably why this path is only taken
 * for even num -- confirm against the generator script.
 */
842.L0133rdmadd:
843	movl	%edx,%ebp
844	mull	%edi
845	addl	32(%esp,%ecx,4),%ebp
846	adcl	$0,%edx
847	addl	%eax,%ebp
848	movl	4(%esi,%ecx,4),%eax
849	adcl	$0,%edx
850	movl	%ebp,28(%esp,%ecx,4)
851	movl	%edx,%ebp
852	mull	%edi
853	addl	36(%esp,%ecx,4),%ebp
854	leal	2(%ecx),%ecx
855	adcl	$0,%edx
856	addl	%eax,%ebp
857	movl	(%esi,%ecx,4),%eax
858	adcl	$0,%edx
859	cmpl	%ebx,%ecx
860	movl	%ebp,24(%esp,%ecx,4)
861	jl	.L0133rdmadd
/*
 * Last reduction word goes to t[num-2]; then t[num-1] = carry + t[num]
 * and t[num] = t[num+1] + carry-out.  If the row index in 12(%esp) has
 * reached num-1 all rows are done.
 */
862	movl	%edx,%ebp
863	mull	%edi
864	addl	32(%esp,%ebx,4),%ebp
865	adcl	$0,%edx
866	addl	%eax,%ebp
867	adcl	$0,%edx
868	movl	%ebp,28(%esp,%ebx,4)
869	movl	12(%esp),%ecx
870	xorl	%eax,%eax
871	movl	8(%esp),%esi
872	addl	36(%esp,%ebx,4),%edx
873	adcl	40(%esp,%ebx,4),%eax
874	movl	%edx,32(%esp,%ebx,4)
875	cmpl	%ebx,%ecx
876	movl	%eax,36(%esp,%ebx,4)
877	je	.L007common_tail
/*
 * Next row: i is incremented and saved, %edi = a[i], and the low word of
 * a[i]*a[i] is added into t[i] (carry and high word stay in %edx).  If i
 * is already num-1 there are no cross products left, so go to the row
 * tail with %ebp = 0; the leal that bumps %ecx preserves the cmpl flags.
 */
878	movl	4(%esi,%ecx,4),%edi
879	leal	1(%ecx),%ecx
880	movl	%edi,%eax
881	movl	%ecx,12(%esp)
882	mull	%edi
883	addl	32(%esp,%ecx,4),%eax
884	adcl	$0,%edx
885	movl	%eax,32(%esp,%ecx,4)
886	xorl	%ebp,%ebp
887	cmpl	%ebx,%ecx
888	leal	1(%ecx),%ecx
889	je	.L014sqrlast
/* Halve the pending high word, keeping its low bit in %ebx (as in row 0). */
890	movl	%edx,%ebx
891	shrl	$1,%edx
892	andl	$1,%ebx
893.align	16
/*
 * For j = i+1 .. num-1: t[j] = low(t[j] + 2*low(a[j]*a[i] + carry) + %ebx).
 * %eax collects the bit shifted out by the doubling plus the carries of
 * the two additions and becomes the next %ebx.
 */
894.L015sqradd:
895	movl	(%esi,%ecx,4),%eax
896	movl	%edx,%ebp
897	mull	%edi
898	addl	%ebp,%eax
899	leal	(%eax,%eax,1),%ebp
900	adcl	$0,%edx
901	shrl	$31,%eax
902	addl	32(%esp,%ecx,4),%ebp
903	leal	1(%ecx),%ecx
904	adcl	$0,%eax
905	addl	%ebx,%ebp
906	adcl	$0,%eax
907	cmpl	(%esp),%ecx
908	movl	%ebp,28(%esp,%ecx,4)
909	movl	%eax,%ebx
910	jle	.L015sqradd
/* Double the final high word and add %ebx: low part in %edx, overflow in %ebp. */
911	movl	%edx,%ebp
912	addl	%edx,%edx
913	shrl	$31,%ebp
914	addl	%ebx,%edx
915	adcl	$0,%ebp
/*
 * Row tail (%ecx = num here): add %edx into t[num] and store the carry
 * word to t[num+1]; recompute m = n0 * t[0], switch %esi to n[], start the
 * reduction with n[0]*m + t[0] and re-enter the reduction loop with
 * %ebx = num-1, j = 1 and %eax = n[1].
 */
916.L014sqrlast:
917	movl	20(%esp),%edi
918	movl	16(%esp),%esi
919	imull	32(%esp),%edi
920	addl	32(%esp,%ecx,4),%edx
921	movl	(%esi),%eax
922	adcl	$0,%ebp
923	movl	%edx,32(%esp,%ecx,4)
924	movl	%ebp,36(%esp,%ecx,4)
925	mull	%edi
926	addl	32(%esp),%eax
927	leal	-1(%ecx),%ebx
928	adcl	$0,%edx
929	movl	$1,%ecx
930	movl	4(%esi),%eax
931	jmp	.L0133rdmadd
/*
 * Common tail for every multiplication path.  On entry %ebx = num-1 and
 * the work area t[] at 32(%esp) holds num+1 result words.
 * %ebp = n (16(%esp)), %edi = r (4(%esp)), %esi = t.
 * The xorl that zeroes the index %edx also clears CF for the first sbbl.
 */
932.align	16
933.L007common_tail:
934	movl	16(%esp),%ebp
935	movl	4(%esp),%edi
936	leal	32(%esp),%esi
937	movl	(%esi),%eax
938	movl	%ebx,%ecx
939	xorl	%edx,%edx
940.align	16
/*
 * r[j] = t[j] - n[j] - borrow for j = 0 .. num-1.  decl, movl and leal do
 * not modify CF, so the borrow chain survives the loop bookkeeping; jge
 * tests the sign produced by decl, and %ecx ends up as -1.
 */
941.L016sub:
942	sbbl	(%ebp,%edx,4),%eax
943	movl	%eax,(%edi,%edx,4)
944	decl	%ecx
945	movl	4(%esi,%edx,4),%eax
946	leal	1(%edx),%edx
947	jge	.L016sub
/*
 * %eax = t[num] - final borrow, used as a select mask, and %edx = ~%eax.
 * NOTE(review): the mask is all-ones or zero only if t[num] is 0 or 1 --
 * presumably guaranteed by the algorithm; confirm.
 */
948	sbbl	$0,%eax
949	movl	$-1,%edx
950	xorl	%eax,%edx
951	jmp	.L017copy
952.align	16
/*
 * Branch-free select, from j = num-1 down to 0:
 *     r[j] = (t[j] & %eax) | (r[j] & %edx)
 * so r[] keeps the subtracted value when the mask is zero and receives
 * the unsubtracted t[] when the mask is all-ones.  Each t[j] is
 * overwritten with %ecx (-1 after the loop above) once it has been read.
 */
953.L017copy:
954	movl	32(%esp,%ebx,4),%esi
955	movl	(%edi,%ebx,4),%ebp
956	movl	%ecx,32(%esp,%ebx,4)
957	andl	%eax,%esi
958	andl	%edx,%ebp
959	orl	%esi,%ebp
960	movl	%ebp,(%edi,%ebx,4)
961	decl	%ebx
962	jge	.L017copy
/* Restore the %esp saved at 24(%esp) and report success with %eax = 1. */
963	movl	24(%esp),%esp
964	movl	$1,%eax
/* Shared epilogue; the early exit arrives here with %eax = 0. */
965.L000just_leave:
966	popl	%edi
967	popl	%esi
968	popl	%ebx
969	popl	%ebp
970	ret
971.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
972.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
973.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
974.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
975.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
976.byte	111,114,103,62,0
/* Common symbol for the capability vector tested at function entry: 16 bytes, alignment 4. */
977.comm	OPENSSL_ia32cap_P,16,4
978
/*
 * ELF note in .note.gnu.property: name size (1f-0f), descriptor size
 * (4f-1f), note type 5, owner name "GNU", then one property record of
 * type 0xc0000002 whose data is 3f-2f = 4 bytes long and holds the value 3.
 * NOTE(review): per the x86 psABI these should be NT_GNU_PROPERTY_TYPE_0,
 * GNU_PROPERTY_X86_FEATURE_1_AND and IBT|SHSTK respectively -- confirm.
 */
979	.section ".note.gnu.property", "a"
980	.p2align 2
981	.long 1f - 0f
982	.long 4f - 1f
983	.long 5
9840:
985	.asciz "GNU"
9861:
987	.p2align 2
988	.long 0xc0000002
989	.long 3f - 2f
9902:
991	.long 3
9923:
993	.p2align 2
9944:
995#endif
996