xref: /freebsd/sys/crypto/openssl/i386/x86-mont.S (revision d0b2dbfa)
1/* Do not modify. This file is auto-generated from x86-mont.pl. */
2#ifdef PIC
/*
 * bn_mul_mont (PIC variant) -- word-serial Montgomery-style multiply/reduce
 * for i386.  The ASCII tag at the end of this block reads
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Six 32-bit stack arguments are read (20..40(%esp) after the four pushes).
 * Roles, as far as the code below shows:
 *   arg1  r[]  destination vector, written only in the common tail
 *   arg2  a[]  first operand vector
 *   arg3  b[]  second operand vector, one word consumed per outer pass
 *   arg4  n[]  vector multiplied by the per-pass word m and subtracted
 *              in the tail
 *   arg5       pointer; the 32-bit word it points to is copied to the
 *              frame and called n0 below
 *   arg6       signed word count
 * NOTE(review): presumably rp, ap, bp, np, n0, num of OpenSSL's C
 * prototype -- confirm against the caller.
 *
 * Returns %eax = 0 without touching memory when arg6 < 4, else %eax = 1.
 * %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
 *
 * Frame, relative to the relocated %esp ("last" = arg6 - 1):
 *    0(%esp)   last (squaring path only)
 *    4(%esp)   arg1
 *    8(%esp)   arg2
 *   12(%esp)   arg3 (multiply path: advanced by 4 per pass;
 *              squaring path: reused as the outer index i)
 *   16(%esp)   arg4
 *   20(%esp)   n0
 *   24(%esp)   %esp value to restore before the register pops
 *   28(%esp)   non-SSE2 multiply path: address one past b[last]
 *   32(%esp)   t[], scratch vector of arg6 + 2 words
 */
3.text
4.globl	bn_mul_mont
5.type	bn_mul_mont,@function
6.align	16
7bn_mul_mont:
8.L_bn_mul_mont_begin:
/* Save the registers restored at exit; %eax = 0 is the early-exit result. */
9	pushl	%ebp
10	pushl	%ebx
11	pushl	%esi
12	pushl	%edi
13	xorl	%eax,%eax
/* %edi = arg6; leave immediately when it is below 4 (signed compare). */
14	movl	40(%esp),%edi
15	cmpl	$4,%edi
16	jl	.L000just_leave
/* %esi = address of arg1, %edx = address of arg2 on the caller's stack. */
17	leal	20(%esp),%esi
18	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(arg6 + 2): tentative frame base; %edi ends as arg6 + 2. */
19	addl	$2,%edi
20	negl	%edi
21	leal	-32(%esp,%edi,4),%ebp
22	negl	%edi
/*
 * Frame placement: lower %ebp by ((%ebp - &arg2) mod 2048), then by a
 * further 2048 bytes when bit 11 of %ebp and of &arg2 agree, and finally
 * round down to a 64-byte boundary.
 * NOTE(review): looks like a cache-aliasing avoidance measure -- confirm
 * against the generator script.
 */
23	movl	%ebp,%eax
24	subl	%edx,%eax
25	andl	$2047,%eax
26	subl	%eax,%ebp
27	xorl	%ebp,%edx
28	andl	$2048,%edx
29	xorl	$2048,%edx
30	subl	%edx,%ebp
31	andl	$-64,%ebp
/*
 * Move %esp down to %ebp in 4096-byte steps, reading one word at each
 * step so every page in between is touched.  %edx keeps the pre-move %esp.
 */
32	movl	%esp,%eax
33	subl	%ebp,%eax
34	andl	$-4096,%eax
35	movl	%esp,%edx
36	leal	(%ebp,%eax,1),%esp
37	movl	(%esp),%eax
38	cmpl	%ebp,%esp
39	ja	.L001page_walk
40	jmp	.L002page_walk_done
41.align	16
42.L001page_walk:
43	leal	-4096(%esp),%esp
44	movl	(%esp),%eax
45	cmpl	%ebp,%esp
46	ja	.L001page_walk
47.L002page_walk_done:
/*
 * Copy arg1..arg4 and the word behind arg5 into the new frame, set
 * %ebx = last (arg6 - 1) and stash the old %esp at 24(%esp).
 */
48	movl	(%esi),%eax
49	movl	4(%esi),%ebx
50	movl	8(%esi),%ecx
51	movl	12(%esi),%ebp
52	movl	16(%esi),%esi
53	movl	(%esi),%esi
54	movl	%eax,4(%esp)
55	movl	%ebx,8(%esp)
56	movl	%ecx,12(%esp)
57	movl	%ebp,16(%esp)
58	movl	%esi,20(%esp)
59	leal	-3(%edi),%ebx
60	movl	%edx,24(%esp)
/*
 * PIC: call/pop yields this code's own address, from which the address of
 * OPENSSL_ia32cap_P is formed.  Bit 26 of its first dword selects the
 * pmuludq path; when it is clear, branch to the integer-only path.
 */
61	call	.L003PIC_me_up
62.L003PIC_me_up:
63	popl	%eax
64	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
65	btl	$26,(%eax)
66	jnc	.L004non_sse2
/*
 * ---- pmuludq path (MMX registers) ----
 * %mm7 = 0x00000000ffffffff mask, %esi = a, %edi = b, %ebp = n,
 * %edx = outer index, %ecx = inner index.
 */
67	movl	$-1,%eax
68	movd	%eax,%mm7
69	movl	8(%esp),%esi
70	movl	12(%esp),%edi
71	movl	16(%esp),%ebp
72	xorl	%edx,%edx
73	xorl	%ecx,%ecx
/*
 * First pass, word 0: %mm4 = b[0]; %mm2 = a[0]*b[0];
 * %mm5 = m = low32(a[0]*b[0]) * n0 (later pmuludq uses only its low half);
 * %mm3 = n[0]*m + low32(a[0]*b[0]).  Both accumulators are then shifted
 * right by 32 so only the carries remain; a[1] and n[1] are preloaded.
 */
74	movd	(%edi),%mm4
75	movd	(%esi),%mm5
76	movd	(%ebp),%mm3
77	pmuludq	%mm4,%mm5
78	movq	%mm5,%mm2
79	movq	%mm5,%mm0
80	pand	%mm7,%mm0
81	pmuludq	20(%esp),%mm5
82	pmuludq	%mm5,%mm3
83	paddq	%mm0,%mm3
84	movd	4(%ebp),%mm1
85	movd	4(%esi),%mm0
86	psrlq	$32,%mm2
87	psrlq	$32,%mm3
88	incl	%ecx
89.align	16
/*
 * First-pass loop, j = %ecx = 1 .. last-1:
 *   %mm2 += a[j]*b[0];  %mm3 += n[j]*m + low32(%mm2);
 *   t[j-1] = low32(%mm3);  both accumulators >>= 32.
 * a[j+1] and n[j+1] are loaded ahead for the next round.
 */
90.L0051st:
91	pmuludq	%mm4,%mm0
92	pmuludq	%mm5,%mm1
93	paddq	%mm0,%mm2
94	paddq	%mm1,%mm3
95	movq	%mm2,%mm0
96	pand	%mm7,%mm0
97	movd	4(%ebp,%ecx,4),%mm1
98	paddq	%mm0,%mm3
99	movd	4(%esi,%ecx,4),%mm0
100	psrlq	$32,%mm2
101	movd	%mm3,28(%esp,%ecx,4)
102	psrlq	$32,%mm3
103	leal	1(%ecx),%ecx
104	cmpl	%ebx,%ecx
105	jl	.L0051st
/*
 * First pass, final word (j = last): store t[last-1], then write the sum
 * of the two carries as a 64-bit value over t[last] and t[last+1].
 * %edx becomes 1 for the first outer pass.
 */
106	pmuludq	%mm4,%mm0
107	pmuludq	%mm5,%mm1
108	paddq	%mm0,%mm2
109	paddq	%mm1,%mm3
110	movq	%mm2,%mm0
111	pand	%mm7,%mm0
112	paddq	%mm0,%mm3
113	movd	%mm3,28(%esp,%ecx,4)
114	psrlq	$32,%mm2
115	psrlq	$32,%mm3
116	paddq	%mm2,%mm3
117	movq	%mm3,32(%esp,%ebx,4)
118	incl	%edx
/*
 * Outer pass i = %edx (1 .. last): same as the first pass but with the
 * existing t[] folded in.  Word 0: %mm2 = a[0]*b[i] + t[0];
 * m = low32(%mm2) * n0; %mm3 = n[0]*m + low32(%mm2); then t[1] is added
 * to the shifted %mm2.  %ebx is borrowed as a down-counter for the inner
 * loop (last-1 rounds).
 */
119.L006outer:
120	xorl	%ecx,%ecx
121	movd	(%edi,%edx,4),%mm4
122	movd	(%esi),%mm5
123	movd	32(%esp),%mm6
124	movd	(%ebp),%mm3
125	pmuludq	%mm4,%mm5
126	paddq	%mm6,%mm5
127	movq	%mm5,%mm0
128	movq	%mm5,%mm2
129	pand	%mm7,%mm0
130	pmuludq	20(%esp),%mm5
131	pmuludq	%mm5,%mm3
132	paddq	%mm0,%mm3
133	movd	36(%esp),%mm6
134	movd	4(%ebp),%mm1
135	movd	4(%esi),%mm0
136	psrlq	$32,%mm2
137	psrlq	$32,%mm3
138	paddq	%mm6,%mm2
139	incl	%ecx
140	decl	%ebx
/*
 * Inner loop, j = %ecx: %mm2 += a[j]*b[i]; %mm3 += n[j]*m + low32(%mm2);
 * t[j-1] = low32(%mm3); shift both; add t[j+1] into %mm2.
 * The jnz tests the flags of "decl %ebx"; the lea in between leaves
 * flags untouched.
 */
141.L007inner:
142	pmuludq	%mm4,%mm0
143	pmuludq	%mm5,%mm1
144	paddq	%mm0,%mm2
145	paddq	%mm1,%mm3
146	movq	%mm2,%mm0
147	movd	36(%esp,%ecx,4),%mm6
148	pand	%mm7,%mm0
149	movd	4(%ebp,%ecx,4),%mm1
150	paddq	%mm0,%mm3
151	movd	4(%esi,%ecx,4),%mm0
152	psrlq	$32,%mm2
153	movd	%mm3,28(%esp,%ecx,4)
154	psrlq	$32,%mm3
155	paddq	%mm6,%mm2
156	decl	%ebx
157	leal	1(%ecx),%ecx
158	jnz	.L007inner
/*
 * Final word of the pass: %ebx is restored to last from %ecx, t[last-1]
 * is stored, and carry + carry + t[last+1] is written as 64 bits over
 * t[last] and t[last+1].  Loop while the next i <= last, then clear the
 * MMX state (emms) before joining the common tail.
 */
159	movl	%ecx,%ebx
160	pmuludq	%mm4,%mm0
161	pmuludq	%mm5,%mm1
162	paddq	%mm0,%mm2
163	paddq	%mm1,%mm3
164	movq	%mm2,%mm0
165	pand	%mm7,%mm0
166	paddq	%mm0,%mm3
167	movd	%mm3,28(%esp,%ecx,4)
168	psrlq	$32,%mm2
169	psrlq	$32,%mm3
170	movd	36(%esp,%ebx,4),%mm6
171	paddq	%mm2,%mm3
172	paddq	%mm6,%mm3
173	movq	%mm3,32(%esp,%ebx,4)
174	leal	1(%edx),%edx
175	cmpl	%ebx,%edx
176	jle	.L006outer
177	emms
178	jmp	.L008common_tail
179.align	16
/*
 * ---- integer-only path ----
 * %esi = a, %edi = b.  %ebp = (arg6 & 1) | (a - b): it is zero, and the
 * squaring code is taken, only when both operand pointers are equal and
 * the word count is even.  The jz tests the flags of the orl; the movl
 * loading b[0] into %edi in between does not change them.
 * %eax = address one past b[last].
 */
180.L004non_sse2:
181	movl	8(%esp),%esi
182	leal	1(%ebx),%ebp
183	movl	12(%esp),%edi
184	xorl	%ecx,%ecx
185	movl	%esi,%edx
186	andl	$1,%ebp
187	subl	%edi,%edx
188	leal	4(%edi,%ebx,4),%eax
189	orl	%edx,%ebp
190	movl	(%edi),%edi
191	jz	.L009bn_sqr_mont
/* Multiply path: remember the end of b[] at 28(%esp); %eax = a[0], carry = 0. */
192	movl	%eax,28(%esp)
193	movl	(%esi),%eax
194	xorl	%edx,%edx
195.align	16
/*
 * First pass, j = 0 .. last-1: t[j] = low32(a[j]*b[0] + carry), with the
 * carry kept in %edx.  The jl uses the cmpl flags (the movl store in
 * between preserves them).
 */
196.L010mull:
197	movl	%edx,%ebp
198	mull	%edi
199	addl	%eax,%ebp
200	leal	1(%ecx),%ecx
201	adcl	$0,%edx
202	movl	(%esi,%ecx,4),%eax
203	cmpl	%ebx,%ecx
204	movl	%ebp,28(%esp,%ecx,4)
205	jl	.L010mull
/*
 * Last word: t[last], t[last+1] = a[last]*b[0] + carry; t[last+2] = 0.
 * %edi = m = n0 * t[0] (low 32 bits), %esi = n.  n[0]*m + t[0] is formed
 * only for its carry (the low word in %eax is overwritten by n[1]);
 * continue in the reduction loop at j = 1.
 */
206	movl	%edx,%ebp
207	mull	%edi
208	movl	20(%esp),%edi
209	addl	%ebp,%eax
210	movl	16(%esp),%esi
211	adcl	$0,%edx
212	imull	32(%esp),%edi
213	movl	%eax,32(%esp,%ebx,4)
214	xorl	%ecx,%ecx
215	movl	%edx,36(%esp,%ebx,4)
216	movl	%ecx,40(%esp,%ebx,4)
217	movl	(%esi),%eax
218	mull	%edi
219	addl	32(%esp),%eax
220	movl	4(%esi),%eax
221	adcl	$0,%edx
222	incl	%ecx
223	jmp	.L0112ndmadd
224.align	16
/*
 * Multiply-accumulate pass for the current b word (%edi), j = 0 .. last-1:
 * t[j] = low32(t[j] + a[j]*b + carry).
 */
225.L0121stmadd:
226	movl	%edx,%ebp
227	mull	%edi
228	addl	32(%esp,%ecx,4),%ebp
229	leal	1(%ecx),%ecx
230	adcl	$0,%edx
231	addl	%eax,%ebp
232	movl	(%esi,%ecx,4),%eax
233	adcl	$0,%edx
234	cmpl	%ebx,%ecx
235	movl	%ebp,28(%esp,%ecx,4)
236	jl	.L0121stmadd
/*
 * Last word of the pass: update t[last], add the high word into t[last+1]
 * and record the carry out in t[last+2].  Then compute m = n0 * t[0],
 * switch %esi to n, take the carry of n[0]*m + t[0] and fall through
 * into the reduction loop at j = 1.
 */
237	movl	%edx,%ebp
238	mull	%edi
239	addl	32(%esp,%ebx,4),%eax
240	movl	20(%esp),%edi
241	adcl	$0,%edx
242	movl	16(%esp),%esi
243	addl	%eax,%ebp
244	adcl	$0,%edx
245	imull	32(%esp),%edi
246	xorl	%ecx,%ecx
247	addl	36(%esp,%ebx,4),%edx
248	movl	%ebp,32(%esp,%ebx,4)
249	adcl	$0,%ecx
250	movl	(%esi),%eax
251	movl	%edx,36(%esp,%ebx,4)
252	movl	%ecx,40(%esp,%ebx,4)
253	mull	%edi
254	addl	32(%esp),%eax
255	movl	4(%esi),%eax
256	adcl	$0,%edx
257	movl	$1,%ecx
258.align	16
/*
 * Reduction loop, j = 1 .. last-1: t[j-1] = low32(t[j] + n[j]*m + carry),
 * i.e. the sum is written back one word lower.
 */
259.L0112ndmadd:
260	movl	%edx,%ebp
261	mull	%edi
262	addl	32(%esp,%ecx,4),%ebp
263	leal	1(%ecx),%ecx
264	adcl	$0,%edx
265	addl	%eax,%ebp
266	movl	(%esi,%ecx,4),%eax
267	adcl	$0,%edx
268	cmpl	%ebx,%ecx
269	movl	%ebp,24(%esp,%ecx,4)
270	jl	.L0112ndmadd
/*
 * Last reduction word: store t[last-1]; t[last] = carry + old t[last+1];
 * t[last+1] = old t[last+2] + carry out.  Advance the b pointer by one
 * word; when it reaches the end address saved at 28(%esp) all passes are
 * done.  Otherwise load the next b word, save the pointer, reset
 * %esi = a, j = 0, carry = 0 and run another multiply-accumulate pass.
 */
271	movl	%edx,%ebp
272	mull	%edi
273	addl	32(%esp,%ebx,4),%ebp
274	adcl	$0,%edx
275	addl	%eax,%ebp
276	adcl	$0,%edx
277	movl	%ebp,28(%esp,%ebx,4)
278	xorl	%eax,%eax
279	movl	12(%esp),%ecx
280	addl	36(%esp,%ebx,4),%edx
281	adcl	40(%esp,%ebx,4),%eax
282	leal	4(%ecx),%ecx
283	movl	%edx,32(%esp,%ebx,4)
284	cmpl	28(%esp),%ecx
285	movl	%eax,36(%esp,%ebx,4)
286	je	.L008common_tail
287	movl	(%ecx),%edi
288	movl	8(%esp),%esi
289	movl	%ecx,12(%esp)
290	xorl	%ecx,%ecx
291	xorl	%edx,%edx
292	movl	(%esi),%eax
293	jmp	.L0121stmadd
294.align	16
/*
 * ---- squaring path (a == b, even word count) ----
 * 0(%esp) = last, 12(%esp) = outer index i = 0, %edi = a[0].
 * t[0] = low32(a[0]*a[0]); the high word is split into %edx = high >> 1
 * and %ebx = high & 1 so that the cross products below can be
 * accumulated at half weight and doubled when stored.
 */
295.L009bn_sqr_mont:
296	movl	%ebx,(%esp)
297	movl	%ecx,12(%esp)
298	movl	%edi,%eax
299	mull	%edi
300	movl	%eax,32(%esp)
301	movl	%edx,%ebx
302	shrl	$1,%edx
303	andl	$1,%ebx
304	incl	%ecx
305.align	16
/*
 * j = 1 .. last-1: %eax = low32(a[j]*a[0] + carry);
 * t[j] = 2*%eax + %ebx (lea, no flags); the bit shifted out by the
 * doubling (%eax >> 31) is carried to the next word in %ebx.
 */
306.L013sqr:
307	movl	(%esi,%ecx,4),%eax
308	movl	%edx,%ebp
309	mull	%edi
310	addl	%ebp,%eax
311	leal	1(%ecx),%ecx
312	adcl	$0,%edx
313	leal	(%ebx,%eax,2),%ebp
314	shrl	$31,%eax
315	cmpl	(%esp),%ecx
316	movl	%eax,%ebx
317	movl	%ebp,28(%esp,%ecx,4)
318	jl	.L013sqr
/*
 * Last word (j = last): store the doubled low word in t[last], the
 * doubled high word plus shifted-out bit in t[last+1], and the final
 * shifted-out bit in t[last+2].  Then m = n0 * t[0], %esi = n,
 * %ebx = last, take the carry of n[0]*m + t[0] and start the reduction
 * at j = 1.
 */
319	movl	(%esi,%ecx,4),%eax
320	movl	%edx,%ebp
321	mull	%edi
322	addl	%ebp,%eax
323	movl	20(%esp),%edi
324	adcl	$0,%edx
325	movl	16(%esp),%esi
326	leal	(%ebx,%eax,2),%ebp
327	imull	32(%esp),%edi
328	shrl	$31,%eax
329	movl	%ebp,32(%esp,%ecx,4)
330	leal	(%eax,%edx,2),%ebp
331	movl	(%esi),%eax
332	shrl	$31,%edx
333	movl	%ebp,36(%esp,%ecx,4)
334	movl	%edx,40(%esp,%ecx,4)
335	mull	%edi
336	addl	32(%esp),%eax
337	movl	%ecx,%ebx
338	adcl	$0,%edx
339	movl	4(%esi),%eax
340	movl	$1,%ecx
341.align	16
/*
 * Reduction loop for the squaring path, two words per round (%ecx
 * advances by 2 from 1, so it meets the odd value "last" exactly):
 *   t[j-1] = low32(t[j]   + n[j]*m   + carry)
 *   t[j]   = low32(t[j+1] + n[j+1]*m + carry)
 */
342.L0143rdmadd:
343	movl	%edx,%ebp
344	mull	%edi
345	addl	32(%esp,%ecx,4),%ebp
346	adcl	$0,%edx
347	addl	%eax,%ebp
348	movl	4(%esi,%ecx,4),%eax
349	adcl	$0,%edx
350	movl	%ebp,28(%esp,%ecx,4)
351	movl	%edx,%ebp
352	mull	%edi
353	addl	36(%esp,%ecx,4),%ebp
354	leal	2(%ecx),%ecx
355	adcl	$0,%edx
356	addl	%eax,%ebp
357	movl	(%esi,%ecx,4),%eax
358	adcl	$0,%edx
359	cmpl	%ebx,%ecx
360	movl	%ebp,24(%esp,%ecx,4)
361	jl	.L0143rdmadd
/*
 * Last reduction word: store t[last-1], fold the carry into t[last] and
 * t[last+1].  %ecx = outer index i from 12(%esp), %esi = a again; when
 * i == last every word has been processed.
 */
362	movl	%edx,%ebp
363	mull	%edi
364	addl	32(%esp,%ebx,4),%ebp
365	adcl	$0,%edx
366	addl	%eax,%ebp
367	adcl	$0,%edx
368	movl	%ebp,28(%esp,%ebx,4)
369	movl	12(%esp),%ecx
370	xorl	%eax,%eax
371	movl	8(%esp),%esi
372	addl	36(%esp,%ebx,4),%edx
373	adcl	40(%esp,%ebx,4),%eax
374	movl	%edx,32(%esp,%ebx,4)
375	cmpl	%ebx,%ecx
376	movl	%eax,36(%esp,%ebx,4)
377	je	.L008common_tail
/*
 * Next outer word: %edi = a[i+1], i is incremented and saved, and
 * t[i] += low32(a[i]*a[i]) for the new i.  If the new i equals last there
 * are no cross products left (the je uses the cmpl flags; the lea in
 * between only advances %ecx).  Otherwise split the carry word into
 * %edx = carry >> 1 and %ebx = carry & 1 as at the start of the path.
 */
378	movl	4(%esi,%ecx,4),%edi
379	leal	1(%ecx),%ecx
380	movl	%edi,%eax
381	movl	%ecx,12(%esp)
382	mull	%edi
383	addl	32(%esp,%ecx,4),%eax
384	adcl	$0,%edx
385	movl	%eax,32(%esp,%ecx,4)
386	xorl	%ebp,%ebp
387	cmpl	%ebx,%ecx
388	leal	1(%ecx),%ecx
389	je	.L015sqrlast
390	movl	%edx,%ebx
391	shrl	$1,%edx
392	andl	$1,%ebx
393.align	16
/*
 * Cross products for j = i+1 .. last: %eax = low32(a[j]*a[i] + carry);
 * t[j] += 2*%eax + %ebx, with the doubling overflow bit and both
 * addition carries collected into the next %ebx.  The adcl on %edx uses
 * the carry of the preceding addl (the lea does not alter flags).
 */
394.L016sqradd:
395	movl	(%esi,%ecx,4),%eax
396	movl	%edx,%ebp
397	mull	%edi
398	addl	%ebp,%eax
399	leal	(%eax,%eax,1),%ebp
400	adcl	$0,%edx
401	shrl	$31,%eax
402	addl	32(%esp,%ecx,4),%ebp
403	leal	1(%ecx),%ecx
404	adcl	$0,%eax
405	addl	%ebx,%ebp
406	adcl	$0,%eax
407	cmpl	(%esp),%ecx
408	movl	%ebp,28(%esp,%ecx,4)
409	movl	%eax,%ebx
410	jle	.L016sqradd
/* Double the remaining half-weight carry: %ebp:%edx = 2*%edx + %ebx. */
411	movl	%edx,%ebp
412	addl	%edx,%edx
413	shrl	$31,%ebp
414	addl	%ebx,%edx
415	adcl	$0,%ebp
/*
 * Add %ebp:%edx into t[%ecx] / t[%ecx+1] (%ecx = last + 1 here), compute
 * m = n0 * t[0], set %esi = n, restore %ebx = last, take the carry of
 * n[0]*m + t[0] and re-enter the reduction loop at j = 1.
 */
416.L015sqrlast:
417	movl	20(%esp),%edi
418	movl	16(%esp),%esi
419	imull	32(%esp),%edi
420	addl	32(%esp,%ecx,4),%edx
421	movl	(%esi),%eax
422	adcl	$0,%ebp
423	movl	%edx,32(%esp,%ecx,4)
424	movl	%ebp,36(%esp,%ecx,4)
425	mull	%edi
426	addl	32(%esp),%eax
427	leal	-1(%ecx),%ebx
428	adcl	$0,%edx
429	movl	$1,%ecx
430	movl	4(%esi),%eax
431	jmp	.L0143rdmadd
432.align	16
/*
 * ---- common tail (all paths arrive with %ebx = last) ----
 * %ebp = n, %edi = r, %esi = t, %ecx = last; the xorl also clears CF
 * for the first sbbl.
 */
433.L008common_tail:
434	movl	16(%esp),%ebp
435	movl	4(%esp),%edi
436	leal	32(%esp),%esi
437	movl	(%esi),%eax
438	movl	%ebx,%ecx
439	xorl	%edx,%edx
440.align	16
/*
 * r[j] = t[j] - n[j] - borrow for j = 0 .. last.  The borrow lives in CF
 * across rounds: decl, movl and lea all leave CF intact.  The jge tests
 * the sign produced by "decl %ecx", so %ecx is -1 on exit.
 */
441.L017sub:
442	sbbl	(%ebp,%edx,4),%eax
443	movl	%eax,(%edi,%edx,4)
444	decl	%ecx
445	movl	4(%esi,%edx,4),%eax
446	leal	1(%edx),%edx
447	jge	.L017sub
/*
 * %eax = t[last+1] - borrow, %edx = ~%eax.  They serve as AND masks for
 * t[] and r[] respectively below (presumably %eax is either 0 or all
 * ones at this point -- TODO confirm).
 */
448	sbbl	$0,%eax
449	movl	$-1,%edx
450	xorl	%eax,%edx
451	jmp	.L018copy
452.align	16
/*
 * Branch-free select, j = last down to 0:
 *   r[j] = (t[j] & %eax) | (r[j] & %edx)
 * Each t[j] is overwritten with %ecx once it has been read, so the
 * intermediate values do not stay on the stack.
 */
453.L018copy:
454	movl	32(%esp,%ebx,4),%esi
455	movl	(%edi,%ebx,4),%ebp
456	movl	%ecx,32(%esp,%ebx,4)
457	andl	%eax,%esi
458	andl	%edx,%ebp
459	orl	%esi,%ebp
460	movl	%ebp,(%edi,%ebx,4)
461	decl	%ebx
462	jge	.L018copy
/* Restore the entry-time %esp saved at 24(%esp) and report success (1). */
463	movl	24(%esp),%esp
464	movl	$1,%eax
/* Shared exit: also reached directly with %eax = 0 when arg6 < 4. */
465.L000just_leave:
466	popl	%edi
467	popl	%esi
468	popl	%ebx
469	popl	%ebp
470	ret
471.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
472.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
473.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
474.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
475.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
476.byte	111,114,103,62,0
/* Common symbol for the capability words tested above: 16 bytes, 4-byte aligned. */
477.comm	OPENSSL_ia32cap_P,16,4
478#else
/*
 * bn_mul_mont (non-PIC variant) -- word-serial Montgomery-style
 * multiply/reduce for i386.  The ASCII tag at the end of this block reads
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 * This variant takes the address of OPENSSL_ia32cap_P directly instead of
 * deriving it from the program counter.
 *
 * Six 32-bit stack arguments are read (20..40(%esp) after the four pushes).
 * Roles, as far as the code below shows:
 *   arg1  r[]  destination vector, written only in the common tail
 *   arg2  a[]  first operand vector
 *   arg3  b[]  second operand vector, one word consumed per outer pass
 *   arg4  n[]  vector multiplied by the per-pass word m and subtracted
 *              in the tail
 *   arg5       pointer; the 32-bit word it points to is copied to the
 *              frame and called n0 below
 *   arg6       signed word count
 * NOTE(review): presumably rp, ap, bp, np, n0, num of OpenSSL's C
 * prototype -- confirm against the caller.
 *
 * Returns %eax = 0 without touching memory when arg6 < 4, else %eax = 1.
 * %ebp, %ebx, %esi and %edi are pushed on entry and popped before ret.
 *
 * Frame, relative to the relocated %esp ("last" = arg6 - 1):
 *    0(%esp)   last (squaring path only)
 *    4(%esp)   arg1
 *    8(%esp)   arg2
 *   12(%esp)   arg3 (multiply path: advanced by 4 per pass;
 *              squaring path: reused as the outer index i)
 *   16(%esp)   arg4
 *   20(%esp)   n0
 *   24(%esp)   %esp value to restore before the register pops
 *   28(%esp)   non-SSE2 multiply path: address one past b[last]
 *   32(%esp)   t[], scratch vector of arg6 + 2 words
 */
479.text
480.globl	bn_mul_mont
481.type	bn_mul_mont,@function
482.align	16
483bn_mul_mont:
484.L_bn_mul_mont_begin:
/* Save the registers restored at exit; %eax = 0 is the early-exit result. */
485	pushl	%ebp
486	pushl	%ebx
487	pushl	%esi
488	pushl	%edi
489	xorl	%eax,%eax
/* %edi = arg6; leave immediately when it is below 4 (signed compare). */
490	movl	40(%esp),%edi
491	cmpl	$4,%edi
492	jl	.L000just_leave
/* %esi = address of arg1, %edx = address of arg2 on the caller's stack. */
493	leal	20(%esp),%esi
494	leal	24(%esp),%edx
/* %ebp = %esp - 32 - 4*(arg6 + 2): tentative frame base; %edi ends as arg6 + 2. */
495	addl	$2,%edi
496	negl	%edi
497	leal	-32(%esp,%edi,4),%ebp
498	negl	%edi
/*
 * Frame placement: lower %ebp by ((%ebp - &arg2) mod 2048), then by a
 * further 2048 bytes when bit 11 of %ebp and of &arg2 agree, and finally
 * round down to a 64-byte boundary.
 * NOTE(review): looks like a cache-aliasing avoidance measure -- confirm
 * against the generator script.
 */
499	movl	%ebp,%eax
500	subl	%edx,%eax
501	andl	$2047,%eax
502	subl	%eax,%ebp
503	xorl	%ebp,%edx
504	andl	$2048,%edx
505	xorl	$2048,%edx
506	subl	%edx,%ebp
507	andl	$-64,%ebp
/*
 * Move %esp down to %ebp in 4096-byte steps, reading one word at each
 * step so every page in between is touched.  %edx keeps the pre-move %esp.
 */
508	movl	%esp,%eax
509	subl	%ebp,%eax
510	andl	$-4096,%eax
511	movl	%esp,%edx
512	leal	(%ebp,%eax,1),%esp
513	movl	(%esp),%eax
514	cmpl	%ebp,%esp
515	ja	.L001page_walk
516	jmp	.L002page_walk_done
517.align	16
518.L001page_walk:
519	leal	-4096(%esp),%esp
520	movl	(%esp),%eax
521	cmpl	%ebp,%esp
522	ja	.L001page_walk
523.L002page_walk_done:
/*
 * Copy arg1..arg4 and the word behind arg5 into the new frame, set
 * %ebx = last (arg6 - 1) and stash the old %esp at 24(%esp).
 */
524	movl	(%esi),%eax
525	movl	4(%esi),%ebx
526	movl	8(%esi),%ecx
527	movl	12(%esi),%ebp
528	movl	16(%esi),%esi
529	movl	(%esi),%esi
530	movl	%eax,4(%esp)
531	movl	%ebx,8(%esp)
532	movl	%ecx,12(%esp)
533	movl	%ebp,16(%esp)
534	movl	%esi,20(%esp)
535	leal	-3(%edi),%ebx
536	movl	%edx,24(%esp)
/*
 * Bit 26 of the first dword of OPENSSL_ia32cap_P selects the pmuludq
 * path; when it is clear, branch to the integer-only path.
 */
537	leal	OPENSSL_ia32cap_P,%eax
538	btl	$26,(%eax)
539	jnc	.L003non_sse2
/*
 * ---- pmuludq path (MMX registers) ----
 * %mm7 = 0x00000000ffffffff mask, %esi = a, %edi = b, %ebp = n,
 * %edx = outer index, %ecx = inner index.
 */
540	movl	$-1,%eax
541	movd	%eax,%mm7
542	movl	8(%esp),%esi
543	movl	12(%esp),%edi
544	movl	16(%esp),%ebp
545	xorl	%edx,%edx
546	xorl	%ecx,%ecx
/*
 * First pass, word 0: %mm4 = b[0]; %mm2 = a[0]*b[0];
 * %mm5 = m = low32(a[0]*b[0]) * n0 (later pmuludq uses only its low half);
 * %mm3 = n[0]*m + low32(a[0]*b[0]).  Both accumulators are then shifted
 * right by 32 so only the carries remain; a[1] and n[1] are preloaded.
 */
547	movd	(%edi),%mm4
548	movd	(%esi),%mm5
549	movd	(%ebp),%mm3
550	pmuludq	%mm4,%mm5
551	movq	%mm5,%mm2
552	movq	%mm5,%mm0
553	pand	%mm7,%mm0
554	pmuludq	20(%esp),%mm5
555	pmuludq	%mm5,%mm3
556	paddq	%mm0,%mm3
557	movd	4(%ebp),%mm1
558	movd	4(%esi),%mm0
559	psrlq	$32,%mm2
560	psrlq	$32,%mm3
561	incl	%ecx
562.align	16
/*
 * First-pass loop, j = %ecx = 1 .. last-1:
 *   %mm2 += a[j]*b[0];  %mm3 += n[j]*m + low32(%mm2);
 *   t[j-1] = low32(%mm3);  both accumulators >>= 32.
 * a[j+1] and n[j+1] are loaded ahead for the next round.
 */
563.L0041st:
564	pmuludq	%mm4,%mm0
565	pmuludq	%mm5,%mm1
566	paddq	%mm0,%mm2
567	paddq	%mm1,%mm3
568	movq	%mm2,%mm0
569	pand	%mm7,%mm0
570	movd	4(%ebp,%ecx,4),%mm1
571	paddq	%mm0,%mm3
572	movd	4(%esi,%ecx,4),%mm0
573	psrlq	$32,%mm2
574	movd	%mm3,28(%esp,%ecx,4)
575	psrlq	$32,%mm3
576	leal	1(%ecx),%ecx
577	cmpl	%ebx,%ecx
578	jl	.L0041st
/*
 * First pass, final word (j = last): store t[last-1], then write the sum
 * of the two carries as a 64-bit value over t[last] and t[last+1].
 * %edx becomes 1 for the first outer pass.
 */
579	pmuludq	%mm4,%mm0
580	pmuludq	%mm5,%mm1
581	paddq	%mm0,%mm2
582	paddq	%mm1,%mm3
583	movq	%mm2,%mm0
584	pand	%mm7,%mm0
585	paddq	%mm0,%mm3
586	movd	%mm3,28(%esp,%ecx,4)
587	psrlq	$32,%mm2
588	psrlq	$32,%mm3
589	paddq	%mm2,%mm3
590	movq	%mm3,32(%esp,%ebx,4)
591	incl	%edx
/*
 * Outer pass i = %edx (1 .. last): same as the first pass but with the
 * existing t[] folded in.  Word 0: %mm2 = a[0]*b[i] + t[0];
 * m = low32(%mm2) * n0; %mm3 = n[0]*m + low32(%mm2); then t[1] is added
 * to the shifted %mm2.  %ebx is borrowed as a down-counter for the inner
 * loop (last-1 rounds).
 */
592.L005outer:
593	xorl	%ecx,%ecx
594	movd	(%edi,%edx,4),%mm4
595	movd	(%esi),%mm5
596	movd	32(%esp),%mm6
597	movd	(%ebp),%mm3
598	pmuludq	%mm4,%mm5
599	paddq	%mm6,%mm5
600	movq	%mm5,%mm0
601	movq	%mm5,%mm2
602	pand	%mm7,%mm0
603	pmuludq	20(%esp),%mm5
604	pmuludq	%mm5,%mm3
605	paddq	%mm0,%mm3
606	movd	36(%esp),%mm6
607	movd	4(%ebp),%mm1
608	movd	4(%esi),%mm0
609	psrlq	$32,%mm2
610	psrlq	$32,%mm3
611	paddq	%mm6,%mm2
612	incl	%ecx
613	decl	%ebx
/*
 * Inner loop, j = %ecx: %mm2 += a[j]*b[i]; %mm3 += n[j]*m + low32(%mm2);
 * t[j-1] = low32(%mm3); shift both; add t[j+1] into %mm2.
 * The jnz tests the flags of "decl %ebx"; the lea in between leaves
 * flags untouched.
 */
614.L006inner:
615	pmuludq	%mm4,%mm0
616	pmuludq	%mm5,%mm1
617	paddq	%mm0,%mm2
618	paddq	%mm1,%mm3
619	movq	%mm2,%mm0
620	movd	36(%esp,%ecx,4),%mm6
621	pand	%mm7,%mm0
622	movd	4(%ebp,%ecx,4),%mm1
623	paddq	%mm0,%mm3
624	movd	4(%esi,%ecx,4),%mm0
625	psrlq	$32,%mm2
626	movd	%mm3,28(%esp,%ecx,4)
627	psrlq	$32,%mm3
628	paddq	%mm6,%mm2
629	decl	%ebx
630	leal	1(%ecx),%ecx
631	jnz	.L006inner
/*
 * Final word of the pass: %ebx is restored to last from %ecx, t[last-1]
 * is stored, and carry + carry + t[last+1] is written as 64 bits over
 * t[last] and t[last+1].  Loop while the next i <= last, then clear the
 * MMX state (emms) before joining the common tail.
 */
632	movl	%ecx,%ebx
633	pmuludq	%mm4,%mm0
634	pmuludq	%mm5,%mm1
635	paddq	%mm0,%mm2
636	paddq	%mm1,%mm3
637	movq	%mm2,%mm0
638	pand	%mm7,%mm0
639	paddq	%mm0,%mm3
640	movd	%mm3,28(%esp,%ecx,4)
641	psrlq	$32,%mm2
642	psrlq	$32,%mm3
643	movd	36(%esp,%ebx,4),%mm6
644	paddq	%mm2,%mm3
645	paddq	%mm6,%mm3
646	movq	%mm3,32(%esp,%ebx,4)
647	leal	1(%edx),%edx
648	cmpl	%ebx,%edx
649	jle	.L005outer
650	emms
651	jmp	.L007common_tail
652.align	16
/*
 * ---- integer-only path ----
 * %esi = a, %edi = b.  %ebp = (arg6 & 1) | (a - b): it is zero, and the
 * squaring code is taken, only when both operand pointers are equal and
 * the word count is even.  The jz tests the flags of the orl; the movl
 * loading b[0] into %edi in between does not change them.
 * %eax = address one past b[last].
 */
653.L003non_sse2:
654	movl	8(%esp),%esi
655	leal	1(%ebx),%ebp
656	movl	12(%esp),%edi
657	xorl	%ecx,%ecx
658	movl	%esi,%edx
659	andl	$1,%ebp
660	subl	%edi,%edx
661	leal	4(%edi,%ebx,4),%eax
662	orl	%edx,%ebp
663	movl	(%edi),%edi
664	jz	.L008bn_sqr_mont
/* Multiply path: remember the end of b[] at 28(%esp); %eax = a[0], carry = 0. */
665	movl	%eax,28(%esp)
666	movl	(%esi),%eax
667	xorl	%edx,%edx
668.align	16
/*
 * First pass, j = 0 .. last-1: t[j] = low32(a[j]*b[0] + carry), with the
 * carry kept in %edx.  The jl uses the cmpl flags (the movl store in
 * between preserves them).
 */
669.L009mull:
670	movl	%edx,%ebp
671	mull	%edi
672	addl	%eax,%ebp
673	leal	1(%ecx),%ecx
674	adcl	$0,%edx
675	movl	(%esi,%ecx,4),%eax
676	cmpl	%ebx,%ecx
677	movl	%ebp,28(%esp,%ecx,4)
678	jl	.L009mull
/*
 * Last word: t[last], t[last+1] = a[last]*b[0] + carry; t[last+2] = 0.
 * %edi = m = n0 * t[0] (low 32 bits), %esi = n.  n[0]*m + t[0] is formed
 * only for its carry (the low word in %eax is overwritten by n[1]);
 * continue in the reduction loop at j = 1.
 */
679	movl	%edx,%ebp
680	mull	%edi
681	movl	20(%esp),%edi
682	addl	%ebp,%eax
683	movl	16(%esp),%esi
684	adcl	$0,%edx
685	imull	32(%esp),%edi
686	movl	%eax,32(%esp,%ebx,4)
687	xorl	%ecx,%ecx
688	movl	%edx,36(%esp,%ebx,4)
689	movl	%ecx,40(%esp,%ebx,4)
690	movl	(%esi),%eax
691	mull	%edi
692	addl	32(%esp),%eax
693	movl	4(%esi),%eax
694	adcl	$0,%edx
695	incl	%ecx
696	jmp	.L0102ndmadd
697.align	16
/*
 * Multiply-accumulate pass for the current b word (%edi), j = 0 .. last-1:
 * t[j] = low32(t[j] + a[j]*b + carry).
 */
698.L0111stmadd:
699	movl	%edx,%ebp
700	mull	%edi
701	addl	32(%esp,%ecx,4),%ebp
702	leal	1(%ecx),%ecx
703	adcl	$0,%edx
704	addl	%eax,%ebp
705	movl	(%esi,%ecx,4),%eax
706	adcl	$0,%edx
707	cmpl	%ebx,%ecx
708	movl	%ebp,28(%esp,%ecx,4)
709	jl	.L0111stmadd
/*
 * Last word of the pass: update t[last], add the high word into t[last+1]
 * and record the carry out in t[last+2].  Then compute m = n0 * t[0],
 * switch %esi to n, take the carry of n[0]*m + t[0] and fall through
 * into the reduction loop at j = 1.
 */
710	movl	%edx,%ebp
711	mull	%edi
712	addl	32(%esp,%ebx,4),%eax
713	movl	20(%esp),%edi
714	adcl	$0,%edx
715	movl	16(%esp),%esi
716	addl	%eax,%ebp
717	adcl	$0,%edx
718	imull	32(%esp),%edi
719	xorl	%ecx,%ecx
720	addl	36(%esp,%ebx,4),%edx
721	movl	%ebp,32(%esp,%ebx,4)
722	adcl	$0,%ecx
723	movl	(%esi),%eax
724	movl	%edx,36(%esp,%ebx,4)
725	movl	%ecx,40(%esp,%ebx,4)
726	mull	%edi
727	addl	32(%esp),%eax
728	movl	4(%esi),%eax
729	adcl	$0,%edx
730	movl	$1,%ecx
731.align	16
/*
 * Reduction loop, j = 1 .. last-1: t[j-1] = low32(t[j] + n[j]*m + carry),
 * i.e. the sum is written back one word lower.
 */
732.L0102ndmadd:
733	movl	%edx,%ebp
734	mull	%edi
735	addl	32(%esp,%ecx,4),%ebp
736	leal	1(%ecx),%ecx
737	adcl	$0,%edx
738	addl	%eax,%ebp
739	movl	(%esi,%ecx,4),%eax
740	adcl	$0,%edx
741	cmpl	%ebx,%ecx
742	movl	%ebp,24(%esp,%ecx,4)
743	jl	.L0102ndmadd
/*
 * Last reduction word: store t[last-1]; t[last] = carry + old t[last+1];
 * t[last+1] = old t[last+2] + carry out.  Advance the b pointer by one
 * word; when it reaches the end address saved at 28(%esp) all passes are
 * done.  Otherwise load the next b word, save the pointer, reset
 * %esi = a, j = 0, carry = 0 and run another multiply-accumulate pass.
 */
744	movl	%edx,%ebp
745	mull	%edi
746	addl	32(%esp,%ebx,4),%ebp
747	adcl	$0,%edx
748	addl	%eax,%ebp
749	adcl	$0,%edx
750	movl	%ebp,28(%esp,%ebx,4)
751	xorl	%eax,%eax
752	movl	12(%esp),%ecx
753	addl	36(%esp,%ebx,4),%edx
754	adcl	40(%esp,%ebx,4),%eax
755	leal	4(%ecx),%ecx
756	movl	%edx,32(%esp,%ebx,4)
757	cmpl	28(%esp),%ecx
758	movl	%eax,36(%esp,%ebx,4)
759	je	.L007common_tail
760	movl	(%ecx),%edi
761	movl	8(%esp),%esi
762	movl	%ecx,12(%esp)
763	xorl	%ecx,%ecx
764	xorl	%edx,%edx
765	movl	(%esi),%eax
766	jmp	.L0111stmadd
767.align	16
/*
 * ---- squaring path (a == b, even word count) ----
 * 0(%esp) = last, 12(%esp) = outer index i = 0, %edi = a[0].
 * t[0] = low32(a[0]*a[0]); the high word is split into %edx = high >> 1
 * and %ebx = high & 1 so that the cross products below can be
 * accumulated at half weight and doubled when stored.
 */
768.L008bn_sqr_mont:
769	movl	%ebx,(%esp)
770	movl	%ecx,12(%esp)
771	movl	%edi,%eax
772	mull	%edi
773	movl	%eax,32(%esp)
774	movl	%edx,%ebx
775	shrl	$1,%edx
776	andl	$1,%ebx
777	incl	%ecx
778.align	16
/*
 * j = 1 .. last-1: %eax = low32(a[j]*a[0] + carry);
 * t[j] = 2*%eax + %ebx (lea, no flags); the bit shifted out by the
 * doubling (%eax >> 31) is carried to the next word in %ebx.
 */
779.L012sqr:
780	movl	(%esi,%ecx,4),%eax
781	movl	%edx,%ebp
782	mull	%edi
783	addl	%ebp,%eax
784	leal	1(%ecx),%ecx
785	adcl	$0,%edx
786	leal	(%ebx,%eax,2),%ebp
787	shrl	$31,%eax
788	cmpl	(%esp),%ecx
789	movl	%eax,%ebx
790	movl	%ebp,28(%esp,%ecx,4)
791	jl	.L012sqr
/*
 * Last word (j = last): store the doubled low word in t[last], the
 * doubled high word plus shifted-out bit in t[last+1], and the final
 * shifted-out bit in t[last+2].  Then m = n0 * t[0], %esi = n,
 * %ebx = last, take the carry of n[0]*m + t[0] and start the reduction
 * at j = 1.
 */
792	movl	(%esi,%ecx,4),%eax
793	movl	%edx,%ebp
794	mull	%edi
795	addl	%ebp,%eax
796	movl	20(%esp),%edi
797	adcl	$0,%edx
798	movl	16(%esp),%esi
799	leal	(%ebx,%eax,2),%ebp
800	imull	32(%esp),%edi
801	shrl	$31,%eax
802	movl	%ebp,32(%esp,%ecx,4)
803	leal	(%eax,%edx,2),%ebp
804	movl	(%esi),%eax
805	shrl	$31,%edx
806	movl	%ebp,36(%esp,%ecx,4)
807	movl	%edx,40(%esp,%ecx,4)
808	mull	%edi
809	addl	32(%esp),%eax
810	movl	%ecx,%ebx
811	adcl	$0,%edx
812	movl	4(%esi),%eax
813	movl	$1,%ecx
814.align	16
/*
 * Reduction loop for the squaring path, two words per round (%ecx
 * advances by 2 from 1, so it meets the odd value "last" exactly):
 *   t[j-1] = low32(t[j]   + n[j]*m   + carry)
 *   t[j]   = low32(t[j+1] + n[j+1]*m + carry)
 */
815.L0133rdmadd:
816	movl	%edx,%ebp
817	mull	%edi
818	addl	32(%esp,%ecx,4),%ebp
819	adcl	$0,%edx
820	addl	%eax,%ebp
821	movl	4(%esi,%ecx,4),%eax
822	adcl	$0,%edx
823	movl	%ebp,28(%esp,%ecx,4)
824	movl	%edx,%ebp
825	mull	%edi
826	addl	36(%esp,%ecx,4),%ebp
827	leal	2(%ecx),%ecx
828	adcl	$0,%edx
829	addl	%eax,%ebp
830	movl	(%esi,%ecx,4),%eax
831	adcl	$0,%edx
832	cmpl	%ebx,%ecx
833	movl	%ebp,24(%esp,%ecx,4)
834	jl	.L0133rdmadd
/*
 * Last reduction word: store t[last-1], fold the carry into t[last] and
 * t[last+1].  %ecx = outer index i from 12(%esp), %esi = a again; when
 * i == last every word has been processed.
 */
835	movl	%edx,%ebp
836	mull	%edi
837	addl	32(%esp,%ebx,4),%ebp
838	adcl	$0,%edx
839	addl	%eax,%ebp
840	adcl	$0,%edx
841	movl	%ebp,28(%esp,%ebx,4)
842	movl	12(%esp),%ecx
843	xorl	%eax,%eax
844	movl	8(%esp),%esi
845	addl	36(%esp,%ebx,4),%edx
846	adcl	40(%esp,%ebx,4),%eax
847	movl	%edx,32(%esp,%ebx,4)
848	cmpl	%ebx,%ecx
849	movl	%eax,36(%esp,%ebx,4)
850	je	.L007common_tail
/*
 * Next outer word: %edi = a[i+1], i is incremented and saved, and
 * t[i] += low32(a[i]*a[i]) for the new i.  If the new i equals last there
 * are no cross products left (the je uses the cmpl flags; the lea in
 * between only advances %ecx).  Otherwise split the carry word into
 * %edx = carry >> 1 and %ebx = carry & 1 as at the start of the path.
 */
851	movl	4(%esi,%ecx,4),%edi
852	leal	1(%ecx),%ecx
853	movl	%edi,%eax
854	movl	%ecx,12(%esp)
855	mull	%edi
856	addl	32(%esp,%ecx,4),%eax
857	adcl	$0,%edx
858	movl	%eax,32(%esp,%ecx,4)
859	xorl	%ebp,%ebp
860	cmpl	%ebx,%ecx
861	leal	1(%ecx),%ecx
862	je	.L014sqrlast
863	movl	%edx,%ebx
864	shrl	$1,%edx
865	andl	$1,%ebx
866.align	16
/*
 * Cross products for j = i+1 .. last: %eax = low32(a[j]*a[i] + carry);
 * t[j] += 2*%eax + %ebx, with the doubling overflow bit and both
 * addition carries collected into the next %ebx.  The adcl on %edx uses
 * the carry of the preceding addl (the lea does not alter flags).
 */
867.L015sqradd:
868	movl	(%esi,%ecx,4),%eax
869	movl	%edx,%ebp
870	mull	%edi
871	addl	%ebp,%eax
872	leal	(%eax,%eax,1),%ebp
873	adcl	$0,%edx
874	shrl	$31,%eax
875	addl	32(%esp,%ecx,4),%ebp
876	leal	1(%ecx),%ecx
877	adcl	$0,%eax
878	addl	%ebx,%ebp
879	adcl	$0,%eax
880	cmpl	(%esp),%ecx
881	movl	%ebp,28(%esp,%ecx,4)
882	movl	%eax,%ebx
883	jle	.L015sqradd
/* Double the remaining half-weight carry: %ebp:%edx = 2*%edx + %ebx. */
884	movl	%edx,%ebp
885	addl	%edx,%edx
886	shrl	$31,%ebp
887	addl	%ebx,%edx
888	adcl	$0,%ebp
/*
 * Add %ebp:%edx into t[%ecx] / t[%ecx+1] (%ecx = last + 1 here), compute
 * m = n0 * t[0], set %esi = n, restore %ebx = last, take the carry of
 * n[0]*m + t[0] and re-enter the reduction loop at j = 1.
 */
889.L014sqrlast:
890	movl	20(%esp),%edi
891	movl	16(%esp),%esi
892	imull	32(%esp),%edi
893	addl	32(%esp,%ecx,4),%edx
894	movl	(%esi),%eax
895	adcl	$0,%ebp
896	movl	%edx,32(%esp,%ecx,4)
897	movl	%ebp,36(%esp,%ecx,4)
898	mull	%edi
899	addl	32(%esp),%eax
900	leal	-1(%ecx),%ebx
901	adcl	$0,%edx
902	movl	$1,%ecx
903	movl	4(%esi),%eax
904	jmp	.L0133rdmadd
905.align	16
/*
 * ---- common tail (all paths arrive with %ebx = last) ----
 * %ebp = n, %edi = r, %esi = t, %ecx = last; the xorl also clears CF
 * for the first sbbl.
 */
906.L007common_tail:
907	movl	16(%esp),%ebp
908	movl	4(%esp),%edi
909	leal	32(%esp),%esi
910	movl	(%esi),%eax
911	movl	%ebx,%ecx
912	xorl	%edx,%edx
913.align	16
/*
 * r[j] = t[j] - n[j] - borrow for j = 0 .. last.  The borrow lives in CF
 * across rounds: decl, movl and lea all leave CF intact.  The jge tests
 * the sign produced by "decl %ecx", so %ecx is -1 on exit.
 */
914.L016sub:
915	sbbl	(%ebp,%edx,4),%eax
916	movl	%eax,(%edi,%edx,4)
917	decl	%ecx
918	movl	4(%esi,%edx,4),%eax
919	leal	1(%edx),%edx
920	jge	.L016sub
/*
 * %eax = t[last+1] - borrow, %edx = ~%eax.  They serve as AND masks for
 * t[] and r[] respectively below (presumably %eax is either 0 or all
 * ones at this point -- TODO confirm).
 */
921	sbbl	$0,%eax
922	movl	$-1,%edx
923	xorl	%eax,%edx
924	jmp	.L017copy
925.align	16
/*
 * Branch-free select, j = last down to 0:
 *   r[j] = (t[j] & %eax) | (r[j] & %edx)
 * Each t[j] is overwritten with %ecx once it has been read, so the
 * intermediate values do not stay on the stack.
 */
926.L017copy:
927	movl	32(%esp,%ebx,4),%esi
928	movl	(%edi,%ebx,4),%ebp
929	movl	%ecx,32(%esp,%ebx,4)
930	andl	%eax,%esi
931	andl	%edx,%ebp
932	orl	%esi,%ebp
933	movl	%ebp,(%edi,%ebx,4)
934	decl	%ebx
935	jge	.L017copy
/* Restore the entry-time %esp saved at 24(%esp) and report success (1). */
936	movl	24(%esp),%esp
937	movl	$1,%eax
/* Shared exit: also reached directly with %eax = 0 when arg6 < 4. */
938.L000just_leave:
939	popl	%edi
940	popl	%esi
941	popl	%ebx
942	popl	%ebp
943	ret
944.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
945.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
946.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
947.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
948.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
949.byte	111,114,103,62,0
/* Common symbol for the capability words tested above: 16 bytes, 4-byte aligned. */
950.comm	OPENSSL_ia32cap_P,16,4
951#endif
952