/* Source: /freebsd/sys/crypto/openssl/amd64/rsaz-x86_64.S (revision bc3d5698) */
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text



/*
 * rsaz_512_sqr -- entry, prologue and code-path dispatch.
 *
 * Syntax/ABI: GNU as, AT&T operand order, x86-64 (arguments are read from
 * %rdi, %rsi, %rdx, %rcx, %r8 as in the System V AMD64 convention).
 *
 * Register use on entry, as read by the code of this function:
 *   %rdi  copied into %rsi after every pass, i.e. the buffer a pass leaves
 *         its result in is the input of the next pass (the store itself
 *         happens in a helper that is not part of this function)
 *   %rsi  pointer to the eight 64-bit limbs that are squared
 *   %rdx  parked in %xmm1, later handed to the reduction helper in %rbp
 *         (presumably the modulus pointer -- confirm against the helper)
 *   %rcx  spilled to 128(%rsp); reloaded into %rdx before the MULX-flavoured
 *         reduction helper (presumably the Montgomery constant n0 -- confirm)
 *   %r8d  number of squaring passes; each pass ends with decl/jnz
 *
 * Stack frame after the prologue (128+24 bytes below the six pushes):
 *     0(%rsp) .. 127(%rsp)   16-limb product scratch area
 *   128(%rsp)                saved %rcx
 *   136(%rsp)                saved pass counter (32 bit)
 *
 * Callee-saved %rbx, %rbp, %r12-%r15 are pushed here and restored at
 * .Lsqr_tail; the .cfi_* directives track each push for unwinding.
 */
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202		/* movq %rdx,%xmm1 : park third argument */
	movq	(%rsi),%rdx		/* %rdx = a[0] */
	movq	8(%rsi),%rax		/* %rax = a[1] */
	movq	%rcx,128(%rsp)		/* spill fourth argument */
	/*
	 * Take the mulx/adcx/adox path only when bits 8 and 19 are both set
	 * in the dword at OPENSSL_ia32cap_P+8 (presumably the BMI2 and ADX
	 * CPUID feature bits -- confirm against the capability vector docs).
	 */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Loop_sqrx
	jmp	.Loop_sqr		/* skip the alignment padding that follows */

/*
 * .Loop_sqr -- one 512-bit squaring pass using plain mulq/addq/adcq.
 *
 * State on entry to every pass:
 *   %rdx = a[0], %rax = a[1], %rsi = pointer to a[0..7],
 *   %r8d = passes still to run (saved at 128+8(%rsp) straight away).
 *
 * Scheme: step k multiplies a[k] by the higher limbs a[k+1..7] and adds the
 * products into the running columns, doubles the two lowest pending columns
 * (every cross product occurs twice in a square), adds a[k]*a[k], and stores
 * two finished result limbs at 16*k(%rsp) and 16*k+8(%rsp).  %rcx and %rbx
 * alternate as the small carry word handed from one step to the next.
 *
 * After the 16 limbs are in 0..127(%rsp) the low half is passed to
 * __rsaz_512_reduce in %r8-%r15 (with %rbp restored from %xmm1), the high
 * half is added to what comes back, and __rsaz_512_subtract is called with
 * %rcx = 0 or all-ones depending on the carry of that addition.  Both
 * helpers are defined elsewhere; what they do with these inputs is not
 * visible here.
 */
.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)	/* save pass counter; %r8 is reused below */

	/* step 0: a[0] * a[1..7] -> %r8..%r15 */
	movq	%rdx,%rbx		/* %rbx = a[0] */
	movq	%rax,%rbp		/* %rbp = a[1] */
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx		/* %rcx:%r8 = %r8 << 1 */
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	mulq	%rax			/* a[0]^2 */
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)


	/* step 1: a[1] * a[2..7] */
	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	xorq	%rbx,%rbx		/* %rbx:%r10:%r9 = %r10:%r9 << 1 */
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax			/* a[1]^2 */

	/*
	 * The carry out of the next add is not propagated.  The carry word is
	 * at most 2 and %rax is the low half of a square; the code relies on
	 * that sum never wrapping.  The same pattern recurs in later steps.
	 */
	addq	%rcx,%rax
	movq	16(%rsi),%rbp		/* %rbp = a[2] for step 2 */
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)


	/* step 2: a[2] * a[3..7] */
	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	xorq	%rcx,%rcx		/* %rcx:%r12:%r11 = %r12:%r11 << 1 */
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax			/* a[2]^2 */

	addq	%rbx,%rax
	movq	24(%rsi),%r10		/* %r10 = a[3] for step 3 */
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	/* step 3: a[3] * a[4..7]; a[4], a[5], a[6] are cached in registers */
	movq	%rax,%r11		/* %r11 = a[4] */
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12		/* %r12 = a[5] */
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp		/* %rbp = a[6] */
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	xorq	%rbx,%rbx		/* %rbx:%r14:%r13 = %r14:%r13 << 1 */
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax			/* a[3]^2 */

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax		/* a[5] */
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	/* step 4: a[4] * a[5..7] */
	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax		/* a[6] */
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14		/* %r14 = a[7] */
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	xorq	%rcx,%rcx		/* %rcx:%r8:%r15 = %r8:%r15 << 1 */
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax			/* a[4]^2 */

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax		/* a[6] */
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	/* step 5: a[5] * a[6..7] */
	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax		/* a[7] */
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	xorq	%rbx,%rbx		/* %rbx:%r10:%r9 = %r10:%r9 << 1 */
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax			/* a[5]^2 */

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax		/* a[7] */
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* step 6: a[6] * a[7] */
	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx		/* %rcx:%r12:%r11 = %r12:%r11 << 1 */
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax			/* a[6]^2 */

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax		/* a[7] */
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	/* step 7: only the doubling of the last column and a[7]^2 remain */
	xorq	%rbx,%rbx		/* %rbx:%r13 = %r13 << 1 */
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax			/* a[7]^2 */

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	/* low eight limbs of the product go to the reduction in registers */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205		/* movq %xmm1,%rbp : value parked at entry */

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	call	__rsaz_512_reduce

	/* add the high eight limbs of the product to the returned value */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		/* %rcx = all-ones if that sum carried, else 0 */

	call	__rsaz_512_subtract

	/* set up the next pass: its input is what %rdi points to */
	movq	%r8,%rdx		/* becomes a[0] of the next pass */
	movq	%r9,%rax		/* becomes a[1] of the next pass */
	movl	128+8(%rsp),%r8d	/* reload pass counter */
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr
	jmp	.Lsqr_tail		/* jump over the mulx variant of the loop */

/*
 * .Loop_sqrx -- one 512-bit squaring pass using mulx with two independent
 * carry chains (adcx uses CF, adox uses OF).
 *
 * State on entry to every pass:
 *   %rdx = a[0], %rax = a[1], %rsi = pointer to a[0..7],
 *   %r8d = passes still to run (saved at 128+8(%rsp) straight away).
 *
 * %rdi is used as a scratch register inside the pass, so its value is kept
 * in %xmm0 and restored before the helpers are called.  %rbp is zeroed
 * once per pass and then serves as the constant 0 for carry folding.
 *
 * Scheme: step k multiplies a[k] (in %rdx) by the higher limbs a[k+1..7]
 * and adds the products into the running columns, doubles the two lowest
 * pending columns, adds a[k]*a[k], and stores two finished result limbs at
 * 16*k(%rsp) and 16*k+8(%rsp).  %rcx and %rbx alternate as the small carry
 * word handed from one step to the next.
 *
 * Several instructions are emitted as raw .byte sequences; each one is
 * decoded in the comment next to it.  The bytes are kept as they are
 * because they pin a specific (longer, disp32) encoding.
 */
.align	32
.Loop_sqrx:
	movl	%r8d,128+8(%rsp)	/* save pass counter; %r8 is reused below */
.byte	102,72,15,110,199		/* movq %rdi,%xmm0 : park %rdi */

	/* step 0: a[0] * a[1..7] -> %r8..%r15 */
	mulxq	%rax,%r8,%r9
	movq	%rax,%rbx		/* keep a[1] for step 1 */

	mulxq	16(%rsi),%rcx,%r10
	xorq	%rbp,%rbp		/* %rbp = 0, also clears CF and OF */

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rcx,%r9

.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00	/* mulxq 32(%rsi),%rcx,%r12 */
	adcxq	%rax,%r10

.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rax,%r13 */
	adcxq	%rcx,%r11

	mulxq	48(%rsi),%rcx,%r14
	adcxq	%rax,%r12
	adcxq	%rcx,%r13

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rax,%r14
	adcxq	%rbp,%r15		/* %rbp is 0: fold the final carry */

	mulxq	%rdx,%rax,%rdi		/* a[0]^2 */
	movq	%rbx,%rdx		/* %rdx = a[1] */
	xorq	%rcx,%rcx
	adoxq	%r8,%r8			/* double the lowest pending column */
	adcxq	%rdi,%r8
	adoxq	%rbp,%rcx
	adcxq	%rbp,%rcx

	movq	%rax,(%rsp)
	movq	%r8,8(%rsp)


	/* step 1: a[1] * a[2..7] */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00	/* mulxq 16(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

	mulxq	24(%rsi),%rdi,%r8
	adoxq	%rdi,%r11
.byte	0x66				/* extra prefix byte on the next instruction (presumably padding) */
	adcxq	%r8,%r12

	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13

	mulxq	40(%rsi),%rdi,%r8
	adoxq	%rdi,%r13
	adcxq	%r8,%r14

.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r8 */
	adoxq	%rdi,%r15
	adcxq	%rbp,%r8
	mulxq	%rdx,%rax,%rdi		/* a[1]^2 */
	adoxq	%rbp,%r8
.byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00		/* movq 16(%rsi),%rdx : a[2] */

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	/*
	 * The carry out of the next add is not propagated.  The carry word is
	 * at most 2 and %rax is the low half of a square; the code relies on
	 * that sum never wrapping.  The same pattern recurs in later steps.
	 */
	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r10
	adcxq	%rbp,%rbx

	movq	%r9,16(%rsp)
.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00		/* movq %r10,24(%rsp) */


	/* step 2: a[2] * a[3..7] */
	mulxq	24(%rsi),%rdi,%r9
	adoxq	%rdi,%r12
	adcxq	%r9,%r13

	mulxq	32(%rsi),%rax,%rcx
	adoxq	%rax,%r13
	adcxq	%rcx,%r14

.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rdi,%r9 */
	adoxq	%rdi,%r14
	adcxq	%r9,%r15

.byte	0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rcx */
	adoxq	%rax,%r15
	adcxq	%rcx,%r8

	mulxq	56(%rsi),%rdi,%r9
	adoxq	%rdi,%r8
	adcxq	%rbp,%r9
	mulxq	%rdx,%rax,%rdi		/* a[2]^2 */
	adoxq	%rbp,%r9
	movq	24(%rsi),%rdx		/* %rdx = a[3] */

	xorq	%rcx,%rcx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	/* step 3: a[3] * a[4..7] */
	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

	mulxq	40(%rsi),%rdi,%r10
	adoxq	%rdi,%r15
	adcxq	%r10,%r8

	mulxq	48(%rsi),%rax,%rbx
	adoxq	%rax,%r8
	adcxq	%rbx,%r9

	mulxq	56(%rsi),%rdi,%r10
	adoxq	%rdi,%r9
	adcxq	%rbp,%r10
	mulxq	%rdx,%rax,%rdi		/* a[3]^2 */
	adoxq	%rbp,%r10
	movq	32(%rsi),%rdx		/* %rdx = a[4] */

	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%r14,%r14
	adcxq	%rax,%r13
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r14
	adcxq	%rbp,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	/* step 4: a[4] * a[5..7] */
	mulxq	40(%rsi),%rdi,%r11
	adoxq	%rdi,%r8
	adcxq	%r11,%r9

	mulxq	48(%rsi),%rax,%rcx
	adoxq	%rax,%r9
	adcxq	%rcx,%r10

	mulxq	56(%rsi),%rdi,%r11
	adoxq	%rdi,%r10
	adcxq	%rbp,%r11
	mulxq	%rdx,%rax,%rdi		/* a[4]^2 */
	movq	40(%rsi),%rdx		/* %rdx = a[5] */
	adoxq	%rbp,%r11

	xorq	%rcx,%rcx
	adoxq	%r15,%r15

	adcxq	%rbx,%rax
	adoxq	%r8,%r8
	adcxq	%rax,%r15
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r8
	adcxq	%rbp,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	/* step 5: a[5] * a[6..7] */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r12 */
	adoxq	%rdi,%r11
	adcxq	%rbp,%r12
	mulxq	%rdx,%rax,%rdi		/* a[5]^2 */
	adoxq	%rbp,%r12
	movq	48(%rsi),%rdx		/* %rdx = a[6] */

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adcxq	%rdi,%r10
	adoxq	%rbp,%rbx
	adcxq	%rbp,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* step 6: a[6] * a[7] */
.byte	0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rax,%r13 */
	adoxq	%rax,%r12
	adoxq	%rbp,%r13

	mulxq	%rdx,%rax,%rdi		/* a[6]^2 */
	xorq	%rcx,%rcx
	movq	56(%rsi),%rdx		/* %rdx = a[7] */
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00		/* movq %r11,96(%rsp) */
.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00		/* movq %r12,104(%rsp) */


	/* step 7: only the doubling of the last column and a[7]^2 remain */
	mulxq	%rdx,%rax,%rdx		/* a[7]^2 */
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%rbp,%rbx
	adcxq	%r13,%rax
	adcxq	%rdx,%rbx

.byte	102,72,15,126,199		/* movq %xmm0,%rdi : restore %rdi */
.byte	102,72,15,126,205		/* movq %xmm1,%rbp : value parked at entry */

	movq	128(%rsp),%rdx		/* value spilled from %rcx at entry */
	/* low eight limbs of the product go to the reduction in registers */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	movq	%rax,112(%rsp)
	movq	%rbx,120(%rsp)

	call	__rsaz_512_reducex

	/* add the high eight limbs of the product to the returned value */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		/* %rcx = all-ones if that sum carried, else 0 */

	call	__rsaz_512_subtract

	/* set up the next pass: its input is what %rdi points to */
	movq	%r8,%rdx		/* becomes a[0] of the next pass */
	movq	%r9,%rax		/* becomes a[1] of the next pass */
	movl	128+8(%rsp),%r8d	/* reload pass counter */
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqrx

/*
 * .Lsqr_tail -- common epilogue of rsaz_512_sqr.
 *
 * %rax is pointed just above the six saved registers (128+24 bytes of
 * locals plus 6*8 bytes of pushes above %rsp), the callee-saved registers
 * are reloaded relative to it, and %rsp is then set to %rax so that only
 * the return address is left on the stack.  The CFA is described relative
 * to %rax while the registers are being restored so unwind information
 * stays valid at every instruction.
 */
.Lsqr_tail:

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3		/* encodes "rep ret" */
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr
/*
 * rsaz_512_mul
 *
 * Syntax/ABI: GNU as, AT&T operand order, x86-64 (arguments are read from
 * %rdi, %rsi, %rdx, %rcx, %r8 as in the System V AMD64 convention).
 *
 * Register use on entry, as read by the code of this function:
 *   %rdi  parked in %xmm0 across the multiply helper and restored before
 *         the reduction (presumably the result pointer -- confirm against
 *         __rsaz_512_subtract, which is defined elsewhere)
 *   %rsi  not modified here; left in place for the multiply helper
 *   %rdx  pointer to the second operand b; handed to the helper in %rbp,
 *         with b[0] preloaded in %rbx (mulq path) or %rdx (mulx path)
 *   %rcx  parked in %xmm1, handed to the reduction helper in %rbp
 *         (presumably the modulus pointer -- confirm)
 *   %r8   spilled to 128(%rsp); reloaded into %rdx before
 *         __rsaz_512_reducex (presumably the Montgomery constant n0)
 *
 * Stack frame after the prologue (128+24 bytes below the six pushes):
 *     0(%rsp) .. 127(%rsp)   16-limb product read back after the multiply
 *   128(%rsp)                saved %r8
 *
 * Flow: multiply helper -> load low eight limbs into %r8-%r15 -> reduction
 * helper -> add the high eight limbs -> __rsaz_512_subtract with %rcx set
 * to 0 or all-ones from the carry of that addition.  All helpers are
 * defined elsewhere; their exact contracts are not visible here.
 *
 * Callee-saved %rbx, %rbp, %r12-%r15 are pushed in the prologue and
 * restored in the epilogue; the .cfi_* directives track this for unwinding.
 */
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199		/* movq %rdi,%xmm0 : park first argument */
.byte	102,72,15,110,201		/* movq %rcx,%xmm1 : park fourth argument */
	movq	%r8,128(%rsp)		/* spill fifth argument */
	/*
	 * Take the mulx/adcx/adox helpers only when bits 8 and 19 are both
	 * set in the dword at OPENSSL_ia32cap_P+8 (presumably the BMI2 and
	 * ADX CPUID feature bits -- confirm against the capability vector docs).
	 */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx
	movq	(%rdx),%rbx		/* %rbx = b[0] */
	movq	%rdx,%rbp		/* %rbp = pointer to b */
	call	__rsaz_512_mul

.byte	102,72,15,126,199		/* movq %xmm0,%rdi : restore first argument */
.byte	102,72,15,126,205		/* movq %xmm1,%rbp : parked fourth argument */

	/* low eight limbs of the product go to the reduction in registers */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_tail

.align	32
.Lmulx:
	movq	%rdx,%rbp		/* %rbp = pointer to b */
	movq	(%rdx),%rdx		/* %rdx = b[0] */
	call	__rsaz_512_mulx

.byte	102,72,15,126,199		/* movq %xmm0,%rdi : restore first argument */
.byte	102,72,15,126,205		/* movq %xmm1,%rbp : parked fourth argument */

	movq	128(%rsp),%rdx		/* value spilled from %r8 at entry */
	/* low eight limbs of the product go to the reduction in registers */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	/* add the high eight limbs of the product to the returned value */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		/* %rcx = all-ones if that sum carried, else 0 */

	call	__rsaz_512_subtract

	/* epilogue: %rax = address just above the six saved registers */
	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		/* encodes "rep ret" */
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul
/*
 * rsaz_512_mul_gather4
 *
 * Multiplies the 8-qword operand at (%rsi) by an 8-qword operand that is
 * gathered from a table with masked loads, reduces the 16-qword product
 * with __rsaz_512_reduce / __rsaz_512_reducex, and hands the result to
 * __rsaz_512_subtract, which stores it through %rdi.
 *
 * Register arguments as used by the code below:
 *   %rdi  destination pointer (parked at 128+8(%rsp), reloaded before the
 *         reduction and used by __rsaz_512_subtract)
 *   %rsi  first operand, 8 qwords read at 0..56(%rsi)
 *   %rdx  table base; 8 rows of 128 bytes are read with movdqa, so the
 *         table has to be 16-byte aligned
 *   %rcx  pointer reloaded into %rbp for the reduce/subtract helpers
 *         (presumably the modulus - confirm against the C prototype)
 *   %r8   qword parked at 128(%rsp); the reduce helpers multiply by it
 *         (presumably the Montgomery constant n0 - confirm)
 *   %r9d  table index; broadcast and compared against counters derived
 *         from .Linc to build the selection masks
 *
 * Stack frame (152 bytes below the six saved callee-saved registers):
 *   0..127(%rsp)  16-qword product
 *   128(%rsp)     saved %r8
 *   136(%rsp)     saved %rdi
 *   144(%rsp)     saved %rcx
 *
 * Every table row is loaded in full and combined with pand/por, so the
 * addresses touched do not depend on the index in %r9d.
 */
815bc3d5698SJohn Baldwin.globl	rsaz_512_mul_gather4
816bc3d5698SJohn Baldwin.type	rsaz_512_mul_gather4,@function
817bc3d5698SJohn Baldwin.align	32
818bc3d5698SJohn Baldwinrsaz_512_mul_gather4:
819bc3d5698SJohn Baldwin.cfi_startproc
820bc3d5698SJohn Baldwin	pushq	%rbx
821bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
822bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
823bc3d5698SJohn Baldwin	pushq	%rbp
824bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
825bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
826bc3d5698SJohn Baldwin	pushq	%r12
827bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
828bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
829bc3d5698SJohn Baldwin	pushq	%r13
830bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
831bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
832bc3d5698SJohn Baldwin	pushq	%r14
833bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
834bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
835bc3d5698SJohn Baldwin	pushq	%r15
836bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
837bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
838bc3d5698SJohn Baldwin
839bc3d5698SJohn Baldwin	subq	$152,%rsp
840bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	152
841bc3d5698SJohn Baldwin.Lmul_gather4_body:
/*
 * Build eight 128-bit selection masks in %xmm0..%xmm7.  %xmm8 holds the
 * index broadcast to all four dword lanes.  Counter k starts at .Linc
 * and is advanced by the step vector at .Linc+16; mask k is the dword-wise
 * compare (counter_k == index).
 * NOTE(review): .Linc is defined outside this chunk; presumably it holds
 * {0,0,1,1} with step {2,2,2,2}, so each mask covers two 64-bit table
 * entries and the eight masks cover 16 entries - confirm.
 */
842bc3d5698SJohn Baldwin	movd	%r9d,%xmm8
843bc3d5698SJohn Baldwin	movdqa	.Linc+16(%rip),%xmm1
844bc3d5698SJohn Baldwin	movdqa	.Linc(%rip),%xmm0
845bc3d5698SJohn Baldwin
846bc3d5698SJohn Baldwin	pshufd	$0,%xmm8,%xmm8
847bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
848bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm2
849bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
850bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm0
851bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
852bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm2
853bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm1
854bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
855bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm3
856bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm2
857bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
858bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm4
859bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm3
860bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm6
861bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
862bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm4
863bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm6
864bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm5
865bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm7
866bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm6
867bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm7
868bc3d5698SJohn Baldwin
/*
 * Gather row 0: load all 128 bytes at (%rdx), AND each 16-byte chunk with
 * its mask, and OR everything together.  %rbp is left pointing at the
 * next row (%rdx + 128).
 */
869bc3d5698SJohn Baldwin	movdqa	0(%rdx),%xmm8
870bc3d5698SJohn Baldwin	movdqa	16(%rdx),%xmm9
871bc3d5698SJohn Baldwin	movdqa	32(%rdx),%xmm10
872bc3d5698SJohn Baldwin	movdqa	48(%rdx),%xmm11
873bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
874bc3d5698SJohn Baldwin	movdqa	64(%rdx),%xmm12
875bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
876bc3d5698SJohn Baldwin	movdqa	80(%rdx),%xmm13
877bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
878bc3d5698SJohn Baldwin	movdqa	96(%rdx),%xmm14
879bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
880bc3d5698SJohn Baldwin	movdqa	112(%rdx),%xmm15
881bc3d5698SJohn Baldwin	leaq	128(%rdx),%rbp
882bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
883bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
884bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
885bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
886bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
887bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
888bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
889bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
890bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
891bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
892bc3d5698SJohn Baldwin
/* pshufd $0x4e swaps the two 64-bit halves, so after the final por the
 * low qword of %xmm8 is the OR of both halves. */
893bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
894bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
895bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/*
 * Take the mulx/adcx/adox path only when both bits of mask 0x80100 are
 * set in the third dword of OPENSSL_ia32cap_P.
 * NOTE(review): presumably these are the BMI2 and ADX feature bits -
 * confirm against the OPENSSL_ia32cap documentation.
 */
896bc3d5698SJohn Baldwin	movl	$0x80100,%r11d
897bc3d5698SJohn Baldwin	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
898bc3d5698SJohn Baldwin	cmpl	$0x80100,%r11d
899bc3d5698SJohn Baldwin	je	.Lmulx_gather
/* The bytes below encode: movq %xmm8,%rbx (gathered multiplier qword). */
900bc3d5698SJohn Baldwin.byte	102,76,15,126,195
901bc3d5698SJohn Baldwin
/* Park the arguments that are needed again after the multiplication. */
902bc3d5698SJohn Baldwin	movq	%r8,128(%rsp)
903bc3d5698SJohn Baldwin	movq	%rdi,128+8(%rsp)
904bc3d5698SJohn Baldwin	movq	%rcx,128+16(%rsp)
905bc3d5698SJohn Baldwin
/*
 * First pass (mulq path): multiply the eight qwords at (%rsi) by %rbx.
 * The lowest product qword goes to (%rsp); the remaining eight stay in
 * %r8..%r15 as the running accumulator.
 */
906bc3d5698SJohn Baldwin	movq	(%rsi),%rax
907bc3d5698SJohn Baldwin	movq	8(%rsi),%rcx
908bc3d5698SJohn Baldwin	mulq	%rbx
909bc3d5698SJohn Baldwin	movq	%rax,(%rsp)
910bc3d5698SJohn Baldwin	movq	%rcx,%rax
911bc3d5698SJohn Baldwin	movq	%rdx,%r8
912bc3d5698SJohn Baldwin
913bc3d5698SJohn Baldwin	mulq	%rbx
914bc3d5698SJohn Baldwin	addq	%rax,%r8
915bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
916bc3d5698SJohn Baldwin	movq	%rdx,%r9
917bc3d5698SJohn Baldwin	adcq	$0,%r9
918bc3d5698SJohn Baldwin
919bc3d5698SJohn Baldwin	mulq	%rbx
920bc3d5698SJohn Baldwin	addq	%rax,%r9
921bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
922bc3d5698SJohn Baldwin	movq	%rdx,%r10
923bc3d5698SJohn Baldwin	adcq	$0,%r10
924bc3d5698SJohn Baldwin
925bc3d5698SJohn Baldwin	mulq	%rbx
926bc3d5698SJohn Baldwin	addq	%rax,%r10
927bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
928bc3d5698SJohn Baldwin	movq	%rdx,%r11
929bc3d5698SJohn Baldwin	adcq	$0,%r11
930bc3d5698SJohn Baldwin
931bc3d5698SJohn Baldwin	mulq	%rbx
932bc3d5698SJohn Baldwin	addq	%rax,%r11
933bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
934bc3d5698SJohn Baldwin	movq	%rdx,%r12
935bc3d5698SJohn Baldwin	adcq	$0,%r12
936bc3d5698SJohn Baldwin
937bc3d5698SJohn Baldwin	mulq	%rbx
938bc3d5698SJohn Baldwin	addq	%rax,%r12
939bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
940bc3d5698SJohn Baldwin	movq	%rdx,%r13
941bc3d5698SJohn Baldwin	adcq	$0,%r13
942bc3d5698SJohn Baldwin
943bc3d5698SJohn Baldwin	mulq	%rbx
944bc3d5698SJohn Baldwin	addq	%rax,%r13
945bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
946bc3d5698SJohn Baldwin	movq	%rdx,%r14
947bc3d5698SJohn Baldwin	adcq	$0,%r14
948bc3d5698SJohn Baldwin
949bc3d5698SJohn Baldwin	mulq	%rbx
950bc3d5698SJohn Baldwin	addq	%rax,%r14
951bc3d5698SJohn Baldwin	movq	(%rsi),%rax
952bc3d5698SJohn Baldwin	movq	%rdx,%r15
953bc3d5698SJohn Baldwin	adcq	$0,%r15
954bc3d5698SJohn Baldwin
/* %rdi = store cursor for the next product qword, %ecx = remaining rows. */
955bc3d5698SJohn Baldwin	leaq	8(%rsp),%rdi
956bc3d5698SJohn Baldwin	movl	$7,%ecx
957bc3d5698SJohn Baldwin	jmp	.Loop_mul_gather
958bc3d5698SJohn Baldwin
/*
 * Seven more passes: gather the next multiplier qword from the row at
 * (%rbp), multiply-accumulate it into %r8..%r15, and retire one finished
 * qword to (%rdi) per pass.  %rax enters each pass holding (%rsi).
 */
959bc3d5698SJohn Baldwin.align	32
960bc3d5698SJohn Baldwin.Loop_mul_gather:
961bc3d5698SJohn Baldwin	movdqa	0(%rbp),%xmm8
962bc3d5698SJohn Baldwin	movdqa	16(%rbp),%xmm9
963bc3d5698SJohn Baldwin	movdqa	32(%rbp),%xmm10
964bc3d5698SJohn Baldwin	movdqa	48(%rbp),%xmm11
965bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
966bc3d5698SJohn Baldwin	movdqa	64(%rbp),%xmm12
967bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
968bc3d5698SJohn Baldwin	movdqa	80(%rbp),%xmm13
969bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
970bc3d5698SJohn Baldwin	movdqa	96(%rbp),%xmm14
971bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
972bc3d5698SJohn Baldwin	movdqa	112(%rbp),%xmm15
973bc3d5698SJohn Baldwin	leaq	128(%rbp),%rbp
974bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
975bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
976bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
977bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
978bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
979bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
980bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
981bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
982bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
983bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
984bc3d5698SJohn Baldwin
985bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
986bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
987bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/* The bytes below encode: movq %xmm8,%rbx. */
988bc3d5698SJohn Baldwin.byte	102,76,15,126,195
989bc3d5698SJohn Baldwin
990bc3d5698SJohn Baldwin	mulq	%rbx
991bc3d5698SJohn Baldwin	addq	%rax,%r8
992bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
993bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
994bc3d5698SJohn Baldwin	movq	%rdx,%r8
995bc3d5698SJohn Baldwin	adcq	$0,%r8
996bc3d5698SJohn Baldwin
997bc3d5698SJohn Baldwin	mulq	%rbx
998bc3d5698SJohn Baldwin	addq	%rax,%r9
999bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
1000bc3d5698SJohn Baldwin	adcq	$0,%rdx
1001bc3d5698SJohn Baldwin	addq	%r9,%r8
1002bc3d5698SJohn Baldwin	movq	%rdx,%r9
1003bc3d5698SJohn Baldwin	adcq	$0,%r9
1004bc3d5698SJohn Baldwin
1005bc3d5698SJohn Baldwin	mulq	%rbx
1006bc3d5698SJohn Baldwin	addq	%rax,%r10
1007bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1008bc3d5698SJohn Baldwin	adcq	$0,%rdx
1009bc3d5698SJohn Baldwin	addq	%r10,%r9
1010bc3d5698SJohn Baldwin	movq	%rdx,%r10
1011bc3d5698SJohn Baldwin	adcq	$0,%r10
1012bc3d5698SJohn Baldwin
1013bc3d5698SJohn Baldwin	mulq	%rbx
1014bc3d5698SJohn Baldwin	addq	%rax,%r11
1015bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1016bc3d5698SJohn Baldwin	adcq	$0,%rdx
1017bc3d5698SJohn Baldwin	addq	%r11,%r10
1018bc3d5698SJohn Baldwin	movq	%rdx,%r11
1019bc3d5698SJohn Baldwin	adcq	$0,%r11
1020bc3d5698SJohn Baldwin
1021bc3d5698SJohn Baldwin	mulq	%rbx
1022bc3d5698SJohn Baldwin	addq	%rax,%r12
1023bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1024bc3d5698SJohn Baldwin	adcq	$0,%rdx
1025bc3d5698SJohn Baldwin	addq	%r12,%r11
1026bc3d5698SJohn Baldwin	movq	%rdx,%r12
1027bc3d5698SJohn Baldwin	adcq	$0,%r12
1028bc3d5698SJohn Baldwin
1029bc3d5698SJohn Baldwin	mulq	%rbx
1030bc3d5698SJohn Baldwin	addq	%rax,%r13
1031bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1032bc3d5698SJohn Baldwin	adcq	$0,%rdx
1033bc3d5698SJohn Baldwin	addq	%r13,%r12
1034bc3d5698SJohn Baldwin	movq	%rdx,%r13
1035bc3d5698SJohn Baldwin	adcq	$0,%r13
1036bc3d5698SJohn Baldwin
1037bc3d5698SJohn Baldwin	mulq	%rbx
1038bc3d5698SJohn Baldwin	addq	%rax,%r14
1039bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1040bc3d5698SJohn Baldwin	adcq	$0,%rdx
1041bc3d5698SJohn Baldwin	addq	%r14,%r13
1042bc3d5698SJohn Baldwin	movq	%rdx,%r14
1043bc3d5698SJohn Baldwin	adcq	$0,%r14
1044bc3d5698SJohn Baldwin
1045bc3d5698SJohn Baldwin	mulq	%rbx
1046bc3d5698SJohn Baldwin	addq	%rax,%r15
1047bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1048bc3d5698SJohn Baldwin	adcq	$0,%rdx
1049bc3d5698SJohn Baldwin	addq	%r15,%r14
1050bc3d5698SJohn Baldwin	movq	%rdx,%r15
1051bc3d5698SJohn Baldwin	adcq	$0,%r15
1052bc3d5698SJohn Baldwin
1053bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1054bc3d5698SJohn Baldwin
1055bc3d5698SJohn Baldwin	decl	%ecx
1056bc3d5698SJohn Baldwin	jnz	.Loop_mul_gather
1057bc3d5698SJohn Baldwin
/* %rdi is now %rsp+64: flush the accumulator as the high product half. */
1058bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1059bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1060bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1061bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1062bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1063bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1064bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1065bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1066bc3d5698SJohn Baldwin
/* Restore the destination pointer; %rbp = pointer passed in %rcx. */
1067bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rdi
1068bc3d5698SJohn Baldwin	movq	128+16(%rsp),%rbp
1069bc3d5698SJohn Baldwin
/* Low product half into %r8..%r15, as __rsaz_512_reduce expects. */
1070bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1071bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1072bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1073bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1074bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1075bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1076bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1077bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1078bc3d5698SJohn Baldwin
1079bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1080bc3d5698SJohn Baldwin	jmp	.Lmul_gather_tail
1081bc3d5698SJohn Baldwin
/*
 * mulx/adcx/adox variant of the same computation.  Here the gathered
 * multiplier lives in %rdx (the implicit mulx source) and %rdi is reused
 * as a zero register.
 */
1082bc3d5698SJohn Baldwin.align	32
1083bc3d5698SJohn Baldwin.Lmulx_gather:
/* The bytes below encode: movq %xmm8,%rdx. */
1084bc3d5698SJohn Baldwin.byte	102,76,15,126,194
1085bc3d5698SJohn Baldwin
1086bc3d5698SJohn Baldwin	movq	%r8,128(%rsp)
1087bc3d5698SJohn Baldwin	movq	%rdi,128+8(%rsp)
1088bc3d5698SJohn Baldwin	movq	%rcx,128+16(%rsp)
1089bc3d5698SJohn Baldwin
/* First pass.  xorl zeroes %rdi and clears CF/OF before the adcx chain. */
1090bc3d5698SJohn Baldwin	mulxq	(%rsi),%rbx,%r8
1091bc3d5698SJohn Baldwin	movq	%rbx,(%rsp)
1092bc3d5698SJohn Baldwin	xorl	%edi,%edi
1093bc3d5698SJohn Baldwin
1094bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1095bc3d5698SJohn Baldwin
1096bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rbx,%r10
1097bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1098bc3d5698SJohn Baldwin
1099bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1100bc3d5698SJohn Baldwin	adcxq	%rbx,%r9
1101bc3d5698SJohn Baldwin
1102bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rbx,%r12
1103bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1104bc3d5698SJohn Baldwin
1105bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1106bc3d5698SJohn Baldwin	adcxq	%rbx,%r11
1107bc3d5698SJohn Baldwin
1108bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rbx,%r14
1109bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1110bc3d5698SJohn Baldwin
1111bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
1112bc3d5698SJohn Baldwin	adcxq	%rbx,%r13
1113bc3d5698SJohn Baldwin	adcxq	%rax,%r14
/* 0x67 is an address-size prefix byte attached to the following movq;
 * presumably instruction-length padding emitted by the generator. */
1114bc3d5698SJohn Baldwin.byte	0x67
1115bc3d5698SJohn Baldwin	movq	%r8,%rbx
1116bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1117bc3d5698SJohn Baldwin
/* %rcx counts -7..-1; it also indexes the store at 64(%rsp,%rcx,8),
 * i.e. 8(%rsp)..56(%rsp). */
1118bc3d5698SJohn Baldwin	movq	$-7,%rcx
1119bc3d5698SJohn Baldwin	jmp	.Loop_mulx_gather
1120bc3d5698SJohn Baldwin
/*
 * Seven more passes.  %rbx carries the lowest accumulator qword; each
 * pass adds the new low product into it and stores it, while the other
 * qwords shift down one register through the adcx (CF) and adox (OF)
 * chains.
 */
1121bc3d5698SJohn Baldwin.align	32
1122bc3d5698SJohn Baldwin.Loop_mulx_gather:
1123bc3d5698SJohn Baldwin	movdqa	0(%rbp),%xmm8
1124bc3d5698SJohn Baldwin	movdqa	16(%rbp),%xmm9
1125bc3d5698SJohn Baldwin	movdqa	32(%rbp),%xmm10
1126bc3d5698SJohn Baldwin	movdqa	48(%rbp),%xmm11
1127bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
1128bc3d5698SJohn Baldwin	movdqa	64(%rbp),%xmm12
1129bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
1130bc3d5698SJohn Baldwin	movdqa	80(%rbp),%xmm13
1131bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
1132bc3d5698SJohn Baldwin	movdqa	96(%rbp),%xmm14
1133bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
1134bc3d5698SJohn Baldwin	movdqa	112(%rbp),%xmm15
1135bc3d5698SJohn Baldwin	leaq	128(%rbp),%rbp
1136bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
1137bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
1138bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
1139bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
1140bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
1141bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
1142bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
1143bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
1144bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
1145bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
1146bc3d5698SJohn Baldwin
1147bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
1148bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
1149bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/* The bytes below encode: movq %xmm8,%rdx. */
1150bc3d5698SJohn Baldwin.byte	102,76,15,126,194
1151bc3d5698SJohn Baldwin
/* The bytes below encode: mulxq 0(%rsi),%rax,%r8 (32-bit displacement). */
1152bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
1153bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1154bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1155bc3d5698SJohn Baldwin
1156bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1157bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1158bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1159bc3d5698SJohn Baldwin
1160bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rax,%r10
1161bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1162bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1163bc3d5698SJohn Baldwin
/* The bytes below encode: mulxq 24(%rsi),%rax,%r11 (32-bit displacement). */
1164bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
1165bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1166bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1167bc3d5698SJohn Baldwin
1168bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rax,%r12
1169bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1170bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1171bc3d5698SJohn Baldwin
1172bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1173bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1174bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1175bc3d5698SJohn Baldwin
/* The bytes below encode: mulxq 48(%rsi),%rax,%r14 (32-bit displacement). */
1176bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1177bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1178bc3d5698SJohn Baldwin.byte	0x67
1179bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1180bc3d5698SJohn Baldwin
1181bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
1182bc3d5698SJohn Baldwin	movq	%rbx,64(%rsp,%rcx,8)
1183bc3d5698SJohn Baldwin	adcxq	%rax,%r14
1184bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1185bc3d5698SJohn Baldwin	movq	%r8,%rbx
1186bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1187bc3d5698SJohn Baldwin
1188bc3d5698SJohn Baldwin	incq	%rcx
1189bc3d5698SJohn Baldwin	jnz	.Loop_mulx_gather
1190bc3d5698SJohn Baldwin
/* Flush the accumulator as the high product half. */
1191bc3d5698SJohn Baldwin	movq	%r8,64(%rsp)
1192bc3d5698SJohn Baldwin	movq	%r9,64+8(%rsp)
1193bc3d5698SJohn Baldwin	movq	%r10,64+16(%rsp)
1194bc3d5698SJohn Baldwin	movq	%r11,64+24(%rsp)
1195bc3d5698SJohn Baldwin	movq	%r12,64+32(%rsp)
1196bc3d5698SJohn Baldwin	movq	%r13,64+40(%rsp)
1197bc3d5698SJohn Baldwin	movq	%r14,64+48(%rsp)
1198bc3d5698SJohn Baldwin	movq	%r15,64+56(%rsp)
1199bc3d5698SJohn Baldwin
/* __rsaz_512_reducex takes the saved %r8 argument in %rdx and the
 * pointer saved from %rcx in %rbp. */
1200bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1201bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rdi
1202bc3d5698SJohn Baldwin	movq	128+16(%rsp),%rbp
1203bc3d5698SJohn Baldwin
1204bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1205bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1206bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1207bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1208bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1209bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1210bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1211bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1212bc3d5698SJohn Baldwin
1213bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1214bc3d5698SJohn Baldwin
/*
 * Common tail: add the high product half to the reduced value.  sbbq
 * turns the final carry into an all-ones/all-zero mask in %rcx, which
 * __rsaz_512_subtract uses to mask the value it adds back.
 */
1215bc3d5698SJohn Baldwin.Lmul_gather_tail:
1216bc3d5698SJohn Baldwin	addq	64(%rsp),%r8
1217bc3d5698SJohn Baldwin	adcq	72(%rsp),%r9
1218bc3d5698SJohn Baldwin	adcq	80(%rsp),%r10
1219bc3d5698SJohn Baldwin	adcq	88(%rsp),%r11
1220bc3d5698SJohn Baldwin	adcq	96(%rsp),%r12
1221bc3d5698SJohn Baldwin	adcq	104(%rsp),%r13
1222bc3d5698SJohn Baldwin	adcq	112(%rsp),%r14
1223bc3d5698SJohn Baldwin	adcq	120(%rsp),%r15
1224bc3d5698SJohn Baldwin	sbbq	%rcx,%rcx
1225bc3d5698SJohn Baldwin
1226bc3d5698SJohn Baldwin	call	__rsaz_512_subtract
1227bc3d5698SJohn Baldwin
/* Epilogue: %rax = stack pointer value at function entry (152-byte frame
 * plus six 8-byte pushes); reload callee-saved registers relative to it. */
1228bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1229bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1230bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1231bc3d5698SJohn Baldwin.cfi_restore	%r15
1232bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1233bc3d5698SJohn Baldwin.cfi_restore	%r14
1234bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1235bc3d5698SJohn Baldwin.cfi_restore	%r13
1236bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1237bc3d5698SJohn Baldwin.cfi_restore	%r12
1238bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1239bc3d5698SJohn Baldwin.cfi_restore	%rbp
1240bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1241bc3d5698SJohn Baldwin.cfi_restore	%rbx
1242bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1243bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1244bc3d5698SJohn Baldwin.Lmul_gather4_epilogue:
/* 0xf3,0xc3 is "rep ret". */
1245bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1246bc3d5698SJohn Baldwin.cfi_endproc
1247bc3d5698SJohn Baldwin.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
/*
 * rsaz_512_mul_scatter4
 *
 * Multiplies two 8-qword operands via __rsaz_512_mul / __rsaz_512_mulx,
 * reduces the product, lets __rsaz_512_subtract store the result through
 * %rdi, and additionally scatters %r8..%r15 into a table with a 128-byte
 * stride.
 *
 * Register arguments as used by the code below:
 *   %rdi  destination pointer; also copied to %rbp and dereferenced as
 *         the first multiplier qword before the multiply helper is called
 *   %rsi  operand pointer handed to the multiply helper unchanged
 *   %rdx  pointer that ends up in %rbp for the reduce/subtract helpers
 *         (presumably the modulus - confirm against the C prototype)
 *   %rcx  qword parked at 128(%rsp) for the reduce helpers
 *         (presumably the Montgomery constant n0 - confirm)
 *   %r8   table base
 *   %r9d  table index (zero-extended); the first store goes to %r8+8*%r9
 *
 * NOTE(review): __rsaz_512_mul and __rsaz_512_mulx are defined outside
 * this chunk.  The code here assumes they leave the 16-qword product at
 * 0..127(%rsp) of this frame, and it restores %rdi and %rbp after the
 * call, which suggests the helpers clobber them - confirm.
 */
1248bc3d5698SJohn Baldwin.globl	rsaz_512_mul_scatter4
1249bc3d5698SJohn Baldwin.type	rsaz_512_mul_scatter4,@function
1250bc3d5698SJohn Baldwin.align	32
1251bc3d5698SJohn Baldwinrsaz_512_mul_scatter4:
1252bc3d5698SJohn Baldwin.cfi_startproc
1253bc3d5698SJohn Baldwin	pushq	%rbx
1254bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1255bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
1256bc3d5698SJohn Baldwin	pushq	%rbp
1257bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1258bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
1259bc3d5698SJohn Baldwin	pushq	%r12
1260bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1261bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
1262bc3d5698SJohn Baldwin	pushq	%r13
1263bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1264bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
1265bc3d5698SJohn Baldwin	pushq	%r14
1266bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1267bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
1268bc3d5698SJohn Baldwin	pushq	%r15
1269bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1270bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
1271bc3d5698SJohn Baldwin
/* A 32-bit self-move zero-extends the index so it can scale an address. */
1272bc3d5698SJohn Baldwin	movl	%r9d,%r9d
1273bc3d5698SJohn Baldwin	subq	$128+24,%rsp
1274bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	128+24
1275bc3d5698SJohn Baldwin.Lmul_scatter4_body:
/* %r8 = address of the first table slot for this index. */
1276bc3d5698SJohn Baldwin	leaq	(%r8,%r9,8),%r8
/*
 * Stash three pointers in XMM registers across the helper calls.
 * The three byte sequences encode, in order:
 *   movq %rdi,%xmm0   (destination)
 *   movq %rdx,%xmm1   (pointer later restored into %rbp)
 *   movq %r8,%xmm2    (table slot)
 */
1277bc3d5698SJohn Baldwin.byte	102,72,15,110,199
1278bc3d5698SJohn Baldwin.byte	102,72,15,110,202
1279bc3d5698SJohn Baldwin.byte	102,73,15,110,208
1280bc3d5698SJohn Baldwin	movq	%rcx,128(%rsp)
1281bc3d5698SJohn Baldwin
1282bc3d5698SJohn Baldwin	movq	%rdi,%rbp
/* Same feature test as the other entry points: both bits of 0x80100 in
 * the third dword of OPENSSL_ia32cap_P select the mulx path. */
1283bc3d5698SJohn Baldwin	movl	$0x80100,%r11d
1284bc3d5698SJohn Baldwin	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
1285bc3d5698SJohn Baldwin	cmpl	$0x80100,%r11d
1286bc3d5698SJohn Baldwin	je	.Lmulx_scatter
/* mulq path: the helper is entered with the first multiplier qword in %rbx. */
1287bc3d5698SJohn Baldwin	movq	(%rdi),%rbx
1288bc3d5698SJohn Baldwin	call	__rsaz_512_mul
1289bc3d5698SJohn Baldwin
/* The bytes below encode: movq %xmm0,%rdi ; movq %xmm1,%rbp. */
1290bc3d5698SJohn Baldwin.byte	102,72,15,126,199
1291bc3d5698SJohn Baldwin.byte	102,72,15,126,205
1292bc3d5698SJohn Baldwin
/* Low product half into %r8..%r15 for the reduction. */
1293bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1294bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1295bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1296bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1297bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1298bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1299bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1300bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1301bc3d5698SJohn Baldwin
1302bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1303bc3d5698SJohn Baldwin	jmp	.Lmul_scatter_tail
1304bc3d5698SJohn Baldwin
/* mulx path: the helper is entered with the first multiplier qword in
 * %rdx (the implicit mulx source operand). */
1305bc3d5698SJohn Baldwin.align	32
1306bc3d5698SJohn Baldwin.Lmulx_scatter:
1307bc3d5698SJohn Baldwin	movq	(%rdi),%rdx
1308bc3d5698SJohn Baldwin	call	__rsaz_512_mulx
1309bc3d5698SJohn Baldwin
/* The bytes below encode: movq %xmm0,%rdi ; movq %xmm1,%rbp. */
1310bc3d5698SJohn Baldwin.byte	102,72,15,126,199
1311bc3d5698SJohn Baldwin.byte	102,72,15,126,205
1312bc3d5698SJohn Baldwin
/* __rsaz_512_reducex takes the saved %rcx argument in %rdx. */
1313bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1314bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1315bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1316bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1317bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1318bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1319bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1320bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1321bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1322bc3d5698SJohn Baldwin
1323bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1324bc3d5698SJohn Baldwin
/*
 * Common tail: add the high product half, then turn the final carry into
 * a mask in %rcx for __rsaz_512_subtract.
 */
1325bc3d5698SJohn Baldwin.Lmul_scatter_tail:
1326bc3d5698SJohn Baldwin	addq	64(%rsp),%r8
1327bc3d5698SJohn Baldwin	adcq	72(%rsp),%r9
1328bc3d5698SJohn Baldwin	adcq	80(%rsp),%r10
1329bc3d5698SJohn Baldwin	adcq	88(%rsp),%r11
1330bc3d5698SJohn Baldwin	adcq	96(%rsp),%r12
1331bc3d5698SJohn Baldwin	adcq	104(%rsp),%r13
1332bc3d5698SJohn Baldwin	adcq	112(%rsp),%r14
1333bc3d5698SJohn Baldwin	adcq	120(%rsp),%r15
/* The bytes below encode: movq %xmm2,%rsi (table slot).  This move does
 * not modify flags, so CF from the adcq chain still reaches the sbbq. */
1334bc3d5698SJohn Baldwin.byte	102,72,15,126,214
1335bc3d5698SJohn Baldwin	sbbq	%rcx,%rcx
1336bc3d5698SJohn Baldwin
1337bc3d5698SJohn Baldwin	call	__rsaz_512_subtract
1338bc3d5698SJohn Baldwin
/*
 * Scatter %r8..%r15 as left by __rsaz_512_subtract: qword i goes to
 * %rsi + 128*i, i.e. one qword per 128-byte table row.
 * NOTE(review): the end of __rsaz_512_subtract is outside this chunk;
 * presumably it returns the final result in %r8..%r15 - confirm.
 */
1339bc3d5698SJohn Baldwin	movq	%r8,0(%rsi)
1340bc3d5698SJohn Baldwin	movq	%r9,128(%rsi)
1341bc3d5698SJohn Baldwin	movq	%r10,256(%rsi)
1342bc3d5698SJohn Baldwin	movq	%r11,384(%rsi)
1343bc3d5698SJohn Baldwin	movq	%r12,512(%rsi)
1344bc3d5698SJohn Baldwin	movq	%r13,640(%rsi)
1345bc3d5698SJohn Baldwin	movq	%r14,768(%rsi)
1346bc3d5698SJohn Baldwin	movq	%r15,896(%rsi)
1347bc3d5698SJohn Baldwin
/* Epilogue: %rax = stack pointer value at function entry; reload the
 * callee-saved registers relative to it. */
1348bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1349bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1350bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1351bc3d5698SJohn Baldwin.cfi_restore	%r15
1352bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1353bc3d5698SJohn Baldwin.cfi_restore	%r14
1354bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1355bc3d5698SJohn Baldwin.cfi_restore	%r13
1356bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1357bc3d5698SJohn Baldwin.cfi_restore	%r12
1358bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1359bc3d5698SJohn Baldwin.cfi_restore	%rbp
1360bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1361bc3d5698SJohn Baldwin.cfi_restore	%rbx
1362bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1363bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1364bc3d5698SJohn Baldwin.Lmul_scatter4_epilogue:
/* 0xf3,0xc3 is "rep ret". */
1365bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1366bc3d5698SJohn Baldwin.cfi_endproc
1367bc3d5698SJohn Baldwin.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
/*
 * rsaz_512_mul_by_one
 *
 * Loads the 8-qword value at (%rsi) into %r8..%r15, runs it through
 * __rsaz_512_reduce or __rsaz_512_reducex, and stores the eight result
 * qwords to (%rdi).  Unlike the multiply entry points there is no
 * high-half addition and no call to __rsaz_512_subtract.
 *
 * Register arguments as used by the code below:
 *   %rdi  destination, 8 qwords written
 *   %rsi  source, 8 qwords read
 *   %rdx  pointer moved to %rbp for the reduce helper
 *         (presumably the modulus - confirm against the C prototype)
 *   %rcx  qword parked at 128(%rsp); the reduce helpers multiply by it
 *         (presumably the Montgomery constant n0 - confirm)
 */
1368bc3d5698SJohn Baldwin.globl	rsaz_512_mul_by_one
1369bc3d5698SJohn Baldwin.type	rsaz_512_mul_by_one,@function
1370bc3d5698SJohn Baldwin.align	32
1371bc3d5698SJohn Baldwinrsaz_512_mul_by_one:
1372bc3d5698SJohn Baldwin.cfi_startproc
1373bc3d5698SJohn Baldwin	pushq	%rbx
1374bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1375bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
1376bc3d5698SJohn Baldwin	pushq	%rbp
1377bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1378bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
1379bc3d5698SJohn Baldwin	pushq	%r12
1380bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1381bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
1382bc3d5698SJohn Baldwin	pushq	%r13
1383bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1384bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
1385bc3d5698SJohn Baldwin	pushq	%r14
1386bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1387bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
1388bc3d5698SJohn Baldwin	pushq	%r15
1389bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1390bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
1391bc3d5698SJohn Baldwin
1392bc3d5698SJohn Baldwin	subq	$128+24,%rsp
1393bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	128+24
1394bc3d5698SJohn Baldwin.Lmul_by_one_body:
/* Fetch the capability word early; it is tested after the loads below. */
1395bc3d5698SJohn Baldwin	movl	OPENSSL_ia32cap_P+8(%rip),%eax
1396bc3d5698SJohn Baldwin	movq	%rdx,%rbp
1397bc3d5698SJohn Baldwin	movq	%rcx,128(%rsp)
1398bc3d5698SJohn Baldwin
1399bc3d5698SJohn Baldwin	movq	(%rsi),%r8
1400bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1401bc3d5698SJohn Baldwin	movq	8(%rsi),%r9
1402bc3d5698SJohn Baldwin	movq	16(%rsi),%r10
1403bc3d5698SJohn Baldwin	movq	24(%rsi),%r11
1404bc3d5698SJohn Baldwin	movq	32(%rsi),%r12
1405bc3d5698SJohn Baldwin	movq	40(%rsi),%r13
1406bc3d5698SJohn Baldwin	movq	48(%rsi),%r14
1407bc3d5698SJohn Baldwin	movq	56(%rsi),%r15
1408bc3d5698SJohn Baldwin
/* Zero 0..111(%rsp) of the scratch frame (seven aligned 16-byte stores). */
1409bc3d5698SJohn Baldwin	movdqa	%xmm0,(%rsp)
1410bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%rsp)
1411bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%rsp)
1412bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%rsp)
1413bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%rsp)
1414bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%rsp)
1415bc3d5698SJohn Baldwin	movdqa	%xmm0,96(%rsp)
/* Both bits of 0x80100 set selects the mulx-based reduction. */
1416bc3d5698SJohn Baldwin	andl	$0x80100,%eax
1417bc3d5698SJohn Baldwin	cmpl	$0x80100,%eax
1418bc3d5698SJohn Baldwin	je	.Lby_one_callx
1419bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1420bc3d5698SJohn Baldwin	jmp	.Lby_one_tail
1421bc3d5698SJohn Baldwin.align	32
1422bc3d5698SJohn Baldwin.Lby_one_callx:
/* __rsaz_512_reducex takes the saved %rcx argument in %rdx. */
1423bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1424bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1425bc3d5698SJohn Baldwin.Lby_one_tail:
/* Store the reduced value; %rdi is not modified by either reduce helper. */
1426bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1427bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1428bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1429bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1430bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1431bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1432bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1433bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1434bc3d5698SJohn Baldwin
/* Epilogue: %rax = stack pointer value at function entry; reload the
 * callee-saved registers relative to it. */
1435bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1436bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1437bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1438bc3d5698SJohn Baldwin.cfi_restore	%r15
1439bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1440bc3d5698SJohn Baldwin.cfi_restore	%r14
1441bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1442bc3d5698SJohn Baldwin.cfi_restore	%r13
1443bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1444bc3d5698SJohn Baldwin.cfi_restore	%r12
1445bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1446bc3d5698SJohn Baldwin.cfi_restore	%rbp
1447bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1448bc3d5698SJohn Baldwin.cfi_restore	%rbx
1449bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1450bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1451bc3d5698SJohn Baldwin.Lmul_by_one_epilogue:
/* 0xf3,0xc3 is "rep ret". */
1452bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1453bc3d5698SJohn Baldwin.cfi_endproc
1454bc3d5698SJohn Baldwin.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
/*
 * __rsaz_512_reduce  (file-local helper, non-standard calling convention)
 *
 * Runs eight reduction rounds over the 8-qword value in %r8..%r15 using
 * mulq.  Each round picks a multiplier, accumulates multiplier * (%rbp)[0..7]
 * and shifts the value down by one qword.
 *
 * In:   %r8..%r15     value to reduce, least significant qword in %r8
 *       %rbp          pointer to 8 qwords (presumably the modulus - confirm)
 *       128+8(%rsp)   per-round multiplier constant.  The offset is taken
 *                     after the call pushed the return address, so this is
 *                     the caller's 128(%rsp) slot (presumably n0 - confirm).
 * Out:  %r8..%r15     reduced value
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
 */
1455bc3d5698SJohn Baldwin.type	__rsaz_512_reduce,@function
1456bc3d5698SJohn Baldwin.align	32
1457bc3d5698SJohn Baldwin__rsaz_512_reduce:
1458bc3d5698SJohn Baldwin.cfi_startproc
/* First-round multiplier: %rbx = %r8 * constant (low 64 bits). */
1459bc3d5698SJohn Baldwin	movq	%r8,%rbx
1460bc3d5698SJohn Baldwin	imulq	128+8(%rsp),%rbx
1461bc3d5698SJohn Baldwin	movq	0(%rbp),%rax
1462bc3d5698SJohn Baldwin	movl	$8,%ecx
1463bc3d5698SJohn Baldwin	jmp	.Lreduction_loop
1464bc3d5698SJohn Baldwin
1465bc3d5698SJohn Baldwin.align	32
1466bc3d5698SJohn Baldwin.Lreduction_loop:
/*
 * The low half of this first product is never added: %rax is overwritten
 * right away, and the carry into the next qword is taken from negq, which
 * sets CF exactly when %r8 is non-zero.
 * NOTE(review): this matches the true carry only if low(product) + %r8 is
 * 0 mod 2^64, i.e. if the constant is -1/(%rbp)[0] mod 2^64 - confirm.
 */
1467bc3d5698SJohn Baldwin	mulq	%rbx
1468bc3d5698SJohn Baldwin	movq	8(%rbp),%rax
1469bc3d5698SJohn Baldwin	negq	%r8
1470bc3d5698SJohn Baldwin	movq	%rdx,%r8
1471bc3d5698SJohn Baldwin	adcq	$0,%r8
1472bc3d5698SJohn Baldwin
1473bc3d5698SJohn Baldwin	mulq	%rbx
1474bc3d5698SJohn Baldwin	addq	%rax,%r9
1475bc3d5698SJohn Baldwin	movq	16(%rbp),%rax
1476bc3d5698SJohn Baldwin	adcq	$0,%rdx
1477bc3d5698SJohn Baldwin	addq	%r9,%r8
1478bc3d5698SJohn Baldwin	movq	%rdx,%r9
1479bc3d5698SJohn Baldwin	adcq	$0,%r9
1480bc3d5698SJohn Baldwin
1481bc3d5698SJohn Baldwin	mulq	%rbx
1482bc3d5698SJohn Baldwin	addq	%rax,%r10
1483bc3d5698SJohn Baldwin	movq	24(%rbp),%rax
1484bc3d5698SJohn Baldwin	adcq	$0,%rdx
1485bc3d5698SJohn Baldwin	addq	%r10,%r9
1486bc3d5698SJohn Baldwin	movq	%rdx,%r10
1487bc3d5698SJohn Baldwin	adcq	$0,%r10
1488bc3d5698SJohn Baldwin
1489bc3d5698SJohn Baldwin	mulq	%rbx
1490bc3d5698SJohn Baldwin	addq	%rax,%r11
1491bc3d5698SJohn Baldwin	movq	32(%rbp),%rax
1492bc3d5698SJohn Baldwin	adcq	$0,%rdx
1493bc3d5698SJohn Baldwin	addq	%r11,%r10
/* Start preparing the next round's multiplier in %rsi; the load does not
 * touch flags, so the carry from the addq above survives to the adcq. */
1494bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rsi
1495bc3d5698SJohn Baldwin
1496bc3d5698SJohn Baldwin
1497bc3d5698SJohn Baldwin	adcq	$0,%rdx
1498bc3d5698SJohn Baldwin	movq	%rdx,%r11
1499bc3d5698SJohn Baldwin
1500bc3d5698SJohn Baldwin	mulq	%rbx
1501bc3d5698SJohn Baldwin	addq	%rax,%r12
1502bc3d5698SJohn Baldwin	movq	40(%rbp),%rax
1503bc3d5698SJohn Baldwin	adcq	$0,%rdx
/* %r8 already holds its final value for this round (the next round's
 * lowest qword), so the next multiplier can be formed now.  imulq sits
 * after the adcq that consumed CF and before the addq that redefines it. */
1504bc3d5698SJohn Baldwin	imulq	%r8,%rsi
1505bc3d5698SJohn Baldwin	addq	%r12,%r11
1506bc3d5698SJohn Baldwin	movq	%rdx,%r12
1507bc3d5698SJohn Baldwin	adcq	$0,%r12
1508bc3d5698SJohn Baldwin
1509bc3d5698SJohn Baldwin	mulq	%rbx
1510bc3d5698SJohn Baldwin	addq	%rax,%r13
1511bc3d5698SJohn Baldwin	movq	48(%rbp),%rax
1512bc3d5698SJohn Baldwin	adcq	$0,%rdx
1513bc3d5698SJohn Baldwin	addq	%r13,%r12
1514bc3d5698SJohn Baldwin	movq	%rdx,%r13
1515bc3d5698SJohn Baldwin	adcq	$0,%r13
1516bc3d5698SJohn Baldwin
1517bc3d5698SJohn Baldwin	mulq	%rbx
1518bc3d5698SJohn Baldwin	addq	%rax,%r14
1519bc3d5698SJohn Baldwin	movq	56(%rbp),%rax
1520bc3d5698SJohn Baldwin	adcq	$0,%rdx
1521bc3d5698SJohn Baldwin	addq	%r14,%r13
1522bc3d5698SJohn Baldwin	movq	%rdx,%r14
1523bc3d5698SJohn Baldwin	adcq	$0,%r14
1524bc3d5698SJohn Baldwin
/* Last product of the round; afterwards switch %rbx to the next
 * multiplier and reload %rax with (%rbp)[0] for the next round. */
1525bc3d5698SJohn Baldwin	mulq	%rbx
1526bc3d5698SJohn Baldwin	movq	%rsi,%rbx
1527bc3d5698SJohn Baldwin	addq	%rax,%r15
1528bc3d5698SJohn Baldwin	movq	0(%rbp),%rax
1529bc3d5698SJohn Baldwin	adcq	$0,%rdx
1530bc3d5698SJohn Baldwin	addq	%r15,%r14
1531bc3d5698SJohn Baldwin	movq	%rdx,%r15
1532bc3d5698SJohn Baldwin	adcq	$0,%r15
1533bc3d5698SJohn Baldwin
1534bc3d5698SJohn Baldwin	decl	%ecx
1535bc3d5698SJohn Baldwin	jne	.Lreduction_loop
1536bc3d5698SJohn Baldwin
/* 0xf3,0xc3 is "rep ret". */
1537bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1538bc3d5698SJohn Baldwin.cfi_endproc
1539bc3d5698SJohn Baldwin.size	__rsaz_512_reduce,.-__rsaz_512_reduce
/*
 * __rsaz_512_reducex  (file-local helper, non-standard calling convention)
 *
 * mulx/adcx/adox counterpart of __rsaz_512_reduce: eight reduction rounds
 * over the 8-qword value in %r8..%r15.  mulx does not modify flags, so
 * the adcx chain (carry in CF) and the adox chain (carry in OF) run
 * interleaved within each round.
 *
 * In:   %r8..%r15     value to reduce, least significant qword in %r8
 *       %rbp          pointer to 8 qwords (presumably the modulus - confirm)
 *       %rdx          per-round multiplier constant (presumably n0 - confirm)
 *       128+8(%rsp)   the same constant in memory.  The offset is taken
 *                     after the call pushed the return address, so this is
 *                     the caller's 128(%rsp) slot.
 * Out:  %r8..%r15     reduced value
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
 */
1540bc3d5698SJohn Baldwin.type	__rsaz_512_reducex,@function
1541bc3d5698SJohn Baldwin.align	32
1542bc3d5698SJohn Baldwin__rsaz_512_reducex:
1543bc3d5698SJohn Baldwin.cfi_startproc
1544bc3d5698SJohn Baldwin
/* First-round multiplier in %rdx.  xorq zeroes %rsi (used as the constant
 * zero addend below) and clears CF and OF for the two carry chains. */
1545bc3d5698SJohn Baldwin	imulq	%r8,%rdx
1546bc3d5698SJohn Baldwin	xorq	%rsi,%rsi
1547bc3d5698SJohn Baldwin	movl	$8,%ecx
1548bc3d5698SJohn Baldwin	jmp	.Lreduction_loopx
1549bc3d5698SJohn Baldwin
1550bc3d5698SJohn Baldwin.align	32
1551bc3d5698SJohn Baldwin.Lreduction_loopx:
/* The sum in %rax is discarded (the next mulx overwrites it); this adcx
 * exists only to produce the carry out of the lowest qword. */
1552bc3d5698SJohn Baldwin	movq	%r8,%rbx
1553bc3d5698SJohn Baldwin	mulxq	0(%rbp),%rax,%r8
1554bc3d5698SJohn Baldwin	adcxq	%rbx,%rax
1555bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1556bc3d5698SJohn Baldwin
1557bc3d5698SJohn Baldwin	mulxq	8(%rbp),%rax,%r9
1558bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1559bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1560bc3d5698SJohn Baldwin
1561bc3d5698SJohn Baldwin	mulxq	16(%rbp),%rbx,%r10
1562bc3d5698SJohn Baldwin	adcxq	%rbx,%r9
1563bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1564bc3d5698SJohn Baldwin
1565bc3d5698SJohn Baldwin	mulxq	24(%rbp),%rbx,%r11
1566bc3d5698SJohn Baldwin	adcxq	%rbx,%r10
1567bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1568bc3d5698SJohn Baldwin
/* The bytes below encode: mulxq 32(%rbp),%rbx,%r12 (32-bit displacement). */
1569bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
/*
 * Compute the next round's multiplier in the middle of this round.  The
 * current multiplier is parked in %rax, %rdx temporarily becomes %r8
 * (already final for this round), and the mulx from memory leaves
 * low(%r8 * constant) in %rbx.  Its high half lands in %rdx and is
 * immediately overwritten when the current multiplier is restored.
 */
1570bc3d5698SJohn Baldwin	movq	%rdx,%rax
1571bc3d5698SJohn Baldwin	movq	%r8,%rdx
1572bc3d5698SJohn Baldwin	adcxq	%rbx,%r11
1573bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1574bc3d5698SJohn Baldwin
1575bc3d5698SJohn Baldwin	mulxq	128+8(%rsp),%rbx,%rdx
1576bc3d5698SJohn Baldwin	movq	%rax,%rdx
1577bc3d5698SJohn Baldwin
1578bc3d5698SJohn Baldwin	mulxq	40(%rbp),%rax,%r13
1579bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1580bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1581bc3d5698SJohn Baldwin
/* The bytes below encode: mulxq 48(%rbp),%rax,%r14 (32-bit displacement). */
1582bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
1583bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1584bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1585bc3d5698SJohn Baldwin
/* Last product of the round; %rdx then switches to the next multiplier.
 * Both carry chains are folded into %r15 by adding the zero in %rsi. */
1586bc3d5698SJohn Baldwin	mulxq	56(%rbp),%rax,%r15
1587bc3d5698SJohn Baldwin	movq	%rbx,%rdx
1588bc3d5698SJohn Baldwin	adcxq	%rax,%r14
1589bc3d5698SJohn Baldwin	adoxq	%rsi,%r15
1590bc3d5698SJohn Baldwin	adcxq	%rsi,%r15
1591bc3d5698SJohn Baldwin
1592bc3d5698SJohn Baldwin	decl	%ecx
1593bc3d5698SJohn Baldwin	jne	.Lreduction_loopx
1594bc3d5698SJohn Baldwin
/* 0xf3,0xc3 is "rep ret". */
1595bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1596bc3d5698SJohn Baldwin.cfi_endproc
1597bc3d5698SJohn Baldwin.size	__rsaz_512_reducex,.-__rsaz_512_reducex
/*
 * __rsaz_512_subtract
 *
 * Stores the 8-limb value in %r8..%r15 to 0..56(%rdi), then adds to it the
 * masked negation of the eight quadwords at 0..56(%rbp) and writes the sum
 * back to 0..56(%rdi).  There are no branches: the choice between
 * "subtract" and "leave unchanged" is made purely by the AND with %rcx.
 *
 * The negation is formed as NEG on limb 0 and NOT on limbs 1..7.  That is
 * the 512-bit two's complement only when limb 0 is non-zero (no carry out
 * of the +1); this presumably relies on an odd modulus at %rbp (TODO
 * confirm).
 *
 * In:   %r8..%r15  value (least-significant limb in %r8)
 *       %rdi       destination, eight quadwords
 *       %rbp       operand to subtract, eight quadwords
 *       %rcx       mask applied to every negated limb (presumably either 0
 *                  or all-ones, supplied by the caller; TODO confirm)
 * Out:  0..56(%rdi) and %r8..%r15 hold the result
 * Clobbers: %r8..%r15, flags
 */
1598bc3d5698SJohn Baldwin.type	__rsaz_512_subtract,@function
1599bc3d5698SJohn Baldwin.align	32
1600bc3d5698SJohn Baldwin__rsaz_512_subtract:
1601bc3d5698SJohn Baldwin.cfi_startproc
	/* Spill the value so that %r8..%r15 can be reused for the operand. */
1602bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1603bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1604bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1605bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1606bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1607bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1608bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1609bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1610bc3d5698SJohn Baldwin
	/*
	 * Load the operand, negate it (NEG limb 0, NOT limbs 1..7) and mask
	 * every limb with %rcx.  Loads are interleaved with the logic ops.
	 */
1611bc3d5698SJohn Baldwin	movq	0(%rbp),%r8
1612bc3d5698SJohn Baldwin	movq	8(%rbp),%r9
1613bc3d5698SJohn Baldwin	negq	%r8
1614bc3d5698SJohn Baldwin	notq	%r9
1615bc3d5698SJohn Baldwin	andq	%rcx,%r8
1616bc3d5698SJohn Baldwin	movq	16(%rbp),%r10
1617bc3d5698SJohn Baldwin	andq	%rcx,%r9
1618bc3d5698SJohn Baldwin	notq	%r10
1619bc3d5698SJohn Baldwin	movq	24(%rbp),%r11
1620bc3d5698SJohn Baldwin	andq	%rcx,%r10
1621bc3d5698SJohn Baldwin	notq	%r11
1622bc3d5698SJohn Baldwin	movq	32(%rbp),%r12
1623bc3d5698SJohn Baldwin	andq	%rcx,%r11
1624bc3d5698SJohn Baldwin	notq	%r12
1625bc3d5698SJohn Baldwin	movq	40(%rbp),%r13
1626bc3d5698SJohn Baldwin	andq	%rcx,%r12
1627bc3d5698SJohn Baldwin	notq	%r13
1628bc3d5698SJohn Baldwin	movq	48(%rbp),%r14
1629bc3d5698SJohn Baldwin	andq	%rcx,%r13
1630bc3d5698SJohn Baldwin	notq	%r14
1631bc3d5698SJohn Baldwin	movq	56(%rbp),%r15
1632bc3d5698SJohn Baldwin	andq	%rcx,%r14
1633bc3d5698SJohn Baldwin	notq	%r15
1634bc3d5698SJohn Baldwin	andq	%rcx,%r15
1635bc3d5698SJohn Baldwin
	/*
	 * 512-bit add of the spilled value with carry propagation.  The carry
	 * out of the top limb is not captured.
	 */
1636bc3d5698SJohn Baldwin	addq	(%rdi),%r8
1637bc3d5698SJohn Baldwin	adcq	8(%rdi),%r9
1638bc3d5698SJohn Baldwin	adcq	16(%rdi),%r10
1639bc3d5698SJohn Baldwin	adcq	24(%rdi),%r11
1640bc3d5698SJohn Baldwin	adcq	32(%rdi),%r12
1641bc3d5698SJohn Baldwin	adcq	40(%rdi),%r13
1642bc3d5698SJohn Baldwin	adcq	48(%rdi),%r14
1643bc3d5698SJohn Baldwin	adcq	56(%rdi),%r15
1644bc3d5698SJohn Baldwin
	/* Write the result back over the spilled value. */
1645bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1646bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1647bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1648bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1649bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1650bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1651bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1652bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1653bc3d5698SJohn Baldwin
	/* 0xf3,0xc3 is the two-byte "rep ret" return. */
1654bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1655bc3d5698SJohn Baldwin.cfi_endproc
1656bc3d5698SJohn Baldwin.size	__rsaz_512_subtract,.-__rsaz_512_subtract
/*
 * __rsaz_512_mul
 *
 * Schoolbook 8x8-limb multiplication using the legacy MULQ instruction
 * (%rdx:%rax = %rax * operand).  The 16-limb product is written to the
 * sixteen quadwords starting at 8(%rsp) as seen on entry, i.e. just above
 * the return address.
 *
 * In:   %rsi  base of the eight-quadword multiplicand (re-read every row)
 *       %rbx  first multiplier word
 *       %rbp  base of the multiplier; words 1..7 are read from
 *             8(%rbp)..56(%rbp)
 * Out:  8(%rsp)..8+120(%rsp)  product, least-significant limb first
 *       %r8..%r15             copy of the upper eight product limbs
 *       %rbp advanced by 64, %rdi = entry %rsp + 72
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags
 */
1657bc3d5698SJohn Baldwin.type	__rsaz_512_mul,@function
1658bc3d5698SJohn Baldwin.align	32
1659bc3d5698SJohn Baldwin__rsaz_512_mul:
1660bc3d5698SJohn Baldwin.cfi_startproc
	/* %rdi walks the output buffer, one quadword per multiplier word. */
1661bc3d5698SJohn Baldwin	leaq	8(%rsp),%rdi
1662bc3d5698SJohn Baldwin
	/*
	 * Row 0: multiply all eight multiplicand words by %rbx.  The lowest
	 * product limb is final and is stored at once; limbs 1..8 are left in
	 * %r8..%r15.  Each block loads the next multiplicand word into %rax
	 * before the carry of the current one is folded in.
	 */
1663bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1664bc3d5698SJohn Baldwin	mulq	%rbx
1665bc3d5698SJohn Baldwin	movq	%rax,(%rdi)
1666bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
1667bc3d5698SJohn Baldwin	movq	%rdx,%r8
1668bc3d5698SJohn Baldwin
1669bc3d5698SJohn Baldwin	mulq	%rbx
1670bc3d5698SJohn Baldwin	addq	%rax,%r8
1671bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
1672bc3d5698SJohn Baldwin	movq	%rdx,%r9
1673bc3d5698SJohn Baldwin	adcq	$0,%r9
1674bc3d5698SJohn Baldwin
1675bc3d5698SJohn Baldwin	mulq	%rbx
1676bc3d5698SJohn Baldwin	addq	%rax,%r9
1677bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1678bc3d5698SJohn Baldwin	movq	%rdx,%r10
1679bc3d5698SJohn Baldwin	adcq	$0,%r10
1680bc3d5698SJohn Baldwin
1681bc3d5698SJohn Baldwin	mulq	%rbx
1682bc3d5698SJohn Baldwin	addq	%rax,%r10
1683bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1684bc3d5698SJohn Baldwin	movq	%rdx,%r11
1685bc3d5698SJohn Baldwin	adcq	$0,%r11
1686bc3d5698SJohn Baldwin
1687bc3d5698SJohn Baldwin	mulq	%rbx
1688bc3d5698SJohn Baldwin	addq	%rax,%r11
1689bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1690bc3d5698SJohn Baldwin	movq	%rdx,%r12
1691bc3d5698SJohn Baldwin	adcq	$0,%r12
1692bc3d5698SJohn Baldwin
1693bc3d5698SJohn Baldwin	mulq	%rbx
1694bc3d5698SJohn Baldwin	addq	%rax,%r12
1695bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1696bc3d5698SJohn Baldwin	movq	%rdx,%r13
1697bc3d5698SJohn Baldwin	adcq	$0,%r13
1698bc3d5698SJohn Baldwin
1699bc3d5698SJohn Baldwin	mulq	%rbx
1700bc3d5698SJohn Baldwin	addq	%rax,%r13
1701bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1702bc3d5698SJohn Baldwin	movq	%rdx,%r14
1703bc3d5698SJohn Baldwin	adcq	$0,%r14
1704bc3d5698SJohn Baldwin
	/* Last word of row 0; %rax is reloaded with word 0 for the next row. */
1705bc3d5698SJohn Baldwin	mulq	%rbx
1706bc3d5698SJohn Baldwin	addq	%rax,%r14
1707bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1708bc3d5698SJohn Baldwin	movq	%rdx,%r15
1709bc3d5698SJohn Baldwin	adcq	$0,%r15
1710bc3d5698SJohn Baldwin
	/* Step to multiplier word 1 and output limb 1 (LEA keeps the flags). */
1711bc3d5698SJohn Baldwin	leaq	8(%rbp),%rbp
1712bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1713bc3d5698SJohn Baldwin
	/* Seven remaining rows. */
1714bc3d5698SJohn Baldwin	movl	$7,%ecx
1715bc3d5698SJohn Baldwin	jmp	.Loop_mul
1716bc3d5698SJohn Baldwin
1717bc3d5698SJohn Baldwin.align	32
1718bc3d5698SJohn Baldwin.Loop_mul:
	/*
	 * One row per iteration.  %rbx = current multiplier word; on entry
	 * %rax already holds multiplicand word 0.  The lowest accumulator
	 * limb becomes final and is stored at (%rdi); every other limb slides
	 * down one register (%r9 -> %r8, ..., %r15 -> %r14) while picking up
	 * the matching partial product and carry.
	 */
1719bc3d5698SJohn Baldwin	movq	(%rbp),%rbx
1720bc3d5698SJohn Baldwin	mulq	%rbx
1721bc3d5698SJohn Baldwin	addq	%rax,%r8
1722bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
1723bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1724bc3d5698SJohn Baldwin	movq	%rdx,%r8
1725bc3d5698SJohn Baldwin	adcq	$0,%r8
1726bc3d5698SJohn Baldwin
	/* new %r8 = hi(previous) + old %r9 + lo(current); carry goes to %r9. */
1727bc3d5698SJohn Baldwin	mulq	%rbx
1728bc3d5698SJohn Baldwin	addq	%rax,%r9
1729bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
1730bc3d5698SJohn Baldwin	adcq	$0,%rdx
1731bc3d5698SJohn Baldwin	addq	%r9,%r8
1732bc3d5698SJohn Baldwin	movq	%rdx,%r9
1733bc3d5698SJohn Baldwin	adcq	$0,%r9
1734bc3d5698SJohn Baldwin
1735bc3d5698SJohn Baldwin	mulq	%rbx
1736bc3d5698SJohn Baldwin	addq	%rax,%r10
1737bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1738bc3d5698SJohn Baldwin	adcq	$0,%rdx
1739bc3d5698SJohn Baldwin	addq	%r10,%r9
1740bc3d5698SJohn Baldwin	movq	%rdx,%r10
1741bc3d5698SJohn Baldwin	adcq	$0,%r10
1742bc3d5698SJohn Baldwin
1743bc3d5698SJohn Baldwin	mulq	%rbx
1744bc3d5698SJohn Baldwin	addq	%rax,%r11
1745bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1746bc3d5698SJohn Baldwin	adcq	$0,%rdx
1747bc3d5698SJohn Baldwin	addq	%r11,%r10
1748bc3d5698SJohn Baldwin	movq	%rdx,%r11
1749bc3d5698SJohn Baldwin	adcq	$0,%r11
1750bc3d5698SJohn Baldwin
1751bc3d5698SJohn Baldwin	mulq	%rbx
1752bc3d5698SJohn Baldwin	addq	%rax,%r12
1753bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1754bc3d5698SJohn Baldwin	adcq	$0,%rdx
1755bc3d5698SJohn Baldwin	addq	%r12,%r11
1756bc3d5698SJohn Baldwin	movq	%rdx,%r12
1757bc3d5698SJohn Baldwin	adcq	$0,%r12
1758bc3d5698SJohn Baldwin
1759bc3d5698SJohn Baldwin	mulq	%rbx
1760bc3d5698SJohn Baldwin	addq	%rax,%r13
1761bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1762bc3d5698SJohn Baldwin	adcq	$0,%rdx
1763bc3d5698SJohn Baldwin	addq	%r13,%r12
1764bc3d5698SJohn Baldwin	movq	%rdx,%r13
1765bc3d5698SJohn Baldwin	adcq	$0,%r13
1766bc3d5698SJohn Baldwin
	/*
	 * The multiplier pointer is advanced here, between the ADD and its
	 * ADC; LEA does not modify flags, so the pending carry survives.
	 */
1767bc3d5698SJohn Baldwin	mulq	%rbx
1768bc3d5698SJohn Baldwin	addq	%rax,%r14
1769bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1770bc3d5698SJohn Baldwin	adcq	$0,%rdx
1771bc3d5698SJohn Baldwin	addq	%r14,%r13
1772bc3d5698SJohn Baldwin	movq	%rdx,%r14
1773bc3d5698SJohn Baldwin	leaq	8(%rbp),%rbp
1774bc3d5698SJohn Baldwin	adcq	$0,%r14
1775bc3d5698SJohn Baldwin
	/* Last word of the row; %rax is reloaded with word 0 for the next row. */
1776bc3d5698SJohn Baldwin	mulq	%rbx
1777bc3d5698SJohn Baldwin	addq	%rax,%r15
1778bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1779bc3d5698SJohn Baldwin	adcq	$0,%rdx
1780bc3d5698SJohn Baldwin	addq	%r15,%r14
1781bc3d5698SJohn Baldwin	movq	%rdx,%r15
1782bc3d5698SJohn Baldwin	adcq	$0,%r15
1783bc3d5698SJohn Baldwin
1784bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1785bc3d5698SJohn Baldwin
1786bc3d5698SJohn Baldwin	decl	%ecx
1787bc3d5698SJohn Baldwin	jnz	.Loop_mul
1788bc3d5698SJohn Baldwin
	/* Flush the upper eight limbs, still held in registers. */
1789bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1790bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1791bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1792bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1793bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1794bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1795bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1796bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1797bc3d5698SJohn Baldwin
	/* 0xf3,0xc3 is the two-byte "rep ret" return. */
1798bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1799bc3d5698SJohn Baldwin.cfi_endproc
1800bc3d5698SJohn Baldwin.size	__rsaz_512_mul,.-__rsaz_512_mul
/*
 * __rsaz_512_mulx
 *
 * 8x8-limb multiplication using MULX (implicit multiplier in %rdx, flags
 * untouched) with the ADCX (CF) and ADOX (OF) carry chains.  The 16-limb
 * product is written to the same place as in __rsaz_512_mul: the sixteen
 * quadwords starting at 8(%rsp) as seen on entry.
 *
 * Eight rows are processed: the first inline, six in .Loop_mulx, and the
 * last one unrolled after the loop.
 *
 * In:   %rsi  base of the eight-quadword multiplicand
 *       %rdx  first multiplier word, preloaded by the caller
 *       %rbp  base of the multiplier; words 1..7 are read from
 *             8(%rbp)..56(%rbp)
 * Out:  8(%rsp)..8+120(%rsp)  product, least-significant limb first
 *       %r8..%r15             copy of the upper eight product limbs
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %r8..%r15, flags
 *
 * NOTE(review): the first adcq below consumes CF exactly as the caller left
 * it, because MULX and MOV do not modify flags and nothing clears CF before
 * it.  Callers must therefore enter with CF = 0; confirm at each call site.
 */
1801bc3d5698SJohn Baldwin.type	__rsaz_512_mulx,@function
1802bc3d5698SJohn Baldwin.align	32
1803bc3d5698SJohn Baldwin__rsaz_512_mulx:
1804bc3d5698SJohn Baldwin.cfi_startproc
	/*
	 * Row 0.  Low halves alternate between %rbx and %rax and are added
	 * into the previous word's high half one instruction later.
	 */
1805bc3d5698SJohn Baldwin	mulxq	(%rsi),%rbx,%r8
	/* Loop counter: runs -6 .. -1 and doubles as an index. */
1806bc3d5698SJohn Baldwin	movq	$-6,%rcx
1807bc3d5698SJohn Baldwin
1808bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
	/* The lowest product limb is final; store it right away. */
1809bc3d5698SJohn Baldwin	movq	%rbx,8(%rsp)
1810bc3d5698SJohn Baldwin
1811bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rbx,%r10
1812bc3d5698SJohn Baldwin	adcq	%rax,%r8
1813bc3d5698SJohn Baldwin
1814bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1815bc3d5698SJohn Baldwin	adcq	%rbx,%r9
1816bc3d5698SJohn Baldwin
1817bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rbx,%r12
1818bc3d5698SJohn Baldwin	adcq	%rax,%r10
1819bc3d5698SJohn Baldwin
1820bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1821bc3d5698SJohn Baldwin	adcq	%rbx,%r11
1822bc3d5698SJohn Baldwin
1823bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rbx,%r14
1824bc3d5698SJohn Baldwin	adcq	%rax,%r12
1825bc3d5698SJohn Baldwin
1826bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
	/* Multiplier word 1 for the first loop iteration. */
1827bc3d5698SJohn Baldwin	movq	8(%rbp),%rdx
1828bc3d5698SJohn Baldwin	adcq	%rbx,%r13
1829bc3d5698SJohn Baldwin	adcq	%rax,%r14
1830bc3d5698SJohn Baldwin	adcq	$0,%r15
1831bc3d5698SJohn Baldwin
	/*
	 * %rdi = 0 for the carry folds below; the xor also clears CF and OF,
	 * so both chains start from zero.
	 */
1832bc3d5698SJohn Baldwin	xorq	%rdi,%rdi
1833bc3d5698SJohn Baldwin	jmp	.Loop_mulx
1834bc3d5698SJohn Baldwin
1835bc3d5698SJohn Baldwin.align	32
1836bc3d5698SJohn Baldwin.Loop_mulx:
	/*
	 * One row per iteration.  %rbx = old lowest limb + lo(word 0 product)
	 * becomes a finished output limb (CF chain).  Every other limb slides
	 * down one register while taking hi(previous) through the OF chain and
	 * lo(current) through the CF chain.
	 */
1837bc3d5698SJohn Baldwin	movq	%r8,%rbx
1838bc3d5698SJohn Baldwin	mulxq	(%rsi),%rax,%r8
1839bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1840bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1841bc3d5698SJohn Baldwin
1842bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1843bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1844bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1845bc3d5698SJohn Baldwin
1846bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rax,%r10
1847bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1848bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1849bc3d5698SJohn Baldwin
1850bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1851bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1852bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1853bc3d5698SJohn Baldwin
	/*
	 * Hand-encoded: a 0x3e prefix byte followed by VEX bytes that decode
	 * to: mulxq 32(%rsi),%rax,%r12
	 */
1854bc3d5698SJohn Baldwin.byte	0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
1855bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1856bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1857bc3d5698SJohn Baldwin
1858bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1859bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1860bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1861bc3d5698SJohn Baldwin
1862bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rax,%r14
1863bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1864bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1865bc3d5698SJohn Baldwin
	/*
	 * Last word of the row.  Once MULX has consumed %rdx, load the next
	 * multiplier word: 64(%rbp,%rcx,8) is 16(%rbp) for %rcx = -6 up to
	 * 56(%rbp) for %rcx = -1.  The finished limb goes to
	 * 64(%rsp,%rcx,8), i.e. 16(%rsp) .. 56(%rsp).
	 */
1866bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
1867bc3d5698SJohn Baldwin	movq	64(%rbp,%rcx,8),%rdx
1868bc3d5698SJohn Baldwin	movq	%rbx,8+64-8(%rsp,%rcx,8)
1869bc3d5698SJohn Baldwin	adcxq	%rax,%r14
	/* Fold the leftover OF and CF carries into the top limb (%rdi = 0). */
1870bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1871bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1872bc3d5698SJohn Baldwin
1873bc3d5698SJohn Baldwin	incq	%rcx
1874bc3d5698SJohn Baldwin	jnz	.Loop_mulx
1875bc3d5698SJohn Baldwin
	/*
	 * Final row, using the multiplier word loaded from 56(%rbp) by the
	 * last loop iteration.  Same pattern as the loop body, with no
	 * further multiplier load.
	 */
1876bc3d5698SJohn Baldwin	movq	%r8,%rbx
1877bc3d5698SJohn Baldwin	mulxq	(%rsi),%rax,%r8
1878bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1879bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1880bc3d5698SJohn Baldwin
	/* Hand-encoded VEX bytes; they decode to: mulxq 8(%rsi),%rax,%r9 */
1881bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
1882bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1883bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1884bc3d5698SJohn Baldwin
	/* Hand-encoded VEX bytes; they decode to: mulxq 16(%rsi),%rax,%r10 */
1885bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
1886bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1887bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1888bc3d5698SJohn Baldwin
1889bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1890bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1891bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1892bc3d5698SJohn Baldwin
1893bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rax,%r12
1894bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1895bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1896bc3d5698SJohn Baldwin
1897bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1898bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1899bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1900bc3d5698SJohn Baldwin
	/* Hand-encoded VEX bytes; they decode to: mulxq 48(%rsi),%rax,%r14 */
1901bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1902bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1903bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1904bc3d5698SJohn Baldwin
	/* Hand-encoded VEX bytes; they decode to: mulxq 56(%rsi),%rax,%r15 */
1905bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
1906bc3d5698SJohn Baldwin	adcxq	%rax,%r14
1907bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1908bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1909bc3d5698SJohn Baldwin
	/* Store limb 7 (%rbx) and the upper eight limbs (%r8..%r15). */
1910bc3d5698SJohn Baldwin	movq	%rbx,8+64-8(%rsp)
1911bc3d5698SJohn Baldwin	movq	%r8,8+64(%rsp)
1912bc3d5698SJohn Baldwin	movq	%r9,8+64+8(%rsp)
1913bc3d5698SJohn Baldwin	movq	%r10,8+64+16(%rsp)
1914bc3d5698SJohn Baldwin	movq	%r11,8+64+24(%rsp)
1915bc3d5698SJohn Baldwin	movq	%r12,8+64+32(%rsp)
1916bc3d5698SJohn Baldwin	movq	%r13,8+64+40(%rsp)
1917bc3d5698SJohn Baldwin	movq	%r14,8+64+48(%rsp)
1918bc3d5698SJohn Baldwin	movq	%r15,8+64+56(%rsp)
1919bc3d5698SJohn Baldwin
	/* 0xf3,0xc3 is the two-byte "rep ret" return. */
1920bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1921bc3d5698SJohn Baldwin.cfi_endproc
1922bc3d5698SJohn Baldwin.size	__rsaz_512_mulx,.-__rsaz_512_mulx
/*
 * rsaz_512_scatter4
 *
 * Copies eight consecutive quadwords from (%rsi) into a strided table:
 * source word i is stored at %rdi + 8*%rdx + 128*i, for i = 0..7.
 * The 128-byte stride leaves room for sixteen quadwords per row; this is
 * the layout that rsaz_512_gather4 reads back.
 *
 * In:   %rdi  table base
 *       %rsi  source, eight quadwords
 *       %rdx  entry index, scaled by 8 (presumably 0..15; the routine does
 *             not range-check it)
 * Clobbers: %rax, %rsi, %rdi, %r9, flags
 */
1923bc3d5698SJohn Baldwin.globl	rsaz_512_scatter4
1924bc3d5698SJohn Baldwin.type	rsaz_512_scatter4,@function
1925bc3d5698SJohn Baldwin.align	16
1926bc3d5698SJohn Baldwinrsaz_512_scatter4:
1927bc3d5698SJohn Baldwin.cfi_startproc
	/* %rdi = address of this entry's slot in row 0. */
1928bc3d5698SJohn Baldwin	leaq	(%rdi,%rdx,8),%rdi
	/* Eight words to copy. */
1929bc3d5698SJohn Baldwin	movl	$8,%r9d
1930bc3d5698SJohn Baldwin	jmp	.Loop_scatter
1931bc3d5698SJohn Baldwin.align	16
1932bc3d5698SJohn Baldwin.Loop_scatter:
	/* Source advances by one quadword, destination by one 128-byte row. */
1933bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1934bc3d5698SJohn Baldwin	leaq	8(%rsi),%rsi
1935bc3d5698SJohn Baldwin	movq	%rax,(%rdi)
1936bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
1937bc3d5698SJohn Baldwin	decl	%r9d
1938bc3d5698SJohn Baldwin	jnz	.Loop_scatter
	/* 0xf3,0xc3 is the two-byte "rep ret" return. */
1939bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1940bc3d5698SJohn Baldwin.cfi_endproc
1941bc3d5698SJohn Baldwin.size	rsaz_512_scatter4,.-rsaz_512_scatter4
1942bc3d5698SJohn Baldwin
/*
 * rsaz_512_gather4
 *
 * Reads one eight-quadword entry out of the strided table written by
 * rsaz_512_scatter4 (eight rows of 128 bytes, sixteen quadwords per row).
 * For every row ALL sixteen quadwords are loaded and combined with
 * AND/OR selector masks, so the addresses touched and the instructions
 * executed do not depend on the index.
 *
 * In:   %rdi  destination, eight quadwords
 *       %rsi  table base; it is read with movdqa, so it must be 16-byte
 *             aligned
 *       %edx  entry index; masks exist for 0..15 only, any other value
 *             matches no mask and yields all-zero output
 * Clobbers: %xmm0..%xmm15, %rsi, %rdi, %r9, flags
 */
1943bc3d5698SJohn Baldwin.globl	rsaz_512_gather4
1944bc3d5698SJohn Baldwin.type	rsaz_512_gather4,@function
1945bc3d5698SJohn Baldwin.align	16
1946bc3d5698SJohn Baldwinrsaz_512_gather4:
1947bc3d5698SJohn Baldwin.cfi_startproc
	/* %xmm0 = {0,0,1,1}, %xmm1 = {2,2,2,2} (dword lanes, from .Linc). */
1948bc3d5698SJohn Baldwin	movd	%edx,%xmm8
1949bc3d5698SJohn Baldwin	movdqa	.Linc+16(%rip),%xmm1
1950bc3d5698SJohn Baldwin	movdqa	.Linc(%rip),%xmm0
1951bc3d5698SJohn Baldwin
	/* Broadcast the index into all four dword lanes of %xmm8. */
1952bc3d5698SJohn Baldwin	pshufd	$0,%xmm8,%xmm8
	/*
	 * Build eight selector masks in %xmm0..%xmm7.  Register k first gets
	 * the lane values {2k,2k,2k+1,2k+1} by repeatedly adding {2,2,2,2},
	 * and is then compared with the broadcast index.  Afterwards the low
	 * quadword of %xmm<k> is all-ones iff index == 2k and the high
	 * quadword is all-ones iff index == 2k+1.
	 */
1953bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
1954bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm2
1955bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
1956bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm0
1957bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
1958bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm2
1959bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm1
1960bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
1961bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm3
1962bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm2
1963bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1964bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm4
1965bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm3
1966bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm6
1967bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
1968bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm4
1969bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm6
1970bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm5
1971bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm7
1972bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm6
1973bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm7
	/* Eight rows, one output quadword per row. */
1974bc3d5698SJohn Baldwin	movl	$8,%r9d
1975bc3d5698SJohn Baldwin	jmp	.Loop_gather
1976bc3d5698SJohn Baldwin.align	16
1977bc3d5698SJohn Baldwin.Loop_gather:
	/*
	 * Load the whole 128-byte row and AND each 16-byte chunk with its
	 * selector mask; at most one quadword survives.
	 */
1978bc3d5698SJohn Baldwin	movdqa	0(%rsi),%xmm8
1979bc3d5698SJohn Baldwin	movdqa	16(%rsi),%xmm9
1980bc3d5698SJohn Baldwin	movdqa	32(%rsi),%xmm10
1981bc3d5698SJohn Baldwin	movdqa	48(%rsi),%xmm11
1982bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
1983bc3d5698SJohn Baldwin	movdqa	64(%rsi),%xmm12
1984bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
1985bc3d5698SJohn Baldwin	movdqa	80(%rsi),%xmm13
1986bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
1987bc3d5698SJohn Baldwin	movdqa	96(%rsi),%xmm14
1988bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
1989bc3d5698SJohn Baldwin	movdqa	112(%rsi),%xmm15
1990bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
1991bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
1992bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
1993bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
1994bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
	/* OR-reduce the eight masked chunks (two independent chains). */
1995bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
1996bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
1997bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
1998bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
1999bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
2000bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
2001bc3d5698SJohn Baldwin
	/*
	 * Merge the chains, then fold the high quadword onto the low one:
	 * pshufd $0x4e swaps the two 64-bit halves.  The selected word ends
	 * up in the low quadword whether the index was even or odd.
	 */
2002bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
2003bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
2004bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
	/* Store the low 64 bits and step to the next output word. */
2005bc3d5698SJohn Baldwin	movq	%xmm8,(%rdi)
2006bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
2007bc3d5698SJohn Baldwin	decl	%r9d
2008bc3d5698SJohn Baldwin	jnz	.Loop_gather
	/* 0xf3,0xc3 is the two-byte "rep ret" return. */
2009bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
	/*
	 * NOTE(review): nothing in the visible code references this label;
	 * the name suggests an end marker for Windows SEH tables; confirm
	 * before removing.
	 */
2010bc3d5698SJohn Baldwin.LSEH_end_rsaz_512_gather4:
2011bc3d5698SJohn Baldwin.cfi_endproc
2012bc3d5698SJohn Baldwin.size	rsaz_512_gather4,.-rsaz_512_gather4
2013bc3d5698SJohn Baldwin
/*
 * .Linc: two 16-byte constants loaded by rsaz_512_gather4 with movdqa.
 *   .Linc     = dwords {0,0,1,1}  lane values for table entries 0 and 1
 *   .Linc+16  = dwords {2,2,2,2}  step added to reach each further pair
 * The 64-byte alignment satisfies the 16-byte alignment movdqa requires.
 * NOTE(review): no section directive is visible near this table, so it
 * presumably sits in whatever section is current (the file opens with
 * .text); confirm if a read-only data section is wanted.
 */
2014bc3d5698SJohn Baldwin.align	64
2015bc3d5698SJohn Baldwin.Linc:
2016bc3d5698SJohn Baldwin.long	0,0, 1,1
2017bc3d5698SJohn Baldwin.long	2,2, 2,2
2018