/* xref: /freebsd/sys/crypto/openssl/amd64/rsaz-x86_64.S (revision c0855eaa) */
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text



/*
 * rsaz_512_sqr -- repeated 512-bit squaring with reduction.
 *
 * Syntax: AT&T (GAS).  Register use below follows the System V AMD64
 * argument order as it is consumed by this code:
 *   %rdi  destination of each round's result; from the second round on
 *         it is also the source operand (see "movq %rdi,%rsi")
 *   %rsi  source operand, read as eight 64-bit limbs a[0..7]
 *   %rdx  pointer parked in %xmm1 and handed to the reduce/subtract
 *         helpers in %rbp (presumably the modulus -- helpers not shown)
 *   %rcx  value parked at 128(%rsp) for the reduce helpers (presumably
 *         the Montgomery constant -- confirm against the helpers)
 *   %r8d  number of squarings to perform
 *
 * Stack frame (152 bytes below the six saved registers):
 *   0..120(%rsp)   16-limb (1024-bit) square
 *   128(%rsp)      saved %rcx argument
 *   128+8(%rsp)    remaining iteration count
 * Six pushes plus 152 bytes leave %rsp 16-byte aligned at the call
 * sites, given the ABI alignment at function entry.
 *
 * Two code paths compute the same square: .Loop_sqr uses mulq/adcq,
 * .Loop_sqrx uses mulx/adcx/adox.  The latter is taken when both bits
 * of mask 0x80100 are set in the dword at OPENSSL_ia32cap_P+8
 * (presumably the BMI2 and ADX feature bits, which matches the
 * instructions used on that path).
 *
 * Several instructions are emitted as raw .byte sequences; each one is
 * decoded in a trailing comment and must be kept byte-for-byte.
 */
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	/* Save all callee-saved general-purpose registers. */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202	/* movq %rdx,%xmm1 : park 3rd argument */
	/* Both loops expect a[0] in %rdx and a[1] in %rax on entry. */
	movq	(%rsi),%rdx
	movq	8(%rsi),%rax
	movq	%rcx,128(%rsp)
	/* Select the mulx/adcx/adox path only if both mask bits are set. */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Loop_sqrx
	jmp	.Loop_sqr

.align	32
.Loop_sqr:
	/* %r8 is reused as an accumulator below, so spill the counter. */
	movl	%r8d,128+8(%rsp)

	/* Row 0: a[0] * a[1..7] accumulated into %r8..%r15. */
	movq	%rdx,%rbx
	movq	%rax,%rbp
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	/* Double the lowest cross term; %rcx collects the carries. */
	xorq	%rcx,%rcx
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	/* a[0]^2 supplies result limb 0 and the base of limb 1. */
	mulq	%rax
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)


	/* Row 1: a[1] (in %rbp) * a[2..7]; %rbx carries the high halves. */
	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	/* Double limbs 2-3, then add a[1]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	movq	16(%rsi),%rbp
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)


	/* Row 2: a[2] (in %rbp) * a[3..7]; %rcx carries the high halves. */
	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	/* Double limbs 4-5, then add a[2]^2 plus the carry from below. */
	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	movq	24(%rsi),%r10
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	/*
	 * Row 3: a[3] (in %r10) * a[4..7].  a[4], a[5] and a[6] are kept in
	 * %r11, %r12 and %rbp so later rows need no reload from memory.
	 */
	movq	%rax,%r11
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	/* Double limbs 6-7, then add a[3]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	/* Row 4: a[4] (in %r11) * a[5..7]; a[7] is kept in %r14. */
	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	/* Double limbs 8-9, then add a[4]^2 plus the carry from below. */
	xorq	%rcx,%rcx
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	/* Row 5: a[5] (in %r12) * a[6..7]. */
	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	/* Double limbs 10-11, then add a[5]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* Row 6: a[6] (in %rbp) * a[7]. */
	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	/* Double limbs 12-13, then add a[6]^2 plus the carry from below. */
	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	/* Top limbs: double %r13 and add a[7]^2 plus the pending carries. */
	xorq	%rbx,%rbx
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	/* Low half of the square goes to the reduce helper in %r8..%r15. */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205	/* movq %xmm1,%rbp : recover 3rd argument */

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	/* Helper is defined outside this excerpt; presumably Montgomery reduction. */
	call	__rsaz_512_reduce

	/* Add the high half of the square; %rcx becomes 0 or -1 from the carry. */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	/*
	 * Helper is defined outside this excerpt; presumably a masked
	 * subtraction that also stores the result at (%rdi) -- confirm.
	 */
	call	__rsaz_512_subtract

	/* Set up the next round: a[0], a[1] in %rdx, %rax; source = destination. */
	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr
	jmp	.Lsqr_tail

.align	32
.Loop_sqrx:
	movl	%r8d,128+8(%rsp)
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 : %rdi is scratch in this loop */

	/*
	 * Row 0: a[0] (in %rdx) * a[1..7].  %rbp is zeroed and serves as a
	 * constant zero operand for the rest of the loop body.
	 */
	mulxq	%rax,%r8,%r9
	movq	%rax,%rbx

	mulxq	16(%rsi),%rcx,%r10
	xorq	%rbp,%rbp

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rcx,%r9

.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00	/* mulxq 32(%rsi),%rcx,%r12 */
	adcxq	%rax,%r10

.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rax,%r13 */
	adcxq	%rcx,%r11

	mulxq	48(%rsi),%rcx,%r14
	adcxq	%rax,%r12
	adcxq	%rcx,%r13

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rax,%r14
	adcxq	%rbp,%r15

	/* a[0]^2; the adox chain doubles, the adcx chain adds the square. */
	mulxq	%rdx,%rax,%rdi
	movq	%rbx,%rdx
	xorq	%rcx,%rcx
	adoxq	%r8,%r8
	adcxq	%rdi,%r8
	adoxq	%rbp,%rcx
	adcxq	%rbp,%rcx

	movq	%rax,(%rsp)
	movq	%r8,8(%rsp)


	/* Row 1: a[1] (in %rdx) * a[2..7]. */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00	/* mulxq 16(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

	mulxq	24(%rsi),%rdi,%r8
	adoxq	%rdi,%r11
	/* Extra 0x66 prefix byte ahead of the next adcxq; presumably alignment padding. */
.byte	0x66
	adcxq	%r8,%r12

	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13

	mulxq	40(%rsi),%rdi,%r8
	adoxq	%rdi,%r13
	adcxq	%r8,%r14

.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r8 */
	adoxq	%rdi,%r15
	adcxq	%rbp,%r8
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r8
.byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00	/* movq 16(%rsi),%rdx : next multiplier a[2] */

	/* Double limbs 2-3 and add a[1]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r10
	adcxq	%rbp,%rbx

	movq	%r9,16(%rsp)
.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00	/* movq %r10,24(%rsp) */


	/* Row 2: a[2] (in %rdx) * a[3..7]. */
	mulxq	24(%rsi),%rdi,%r9
	adoxq	%rdi,%r12
	adcxq	%r9,%r13

	mulxq	32(%rsi),%rax,%rcx
	adoxq	%rax,%r13
	adcxq	%rcx,%r14

.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rdi,%r9 */
	adoxq	%rdi,%r14
	adcxq	%r9,%r15

.byte	0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rcx */
	adoxq	%rax,%r15
	adcxq	%rcx,%r8

	mulxq	56(%rsi),%rdi,%r9
	adoxq	%rdi,%r8
	adcxq	%rbp,%r9
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r9
	movq	24(%rsi),%rdx

	/* Double limbs 4-5 and add a[2]^2 plus the carry from below. */
	xorq	%rcx,%rcx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	/* Row 3: a[3] (in %rdx) * a[4..7]. */
	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

	mulxq	40(%rsi),%rdi,%r10
	adoxq	%rdi,%r15
	adcxq	%r10,%r8

	mulxq	48(%rsi),%rax,%rbx
	adoxq	%rax,%r8
	adcxq	%rbx,%r9

	mulxq	56(%rsi),%rdi,%r10
	adoxq	%rdi,%r9
	adcxq	%rbp,%r10
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r10
	movq	32(%rsi),%rdx

	/* Double limbs 6-7 and add a[3]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%r14,%r14
	adcxq	%rax,%r13
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r14
	adcxq	%rbp,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	/* Row 4: a[4] (in %rdx) * a[5..7]. */
	mulxq	40(%rsi),%rdi,%r11
	adoxq	%rdi,%r8
	adcxq	%r11,%r9

	mulxq	48(%rsi),%rax,%rcx
	adoxq	%rax,%r9
	adcxq	%rcx,%r10

	mulxq	56(%rsi),%rdi,%r11
	adoxq	%rdi,%r10
	adcxq	%rbp,%r11
	mulxq	%rdx,%rax,%rdi
	movq	40(%rsi),%rdx
	adoxq	%rbp,%r11

	/* Double limbs 8-9 and add a[4]^2 plus the carry from below. */
	xorq	%rcx,%rcx
	adoxq	%r15,%r15

	adcxq	%rbx,%rax
	adoxq	%r8,%r8
	adcxq	%rax,%r15
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r8
	adcxq	%rbp,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	/* Row 5: a[5] (in %rdx) * a[6..7]. */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r12 */
	adoxq	%rdi,%r11
	adcxq	%rbp,%r12
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r12
	movq	48(%rsi),%rdx

	/* Double limbs 10-11 and add a[5]^2 plus the carry from below. */
	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adcxq	%rdi,%r10
	adoxq	%rbp,%rbx
	adcxq	%rbp,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	/* Row 6: a[6] (in %rdx) * a[7]. */
.byte	0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rax,%r13 */
	adoxq	%rax,%r12
	adoxq	%rbp,%r13

	/* Double limbs 12-13 and add a[6]^2 plus the carry from below. */
	mulxq	%rdx,%rax,%rdi
	xorq	%rcx,%rcx
	movq	56(%rsi),%rdx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00	/* movq %r11,96(%rsp) */
.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00	/* movq %r12,104(%rsp) */


	/* Top limbs: double %r13 and add a[7]^2 plus the pending carries. */
	mulxq	%rdx,%rax,%rdx
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%rbp,%rbx
	adcxq	%r13,%rax
	adcxq	%rdx,%rbx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi : recover destination */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp : recover 3rd argument */

	/* This path passes the saved %rcx argument to the helper in %rdx. */
	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	movq	%rax,112(%rsp)
	movq	%rbx,120(%rsp)

	/* Helper is defined outside this excerpt; presumably the mulx-based reduction. */
	call	__rsaz_512_reducex

	/* Add the high half of the square; %rcx becomes 0 or -1 from the carry. */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* Set up the next round: a[0], a[1] in %rdx, %rax; source = destination. */
	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqrx

.Lsqr_tail:

	/* %rax = %rsp as it was just after the six pushes were undone. */
	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr
/*
 * rsaz_512_mul -- one 512-bit multiplication followed by reduction.
 *
 * Syntax: AT&T (GAS).  Register use below follows the System V AMD64
 * argument order as it is consumed by this code:
 *   %rdi  parked in %xmm0 across the multiply helper and restored
 *         before the reduce/subtract helpers (presumably the result
 *         pointer -- the store happens in a helper not shown here)
 *   %rsi  left untouched here; presumably the first operand, read by
 *         the multiply helpers
 *   %rdx  pointer to the second operand; its first limb is preloaded
 *         and the pointer itself is passed on in %rbp
 *   %rcx  parked in %xmm1 and handed to the reduce/subtract helpers in
 *         %rbp (presumably the modulus)
 *   %r8   value parked at 128(%rsp) for the reduce helpers (presumably
 *         the Montgomery constant -- confirm against the helpers)
 *
 * The helpers are expected to leave the 16-limb product at
 * 0..120(%rsp): the low eight limbs are reloaded from there and the
 * high eight limbs are added back after reduction.
 *
 * The mulx-based helpers are used when both bits of mask 0x80100 are
 * set in the dword at OPENSSL_ia32cap_P+8 (presumably the BMI2 and ADX
 * feature bits).
 */
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	/* Save all callee-saved general-purpose registers. */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 : park 1st argument */
.byte	102,72,15,110,201	/* movq %rcx,%xmm1 : park 4th argument */
	movq	%r8,128(%rsp)
	/* Select the mulx/adcx/adox helpers only if both mask bits are set. */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx
	/* mulq path: first limb of the second operand in %rbx, pointer in %rbp. */
	movq	(%rdx),%rbx
	movq	%rdx,%rbp
	call	__rsaz_512_mul

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	/* Low half of the product goes to the reduce helper in %r8..%r15. */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_tail

.align	32
.Lmulx:
	/* mulx path: pointer in %rbp, first limb of the second operand in %rdx. */
	movq	%rdx,%rbp
	movq	(%rdx),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	/* This path passes the saved %r8 argument to the helper in %rdx. */
	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	/* Add the high half of the product; %rcx becomes 0 or -1 from the carry. */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	/*
	 * Helper is defined outside this excerpt; presumably a masked
	 * subtraction that also stores the result at (%rdi) -- confirm.
	 */
	call	__rsaz_512_subtract

	/* %rax = %rsp as it was just after the six pushes were undone. */
	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul
/*
 * rsaz_512_mul_gather4
 *
 * Multiplies the 8-limb (512-bit) value at %rsi by an 8-limb value selected
 * from a table at %rdx, producing a 16-limb product on the stack, then runs
 * the file's reduce and subtract helpers on it.  The result is written
 * through %rdi by __rsaz_512_subtract.
 *
 * Register arguments, as used by the code below:
 *   %rdi  destination pointer (parked at 128+8(%rsp), reloaded for the helpers)
 *   %rsi  multiplicand, read as 8 qwords at 0..56(%rsi)
 *   %rdx  table, read as 8 consecutive 128-byte rows
 *   %rcx  pointer parked at 128+16(%rsp) and handed to the helpers in %rbp,
 *         which read 8 qwords from it (presumably the modulus - confirm)
 *   %r8   qword parked at 128(%rsp) and read back by the reduction helpers
 *         (presumably the Montgomery constant - confirm against callers)
 *   %r9d  table index
 *
 * Stack frame (152 bytes below the six saved registers):
 *   0..127(%rsp)    16-limb product
 *   128..151(%rsp)  saved %r8, %rdi, %rcx
 */
814bc3d5698SJohn Baldwin.globl	rsaz_512_mul_gather4
815bc3d5698SJohn Baldwin.type	rsaz_512_mul_gather4,@function
816bc3d5698SJohn Baldwin.align	32
817bc3d5698SJohn Baldwinrsaz_512_mul_gather4:
818bc3d5698SJohn Baldwin.cfi_startproc
/* Save the callee-saved registers this routine overwrites. */
819bc3d5698SJohn Baldwin	pushq	%rbx
820bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
821bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
822bc3d5698SJohn Baldwin	pushq	%rbp
823bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
824bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
825bc3d5698SJohn Baldwin	pushq	%r12
826bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
827bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
828bc3d5698SJohn Baldwin	pushq	%r13
829bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
830bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
831bc3d5698SJohn Baldwin	pushq	%r14
832bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
833bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
834bc3d5698SJohn Baldwin	pushq	%r15
835bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
836bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
837bc3d5698SJohn Baldwin
/* 152 = 128 bytes of product + 24 bytes for the three saved arguments. */
838bc3d5698SJohn Baldwin	subq	$152,%rsp
839bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	152
840bc3d5698SJohn Baldwin.Lmul_gather4_body:
/*
 * Broadcast the index to all four dword lanes of %xmm8 and build eight lane
 * masks in %xmm0..%xmm7.  Mask k is pcmpeqd(index, .Linc + k*(.Linc+16)); the
 * two .Linc vectors are defined outside this chunk.
 */
841bc3d5698SJohn Baldwin	movd	%r9d,%xmm8
842bc3d5698SJohn Baldwin	movdqa	.Linc+16(%rip),%xmm1
843bc3d5698SJohn Baldwin	movdqa	.Linc(%rip),%xmm0
844bc3d5698SJohn Baldwin
845bc3d5698SJohn Baldwin	pshufd	$0,%xmm8,%xmm8
846bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
847bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm2
848bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
849bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm0
850bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
851bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm2
852bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm1
853bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
854bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm3
855bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm2
856bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
857bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm4
858bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm3
859bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm6
860bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
861bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm4
862bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm6
863bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm5
864bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm7
865bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm6
866bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm7
867bc3d5698SJohn Baldwin
/*
 * Gather limb 0 of the table operand: load the whole first 128-byte row, AND
 * each 16-byte piece with its mask, OR the pieces together and fold the high
 * qword onto the low one.  All eight pieces are read whatever the index is.
 * %rbp is left pointing at the next row.
 */
868bc3d5698SJohn Baldwin	movdqa	0(%rdx),%xmm8
869bc3d5698SJohn Baldwin	movdqa	16(%rdx),%xmm9
870bc3d5698SJohn Baldwin	movdqa	32(%rdx),%xmm10
871bc3d5698SJohn Baldwin	movdqa	48(%rdx),%xmm11
872bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
873bc3d5698SJohn Baldwin	movdqa	64(%rdx),%xmm12
874bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
875bc3d5698SJohn Baldwin	movdqa	80(%rdx),%xmm13
876bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
877bc3d5698SJohn Baldwin	movdqa	96(%rdx),%xmm14
878bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
879bc3d5698SJohn Baldwin	movdqa	112(%rdx),%xmm15
880bc3d5698SJohn Baldwin	leaq	128(%rdx),%rbp
881bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
882bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
883bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
884bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
885bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
886bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
887bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
888bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
889bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
890bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
891bc3d5698SJohn Baldwin
892bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
893bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
894bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/*
 * Take the mulx/adcx/adox path only when both bits of mask 0x80100 are set in
 * the dword at OPENSSL_ia32cap_P+8 (presumably the BMI2 and ADX feature bits).
 */
895bc3d5698SJohn Baldwin	movl	$0x80100,%r11d
896bc3d5698SJohn Baldwin	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
897bc3d5698SJohn Baldwin	cmpl	$0x80100,%r11d
898bc3d5698SJohn Baldwin	je	.Lmulx_gather
/* Hand-encoded movq %xmm8,%rbx: the gathered limb becomes the mulq multiplier. */
899bc3d5698SJohn Baldwin.byte	102,76,15,126,195
900bc3d5698SJohn Baldwin
/* Park the three arguments whose registers are reused below. */
901bc3d5698SJohn Baldwin	movq	%r8,128(%rsp)
902bc3d5698SJohn Baldwin	movq	%rdi,128+8(%rsp)
903bc3d5698SJohn Baldwin	movq	%rcx,128+16(%rsp)
904bc3d5698SJohn Baldwin
/*
 * First row: multiply the 8 limbs at %rsi by the limb in %rbx.  The lowest
 * product limb goes to (%rsp); the remaining eight stay in %r8..%r15.
 */
905bc3d5698SJohn Baldwin	movq	(%rsi),%rax
906bc3d5698SJohn Baldwin	movq	8(%rsi),%rcx
907bc3d5698SJohn Baldwin	mulq	%rbx
908bc3d5698SJohn Baldwin	movq	%rax,(%rsp)
909bc3d5698SJohn Baldwin	movq	%rcx,%rax
910bc3d5698SJohn Baldwin	movq	%rdx,%r8
911bc3d5698SJohn Baldwin
912bc3d5698SJohn Baldwin	mulq	%rbx
913bc3d5698SJohn Baldwin	addq	%rax,%r8
914bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
915bc3d5698SJohn Baldwin	movq	%rdx,%r9
916bc3d5698SJohn Baldwin	adcq	$0,%r9
917bc3d5698SJohn Baldwin
918bc3d5698SJohn Baldwin	mulq	%rbx
919bc3d5698SJohn Baldwin	addq	%rax,%r9
920bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
921bc3d5698SJohn Baldwin	movq	%rdx,%r10
922bc3d5698SJohn Baldwin	adcq	$0,%r10
923bc3d5698SJohn Baldwin
924bc3d5698SJohn Baldwin	mulq	%rbx
925bc3d5698SJohn Baldwin	addq	%rax,%r10
926bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
927bc3d5698SJohn Baldwin	movq	%rdx,%r11
928bc3d5698SJohn Baldwin	adcq	$0,%r11
929bc3d5698SJohn Baldwin
930bc3d5698SJohn Baldwin	mulq	%rbx
931bc3d5698SJohn Baldwin	addq	%rax,%r11
932bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
933bc3d5698SJohn Baldwin	movq	%rdx,%r12
934bc3d5698SJohn Baldwin	adcq	$0,%r12
935bc3d5698SJohn Baldwin
936bc3d5698SJohn Baldwin	mulq	%rbx
937bc3d5698SJohn Baldwin	addq	%rax,%r12
938bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
939bc3d5698SJohn Baldwin	movq	%rdx,%r13
940bc3d5698SJohn Baldwin	adcq	$0,%r13
941bc3d5698SJohn Baldwin
942bc3d5698SJohn Baldwin	mulq	%rbx
943bc3d5698SJohn Baldwin	addq	%rax,%r13
944bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
945bc3d5698SJohn Baldwin	movq	%rdx,%r14
946bc3d5698SJohn Baldwin	adcq	$0,%r14
947bc3d5698SJohn Baldwin
948bc3d5698SJohn Baldwin	mulq	%rbx
949bc3d5698SJohn Baldwin	addq	%rax,%r14
/* Reload limb 0 of the multiplicand so the loop can start with mulq at once. */
950bc3d5698SJohn Baldwin	movq	(%rsi),%rax
951bc3d5698SJohn Baldwin	movq	%rdx,%r15
952bc3d5698SJohn Baldwin	adcq	$0,%r15
953bc3d5698SJohn Baldwin
/* %rdi = next product slot to fill, %ecx = number of rows still to process. */
954bc3d5698SJohn Baldwin	leaq	8(%rsp),%rdi
955bc3d5698SJohn Baldwin	movl	$7,%ecx
956bc3d5698SJohn Baldwin	jmp	.Loop_mul_gather
957bc3d5698SJohn Baldwin
958bc3d5698SJohn Baldwin.align	32
959bc3d5698SJohn Baldwin.Loop_mul_gather:
/* Gather the next limb of the table operand from the row at %rbp (same masked full-row read as above). */
960bc3d5698SJohn Baldwin	movdqa	0(%rbp),%xmm8
961bc3d5698SJohn Baldwin	movdqa	16(%rbp),%xmm9
962bc3d5698SJohn Baldwin	movdqa	32(%rbp),%xmm10
963bc3d5698SJohn Baldwin	movdqa	48(%rbp),%xmm11
964bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
965bc3d5698SJohn Baldwin	movdqa	64(%rbp),%xmm12
966bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
967bc3d5698SJohn Baldwin	movdqa	80(%rbp),%xmm13
968bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
969bc3d5698SJohn Baldwin	movdqa	96(%rbp),%xmm14
970bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
971bc3d5698SJohn Baldwin	movdqa	112(%rbp),%xmm15
972bc3d5698SJohn Baldwin	leaq	128(%rbp),%rbp
973bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
974bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
975bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
976bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
977bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
978bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
979bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
980bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
981bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
982bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
983bc3d5698SJohn Baldwin
984bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
985bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
986bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/* Hand-encoded movq %xmm8,%rbx. */
987bc3d5698SJohn Baldwin.byte	102,76,15,126,195
988bc3d5698SJohn Baldwin
/*
 * Multiply-accumulate one row: add (limbs at %rsi) * %rbx to the running sum
 * in %r8..%r15.  The finished lowest limb is written to (%rdi) and the window
 * slides up by one limb.
 */
989bc3d5698SJohn Baldwin	mulq	%rbx
990bc3d5698SJohn Baldwin	addq	%rax,%r8
991bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
992bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
993bc3d5698SJohn Baldwin	movq	%rdx,%r8
994bc3d5698SJohn Baldwin	adcq	$0,%r8
995bc3d5698SJohn Baldwin
996bc3d5698SJohn Baldwin	mulq	%rbx
997bc3d5698SJohn Baldwin	addq	%rax,%r9
998bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
999bc3d5698SJohn Baldwin	adcq	$0,%rdx
1000bc3d5698SJohn Baldwin	addq	%r9,%r8
1001bc3d5698SJohn Baldwin	movq	%rdx,%r9
1002bc3d5698SJohn Baldwin	adcq	$0,%r9
1003bc3d5698SJohn Baldwin
1004bc3d5698SJohn Baldwin	mulq	%rbx
1005bc3d5698SJohn Baldwin	addq	%rax,%r10
1006bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1007bc3d5698SJohn Baldwin	adcq	$0,%rdx
1008bc3d5698SJohn Baldwin	addq	%r10,%r9
1009bc3d5698SJohn Baldwin	movq	%rdx,%r10
1010bc3d5698SJohn Baldwin	adcq	$0,%r10
1011bc3d5698SJohn Baldwin
1012bc3d5698SJohn Baldwin	mulq	%rbx
1013bc3d5698SJohn Baldwin	addq	%rax,%r11
1014bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1015bc3d5698SJohn Baldwin	adcq	$0,%rdx
1016bc3d5698SJohn Baldwin	addq	%r11,%r10
1017bc3d5698SJohn Baldwin	movq	%rdx,%r11
1018bc3d5698SJohn Baldwin	adcq	$0,%r11
1019bc3d5698SJohn Baldwin
1020bc3d5698SJohn Baldwin	mulq	%rbx
1021bc3d5698SJohn Baldwin	addq	%rax,%r12
1022bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1023bc3d5698SJohn Baldwin	adcq	$0,%rdx
1024bc3d5698SJohn Baldwin	addq	%r12,%r11
1025bc3d5698SJohn Baldwin	movq	%rdx,%r12
1026bc3d5698SJohn Baldwin	adcq	$0,%r12
1027bc3d5698SJohn Baldwin
1028bc3d5698SJohn Baldwin	mulq	%rbx
1029bc3d5698SJohn Baldwin	addq	%rax,%r13
1030bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1031bc3d5698SJohn Baldwin	adcq	$0,%rdx
1032bc3d5698SJohn Baldwin	addq	%r13,%r12
1033bc3d5698SJohn Baldwin	movq	%rdx,%r13
1034bc3d5698SJohn Baldwin	adcq	$0,%r13
1035bc3d5698SJohn Baldwin
1036bc3d5698SJohn Baldwin	mulq	%rbx
1037bc3d5698SJohn Baldwin	addq	%rax,%r14
1038bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1039bc3d5698SJohn Baldwin	adcq	$0,%rdx
1040bc3d5698SJohn Baldwin	addq	%r14,%r13
1041bc3d5698SJohn Baldwin	movq	%rdx,%r14
1042bc3d5698SJohn Baldwin	adcq	$0,%r14
1043bc3d5698SJohn Baldwin
1044bc3d5698SJohn Baldwin	mulq	%rbx
1045bc3d5698SJohn Baldwin	addq	%rax,%r15
/* Limb 0 again, ready for the next iteration's first mulq. */
1046bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1047bc3d5698SJohn Baldwin	adcq	$0,%rdx
1048bc3d5698SJohn Baldwin	addq	%r15,%r14
1049bc3d5698SJohn Baldwin	movq	%rdx,%r15
1050bc3d5698SJohn Baldwin	adcq	$0,%r15
1051bc3d5698SJohn Baldwin
1052bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1053bc3d5698SJohn Baldwin
1054bc3d5698SJohn Baldwin	decl	%ecx
1055bc3d5698SJohn Baldwin	jnz	.Loop_mul_gather
1056bc3d5698SJohn Baldwin
/* After 7 iterations %rdi equals 64(%rsp): store the upper eight product limbs there. */
1057bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1058bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1059bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1060bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1061bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1062bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1063bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1064bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1065bc3d5698SJohn Baldwin
/* Restore the destination into %rdi and put the saved %rcx pointer in %rbp for the helpers. */
1066bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rdi
1067bc3d5698SJohn Baldwin	movq	128+16(%rsp),%rbp
1068bc3d5698SJohn Baldwin
/* Low half of the product is the input of the reduction. */
1069bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1070bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1071bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1072bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1073bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1074bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1075bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1076bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1077bc3d5698SJohn Baldwin
1078bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1079bc3d5698SJohn Baldwin	jmp	.Lmul_gather_tail
1080bc3d5698SJohn Baldwin
1081bc3d5698SJohn Baldwin.align	32
1082bc3d5698SJohn Baldwin.Lmulx_gather:
/* Hand-encoded movq %xmm8,%rdx: mulx takes its implicit multiplier from %rdx. */
1083bc3d5698SJohn Baldwin.byte	102,76,15,126,194
1084bc3d5698SJohn Baldwin
1085bc3d5698SJohn Baldwin	movq	%r8,128(%rsp)
1086bc3d5698SJohn Baldwin	movq	%rdi,128+8(%rsp)
1087bc3d5698SJohn Baldwin	movq	%rcx,128+16(%rsp)
1088bc3d5698SJohn Baldwin
/*
 * First row with mulx: the low product limb goes to (%rsp), the rest is
 * carried in %r8..%r15.  xorl zeroes %rdi (used as the constant 0 below) and
 * clears CF and OF before the adcx chain starts.
 */
1089bc3d5698SJohn Baldwin	mulxq	(%rsi),%rbx,%r8
1090bc3d5698SJohn Baldwin	movq	%rbx,(%rsp)
1091bc3d5698SJohn Baldwin	xorl	%edi,%edi
1092bc3d5698SJohn Baldwin
1093bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1094bc3d5698SJohn Baldwin
1095bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rbx,%r10
1096bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1097bc3d5698SJohn Baldwin
1098bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1099bc3d5698SJohn Baldwin	adcxq	%rbx,%r9
1100bc3d5698SJohn Baldwin
1101bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rbx,%r12
1102bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1103bc3d5698SJohn Baldwin
1104bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1105bc3d5698SJohn Baldwin	adcxq	%rbx,%r11
1106bc3d5698SJohn Baldwin
1107bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rbx,%r14
1108bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1109bc3d5698SJohn Baldwin
1110bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
1111bc3d5698SJohn Baldwin	adcxq	%rbx,%r13
1112bc3d5698SJohn Baldwin	adcxq	%rax,%r14
/* 0x67 is an address-size prefix on a register-only move; it looks like code-size padding only. */
1113bc3d5698SJohn Baldwin.byte	0x67
/* %rbx carries the limb that the next row will complete and store. */
1114bc3d5698SJohn Baldwin	movq	%r8,%rbx
1115bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1116bc3d5698SJohn Baldwin
/* %rcx counts -7..-1 and doubles as the index for the 64(%rsp,%rcx,8) store. */
1117bc3d5698SJohn Baldwin	movq	$-7,%rcx
1118bc3d5698SJohn Baldwin	jmp	.Loop_mulx_gather
1119bc3d5698SJohn Baldwin
1120bc3d5698SJohn Baldwin.align	32
1121bc3d5698SJohn Baldwin.Loop_mulx_gather:
/* Gather the next limb of the table operand from the row at %rbp. */
1122bc3d5698SJohn Baldwin	movdqa	0(%rbp),%xmm8
1123bc3d5698SJohn Baldwin	movdqa	16(%rbp),%xmm9
1124bc3d5698SJohn Baldwin	movdqa	32(%rbp),%xmm10
1125bc3d5698SJohn Baldwin	movdqa	48(%rbp),%xmm11
1126bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
1127bc3d5698SJohn Baldwin	movdqa	64(%rbp),%xmm12
1128bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
1129bc3d5698SJohn Baldwin	movdqa	80(%rbp),%xmm13
1130bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
1131bc3d5698SJohn Baldwin	movdqa	96(%rbp),%xmm14
1132bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
1133bc3d5698SJohn Baldwin	movdqa	112(%rbp),%xmm15
1134bc3d5698SJohn Baldwin	leaq	128(%rbp),%rbp
1135bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
1136bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
1137bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
1138bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
1139bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
1140bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
1141bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
1142bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
1143bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
1144bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
1145bc3d5698SJohn Baldwin
1146bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
1147bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
1148bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
/* Hand-encoded movq %xmm8,%rdx. */
1149bc3d5698SJohn Baldwin.byte	102,76,15,126,194
1150bc3d5698SJohn Baldwin
/*
 * One multiply-accumulate row on two independent carry chains: adcx (CF) adds
 * the low product halves, adox (OF) folds in the previous row's limbs.
 * The .byte sequences are mulx instructions encoded with a 32-bit displacement.
 */
/* Hand-encoded mulxq 0(%rsi),%rax,%r8. */
1151bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
1152bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1153bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1154bc3d5698SJohn Baldwin
1155bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1156bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1157bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1158bc3d5698SJohn Baldwin
1159bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rax,%r10
1160bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1161bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1162bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 24(%rsi),%rax,%r11. */
1163bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
1164bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1165bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1166bc3d5698SJohn Baldwin
1167bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rax,%r12
1168bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1169bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1170bc3d5698SJohn Baldwin
1171bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1172bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1173bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1174bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 48(%rsi),%rax,%r14. */
1175bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
1176bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1177bc3d5698SJohn Baldwin.byte	0x67
1178bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1179bc3d5698SJohn Baldwin
1180bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
/* Store the limb completed by this row; the offset runs over 8..56(%rsp). */
1181bc3d5698SJohn Baldwin	movq	%rbx,64(%rsp,%rcx,8)
1182bc3d5698SJohn Baldwin	adcxq	%rax,%r14
/* %rdi is still zero here: these two adds only absorb the final OF and CF carries. */
1183bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1184bc3d5698SJohn Baldwin	movq	%r8,%rbx
1185bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1186bc3d5698SJohn Baldwin
1187bc3d5698SJohn Baldwin	incq	%rcx
1188bc3d5698SJohn Baldwin	jnz	.Loop_mulx_gather
1189bc3d5698SJohn Baldwin
/* Upper eight product limbs. */
1190bc3d5698SJohn Baldwin	movq	%r8,64(%rsp)
1191bc3d5698SJohn Baldwin	movq	%r9,64+8(%rsp)
1192bc3d5698SJohn Baldwin	movq	%r10,64+16(%rsp)
1193bc3d5698SJohn Baldwin	movq	%r11,64+24(%rsp)
1194bc3d5698SJohn Baldwin	movq	%r12,64+32(%rsp)
1195bc3d5698SJohn Baldwin	movq	%r13,64+40(%rsp)
1196bc3d5698SJohn Baldwin	movq	%r14,64+48(%rsp)
1197bc3d5698SJohn Baldwin	movq	%r15,64+56(%rsp)
1198bc3d5698SJohn Baldwin
/* __rsaz_512_reducex takes the saved %r8 constant in %rdx, unlike the mulq flavour. */
1199bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1200bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rdi
1201bc3d5698SJohn Baldwin	movq	128+16(%rsp),%rbp
1202bc3d5698SJohn Baldwin
1203bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1204bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1205bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1206bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1207bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1208bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1209bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1210bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1211bc3d5698SJohn Baldwin
1212bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1213bc3d5698SJohn Baldwin
1214bc3d5698SJohn Baldwin.Lmul_gather_tail:
/*
 * Add the upper half of the product to the reduced low half.  sbbq turns the
 * carry out into an all-ones / all-zero mask in %rcx, which
 * __rsaz_512_subtract ANDs with the values it loads from %rbp.
 */
1215bc3d5698SJohn Baldwin	addq	64(%rsp),%r8
1216bc3d5698SJohn Baldwin	adcq	72(%rsp),%r9
1217bc3d5698SJohn Baldwin	adcq	80(%rsp),%r10
1218bc3d5698SJohn Baldwin	adcq	88(%rsp),%r11
1219bc3d5698SJohn Baldwin	adcq	96(%rsp),%r12
1220bc3d5698SJohn Baldwin	adcq	104(%rsp),%r13
1221bc3d5698SJohn Baldwin	adcq	112(%rsp),%r14
1222bc3d5698SJohn Baldwin	adcq	120(%rsp),%r15
1223bc3d5698SJohn Baldwin	sbbq	%rcx,%rcx
1224bc3d5698SJohn Baldwin
1225bc3d5698SJohn Baldwin	call	__rsaz_512_subtract
1226bc3d5698SJohn Baldwin
/* %rax = %rsp + 152 + 48, the stack pointer value at function entry; restore registers relative to it. */
1227bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1228bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1229bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1230bc3d5698SJohn Baldwin.cfi_restore	%r15
1231bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1232bc3d5698SJohn Baldwin.cfi_restore	%r14
1233bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1234bc3d5698SJohn Baldwin.cfi_restore	%r13
1235bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1236bc3d5698SJohn Baldwin.cfi_restore	%r12
1237bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1238bc3d5698SJohn Baldwin.cfi_restore	%rbp
1239bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1240bc3d5698SJohn Baldwin.cfi_restore	%rbx
1241bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1242bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1243bc3d5698SJohn Baldwin.Lmul_gather4_epilogue:
/* Bytes f3 c3 encode "rep ret". */
1244bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1245bc3d5698SJohn Baldwin.cfi_endproc
1246bc3d5698SJohn Baldwin.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
/*
 * rsaz_512_mul_scatter4
 *
 * Multiplies two 8-limb values through __rsaz_512_mul / __rsaz_512_mulx
 * (both defined outside this chunk), runs the reduce and subtract helpers,
 * and scatters the eight limbs left in %r8..%r15 into a table at a 128-byte
 * stride.  __rsaz_512_subtract also stores through %rdi.
 *
 * Register arguments, as used by the code below:
 *   %rdi  destination; also an operand (its first limb is loaded from (%rdi)
 *         and the pointer is passed to the multiply helper in %rbp)
 *   %rsi  second operand pointer, left untouched for the multiply helper
 *   %rdx  pointer handed to the reduce/subtract helpers in %rbp
 *         (presumably the modulus - confirm)
 *   %rcx  qword parked at 128(%rsp) for the reduction helpers
 *         (presumably the Montgomery constant - confirm)
 *   %r8   table base
 *   %r9d  index; limb i is stored at %r8 + 8*index + 128*i, i = 0..7
 *
 * %xmm0..%xmm2 are used as scratch storage for %rdi, %rdx and the table slot
 * across the helper calls.
 */
1247bc3d5698SJohn Baldwin.globl	rsaz_512_mul_scatter4
1248bc3d5698SJohn Baldwin.type	rsaz_512_mul_scatter4,@function
1249bc3d5698SJohn Baldwin.align	32
1250bc3d5698SJohn Baldwinrsaz_512_mul_scatter4:
1251bc3d5698SJohn Baldwin.cfi_startproc
1252bc3d5698SJohn Baldwin	pushq	%rbx
1253bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1254bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
1255bc3d5698SJohn Baldwin	pushq	%rbp
1256bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1257bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
1258bc3d5698SJohn Baldwin	pushq	%r12
1259bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1260bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
1261bc3d5698SJohn Baldwin	pushq	%r13
1262bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1263bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
1264bc3d5698SJohn Baldwin	pushq	%r14
1265bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1266bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
1267bc3d5698SJohn Baldwin	pushq	%r15
1268bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1269bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
1270bc3d5698SJohn Baldwin
/* Zero-extend the 32-bit index so it can be used in 64-bit address arithmetic. */
1271bc3d5698SJohn Baldwin	movl	%r9d,%r9d
1272bc3d5698SJohn Baldwin	subq	$128+24,%rsp
1273bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	128+24
1274bc3d5698SJohn Baldwin.Lmul_scatter4_body:
/* %r8 = address of the first table slot for this index. */
1275bc3d5698SJohn Baldwin	leaq	(%r8,%r9,8),%r8
/* Hand-encoded movq %rdi,%xmm0 / movq %rdx,%xmm1 / movq %r8,%xmm2. */
1276bc3d5698SJohn Baldwin.byte	102,72,15,110,199
1277bc3d5698SJohn Baldwin.byte	102,72,15,110,202
1278bc3d5698SJohn Baldwin.byte	102,73,15,110,208
1279bc3d5698SJohn Baldwin	movq	%rcx,128(%rsp)
1280bc3d5698SJohn Baldwin
1281bc3d5698SJohn Baldwin	movq	%rdi,%rbp
/* Same 0x80100 capability test as the other entry points: both bits set selects the mulx path. */
1282bc3d5698SJohn Baldwin	movl	$0x80100,%r11d
1283bc3d5698SJohn Baldwin	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
1284bc3d5698SJohn Baldwin	cmpl	$0x80100,%r11d
1285bc3d5698SJohn Baldwin	je	.Lmulx_scatter
/* mulq flavour: first limb of the (%rdi) operand goes in %rbx. */
1286bc3d5698SJohn Baldwin	movq	(%rdi),%rbx
1287bc3d5698SJohn Baldwin	call	__rsaz_512_mul
1288bc3d5698SJohn Baldwin
/* Hand-encoded movq %xmm0,%rdi / movq %xmm1,%rbp: destination back in %rdi, original %rdx now in %rbp. */
1289bc3d5698SJohn Baldwin.byte	102,72,15,126,199
1290bc3d5698SJohn Baldwin.byte	102,72,15,126,205
1291bc3d5698SJohn Baldwin
/* Reload 0..56(%rsp) as reduction input; the multiply helper presumably left the product in this frame. */
1292bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1293bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1294bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1295bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1296bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1297bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1298bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1299bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1300bc3d5698SJohn Baldwin
1301bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1302bc3d5698SJohn Baldwin	jmp	.Lmul_scatter_tail
1303bc3d5698SJohn Baldwin
1304bc3d5698SJohn Baldwin.align	32
1305bc3d5698SJohn Baldwin.Lmulx_scatter:
/* mulx flavour: first limb of the (%rdi) operand goes in %rdx. */
1306bc3d5698SJohn Baldwin	movq	(%rdi),%rdx
1307bc3d5698SJohn Baldwin	call	__rsaz_512_mulx
1308bc3d5698SJohn Baldwin
/* Hand-encoded movq %xmm0,%rdi / movq %xmm1,%rbp. */
1309bc3d5698SJohn Baldwin.byte	102,72,15,126,199
1310bc3d5698SJohn Baldwin.byte	102,72,15,126,205
1311bc3d5698SJohn Baldwin
/* __rsaz_512_reducex expects the saved %rcx constant in %rdx. */
1312bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1313bc3d5698SJohn Baldwin	movq	(%rsp),%r8
1314bc3d5698SJohn Baldwin	movq	8(%rsp),%r9
1315bc3d5698SJohn Baldwin	movq	16(%rsp),%r10
1316bc3d5698SJohn Baldwin	movq	24(%rsp),%r11
1317bc3d5698SJohn Baldwin	movq	32(%rsp),%r12
1318bc3d5698SJohn Baldwin	movq	40(%rsp),%r13
1319bc3d5698SJohn Baldwin	movq	48(%rsp),%r14
1320bc3d5698SJohn Baldwin	movq	56(%rsp),%r15
1321bc3d5698SJohn Baldwin
1322bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1323bc3d5698SJohn Baldwin
1324bc3d5698SJohn Baldwin.Lmul_scatter_tail:
/* Add the upper half of the product (64..120(%rsp)) to the reduced low half. */
1325bc3d5698SJohn Baldwin	addq	64(%rsp),%r8
1326bc3d5698SJohn Baldwin	adcq	72(%rsp),%r9
1327bc3d5698SJohn Baldwin	adcq	80(%rsp),%r10
1328bc3d5698SJohn Baldwin	adcq	88(%rsp),%r11
1329bc3d5698SJohn Baldwin	adcq	96(%rsp),%r12
1330bc3d5698SJohn Baldwin	adcq	104(%rsp),%r13
1331bc3d5698SJohn Baldwin	adcq	112(%rsp),%r14
1332bc3d5698SJohn Baldwin	adcq	120(%rsp),%r15
/* Hand-encoded movq %xmm2,%rsi (table slot).  It does not modify flags, so the carry survives for sbbq. */
1333bc3d5698SJohn Baldwin.byte	102,72,15,126,214
/* %rcx = all-ones if the addition carried out, else zero; used as a mask by __rsaz_512_subtract. */
1334bc3d5698SJohn Baldwin	sbbq	%rcx,%rcx
1335bc3d5698SJohn Baldwin
1336bc3d5698SJohn Baldwin	call	__rsaz_512_subtract
1337bc3d5698SJohn Baldwin
/* Scatter: one limb every 128 bytes, starting at the slot computed on entry. */
1338bc3d5698SJohn Baldwin	movq	%r8,0(%rsi)
1339bc3d5698SJohn Baldwin	movq	%r9,128(%rsi)
1340bc3d5698SJohn Baldwin	movq	%r10,256(%rsi)
1341bc3d5698SJohn Baldwin	movq	%r11,384(%rsi)
1342bc3d5698SJohn Baldwin	movq	%r12,512(%rsi)
1343bc3d5698SJohn Baldwin	movq	%r13,640(%rsi)
1344bc3d5698SJohn Baldwin	movq	%r14,768(%rsi)
1345bc3d5698SJohn Baldwin	movq	%r15,896(%rsi)
1346bc3d5698SJohn Baldwin
/* %rax = stack pointer value at function entry; restore callee-saved registers relative to it. */
1347bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1348bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1349bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1350bc3d5698SJohn Baldwin.cfi_restore	%r15
1351bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1352bc3d5698SJohn Baldwin.cfi_restore	%r14
1353bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1354bc3d5698SJohn Baldwin.cfi_restore	%r13
1355bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1356bc3d5698SJohn Baldwin.cfi_restore	%r12
1357bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1358bc3d5698SJohn Baldwin.cfi_restore	%rbp
1359bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1360bc3d5698SJohn Baldwin.cfi_restore	%rbx
1361bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1362bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1363bc3d5698SJohn Baldwin.Lmul_scatter4_epilogue:
/* Bytes f3 c3 encode "rep ret". */
1364bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1365bc3d5698SJohn Baldwin.cfi_endproc
1366bc3d5698SJohn Baldwin.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
/*
 * rsaz_512_mul_by_one
 *
 * Runs the eight-round reduction helper directly on the 8-limb value at %rsi
 * and stores the resulting eight limbs at %rdi.  No multiplication is
 * performed and __rsaz_512_subtract is not called.  (Presumably this is a
 * Montgomery multiplication by 1, as the name suggests - confirm.)
 *
 * Register arguments, as used by the code below:
 *   %rdi  destination, 8 qwords
 *   %rsi  source, 8 qwords
 *   %rdx  pointer handed to the reduction helper in %rbp
 *         (presumably the modulus - confirm)
 *   %rcx  qword parked at 128(%rsp) for the reduction helper
 *         (presumably the Montgomery constant - confirm)
 */
1367bc3d5698SJohn Baldwin.globl	rsaz_512_mul_by_one
1368bc3d5698SJohn Baldwin.type	rsaz_512_mul_by_one,@function
1369bc3d5698SJohn Baldwin.align	32
1370bc3d5698SJohn Baldwinrsaz_512_mul_by_one:
1371bc3d5698SJohn Baldwin.cfi_startproc
1372bc3d5698SJohn Baldwin	pushq	%rbx
1373bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1374bc3d5698SJohn Baldwin.cfi_offset	%rbx,-16
1375bc3d5698SJohn Baldwin	pushq	%rbp
1376bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1377bc3d5698SJohn Baldwin.cfi_offset	%rbp,-24
1378bc3d5698SJohn Baldwin	pushq	%r12
1379bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1380bc3d5698SJohn Baldwin.cfi_offset	%r12,-32
1381bc3d5698SJohn Baldwin	pushq	%r13
1382bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1383bc3d5698SJohn Baldwin.cfi_offset	%r13,-40
1384bc3d5698SJohn Baldwin	pushq	%r14
1385bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1386bc3d5698SJohn Baldwin.cfi_offset	%r14,-48
1387bc3d5698SJohn Baldwin	pushq	%r15
1388bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	8
1389bc3d5698SJohn Baldwin.cfi_offset	%r15,-56
1390bc3d5698SJohn Baldwin
1391bc3d5698SJohn Baldwin	subq	$128+24,%rsp
1392bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset	128+24
1393bc3d5698SJohn Baldwin.Lmul_by_one_body:
/* Fetch the capability word early; it is tested after the loads and stores below. */
1394bc3d5698SJohn Baldwin	movl	OPENSSL_ia32cap_P+8(%rip),%eax
1395bc3d5698SJohn Baldwin	movq	%rdx,%rbp
1396bc3d5698SJohn Baldwin	movq	%rcx,128(%rsp)
1397bc3d5698SJohn Baldwin
/* Load the eight input limbs straight into the registers the reduction works on. */
1398bc3d5698SJohn Baldwin	movq	(%rsi),%r8
1399bc3d5698SJohn Baldwin	pxor	%xmm0,%xmm0
1400bc3d5698SJohn Baldwin	movq	8(%rsi),%r9
1401bc3d5698SJohn Baldwin	movq	16(%rsi),%r10
1402bc3d5698SJohn Baldwin	movq	24(%rsi),%r11
1403bc3d5698SJohn Baldwin	movq	32(%rsi),%r12
1404bc3d5698SJohn Baldwin	movq	40(%rsi),%r13
1405bc3d5698SJohn Baldwin	movq	48(%rsi),%r14
1406bc3d5698SJohn Baldwin	movq	56(%rsi),%r15
1407bc3d5698SJohn Baldwin
/* Zero 0..111(%rsp) with seven 16-byte stores; 112..127(%rsp) is not written here. */
1408bc3d5698SJohn Baldwin	movdqa	%xmm0,(%rsp)
1409bc3d5698SJohn Baldwin	movdqa	%xmm0,16(%rsp)
1410bc3d5698SJohn Baldwin	movdqa	%xmm0,32(%rsp)
1411bc3d5698SJohn Baldwin	movdqa	%xmm0,48(%rsp)
1412bc3d5698SJohn Baldwin	movdqa	%xmm0,64(%rsp)
1413bc3d5698SJohn Baldwin	movdqa	%xmm0,80(%rsp)
1414bc3d5698SJohn Baldwin	movdqa	%xmm0,96(%rsp)
/* Both bits of 0x80100 set selects the mulx-based reduction. */
1415bc3d5698SJohn Baldwin	andl	$0x80100,%eax
1416bc3d5698SJohn Baldwin	cmpl	$0x80100,%eax
1417bc3d5698SJohn Baldwin	je	.Lby_one_callx
1418bc3d5698SJohn Baldwin	call	__rsaz_512_reduce
1419bc3d5698SJohn Baldwin	jmp	.Lby_one_tail
1420bc3d5698SJohn Baldwin.align	32
1421bc3d5698SJohn Baldwin.Lby_one_callx:
/* __rsaz_512_reducex expects the saved %rcx constant in %rdx. */
1422bc3d5698SJohn Baldwin	movq	128(%rsp),%rdx
1423bc3d5698SJohn Baldwin	call	__rsaz_512_reducex
1424bc3d5698SJohn Baldwin.Lby_one_tail:
/* Write the reduced limbs to the destination. */
1425bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1426bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1427bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1428bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1429bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1430bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1431bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1432bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1433bc3d5698SJohn Baldwin
/* %rax = stack pointer value at function entry; restore callee-saved registers relative to it. */
1434bc3d5698SJohn Baldwin	leaq	128+24+48(%rsp),%rax
1435bc3d5698SJohn Baldwin.cfi_def_cfa	%rax,8
1436bc3d5698SJohn Baldwin	movq	-48(%rax),%r15
1437bc3d5698SJohn Baldwin.cfi_restore	%r15
1438bc3d5698SJohn Baldwin	movq	-40(%rax),%r14
1439bc3d5698SJohn Baldwin.cfi_restore	%r14
1440bc3d5698SJohn Baldwin	movq	-32(%rax),%r13
1441bc3d5698SJohn Baldwin.cfi_restore	%r13
1442bc3d5698SJohn Baldwin	movq	-24(%rax),%r12
1443bc3d5698SJohn Baldwin.cfi_restore	%r12
1444bc3d5698SJohn Baldwin	movq	-16(%rax),%rbp
1445bc3d5698SJohn Baldwin.cfi_restore	%rbp
1446bc3d5698SJohn Baldwin	movq	-8(%rax),%rbx
1447bc3d5698SJohn Baldwin.cfi_restore	%rbx
1448bc3d5698SJohn Baldwin	leaq	(%rax),%rsp
1449bc3d5698SJohn Baldwin.cfi_def_cfa_register	%rsp
1450bc3d5698SJohn Baldwin.Lmul_by_one_epilogue:
/* Bytes f3 c3 encode "rep ret". */
1451bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1452bc3d5698SJohn Baldwin.cfi_endproc
1453bc3d5698SJohn Baldwin.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
/*
 * __rsaz_512_reduce (file-local helper, mulq flavour)
 *
 * In:   %r8..%r15    eight limbs to reduce, least significant in %r8
 *       %rbp         pointer to eight qwords n[0..7]
 *       128+8(%rsp)  multiplier constant: the caller's 128(%rsp) slot, seen
 *                    8 bytes higher because call pushed the return address
 * Out:  %r8..%r15    the eight limbs after eight reduction rounds
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
 *
 * Each round takes m = low64(%r8 * constant), adds m * n[0..7] to the limbs
 * and shifts the window down by one limb.  Presumably the constant is
 * -1/n[0] mod 2^64, so that the lowest limb cancels - confirm against callers.
 */
1454bc3d5698SJohn Baldwin.type	__rsaz_512_reduce,@function
1455bc3d5698SJohn Baldwin.align	32
1456bc3d5698SJohn Baldwin__rsaz_512_reduce:
1457bc3d5698SJohn Baldwin.cfi_startproc
/* %rbx = m for the first round, %rax = n[0], %ecx = round counter. */
1458bc3d5698SJohn Baldwin	movq	%r8,%rbx
1459bc3d5698SJohn Baldwin	imulq	128+8(%rsp),%rbx
1460bc3d5698SJohn Baldwin	movq	0(%rbp),%rax
1461bc3d5698SJohn Baldwin	movl	$8,%ecx
1462bc3d5698SJohn Baldwin	jmp	.Lreduction_loop
1463bc3d5698SJohn Baldwin
1464bc3d5698SJohn Baldwin.align	32
1465bc3d5698SJohn Baldwin.Lreduction_loop:
1466bc3d5698SJohn Baldwin	mulq	%rbx
1467bc3d5698SJohn Baldwin	movq	8(%rbp),%rax
/*
 * The low half of m*n[0] is never added to %r8.  negq sets CF exactly when
 * %r8 is non-zero, which stands in for the carry out of that addition,
 * assuming the sum is 0 mod 2^64 (see the note on the constant above).
 */
1468bc3d5698SJohn Baldwin	negq	%r8
1469bc3d5698SJohn Baldwin	movq	%rdx,%r8
1470bc3d5698SJohn Baldwin	adcq	$0,%r8
1471bc3d5698SJohn Baldwin
1472bc3d5698SJohn Baldwin	mulq	%rbx
1473bc3d5698SJohn Baldwin	addq	%rax,%r9
1474bc3d5698SJohn Baldwin	movq	16(%rbp),%rax
1475bc3d5698SJohn Baldwin	adcq	$0,%rdx
1476bc3d5698SJohn Baldwin	addq	%r9,%r8
1477bc3d5698SJohn Baldwin	movq	%rdx,%r9
1478bc3d5698SJohn Baldwin	adcq	$0,%r9
1479bc3d5698SJohn Baldwin
1480bc3d5698SJohn Baldwin	mulq	%rbx
1481bc3d5698SJohn Baldwin	addq	%rax,%r10
1482bc3d5698SJohn Baldwin	movq	24(%rbp),%rax
1483bc3d5698SJohn Baldwin	adcq	$0,%rdx
1484bc3d5698SJohn Baldwin	addq	%r10,%r9
1485bc3d5698SJohn Baldwin	movq	%rdx,%r10
1486bc3d5698SJohn Baldwin	adcq	$0,%r10
1487bc3d5698SJohn Baldwin
1488bc3d5698SJohn Baldwin	mulq	%rbx
1489bc3d5698SJohn Baldwin	addq	%rax,%r11
1490bc3d5698SJohn Baldwin	movq	32(%rbp),%rax
1491bc3d5698SJohn Baldwin	adcq	$0,%rdx
1492bc3d5698SJohn Baldwin	addq	%r11,%r10
/* Reload the constant for the next round's m; movq leaves the pending carry intact. */
1493bc3d5698SJohn Baldwin	movq	128+8(%rsp),%rsi
1494bc3d5698SJohn Baldwin
1495bc3d5698SJohn Baldwin
1496bc3d5698SJohn Baldwin	adcq	$0,%rdx
1497bc3d5698SJohn Baldwin	movq	%rdx,%r11
1498bc3d5698SJohn Baldwin
1499bc3d5698SJohn Baldwin	mulq	%rbx
1500bc3d5698SJohn Baldwin	addq	%rax,%r12
1501bc3d5698SJohn Baldwin	movq	40(%rbp),%rax
1502bc3d5698SJohn Baldwin	adcq	$0,%rdx
/*
 * %r8 already holds its final value for this round, so the next round's
 * m = %r8 * constant is computed here.  imulq rewrites the flags; it sits at
 * a point where no carry is pending.
 */
1503bc3d5698SJohn Baldwin	imulq	%r8,%rsi
1504bc3d5698SJohn Baldwin	addq	%r12,%r11
1505bc3d5698SJohn Baldwin	movq	%rdx,%r12
1506bc3d5698SJohn Baldwin	adcq	$0,%r12
1507bc3d5698SJohn Baldwin
1508bc3d5698SJohn Baldwin	mulq	%rbx
1509bc3d5698SJohn Baldwin	addq	%rax,%r13
1510bc3d5698SJohn Baldwin	movq	48(%rbp),%rax
1511bc3d5698SJohn Baldwin	adcq	$0,%rdx
1512bc3d5698SJohn Baldwin	addq	%r13,%r12
1513bc3d5698SJohn Baldwin	movq	%rdx,%r13
1514bc3d5698SJohn Baldwin	adcq	$0,%r13
1515bc3d5698SJohn Baldwin
1516bc3d5698SJohn Baldwin	mulq	%rbx
1517bc3d5698SJohn Baldwin	addq	%rax,%r14
1518bc3d5698SJohn Baldwin	movq	56(%rbp),%rax
1519bc3d5698SJohn Baldwin	adcq	$0,%rdx
1520bc3d5698SJohn Baldwin	addq	%r14,%r13
1521bc3d5698SJohn Baldwin	movq	%rdx,%r14
1522bc3d5698SJohn Baldwin	adcq	$0,%r14
1523bc3d5698SJohn Baldwin
1524bc3d5698SJohn Baldwin	mulq	%rbx
/* Last use of the current m is done: switch %rbx to the next round's m. */
1525bc3d5698SJohn Baldwin	movq	%rsi,%rbx
1526bc3d5698SJohn Baldwin	addq	%rax,%r15
/* n[0] for the next round's first mulq. */
1527bc3d5698SJohn Baldwin	movq	0(%rbp),%rax
1528bc3d5698SJohn Baldwin	adcq	$0,%rdx
1529bc3d5698SJohn Baldwin	addq	%r15,%r14
1530bc3d5698SJohn Baldwin	movq	%rdx,%r15
1531bc3d5698SJohn Baldwin	adcq	$0,%r15
1532bc3d5698SJohn Baldwin
1533bc3d5698SJohn Baldwin	decl	%ecx
1534bc3d5698SJohn Baldwin	jne	.Lreduction_loop
1535bc3d5698SJohn Baldwin
/* Bytes f3 c3 encode "rep ret". */
1536bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1537bc3d5698SJohn Baldwin.cfi_endproc
1538bc3d5698SJohn Baldwin.size	__rsaz_512_reduce,.-__rsaz_512_reduce
/*
 * __rsaz_512_reducex (file-local helper, mulx/adcx/adox flavour)
 *
 * In:   %r8..%r15    eight limbs to reduce, least significant in %r8
 *       %rbp         pointer to eight qwords n[0..7]
 *       %rdx         multiplier constant (callers load it from their 128(%rsp))
 *       128+8(%rsp)  the same constant, re-read inside the loop (the caller's
 *                    128(%rsp) slot plus the pushed return address)
 * Out:  %r8..%r15    the eight limbs after eight reduction rounds
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags
 *
 * Same round structure as __rsaz_512_reduce, but the products come from mulx
 * (which does not touch flags) and the additions run on two interleaved carry
 * chains: adcx uses CF, adox uses OF.
 */
1539bc3d5698SJohn Baldwin.type	__rsaz_512_reducex,@function
1540bc3d5698SJohn Baldwin.align	32
1541bc3d5698SJohn Baldwin__rsaz_512_reducex:
1542bc3d5698SJohn Baldwin.cfi_startproc
1543bc3d5698SJohn Baldwin
/* %rdx = m for the first round; xorq zeroes %rsi (the constant 0 used below) and clears CF and OF. */
1544bc3d5698SJohn Baldwin	imulq	%r8,%rdx
1545bc3d5698SJohn Baldwin	xorq	%rsi,%rsi
1546bc3d5698SJohn Baldwin	movl	$8,%ecx
1547bc3d5698SJohn Baldwin	jmp	.Lreduction_loopx
1548bc3d5698SJohn Baldwin
1549bc3d5698SJohn Baldwin.align	32
1550bc3d5698SJohn Baldwin.Lreduction_loopx:
/* The sum written to %rax by the first adcx is discarded; only its carry into the CF chain is kept. */
1551bc3d5698SJohn Baldwin	movq	%r8,%rbx
1552bc3d5698SJohn Baldwin	mulxq	0(%rbp),%rax,%r8
1553bc3d5698SJohn Baldwin	adcxq	%rbx,%rax
1554bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1555bc3d5698SJohn Baldwin
1556bc3d5698SJohn Baldwin	mulxq	8(%rbp),%rax,%r9
1557bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1558bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1559bc3d5698SJohn Baldwin
1560bc3d5698SJohn Baldwin	mulxq	16(%rbp),%rbx,%r10
1561bc3d5698SJohn Baldwin	adcxq	%rbx,%r9
1562bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1563bc3d5698SJohn Baldwin
1564bc3d5698SJohn Baldwin	mulxq	24(%rbp),%rbx,%r11
1565bc3d5698SJohn Baldwin	adcxq	%rbx,%r10
1566bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1567bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 32(%rbp),%rbx,%r12 with a 32-bit displacement. */
1568bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
/* Park the current m in %rax and load the new lowest limb into %rdx to derive the next m. */
1569bc3d5698SJohn Baldwin	movq	%rdx,%rax
1570bc3d5698SJohn Baldwin	movq	%r8,%rdx
1571bc3d5698SJohn Baldwin	adcxq	%rbx,%r11
1572bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1573bc3d5698SJohn Baldwin
/*
 * %rbx = low64(%r8 * constant) = m for the next round.  The high half lands
 * in %rdx and is overwritten at once when the current m is restored.  mulx is
 * used so that the two carry chains are not disturbed.
 */
1574bc3d5698SJohn Baldwin	mulxq	128+8(%rsp),%rbx,%rdx
1575bc3d5698SJohn Baldwin	movq	%rax,%rdx
1576bc3d5698SJohn Baldwin
1577bc3d5698SJohn Baldwin	mulxq	40(%rbp),%rax,%r13
1578bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1579bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1580bc3d5698SJohn Baldwin
/* Hand-encoded mulxq 48(%rbp),%rax,%r14 with a 32-bit displacement. */
1581bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
1582bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1583bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1584bc3d5698SJohn Baldwin
1585bc3d5698SJohn Baldwin	mulxq	56(%rbp),%rax,%r15
/* Last product of the round is issued: switch %rdx to the next round's m. */
1586bc3d5698SJohn Baldwin	movq	%rbx,%rdx
1587bc3d5698SJohn Baldwin	adcxq	%rax,%r14
/* %rsi is zero: these two adds only absorb the final OF and CF carries into %r15. */
1588bc3d5698SJohn Baldwin	adoxq	%rsi,%r15
1589bc3d5698SJohn Baldwin	adcxq	%rsi,%r15
1590bc3d5698SJohn Baldwin
1591bc3d5698SJohn Baldwin	decl	%ecx
1592bc3d5698SJohn Baldwin	jne	.Lreduction_loopx
1593bc3d5698SJohn Baldwin
/* Bytes f3 c3 encode "rep ret". */
1594bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1595bc3d5698SJohn Baldwin.cfi_endproc
1596bc3d5698SJohn Baldwin.size	__rsaz_512_reducex,.-__rsaz_512_reducex
/*
 * __rsaz_512_subtract: masked subtraction of the eight qwords at (%rbp)
 * from the eight-limb value in %r8..%r15, with the result written to
 * (%rdi).
 *
 * The value in %r8..%r15 is first stored to (%rdi). The two's complement
 * of the number at (%rbp) is then formed limb by limb (negq on limb 0,
 * notq on limbs 1..7), every limb is ANDed with %rcx, and the masked
 * value is added to the stored one with a full carry chain.
 *
 * In:   %r8..%r15 = value, least significant limb in %r8
 *       %rdi      = destination, eight qwords
 *       %rbp      = pointer to the eight-qword subtrahend
 *       %rcx      = AND mask for every negated limb: all ones performs
 *                   the subtraction, zero leaves the value unchanged
 *                   (presumably callers pass only 0 or -1; confirm)
 * Out:  (%rdi) and %r8..%r15 = result
 * Clobbers: flags
 *
 * NOTE(review): negq/notq equals a full-width negation only while limb 0
 * of the subtrahend is non-zero, because no carry is propagated out of
 * the negq. Presumably the subtrahend is an odd modulus; confirm.
 */
1597bc3d5698SJohn Baldwin.type	__rsaz_512_subtract,@function
1598bc3d5698SJohn Baldwin.align	32
1599bc3d5698SJohn Baldwin__rsaz_512_subtract:
1600bc3d5698SJohn Baldwin.cfi_startproc
	/* Spill the incoming value so %r8..%r15 can be reused for the masked negation. */
1601bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1602bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1603bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1604bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1605bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1606bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1607bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1608bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1609bc3d5698SJohn Baldwin
	/* Load, negate (negq for limb 0, notq for the rest) and mask each limb; loads are interleaved with the logic ops. */
1610bc3d5698SJohn Baldwin	movq	0(%rbp),%r8
1611bc3d5698SJohn Baldwin	movq	8(%rbp),%r9
1612bc3d5698SJohn Baldwin	negq	%r8
1613bc3d5698SJohn Baldwin	notq	%r9
1614bc3d5698SJohn Baldwin	andq	%rcx,%r8
1615bc3d5698SJohn Baldwin	movq	16(%rbp),%r10
1616bc3d5698SJohn Baldwin	andq	%rcx,%r9
1617bc3d5698SJohn Baldwin	notq	%r10
1618bc3d5698SJohn Baldwin	movq	24(%rbp),%r11
1619bc3d5698SJohn Baldwin	andq	%rcx,%r10
1620bc3d5698SJohn Baldwin	notq	%r11
1621bc3d5698SJohn Baldwin	movq	32(%rbp),%r12
1622bc3d5698SJohn Baldwin	andq	%rcx,%r11
1623bc3d5698SJohn Baldwin	notq	%r12
1624bc3d5698SJohn Baldwin	movq	40(%rbp),%r13
1625bc3d5698SJohn Baldwin	andq	%rcx,%r12
1626bc3d5698SJohn Baldwin	notq	%r13
1627bc3d5698SJohn Baldwin	movq	48(%rbp),%r14
1628bc3d5698SJohn Baldwin	andq	%rcx,%r13
1629bc3d5698SJohn Baldwin	notq	%r14
1630bc3d5698SJohn Baldwin	movq	56(%rbp),%r15
1631bc3d5698SJohn Baldwin	andq	%rcx,%r14
1632bc3d5698SJohn Baldwin	notq	%r15
1633bc3d5698SJohn Baldwin	andq	%rcx,%r15
1634bc3d5698SJohn Baldwin
	/* Add the spilled value to the masked negation, carrying from limb 0 up to limb 7. */
1635bc3d5698SJohn Baldwin	addq	(%rdi),%r8
1636bc3d5698SJohn Baldwin	adcq	8(%rdi),%r9
1637bc3d5698SJohn Baldwin	adcq	16(%rdi),%r10
1638bc3d5698SJohn Baldwin	adcq	24(%rdi),%r11
1639bc3d5698SJohn Baldwin	adcq	32(%rdi),%r12
1640bc3d5698SJohn Baldwin	adcq	40(%rdi),%r13
1641bc3d5698SJohn Baldwin	adcq	48(%rdi),%r14
1642bc3d5698SJohn Baldwin	adcq	56(%rdi),%r15
1643bc3d5698SJohn Baldwin
	/* Write the result over the spilled value. */
1644bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1645bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1646bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1647bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1648bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1649bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1650bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1651bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1652bc3d5698SJohn Baldwin
	/* Two-byte return (rep ret encoding). */
1653bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1654bc3d5698SJohn Baldwin.cfi_endproc
1655bc3d5698SJohn Baldwin.size	__rsaz_512_subtract,.-__rsaz_512_subtract
/*
 * __rsaz_512_mul: 8 x 8 limb schoolbook multiplication using mulq,
 * producing a 16-limb product.
 *
 * In:   %rsi = pointer to the eight-qword multiplicand
 *       %rbx = limb 0 of the multiplier, already loaded by the caller
 *       %rbp = pointer to the multiplier; limbs 1..7 are read from it
 * Out:  16 qwords stored from 8(%rsp) upwards, %rsp being the value on
 *       entry (the slot at 0(%rsp) holds the return address);
 *       %r8..%r15 also hold the upper eight limbs on return
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rbp, %rdi, flags
 */
1656bc3d5698SJohn Baldwin.type	__rsaz_512_mul,@function
1657bc3d5698SJohn Baldwin.align	32
1658bc3d5698SJohn Baldwin__rsaz_512_mul:
1659bc3d5698SJohn Baldwin.cfi_startproc
	/* %rdi walks the output buffer, which starts right above the return address. */
1660bc3d5698SJohn Baldwin	leaq	8(%rsp),%rdi
1661bc3d5698SJohn Baldwin
	/*
	 * Pass 0: multiplicand * multiplier limb 0. The lowest product word
	 * is stored at once; the eight words above it stay in %r8..%r15.
	 * Each step loads the next multiplicand limb before the carry fix-up.
	 */
1662bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1663bc3d5698SJohn Baldwin	mulq	%rbx
1664bc3d5698SJohn Baldwin	movq	%rax,(%rdi)
1665bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
1666bc3d5698SJohn Baldwin	movq	%rdx,%r8
1667bc3d5698SJohn Baldwin
1668bc3d5698SJohn Baldwin	mulq	%rbx
1669bc3d5698SJohn Baldwin	addq	%rax,%r8
1670bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
1671bc3d5698SJohn Baldwin	movq	%rdx,%r9
1672bc3d5698SJohn Baldwin	adcq	$0,%r9
1673bc3d5698SJohn Baldwin
1674bc3d5698SJohn Baldwin	mulq	%rbx
1675bc3d5698SJohn Baldwin	addq	%rax,%r9
1676bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1677bc3d5698SJohn Baldwin	movq	%rdx,%r10
1678bc3d5698SJohn Baldwin	adcq	$0,%r10
1679bc3d5698SJohn Baldwin
1680bc3d5698SJohn Baldwin	mulq	%rbx
1681bc3d5698SJohn Baldwin	addq	%rax,%r10
1682bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1683bc3d5698SJohn Baldwin	movq	%rdx,%r11
1684bc3d5698SJohn Baldwin	adcq	$0,%r11
1685bc3d5698SJohn Baldwin
1686bc3d5698SJohn Baldwin	mulq	%rbx
1687bc3d5698SJohn Baldwin	addq	%rax,%r11
1688bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1689bc3d5698SJohn Baldwin	movq	%rdx,%r12
1690bc3d5698SJohn Baldwin	adcq	$0,%r12
1691bc3d5698SJohn Baldwin
1692bc3d5698SJohn Baldwin	mulq	%rbx
1693bc3d5698SJohn Baldwin	addq	%rax,%r12
1694bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1695bc3d5698SJohn Baldwin	movq	%rdx,%r13
1696bc3d5698SJohn Baldwin	adcq	$0,%r13
1697bc3d5698SJohn Baldwin
1698bc3d5698SJohn Baldwin	mulq	%rbx
1699bc3d5698SJohn Baldwin	addq	%rax,%r13
1700bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1701bc3d5698SJohn Baldwin	movq	%rdx,%r14
1702bc3d5698SJohn Baldwin	adcq	$0,%r14
1703bc3d5698SJohn Baldwin
1704bc3d5698SJohn Baldwin	mulq	%rbx
1705bc3d5698SJohn Baldwin	addq	%rax,%r14
	/* Reload multiplicand limb 0 so the loop can start with mulq straight away. */
1706bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1707bc3d5698SJohn Baldwin	movq	%rdx,%r15
1708bc3d5698SJohn Baldwin	adcq	$0,%r15
1709bc3d5698SJohn Baldwin
	/* Step to multiplier limb 1 and to the second output slot. */
1710bc3d5698SJohn Baldwin	leaq	8(%rbp),%rbp
1711bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1712bc3d5698SJohn Baldwin
	/* Seven more passes, one per remaining multiplier limb. */
1713bc3d5698SJohn Baldwin	movl	$7,%ecx
1714bc3d5698SJohn Baldwin	jmp	.Loop_mul
1715bc3d5698SJohn Baldwin
1716bc3d5698SJohn Baldwin.align	32
1717bc3d5698SJohn Baldwin.Loop_mul:
	/*
	 * One pass: add multiplicand * (%rbp) into the window %r8..%r15.
	 * The finished lowest word is stored to (%rdi) and every other
	 * column is written one register lower, so the window slides down
	 * by one limb per pass.
	 */
1718bc3d5698SJohn Baldwin	movq	(%rbp),%rbx
1719bc3d5698SJohn Baldwin	mulq	%rbx
1720bc3d5698SJohn Baldwin	addq	%rax,%r8
1721bc3d5698SJohn Baldwin	movq	8(%rsi),%rax
1722bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1723bc3d5698SJohn Baldwin	movq	%rdx,%r8
1724bc3d5698SJohn Baldwin	adcq	$0,%r8
1725bc3d5698SJohn Baldwin
1726bc3d5698SJohn Baldwin	mulq	%rbx
1727bc3d5698SJohn Baldwin	addq	%rax,%r9
1728bc3d5698SJohn Baldwin	movq	16(%rsi),%rax
1729bc3d5698SJohn Baldwin	adcq	$0,%rdx
1730bc3d5698SJohn Baldwin	addq	%r9,%r8
1731bc3d5698SJohn Baldwin	movq	%rdx,%r9
1732bc3d5698SJohn Baldwin	adcq	$0,%r9
1733bc3d5698SJohn Baldwin
1734bc3d5698SJohn Baldwin	mulq	%rbx
1735bc3d5698SJohn Baldwin	addq	%rax,%r10
1736bc3d5698SJohn Baldwin	movq	24(%rsi),%rax
1737bc3d5698SJohn Baldwin	adcq	$0,%rdx
1738bc3d5698SJohn Baldwin	addq	%r10,%r9
1739bc3d5698SJohn Baldwin	movq	%rdx,%r10
1740bc3d5698SJohn Baldwin	adcq	$0,%r10
1741bc3d5698SJohn Baldwin
1742bc3d5698SJohn Baldwin	mulq	%rbx
1743bc3d5698SJohn Baldwin	addq	%rax,%r11
1744bc3d5698SJohn Baldwin	movq	32(%rsi),%rax
1745bc3d5698SJohn Baldwin	adcq	$0,%rdx
1746bc3d5698SJohn Baldwin	addq	%r11,%r10
1747bc3d5698SJohn Baldwin	movq	%rdx,%r11
1748bc3d5698SJohn Baldwin	adcq	$0,%r11
1749bc3d5698SJohn Baldwin
1750bc3d5698SJohn Baldwin	mulq	%rbx
1751bc3d5698SJohn Baldwin	addq	%rax,%r12
1752bc3d5698SJohn Baldwin	movq	40(%rsi),%rax
1753bc3d5698SJohn Baldwin	adcq	$0,%rdx
1754bc3d5698SJohn Baldwin	addq	%r12,%r11
1755bc3d5698SJohn Baldwin	movq	%rdx,%r12
1756bc3d5698SJohn Baldwin	adcq	$0,%r12
1757bc3d5698SJohn Baldwin
1758bc3d5698SJohn Baldwin	mulq	%rbx
1759bc3d5698SJohn Baldwin	addq	%rax,%r13
1760bc3d5698SJohn Baldwin	movq	48(%rsi),%rax
1761bc3d5698SJohn Baldwin	adcq	$0,%rdx
1762bc3d5698SJohn Baldwin	addq	%r13,%r12
1763bc3d5698SJohn Baldwin	movq	%rdx,%r13
1764bc3d5698SJohn Baldwin	adcq	$0,%r13
1765bc3d5698SJohn Baldwin
1766bc3d5698SJohn Baldwin	mulq	%rbx
1767bc3d5698SJohn Baldwin	addq	%rax,%r14
1768bc3d5698SJohn Baldwin	movq	56(%rsi),%rax
1769bc3d5698SJohn Baldwin	adcq	$0,%rdx
1770bc3d5698SJohn Baldwin	addq	%r14,%r13
1771bc3d5698SJohn Baldwin	movq	%rdx,%r14
	/* leaq advances the multiplier pointer without touching the carry the following adcq needs. */
1772bc3d5698SJohn Baldwin	leaq	8(%rbp),%rbp
1773bc3d5698SJohn Baldwin	adcq	$0,%r14
1774bc3d5698SJohn Baldwin
1775bc3d5698SJohn Baldwin	mulq	%rbx
1776bc3d5698SJohn Baldwin	addq	%rax,%r15
	/* Reload multiplicand limb 0 for the next pass. */
1777bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1778bc3d5698SJohn Baldwin	adcq	$0,%rdx
1779bc3d5698SJohn Baldwin	addq	%r15,%r14
1780bc3d5698SJohn Baldwin	movq	%rdx,%r15
1781bc3d5698SJohn Baldwin	adcq	$0,%r15
1782bc3d5698SJohn Baldwin
1783bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
1784bc3d5698SJohn Baldwin
1785bc3d5698SJohn Baldwin	decl	%ecx
1786bc3d5698SJohn Baldwin	jnz	.Loop_mul
1787bc3d5698SJohn Baldwin
	/* Flush the remaining window: the upper eight limbs of the product. */
1788bc3d5698SJohn Baldwin	movq	%r8,(%rdi)
1789bc3d5698SJohn Baldwin	movq	%r9,8(%rdi)
1790bc3d5698SJohn Baldwin	movq	%r10,16(%rdi)
1791bc3d5698SJohn Baldwin	movq	%r11,24(%rdi)
1792bc3d5698SJohn Baldwin	movq	%r12,32(%rdi)
1793bc3d5698SJohn Baldwin	movq	%r13,40(%rdi)
1794bc3d5698SJohn Baldwin	movq	%r14,48(%rdi)
1795bc3d5698SJohn Baldwin	movq	%r15,56(%rdi)
1796bc3d5698SJohn Baldwin
	/* Two-byte return (rep ret encoding). */
1797bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1798bc3d5698SJohn Baldwin.cfi_endproc
1799bc3d5698SJohn Baldwin.size	__rsaz_512_mul,.-__rsaz_512_mul
/*
 * __rsaz_512_mulx: 8 x 8 limb multiplication using mulx/adcx/adox,
 * producing a 16-limb product.
 *
 * In:   %rsi = pointer to the eight-qword multiplicand
 *       %rdx = limb 0 of the multiplier, already loaded by the caller
 *       %rbp = pointer to the multiplier; limbs 1..7 are read from it
 * Out:  16 qwords stored from 8(%rsp) upwards, %rsp being the value on
 *       entry (the slot at 0(%rsp) holds the return address);
 *       %r8..%r15 also hold the upper eight limbs on return
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, flags
 *
 * NOTE(review): the first adcq below consumes CF exactly as it was on
 * entry, because mulx and movq do not modify flags. Callers therefore
 * have to enter with CF clear; confirm at every call site.
 */
1800bc3d5698SJohn Baldwin.type	__rsaz_512_mulx,@function
1801bc3d5698SJohn Baldwin.align	32
1802bc3d5698SJohn Baldwin__rsaz_512_mulx:
1803bc3d5698SJohn Baldwin.cfi_startproc
	/*
	 * Pass 0: multiplicand * multiplier limb 0. Low halves alternate
	 * between %rbx and %rax so each can be added one step later.
	 */
1804bc3d5698SJohn Baldwin	mulxq	(%rsi),%rbx,%r8
	/* Loop counter runs from -6 up to 0: six loop passes, then one unrolled final pass. */
1805bc3d5698SJohn Baldwin	movq	$-6,%rcx
1806bc3d5698SJohn Baldwin
1807bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
	/* Lowest product word is final; store it. */
1808bc3d5698SJohn Baldwin	movq	%rbx,8(%rsp)
1809bc3d5698SJohn Baldwin
1810bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rbx,%r10
1811bc3d5698SJohn Baldwin	adcq	%rax,%r8
1812bc3d5698SJohn Baldwin
1813bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1814bc3d5698SJohn Baldwin	adcq	%rbx,%r9
1815bc3d5698SJohn Baldwin
1816bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rbx,%r12
1817bc3d5698SJohn Baldwin	adcq	%rax,%r10
1818bc3d5698SJohn Baldwin
1819bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1820bc3d5698SJohn Baldwin	adcq	%rbx,%r11
1821bc3d5698SJohn Baldwin
1822bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rbx,%r14
1823bc3d5698SJohn Baldwin	adcq	%rax,%r12
1824bc3d5698SJohn Baldwin
1825bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
	/* Fetch multiplier limb 1 for the first loop pass. */
1826bc3d5698SJohn Baldwin	movq	8(%rbp),%rdx
1827bc3d5698SJohn Baldwin	adcq	%rbx,%r13
1828bc3d5698SJohn Baldwin	adcq	%rax,%r14
1829bc3d5698SJohn Baldwin	adcq	$0,%r15
1830bc3d5698SJohn Baldwin
	/* %rdi = 0; the xor also clears CF and OF ahead of the adcx/adox chains. */
1831bc3d5698SJohn Baldwin	xorq	%rdi,%rdi
1832bc3d5698SJohn Baldwin	jmp	.Loop_mulx
1833bc3d5698SJohn Baldwin
1834bc3d5698SJohn Baldwin.align	32
1835bc3d5698SJohn Baldwin.Loop_mulx:
	/*
	 * One pass: add multiplicand * %rdx into the window %r8..%r15.
	 * adcx (CF chain) adds each low half into the limb below, adox
	 * (OF chain) adds the old upper neighbour into each new high half.
	 * %rbx collects the finished lowest word of the pass.
	 */
1836bc3d5698SJohn Baldwin	movq	%r8,%rbx
1837bc3d5698SJohn Baldwin	mulxq	(%rsi),%rax,%r8
1838bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1839bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1840bc3d5698SJohn Baldwin
1841bc3d5698SJohn Baldwin	mulxq	8(%rsi),%rax,%r9
1842bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1843bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1844bc3d5698SJohn Baldwin
1845bc3d5698SJohn Baldwin	mulxq	16(%rsi),%rax,%r10
1846bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1847bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1848bc3d5698SJohn Baldwin
1849bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1850bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1851bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1852bc3d5698SJohn Baldwin
1853bc3d5698SJohn Baldwin.byte	0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00	/* 0x3e prefix + mulxq 32(%rsi),%rax,%r12 with a 32-bit displacement */
1854bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1855bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1856bc3d5698SJohn Baldwin
1857bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1858bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1859bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1860bc3d5698SJohn Baldwin
1861bc3d5698SJohn Baldwin	mulxq	48(%rsi),%rax,%r14
1862bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1863bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1864bc3d5698SJohn Baldwin
1865bc3d5698SJohn Baldwin	mulxq	56(%rsi),%rax,%r15
	/* Next multiplier limb: with %rcx = -6..-1 this reads 16(%rbp)..56(%rbp). */
1866bc3d5698SJohn Baldwin	movq	64(%rbp,%rcx,8),%rdx
	/* Store this pass's finished word: with %rcx = -6..-1 this writes 16(%rsp)..56(%rsp). */
1867bc3d5698SJohn Baldwin	movq	%rbx,8+64-8(%rsp,%rcx,8)
1868bc3d5698SJohn Baldwin	adcxq	%rax,%r14
	/* Fold the last OF carry and then the last CF carry into the top limb (%rdi is zero). */
1869bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1870bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1871bc3d5698SJohn Baldwin
1872bc3d5698SJohn Baldwin	incq	%rcx
1873bc3d5698SJohn Baldwin	jnz	.Loop_mulx
1874bc3d5698SJohn Baldwin
	/* Final pass for the last multiplier limb, unrolled out of the loop; no further limb is fetched. */
1875bc3d5698SJohn Baldwin	movq	%r8,%rbx
1876bc3d5698SJohn Baldwin	mulxq	(%rsi),%rax,%r8
1877bc3d5698SJohn Baldwin	adcxq	%rax,%rbx
1878bc3d5698SJohn Baldwin	adoxq	%r9,%r8
1879bc3d5698SJohn Baldwin
1880bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00	/* mulxq 8(%rsi),%rax,%r9 with a 32-bit displacement */
1881bc3d5698SJohn Baldwin	adcxq	%rax,%r8
1882bc3d5698SJohn Baldwin	adoxq	%r10,%r9
1883bc3d5698SJohn Baldwin
1884bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00	/* mulxq 16(%rsi),%rax,%r10 with a 32-bit displacement */
1885bc3d5698SJohn Baldwin	adcxq	%rax,%r9
1886bc3d5698SJohn Baldwin	adoxq	%r11,%r10
1887bc3d5698SJohn Baldwin
1888bc3d5698SJohn Baldwin	mulxq	24(%rsi),%rax,%r11
1889bc3d5698SJohn Baldwin	adcxq	%rax,%r10
1890bc3d5698SJohn Baldwin	adoxq	%r12,%r11
1891bc3d5698SJohn Baldwin
1892bc3d5698SJohn Baldwin	mulxq	32(%rsi),%rax,%r12
1893bc3d5698SJohn Baldwin	adcxq	%rax,%r11
1894bc3d5698SJohn Baldwin	adoxq	%r13,%r12
1895bc3d5698SJohn Baldwin
1896bc3d5698SJohn Baldwin	mulxq	40(%rsi),%rax,%r13
1897bc3d5698SJohn Baldwin	adcxq	%rax,%r12
1898bc3d5698SJohn Baldwin	adoxq	%r14,%r13
1899bc3d5698SJohn Baldwin
1900bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%r14 with a 32-bit displacement */
1901bc3d5698SJohn Baldwin	adcxq	%rax,%r13
1902bc3d5698SJohn Baldwin	adoxq	%r15,%r14
1903bc3d5698SJohn Baldwin
1904bc3d5698SJohn Baldwin.byte	0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rax,%r15 with a 32-bit displacement */
1905bc3d5698SJohn Baldwin	adcxq	%rax,%r14
1906bc3d5698SJohn Baldwin	adoxq	%rdi,%r15
1907bc3d5698SJohn Baldwin	adcxq	%rdi,%r15
1908bc3d5698SJohn Baldwin
	/* Store product word 7 and then the upper eight limbs. */
1909bc3d5698SJohn Baldwin	movq	%rbx,8+64-8(%rsp)
1910bc3d5698SJohn Baldwin	movq	%r8,8+64(%rsp)
1911bc3d5698SJohn Baldwin	movq	%r9,8+64+8(%rsp)
1912bc3d5698SJohn Baldwin	movq	%r10,8+64+16(%rsp)
1913bc3d5698SJohn Baldwin	movq	%r11,8+64+24(%rsp)
1914bc3d5698SJohn Baldwin	movq	%r12,8+64+32(%rsp)
1915bc3d5698SJohn Baldwin	movq	%r13,8+64+40(%rsp)
1916bc3d5698SJohn Baldwin	movq	%r14,8+64+48(%rsp)
1917bc3d5698SJohn Baldwin	movq	%r15,8+64+56(%rsp)
1918bc3d5698SJohn Baldwin
	/* Two-byte return (rep ret encoding). */
1919bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1920bc3d5698SJohn Baldwin.cfi_endproc
1921bc3d5698SJohn Baldwin.size	__rsaz_512_mulx,.-__rsaz_512_mulx
/*
 * rsaz_512_scatter4: store an eight-qword value into a table whose rows
 * are 128 bytes apart.
 *
 * In:   %rdi = table base
 *       %rsi = pointer to the eight source qwords
 *       %rdx = entry index (all 64 bits are used in the address)
 * Source qword i is written to %rdi + %rdx*8 + i*128, for i = 0..7.
 * Clobbers: %rax, %rsi, %rdi, %r9, flags
 */
1922bc3d5698SJohn Baldwin.globl	rsaz_512_scatter4
1923bc3d5698SJohn Baldwin.type	rsaz_512_scatter4,@function
1924bc3d5698SJohn Baldwin.align	16
1925bc3d5698SJohn Baldwinrsaz_512_scatter4:
1926bc3d5698SJohn Baldwin.cfi_startproc
	/* Point %rdi at this entry's slot in the first row. */
1927bc3d5698SJohn Baldwin	leaq	(%rdi,%rdx,8),%rdi
	/* Eight qwords to copy. */
1928bc3d5698SJohn Baldwin	movl	$8,%r9d
1929bc3d5698SJohn Baldwin	jmp	.Loop_scatter
1930bc3d5698SJohn Baldwin.align	16
1931bc3d5698SJohn Baldwin.Loop_scatter:
1932bc3d5698SJohn Baldwin	movq	(%rsi),%rax
1933bc3d5698SJohn Baldwin	leaq	8(%rsi),%rsi
1934bc3d5698SJohn Baldwin	movq	%rax,(%rdi)
	/* Advance to the same slot in the next 128-byte row. */
1935bc3d5698SJohn Baldwin	leaq	128(%rdi),%rdi
1936bc3d5698SJohn Baldwin	decl	%r9d
1937bc3d5698SJohn Baldwin	jnz	.Loop_scatter
	/* Two-byte return (rep ret encoding). */
1938bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
1939bc3d5698SJohn Baldwin.cfi_endproc
1940bc3d5698SJohn Baldwin.size	rsaz_512_scatter4,.-rsaz_512_scatter4
1941bc3d5698SJohn Baldwin
/*
 * rsaz_512_gather4: read one eight-qword entry out of a table whose
 * rows are 128 bytes (16 qword slots) apart.
 *
 * In:   %rdi = destination, eight qwords
 *       %rsi = table base; read with movdqa, so it must be 16-byte
 *              aligned
 *       %edx = entry index
 * Each loop pass loads the whole 128-byte row and selects the wanted
 * qword with AND masks, so the addresses read do not depend on %edx.
 * An index outside 0..15 matches no mask and produces zero qwords.
 * Clobbers: %xmm0..%xmm15, %rsi, %rdi, %r9, flags
 */
1942bc3d5698SJohn Baldwin.globl	rsaz_512_gather4
1943bc3d5698SJohn Baldwin.type	rsaz_512_gather4,@function
1944bc3d5698SJohn Baldwin.align	16
1945bc3d5698SJohn Baldwinrsaz_512_gather4:
1946bc3d5698SJohn Baldwin.cfi_startproc
	/*
	 * Build eight 128-bit select masks in %xmm0..%xmm7. Register k
	 * starts as the dword vector {2k,2k,2k+1,2k+1}: .Linc = {0,0,1,1}
	 * for k = 0, then repeated additions of .Linc+16 = {2,2,2,2}.
	 * pcmpeqd against the broadcast index leaves all ones in the 64-bit
	 * half whose slot number equals the index, zero elsewhere.
	 */
1947bc3d5698SJohn Baldwin	movd	%edx,%xmm8
1948bc3d5698SJohn Baldwin	movdqa	.Linc+16(%rip),%xmm1
1949bc3d5698SJohn Baldwin	movdqa	.Linc(%rip),%xmm0
1950bc3d5698SJohn Baldwin
	/* Broadcast the index into all four dwords of %xmm8. */
1951bc3d5698SJohn Baldwin	pshufd	$0,%xmm8,%xmm8
1952bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm7
1953bc3d5698SJohn Baldwin	movdqa	%xmm1,%xmm2
1954bc3d5698SJohn Baldwin	paddd	%xmm0,%xmm1
1955bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm0
1956bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm3
1957bc3d5698SJohn Baldwin	paddd	%xmm1,%xmm2
1958bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm1
1959bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm4
1960bc3d5698SJohn Baldwin	paddd	%xmm2,%xmm3
1961bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm2
1962bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm5
1963bc3d5698SJohn Baldwin	paddd	%xmm3,%xmm4
1964bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm3
1965bc3d5698SJohn Baldwin	movdqa	%xmm7,%xmm6
1966bc3d5698SJohn Baldwin	paddd	%xmm4,%xmm5
1967bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm4
1968bc3d5698SJohn Baldwin	paddd	%xmm5,%xmm6
1969bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm5
1970bc3d5698SJohn Baldwin	paddd	%xmm6,%xmm7
1971bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm6
1972bc3d5698SJohn Baldwin	pcmpeqd	%xmm8,%xmm7
	/* Eight rows, one output qword per row. */
1973bc3d5698SJohn Baldwin	movl	$8,%r9d
1974bc3d5698SJohn Baldwin	jmp	.Loop_gather
1975bc3d5698SJohn Baldwin.align	16
1976bc3d5698SJohn Baldwin.Loop_gather:
	/* Load all 16 slots of the row and mask each pair; loads and pands are interleaved. */
1977bc3d5698SJohn Baldwin	movdqa	0(%rsi),%xmm8
1978bc3d5698SJohn Baldwin	movdqa	16(%rsi),%xmm9
1979bc3d5698SJohn Baldwin	movdqa	32(%rsi),%xmm10
1980bc3d5698SJohn Baldwin	movdqa	48(%rsi),%xmm11
1981bc3d5698SJohn Baldwin	pand	%xmm0,%xmm8
1982bc3d5698SJohn Baldwin	movdqa	64(%rsi),%xmm12
1983bc3d5698SJohn Baldwin	pand	%xmm1,%xmm9
1984bc3d5698SJohn Baldwin	movdqa	80(%rsi),%xmm13
1985bc3d5698SJohn Baldwin	pand	%xmm2,%xmm10
1986bc3d5698SJohn Baldwin	movdqa	96(%rsi),%xmm14
1987bc3d5698SJohn Baldwin	pand	%xmm3,%xmm11
1988bc3d5698SJohn Baldwin	movdqa	112(%rsi),%xmm15
1989bc3d5698SJohn Baldwin	leaq	128(%rsi),%rsi
1990bc3d5698SJohn Baldwin	pand	%xmm4,%xmm12
1991bc3d5698SJohn Baldwin	pand	%xmm5,%xmm13
1992bc3d5698SJohn Baldwin	pand	%xmm6,%xmm14
1993bc3d5698SJohn Baldwin	pand	%xmm7,%xmm15
	/* OR the eight masked vectors together in two interleaved chains. */
1994bc3d5698SJohn Baldwin	por	%xmm10,%xmm8
1995bc3d5698SJohn Baldwin	por	%xmm11,%xmm9
1996bc3d5698SJohn Baldwin	por	%xmm12,%xmm8
1997bc3d5698SJohn Baldwin	por	%xmm13,%xmm9
1998bc3d5698SJohn Baldwin	por	%xmm14,%xmm8
1999bc3d5698SJohn Baldwin	por	%xmm15,%xmm9
2000bc3d5698SJohn Baldwin
2001bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
	/* 0x4e swaps the two 64-bit halves; ORing them folds the selected qword into the low half. */
2002bc3d5698SJohn Baldwin	pshufd	$0x4e,%xmm8,%xmm9
2003bc3d5698SJohn Baldwin	por	%xmm9,%xmm8
2004bc3d5698SJohn Baldwin	movq	%xmm8,(%rdi)
2005bc3d5698SJohn Baldwin	leaq	8(%rdi),%rdi
2006bc3d5698SJohn Baldwin	decl	%r9d
2007bc3d5698SJohn Baldwin	jnz	.Loop_gather
	/* Two-byte return (rep ret encoding). */
2008bc3d5698SJohn Baldwin	.byte	0xf3,0xc3
2009bc3d5698SJohn Baldwin.LSEH_end_rsaz_512_gather4:
2010bc3d5698SJohn Baldwin.cfi_endproc
2011bc3d5698SJohn Baldwin.size	rsaz_512_gather4,.-rsaz_512_gather4
2012bc3d5698SJohn Baldwin
/*
 * Dword constants for the mask setup in rsaz_512_gather4, which loads
 * them with movdqa (hence the alignment). .Linc is the start vector
 * {0,0,1,1}; .Linc+16 is the per-step increment {2,2,2,2}.
 */
2013bc3d5698SJohn Baldwin.align	64
2014bc3d5698SJohn Baldwin.Linc:
2015bc3d5698SJohn Baldwin.long	0,0, 1,1
2016bc3d5698SJohn Baldwin.long	2,2, 2,2
/*
 * ELF note placed in .note.gnu.property. Fields as emitted below:
 *   namesz = 1f - 0f  (the four bytes "GNU" plus NUL)
 *   descsz = 4f - 1f  (the property record including its padding)
 *   type   = 5
 *   name   = "GNU\0"
 *   desc   = one property: type 0xc0000002, datasz = 3f - 2f (4 bytes),
 *            data = 3, padded to an 8-byte boundary
 * Going by the x86-64 psABI (not by anything in this file), note type 5
 * should be NT_GNU_PROPERTY_TYPE_0, 0xc0000002 should be
 * GNU_PROPERTY_X86_FEATURE_1_AND, and the value 3 should set its IBT
 * and SHSTK bits.
 */
2017c0855eaaSJohn Baldwin	.section ".note.gnu.property", "a"
2018c0855eaaSJohn Baldwin	.p2align 3
2019c0855eaaSJohn Baldwin	.long 1f - 0f
2020c0855eaaSJohn Baldwin	.long 4f - 1f
2021c0855eaaSJohn Baldwin	.long 5
2022c0855eaaSJohn Baldwin0:
2023c0855eaaSJohn Baldwin	# "GNU" encoded with .byte, since .asciz isn't supported
2024c0855eaaSJohn Baldwin	# on Solaris.
2025c0855eaaSJohn Baldwin	.byte 0x47
2026c0855eaaSJohn Baldwin	.byte 0x4e
2027c0855eaaSJohn Baldwin	.byte 0x55
2028c0855eaaSJohn Baldwin	.byte 0
2029c0855eaaSJohn Baldwin1:
2030c0855eaaSJohn Baldwin	.p2align 3
2031c0855eaaSJohn Baldwin	.long 0xc0000002
2032c0855eaaSJohn Baldwin	.long 3f - 2f
2033c0855eaaSJohn Baldwin2:
2034c0855eaaSJohn Baldwin	.long 3
2035c0855eaaSJohn Baldwin3:
2036c0855eaaSJohn Baldwin	.p2align 3
2037c0855eaaSJohn Baldwin4:
2038