1; 1 "crypto/bn/mont5-masm-x86_64.S.tmp"
2; 1 "<built-in>" 1
3; 1 "<built-in>" 3
4; 340 "<built-in>" 3
5; 1 "<command line>" 1
6; 1 "<built-in>" 2
7; 1 "crypto/bn/mont5-masm-x86_64.S.tmp" 2
8OPTION	DOTNAME
9
10; 1 "./crypto/x86_arch.h" 1
11
12
13; 16 "./crypto/x86_arch.h"
14
15
16
17
18
19
20
21
22
23; 40 "./crypto/x86_arch.h"
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69; 3 "crypto/bn/mont5-masm-x86_64.S.tmp" 2
70.text$	SEGMENT ALIGN(64) 'CODE'
71
72PUBLIC	bn_mul_mont_gather5
73
74ALIGN	64
;-----------------------------------------------------------------------
; bn_mul_mont_gather5 (Win64 entry) -- word-serial Montgomery
; multiplication in which each multiplier word is gathered from a table
; by masking instead of by indexed addressing.
;
; Win64 arguments as consumed by the prologue below:
;   rcx -> rdi : destination words (written by $L$sub / $L$copy)
;   rdx -> rsi : first operand, read as a[j] = [rsi+j*8]
;   r8  -> rdx : gather table; one 256-byte row (32 qwords) per multiplier word
;   r9  -> rcx : modulus words n[j] = [rcx+j*8]
;   [rsp+40] -> r8 : pointer to one qword; the qword itself is used as the
;                    per-row imul factor
;   [rsp+48] -> r9 : word count (only the low 32 bits are used)
;   [rsp+56]       : dword table index, broadcast into xmm5
; Returns rax = 1.
;
; Word counts that are a multiple of 4 and at least 8 jump to
; $L$mul4x_enter inside bn_mul4x_mont_gather5; everything else runs here.
;
; Frame: rbx, rbp, r12-r15 are pushed; a scratch vector tp[] plus sixteen
; 16-byte masks live in a 1024-byte aligned area below them.  The
; pre-allocation rsp is kept in tp[num+1] so that the epilogue (and
; mul_handler) can find the pushed registers again.
;-----------------------------------------------------------------------
75bn_mul_mont_gather5	PROC PUBLIC
76	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
77	mov	QWORD PTR[16+rsp],rsi
78	mov	rax,rsp	; rax = rsp at entry
79$L$SEH_begin_bn_mul_mont_gather5::
80	mov	rdi,rcx	; rdi = 1st argument
81	mov	rsi,rdx	; rsi = 2nd argument
82	mov	rdx,r8	; rdx = 3rd argument
83	mov	rcx,r9	; rcx = 4th argument
84	mov	r8,QWORD PTR[40+rsp]	; r8 = 5th argument
85	mov	r9,QWORD PTR[48+rsp]	; r9 = 6th argument
86
87
88	test	r9d,3	; count not a multiple of 4 -> generic path
89	jnz	$L$mul_enter
90	cmp	r9d,8	; fewer than 8 words -> generic path
91	jb	$L$mul_enter
92	jmp	$L$mul4x_enter	; otherwise continue in the 4x-unrolled body
93
94ALIGN	16
95$L$mul_enter::
96	mov	r9d,r9d	; zero-extend the 32-bit word count into r9
97	movd	xmm5,DWORD PTR[56+rsp]	; xmm5 = table index (7th argument)
98	lea	r10,QWORD PTR[$L$inc]	; r10 -> {0,0,1,1},{2,2,2,2} constants
99	push	rbx
100	push	rbp
101	push	r12
102	push	r13
103	push	r14
104	push	r15
105
106$L$mul_alloca::
107	mov	rax,rsp	; rax = rsp after the six pushes
108	lea	r11,QWORD PTR[2+r9]
109	neg	r11	; r11 = -(num+2)
110	lea	rsp,QWORD PTR[((-264))+r11*8+rsp]	; reserve 8*(num+2)+264 bytes
111	and	rsp,-1024	; round the frame down to a 1024-byte boundary
112
113	mov	QWORD PTR[8+r9*8+rsp],rax	; tp[num+1] = pre-allocation rsp
114$L$mul_body::
115	lea	r12,QWORD PTR[128+rdx]	; r12 = table+128: a row is addressed as -128..+112
116	movdqa	xmm0,XMMWORD PTR[r10]	; xmm0 = {0,0,1,1}
117	movdqa	xmm1,XMMWORD PTR[16+r10]	; xmm1 = {2,2,2,2}
118	lea	r10,QWORD PTR[((24-112))+r9*8+rsp]
119	and	r10,-16	; r10+112 = 16-byte aligned mask area above tp[]
120
121	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
122	movdqa	xmm4,xmm1	; xmm4 = constant step {2,2,2,2}
123	movdqa	xmm2,xmm1
124	paddd	xmm1,xmm0	; next pair of candidate indices
125	pcmpeqd	xmm0,xmm5	; all-ones in the qword lane whose number equals the index
126DB	067h	; address-size prefix byte in front of the next instruction (presumably padding)
127	movdqa	xmm3,xmm4
128	paddd	xmm2,xmm1
129	pcmpeqd	xmm1,xmm5
130	movdqa	XMMWORD PTR[112+r10],xmm0	; mask for indices 0,1
131	movdqa	xmm0,xmm4
132
133	paddd	xmm3,xmm2
134	pcmpeqd	xmm2,xmm5
135	movdqa	XMMWORD PTR[128+r10],xmm1	; mask for indices 2,3
136	movdqa	xmm1,xmm4
137
138	paddd	xmm0,xmm3
139	pcmpeqd	xmm3,xmm5
140	movdqa	XMMWORD PTR[144+r10],xmm2	; mask for indices 4,5
141	movdqa	xmm2,xmm4
142
143	paddd	xmm1,xmm0
144	pcmpeqd	xmm0,xmm5
145	movdqa	XMMWORD PTR[160+r10],xmm3	; mask for indices 6,7
146	movdqa	xmm3,xmm4
147	paddd	xmm2,xmm1
148	pcmpeqd	xmm1,xmm5
149	movdqa	XMMWORD PTR[176+r10],xmm0	; mask for indices 8,9
150	movdqa	xmm0,xmm4
151
152	paddd	xmm3,xmm2
153	pcmpeqd	xmm2,xmm5
154	movdqa	XMMWORD PTR[192+r10],xmm1	; mask for indices 10,11
155	movdqa	xmm1,xmm4
156
157	paddd	xmm0,xmm3
158	pcmpeqd	xmm3,xmm5
159	movdqa	XMMWORD PTR[208+r10],xmm2	; mask for indices 12,13
160	movdqa	xmm2,xmm4
161
162	paddd	xmm1,xmm0
163	pcmpeqd	xmm0,xmm5
164	movdqa	XMMWORD PTR[224+r10],xmm3	; mask for indices 14,15
165	movdqa	xmm3,xmm4
166	paddd	xmm2,xmm1
167	pcmpeqd	xmm1,xmm5
168	movdqa	XMMWORD PTR[240+r10],xmm0	; mask for indices 16,17
169	movdqa	xmm0,xmm4
170
171	paddd	xmm3,xmm2
172	pcmpeqd	xmm2,xmm5
173	movdqa	XMMWORD PTR[256+r10],xmm1	; mask for indices 18,19
174	movdqa	xmm1,xmm4
175
176	paddd	xmm0,xmm3
177	pcmpeqd	xmm3,xmm5
178	movdqa	XMMWORD PTR[272+r10],xmm2	; mask for indices 20,21
179	movdqa	xmm2,xmm4
180
181	paddd	xmm1,xmm0
182	pcmpeqd	xmm0,xmm5
183	movdqa	XMMWORD PTR[288+r10],xmm3	; mask for indices 22,23
184	movdqa	xmm3,xmm4
185	paddd	xmm2,xmm1
186	pcmpeqd	xmm1,xmm5
187	movdqa	XMMWORD PTR[304+r10],xmm0	; mask for indices 24,25
188
189	paddd	xmm3,xmm2
190DB	067h	; address-size prefix byte in front of the next instruction (presumably padding)
191	pcmpeqd	xmm2,xmm5
192	movdqa	XMMWORD PTR[320+r10],xmm1	; mask for indices 26,27
193
194	pcmpeqd	xmm3,xmm5
195	movdqa	XMMWORD PTR[336+r10],xmm2	; mask for indices 28,29
196	pand	xmm0,XMMWORD PTR[64+r12]	; first gather: last four masks are still in xmm0-xmm3
197
198	pand	xmm1,XMMWORD PTR[80+r12]
199	pand	xmm2,XMMWORD PTR[96+r12]
200	movdqa	XMMWORD PTR[352+r10],xmm3	; mask for indices 30,31
201	pand	xmm3,XMMWORD PTR[112+r12]
202	por	xmm0,xmm2
203	por	xmm1,xmm3
204	movdqa	xmm4,XMMWORD PTR[((-128))+r12]	; row bytes 0..63 against masks 0..3
205	movdqa	xmm5,XMMWORD PTR[((-112))+r12]
206	movdqa	xmm2,XMMWORD PTR[((-96))+r12]
207	pand	xmm4,XMMWORD PTR[112+r10]
208	movdqa	xmm3,XMMWORD PTR[((-80))+r12]
209	pand	xmm5,XMMWORD PTR[128+r10]
210	por	xmm0,xmm4
211	pand	xmm2,XMMWORD PTR[144+r10]
212	por	xmm1,xmm5
213	pand	xmm3,XMMWORD PTR[160+r10]
214	por	xmm0,xmm2
215	por	xmm1,xmm3
216	movdqa	xmm4,XMMWORD PTR[((-64))+r12]	; row bytes 64..127 against masks 4..7
217	movdqa	xmm5,XMMWORD PTR[((-48))+r12]
218	movdqa	xmm2,XMMWORD PTR[((-32))+r12]
219	pand	xmm4,XMMWORD PTR[176+r10]
220	movdqa	xmm3,XMMWORD PTR[((-16))+r12]
221	pand	xmm5,XMMWORD PTR[192+r10]
222	por	xmm0,xmm4
223	pand	xmm2,XMMWORD PTR[208+r10]
224	por	xmm1,xmm5
225	pand	xmm3,XMMWORD PTR[224+r10]
226	por	xmm0,xmm2
227	por	xmm1,xmm3
228	movdqa	xmm4,XMMWORD PTR[r12]	; row bytes 128..191 against masks 8..11
229	movdqa	xmm5,XMMWORD PTR[16+r12]
230	movdqa	xmm2,XMMWORD PTR[32+r12]
231	pand	xmm4,XMMWORD PTR[240+r10]
232	movdqa	xmm3,XMMWORD PTR[48+r12]
233	pand	xmm5,XMMWORD PTR[256+r10]
234	por	xmm0,xmm4
235	pand	xmm2,XMMWORD PTR[272+r10]
236	por	xmm1,xmm5
237	pand	xmm3,XMMWORD PTR[288+r10]
238	por	xmm0,xmm2
239	por	xmm1,xmm3
240	por	xmm0,xmm1
241	pshufd	xmm1,xmm0,04eh	; 04eh swaps the two qword halves
242	por	xmm0,xmm1	; fold: low qword now holds the selected entry
243	lea	r12,QWORD PTR[256+r12]	; advance to the next table row
244	movd	rbx,xmm0	; rbx = b, the gathered multiplier word
245
246	mov	r8,QWORD PTR[r8]	; r8 = the qword the 5th argument points at
247	mov	rax,QWORD PTR[rsi]	; rax = a[0]
248
249	xor	r14,r14	; r14 = outer index i = 0
250	xor	r15,r15	; r15 = inner index j = 0
251
252	mov	rbp,r8
253	mul	rbx	; rdx:rax = a[0]*b
254	mov	r10,rax
255	mov	rax,QWORD PTR[rcx]	; rax = n[0]
256
257	imul	rbp,r10	; rbp = m = low word * r8 (mod 2^64)
258	mov	r11,rdx
259
260	mul	rbp	; rdx:rax = n[0]*m
261	add	r10,rax	; low-word sum is not stored; only its carry is used
262	mov	rax,QWORD PTR[8+rsi]	; rax = a[1]
263	adc	rdx,0
264	mov	r13,rdx
265
266	lea	r15,QWORD PTR[1+r15]	; j = 1
267	jmp	$L$1st_enter
268
269ALIGN	16
270$L$1st::
271	add	r13,rax
272	mov	rax,QWORD PTR[r15*8+rsi]	; rax = a[j]
273	adc	rdx,0
274	add	r13,r11	; n[j-1]*m + a[j-1]*b + carries
275	mov	r11,r10
276	adc	rdx,0
277	mov	QWORD PTR[((-16))+r15*8+rsp],r13	; tp[j-2] = r13
278	mov	r13,rdx
279
280$L$1st_enter::
281	mul	rbx	; rdx:rax = a[j]*b
282	add	r11,rax
283	mov	rax,QWORD PTR[r15*8+rcx]	; rax = n[j]
284	adc	rdx,0
285	lea	r15,QWORD PTR[1+r15]	; j++ (lea leaves the flags alone)
286	mov	r10,rdx
287
288	mul	rbp	; rdx:rax = n[j]*m
289	cmp	r15,r9
290	jl	$L$1st
291
292	add	r13,rax
293	mov	rax,QWORD PTR[rsi]	; reload a[0] for the next outer pass
294	adc	rdx,0
295	add	r13,r11
296	adc	rdx,0
297	mov	QWORD PTR[((-16))+r15*8+rsp],r13	; tp[num-2]
298	mov	r13,rdx
299	mov	r11,r10
300
301	xor	rdx,rdx
302	add	r13,r11
303	adc	rdx,0
304	mov	QWORD PTR[((-8))+r9*8+rsp],r13	; tp[num-1]
305	mov	QWORD PTR[r9*8+rsp],rdx	; tp[num] = topmost carry
306
307	lea	r14,QWORD PTR[1+r14]	; i = 1
308	jmp	$L$outer
309ALIGN	16
310$L$outer::
311	lea	rdx,QWORD PTR[((24+128))+r9*8+rsp]
312	and	rdx,-16	; rdx-128 = start of the sixteen stored masks
313	pxor	xmm4,xmm4
314	pxor	xmm5,xmm5
315	movdqa	xmm0,XMMWORD PTR[((-128))+r12]	; read the whole 256-byte row, AND each 16 bytes with its mask
316	movdqa	xmm1,XMMWORD PTR[((-112))+r12]
317	movdqa	xmm2,XMMWORD PTR[((-96))+r12]
318	movdqa	xmm3,XMMWORD PTR[((-80))+r12]
319	pand	xmm0,XMMWORD PTR[((-128))+rdx]
320	pand	xmm1,XMMWORD PTR[((-112))+rdx]
321	por	xmm4,xmm0
322	pand	xmm2,XMMWORD PTR[((-96))+rdx]
323	por	xmm5,xmm1
324	pand	xmm3,XMMWORD PTR[((-80))+rdx]
325	por	xmm4,xmm2
326	por	xmm5,xmm3
327	movdqa	xmm0,XMMWORD PTR[((-64))+r12]
328	movdqa	xmm1,XMMWORD PTR[((-48))+r12]
329	movdqa	xmm2,XMMWORD PTR[((-32))+r12]
330	movdqa	xmm3,XMMWORD PTR[((-16))+r12]
331	pand	xmm0,XMMWORD PTR[((-64))+rdx]
332	pand	xmm1,XMMWORD PTR[((-48))+rdx]
333	por	xmm4,xmm0
334	pand	xmm2,XMMWORD PTR[((-32))+rdx]
335	por	xmm5,xmm1
336	pand	xmm3,XMMWORD PTR[((-16))+rdx]
337	por	xmm4,xmm2
338	por	xmm5,xmm3
339	movdqa	xmm0,XMMWORD PTR[r12]
340	movdqa	xmm1,XMMWORD PTR[16+r12]
341	movdqa	xmm2,XMMWORD PTR[32+r12]
342	movdqa	xmm3,XMMWORD PTR[48+r12]
343	pand	xmm0,XMMWORD PTR[rdx]
344	pand	xmm1,XMMWORD PTR[16+rdx]
345	por	xmm4,xmm0
346	pand	xmm2,XMMWORD PTR[32+rdx]
347	por	xmm5,xmm1
348	pand	xmm3,XMMWORD PTR[48+rdx]
349	por	xmm4,xmm2
350	por	xmm5,xmm3
351	movdqa	xmm0,XMMWORD PTR[64+r12]
352	movdqa	xmm1,XMMWORD PTR[80+r12]
353	movdqa	xmm2,XMMWORD PTR[96+r12]
354	movdqa	xmm3,XMMWORD PTR[112+r12]
355	pand	xmm0,XMMWORD PTR[64+rdx]
356	pand	xmm1,XMMWORD PTR[80+rdx]
357	por	xmm4,xmm0
358	pand	xmm2,XMMWORD PTR[96+rdx]
359	por	xmm5,xmm1
360	pand	xmm3,XMMWORD PTR[112+rdx]
361	por	xmm4,xmm2
362	por	xmm5,xmm3
363	por	xmm4,xmm5
364	pshufd	xmm0,xmm4,04eh	; swap qword halves
365	por	xmm0,xmm4	; low qword = selected entry
366	lea	r12,QWORD PTR[256+r12]	; next table row
367	movd	rbx,xmm0	; rbx = b for this outer pass
368
369	xor	r15,r15	; j = 0
370	mov	rbp,r8
371	mov	r10,QWORD PTR[rsp]	; r10 = tp[0]
372
373	mul	rbx	; rax still holds a[0] here
374	add	r10,rax	; a[0]*b + tp[0]
375	mov	rax,QWORD PTR[rcx]	; rax = n[0]
376	adc	rdx,0
377
378	imul	rbp,r10	; rbp = m = low word * r8 (mod 2^64)
379	mov	r11,rdx
380
381	mul	rbp	; rdx:rax = n[0]*m
382	add	r10,rax	; low-word sum is not stored; only its carry is used
383	mov	rax,QWORD PTR[8+rsi]	; rax = a[1]
384	adc	rdx,0
385	mov	r10,QWORD PTR[8+rsp]	; r10 = tp[1]
386	mov	r13,rdx
387
388	lea	r15,QWORD PTR[1+r15]	; j = 1
389	jmp	$L$inner_enter
390
391ALIGN	16
392$L$inner::
393	add	r13,rax
394	mov	rax,QWORD PTR[r15*8+rsi]	; rax = a[j]
395	adc	rdx,0
396	add	r13,r10	; + (a[j-1]*b + tp[j-1])
397	mov	r10,QWORD PTR[r15*8+rsp]	; r10 = tp[j]
398	adc	rdx,0
399	mov	QWORD PTR[((-16))+r15*8+rsp],r13	; tp[j-2] = r13
400	mov	r13,rdx
401
402$L$inner_enter::
403	mul	rbx	; rdx:rax = a[j]*b
404	add	r11,rax
405	mov	rax,QWORD PTR[r15*8+rcx]	; rax = n[j]
406	adc	rdx,0
407	add	r10,r11	; a[j]*b + tp[j]
408	mov	r11,rdx
409	adc	r11,0
410	lea	r15,QWORD PTR[1+r15]	; j++
411
412	mul	rbp	; rdx:rax = n[j]*m
413	cmp	r15,r9
414	jl	$L$inner
415
416	add	r13,rax
417	mov	rax,QWORD PTR[rsi]	; reload a[0] for the next outer pass
418	adc	rdx,0
419	add	r13,r10
420	mov	r10,QWORD PTR[r15*8+rsp]	; r10 = tp[num], the previous topmost carry
421	adc	rdx,0
422	mov	QWORD PTR[((-16))+r15*8+rsp],r13	; tp[num-2]
423	mov	r13,rdx
424
425	xor	rdx,rdx
426	add	r13,r11
427	adc	rdx,0
428	add	r13,r10	; fold in the previous topmost carry
429	adc	rdx,0
430	mov	QWORD PTR[((-8))+r9*8+rsp],r13	; tp[num-1]
431	mov	QWORD PTR[r9*8+rsp],rdx	; tp[num] = new topmost carry
432
433	lea	r14,QWORD PTR[1+r14]	; i++
434	cmp	r14,r9
435	jl	$L$outer
436
437	xor	r14,r14	; i = 0; also clears CF for the sbb chain below
438	mov	rax,QWORD PTR[rsp]	; rax = tp[0]
439	lea	rsi,QWORD PTR[rsp]	; rsi now points at tp[]
440	mov	r15,r9	; r15 = num iterations
441	jmp	$L$sub
442ALIGN	16
443$L$sub::	sbb	rax,QWORD PTR[r14*8+rcx]	; tp[i] - n[i] - borrow
444	mov	QWORD PTR[r14*8+rdi],rax	; result[i] = difference
445	mov	rax,QWORD PTR[8+r14*8+rsi]	; rax = tp[i+1]
446	lea	r14,QWORD PTR[1+r14]
447	dec	r15	; dec leaves CF (the running borrow) untouched
448	jnz	$L$sub
449
450	sbb	rax,0	; rax = tp[num] - borrow: all-ones iff the subtraction underflowed
451	xor	r14,r14
452	and	rsi,rax	; rsi = tp if underflow, else 0
453	not	rax
454	mov	rcx,rdi
455	and	rcx,rax	; rcx = result buffer if no underflow, else 0
456	mov	r15,r9
457	or	rsi,rcx	; rsi = source of the final value, chosen without a branch
458ALIGN	16
459$L$copy::
460	mov	rax,QWORD PTR[r14*8+rsi]
461	mov	QWORD PTR[r14*8+rsp],r14	; overwrite tp[i] (with the index value i)
462	mov	QWORD PTR[r14*8+rdi],rax	; result[i] = selected word
463	lea	r14,QWORD PTR[1+r14]
464	sub	r15,1
465	jnz	$L$copy
466
467	mov	rsi,QWORD PTR[8+r9*8+rsp]	; rsi = rsp saved at $L$mul_alloca
468	mov	rax,1	; return value
469
470	mov	r15,QWORD PTR[rsi]	; reload the six pushed registers
471	mov	r14,QWORD PTR[8+rsi]
472	mov	r13,QWORD PTR[16+rsi]
473	mov	r12,QWORD PTR[24+rsi]
474	mov	rbp,QWORD PTR[32+rsi]
475	mov	rbx,QWORD PTR[40+rsi]
476	lea	rsp,QWORD PTR[48+rsi]	; rsp back to its entry value
477$L$mul_epilogue::
478	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
479	mov	rsi,QWORD PTR[16+rsp]
480	DB	0F3h,0C3h		;repret
481$L$SEH_end_bn_mul_mont_gather5::
482bn_mul_mont_gather5	ENDP
483
484ALIGN	16
;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5 (PRIVATE) -- same computation as
; bn_mul_mont_gather5 with the word loops unrolled four times.
;
; Reached either through its own Win64 prologue or, with the registers
; already shuffled, through $L$mul4x_enter from bn_mul_mont_gather5
; (which only jumps here for word counts that are a multiple of 4 and
; at least 8).  No such check is made in this procedure itself.
;
; Register use after the prologue:
;   rdi = destination (spilled to tp[num+2]; rdi is then reused as an
;         accumulator and reloaded before the final subtraction)
;   rsi = first operand a[], rcx = modulus n[], r9 = word count
;   r12 = gather table + 128, advanced by 256 per multiplier word
;   r8  = qword loaded through the 5th argument, rbp = per-row factor m
;   rbx = gathered multiplier word b
;   r10/r11 alternate as the a[j]*b carry chain,
;   r13/rdi alternate as the n[j]*m carry chain
; Returns rax = 1.
;-----------------------------------------------------------------------
485bn_mul4x_mont_gather5	PROC PRIVATE
486	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
487	mov	QWORD PTR[16+rsp],rsi
488	mov	rax,rsp	; rax = rsp at entry
489$L$SEH_begin_bn_mul4x_mont_gather5::
490	mov	rdi,rcx	; rdi = 1st argument
491	mov	rsi,rdx	; rsi = 2nd argument
492	mov	rdx,r8	; rdx = 3rd argument
493	mov	rcx,r9	; rcx = 4th argument
494	mov	r8,QWORD PTR[40+rsp]	; r8 = 5th argument
495	mov	r9,QWORD PTR[48+rsp]	; r9 = 6th argument
496
497
498$L$mul4x_enter::
499	mov	r9d,r9d	; zero-extend the 32-bit word count into r9
500	movd	xmm5,DWORD PTR[56+rsp]	; xmm5 = table index (7th argument)
501	lea	r10,QWORD PTR[$L$inc]	; r10 -> {0,0,1,1},{2,2,2,2} constants
502	push	rbx
503	push	rbp
504	push	r12
505	push	r13
506	push	r14
507	push	r15
508
509$L$mul4x_alloca::
510	mov	rax,rsp	; rax = rsp after the six pushes
511	lea	r11,QWORD PTR[4+r9]
512	neg	r11	; r11 = -(num+4)
513	lea	rsp,QWORD PTR[((-256))+r11*8+rsp]	; reserve 8*(num+4)+256 bytes
514	and	rsp,-1024	; round the frame down to a 1024-byte boundary
515
516	mov	QWORD PTR[8+r9*8+rsp],rax	; tp[num+1] = pre-allocation rsp
517$L$mul4x_body::
518	mov	QWORD PTR[16+r9*8+rsp],rdi	; tp[num+2] = destination pointer
519	lea	r12,QWORD PTR[128+rdx]	; r12 = table+128: a row is addressed as -128..+112
520	movdqa	xmm0,XMMWORD PTR[r10]	; xmm0 = {0,0,1,1}
521	movdqa	xmm1,XMMWORD PTR[16+r10]	; xmm1 = {2,2,2,2}
522	lea	r10,QWORD PTR[((32-112))+r9*8+rsp]	; r10+112 = mask area; 16-byte aligned when num is even (rsp is 1024-aligned)
523
524	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
525	movdqa	xmm4,xmm1	; xmm4 = constant step {2,2,2,2}
526DB	067h,067h	; two address-size prefix bytes in front of the next instruction (presumably padding)
527	movdqa	xmm2,xmm1
528	paddd	xmm1,xmm0	; next pair of candidate indices
529	pcmpeqd	xmm0,xmm5	; all-ones in the qword lane whose number equals the index
530DB	067h	; address-size prefix byte in front of the next instruction (presumably padding)
531	movdqa	xmm3,xmm4
532	paddd	xmm2,xmm1
533	pcmpeqd	xmm1,xmm5
534	movdqa	XMMWORD PTR[112+r10],xmm0	; mask for indices 0,1
535	movdqa	xmm0,xmm4
536
537	paddd	xmm3,xmm2
538	pcmpeqd	xmm2,xmm5
539	movdqa	XMMWORD PTR[128+r10],xmm1	; mask for indices 2,3
540	movdqa	xmm1,xmm4
541
542	paddd	xmm0,xmm3
543	pcmpeqd	xmm3,xmm5
544	movdqa	XMMWORD PTR[144+r10],xmm2	; mask for indices 4,5
545	movdqa	xmm2,xmm4
546
547	paddd	xmm1,xmm0
548	pcmpeqd	xmm0,xmm5
549	movdqa	XMMWORD PTR[160+r10],xmm3	; mask for indices 6,7
550	movdqa	xmm3,xmm4
551	paddd	xmm2,xmm1
552	pcmpeqd	xmm1,xmm5
553	movdqa	XMMWORD PTR[176+r10],xmm0	; mask for indices 8,9
554	movdqa	xmm0,xmm4
555
556	paddd	xmm3,xmm2
557	pcmpeqd	xmm2,xmm5
558	movdqa	XMMWORD PTR[192+r10],xmm1	; mask for indices 10,11
559	movdqa	xmm1,xmm4
560
561	paddd	xmm0,xmm3
562	pcmpeqd	xmm3,xmm5
563	movdqa	XMMWORD PTR[208+r10],xmm2	; mask for indices 12,13
564	movdqa	xmm2,xmm4
565
566	paddd	xmm1,xmm0
567	pcmpeqd	xmm0,xmm5
568	movdqa	XMMWORD PTR[224+r10],xmm3	; mask for indices 14,15
569	movdqa	xmm3,xmm4
570	paddd	xmm2,xmm1
571	pcmpeqd	xmm1,xmm5
572	movdqa	XMMWORD PTR[240+r10],xmm0	; mask for indices 16,17
573	movdqa	xmm0,xmm4
574
575	paddd	xmm3,xmm2
576	pcmpeqd	xmm2,xmm5
577	movdqa	XMMWORD PTR[256+r10],xmm1	; mask for indices 18,19
578	movdqa	xmm1,xmm4
579
580	paddd	xmm0,xmm3
581	pcmpeqd	xmm3,xmm5
582	movdqa	XMMWORD PTR[272+r10],xmm2	; mask for indices 20,21
583	movdqa	xmm2,xmm4
584
585	paddd	xmm1,xmm0
586	pcmpeqd	xmm0,xmm5
587	movdqa	XMMWORD PTR[288+r10],xmm3	; mask for indices 22,23
588	movdqa	xmm3,xmm4
589	paddd	xmm2,xmm1
590	pcmpeqd	xmm1,xmm5
591	movdqa	XMMWORD PTR[304+r10],xmm0	; mask for indices 24,25
592
593	paddd	xmm3,xmm2
594DB	067h	; address-size prefix byte in front of the next instruction (presumably padding)
595	pcmpeqd	xmm2,xmm5
596	movdqa	XMMWORD PTR[320+r10],xmm1	; mask for indices 26,27
597
598	pcmpeqd	xmm3,xmm5
599	movdqa	XMMWORD PTR[336+r10],xmm2	; mask for indices 28,29
600	pand	xmm0,XMMWORD PTR[64+r12]	; first gather: last four masks are still in xmm0-xmm3
601
602	pand	xmm1,XMMWORD PTR[80+r12]
603	pand	xmm2,XMMWORD PTR[96+r12]
604	movdqa	XMMWORD PTR[352+r10],xmm3	; mask for indices 30,31
605	pand	xmm3,XMMWORD PTR[112+r12]
606	por	xmm0,xmm2
607	por	xmm1,xmm3
608	movdqa	xmm4,XMMWORD PTR[((-128))+r12]	; row bytes 0..63 against masks 0..3
609	movdqa	xmm5,XMMWORD PTR[((-112))+r12]
610	movdqa	xmm2,XMMWORD PTR[((-96))+r12]
611	pand	xmm4,XMMWORD PTR[112+r10]
612	movdqa	xmm3,XMMWORD PTR[((-80))+r12]
613	pand	xmm5,XMMWORD PTR[128+r10]
614	por	xmm0,xmm4
615	pand	xmm2,XMMWORD PTR[144+r10]
616	por	xmm1,xmm5
617	pand	xmm3,XMMWORD PTR[160+r10]
618	por	xmm0,xmm2
619	por	xmm1,xmm3
620	movdqa	xmm4,XMMWORD PTR[((-64))+r12]	; row bytes 64..127 against masks 4..7
621	movdqa	xmm5,XMMWORD PTR[((-48))+r12]
622	movdqa	xmm2,XMMWORD PTR[((-32))+r12]
623	pand	xmm4,XMMWORD PTR[176+r10]
624	movdqa	xmm3,XMMWORD PTR[((-16))+r12]
625	pand	xmm5,XMMWORD PTR[192+r10]
626	por	xmm0,xmm4
627	pand	xmm2,XMMWORD PTR[208+r10]
628	por	xmm1,xmm5
629	pand	xmm3,XMMWORD PTR[224+r10]
630	por	xmm0,xmm2
631	por	xmm1,xmm3
632	movdqa	xmm4,XMMWORD PTR[r12]	; row bytes 128..191 against masks 8..11
633	movdqa	xmm5,XMMWORD PTR[16+r12]
634	movdqa	xmm2,XMMWORD PTR[32+r12]
635	pand	xmm4,XMMWORD PTR[240+r10]
636	movdqa	xmm3,XMMWORD PTR[48+r12]
637	pand	xmm5,XMMWORD PTR[256+r10]
638	por	xmm0,xmm4
639	pand	xmm2,XMMWORD PTR[272+r10]
640	por	xmm1,xmm5
641	pand	xmm3,XMMWORD PTR[288+r10]
642	por	xmm0,xmm2
643	por	xmm1,xmm3
644	por	xmm0,xmm1
645	pshufd	xmm1,xmm0,04eh	; 04eh swaps the two qword halves
646	por	xmm0,xmm1	; fold: low qword now holds the selected entry
647	lea	r12,QWORD PTR[256+r12]	; advance to the next table row
648	movd	rbx,xmm0	; rbx = b, the gathered multiplier word
649
650	mov	r8,QWORD PTR[r8]	; r8 = the qword the 5th argument points at
651	mov	rax,QWORD PTR[rsi]	; rax = a[0]
652
653	xor	r14,r14	; r14 = outer index i = 0
654	xor	r15,r15	; r15 = inner index j = 0
655
656	mov	rbp,r8
657	mul	rbx	; rdx:rax = a[0]*b
658	mov	r10,rax
659	mov	rax,QWORD PTR[rcx]	; rax = n[0]
660
661	imul	rbp,r10	; rbp = m = low word * r8 (mod 2^64)
662	mov	r11,rdx
663
664	mul	rbp	; rdx:rax = n[0]*m
665	add	r10,rax	; low-word sum is not stored; only its carry is used
666	mov	rax,QWORD PTR[8+rsi]	; rax = a[1]
667	adc	rdx,0
668	mov	rdi,rdx
669
670	mul	rbx	; a[1]*b
671	add	r11,rax
672	mov	rax,QWORD PTR[8+rcx]	; rax = n[1]
673	adc	rdx,0
674	mov	r10,rdx
675
676	mul	rbp	; n[1]*m
677	add	rdi,rax
678	mov	rax,QWORD PTR[16+rsi]	; rax = a[2]
679	adc	rdx,0
680	add	rdi,r11
681	lea	r15,QWORD PTR[4+r15]	; j = 4
682	adc	rdx,0
683	mov	QWORD PTR[rsp],rdi	; tp[0]
684	mov	r13,rdx
685	jmp	$L$1st4x
686ALIGN	16
687$L$1st4x::
688	mul	rbx	; a[j-2]*b
689	add	r10,rax
690	mov	rax,QWORD PTR[((-16))+r15*8+rcx]	; n[j-2]
691	adc	rdx,0
692	mov	r11,rdx
693
694	mul	rbp	; n[j-2]*m
695	add	r13,rax
696	mov	rax,QWORD PTR[((-8))+r15*8+rsi]	; a[j-1]
697	adc	rdx,0
698	add	r13,r10
699	adc	rdx,0
700	mov	QWORD PTR[((-24))+r15*8+rsp],r13	; tp[j-3]
701	mov	rdi,rdx
702
703	mul	rbx	; a[j-1]*b
704	add	r11,rax
705	mov	rax,QWORD PTR[((-8))+r15*8+rcx]	; n[j-1]
706	adc	rdx,0
707	mov	r10,rdx
708
709	mul	rbp	; n[j-1]*m
710	add	rdi,rax
711	mov	rax,QWORD PTR[r15*8+rsi]	; a[j]
712	adc	rdx,0
713	add	rdi,r11
714	adc	rdx,0
715	mov	QWORD PTR[((-16))+r15*8+rsp],rdi	; tp[j-2]
716	mov	r13,rdx
717
718	mul	rbx	; a[j]*b
719	add	r10,rax
720	mov	rax,QWORD PTR[r15*8+rcx]	; n[j]
721	adc	rdx,0
722	mov	r11,rdx
723
724	mul	rbp	; n[j]*m
725	add	r13,rax
726	mov	rax,QWORD PTR[8+r15*8+rsi]	; a[j+1]
727	adc	rdx,0
728	add	r13,r10
729	adc	rdx,0
730	mov	QWORD PTR[((-8))+r15*8+rsp],r13	; tp[j-1]
731	mov	rdi,rdx
732
733	mul	rbx	; a[j+1]*b
734	add	r11,rax
735	mov	rax,QWORD PTR[8+r15*8+rcx]	; n[j+1]
736	adc	rdx,0
737	lea	r15,QWORD PTR[4+r15]	; j += 4 (lea leaves the flags alone)
738	mov	r10,rdx
739
740	mul	rbp	; n[j-3]*m in terms of the updated j
741	add	rdi,rax
742	mov	rax,QWORD PTR[((-16))+r15*8+rsi]	; a[j-2]
743	adc	rdx,0
744	add	rdi,r11
745	adc	rdx,0
746	mov	QWORD PTR[((-32))+r15*8+rsp],rdi	; tp[j-4]
747	mov	r13,rdx
748	cmp	r15,r9
749	jl	$L$1st4x
750
751	mul	rbx	; tail: last two words of the first pass
752	add	r10,rax
753	mov	rax,QWORD PTR[((-16))+r15*8+rcx]
754	adc	rdx,0
755	mov	r11,rdx
756
757	mul	rbp
758	add	r13,rax
759	mov	rax,QWORD PTR[((-8))+r15*8+rsi]
760	adc	rdx,0
761	add	r13,r10
762	adc	rdx,0
763	mov	QWORD PTR[((-24))+r15*8+rsp],r13
764	mov	rdi,rdx
765
766	mul	rbx
767	add	r11,rax
768	mov	rax,QWORD PTR[((-8))+r15*8+rcx]
769	adc	rdx,0
770	mov	r10,rdx
771
772	mul	rbp
773	add	rdi,rax
774	mov	rax,QWORD PTR[rsi]	; reload a[0] for the next outer pass
775	adc	rdx,0
776	add	rdi,r11
777	adc	rdx,0
778	mov	QWORD PTR[((-16))+r15*8+rsp],rdi
779	mov	r13,rdx
780
781	xor	rdi,rdi
782	add	r13,r10
783	adc	rdi,0
784	mov	QWORD PTR[((-8))+r15*8+rsp],r13	; top result word of this pass
785	mov	QWORD PTR[r15*8+rsp],rdi	; topmost carry
786
787	lea	r14,QWORD PTR[1+r14]	; i = 1
788ALIGN	4
789$L$outer4x::
790	lea	rdx,QWORD PTR[((32+128))+r9*8+rsp]	; rdx-128 = start of the sixteen stored masks
791	pxor	xmm4,xmm4
792	pxor	xmm5,xmm5
793	movdqa	xmm0,XMMWORD PTR[((-128))+r12]	; read the whole 256-byte row, AND each 16 bytes with its mask
794	movdqa	xmm1,XMMWORD PTR[((-112))+r12]
795	movdqa	xmm2,XMMWORD PTR[((-96))+r12]
796	movdqa	xmm3,XMMWORD PTR[((-80))+r12]
797	pand	xmm0,XMMWORD PTR[((-128))+rdx]
798	pand	xmm1,XMMWORD PTR[((-112))+rdx]
799	por	xmm4,xmm0
800	pand	xmm2,XMMWORD PTR[((-96))+rdx]
801	por	xmm5,xmm1
802	pand	xmm3,XMMWORD PTR[((-80))+rdx]
803	por	xmm4,xmm2
804	por	xmm5,xmm3
805	movdqa	xmm0,XMMWORD PTR[((-64))+r12]
806	movdqa	xmm1,XMMWORD PTR[((-48))+r12]
807	movdqa	xmm2,XMMWORD PTR[((-32))+r12]
808	movdqa	xmm3,XMMWORD PTR[((-16))+r12]
809	pand	xmm0,XMMWORD PTR[((-64))+rdx]
810	pand	xmm1,XMMWORD PTR[((-48))+rdx]
811	por	xmm4,xmm0
812	pand	xmm2,XMMWORD PTR[((-32))+rdx]
813	por	xmm5,xmm1
814	pand	xmm3,XMMWORD PTR[((-16))+rdx]
815	por	xmm4,xmm2
816	por	xmm5,xmm3
817	movdqa	xmm0,XMMWORD PTR[r12]
818	movdqa	xmm1,XMMWORD PTR[16+r12]
819	movdqa	xmm2,XMMWORD PTR[32+r12]
820	movdqa	xmm3,XMMWORD PTR[48+r12]
821	pand	xmm0,XMMWORD PTR[rdx]
822	pand	xmm1,XMMWORD PTR[16+rdx]
823	por	xmm4,xmm0
824	pand	xmm2,XMMWORD PTR[32+rdx]
825	por	xmm5,xmm1
826	pand	xmm3,XMMWORD PTR[48+rdx]
827	por	xmm4,xmm2
828	por	xmm5,xmm3
829	movdqa	xmm0,XMMWORD PTR[64+r12]
830	movdqa	xmm1,XMMWORD PTR[80+r12]
831	movdqa	xmm2,XMMWORD PTR[96+r12]
832	movdqa	xmm3,XMMWORD PTR[112+r12]
833	pand	xmm0,XMMWORD PTR[64+rdx]
834	pand	xmm1,XMMWORD PTR[80+rdx]
835	por	xmm4,xmm0
836	pand	xmm2,XMMWORD PTR[96+rdx]
837	por	xmm5,xmm1
838	pand	xmm3,XMMWORD PTR[112+rdx]
839	por	xmm4,xmm2
840	por	xmm5,xmm3
841	por	xmm4,xmm5
842	pshufd	xmm0,xmm4,04eh	; swap qword halves
843	por	xmm0,xmm4	; low qword = selected entry
844	lea	r12,QWORD PTR[256+r12]	; next table row
845	movd	rbx,xmm0	; rbx = b for this outer pass
846
847	xor	r15,r15	; j = 0
848
849	mov	r10,QWORD PTR[rsp]	; r10 = tp[0]
850	mov	rbp,r8
851	mul	rbx	; rax still holds a[0] here
852	add	r10,rax	; a[0]*b + tp[0]
853	mov	rax,QWORD PTR[rcx]	; rax = n[0]
854	adc	rdx,0
855
856	imul	rbp,r10	; rbp = m = low word * r8 (mod 2^64)
857	mov	r11,rdx
858
859	mul	rbp	; n[0]*m
860	add	r10,rax	; low-word sum is not stored; only its carry is used
861	mov	rax,QWORD PTR[8+rsi]	; rax = a[1]
862	adc	rdx,0
863	mov	rdi,rdx
864
865	mul	rbx	; a[1]*b
866	add	r11,rax
867	mov	rax,QWORD PTR[8+rcx]	; rax = n[1]
868	adc	rdx,0
869	add	r11,QWORD PTR[8+rsp]	; + tp[1]
870	adc	rdx,0
871	mov	r10,rdx
872
873	mul	rbp	; n[1]*m
874	add	rdi,rax
875	mov	rax,QWORD PTR[16+rsi]	; rax = a[2]
876	adc	rdx,0
877	add	rdi,r11
878	lea	r15,QWORD PTR[4+r15]	; j = 4
879	adc	rdx,0
880	mov	r13,rdx
881	jmp	$L$inner4x
882ALIGN	16
883$L$inner4x::
884	mul	rbx	; a[j-2]*b
885	add	r10,rax
886	mov	rax,QWORD PTR[((-16))+r15*8+rcx]	; n[j-2]
887	adc	rdx,0
888	add	r10,QWORD PTR[((-16))+r15*8+rsp]	; + tp[j-2]
889	adc	rdx,0
890	mov	r11,rdx
891
892	mul	rbp	; n[j-2]*m
893	add	r13,rax
894	mov	rax,QWORD PTR[((-8))+r15*8+rsi]	; a[j-1]
895	adc	rdx,0
896	add	r13,r10
897	adc	rdx,0
898	mov	QWORD PTR[((-32))+r15*8+rsp],rdi	; tp[j-4] (stores lag one step behind the 1st-pass loop)
899	mov	rdi,rdx
900
901	mul	rbx	; a[j-1]*b
902	add	r11,rax
903	mov	rax,QWORD PTR[((-8))+r15*8+rcx]	; n[j-1]
904	adc	rdx,0
905	add	r11,QWORD PTR[((-8))+r15*8+rsp]	; + tp[j-1]
906	adc	rdx,0
907	mov	r10,rdx
908
909	mul	rbp	; n[j-1]*m
910	add	rdi,rax
911	mov	rax,QWORD PTR[r15*8+rsi]	; a[j]
912	adc	rdx,0
913	add	rdi,r11
914	adc	rdx,0
915	mov	QWORD PTR[((-24))+r15*8+rsp],r13	; tp[j-3]
916	mov	r13,rdx
917
918	mul	rbx	; a[j]*b
919	add	r10,rax
920	mov	rax,QWORD PTR[r15*8+rcx]	; n[j]
921	adc	rdx,0
922	add	r10,QWORD PTR[r15*8+rsp]	; + tp[j]
923	adc	rdx,0
924	mov	r11,rdx
925
926	mul	rbp	; n[j]*m
927	add	r13,rax
928	mov	rax,QWORD PTR[8+r15*8+rsi]	; a[j+1]
929	adc	rdx,0
930	add	r13,r10
931	adc	rdx,0
932	mov	QWORD PTR[((-16))+r15*8+rsp],rdi	; tp[j-2]
933	mov	rdi,rdx
934
935	mul	rbx	; a[j+1]*b
936	add	r11,rax
937	mov	rax,QWORD PTR[8+r15*8+rcx]	; n[j+1]
938	adc	rdx,0
939	add	r11,QWORD PTR[8+r15*8+rsp]	; + tp[j+1]
940	adc	rdx,0
941	lea	r15,QWORD PTR[4+r15]	; j += 4 (lea leaves the flags alone)
942	mov	r10,rdx
943
944	mul	rbp	; n[j-3]*m in terms of the updated j
945	add	rdi,rax
946	mov	rax,QWORD PTR[((-16))+r15*8+rsi]	; a[j-2]
947	adc	rdx,0
948	add	rdi,r11
949	adc	rdx,0
950	mov	QWORD PTR[((-40))+r15*8+rsp],r13	; tp[j-5]
951	mov	r13,rdx
952	cmp	r15,r9
953	jl	$L$inner4x
954
955	mul	rbx	; tail: last two words of this pass
956	add	r10,rax
957	mov	rax,QWORD PTR[((-16))+r15*8+rcx]
958	adc	rdx,0
959	add	r10,QWORD PTR[((-16))+r15*8+rsp]
960	adc	rdx,0
961	mov	r11,rdx
962
963	mul	rbp
964	add	r13,rax
965	mov	rax,QWORD PTR[((-8))+r15*8+rsi]
966	adc	rdx,0
967	add	r13,r10
968	adc	rdx,0
969	mov	QWORD PTR[((-32))+r15*8+rsp],rdi
970	mov	rdi,rdx
971
972	mul	rbx
973	add	r11,rax
974	mov	rax,QWORD PTR[((-8))+r15*8+rcx]
975	adc	rdx,0
976	add	r11,QWORD PTR[((-8))+r15*8+rsp]
977	adc	rdx,0
978	lea	r14,QWORD PTR[1+r14]	; i++
979	mov	r10,rdx
980
981	mul	rbp
982	add	rdi,rax
983	mov	rax,QWORD PTR[rsi]	; reload a[0] for the next outer pass
984	adc	rdx,0
985	add	rdi,r11
986	adc	rdx,0
987	mov	QWORD PTR[((-24))+r15*8+rsp],r13
988	mov	r13,rdx
989
990	mov	QWORD PTR[((-16))+r15*8+rsp],rdi
991
992	xor	rdi,rdi
993	add	r13,r10
994	adc	rdi,0
995	add	r13,QWORD PTR[r9*8+rsp]	; fold in the previous topmost carry, tp[num]
996	adc	rdi,0
997	mov	QWORD PTR[((-8))+r15*8+rsp],r13	; top result word of this pass
998	mov	QWORD PTR[r15*8+rsp],rdi	; new topmost carry
999
1000	cmp	r14,r9
1001	jl	$L$outer4x
1002	mov	rdi,QWORD PTR[16+r9*8+rsp]	; reload the destination pointer spilled at $L$mul4x_body
1003	mov	rax,QWORD PTR[rsp]	; rax = tp[0]
1004	pxor	xmm0,xmm0	; xmm0 = 0, used below to wipe tp[]
1005	mov	rdx,QWORD PTR[8+rsp]	; rdx = tp[1]
1006	shr	r9,2	; r9 = num/4
1007	lea	rsi,QWORD PTR[rsp]	; rsi now points at tp[]
1008	xor	r14,r14	; i = 0
1009
1010	sub	rax,QWORD PTR[rcx]	; tp[0]-n[0]; starts the borrow chain
1011	mov	rbx,QWORD PTR[16+rsi]	; rbx = tp[2]
1012	mov	rbp,QWORD PTR[24+rsi]	; rbp = tp[3]
1013	sbb	rdx,QWORD PTR[8+rcx]	; tp[1]-n[1]-borrow
1014	lea	r15,QWORD PTR[((-1))+r9]	; num/4-1 loop iterations
1015	jmp	$L$sub4x
1016ALIGN	16
1017$L$sub4x::
1018	mov	QWORD PTR[r14*8+rdi],rax	; only mov/lea/dec sit between the sbb's, so CF carries the borrow through
1019	mov	QWORD PTR[8+r14*8+rdi],rdx
1020	sbb	rbx,QWORD PTR[16+r14*8+rcx]
1021	mov	rax,QWORD PTR[32+r14*8+rsi]
1022	mov	rdx,QWORD PTR[40+r14*8+rsi]
1023	sbb	rbp,QWORD PTR[24+r14*8+rcx]
1024	mov	QWORD PTR[16+r14*8+rdi],rbx
1025	mov	QWORD PTR[24+r14*8+rdi],rbp
1026	sbb	rax,QWORD PTR[32+r14*8+rcx]
1027	mov	rbx,QWORD PTR[48+r14*8+rsi]
1028	mov	rbp,QWORD PTR[56+r14*8+rsi]
1029	sbb	rdx,QWORD PTR[40+r14*8+rcx]
1030	lea	r14,QWORD PTR[4+r14]
1031	dec	r15	; dec leaves CF (the running borrow) untouched
1032	jnz	$L$sub4x
1033
1034	mov	QWORD PTR[r14*8+rdi],rax
1035	mov	rax,QWORD PTR[32+r14*8+rsi]	; rax = tp[num], the topmost carry word
1036	sbb	rbx,QWORD PTR[16+r14*8+rcx]
1037	mov	QWORD PTR[8+r14*8+rdi],rdx
1038	sbb	rbp,QWORD PTR[24+r14*8+rcx]
1039	mov	QWORD PTR[16+r14*8+rdi],rbx
1040
1041	sbb	rax,0	; rax = tp[num] - borrow: all-ones iff the subtraction underflowed
1042	mov	QWORD PTR[24+r14*8+rdi],rbp
1043	xor	r14,r14	; byte offset for the copy loop = 0
1044	and	rsi,rax	; rsi = tp if underflow, else 0
1045	not	rax
1046	mov	rcx,rdi
1047	and	rcx,rax	; rcx = result buffer if no underflow, else 0
1048	lea	r15,QWORD PTR[((-1))+r9]	; num/4-1 loop iterations
1049	or	rsi,rcx	; rsi = source of the final value, chosen without a branch
1050
1051	movdqu	xmm1,XMMWORD PTR[rsi]
1052	movdqa	XMMWORD PTR[rsp],xmm0	; zero tp[0..1]
1053	movdqu	XMMWORD PTR[rdi],xmm1
1054	jmp	$L$copy4x
1055ALIGN	16
1056$L$copy4x::
1057	movdqu	xmm2,XMMWORD PTR[16+r14*1+rsi]	; copy 32 bytes per iteration ...
1058	movdqu	xmm1,XMMWORD PTR[32+r14*1+rsi]
1059	movdqa	XMMWORD PTR[16+r14*1+rsp],xmm0	; ... and zero the matching tp[] bytes
1060	movdqu	XMMWORD PTR[16+r14*1+rdi],xmm2
1061	movdqa	XMMWORD PTR[32+r14*1+rsp],xmm0
1062	movdqu	XMMWORD PTR[32+r14*1+rdi],xmm1
1063	lea	r14,QWORD PTR[32+r14]
1064	dec	r15
1065	jnz	$L$copy4x
1066
1067	shl	r9,2	; r9 = num again
1068	movdqu	xmm2,XMMWORD PTR[16+r14*1+rsi]	; final 16 bytes
1069	movdqa	XMMWORD PTR[16+r14*1+rsp],xmm0
1070	movdqu	XMMWORD PTR[16+r14*1+rdi],xmm2
1071	mov	rsi,QWORD PTR[8+r9*8+rsp]	; rsi = rsp saved at $L$mul4x_alloca
1072	mov	rax,1	; return value
1073
1074	mov	r15,QWORD PTR[rsi]	; reload the six pushed registers
1075	mov	r14,QWORD PTR[8+rsi]
1076	mov	r13,QWORD PTR[16+rsi]
1077	mov	r12,QWORD PTR[24+rsi]
1078	mov	rbp,QWORD PTR[32+rsi]
1079	mov	rbx,QWORD PTR[40+rsi]
1080	lea	rsp,QWORD PTR[48+rsi]	; rsp back to its entry value
1081$L$mul4x_epilogue::
1082	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
1083	mov	rsi,QWORD PTR[16+rsp]
1084	DB	0F3h,0C3h		;repret
1085$L$SEH_end_bn_mul4x_mont_gather5::
1086bn_mul4x_mont_gather5	ENDP
1087PUBLIC	bn_scatter5
1088
1089ALIGN	16
;-----------------------------------------------------------------------
; bn_scatter5 -- store a run of 64-bit words into one column of a table
; whose rows are 256 bytes apart.
; In:    rcx = source words, rdx = word count,
;        r8  = table base,   r9  = column index (scaled by 8)
; Out:   nothing; returns immediately when the count is zero
; Clobb: rax, rcx, rdx, r8, flags
;-----------------------------------------------------------------------
1090bn_scatter5	PROC PUBLIC
1091	test	rdx,rdx	; zero words requested?
1092	jz	$L$scatter_epilogue
1093	lea	r8,QWORD PTR[r8+r9*8]	; r8 -> chosen column in the first row
1094$L$scatter::
1095	mov	rax,QWORD PTR[rcx]	; fetch the next source word
1096	add	rcx,8	; advance the source pointer
1097	mov	QWORD PTR[r8],rax	; place it in the current row
1098	add	r8,256	; same column, next row
1099	sub	rdx,1	; one word fewer to go; ZF drives the loop
1100	jnz	$L$scatter
1101$L$scatter_epilogue::
1102	DB	0F3h,0C3h		;repret
1103bn_scatter5	ENDP
1104
1105PUBLIC	bn_gather5
1106
1107ALIGN	16
;-----------------------------------------------------------------------
; bn_gather5 -- read one column out of a table with 256-byte rows by
; masking, one 64-bit word per row.
; In:    rcx = destination words, rdx = word count,
;        r8  = table base,        r9d = column index
; Every iteration loads the entire 256-byte row and ANDs it with sixteen
; precomputed masks, so the addresses touched do not depend on r9d.
; Uses 0x108 bytes of stack for the masks; r10 holds the entry rsp.
; NOTE(review): the loop is bottom-tested and there is no zero-count
; guard (unlike bn_scatter5), so rdx must be non-zero on entry.
;-----------------------------------------------------------------------
1108bn_gather5	PROC PUBLIC
1109$L$SEH_begin_bn_gather5::
1110
1111DB	04ch,08dh,014h,024h	; encodes: lea r10,[rsp]
1112DB	048h,081h,0ech,008h,001h,000h,000h	; encodes: sub rsp,0108h
1113	lea	rax,QWORD PTR[$L$inc]	; rax -> {0,0,1,1},{2,2,2,2} constants
1114	and	rsp,-16	; 16-byte align the mask area for movdqa
1115
1116	movd	xmm5,r9d	; xmm5 = column index
1117	movdqa	xmm0,XMMWORD PTR[rax]	; xmm0 = {0,0,1,1}
1118	movdqa	xmm1,XMMWORD PTR[16+rax]	; xmm1 = {2,2,2,2}
1119	lea	r11,QWORD PTR[128+r8]	; r11 = table+128: a row is addressed as -128..+112
1120	lea	rax,QWORD PTR[128+rsp]	; rax = mask area + 128, addressed the same way
1121
1122	pshufd	xmm5,xmm5,0	; broadcast the index to all four dwords
1123	movdqa	xmm4,xmm1	; xmm4 = constant step {2,2,2,2}
1124	movdqa	xmm2,xmm1
1125	paddd	xmm1,xmm0	; next pair of candidate indices
1126	pcmpeqd	xmm0,xmm5	; all-ones in the qword lane whose number equals the index
1127	movdqa	xmm3,xmm4
1128
1129	paddd	xmm2,xmm1
1130	pcmpeqd	xmm1,xmm5
1131	movdqa	XMMWORD PTR[(-128)+rax],xmm0	; mask for indices 0,1
1132	movdqa	xmm0,xmm4
1133
1134	paddd	xmm3,xmm2
1135	pcmpeqd	xmm2,xmm5
1136	movdqa	XMMWORD PTR[(-112)+rax],xmm1	; mask for indices 2,3
1137	movdqa	xmm1,xmm4
1138
1139	paddd	xmm0,xmm3
1140	pcmpeqd	xmm3,xmm5
1141	movdqa	XMMWORD PTR[(-96)+rax],xmm2	; mask for indices 4,5
1142	movdqa	xmm2,xmm4
1143	paddd	xmm1,xmm0
1144	pcmpeqd	xmm0,xmm5
1145	movdqa	XMMWORD PTR[(-80)+rax],xmm3	; mask for indices 6,7
1146	movdqa	xmm3,xmm4
1147
1148	paddd	xmm2,xmm1
1149	pcmpeqd	xmm1,xmm5
1150	movdqa	XMMWORD PTR[(-64)+rax],xmm0	; mask for indices 8,9
1151	movdqa	xmm0,xmm4
1152
1153	paddd	xmm3,xmm2
1154	pcmpeqd	xmm2,xmm5
1155	movdqa	XMMWORD PTR[(-48)+rax],xmm1	; mask for indices 10,11
1156	movdqa	xmm1,xmm4
1157
1158	paddd	xmm0,xmm3
1159	pcmpeqd	xmm3,xmm5
1160	movdqa	XMMWORD PTR[(-32)+rax],xmm2	; mask for indices 12,13
1161	movdqa	xmm2,xmm4
1162	paddd	xmm1,xmm0
1163	pcmpeqd	xmm0,xmm5
1164	movdqa	XMMWORD PTR[(-16)+rax],xmm3	; mask for indices 14,15
1165	movdqa	xmm3,xmm4
1166
1167	paddd	xmm2,xmm1
1168	pcmpeqd	xmm1,xmm5
1169	movdqa	XMMWORD PTR[rax],xmm0	; mask for indices 16,17
1170	movdqa	xmm0,xmm4
1171
1172	paddd	xmm3,xmm2
1173	pcmpeqd	xmm2,xmm5
1174	movdqa	XMMWORD PTR[16+rax],xmm1	; mask for indices 18,19
1175	movdqa	xmm1,xmm4
1176
1177	paddd	xmm0,xmm3
1178	pcmpeqd	xmm3,xmm5
1179	movdqa	XMMWORD PTR[32+rax],xmm2	; mask for indices 20,21
1180	movdqa	xmm2,xmm4
1181	paddd	xmm1,xmm0
1182	pcmpeqd	xmm0,xmm5
1183	movdqa	XMMWORD PTR[48+rax],xmm3	; mask for indices 22,23
1184	movdqa	xmm3,xmm4
1185
1186	paddd	xmm2,xmm1
1187	pcmpeqd	xmm1,xmm5
1188	movdqa	XMMWORD PTR[64+rax],xmm0	; mask for indices 24,25
1189	movdqa	xmm0,xmm4
1190
1191	paddd	xmm3,xmm2
1192	pcmpeqd	xmm2,xmm5
1193	movdqa	XMMWORD PTR[80+rax],xmm1	; mask for indices 26,27
1194	movdqa	xmm1,xmm4
1195
1196	paddd	xmm0,xmm3
1197	pcmpeqd	xmm3,xmm5
1198	movdqa	XMMWORD PTR[96+rax],xmm2	; mask for indices 28,29
1199	movdqa	xmm2,xmm4
1200	movdqa	XMMWORD PTR[112+rax],xmm3	; mask for indices 30,31
1201	jmp	$L$gather
1202
1203ALIGN	32
1204$L$gather::
1205	pxor	xmm4,xmm4	; two accumulators, merged at the end
1206	pxor	xmm5,xmm5
1207	movdqa	xmm0,XMMWORD PTR[((-128))+r11]	; row bytes 0..63 against masks 0..3
1208	movdqa	xmm1,XMMWORD PTR[((-112))+r11]
1209	movdqa	xmm2,XMMWORD PTR[((-96))+r11]
1210	pand	xmm0,XMMWORD PTR[((-128))+rax]
1211	movdqa	xmm3,XMMWORD PTR[((-80))+r11]
1212	pand	xmm1,XMMWORD PTR[((-112))+rax]
1213	por	xmm4,xmm0
1214	pand	xmm2,XMMWORD PTR[((-96))+rax]
1215	por	xmm5,xmm1
1216	pand	xmm3,XMMWORD PTR[((-80))+rax]
1217	por	xmm4,xmm2
1218	por	xmm5,xmm3
1219	movdqa	xmm0,XMMWORD PTR[((-64))+r11]	; row bytes 64..127 against masks 4..7
1220	movdqa	xmm1,XMMWORD PTR[((-48))+r11]
1221	movdqa	xmm2,XMMWORD PTR[((-32))+r11]
1222	pand	xmm0,XMMWORD PTR[((-64))+rax]
1223	movdqa	xmm3,XMMWORD PTR[((-16))+r11]
1224	pand	xmm1,XMMWORD PTR[((-48))+rax]
1225	por	xmm4,xmm0
1226	pand	xmm2,XMMWORD PTR[((-32))+rax]
1227	por	xmm5,xmm1
1228	pand	xmm3,XMMWORD PTR[((-16))+rax]
1229	por	xmm4,xmm2
1230	por	xmm5,xmm3
1231	movdqa	xmm0,XMMWORD PTR[r11]	; row bytes 128..191 against masks 8..11
1232	movdqa	xmm1,XMMWORD PTR[16+r11]
1233	movdqa	xmm2,XMMWORD PTR[32+r11]
1234	pand	xmm0,XMMWORD PTR[rax]
1235	movdqa	xmm3,XMMWORD PTR[48+r11]
1236	pand	xmm1,XMMWORD PTR[16+rax]
1237	por	xmm4,xmm0
1238	pand	xmm2,XMMWORD PTR[32+rax]
1239	por	xmm5,xmm1
1240	pand	xmm3,XMMWORD PTR[48+rax]
1241	por	xmm4,xmm2
1242	por	xmm5,xmm3
1243	movdqa	xmm0,XMMWORD PTR[64+r11]	; row bytes 192..255 against masks 12..15
1244	movdqa	xmm1,XMMWORD PTR[80+r11]
1245	movdqa	xmm2,XMMWORD PTR[96+r11]
1246	pand	xmm0,XMMWORD PTR[64+rax]
1247	movdqa	xmm3,XMMWORD PTR[112+r11]
1248	pand	xmm1,XMMWORD PTR[80+rax]
1249	por	xmm4,xmm0
1250	pand	xmm2,XMMWORD PTR[96+rax]
1251	por	xmm5,xmm1
1252	pand	xmm3,XMMWORD PTR[112+rax]
1253	por	xmm4,xmm2
1254	por	xmm5,xmm3
1255	por	xmm4,xmm5
1256	lea	r11,QWORD PTR[256+r11]	; next table row
1257	pshufd	xmm0,xmm4,04eh	; swap qword halves
1258	por	xmm0,xmm4	; low qword = selected entry
1259	movq	QWORD PTR[rcx],xmm0	; store the gathered word
1260	lea	rcx,QWORD PTR[8+rcx]
1261	sub	rdx,1
1262	jnz	$L$gather
1263
1264	lea	rsp,QWORD PTR[r10]	; restore the entry rsp saved in r10
1265	DB	0F3h,0C3h		;repret
1266$L$SEH_end_bn_gather5::
1267bn_gather5	ENDP
; Constants for building the gather masks, followed by an ASCII banner.
; The first vector is the initial pair of candidate indices (one index
; per qword lane, duplicated in both dwords); the second is the step
; added to move on to the next pair.
1268ALIGN	64
1269$L$inc::
1270	DD	0,0,1,1	; lanes for indices 0 and 1
1271	DD	2,2,2,2	; per-step increment
; The bytes below spell, NUL-terminated:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1272DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1273DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
1274DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
1275DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
1276DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
1277DB	112,101,110,115,115,108,46,111,114,103,62,0
1278EXTERN	__imp_RtlVirtualUnwind:NEAR
1279
1280ALIGN	16
;-----------------------------------------------------------------------
; mul_handler -- exception handler referenced from the .xdata records of
; bn_mul_mont_gather5 and bn_mul4x_mont_gather5.
;
; Reads a register-context block through r8 and a dispatcher block
; through r9, works out where the faulting function kept its saved
; registers, patches the context accordingly, copies the context to the
; dispatcher's record and calls RtlVirtualUnwind.  Returns eax = 1.
;
; NOTE(review): the field names in the comments below follow the
; documented Win64 CONTEXT and DISPATCHER_CONTEXT layouts; the offsets
; are what the code uses, the names should be confirmed against winnt.h.
;
; The handler data is three image-relative addresses (see .xdata):
;   [0] label after the register pushes, [1] label after the frame is
;   set up, [2] epilogue label.
;-----------------------------------------------------------------------
1281mul_handler	PROC PRIVATE
1282	push	rsi
1283	push	rdi
1284	push	rbx
1285	push	rbp
1286	push	r12
1287	push	r13
1288	push	r14
1289	push	r15
1290	pushfq
1291	sub	rsp,64	; room for the outgoing call's stack arguments
1292
1293	mov	rax,QWORD PTR[120+r8]	; context+120 (Rax)
1294	mov	rbx,QWORD PTR[248+r8]	; context+248 (Rip)
1295
1296	mov	rsi,QWORD PTR[8+r9]	; dispatcher+8 (ImageBase)
1297	mov	r11,QWORD PTR[56+r9]	; dispatcher+56 (HandlerData)
1298
1299	mov	r10d,DWORD PTR[r11]	; HandlerData[0]
1300	lea	r10,QWORD PTR[r10*1+rsi]	; convert to an absolute address
1301	cmp	rbx,r10	; fault before that label: use the context Rax value as the frame
1302	jb	$L$common_seh_tail
1303
1304	lea	rax,QWORD PTR[48+rax]	; step over the six pushed registers
1305
1306	mov	r10d,DWORD PTR[4+r11]	; HandlerData[1]
1307	lea	r10,QWORD PTR[r10*1+rsi]
1308	cmp	rbx,r10	; fault before the frame is fully set up
1309	jb	$L$common_seh_tail
1310
1311	mov	rax,QWORD PTR[152+r8]	; context+152 (Rsp)
1312
1313	mov	r10d,DWORD PTR[8+r11]	; HandlerData[2]
1314	lea	r10,QWORD PTR[r10*1+rsi]
1315	cmp	rbx,r10	; fault at or after the epilogue label: rsp is already restored
1316	jae	$L$common_seh_tail
1317
1318	mov	r10,QWORD PTR[192+r8]	; context+192 (R9); the multiply routines keep the word count in r9
1319	mov	rax,QWORD PTR[8+r10*8+rax]	; stack pointer the routine stored at [8+r9*8+rsp]
1320
1321	lea	rax,QWORD PTR[48+rax]	; rax = stack pointer value from before the six pushes
1322
1323	mov	rbx,QWORD PTR[((-8))+rax]	; pushed registers, in push order
1324	mov	rbp,QWORD PTR[((-16))+rax]
1325	mov	r12,QWORD PTR[((-24))+rax]
1326	mov	r13,QWORD PTR[((-32))+rax]
1327	mov	r14,QWORD PTR[((-40))+rax]
1328	mov	r15,QWORD PTR[((-48))+rax]
1329	mov	QWORD PTR[144+r8],rbx	; context+144 (Rbx)
1330	mov	QWORD PTR[160+r8],rbp	; context+160 (Rbp)
1331	mov	QWORD PTR[216+r8],r12	; context+216 (R12)
1332	mov	QWORD PTR[224+r8],r13	; context+224 (R13)
1333	mov	QWORD PTR[232+r8],r14	; context+232 (R14)
1334	mov	QWORD PTR[240+r8],r15	; context+240 (R15)
1335
1336$L$common_seh_tail::
1337	mov	rdi,QWORD PTR[8+rax]	; rdi/rsi as saved by the "WIN64 prologue" stores
1338	mov	rsi,QWORD PTR[16+rax]
1339	mov	QWORD PTR[152+r8],rax	; context+152 (Rsp)
1340	mov	QWORD PTR[168+r8],rsi	; context+168 (Rsi)
1341	mov	QWORD PTR[176+r8],rdi	; context+176 (Rdi)
1342
1343	mov	rdi,QWORD PTR[40+r9]	; dispatcher+40 (ContextRecord) = copy destination
1344	mov	rsi,r8	; copy source = the context just patched
1345	mov	ecx,154	; 154 qwords = 1232 bytes
1346	DD	0a548f3fch	; bytes FC F3 48 A5 = cld ; rep movsq
1347
1348	mov	rsi,r9
1349	xor	rcx,rcx	; 1st argument = 0
1350	mov	rdx,QWORD PTR[8+rsi]	; 2nd argument = dispatcher+8
1351	mov	r8,QWORD PTR[rsi]	; 3rd argument = dispatcher+0
1352	mov	r9,QWORD PTR[16+rsi]	; 4th argument = dispatcher+16
1353	mov	r10,QWORD PTR[40+rsi]
1354	lea	r11,QWORD PTR[56+rsi]
1355	lea	r12,QWORD PTR[24+rsi]
1356	mov	QWORD PTR[32+rsp],r10	; 5th argument = dispatcher+40 value
1357	mov	QWORD PTR[40+rsp],r11	; 6th argument = address of dispatcher+56
1358	mov	QWORD PTR[48+rsp],r12	; 7th argument = address of dispatcher+24
1359	mov	QWORD PTR[56+rsp],rcx	; 8th argument = 0
1360	call	QWORD PTR[__imp_RtlVirtualUnwind]
1361
1362	mov	eax,1	; return value 1 (ExceptionContinueSearch per the Win64 handler contract -- confirm)
1363	add	rsp,64
1364	popfq
1365	pop	r15
1366	pop	r14
1367	pop	r13
1368	pop	r12
1369	pop	rbp
1370	pop	rbx
1371	pop	rdi
1372	pop	rsi
1373	DB	0F3h,0C3h		;repret
1374mul_handler	ENDP
1375
1376.text$	ENDS
; .pdata: one (begin, end, unwind-info) triple of image-relative
; addresses per function that has unwind data.
1377.pdata	SEGMENT READONLY ALIGN(4)
1378ALIGN	4
1379	DD	imagerel $L$SEH_begin_bn_mul_mont_gather5
1380	DD	imagerel $L$SEH_end_bn_mul_mont_gather5
1381	DD	imagerel $L$SEH_info_bn_mul_mont_gather5
1382
1383	DD	imagerel $L$SEH_begin_bn_mul4x_mont_gather5
1384	DD	imagerel $L$SEH_end_bn_mul4x_mont_gather5
1385	DD	imagerel $L$SEH_info_bn_mul4x_mont_gather5
1386
1387	DD	imagerel $L$SEH_begin_bn_gather5
1388	DD	imagerel $L$SEH_end_bn_gather5
1389	DD	imagerel $L$SEH_info_bn_gather5
1390
1391.pdata	ENDS
; .xdata: unwind descriptions.  The two multiply routines name
; mul_handler and pass it three labels as handler data; bn_gather5 is
; described with plain unwind codes.
; NOTE(review): byte meanings below follow the documented UNWIND_INFO /
; UNWIND_CODE encoding; confirm against the x64 exception-handling docs.
1392.xdata	SEGMENT READONLY ALIGN(8)
1393ALIGN	8
1394$L$SEH_info_bn_mul_mont_gather5::
1395DB	9,0,0,0	; 9 = version 1 with the exception-handler flag; no unwind codes
1396	DD	imagerel mul_handler
1397	DD	imagerel $L$mul_alloca,imagerel $L$mul_body,imagerel $L$mul_epilogue	; HandlerData[0..2] read by mul_handler
1398ALIGN	8
1399$L$SEH_info_bn_mul4x_mont_gather5::
1400DB	9,0,0,0	; 9 = version 1 with the exception-handler flag; no unwind codes
1401	DD	imagerel mul_handler
1402	DD	imagerel $L$mul4x_alloca,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue	; HandlerData[0..2] read by mul_handler
1403ALIGN	8
1404$L$SEH_info_bn_gather5::
1405DB	001h,00bh,003h,00ah	; version 1, prologue size 0Bh, 3 code slots, frame register 0Ah (r10)
1406DB	00bh,001h,021h,000h	; at offset 0Bh: large allocation of 021h*8 = 0108h bytes (the sub rsp,0108h)
1407DB	004h,0a3h,000h,000h	; at offset 04h: set frame register r10 (the lea r10,[rsp]); then padding
1408ALIGN	8
1409
1410.xdata	ENDS
1411END
1412
1413