1; 1 "crypto/bn/modexp512-masm-x86_64.S.tmp"
2; 1 "<built-in>" 1
3; 1 "<built-in>" 3
4; 340 "<built-in>" 3
5; 1 "<command line>" 1
6; 1 "<built-in>" 2
7; 1 "crypto/bn/modexp512-masm-x86_64.S.tmp" 2
8OPTION	DOTNAME
9
10; 1 "./crypto/x86_arch.h" 1
11
12
13; 16 "./crypto/x86_arch.h"
14
15
16
17
18
19
20
21
22
23; 40 "./crypto/x86_arch.h"
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69; 3 "crypto/bn/modexp512-masm-x86_64.S.tmp" 2
70.text$	SEGMENT ALIGN(64) 'CODE'
71
72
73ALIGN	16
;-----------------------------------------------------------------------
; MULADD_128x512 (private helper)
; Multiply-accumulates the eight qwords at [rsi] by two 64-bit
; multipliers, one pass per multiplier.
; In:    rsi -> eight qwords (multiplicand, least significant first)
;        rbp  = first multiplier
;        rdi -> second multiplier is read from [8+rdi]
;        rcx -> destination for the two lowest result qwords
;        r8,r9,r10,r11,r12,r13,r14,r15 = running accumulator, low to high
; Out:   [rcx], [8+rcx] = two lowest result qwords
;        r10,r11,r12,r13,r14,r15,r8,r9 = remaining accumulator, low to high
; Clobb: rax, rbx, rdx, rbp, flags
;-----------------------------------------------------------------------
74MULADD_128x512	PROC PRIVATE
; ---- pass 1: accumulator += [rsi] * rbp; rbx carries the high half
;      of each product into the next column
75	mov	rax,QWORD PTR[rsi]
76	mul	rbp
77	add	r8,rax
78	adc	rdx,0
79	mov	QWORD PTR[rcx],r8	; lowest column is final: store it
80	mov	rbx,rdx
81
82	mov	rax,QWORD PTR[8+rsi]
83	mul	rbp
84	add	r9,rax
85	adc	rdx,0
86	add	r9,rbx
87	adc	rdx,0
88	mov	rbx,rdx
89
90	mov	rax,QWORD PTR[16+rsi]
91	mul	rbp
92	add	r10,rax
93	adc	rdx,0
94	add	r10,rbx
95	adc	rdx,0
96	mov	rbx,rdx
97
98	mov	rax,QWORD PTR[24+rsi]
99	mul	rbp
100	add	r11,rax
101	adc	rdx,0
102	add	r11,rbx
103	adc	rdx,0
104	mov	rbx,rdx
105
106	mov	rax,QWORD PTR[32+rsi]
107	mul	rbp
108	add	r12,rax
109	adc	rdx,0
110	add	r12,rbx
111	adc	rdx,0
112	mov	rbx,rdx
113
114	mov	rax,QWORD PTR[40+rsi]
115	mul	rbp
116	add	r13,rax
117	adc	rdx,0
118	add	r13,rbx
119	adc	rdx,0
120	mov	rbx,rdx
121
122	mov	rax,QWORD PTR[48+rsi]
123	mul	rbp
124	add	r14,rax
125	adc	rdx,0
126	add	r14,rbx
127	adc	rdx,0
128	mov	rbx,rdx
129
130	mov	rax,QWORD PTR[56+rsi]
131	mul	rbp
132	add	r15,rax
133	adc	rdx,0
134	add	r15,rbx
135	adc	rdx,0
136	mov	r8,rdx	; r8 is free again: reuse it as the new top column
; ---- pass 2: same chain with the multiplier at [8+rdi], shifted up
;      one column (starts at r9, ends in r8, new top goes to r9)
137	mov	rbp,QWORD PTR[8+rdi]
138	mov	rax,QWORD PTR[rsi]
139	mul	rbp
140	add	r9,rax
141	adc	rdx,0
142	mov	QWORD PTR[8+rcx],r9	; second-lowest column is final: store it
143	mov	rbx,rdx
144
145	mov	rax,QWORD PTR[8+rsi]
146	mul	rbp
147	add	r10,rax
148	adc	rdx,0
149	add	r10,rbx
150	adc	rdx,0
151	mov	rbx,rdx
152
153	mov	rax,QWORD PTR[16+rsi]
154	mul	rbp
155	add	r11,rax
156	adc	rdx,0
157	add	r11,rbx
158	adc	rdx,0
159	mov	rbx,rdx
160
161	mov	rax,QWORD PTR[24+rsi]
162	mul	rbp
163	add	r12,rax
164	adc	rdx,0
165	add	r12,rbx
166	adc	rdx,0
167	mov	rbx,rdx
168
169	mov	rax,QWORD PTR[32+rsi]
170	mul	rbp
171	add	r13,rax
172	adc	rdx,0
173	add	r13,rbx
174	adc	rdx,0
175	mov	rbx,rdx
176
177	mov	rax,QWORD PTR[40+rsi]
178	mul	rbp
179	add	r14,rax
180	adc	rdx,0
181	add	r14,rbx
182	adc	rdx,0
183	mov	rbx,rdx
184
185	mov	rax,QWORD PTR[48+rsi]
186	mul	rbp
187	add	r15,rax
188	adc	rdx,0
189	add	r15,rbx
190	adc	rdx,0
191	mov	rbx,rdx
192
193	mov	rax,QWORD PTR[56+rsi]
194	mul	rbp
195	add	r8,rax
196	adc	rdx,0
197	add	r8,rbx
198	adc	rdx,0
199	mov	r9,rdx	; r9 = new top column
200	DB	0F3h,0C3h		;repret
201MULADD_128x512	ENDP
202
203ALIGN	16
;-----------------------------------------------------------------------
; mont_reduce (private helper)
; Reduces the 16-qword value at [520+rsp] to 8 qwords and stores them
; through the pointer held at [144+rsp].
; NOTE(review): presumably a Montgomery-style reduction, per the name;
; the role of each constant block below should be confirmed against the
; code that builds the data area.
;
; All rsp offsets here are 8 larger than the same slot as seen from
; mod_exp_512, because the return address pushed by `call` sits at
; [rsp]. mont_mul_a3b and sqr_reduce enter by `jmp`, so the depth is
; the same on every path.
;
; In:    [520+rsp .. 647+rsp] = 16-qword input, least significant first
;        [32+rsp]             = pointer to the constant data area; this
;                               routine reads it at offsets 0, 512, 576,
;                               640 and 704
;        [144+rsp]            = pointer to the 8-qword destination
; Out:   8 result qwords stored at the destination and also left in
;        r10,r11,r12,r13,r14,r15,r8,r9 (low to high)
; Scratch on stack: [192+rsp ...], [296+rsp ...], [384+rsp]
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags
;-----------------------------------------------------------------------
204mont_reduce	PROC PRIVATE
205	lea	rdi,QWORD PTR[192+rsp]	; rdi -> scratch for stage-1 output
206	mov	rsi,QWORD PTR[32+rsp]
207	add	rsi,576	; rsi -> 8-qword constant at data+576
208	lea	rcx,QWORD PTR[520+rsp]	; rcx -> 16-qword input
209
; ---- stage 1: input[0..7] += (data+576) * input[12..15]
;      four unrolled passes, one per multiplier qword; each pass stores
;      its lowest column to [rdi + 8*pass]
; pass 0: multiplier = input[12]
210	mov	rbp,QWORD PTR[96+rcx]
211	mov	rax,QWORD PTR[rsi]
212	mul	rbp
213	mov	r8,QWORD PTR[rcx]
214	add	r8,rax
215	adc	rdx,0
216	mov	QWORD PTR[rdi],r8
217	mov	rbx,rdx
218
219	mov	rax,QWORD PTR[8+rsi]
220	mul	rbp
221	mov	r9,QWORD PTR[8+rcx]
222	add	r9,rax
223	adc	rdx,0
224	add	r9,rbx
225	adc	rdx,0
226	mov	rbx,rdx
227
228	mov	rax,QWORD PTR[16+rsi]
229	mul	rbp
230	mov	r10,QWORD PTR[16+rcx]
231	add	r10,rax
232	adc	rdx,0
233	add	r10,rbx
234	adc	rdx,0
235	mov	rbx,rdx
236
237	mov	rax,QWORD PTR[24+rsi]
238	mul	rbp
239	mov	r11,QWORD PTR[24+rcx]
240	add	r11,rax
241	adc	rdx,0
242	add	r11,rbx
243	adc	rdx,0
244	mov	rbx,rdx
245
246	mov	rax,QWORD PTR[32+rsi]
247	mul	rbp
248	mov	r12,QWORD PTR[32+rcx]
249	add	r12,rax
250	adc	rdx,0
251	add	r12,rbx
252	adc	rdx,0
253	mov	rbx,rdx
254
255	mov	rax,QWORD PTR[40+rsi]
256	mul	rbp
257	mov	r13,QWORD PTR[40+rcx]
258	add	r13,rax
259	adc	rdx,0
260	add	r13,rbx
261	adc	rdx,0
262	mov	rbx,rdx
263
264	mov	rax,QWORD PTR[48+rsi]
265	mul	rbp
266	mov	r14,QWORD PTR[48+rcx]
267	add	r14,rax
268	adc	rdx,0
269	add	r14,rbx
270	adc	rdx,0
271	mov	rbx,rdx
272
273	mov	rax,QWORD PTR[56+rsi]
274	mul	rbp
275	mov	r15,QWORD PTR[56+rcx]
276	add	r15,rax
277	adc	rdx,0
278	add	r15,rbx
279	adc	rdx,0
280	mov	r8,rdx
; pass 1: multiplier = input[13]
281	mov	rbp,QWORD PTR[104+rcx]
282	mov	rax,QWORD PTR[rsi]
283	mul	rbp
284	add	r9,rax
285	adc	rdx,0
286	mov	QWORD PTR[8+rdi],r9
287	mov	rbx,rdx
288
289	mov	rax,QWORD PTR[8+rsi]
290	mul	rbp
291	add	r10,rax
292	adc	rdx,0
293	add	r10,rbx
294	adc	rdx,0
295	mov	rbx,rdx
296
297	mov	rax,QWORD PTR[16+rsi]
298	mul	rbp
299	add	r11,rax
300	adc	rdx,0
301	add	r11,rbx
302	adc	rdx,0
303	mov	rbx,rdx
304
305	mov	rax,QWORD PTR[24+rsi]
306	mul	rbp
307	add	r12,rax
308	adc	rdx,0
309	add	r12,rbx
310	adc	rdx,0
311	mov	rbx,rdx
312
313	mov	rax,QWORD PTR[32+rsi]
314	mul	rbp
315	add	r13,rax
316	adc	rdx,0
317	add	r13,rbx
318	adc	rdx,0
319	mov	rbx,rdx
320
321	mov	rax,QWORD PTR[40+rsi]
322	mul	rbp
323	add	r14,rax
324	adc	rdx,0
325	add	r14,rbx
326	adc	rdx,0
327	mov	rbx,rdx
328
329	mov	rax,QWORD PTR[48+rsi]
330	mul	rbp
331	add	r15,rax
332	adc	rdx,0
333	add	r15,rbx
334	adc	rdx,0
335	mov	rbx,rdx
336
337	mov	rax,QWORD PTR[56+rsi]
338	mul	rbp
339	add	r8,rax
340	adc	rdx,0
341	add	r8,rbx
342	adc	rdx,0
343	mov	r9,rdx
; pass 2: multiplier = input[14]
344	mov	rbp,QWORD PTR[112+rcx]
345	mov	rax,QWORD PTR[rsi]
346	mul	rbp
347	add	r10,rax
348	adc	rdx,0
349	mov	QWORD PTR[16+rdi],r10
350	mov	rbx,rdx
351
352	mov	rax,QWORD PTR[8+rsi]
353	mul	rbp
354	add	r11,rax
355	adc	rdx,0
356	add	r11,rbx
357	adc	rdx,0
358	mov	rbx,rdx
359
360	mov	rax,QWORD PTR[16+rsi]
361	mul	rbp
362	add	r12,rax
363	adc	rdx,0
364	add	r12,rbx
365	adc	rdx,0
366	mov	rbx,rdx
367
368	mov	rax,QWORD PTR[24+rsi]
369	mul	rbp
370	add	r13,rax
371	adc	rdx,0
372	add	r13,rbx
373	adc	rdx,0
374	mov	rbx,rdx
375
376	mov	rax,QWORD PTR[32+rsi]
377	mul	rbp
378	add	r14,rax
379	adc	rdx,0
380	add	r14,rbx
381	adc	rdx,0
382	mov	rbx,rdx
383
384	mov	rax,QWORD PTR[40+rsi]
385	mul	rbp
386	add	r15,rax
387	adc	rdx,0
388	add	r15,rbx
389	adc	rdx,0
390	mov	rbx,rdx
391
392	mov	rax,QWORD PTR[48+rsi]
393	mul	rbp
394	add	r8,rax
395	adc	rdx,0
396	add	r8,rbx
397	adc	rdx,0
398	mov	rbx,rdx
399
400	mov	rax,QWORD PTR[56+rsi]
401	mul	rbp
402	add	r9,rax
403	adc	rdx,0
404	add	r9,rbx
405	adc	rdx,0
406	mov	r10,rdx
; pass 3: multiplier = input[15]
407	mov	rbp,QWORD PTR[120+rcx]
408	mov	rax,QWORD PTR[rsi]
409	mul	rbp
410	add	r11,rax
411	adc	rdx,0
412	mov	QWORD PTR[24+rdi],r11
413	mov	rbx,rdx
414
415	mov	rax,QWORD PTR[8+rsi]
416	mul	rbp
417	add	r12,rax
418	adc	rdx,0
419	add	r12,rbx
420	adc	rdx,0
421	mov	rbx,rdx
422
423	mov	rax,QWORD PTR[16+rsi]
424	mul	rbp
425	add	r13,rax
426	adc	rdx,0
427	add	r13,rbx
428	adc	rdx,0
429	mov	rbx,rdx
430
431	mov	rax,QWORD PTR[24+rsi]
432	mul	rbp
433	add	r14,rax
434	adc	rdx,0
435	add	r14,rbx
436	adc	rdx,0
437	mov	rbx,rdx
438
439	mov	rax,QWORD PTR[32+rsi]
440	mul	rbp
441	add	r15,rax
442	adc	rdx,0
443	add	r15,rbx
444	adc	rdx,0
445	mov	rbx,rdx
446
447	mov	rax,QWORD PTR[40+rsi]
448	mul	rbp
449	add	r8,rax
450	adc	rdx,0
451	add	r8,rbx
452	adc	rdx,0
453	mov	rbx,rdx
454
455	mov	rax,QWORD PTR[48+rsi]
456	mul	rbp
457	add	r9,rax
458	adc	rdx,0
459	add	r9,rbx
460	adc	rdx,0
461	mov	rbx,rdx
462
463	mov	rax,QWORD PTR[56+rsi]
464	mul	rbp
465	add	r10,rax
466	adc	rdx,0
467	add	r10,rbx
468	adc	rdx,0
469	mov	r11,rdx
; ---- add input[8..11] to the four top columns; rax collects the carry
470	xor	rax,rax
471
472	add	r8,QWORD PTR[64+rcx]
473	adc	r9,QWORD PTR[72+rcx]
474	adc	r10,QWORD PTR[80+rcx]
475	adc	r11,QWORD PTR[88+rcx]
476	adc	rax,0
477
478
479
480
; Park the four top columns. r10 is not stored: it travels in rbp as
; the first multiplier for MULADD_128x512, which reads the second
; multiplier from [8+rdi] (= the r11 slot once rdi is advanced by 80).
481	mov	QWORD PTR[64+rdi],r8
482	mov	QWORD PTR[72+rdi],r9
483	mov	rbp,r10
484	mov	QWORD PTR[88+rdi],r11
485
486	mov	QWORD PTR[384+rsp],rax	; save carry word
487
; reload the four low columns; together with r12..r15 they form the
; r8..r15 accumulator expected by MULADD_128x512
488	mov	r8,QWORD PTR[rdi]
489	mov	r9,QWORD PTR[8+rdi]
490	mov	r10,QWORD PTR[16+rdi]
491	mov	r11,QWORD PTR[24+rdi]
492
493
494
495
496
497
498
499
; ---- stage 2: accumulator += (data+640) * {rbp, [8+rdi]}
500	add	rdi,8*10
501
502	add	rsi,64	; rsi -> 8-qword constant at data+640
503	lea	rcx,QWORD PTR[296+rsp]	; rcx -> scratch for stage-2 low qwords
504
505	call	MULADD_128x512
506
507	mov	rax,QWORD PTR[384+rsp]
508
509
; add the two parked columns (stored above at [64+rdi],[72+rdi] before
; rdi moved) to the top of the stage-2 result
510	add	r8,QWORD PTR[((-16))+rdi]
511	adc	r9,QWORD PTR[((-8))+rdi]
512	mov	QWORD PTR[64+rcx],r8
513	mov	QWORD PTR[72+rcx],r9
514
515	adc	rax,rax	; rax = 2*rax + CF: fold the new carry into the carry word
516	mov	QWORD PTR[384+rsp],rax
517
518	lea	rdi,QWORD PTR[192+rsp]
519	add	rsi,64	; rsi -> two qwords at data+704
520
521
522
523
524
; ---- 128-bit multiplier = low 128 bits of
;      ([rcx],[8+rcx]) * ([rsi],[8+rsi]); low qword in rbp, high qword
;      stored at [8+rdi] for MULADD_128x512
525	mov	r8,QWORD PTR[rsi]
526	mov	rbx,QWORD PTR[8+rsi]
527
528	mov	rax,QWORD PTR[rcx]
529	mul	r8
530	mov	rbp,rax
531	mov	r9,rdx
532
533	mov	rax,QWORD PTR[8+rcx]
534	mul	r8
535	add	r9,rax
536
537	mov	rax,QWORD PTR[rcx]
538	mul	rbx
539	add	r9,rax
540
541	mov	QWORD PTR[8+rdi],r9
542
543
; ---- stage 3: accumulator += (data+512) * {rbp, [8+rdi]}
544	sub	rsi,192	; rsi -> 8-qword constant at data+512
545
546	mov	r8,QWORD PTR[rcx]
547	mov	r9,QWORD PTR[8+rcx]
548
549	call	MULADD_128x512
550
551
552
553
; preload the low half of the data+512 constant for the conditional
; subtraction below (rsi and rdi are about to be reused)
554	mov	rax,QWORD PTR[rsi]
555	mov	rbx,QWORD PTR[8+rsi]
556	mov	rdi,QWORD PTR[16+rsi]
557	mov	rdx,QWORD PTR[24+rsi]
558
559
560	mov	rbp,QWORD PTR[384+rsp]
561
562	add	r8,QWORD PTR[64+rcx]
563	adc	r9,QWORD PTR[72+rcx]
564
565
566	adc	rbp,rbp	; rbp = 2*carry word + CF
567
568
569
; ---- add a table entry selected by the carry word: rbp*8 is a byte
;      offset into the data area, and the entry's qwords are 64 bytes
;      apart
570	shl	rbp,3
571	mov	rcx,QWORD PTR[32+rsp]
572	add	rbp,rcx
573
574
575	xor	rsi,rsi
576
577	add	r10,QWORD PTR[rbp]
578	adc	r11,QWORD PTR[64+rbp]
579	adc	r12,QWORD PTR[128+rbp]
580	adc	r13,QWORD PTR[192+rbp]
581	adc	r14,QWORD PTR[256+rbp]
582	adc	r15,QWORD PTR[320+rbp]
583	adc	r8,QWORD PTR[384+rbp]
584	adc	r9,QWORD PTR[448+rbp]
585
586
587
588	sbb	rsi,0	; rsi = all-ones if the addition carried out, else 0
589
590
; ---- branch-free conditional subtraction of the data+512 constant:
;      the subtrahend is masked with rsi, so it is zero when no carry
591	and	rax,rsi
592	and	rbx,rsi
593	and	rdi,rsi
594	and	rdx,rsi
595
596	mov	rbp,1
597	sub	r10,rax
598	sbb	r11,rbx
599	sbb	r12,rdi
600	sbb	r13,rdx
601
602
603
604
605	sbb	rbp,0	; rbp = 1 - borrow: parks the borrow across the loads/ands below
606
607
608
609	add	rcx,512
610	mov	rax,QWORD PTR[32+rcx]
611	mov	rbx,QWORD PTR[40+rcx]
612	mov	rdi,QWORD PTR[48+rcx]
613	mov	rdx,QWORD PTR[56+rcx]
614
615
616
617	and	rax,rsi
618	and	rbx,rsi
619	and	rdi,rsi
620	and	rdx,rsi
621
622
623
624	sub	rbp,1	; CF = parked borrow (set exactly when rbp was 0)
625
626	sbb	r14,rax
627	sbb	r15,rbx
628	sbb	r8,rdi
629	sbb	r9,rdx
630
631
632
; ---- store the 8-qword result through the destination pointer
633	mov	rsi,QWORD PTR[144+rsp]
634	mov	QWORD PTR[rsi],r10
635	mov	QWORD PTR[8+rsi],r11
636	mov	QWORD PTR[16+rsi],r12
637	mov	QWORD PTR[24+rsi],r13
638	mov	QWORD PTR[32+rsi],r14
639	mov	QWORD PTR[40+rsi],r15
640	mov	QWORD PTR[48+rsi],r8
641	mov	QWORD PTR[56+rsi],r9
642
643	DB	0F3h,0C3h		;repret
644mont_reduce	ENDP
645
646ALIGN	16
;-----------------------------------------------------------------------
; mont_mul_a3b (private helper)
; Computes the full 16-qword product of two 8-qword operands into
; [520+rsp .. 647+rsp], then tail-jumps to mont_reduce, which reduces
; that buffer and returns to this routine's caller.
; In:    rdi -> 8-qword operand B (one multiplier qword per row)
;        rsi -> 8-qword operand A (read from memory by rows 1..7)
;        r10,r11,r12,r13,r14,r15,r8,r9 = A[0..7]; row 0 takes A from
;        these registers instead of loading through rsi
;        NOTE(review): correctness requires the registers to mirror
;        [rsi]; the callers visible in this file arrange that - confirm
;        for any new caller.
; Out:   as produced by mont_reduce
; Clobb: rax, rbx, rdx, rbp, r8-r15, flags (plus everything
;        mont_reduce clobbers)
;-----------------------------------------------------------------------
647mont_mul_a3b	PROC PRIVATE
648
649
650
651
; ---- row 0: A (in registers) * B[0]; lowest qword -> [520+rsp]
652	mov	rbp,QWORD PTR[rdi]
653
654	mov	rax,r10
655	mul	rbp
656	mov	QWORD PTR[520+rsp],rax
657	mov	r10,rdx
658	mov	rax,r11
659	mul	rbp
660	add	r10,rax
661	adc	rdx,0
662	mov	r11,rdx
663	mov	rax,r12
664	mul	rbp
665	add	r11,rax
666	adc	rdx,0
667	mov	r12,rdx
668	mov	rax,r13
669	mul	rbp
670	add	r12,rax
671	adc	rdx,0
672	mov	r13,rdx
673	mov	rax,r14
674	mul	rbp
675	add	r13,rax
676	adc	rdx,0
677	mov	r14,rdx
678	mov	rax,r15
679	mul	rbp
680	add	r14,rax
681	adc	rdx,0
682	mov	r15,rdx
683	mov	rax,r8
684	mul	rbp
685	add	r15,rax
686	adc	rdx,0
687	mov	r8,rdx
688	mov	rax,r9
689	mul	rbp
690	add	r8,rax
691	adc	rdx,0
692	mov	r9,rdx
; ---- row 1: accumulator += A * B[1]; lowest qword -> [528+rsp]
;      (rows 1..7 share one shape: the lowest column is stored, rbx
;      carries the high half, and the freed register becomes the new
;      top column)
693	mov	rbp,QWORD PTR[8+rdi]
694	mov	rax,QWORD PTR[rsi]
695	mul	rbp
696	add	r10,rax
697	adc	rdx,0
698	mov	QWORD PTR[528+rsp],r10
699	mov	rbx,rdx
700
701	mov	rax,QWORD PTR[8+rsi]
702	mul	rbp
703	add	r11,rax
704	adc	rdx,0
705	add	r11,rbx
706	adc	rdx,0
707	mov	rbx,rdx
708
709	mov	rax,QWORD PTR[16+rsi]
710	mul	rbp
711	add	r12,rax
712	adc	rdx,0
713	add	r12,rbx
714	adc	rdx,0
715	mov	rbx,rdx
716
717	mov	rax,QWORD PTR[24+rsi]
718	mul	rbp
719	add	r13,rax
720	adc	rdx,0
721	add	r13,rbx
722	adc	rdx,0
723	mov	rbx,rdx
724
725	mov	rax,QWORD PTR[32+rsi]
726	mul	rbp
727	add	r14,rax
728	adc	rdx,0
729	add	r14,rbx
730	adc	rdx,0
731	mov	rbx,rdx
732
733	mov	rax,QWORD PTR[40+rsi]
734	mul	rbp
735	add	r15,rax
736	adc	rdx,0
737	add	r15,rbx
738	adc	rdx,0
739	mov	rbx,rdx
740
741	mov	rax,QWORD PTR[48+rsi]
742	mul	rbp
743	add	r8,rax
744	adc	rdx,0
745	add	r8,rbx
746	adc	rdx,0
747	mov	rbx,rdx
748
749	mov	rax,QWORD PTR[56+rsi]
750	mul	rbp
751	add	r9,rax
752	adc	rdx,0
753	add	r9,rbx
754	adc	rdx,0
755	mov	r10,rdx
; ---- row 2: accumulator += A * B[2]; lowest qword -> [536+rsp]
756	mov	rbp,QWORD PTR[16+rdi]
757	mov	rax,QWORD PTR[rsi]
758	mul	rbp
759	add	r11,rax
760	adc	rdx,0
761	mov	QWORD PTR[536+rsp],r11
762	mov	rbx,rdx
763
764	mov	rax,QWORD PTR[8+rsi]
765	mul	rbp
766	add	r12,rax
767	adc	rdx,0
768	add	r12,rbx
769	adc	rdx,0
770	mov	rbx,rdx
771
772	mov	rax,QWORD PTR[16+rsi]
773	mul	rbp
774	add	r13,rax
775	adc	rdx,0
776	add	r13,rbx
777	adc	rdx,0
778	mov	rbx,rdx
779
780	mov	rax,QWORD PTR[24+rsi]
781	mul	rbp
782	add	r14,rax
783	adc	rdx,0
784	add	r14,rbx
785	adc	rdx,0
786	mov	rbx,rdx
787
788	mov	rax,QWORD PTR[32+rsi]
789	mul	rbp
790	add	r15,rax
791	adc	rdx,0
792	add	r15,rbx
793	adc	rdx,0
794	mov	rbx,rdx
795
796	mov	rax,QWORD PTR[40+rsi]
797	mul	rbp
798	add	r8,rax
799	adc	rdx,0
800	add	r8,rbx
801	adc	rdx,0
802	mov	rbx,rdx
803
804	mov	rax,QWORD PTR[48+rsi]
805	mul	rbp
806	add	r9,rax
807	adc	rdx,0
808	add	r9,rbx
809	adc	rdx,0
810	mov	rbx,rdx
811
812	mov	rax,QWORD PTR[56+rsi]
813	mul	rbp
814	add	r10,rax
815	adc	rdx,0
816	add	r10,rbx
817	adc	rdx,0
818	mov	r11,rdx
; ---- row 3: accumulator += A * B[3]; lowest qword -> [544+rsp]
819	mov	rbp,QWORD PTR[24+rdi]
820	mov	rax,QWORD PTR[rsi]
821	mul	rbp
822	add	r12,rax
823	adc	rdx,0
824	mov	QWORD PTR[544+rsp],r12
825	mov	rbx,rdx
826
827	mov	rax,QWORD PTR[8+rsi]
828	mul	rbp
829	add	r13,rax
830	adc	rdx,0
831	add	r13,rbx
832	adc	rdx,0
833	mov	rbx,rdx
834
835	mov	rax,QWORD PTR[16+rsi]
836	mul	rbp
837	add	r14,rax
838	adc	rdx,0
839	add	r14,rbx
840	adc	rdx,0
841	mov	rbx,rdx
842
843	mov	rax,QWORD PTR[24+rsi]
844	mul	rbp
845	add	r15,rax
846	adc	rdx,0
847	add	r15,rbx
848	adc	rdx,0
849	mov	rbx,rdx
850
851	mov	rax,QWORD PTR[32+rsi]
852	mul	rbp
853	add	r8,rax
854	adc	rdx,0
855	add	r8,rbx
856	adc	rdx,0
857	mov	rbx,rdx
858
859	mov	rax,QWORD PTR[40+rsi]
860	mul	rbp
861	add	r9,rax
862	adc	rdx,0
863	add	r9,rbx
864	adc	rdx,0
865	mov	rbx,rdx
866
867	mov	rax,QWORD PTR[48+rsi]
868	mul	rbp
869	add	r10,rax
870	adc	rdx,0
871	add	r10,rbx
872	adc	rdx,0
873	mov	rbx,rdx
874
875	mov	rax,QWORD PTR[56+rsi]
876	mul	rbp
877	add	r11,rax
878	adc	rdx,0
879	add	r11,rbx
880	adc	rdx,0
881	mov	r12,rdx
; ---- row 4: accumulator += A * B[4]; lowest qword -> [552+rsp]
882	mov	rbp,QWORD PTR[32+rdi]
883	mov	rax,QWORD PTR[rsi]
884	mul	rbp
885	add	r13,rax
886	adc	rdx,0
887	mov	QWORD PTR[552+rsp],r13
888	mov	rbx,rdx
889
890	mov	rax,QWORD PTR[8+rsi]
891	mul	rbp
892	add	r14,rax
893	adc	rdx,0
894	add	r14,rbx
895	adc	rdx,0
896	mov	rbx,rdx
897
898	mov	rax,QWORD PTR[16+rsi]
899	mul	rbp
900	add	r15,rax
901	adc	rdx,0
902	add	r15,rbx
903	adc	rdx,0
904	mov	rbx,rdx
905
906	mov	rax,QWORD PTR[24+rsi]
907	mul	rbp
908	add	r8,rax
909	adc	rdx,0
910	add	r8,rbx
911	adc	rdx,0
912	mov	rbx,rdx
913
914	mov	rax,QWORD PTR[32+rsi]
915	mul	rbp
916	add	r9,rax
917	adc	rdx,0
918	add	r9,rbx
919	adc	rdx,0
920	mov	rbx,rdx
921
922	mov	rax,QWORD PTR[40+rsi]
923	mul	rbp
924	add	r10,rax
925	adc	rdx,0
926	add	r10,rbx
927	adc	rdx,0
928	mov	rbx,rdx
929
930	mov	rax,QWORD PTR[48+rsi]
931	mul	rbp
932	add	r11,rax
933	adc	rdx,0
934	add	r11,rbx
935	adc	rdx,0
936	mov	rbx,rdx
937
938	mov	rax,QWORD PTR[56+rsi]
939	mul	rbp
940	add	r12,rax
941	adc	rdx,0
942	add	r12,rbx
943	adc	rdx,0
944	mov	r13,rdx
; ---- row 5: accumulator += A * B[5]; lowest qword -> [560+rsp]
945	mov	rbp,QWORD PTR[40+rdi]
946	mov	rax,QWORD PTR[rsi]
947	mul	rbp
948	add	r14,rax
949	adc	rdx,0
950	mov	QWORD PTR[560+rsp],r14
951	mov	rbx,rdx
952
953	mov	rax,QWORD PTR[8+rsi]
954	mul	rbp
955	add	r15,rax
956	adc	rdx,0
957	add	r15,rbx
958	adc	rdx,0
959	mov	rbx,rdx
960
961	mov	rax,QWORD PTR[16+rsi]
962	mul	rbp
963	add	r8,rax
964	adc	rdx,0
965	add	r8,rbx
966	adc	rdx,0
967	mov	rbx,rdx
968
969	mov	rax,QWORD PTR[24+rsi]
970	mul	rbp
971	add	r9,rax
972	adc	rdx,0
973	add	r9,rbx
974	adc	rdx,0
975	mov	rbx,rdx
976
977	mov	rax,QWORD PTR[32+rsi]
978	mul	rbp
979	add	r10,rax
980	adc	rdx,0
981	add	r10,rbx
982	adc	rdx,0
983	mov	rbx,rdx
984
985	mov	rax,QWORD PTR[40+rsi]
986	mul	rbp
987	add	r11,rax
988	adc	rdx,0
989	add	r11,rbx
990	adc	rdx,0
991	mov	rbx,rdx
992
993	mov	rax,QWORD PTR[48+rsi]
994	mul	rbp
995	add	r12,rax
996	adc	rdx,0
997	add	r12,rbx
998	adc	rdx,0
999	mov	rbx,rdx
1000
1001	mov	rax,QWORD PTR[56+rsi]
1002	mul	rbp
1003	add	r13,rax
1004	adc	rdx,0
1005	add	r13,rbx
1006	adc	rdx,0
1007	mov	r14,rdx
; ---- row 6: accumulator += A * B[6]; lowest qword -> [568+rsp]
1008	mov	rbp,QWORD PTR[48+rdi]
1009	mov	rax,QWORD PTR[rsi]
1010	mul	rbp
1011	add	r15,rax
1012	adc	rdx,0
1013	mov	QWORD PTR[568+rsp],r15
1014	mov	rbx,rdx
1015
1016	mov	rax,QWORD PTR[8+rsi]
1017	mul	rbp
1018	add	r8,rax
1019	adc	rdx,0
1020	add	r8,rbx
1021	adc	rdx,0
1022	mov	rbx,rdx
1023
1024	mov	rax,QWORD PTR[16+rsi]
1025	mul	rbp
1026	add	r9,rax
1027	adc	rdx,0
1028	add	r9,rbx
1029	adc	rdx,0
1030	mov	rbx,rdx
1031
1032	mov	rax,QWORD PTR[24+rsi]
1033	mul	rbp
1034	add	r10,rax
1035	adc	rdx,0
1036	add	r10,rbx
1037	adc	rdx,0
1038	mov	rbx,rdx
1039
1040	mov	rax,QWORD PTR[32+rsi]
1041	mul	rbp
1042	add	r11,rax
1043	adc	rdx,0
1044	add	r11,rbx
1045	adc	rdx,0
1046	mov	rbx,rdx
1047
1048	mov	rax,QWORD PTR[40+rsi]
1049	mul	rbp
1050	add	r12,rax
1051	adc	rdx,0
1052	add	r12,rbx
1053	adc	rdx,0
1054	mov	rbx,rdx
1055
1056	mov	rax,QWORD PTR[48+rsi]
1057	mul	rbp
1058	add	r13,rax
1059	adc	rdx,0
1060	add	r13,rbx
1061	adc	rdx,0
1062	mov	rbx,rdx
1063
1064	mov	rax,QWORD PTR[56+rsi]
1065	mul	rbp
1066	add	r14,rax
1067	adc	rdx,0
1068	add	r14,rbx
1069	adc	rdx,0
1070	mov	r15,rdx
; ---- row 7: accumulator += A * B[7]; lowest qword -> [576+rsp]
1071	mov	rbp,QWORD PTR[56+rdi]
1072	mov	rax,QWORD PTR[rsi]
1073	mul	rbp
1074	add	r8,rax
1075	adc	rdx,0
1076	mov	QWORD PTR[576+rsp],r8
1077	mov	rbx,rdx
1078
1079	mov	rax,QWORD PTR[8+rsi]
1080	mul	rbp
1081	add	r9,rax
1082	adc	rdx,0
1083	add	r9,rbx
1084	adc	rdx,0
1085	mov	rbx,rdx
1086
1087	mov	rax,QWORD PTR[16+rsi]
1088	mul	rbp
1089	add	r10,rax
1090	adc	rdx,0
1091	add	r10,rbx
1092	adc	rdx,0
1093	mov	rbx,rdx
1094
1095	mov	rax,QWORD PTR[24+rsi]
1096	mul	rbp
1097	add	r11,rax
1098	adc	rdx,0
1099	add	r11,rbx
1100	adc	rdx,0
1101	mov	rbx,rdx
1102
1103	mov	rax,QWORD PTR[32+rsi]
1104	mul	rbp
1105	add	r12,rax
1106	adc	rdx,0
1107	add	r12,rbx
1108	adc	rdx,0
1109	mov	rbx,rdx
1110
1111	mov	rax,QWORD PTR[40+rsi]
1112	mul	rbp
1113	add	r13,rax
1114	adc	rdx,0
1115	add	r13,rbx
1116	adc	rdx,0
1117	mov	rbx,rdx
1118
1119	mov	rax,QWORD PTR[48+rsi]
1120	mul	rbp
1121	add	r14,rax
1122	adc	rdx,0
1123	add	r14,rbx
1124	adc	rdx,0
1125	mov	rbx,rdx
1126
1127	mov	rax,QWORD PTR[56+rsi]
1128	mul	rbp
1129	add	r15,rax
1130	adc	rdx,0
1131	add	r15,rbx
1132	adc	rdx,0
1133	mov	r8,rdx
; ---- flush the eight high product qwords to [584+rsp .. 647+rsp]
1134	mov	QWORD PTR[584+rsp],r9
1135	mov	QWORD PTR[592+rsp],r10
1136	mov	QWORD PTR[600+rsp],r11
1137	mov	QWORD PTR[608+rsp],r12
1138	mov	QWORD PTR[616+rsp],r13
1139	mov	QWORD PTR[624+rsp],r14
1140	mov	QWORD PTR[632+rsp],r15
1141	mov	QWORD PTR[640+rsp],r8
1142
1143
1144
1145
1146
; tail call: mont_reduce consumes [520+rsp ...] and returns directly to
; this routine's caller
1147	jmp	mont_reduce
1148
1149
1150mont_mul_a3b	ENDP
1151
1152ALIGN	16
;-----------------------------------------------------------------------
; sqr_reduce (private helper)
; Computes the 16-qword square of an 8-qword operand into
; [520+rsp .. 647+rsp], then tail-jumps to mont_reduce, which reduces
; that buffer and returns to this routine's caller.
;
; Method, as visible below: the off-diagonal products a[i]*a[j] (i<j)
; are summed once into the buffer, that sum is doubled, and the
; diagonal squares a[i]*a[i] are added.
;
; In:    [16+rsp] = pointer to the operand (loaded into rcx); this is
;                   the slot mod_exp_512 writes at its own [8+rsp]
;        r10,r11,r12,r13,r14,r15,r8,r9 = operand qwords 0..7
;        NOTE(review): qwords 0, 6 and 7 are taken from registers in
;        the off-diagonal part, while qwords 1..5 and most squares are
;        read through rcx, so the registers must mirror the memory
;        operand - confirm for any new caller.
; Out:   as produced by mont_reduce
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags
;-----------------------------------------------------------------------
1153sqr_reduce	PROC PRIVATE
1154	mov	rcx,QWORD PTR[16+rsp]
1155
1156
1157
; ---- off-diagonal row 0: a0 * a[1..7]
1158	mov	rbx,r10
1159
1160	mov	rax,r11
1161	mul	rbx
1162	mov	QWORD PTR[528+rsp],rax
1163	mov	r10,rdx
1164	mov	rax,r12
1165	mul	rbx
1166	add	r10,rax
1167	adc	rdx,0
1168	mov	r11,rdx
1169	mov	rax,r13
1170	mul	rbx
1171	add	r11,rax
1172	adc	rdx,0
1173	mov	r12,rdx
1174	mov	rax,r14
1175	mul	rbx
1176	add	r12,rax
1177	adc	rdx,0
1178	mov	r13,rdx
1179	mov	rax,r15
1180	mul	rbx
1181	add	r13,rax
1182	adc	rdx,0
1183	mov	r14,rdx
1184	mov	rax,r8
1185	mul	rbx
1186	add	r14,rax
1187	adc	rdx,0
1188	mov	r15,rdx
1189	mov	rax,r9
1190	mul	rbx
1191	add	r15,rax
1192	adc	rdx,0
1193	mov	rsi,rdx
1194
1195	mov	QWORD PTR[536+rsp],r10
1196
1197
1198
1199
1200
; ---- off-diagonal row 1: a1 * a[2..7]; r10 now serves as the
;      inter-column carry
1201	mov	rbx,QWORD PTR[8+rcx]
1202
1203	mov	rax,QWORD PTR[16+rcx]
1204	mul	rbx
1205	add	r11,rax
1206	adc	rdx,0
1207	mov	QWORD PTR[544+rsp],r11
1208
1209	mov	r10,rdx
1210	mov	rax,QWORD PTR[24+rcx]
1211	mul	rbx
1212	add	r12,rax
1213	adc	rdx,0
1214	add	r12,r10
1215	adc	rdx,0
1216	mov	QWORD PTR[552+rsp],r12
1217
1218	mov	r10,rdx
1219	mov	rax,QWORD PTR[32+rcx]
1220	mul	rbx
1221	add	r13,rax
1222	adc	rdx,0
1223	add	r13,r10
1224	adc	rdx,0
1225
1226	mov	r10,rdx
1227	mov	rax,QWORD PTR[40+rcx]
1228	mul	rbx
1229	add	r14,rax
1230	adc	rdx,0
1231	add	r14,r10
1232	adc	rdx,0
1233
1234	mov	r10,rdx
1235	mov	rax,r8
1236	mul	rbx
1237	add	r15,rax
1238	adc	rdx,0
1239	add	r15,r10
1240	adc	rdx,0
1241
1242	mov	r10,rdx
1243	mov	rax,r9
1244	mul	rbx
1245	add	rsi,rax
1246	adc	rdx,0
1247	add	rsi,r10
1248	adc	rdx,0
1249
1250	mov	r11,rdx
1251
1252
1253
1254
; ---- off-diagonal row 2: a2 * a[3..7]
1255	mov	rbx,QWORD PTR[16+rcx]
1256
1257	mov	rax,QWORD PTR[24+rcx]
1258	mul	rbx
1259	add	r13,rax
1260	adc	rdx,0
1261	mov	QWORD PTR[560+rsp],r13
1262
1263	mov	r10,rdx
1264	mov	rax,QWORD PTR[32+rcx]
1265	mul	rbx
1266	add	r14,rax
1267	adc	rdx,0
1268	add	r14,r10
1269	adc	rdx,0
1270	mov	QWORD PTR[568+rsp],r14
1271
1272	mov	r10,rdx
1273	mov	rax,QWORD PTR[40+rcx]
1274	mul	rbx
1275	add	r15,rax
1276	adc	rdx,0
1277	add	r15,r10
1278	adc	rdx,0
1279
1280	mov	r10,rdx
1281	mov	rax,r8
1282	mul	rbx
1283	add	rsi,rax
1284	adc	rdx,0
1285	add	rsi,r10
1286	adc	rdx,0
1287
1288	mov	r10,rdx
1289	mov	rax,r9
1290	mul	rbx
1291	add	r11,rax
1292	adc	rdx,0
1293	add	r11,r10
1294	adc	rdx,0
1295
1296	mov	r12,rdx
1297
1298
1299
1300
1301
; ---- off-diagonal row 3: a3 * a[4..7]
1302	mov	rbx,QWORD PTR[24+rcx]
1303
1304	mov	rax,QWORD PTR[32+rcx]
1305	mul	rbx
1306	add	r15,rax
1307	adc	rdx,0
1308	mov	QWORD PTR[576+rsp],r15
1309
1310	mov	r10,rdx
1311	mov	rax,QWORD PTR[40+rcx]
1312	mul	rbx
1313	add	rsi,rax
1314	adc	rdx,0
1315	add	rsi,r10
1316	adc	rdx,0
1317	mov	QWORD PTR[584+rsp],rsi
1318
1319	mov	r10,rdx
1320	mov	rax,r8
1321	mul	rbx
1322	add	r11,rax
1323	adc	rdx,0
1324	add	r11,r10
1325	adc	rdx,0
1326
1327	mov	r10,rdx
1328	mov	rax,r9
1329	mul	rbx
1330	add	r12,rax
1331	adc	rdx,0
1332	add	r12,r10
1333	adc	rdx,0
1334
1335	mov	r15,rdx
1336
1337
1338
1339
; ---- off-diagonal row 4: a4 * a[5..7]
1340	mov	rbx,QWORD PTR[32+rcx]
1341
1342	mov	rax,QWORD PTR[40+rcx]
1343	mul	rbx
1344	add	r11,rax
1345	adc	rdx,0
1346	mov	QWORD PTR[592+rsp],r11
1347
1348	mov	r10,rdx
1349	mov	rax,r8
1350	mul	rbx
1351	add	r12,rax
1352	adc	rdx,0
1353	add	r12,r10
1354	adc	rdx,0
1355	mov	QWORD PTR[600+rsp],r12
1356
1357	mov	r10,rdx
1358	mov	rax,r9
1359	mul	rbx
1360	add	r15,rax
1361	adc	rdx,0
1362	add	r15,r10
1363	adc	rdx,0
1364
1365	mov	r11,rdx
1366
1367
1368
1369
; ---- off-diagonal row 5: a5 * a[6..7]
1370	mov	rbx,QWORD PTR[40+rcx]
1371
1372	mov	rax,r8
1373	mul	rbx
1374	add	r15,rax
1375	adc	rdx,0
1376	mov	QWORD PTR[608+rsp],r15
1377
1378	mov	r10,rdx
1379	mov	rax,r9
1380	mul	rbx
1381	add	r11,rax
1382	adc	rdx,0
1383	add	r11,r10
1384	adc	rdx,0
1385	mov	QWORD PTR[616+rsp],r11
1386
1387	mov	r12,rdx
1388
1389
1390
1391
; ---- off-diagonal row 6: a6 * a7
1392	mov	rbx,r8
1393
1394	mov	rax,r9
1395	mul	rbx
1396	add	r12,rax
1397	adc	rdx,0
1398	mov	QWORD PTR[624+rsp],r12
1399
1400	mov	QWORD PTR[632+rsp],rdx
1401
1402
; ---- low half: reload columns 1..6 of the off-diagonal sum, double
;      them, and add the squares a0^2, a1^2, a2^2 and the low half of
;      a3^2
1403	mov	r10,QWORD PTR[528+rsp]
1404	mov	r11,QWORD PTR[536+rsp]
1405	mov	r12,QWORD PTR[544+rsp]
1406	mov	r13,QWORD PTR[552+rsp]
1407	mov	r14,QWORD PTR[560+rsp]
1408	mov	r15,QWORD PTR[568+rsp]
1409
; a3^2 is computed before the doubling so the doubling's carry-out can
; be folded into its high half (r8); r8 no longer needs to hold a6
1410	mov	rax,QWORD PTR[24+rcx]
1411	mul	rax
1412	mov	rdi,rax
1413	mov	r8,rdx
1414
1415	add	r10,r10
1416	adc	r11,r11
1417	adc	r12,r12
1418	adc	r13,r13
1419	adc	r14,r14
1420	adc	r15,r15
1421	adc	r8,0
1422
1423	mov	rax,QWORD PTR[rcx]
1424	mul	rax
1425	mov	QWORD PTR[520+rsp],rax	; column 0 = low(a0^2)
1426	mov	rbx,rdx
1427
1428	mov	rax,QWORD PTR[8+rcx]
1429	mul	rax
1430
1431	add	r10,rbx
1432	adc	r11,rax
1433	adc	rdx,0
1434
1435	mov	rbx,rdx
1436	mov	QWORD PTR[528+rsp],r10
1437	mov	QWORD PTR[536+rsp],r11
1438
1439	mov	rax,QWORD PTR[16+rcx]
1440	mul	rax
1441
1442	add	r12,rbx
1443	adc	r13,rax
1444	adc	rdx,0
1445
1446	mov	rbx,rdx
1447
1448	mov	QWORD PTR[544+rsp],r12
1449	mov	QWORD PTR[552+rsp],r13
1450
1451	xor	rbp,rbp
1452	add	r14,rbx
1453	adc	r15,rdi
1454	adc	rbp,0	; rbp = carry out of the low half
1455
1456	mov	QWORD PTR[560+rsp],r14
1457	mov	QWORD PTR[568+rsp],r15
1458
1459
1460
1461
; ---- high half: reload columns 7..14, double them, and add the
;      remaining squares
1462	mov	r10,QWORD PTR[576+rsp]
1463	mov	r11,QWORD PTR[584+rsp]
1464	mov	r12,QWORD PTR[592+rsp]
1465	mov	r13,QWORD PTR[600+rsp]
1466	mov	r14,QWORD PTR[608+rsp]
1467	mov	r15,QWORD PTR[616+rsp]
1468	mov	rdi,QWORD PTR[624+rsp]
1469	mov	rsi,QWORD PTR[632+rsp]
1470
; a7^2 from the register copy of a7; low half kept in r9, high in rbx
1471	mov	rax,r9
1472	mul	rax
1473	mov	r9,rax
1474	mov	rbx,rdx
1475
1476	add	r10,r10
1477	adc	r11,r11
1478	adc	r12,r12
1479	adc	r13,r13
1480	adc	r14,r14
1481	adc	r15,r15
1482	adc	rdi,rdi
1483	adc	rsi,rsi
1484	adc	rbx,0
1485
; r10 was just doubled with no carry-in, so its low bit is 0 and
; adding the 0/1 carry in rbp cannot overflow
1486	add	r10,rbp
1487
1488	mov	rax,QWORD PTR[32+rcx]
1489	mul	rax
1490
1491	add	r10,r8
1492	adc	r11,rax
1493	adc	rdx,0
1494
1495	mov	rbp,rdx
1496
1497	mov	QWORD PTR[576+rsp],r10
1498	mov	QWORD PTR[584+rsp],r11
1499
1500	mov	rax,QWORD PTR[40+rcx]
1501	mul	rax
1502
1503	add	r12,rbp
1504	adc	r13,rax
1505	adc	rdx,0
1506
1507	mov	rbp,rdx
1508
1509	mov	QWORD PTR[592+rsp],r12
1510	mov	QWORD PTR[600+rsp],r13
1511
1512	mov	rax,QWORD PTR[48+rcx]
1513	mul	rax
1514
1515	add	r14,rbp
1516	adc	r15,rax
1517	adc	rdx,0
1518
1519	mov	QWORD PTR[608+rsp],r14
1520	mov	QWORD PTR[616+rsp],r15
1521
1522	add	rdi,rdx
1523	adc	rsi,r9
1524	adc	rbx,0
1525
1526	mov	QWORD PTR[624+rsp],rdi
1527	mov	QWORD PTR[632+rsp],rsi
1528	mov	QWORD PTR[640+rsp],rbx
1529
; tail call: mont_reduce consumes [520+rsp ...] and returns directly to
; this routine's caller
1530	jmp	mont_reduce
1531
1532
1533sqr_reduce	ENDP
1534PUBLIC	mod_exp_512
1535
;-----------------------------------------------------------------------
; mod_exp_512 (public entry point, Microsoft x64 calling convention)
; In (Win64):  rcx = arg1, pointer to a 64-byte buffer that receives
;                    the result (also used as working storage)
;              rdx = arg2, pointer to a 64-byte input operand
;              r8  = arg3, pointer to a 64-byte value consumed as a bit
;                    string, 5 bits at a time, from bit 511 downwards
;                    (presumably the exponent - confirm with the C
;                    prototype)
;              r9  = arg4, pointer to the constant data area read by
;                    mont_reduce; the 8 qwords at arg4+512 are also
;                    used for the final conditional subtraction
;                    (presumably the modulus - confirm)
; The arguments are moved into rdi, rsi, rdx, rcx on entry.
;
; Stack frame after alignment (offsets from rsp in this routine; the
; helpers see each slot 8 bytes higher):
;   [0]        saved pre-allocation rsp (points at the pushed registers)
;   [8]        arg1          [16] arg2          [24] arg4
;   [32]       init_loop counter    [40] table write cursor
;   [48]       bit index for the main loop
;   [64..127]  copy of the arg3 bytes; [128] zero padding qword
;   [136]      destination pointer consumed by mont_reduce
;   [384..447] 8-qword operand produced by the first mont_reduce
;   [448..511] 8-qword temporary
;   [512..639] 16-qword buffer consumed by mont_reduce
;   [640.. ]   lookup table: 32 entries, each split into 16-bit pieces
;              stored 64 bytes apart (entry i starts at 640 + 2*i)
; Preserves rbx, rbp, rdi, rsi, r12-r15 via the prologue/epilogue.
;-----------------------------------------------------------------------
1536mod_exp_512	PROC PUBLIC
1537	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
1538	mov	QWORD PTR[16+rsp],rsi
1539	mov	rax,rsp
1540$L$SEH_begin_mod_exp_512::
; remap the four Win64 argument registers to rdi, rsi, rdx, rcx
1541	mov	rdi,rcx
1542	mov	rsi,rdx
1543	mov	rdx,r8
1544	mov	rcx,r9
1545
1546
1547	push	rbp
1548	push	rbx
1549	push	r12
1550	push	r13
1551	push	r14
1552	push	r15
1553
1554
; allocate the frame and align it to 64 bytes; r8 keeps the unaligned
; rsp so the epilogue can find the pushed registers
1555	mov	r8,rsp
1556	sub	rsp,2688
1557	and	rsp,-64
1558
1559
1560	mov	QWORD PTR[rsp],r8
1561	mov	QWORD PTR[8+rsp],rdi
1562	mov	QWORD PTR[16+rsp],rsi
1563	mov	QWORD PTR[24+rsp],rcx
1564$L$body::
1565
1566
1567
; ---- build the 16-qword mont_reduce input at [512+rsp]: the 64 bytes
;      of arg2 in qwords 4..11, zeros in qwords 0..3 and 12..15
1568	pxor	xmm4,xmm4
1569	movdqu	xmm0,XMMWORD PTR[rsi]
1570	movdqu	xmm1,XMMWORD PTR[16+rsi]
1571	movdqu	xmm2,XMMWORD PTR[32+rsi]
1572	movdqu	xmm3,XMMWORD PTR[48+rsi]
1573	movdqa	XMMWORD PTR[512+rsp],xmm4
1574	movdqa	XMMWORD PTR[528+rsp],xmm4
1575	movdqa	XMMWORD PTR[608+rsp],xmm4
1576	movdqa	XMMWORD PTR[624+rsp],xmm4
1577	movdqa	XMMWORD PTR[544+rsp],xmm0
1578	movdqa	XMMWORD PTR[560+rsp],xmm1
1579	movdqa	XMMWORD PTR[576+rsp],xmm2
1580	movdqa	XMMWORD PTR[592+rsp],xmm3
1581
1582
; load the 64 bytes at arg3 into xmm0-xmm3 now, because rdx is
; clobbered by the calls below; they are stored to the stack after
; init_loop
1583	movdqu	xmm0,XMMWORD PTR[rdx]
1584	movdqu	xmm1,XMMWORD PTR[16+rdx]
1585	movdqu	xmm2,XMMWORD PTR[32+rdx]
1586	movdqu	xmm3,XMMWORD PTR[48+rdx]
1587
; reduce into [384+rsp]
1588	lea	rbx,QWORD PTR[384+rsp]
1589	mov	QWORD PTR[136+rsp],rbx
1590	call	mont_reduce
1591
1592
; ---- initialise the temporary at [448+rsp]: qword 2 = 1, all other
;      qwords 0; also zero the padding qword at [128+rsp]
1593	lea	rcx,QWORD PTR[448+rsp]
1594	xor	rax,rax
1595	mov	QWORD PTR[rcx],rax
1596	mov	QWORD PTR[8+rcx],rax
1597	mov	QWORD PTR[24+rcx],rax
1598	mov	QWORD PTR[32+rcx],rax
1599	mov	QWORD PTR[40+rcx],rax
1600	mov	QWORD PTR[48+rcx],rax
1601	mov	QWORD PTR[56+rcx],rax
1602	mov	QWORD PTR[128+rsp],rax
1603	mov	QWORD PTR[16+rcx],1
1604
; ---- store the temporary as table entry 0: each qword is split into
;      four 16-bit pieces written 64 bytes apart
1605	lea	rbp,QWORD PTR[640+rsp]
1606	mov	rsi,rcx
1607	mov	rdi,rbp
1608	mov	rax,8
1609loop_0::
1610	mov	rbx,QWORD PTR[rcx]
1611	mov	WORD PTR[rdi],bx
1612	shr	rbx,16
1613	mov	WORD PTR[64+rdi],bx
1614	shr	rbx,16
1615	mov	WORD PTR[128+rdi],bx
1616	shr	rbx,16
1617	mov	WORD PTR[192+rdi],bx
1618	lea	rcx,QWORD PTR[8+rcx]
1619	lea	rdi,QWORD PTR[256+rdi]
1620	dec	rax
1621	jnz	loop_0
; ---- fill table entries 1..31: each iteration multiplies the
;      temporary by the operand at [384+rsp] (result written back to
;      the temporary) and stores it as the next entry
1622	mov	rax,31
1623	mov	QWORD PTR[32+rsp],rax
1624	mov	QWORD PTR[40+rsp],rbp
1625
1626	mov	QWORD PTR[136+rsp],rsi	; mont_reduce writes back into the temporary
1627	mov	r10,QWORD PTR[rsi]
1628	mov	r11,QWORD PTR[8+rsi]
1629	mov	r12,QWORD PTR[16+rsi]
1630	mov	r13,QWORD PTR[24+rsi]
1631	mov	r14,QWORD PTR[32+rsi]
1632	mov	r15,QWORD PTR[40+rsi]
1633	mov	r8,QWORD PTR[48+rsi]
1634	mov	r9,QWORD PTR[56+rsi]
1635init_loop::
1636	lea	rdi,QWORD PTR[384+rsp]
1637	call	mont_mul_a3b
1638	lea	rsi,QWORD PTR[448+rsp]
1639	mov	rbp,QWORD PTR[40+rsp]
1640	add	rbp,2	; next entry starts 2 bytes further on
1641	mov	QWORD PTR[40+rsp],rbp
1642	mov	rcx,rsi
1643	mov	rax,8
1644loop_1::
1645	mov	rbx,QWORD PTR[rcx]
1646	mov	WORD PTR[rbp],bx
1647	shr	rbx,16
1648	mov	WORD PTR[64+rbp],bx
1649	shr	rbx,16
1650	mov	WORD PTR[128+rbp],bx
1651	shr	rbx,16
1652	mov	WORD PTR[192+rbp],bx
1653	lea	rcx,QWORD PTR[8+rcx]
1654	lea	rbp,QWORD PTR[256+rbp]
1655	dec	rax
1656	jnz	loop_1
1657	mov	rax,QWORD PTR[32+rsp]
1658	sub	rax,1
1659	mov	QWORD PTR[32+rsp],rax
1660	jne	init_loop	; flags are still those of the sub: mov does not alter them
1661
1662
1663
; ---- spill the arg3 bytes held in xmm0-xmm3 to [64+rsp .. 127+rsp]
1664	movdqa	XMMWORD PTR[64+rsp],xmm0
1665	movdqa	XMMWORD PTR[80+rsp],xmm1
1666	movdqa	XMMWORD PTR[96+rsp],xmm2
1667	movdqa	XMMWORD PTR[112+rsp],xmm3
1668
1669
1670
1671
1672
; ---- first window: the dword at byte 62 of the copy holds bits
;      496..511 plus zero padding, so shifting right by 11 leaves bits
;      507..511 as the table index; those bits are then cleared in the
;      copy
1673	mov	eax,DWORD PTR[126+rsp]
1674	mov	rdx,rax
1675	shr	rax,11
1676	and	edx,007FFh
1677	mov	DWORD PTR[126+rsp],edx
1678	lea	rsi,QWORD PTR[640+rax*2+rsp]
1679	mov	rdx,QWORD PTR[8+rsp]
1680	mov	rbp,4
; reassemble the selected table entry into the arg1 buffer, two qwords
; per iteration
1681loop_2::
1682	movzx	rbx,WORD PTR[192+rsi]
1683	movzx	rax,WORD PTR[448+rsi]
1684	shl	rbx,16
1685	shl	rax,16
1686	mov	bx,WORD PTR[128+rsi]
1687	mov	ax,WORD PTR[384+rsi]
1688	shl	rbx,16
1689	shl	rax,16
1690	mov	bx,WORD PTR[64+rsi]
1691	mov	ax,WORD PTR[320+rsi]
1692	shl	rbx,16
1693	shl	rax,16
1694	mov	bx,WORD PTR[rsi]
1695	mov	ax,WORD PTR[256+rsi]
1696	mov	QWORD PTR[rdx],rbx
1697	mov	QWORD PTR[8+rdx],rax
1698	lea	rsi,QWORD PTR[512+rsi]
1699	lea	rdx,QWORD PTR[16+rdx]
1700	sub	rbp,1
1701	jnz	loop_2
; ---- main loop setup: bit index starts at 505 and drops by 5 per
;      iteration; the first pass enters at sqr_2 and so does only two
;      squarings
1702	mov	QWORD PTR[48+rsp],505
1703
1704	mov	rcx,QWORD PTR[8+rsp]
1705	mov	QWORD PTR[136+rsp],rcx	; from here mont_reduce writes into the arg1 buffer
1706	mov	r10,QWORD PTR[rcx]
1707	mov	r11,QWORD PTR[8+rcx]
1708	mov	r12,QWORD PTR[16+rcx]
1709	mov	r13,QWORD PTR[24+rcx]
1710	mov	r14,QWORD PTR[32+rcx]
1711	mov	r15,QWORD PTR[40+rcx]
1712	mov	r8,QWORD PTR[48+rcx]
1713	mov	r9,QWORD PTR[56+rcx]
1714	jmp	sqr_2
1715
1716main_loop_a3b::
1717	call	sqr_reduce
1718	call	sqr_reduce
1719	call	sqr_reduce
1720sqr_2::
1721	call	sqr_reduce
1722	call	sqr_reduce
1723
1724
1725
; extract the 5-bit window starting at the current bit index:
; rax = index of the 16-bit word, cl = bit offset inside that word
1726	mov	rcx,QWORD PTR[48+rsp]
1727	mov	rax,rcx
1728	shr	rax,4
1729	mov	edx,DWORD PTR[64+rax*2+rsp]
1730	and	rcx,15
1731	shr	rdx,cl
1732	and	rdx,01Fh
1733
; reassemble table entry rdx into the temporary at [448+rsp]
1734	lea	rsi,QWORD PTR[640+rdx*2+rsp]
1735	lea	rdx,QWORD PTR[448+rsp]
1736	mov	rdi,rdx
1737	mov	rbp,4
1738loop_3::
1739	movzx	rbx,WORD PTR[192+rsi]
1740	movzx	rax,WORD PTR[448+rsi]
1741	shl	rbx,16
1742	shl	rax,16
1743	mov	bx,WORD PTR[128+rsi]
1744	mov	ax,WORD PTR[384+rsi]
1745	shl	rbx,16
1746	shl	rax,16
1747	mov	bx,WORD PTR[64+rsi]
1748	mov	ax,WORD PTR[320+rsi]
1749	shl	rbx,16
1750	shl	rax,16
1751	mov	bx,WORD PTR[rsi]
1752	mov	ax,WORD PTR[256+rsi]
1753	mov	QWORD PTR[rdx],rbx
1754	mov	QWORD PTR[8+rdx],rax
1755	lea	rsi,QWORD PTR[512+rsi]
1756	lea	rdx,QWORD PTR[16+rdx]
1757	sub	rbp,1
1758	jnz	loop_3
; multiply the running result (arg1 buffer, mirrored in r10..r9 as
; left by the last reduction) by the selected entry (rdi -> temporary)
1759	mov	rsi,QWORD PTR[8+rsp]
1760	call	mont_mul_a3b
1761
1762
1763
1764	mov	rcx,QWORD PTR[48+rsp]
1765	sub	rcx,5
1766	mov	QWORD PTR[48+rsp],rcx
1767	jge	main_loop_a3b	; signed test: loop while the bit index is still >= 0
1768
1769
1770
1771end_main_loop_a3b::
1772
1773
; ---- final reduction: place the result in qwords 0..7 of the
;      mont_reduce input, zero qwords 8..15, and reduce once more
1774	mov	rdx,QWORD PTR[8+rsp]
1775	pxor	xmm4,xmm4
1776	movdqu	xmm0,XMMWORD PTR[rdx]
1777	movdqu	xmm1,XMMWORD PTR[16+rdx]
1778	movdqu	xmm2,XMMWORD PTR[32+rdx]
1779	movdqu	xmm3,XMMWORD PTR[48+rdx]
1780	movdqa	XMMWORD PTR[576+rsp],xmm4
1781	movdqa	XMMWORD PTR[592+rsp],xmm4
1782	movdqa	XMMWORD PTR[608+rsp],xmm4
1783	movdqa	XMMWORD PTR[624+rsp],xmm4
1784	movdqa	XMMWORD PTR[512+rsp],xmm0
1785	movdqa	XMMWORD PTR[528+rsp],xmm1
1786	movdqa	XMMWORD PTR[544+rsp],xmm2
1787	movdqa	XMMWORD PTR[560+rsp],xmm3
1788	call	mont_reduce
1789
1790
1791
; ---- conditional subtraction: compute result - (8 qwords at
;      arg4+512) and keep the difference only if there was no borrow
1792	mov	rax,QWORD PTR[8+rsp]
1793	mov	r8,QWORD PTR[rax]
1794	mov	r9,QWORD PTR[8+rax]
1795	mov	r10,QWORD PTR[16+rax]
1796	mov	r11,QWORD PTR[24+rax]
1797	mov	r12,QWORD PTR[32+rax]
1798	mov	r13,QWORD PTR[40+rax]
1799	mov	r14,QWORD PTR[48+rax]
1800	mov	r15,QWORD PTR[56+rax]
1801
1802
1803	mov	rbx,QWORD PTR[24+rsp]
1804	add	rbx,512
1805
1806	sub	r8,QWORD PTR[rbx]
1807	sbb	r9,QWORD PTR[8+rbx]
1808	sbb	r10,QWORD PTR[16+rbx]
1809	sbb	r11,QWORD PTR[24+rbx]
1810	sbb	r12,QWORD PTR[32+rbx]
1811	sbb	r13,QWORD PTR[40+rbx]
1812	sbb	r14,QWORD PTR[48+rbx]
1813	sbb	r15,QWORD PTR[56+rbx]
1814
1815
; CF from the last sbb survives every mov/cmov below, so all eight
; cmovnc instructions test the same borrow
1816	mov	rsi,QWORD PTR[rax]
1817	mov	rdi,QWORD PTR[8+rax]
1818	mov	rcx,QWORD PTR[16+rax]
1819	mov	rdx,QWORD PTR[24+rax]
1820	cmovnc	rsi,r8
1821	cmovnc	rdi,r9
1822	cmovnc	rcx,r10
1823	cmovnc	rdx,r11
1824	mov	QWORD PTR[rax],rsi
1825	mov	QWORD PTR[8+rax],rdi
1826	mov	QWORD PTR[16+rax],rcx
1827	mov	QWORD PTR[24+rax],rdx
1828
1829	mov	rsi,QWORD PTR[32+rax]
1830	mov	rdi,QWORD PTR[40+rax]
1831	mov	rcx,QWORD PTR[48+rax]
1832	mov	rdx,QWORD PTR[56+rax]
1833	cmovnc	rsi,r12
1834	cmovnc	rdi,r13
1835	cmovnc	rcx,r14
1836	cmovnc	rdx,r15
1837	mov	QWORD PTR[32+rax],rsi
1838	mov	QWORD PTR[40+rax],rdi
1839	mov	QWORD PTR[48+rax],rcx
1840	mov	QWORD PTR[56+rax],rdx
1841
; ---- epilogue: restore the callee-saved registers from the saved rsp
;      (reverse of the push order) and release the frame
1842	mov	rsi,QWORD PTR[rsp]
1843	mov	r15,QWORD PTR[rsi]
1844	mov	r14,QWORD PTR[8+rsi]
1845	mov	r13,QWORD PTR[16+rsi]
1846	mov	r12,QWORD PTR[24+rsi]
1847	mov	rbx,QWORD PTR[32+rsi]
1848	mov	rbp,QWORD PTR[40+rsi]
1849	lea	rsp,QWORD PTR[48+rsi]
1850$L$epilogue::
1851	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
1852	mov	rsi,QWORD PTR[16+rsp]
1853	DB	0F3h,0C3h		;repret
1854$L$SEH_end_mod_exp_512::
1855mod_exp_512	ENDP
1856
1857.text$	ENDS
1858END
1859
1860