1; PROLOGUE(mpn_subadd_n)
2
3;  Copyright 2009 Jason Moxham
4;
5;  Windows Conversion Copyright 2008 Brian Gladman
6;
7;  This file is part of the MPIR Library.
8;
9;  The MPIR Library is free software; you can redistribute it and/or modify
10;  it under the terms of the GNU Lesser General Public License as published
11;  by the Free Software Foundation; either version 2.1 of the License, or (at
12;  your option) any later version.
13;  The MPIR Library is distributed in the hope that it will be useful, but
14;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16;  License for more details.
17;  You should have received a copy of the GNU Lesser General Public License
18;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
19;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20;  Boston, MA 02110-1301, USA.
21;
22;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)
23;  rax                       rdi     rsi     rdx     rcx          r8      <- System V AMD64 argument registers
24;  rax                       rcx     rdx      r8      r9    [rsp+40]      <- Windows x64 argument registers (the ones this file reads)
25
26%include "yasm_mac.inc"
27
; Register list handed to FRAME_PROC / EXIT_PROC / END_PROC further down.
; All four are callee-saved under the Microsoft x64 convention and are
; overwritten by the routine: rdi/rsi become array pointers, rbx/rbp hold
; limbs being processed.
28%define reg_save_list   rbx, rbp, rsi, rdi
29
; Assembler target selection: Athlon64 feature level, 64-bit mode.
30    CPU  Athlon64
31    BITS 64
32
;-----------------------------------------------------------------------
; mp_limb_t mpn_subadd_n(dst, src1, src2, src3, n)          Windows x64
;
; For every limb i in 0..n-1:  dst[i] = src1[i] - src3[i] - src2[i],
; with the borrow of each of the two subtractions carried from limb to
; limb.  Limbs are 64-bit words.
;
; In:    rcx = dst, rdx = src1, r8 = src2, r9 = src3, 5th arg on stack = n
; Out:   rax = final borrow of the "- src3" chain
;              + final borrow of the "- src2" chain      (0, 1 or 2)
; Uses:  rbx, rbp, rsi, rdi (all in reg_save_list), r8, r9, r10, r11, flags
;
; Register roles once set-up is done:
;   rdi = dst, rsi = src1, rdx = src2, rcx = src3, each biased to
;         base + 8*n - 56 so a small index in r9 reaches the limbs
;   r9  = limb index; negative while more than one 4-limb block remains
;   rax = saved borrow of the "- src3" chain, kept as 0 or -1
;   r10 = saved borrow of the "- src2" chain, kept as 0 or -1
;   r8, rbx, rbp, r11 = limbs 0, 1, 2, 3 of the block in flight
;
; FRAME_PROC / EXIT_PROC / END_PROC / xalign and the symbol stack_use are
; not defined in this file (see yasm_mac.inc).  Presumably FRAME_PROC
; emits the entry point and saves reg_save_list, EXIT_PROC / END_PROC
; restore it and return, and xalign pads to the given alignment -- confirm
; against the include file.
;-----------------------------------------------------------------------
33    FRAME_PROC mpn_subadd_n, 0, reg_save_list
34    mov     rbx, qword [rsp+stack_use+40]   ; rbx = n (5th argument); stack_use presumably covers what the prologue pushed
; Bias all four pointers.  rcx and rdx are consumed as inputs on the first
; two lines before being overwritten on the last two, so the order matters.
35	lea     rdi, [rcx+rbx*8-56]             ; rdi = dst  + 8*n - 56
36	lea     rsi, [rdx+rbx*8-56]             ; rsi = src1 + 8*n - 56
37	lea     rdx, [r8+rbx*8-56]              ; rdx = src2 + 8*n - 56
38	lea     rcx, [r9+rbx*8-56]              ; rcx = src3 + 8*n - 56
39	mov     r9, 3
40	xor     rax, rax                        ; no incoming borrow for the "- src3" chain
41	xor     r10, r10                        ; no incoming borrow for the "- src2" chain
42	sub     r9, rbx                         ; r9 = 3 - n
43	jge     .3                              ; n <= 3 (signed): no full 4-limb block, tail only
44	add     r9, 4                           ; r9 = 7 - n; CF = 1 iff r9 is now >= 0, i.e. n <= 7
; The four loads below leave the flags alone, so the jc still tests the CF
; produced by the add above.
45	mov     rbp, [rsi+r9*8+16]              ; src1 limb 2 of the first block
46	mov     r11, [rsi+r9*8+24]              ; src1 limb 3
47	mov     r8, [rsi+r9*8]                  ; src1 limb 0
48	mov     rbx, [rsi+r9*8+8]               ; src1 limb 1 (n is no longer needed in rbx)
49	jc      .2                              ; exactly one full block: skip the loop
50
51	xalign  16
; Main loop: one 4-limb block per pass, entered with the block's src1 limbs
; already in r8/rbx/rbp/r11.
52.1:	add     rax, 1                      ; rax is 0 or -1: CF = 1 iff it was -1 (reload saved borrow)
53	sbb     r8, [rcx+r9*8]                  ; limbs -= src3, borrow rippling through CF
54	sbb     rbx, [rcx+r9*8+8]
55	sbb     rbp, [rcx+r9*8+16]
56	sbb     r11, [rcx+r9*8+24]
57	sbb     rax, rax                        ; save borrow: rax = -CF
58	add     r10, 1                          ; reload the second chain's borrow into CF
59	sbb     r8, [rdx+r9*8]                  ; limbs -= src2
60	sbb     rbx, [rdx+r9*8+8]
61	sbb     rbp, [rdx+r9*8+16]
62	sbb     r11, [rdx+r9*8+24]
63	sbb     r10, r10                        ; save borrow: r10 = -CF
64	mov     [rdi+r9*8], r8                  ; store the four result limbs
65	mov     [rdi+r9*8+24], r11
66	mov     [rdi+r9*8+8], rbx
67	mov     [rdi+r9*8+16], rbp
68	mov     rbp, [rsi+r9*8+48]              ; preload limb 2 of the next block (+16 after r9 += 4)
69	mov     r11, [rsi+r9*8+56]              ; preload limb 3 of the next block (+24 after r9 += 4)
70	add     r9, 4                           ; next block; CF = 1 once r9 reaches 0..3
71	mov     r8, [rsi+r9*8]                  ; limb 0 of the next block (mov keeps CF)
72	mov     rbx, [rsi+r9*8+8]               ; limb 1 of the next block
73	jnc     .1                              ; r9 still negative: another loop pass follows
; Last full 4-limb block: same arithmetic as the loop body, without the
; preload of a following block.
74.2:	add     rax, 1                      ; reload first-chain borrow into CF
75	sbb     r8, [rcx+r9*8]
76	sbb     rbx, [rcx+r9*8+8]
77	sbb     rbp, [rcx+r9*8+16]
78	sbb     r11, [rcx+r9*8+24]
79	sbb     rax, rax                        ; rax = -(first-chain borrow)
80	add     r10, 1                          ; reload second-chain borrow into CF
81	sbb     r8, [rdx+r9*8]
82	sbb     rbx, [rdx+r9*8+8]
83	sbb     rbp, [rdx+r9*8+16]
84	sbb     r11, [rdx+r9*8+24]
85	sbb     r10, r10                        ; r10 = -(second-chain borrow)
86	mov     [rdi+r9*8], r8
87	mov     [rdi+r9*8+24], r11
88	mov     [rdi+r9*8+8], rbx
89	mov     [rdi+r9*8+16], rbp
; Tail dispatch.  For n >= 0, r9 is 0..3 here and 3 - r9 limbs remain, at
; byte offsets +32, +40, +48 from the r9-indexed address.  One cmp feeds
; three branches; PF is taken from the low byte of r9 - 2.
90.3:	cmp     r9, 2
91	ja      .7                              ; r9 = 3: nothing left
92	jz      .6                              ; r9 = 2: one limb left
93	jp      .5                              ; r9 = 1: r9-2 = ..FF, even parity, PF = 1: two limbs left
; r9 = 0 (r9-2 = ..FE, odd parity, PF = 0): three limbs left.
94.4:	mov     rbp, [rsi+r9*8+48]
95	mov     r8, [rsi+r9*8+32]
96	mov     rbx, [rsi+r9*8+40]
97	add     rax, 1                          ; reload first-chain borrow into CF
98	sbb     r8, [rcx+r9*8+32]
99	sbb     rbx, [rcx+r9*8+40]
100	sbb     rbp, [rcx+r9*8+48]
101	sbb     rax, rax                        ; rax = -(final first-chain borrow)
102	add     r10, 1                          ; reload second-chain borrow into CF
103	sbb     r8, [rdx+r9*8+32]
104	sbb     rbx, [rdx+r9*8+40]
105	sbb     rbp, [rdx+r9*8+48]
106	mov     [rdi+r9*8+32], r8               ; the stores keep CF for the sbb below
107	mov     [rdi+r9*8+40], rbx
108	mov     [rdi+r9*8+48], rbp
109	sbb     rax, 0                          ; rax -= final second-chain borrow
110	neg     rax                             ; rax = total borrow count (0, 1 or 2)
111	EXIT_PROC reg_save_list
112
113	xalign  16
; Two limbs left.
114.5:	mov     r8, [rsi+r9*8+32]
115	mov     rbx, [rsi+r9*8+40]
116	add     rax, 1                          ; reload first-chain borrow into CF
117	sbb     r8, [rcx+r9*8+32]
118	sbb     rbx, [rcx+r9*8+40]
119	sbb     rax, rax                        ; rax = -(final first-chain borrow)
120	add     r10, 1                          ; reload second-chain borrow into CF
121	sbb     r8, [rdx+r9*8+32]
122	sbb     rbx, [rdx+r9*8+40]
123	mov     [rdi+r9*8+32], r8
124	mov     [rdi+r9*8+40], rbx
125	sbb     rax, 0                          ; rax -= final second-chain borrow
126	neg     rax                             ; rax = total borrow count
127	EXIT_PROC reg_save_list
128
129	xalign  16
; One limb left.
130.6:	mov     r8, [rsi+r9*8+32]
131	add     rax, 1                          ; reload first-chain borrow into CF
132	sbb     r8, [rcx+r9*8+32]
133	sbb     rax, rax                        ; rax = -(final first-chain borrow)
134	add     r10, 1                          ; reload second-chain borrow into CF
135	sbb     r8, [rdx+r9*8+32]
136	mov     [rdi+r9*8+32], r8
137	sbb     rax, 0                          ; rax -= final second-chain borrow
138	neg     rax                             ; rax = total borrow count
139	EXIT_PROC reg_save_list
140
141	xalign  16
; No limbs left: both saved borrows are still held as 0 / -1.
142.7:	add     rax, r10                    ; rax = -(borrow1 + borrow2)
143	neg     rax                             ; rax = total borrow count (0 when n = 0)
; Label .8 is not the target of any jump visible in this file.
144.8:	END_PROC reg_save_list
145
146    end
147