1
2;  AMD64 mpn_sub_n
3;  Copyright 2008 Jason Moxham
4;  This file is part of the MPIR Library.
5;  The MPIR Library is free software; you can redistribute it and/or modify
6;  it under the terms of the GNU Lesser General Public License as published
7;  by the Free Software Foundation; either version 2.1 of the License, or (at
8;  your option) any later version.
9;  The MPIR Library is distributed in the hope that it will be useful, but
10;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
12;  License for more details.
13;  You should have received a copy of the GNU Lesser General Public License
14;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
15;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16;  Boston, MA 02110-1301, USA.
17
18;	(rdi,rcx)=(rsi,rcx)-(rdx,rcx)
19;	rax=borrow
20
21%include 'yasm_mac.inc'
22
23    BITS    64
24
;-----------------------------------------------------------------------
;	mpn_sub_n -- limb-wise subtraction with borrow propagation
;
;	In:	rdi = destination limb pointer
;		rsi = minuend limb pointer
;		rdx = subtrahend limb pointer
;		rcx = n, the number of 64-bit limbs in each operand
;	Out:	rax = borrow out of the most significant limb (0 or 1)
;	Clobbers: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
;
;	n is split into rcx = n/4 passes of a four-limb unrolled loop and
;	rax = n%4 leftover limbs.  The running borrow is kept in CF for the
;	whole routine, so nothing between two sbb instructions may write CF:
;	pointers are advanced with lea and counters are stepped with
;	dec/inc (which leave CF untouched) instead of add/sub.
;
;	n == 0 is accepted: nothing is read or written and 0 is returned.
;-----------------------------------------------------------------------
	GLOBAL_FUNC mpn_sub_n
	mov     rax, rcx
	and     rax, 3			; rax = n % 4 (leftover limbs)
	shr     rcx, 2			; rcx = n / 4 (loop passes); CF = a shifted-out bit
	test    rcx, rcx		; ZF <=> no full pass; also forces CF = 0
;	carry flag is clear here
	jnz     loop1

;	n < 4: subtract the 0..3 limbs straight away
	test    rax, rax
	jz      end1			; n == 0: rax = 0 and CF = 0, so end1 returns 0
	mov     r11, [rsi]
	sub     r11, [rdx]		; first limb: no incoming borrow
	mov     [rdi], r11
	dec     rax			; ZF <=> no limbs left; CF (borrow) preserved
	jz      end1
	mov     r11, [rsi+8]
	sbb     r11, [rdx+8]
	mov     [rdi+8], r11
	dec     rax
	jz      end1
	mov     r11, [rsi+16]
	sbb     r11, [rdx+16]
	mov     [rdi+16], r11
	dec     rax			; leftover count was 3, so rax is 0 here
end1:
;	rax == 0 on every path to this point, so the result is just CF
	adc     rax, rax
	ret

	align   8
loop1:
;	one pass = four limbs; CF carries the borrow in and out
	mov     r11, [rsi]
	mov     r8, [rsi+8]
	lea     rsi, [rsi+32]		; advance early; lea does not touch flags
	sbb     r11, [rdx]
	sbb     r8, [rdx+8]
	lea     rdx, [rdx+32]
	mov     [rdi], r11
	mov     [rdi+8], r8
	lea     rdi, [rdi+32]
	mov     r9, [rsi-16]		; limbs 2 and 3 via the already-advanced pointers
	mov     r10, [rsi-8]
	sbb     r9, [rdx-16]
	sbb     r10, [rdx-8]
	mov     [rdi-16], r9
	dec     rcx			; sets ZF for the jnz below, leaves CF alone
	mov     [rdi-8], r10
	jnz     loop1

;	leftover limbs (n % 4); inc/dec tests rax for zero without touching CF
	inc     rax
	dec     rax
	jz      end
	mov     r11, [rsi]
	sbb     r11, [rdx]
	mov     [rdi], r11
	dec     rax
	jz      end
	mov     r11, [rsi+8]
	sbb     r11, [rdx+8]
	mov     [rdi+8], r11
	dec     rax
	jz      end
	mov     r11, [rsi+16]
	sbb     r11, [rdx+16]
	mov     [rdi+16], r11
	dec     rax			; leftover count was 3, so rax is 0 here
end:
;	rax == 0 on every path to this point, so the result is just CF
	adc     rax, rax
	ret
88