; PROLOGUE(mpn_subadd_n)
;
;  Copyright 2011 The Code Cavern
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_size_t)
;  rax                       rdi     rsi     rdx     rcx          r8
;  rax                       rcx     rdx      r8      r9    [rsp+40]

; Project macro file; presumably the source of the FRAME_PROC / EXIT_PROC /
; END_PROC macros and of the stack_use symbol used by the procedure below.
%include "yasm_mac.inc"

; Register list handed to FRAME_PROC / EXIT_PROC / END_PROC below.  All four
; registers are callee-saved under the Microsoft x64 calling convention and
; are overwritten by the procedure body.
%define reg_save_list   rbx, rbp, rsi, rdi

        CPU  Nehalem
        BITS 64

;-----------------------------------------------------------------------
;  mp_limb_t mpn_subadd_n(p1, p2, p3, p4, n)              Win64 entry
;
;  For every limb index i in 0 .. n-1:
;        p1[i] = p2[i] - p4[i] - p3[i]
;  Two separate borrow chains are carried from limb to limb: one for the
;  "- p4" subtraction and one for the "- p3" subtraction.  The value
;  returned in rax is the sum of the two final borrows (0, 1 or 2).
;
;  In:   rcx = p1 (destination), rdx = p2, r8 = p3, r9 = p4,
;        n   = fifth argument, read from [rsp+stack_use+40]
;        (the tail dispatch assumes n >= 0 -- TODO confirm with callers)
;  Out:  rax = borrow out, 0..2
;
;  Working registers after the setup code:
;        rdi, rsi, rdx, rcx = p1, p2, p3, p4, each advanced by 8*n - 24
;                             bytes (i.e. pointing 3 limbs before the end)
;        r10                = limb index, starts at 3 - n and counts up
;        ah                 = flags image holding the first-chain borrow
;                             (written by lahf, reloaded by sahf)
;        r9b                = second-chain borrow (written by setc,
;                             reloaded into CF by "add r9b, 255")
;        r8, rbx, rbp, r11  = limb temporaries
;
;  rbx, rbp, rsi and rdi are listed in reg_save_list; presumably the
;  FRAME_PROC / EXIT_PROC / END_PROC macros save and restore them.
;-----------------------------------------------------------------------
        FRAME_PROC mpn_subadd_n, 0, reg_save_list
        mov     rbx, [rsp+stack_use+40]     ; rbx = n
        xor     rax, rax                    ; ah = 0: first chain starts without borrow
        mov     r10d, 3
        sub     r10, rbx                    ; r10 = 3 - n; CF = 1 iff n > 3 (unsigned)
        lea     rdi, [rcx+rbx*8-24]         ; lea and mov do not modify flags, so the
        lea     rsi, [rdx+rbx*8-24]         ;   CF produced by the sub above is still
        lea     rdx, [r8+rbx*8-24]          ;   intact when the jnc below tests it
        lea     rcx, [r9+rbx*8-24]
        mov     r9, rax                     ; r9b = 0: second chain starts without borrow
        jnc     .2                          ; n <= 3: no full 4-limb pass, tail only

        align   16
; Main loop: four limbs per pass.  r10 is negative on entry to each pass.
.1:     sahf                                ; CF = first-chain borrow
        mov     r8, [rsi+r10*8]
        sbb     r8, [rcx+r10*8]
        mov     rbx, [rsi+r10*8+8]
        sbb     rbx, [rcx+r10*8+8]
        mov     r11, [rsi+r10*8+24]
        mov     rbp, [rsi+r10*8+16]
        sbb     rbp, [rcx+r10*8+16]
        sbb     r11, [rcx+r10*8+24]
        lahf                                ; save first-chain borrow in ah
        add     r9b, 255                    ; CF = (r9b != 0): reload second-chain borrow
        sbb     r8, [rdx+r10*8]
        sbb     rbx, [rdx+r10*8+8]
        mov     [rdi+r10*8], r8
        sbb     rbp, [rdx+r10*8+16]
        sbb     r11, [rdx+r10*8+24]
        setc    r9b                         ; save second-chain borrow
        mov     [rdi+r10*8+24], r11
        mov     [rdi+r10*8+16], rbp
        mov     [rdi+r10*8+8], rbx
        add     r10, 4
        jnc     .1                          ; no carry: r10 still negative, run another pass

; Tail dispatch.  r10 is now 0..3 and 3 - r10 limbs remain; they are the
; last limbs of each operand, at offsets +0, +8 and +16 from the biased
; pointers.
.2:     cmp     r10, 2
        jg      .6                          ; r10 = 3: nothing left
        je      .5                          ; r10 = 2: one limb left
        jp      .4                          ; r10 = 1: r10-2 = -1, low byte 0xFF has even parity
                                            ; r10 = 0: low byte 0xFE has odd parity, fall through

; Three remaining limbs.
.3:     sahf
        mov     r8, [rsi]
        sbb     r8, [rcx]
        mov     rbx, [rsi+8]
        sbb     rbx, [rcx+8]
        mov     rbp, [rsi+16]
        sbb     rbp, [rcx+16]
        lahf
        add     r9b, 255
        sbb     r8, [rdx]
        sbb     rbx, [rdx+8]
        mov     [rdi], r8
        sbb     rbp, [rdx+16]
        setc    r9b
        mov     [rdi+16], rbp
        mov     [rdi+8], rbx
        sahf                                ; CF = first-chain borrow
        mov     eax, 0                      ; mov rather than xor: CF must survive to the adc
        adc     rax, 0                      ; rax = first-chain borrow
        add     r9b, 255                    ; CF = second-chain borrow
        adc     rax, 0                      ; rax = sum of both borrows
        EXIT_PROC reg_save_list

; Two remaining limbs.
.4:     sahf
        mov     r8, [rsi+8]
        sbb     r8, [rcx+8]
        mov     rbx, [rsi+16]
        sbb     rbx, [rcx+16]
        lahf
        add     r9b, 255
        sbb     r8, [rdx+8]
        sbb     rbx, [rdx+16]
        mov     [rdi+8], r8
        setc    r9b
        mov     [rdi+16], rbx
        sahf                                ; CF = first-chain borrow
        mov     eax, 0                      ; mov rather than xor: CF must survive to the adc
        adc     rax, 0
        add     r9b, 255                    ; CF = second-chain borrow
        adc     rax, 0
        EXIT_PROC reg_save_list

; One remaining limb, then fall through into the common return path.
.5:     sahf
        mov     r8, [rsi+16]
        sbb     r8, [rcx+16]
        lahf
        add     r9b, 255
        sbb     r8, [rdx+16]
        mov     [rdi+16], r8
        setc    r9b

; Return path: rax = first-chain borrow + second-chain borrow.
.6:     sahf
        mov     eax, 0                      ; mov rather than xor: CF must survive to the adc
        adc     rax, 0
        add     r9b, 255
        adc     rax, 0
        END_PROC reg_save_list

        end
