1; PROLOGUE(mpn_sublsh1_n)
2
3;  Copyright 2009 Jason Moxham
4;
5;  Windows Conversion Copyright 2008 Brian Gladman
6;
7;  This file is part of the MPIR Library.
8;
9;  The MPIR Library is free software; you can redistribute it and/or modify
10;  it under the terms of the GNU Lesser General Public License as published
11;  by the Free Software Foundation; either version 2.1 of the License, or (at
12;  your option) any later version.
13;  The MPIR Library is distributed in the hope that it will be useful, but
14;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16;  License for more details.
17;  You should have received a copy of the GNU Lesser General Public License
18;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
19;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20;  Boston, MA 02110-1301, USA.
21;
22;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)
23;  rax                        rdi     rsi     rdx        rcx     (System V AMD64 argument registers)
24;  rax                        rcx     rdx      r8         r9     (Microsoft x64 argument registers, used by this file)
25
26%include "yasm_mac.inc"
27
; Register list handed to FRAME_PROC / EXIT_PROC / END_PROC in this file.
; The procedure body overwrites rbx, rsi and rdi, all of which are
; callee-saved under the Microsoft x64 calling convention.
28%define reg_save_list rbx, rsi, rdi
29
; Assembler setup: target CPU selection and 64-bit code generation.
30    CPU  Athlon64
31    BITS 64
32
;-----------------------------------------------------------------------
; mp_limb_t mpn_sublsh1_n(dst, src1, src2, n)
;
; Computes dst[i] = src1[i] - src2[i] - src2[i] for i = 0 .. n-1 (src2 is
; subtracted twice, using two independent borrow chains) and returns the
; sum of the two final borrows (0, 1 or 2) in rax.
;
; In:   rcx = dst, rdx = src1, r8 = src2, r9 = n (number of 64-bit limbs)
; Out:  rax = borrow of first subtraction + borrow of second subtraction
;
; Register roles after the setup code:
;   rdi, rsi, rdx     dst, src1, src2, each biased to base + n*8 - 56
;   r8                limb index; negative inside the main loop, 0..3 for
;                     the last full block and for the tail
;   rax               saved borrow of the first subtraction  (0 or -1)
;   r10               saved borrow of the second subtraction (0 or -1)
;   r9, rbx, rcx, r11 the limbs currently being processed
;
; Borrow idiom: "add reg, 1" on a saved borrow (0 or -1) sets CF exactly
; when the saved value is -1, reloading the borrow into CF; "sbb reg, reg"
; stores CF back as 0 or -1.
;
; FRAME_PROC / EXIT_PROC / END_PROC are macros from yasm_mac.inc (not
; visible here); presumably they save/restore reg_save_list and return.
; NOTE(review): the meaning of the literal 0 passed to FRAME_PROC is
; defined by that macro -- confirm against yasm_mac.inc.
;-----------------------------------------------------------------------
33    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list
34    mov     rax, r9                     ; rax = n
35
; Bias all three pointers to base + n*8 - 56. With r8 = 7 - n the operand
; [ptr + r8*8] then addresses limb 0, and with r8 = 3 - n the tail operand
; [ptr + r8*8 + 32] addresses limb 0 as well.
36    lea     rdi, [rcx+rax*8-56]
37    lea     rsi, [rdx+rax*8-56]
38    lea     rdx, [ r8+rax*8-56]
39    mov     rcx, rax                    ; rcx = n (dst now lives in rdi)
40
41	xor     rax, rax                    ; saved borrow 1 = 0
42	xor     r10, r10                    ; saved borrow 2 = 0
43	mov     r8, 3
44	sub     r8, rcx                     ; r8 = 3 - n
45	jge     .3                          ; n <= 3: no full 4-limb block, tail only
46	add     r8, 4                       ; r8 = 7 - n; CF set when n is 4..7
; Load the first four src1 limbs. mov does not modify flags, so the CF
; produced by "add r8, 4" is still valid at the jc below.
47	mov     r11, [rsi+r8*8+24]
48	mov     rcx, [rsi+r8*8+16]
49	mov     r9, [rsi+r8*8]
50	mov     rbx, [rsi+r8*8+8]
51	jc      .2                          ; exactly one full block: skip the loop
52
; Main loop: one 4-limb block per iteration, entered with the block's
; src1 limbs already in r9, rbx, rcx, r11.
53	xalign   16
54.1: add     rax, 1                      ; CF = saved borrow 1
55	sbb     r9, [rdx+r8*8]              ; first subtraction of src2
56	sbb     rbx, [rdx+r8*8+8]
57	sbb     rcx, [rdx+r8*8+16]
58	sbb     r11, [rdx+r8*8+24]
59	sbb     rax, rax                    ; save borrow 1 (0 or -1)
60	add     r10, 1                      ; CF = saved borrow 2
61	sbb     r9, [rdx+r8*8]              ; second subtraction of src2
62	sbb     rbx, [rdx+r8*8+8]
63	sbb     rcx, [rdx+r8*8+16]
64	sbb     r11, [rdx+r8*8+24]
65	sbb     r10, r10                    ; save borrow 2 (0 or -1)
66	mov     [rdi+r8*8], r9
67	mov     [rdi+r8*8+8], rbx
68	mov     [rdi+r8*8+16], rcx
69	mov     [rdi+r8*8+24], r11
; Preload the next block's src1 limbs. The two upper limbs are addressed
; with the old index (+56/+48), the two lower ones after r8 += 4.
70	mov     r11, [rsi+r8*8+56]
71	mov     rcx, [rsi+r8*8+48]
72	add     r8, 4                       ; CF set once r8 becomes 0..3
73	mov     r9, [rsi+r8*8]
74	mov     rbx, [rsi+r8*8+8]
75	jnc     .1                          ; r8 still negative: keep looping
; Last full block: same as the loop body, but without the preload. The
; limbs were loaded either before the loop or by the final loop iteration.
76.2: add     rax, 1                      ; CF = saved borrow 1
77	sbb     r9, [rdx+r8*8]
78	sbb     rbx, [rdx+r8*8+8]
79	sbb     rcx, [rdx+r8*8+16]
80	sbb     r11, [rdx+r8*8+24]
81	sbb     rax, rax                    ; save borrow 1
82	add     r10, 1                      ; CF = saved borrow 2
83	sbb     r9, [rdx+r8*8]
84	sbb     rbx, [rdx+r8*8+8]
85	sbb     rcx, [rdx+r8*8+16]
86	sbb     r11, [rdx+r8*8+24]
87	sbb     r10, r10                    ; save borrow 2
88	mov     [rdi+r8*8], r9
89	mov     [rdi+r8*8+8], rbx
90	mov     [rdi+r8*8+16], rcx
91	mov     [rdi+r8*8+24], r11
; Tail dispatch: r8 is 0..3 here and 3 - r8 limbs remain, starting at
; [ptr + r8*8 + 32]. A single cmp drives all three branches:
;   r8 = 3 -> ja  (no limbs left)
;   r8 = 2 -> jz  (one limb)
;   r8 = 1 -> jp  (r8 - 2 = -1, low byte 0xFF has even parity: two limbs)
;   r8 = 0 -> fall through (r8 - 2 = -2, low byte 0xFE, odd parity: three limbs)
92.3: cmp     r8, 2
93	ja      .7
94	jz      .6
95	jp      .5
; Three remaining limbs.
96.4: mov     rcx, [rsi+r8*8+48]
97	mov     r9, [rsi+r8*8+32]
98	mov     rbx, [rsi+r8*8+40]
99	add     rax, 1                      ; CF = saved borrow 1
100	sbb     r9, [rdx+r8*8+32]
101	sbb     rbx, [rdx+r8*8+40]
102	sbb     rcx, [rdx+r8*8+48]
103	sbb     rax, rax                    ; rax = -(final borrow 1)
104	add     r10, 1                      ; CF = saved borrow 2
105	sbb     r9, [rdx+r8*8+32]
106	sbb     rbx, [rdx+r8*8+40]
107	sbb     rcx, [rdx+r8*8+48]
; The stores below do not touch flags, so CF is still the final borrow of
; the second chain when "sbb rax, 0" consumes it.
108	mov     [rdi+r8*8+32], r9
109	mov     [rdi+r8*8+40], rbx
110	mov     [rdi+r8*8+48], rcx
111	sbb     rax, 0                      ; rax = -(borrow 1) - (borrow 2)
112	neg     rax                         ; return borrow 1 + borrow 2
113	EXIT_PROC reg_save_list
114
; Two remaining limbs.
115	xalign   16
116.5: mov     r9, [rsi+r8*8+32]
117	mov     rbx, [rsi+r8*8+40]
118	add     rax, 1                      ; CF = saved borrow 1
119	sbb     r9, [rdx+r8*8+32]
120	sbb     rbx, [rdx+r8*8+40]
121	sbb     rax, rax                    ; rax = -(final borrow 1)
122	add     r10, 1                      ; CF = saved borrow 2
123	sbb     r9, [rdx+r8*8+32]
124	sbb     rbx, [rdx+r8*8+40]
125	mov     [rdi+r8*8+32], r9
126	mov     [rdi+r8*8+40], rbx
127	sbb     rax, 0                      ; subtract final borrow 2 (CF kept across the movs)
128	neg     rax                         ; return borrow 1 + borrow 2
129	EXIT_PROC reg_save_list
130
; One remaining limb.
131	xalign   16
132.6: mov     r9, [rsi+r8*8+32]
133	add     rax, 1                      ; CF = saved borrow 1
134	sbb     r9, [rdx+r8*8+32]
135	sbb     rax, rax                    ; rax = -(final borrow 1)
136	add     r10, 1                      ; CF = saved borrow 2
137	sbb     r9, [rdx+r8*8+32]
138	mov     [rdi+r8*8+32], r9
139	sbb     rax, 0                      ; subtract final borrow 2 (CF kept across the mov)
140	neg     rax                         ; return borrow 1 + borrow 2
141	EXIT_PROC reg_save_list
142
; No remaining limbs: rax already holds -(borrow 1) (or 0 when n = 0), so
; only the second chain's saved borrow has to be folded in.
143	xalign   16
144.7: add     r10, 1                      ; CF = saved borrow 2
; NOTE(review): no branch in this procedure targets .8.
145.8: sbb     rax, 0                      ; rax = -(borrow 1) - (borrow 2)
146	neg     rax                         ; return borrow 1 + borrow 2
147	END_PROC reg_save_list
148
149	end
150