1
2;  mpn_sublsh1_n
3
4;  Copyright 2009 Jason Moxham
5
6;  This file is part of the MPIR Library.
7;  The MPIR Library is free software; you can redistribute it and/or modify
8;  it under the terms of the GNU Lesser General Public License as published
9;  by the Free Software Foundation; either version 2.1 of the License, or (at
10;  your option) any later version.
11;  The MPIR Library is distributed in the hope that it will be useful, but
12;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14;  License for more details.
15;  You should have received a copy of the GNU Lesser General Public License
16;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
17;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18;  Boston, MA 02110-1301, USA.
19
20%include 'yasm_mac.inc'
21
22;	ret mpn_sublsh1_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)
23
;-----------------------------------------------------------------------
; mpn_sublsh1_n
;
; For each 8-byte limb i in 0..rcx-1 this routine stores
;     [rdi + 8*i] = [rsi + 8*i] - [rdx + 8*i] - [rdx + 8*i]
; propagating borrows from limb to limb, i.e. {rdi} = {rsi} - 2*{rdx}
; as a multi-limb subtraction.
;
; Registers as used by the code below:
;   In:   rdi = destination limbs (written)
;         rsi = minuend limbs (read)
;         rdx = subtrahend limbs (read, subtracted twice)
;         rcx = limb count (treated as signed; assumed >= 0)
;   Out:  rax = borrow of the first subtraction pass plus borrow of the
;               second pass, so 0, 1 or 2
;   Uses: r8, r9, r10, r11, rcx, flags; rbx is saved and restored
;
; The two subtractions are done as two separate sbb chains per group of
; limbs.  Each chain's borrow is parked between groups as 0 / -1 in a
; register (rax for pass 1, r10 for pass 2) and re-created in CF with
; "add reg, 1".
;
; NOTE(review): the register-to-argument mapping matches the System V
; AMD64 order for the prototype in the file header; GLOBAL_FUNC comes
; from yasm_mac.inc, which is not visible here - confirm the target ABI.
;-----------------------------------------------------------------------
24	GLOBAL_FUNC mpn_sublsh1_n
25	push    rbx	; rbx is used as a limb register below; restored before every ret
26	lea     rdi, [rdi+rcx*8-56]	; bias all three pointers to (base + 8*n - 56) so that
27	lea     rsi, [rsi+rcx*8-56]	; a single index r8, counting upwards, addresses the
28	lea     rdx, [rdx+rcx*8-56]	; limbs and its wrap past zero ends the main loop
29	xor     rax, rax	; rax = parked borrow of pass 1 (0 or -1), initially none
30	xor     r10, r10	; r10 = parked borrow of pass 2 (0 or -1), initially none
31	mov     r8, 3
32	sub     r8, rcx	; r8 = 3 - n
33	jge     L_skip	; n <= 3: no full 4-limb group, go straight to the tail
34	add     r8, 4	; r8 = 7 - n, index of limb 0; CF set iff r8 is now >= 0 (n <= 7)
35	mov     r11, [rsi+r8*8+24]	; preload the first four minuend limbs
36	mov     rcx, [rsi+r8*8+16]	; (rcx is free now: the count lives on in r8)
37	mov     r9, [rsi+r8*8]
38	mov     rbx, [rsi+r8*8+8]
39	jc      L_skiplp	; CF still from the add above (mov leaves flags alone): only one full group
40	align   16
; Main loop: one group of four limbs per iteration.
; On entry r9, rbx, rcx, r11 hold the four minuend limbs at index r8.
41L_lp:
42	add     rax, 1	; CF = pass-1 borrow (rax is 0 or -1)
43	sbb     r9, [rdx+r8*8]	; pass 1: minuend - subtrahend - borrow
44	sbb     rbx, [rdx+r8*8+8]
45	sbb     rcx, [rdx+r8*8+16]
46	sbb     r11, [rdx+r8*8+24]
47	sbb     rax, rax	; park pass-1 borrow: rax = -CF
48	add     r10, 1	; CF = pass-2 borrow (r10 is 0 or -1)
49	sbb     r9, [rdx+r8*8]	; pass 2: subtract the same subtrahend limbs again
50	sbb     rbx, [rdx+r8*8+8]
51	sbb     rcx, [rdx+r8*8+16]
52	sbb     r11, [rdx+r8*8+24]
53	sbb     r10, r10	; park pass-2 borrow: r10 = -CF
54	mov     [rdi+r8*8], r9	; store the four result limbs
55	mov     [rdi+r8*8+8], rbx
56	mov     [rdi+r8*8+16], rcx
57	mov     [rdi+r8*8+24], r11
58	mov     r11, [rsi+r8*8+56]	; preload next group: +56 / +48 with the old index are
59	mov     rcx, [rsi+r8*8+48]	; +24 / +16 relative to the index after the add below
60	add     r8, 4	; advance one group; CF set when r8 wraps to >= 0
61	mov     r9, [rsi+r8*8]
62	mov     rbx, [rsi+r8*8+8]
63	jnc     L_lp	; CF from "add r8, 4" (the loads do not touch flags)
; Last full group of four limbs: same as the loop body, without the preload.
64L_skiplp:
65	add     rax, 1	; CF = pass-1 borrow
66	sbb     r9, [rdx+r8*8]
67	sbb     rbx, [rdx+r8*8+8]
68	sbb     rcx, [rdx+r8*8+16]
69	sbb     r11, [rdx+r8*8+24]
70	sbb     rax, rax	; park pass-1 borrow
71	add     r10, 1	; CF = pass-2 borrow
72	sbb     r9, [rdx+r8*8]
73	sbb     rbx, [rdx+r8*8+8]
74	sbb     rcx, [rdx+r8*8+16]
75	sbb     r11, [rdx+r8*8+24]
76	sbb     r10, r10	; park pass-2 borrow
77	mov     [rdi+r8*8], r9
78	mov     [rdi+r8*8+8], rbx
79	mov     [rdi+r8*8+16], rcx
80	mov     [rdi+r8*8+24], r11
; Tail dispatch.  For a non-negative count r8 is 0..3 here and 3 - r8 limbs
; remain, located at [ptr + r8*8 + 32] onwards.
81L_skip:
82	cmp     r8, 2
83	ja      L_case0	; r8 == 3: nothing left
84	jz      L_case1	; r8 == 2: one limb left
85	jp      L_case2	; r8 == 1: r8-2 has low byte 0xFF (PF=1); r8 == 0 gives 0xFE (PF=0)
; Three limbs left (r8 == 0).
86L_case3:
87	mov     rcx, [rsi+r8*8+48]
88	mov     r9, [rsi+r8*8+32]
89	mov     rbx, [rsi+r8*8+40]
90	add     rax, 1	; CF = pass-1 borrow
91	sbb     r9, [rdx+r8*8+32]
92	sbb     rbx, [rdx+r8*8+40]
93	sbb     rcx, [rdx+r8*8+48]
94	sbb     rax, rax	; rax = -(final pass-1 borrow)
95	add     r10, 1	; CF = pass-2 borrow
96	sbb     r9, [rdx+r8*8+32]
97	sbb     rbx, [rdx+r8*8+40]
98	sbb     rcx, [rdx+r8*8+48]
99	mov     [rdi+r8*8+32], r9	; stores leave CF (final pass-2 borrow) intact
100	mov     [rdi+r8*8+40], rbx
101	mov     [rdi+r8*8+48], rcx
102	sbb     rax, 0	; rax = -(pass-1 borrow) - (pass-2 borrow)
103	neg     rax	; return value = sum of the two borrows (0..2)
104	pop     rbx
105	ret
106	align   16
; Two limbs left (r8 == 1).
107L_case2:
108	mov     r9, [rsi+r8*8+32]
109	mov     rbx, [rsi+r8*8+40]
110	add     rax, 1	; CF = pass-1 borrow
111	sbb     r9, [rdx+r8*8+32]
112	sbb     rbx, [rdx+r8*8+40]
113	sbb     rax, rax	; rax = -(final pass-1 borrow)
114	add     r10, 1	; CF = pass-2 borrow
115	sbb     r9, [rdx+r8*8+32]
116	sbb     rbx, [rdx+r8*8+40]
117	mov     [rdi+r8*8+32], r9	; stores leave CF (final pass-2 borrow) intact
118	mov     [rdi+r8*8+40], rbx
119	sbb     rax, 0	; fold in the pass-2 borrow
120	neg     rax	; return value = sum of the two borrows (0..2)
121	pop     rbx
122	ret
123	align   16
; One limb left (r8 == 2).
124L_case1:
125	mov     r9, [rsi+r8*8+32]
126	add     rax, 1	; CF = pass-1 borrow
127	sbb     r9, [rdx+r8*8+32]
128	sbb     rax, rax	; rax = -(final pass-1 borrow)
129	add     r10, 1	; CF = pass-2 borrow
130	sbb     r9, [rdx+r8*8+32]
131	mov     [rdi+r8*8+32], r9	; store leaves CF (final pass-2 borrow) intact
132	sbb     rax, 0	; fold in the pass-2 borrow
133	neg     rax	; return value = sum of the two borrows (0..2)
134	pop     rbx
135	ret
136	align   16
; No limbs left (r8 == 3): only combine the two parked borrows.
137L_case0:
138	add     r10, 1	; CF = pass-2 borrow
139	sbb     rax, 0	; rax = -(pass-1 borrow) - (pass-2 borrow)
140	neg     rax	; return value = sum of the two borrows (0..2)
141	pop     rbx
142	ret
143	end
144