dnl  mpn_sumdiff_n

dnl  Copyright 2011 The Code Cavern

dnl  This file is part of the MPIR Library.

dnl  The MPIR Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
dnl  your option) any later version.

dnl  The MPIR Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl  Boston, MA 02110-1301, USA.

include(`../config.m4')

dnl  mpn_sumdiff_n -- limb-wise sum and difference of two equal-length
dnl  operands, computed in a single pass over the inputs.
dnl
dnl  Arguments as read by the code below (the order matches the System V
dnl  AMD64 integer argument registers; presumably that is the target ABI):
dnl    rdi  destination for the sums         x[i] + y[i] + carry
dnl    rsi  destination for the differences  x[i] - y[i] - borrow
dnl    rdx  first source operand x
dnl    rcx  second source operand y
dnl    r8   number of 64-bit limbs n
dnl
dnl  Return value:
dnl    rax  2*(carry out of the sum) + (borrow out of the difference)
dnl
dnl  Flag bookkeeping: both chains need CF, so each one is parked in a
dnl  register while the other runs.
dnl    ah   holds the addition carry, moved in and out with sahf and lahf
dnl    r9b  holds the subtraction borrow as 0 or 1; adding 255 to it
dnl         regenerates CF, and setc stores the new value afterwards
dnl  Only mov is interleaved with the adc and sbb chains, because mov does
dnl  not touch the flags.
dnl
dnl  All four pointers are biased to the last three limbs and r10 counts
dnl  upward from 3-n.  The main loop handles four limbs per pass and exits
dnl  with r10 in 0..3, meaning 3-r10 limbs are still to be done by the tail.
dnl
dnl  Each sum limb is stored only after the sbb that reads the same offset
dnl  from rcx, and difference limbs are stored last.
dnl  NOTE(review): presumably this ordering is what allows the destinations
dnl  to overlap the sources exactly; confirm against the documented contract.
dnl
dnl  rbx, rbp and r12-r15 are saved on entry and restored before each ret.
dnl  lahf and sahf are emitted as raw opcode bytes.
dnl  NOTE(review): presumably because some assemblers reject the mnemonics
dnl  in 64-bit mode; confirm.

ASM_START()
PROLOGUE(mpn_sumdiff_n)
	push	%rbx
	xor	%r9,%r9			# r9b = parked borrow, starts at 0
	push	%rbp
	xor	%rax,%rax		# ah = parked carry, starts at 0
	push	%r12
	mov	$3,%r10d
	push	%r13
	lea	-24(%rdi,%r8,8),%rdi	# bias every pointer to its last three limbs
	push	%r14
	lea	-24(%rsi,%r8,8),%rsi
	push	%r15
	sub	%r8,%r10		# r10 = 3-n, CF set exactly when n > 3
	lea	-24(%rdx,%r8,8),%rdx	# lea keeps the flags produced by sub
	lea	-24(%rcx,%r8,8),%rcx
	jnc	skiplp			# n <= 3: go straight to the tail code
	.align	16
lp:	.byte	0x9e			# sahf: CF = parked addition carry
	mov	(%rdx,%r10,8),%r8	# load x limb 0; r11 keeps a copy for the subtract
	mov	%r8,%r11
	adc	(%rcx,%r10,8),%r8
	mov	8(%rdx,%r10,8),%rbx	# limb 1, copy in r13
	mov	%rbx,%r13
	adc	8(%rcx,%r10,8),%rbx
	mov	16(%rdx,%r10,8),%rbp	# limbs 2 and 3, copies in r14 and r15
	mov	24(%rdx,%r10,8),%r12
	mov	%rbp,%r14
	mov	%r12,%r15
	adc	16(%rcx,%r10,8),%rbp
	adc	24(%rcx,%r10,8),%r12
	.byte	0x9f			# lahf: park the addition carry in ah
	add	$255,%r9b		# CF = parked borrow
	sbb	(%rcx,%r10,8),%r11
	mov	%r8,(%rdi,%r10,8)	# sum limb 0 stored after y limb 0 was last read
	sbb	8(%rcx,%r10,8),%r13
	sbb	16(%rcx,%r10,8),%r14
	mov	%rbx,8(%rdi,%r10,8)
	sbb	24(%rcx,%r10,8),%r15
	mov	%rbp,16(%rdi,%r10,8)
	mov	%r12,24(%rdi,%r10,8)
	mov	%r13,8(%rsi,%r10,8)
	setc	%r9b			# park the borrow before add below overwrites CF
	add	$4,%r10			# carries out once r10 has reached 0..3
	mov	%r14,16-32(%rsi,%r10,8)	# -32 undoes the advance of r10 just made
	mov	%r15,24-32(%rsi,%r10,8)
	mov	%r11,-32(%rsi,%r10,8)
	jnc	lp			# the stores above left CF from add untouched
skiplp:
	cmp	$2,%r10			# r10 is 0..3 here and 3-r10 limbs remain
	jg	case0			# r10 = 3: nothing left
	je	case1			# r10 = 2: one limb left
	jp	case2			# r10 = 1: low byte of r10-2 is 0xff, even parity
					# r10 = 0: low byte is 0xfe, odd parity, fall through
case3:	.byte	0x9e			# sahf: three limbs left, at offsets 0, 8 and 16
	mov	(%rdx),%r8
	mov	%r8,%r11
	adc	(%rcx),%r8
	mov	8(%rdx),%rbx
	mov	%rbx,%r13
	adc	8(%rcx),%rbx
	mov	16(%rdx),%rbp
	mov	%rbp,%r14
	adc	16(%rcx),%rbp
	.byte	0x9f			# lahf: park the addition carry
	add	$255,%r9b		# CF = parked borrow
	sbb	(%rcx),%r11
	mov	%r8,(%rdi)
	sbb	8(%rcx),%r13
	sbb	16(%rcx),%r14
	mov	%rbx,8(%rdi)
	mov	%rbp,16(%rdi)
	mov	%r13,8(%rsi)
	setc	%r9b			# park the final borrow
	mov	%r14,16(%rsi)
	mov	%r11,(%rsi)
	.byte	0x9e			# sahf: CF = final addition carry
	mov	$0,%rax			# mov rather than xor so that CF survives
	adc	$0,%rax			# rax = addition carry
	add	$255,%r9b		# CF = final borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
case2:	.byte	0x9e			# sahf: two limbs left, at offsets 8 and 16
	mov	8(%rdx),%r8
	mov	%r8,%r11
	adc	8(%rcx),%r8
	mov	8+8(%rdx),%rbx
	mov	%rbx,%r13
	adc	8+8(%rcx),%rbx
	.byte	0x9f			# lahf: park the addition carry
	add	$255,%r9b		# CF = parked borrow
	sbb	8(%rcx),%r11
	mov	%r8,8(%rdi)
	sbb	8+8(%rcx),%r13
	mov	%rbx,8+8(%rdi)
	mov	%r13,8+8(%rsi)
	setc	%r9b			# park the final borrow
	mov	%r11,8(%rsi)
	.byte	0x9e			# sahf: CF = final addition carry
	mov	$0,%rax			# mov rather than xor so that CF survives
	adc	$0,%rax			# rax = addition carry
	add	$255,%r9b		# CF = final borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
case1:	.byte	0x9e			# sahf: one limb left, at offset 16
	mov	16(%rdx),%r8
	mov	%r8,%r11
	adc	16(%rcx),%r8
	.byte	0x9f			# lahf: park the addition carry
	add	$255,%r9b		# CF = parked borrow
	sbb	16(%rcx),%r11
	mov	%r8,16(%rdi)
	setc	%r9b			# park the final borrow
	mov	%r11,16(%rsi)
					# fall through into the shared return sequence
case0:	.byte	0x9e			# sahf: CF = final addition carry
	mov	$0,%rax			# mov rather than xor so that CF survives
	adc	$0,%rax			# rax = addition carry
	add	$255,%r9b		# CF = final borrow
	rcl	$1,%rax			# rax = 2*carry + borrow
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	ret
EPILOGUE()
