dnl  mpn_sumdiff

dnl  Copyright 2011 The Code Cavern

dnl  This file is part of the MPIR Library.

dnl  The MPIR Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
dnl  your option) any later version.

dnl  The MPIR Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl  Boston, MA 02110-1301, USA.

include(`../config.m4')

dnl  mpn_sumdiff_n
dnl
dnl  In a single pass over two limb vectors of equal length, stores their
dnl  limb-wise sum (carry propagated from limb to limb) through rdi and their
dnl  limb-wise difference (borrow propagated from limb to limb) through rsi.
dnl
dnl  Register roles on entry, as used by the code below.  These are the first
dnl  five integer argument registers of the System V AMD64 convention.
dnl  NOTE(review): presumably the C prototype is
dnl    mp_limb_t mpn_sumdiff_n(mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
dnl  -- confirm against the library header.
dnl    rdi  destination of the sum                          (written)
dnl    rsi  destination of the difference                   (written)
dnl    rdx  first source, also the minuend of the difference (read)
dnl    rcx  second source, the subtrahend                   (read)
dnl    r8   length n in 64-bit limbs; n = 0 stores nothing and returns 0
dnl
dnl  Result in rax: 2 * (carry out of the sum) + (borrow out of the difference).
dnl
dnl  Internal state:
dnl    r10  starts at 3-n and advances by 4 per loop pass; all four pointers
dnl         are biased by 8*n-24 bytes so that (ptr,r10,8) addresses the
dnl         current limb.  On leaving the loop r10 is 0..3 and 3-r10 limbs
dnl         are still to be done.
dnl    ah   carry of the add chain, parked with lahf and restored with sahf
dnl    r9b  borrow of the subtract chain as 0 or 1, restored into CF by
dnl         add $255,%r9b
dnl
dnl  Modifies rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.  rbx, rbp and
dnl  r12-r15 are pushed on entry and popped before every ret.
dnl
dnl  lahf and sahf are emitted as raw opcode bytes.
dnl  NOTE(review): presumably because some assemblers reject these mnemonics
dnl  in 64-bit mode -- confirm before replacing them with the mnemonics.

ASM_START()
PROLOGUE(mpn_sumdiff_n)
	push %rbx
	xor %r9,%r9			# r9b = parked borrow, initially 0
	push %rbp
	xor %rax,%rax			# ah = parked carry, initially 0
	push %r12
	mov $3,%r10d
	push %r13
	lea -24(%rdi,%r8,8),%rdi	# bias every pointer to its last three limbs
	push %r14
	lea -24(%rsi,%r8,8),%rsi
	push %r15
	sub %r8,%r10			# r10 = 3-n, CF set exactly when n > 3
	lea -24(%rdx,%r8,8),%rdx	# lea and push leave the flags of sub intact
	lea -24(%rcx,%r8,8),%rcx
	jnc skiplp			# n <= 3: go straight to the tail cases
# Main loop, four limbs per pass.
# NOTE(review): .align counts bytes on x86 ELF but is a power of two on some
# other object formats -- confirm for every target this file is built for.
	.align 16
lp:	.byte 0x9e			# sahf: CF = carry parked by the previous pass
	mov (%rdx,%r10,8),%r8
	mov 24(%rdx,%r10,8),%r12
	mov %r8,%r11			# second copy of each rdx limb feeds the sbb chain
	adc (%rcx,%r10,8),%r8
	mov 8(%rdx,%r10,8),%rbx
	mov %rbx,%r13
	adc 8(%rcx,%r10,8),%rbx
	mov 16(%rdx,%r10,8),%rbp
	mov %rbp,%r14
	adc 16(%rcx,%r10,8),%rbp
	mov %r12,%r15
	adc 24(%rcx,%r10,8),%r12
	.byte 0x9f			# lahf: park the carry of the add chain in ah
	add $255,%r9b			# CF = parked borrow: 255+1 overflows a byte, 255+0 does not
	sbb (%rcx,%r10,8),%r11
	mov %r11,(%rsi,%r10,8)		# stored before r10 advances, hence no -32 bias
	sbb 8(%rcx,%r10,8),%r13
	sbb 16(%rcx,%r10,8),%r14
	sbb 24(%rcx,%r10,8),%r15
	setc %r9b			# park the borrow of the subtract chain
	add $4,%r10			# CF set once r10 wraps from negative to 0..3
	mov %r8,-32(%rdi,%r10,8)	# -32 undoes the advance above, mov keeps CF for jnc
	mov %rbp,16-32(%rdi,%r10,8)
	mov %r13,8-32(%rsi,%r10,8)
	mov %r15,24-32(%rsi,%r10,8)
	mov %r12,24-32(%rdi,%r10,8)
	mov %r14,16-32(%rsi,%r10,8)
	mov %rbx,8-32(%rdi,%r10,8)
	jnc lp
# Tail: r10 is 0..3 here and 3-r10 limbs remain at fixed offsets 0, 8 and 16
# from the biased pointers.
skiplp:
	cmp $2,%r10
	jg case0			# r10 = 3: nothing left
	je case1			# r10 = 2: one limb left
	jp case2			# r10 = 1: r10-2 = -1, low byte 0xff has even parity
					# r10 = 0: r10-2 = -2, low byte 0xfe has odd parity, fall through
# Three limbs left.
case3:	.byte 0x9e			# sahf: CF = parked carry
	mov (%rdx),%r8
	mov %r8,%r11
	adc (%rcx),%r8
	mov 8(%rdx),%rbx
	mov %rbx,%r13
	adc 8(%rcx),%rbx
	mov 16(%rdx),%rbp
	mov %rbp,%r14
	adc 16(%rcx),%rbp
	.byte 0x9f			# lahf: park the final carry
	add $255,%r9b			# CF = parked borrow
	sbb (%rcx),%r11
	mov %r11,(%rsi)
	sbb 8(%rcx),%r13
	sbb 16(%rcx),%r14
	setc %r9b			# park the final borrow
	mov %r8,(%rdi)
	mov %rbp,16(%rdi)
	mov %r13,8(%rsi)
	mov %r14,16(%rsi)
	mov %rbx,8(%rdi)
	.byte 0x9e			# sahf: CF = final carry of the sum
	mov $0,%rax			# mov, not xor: CF must survive the clearing of rax
	adc $0,%rax			# rax = carry
	add $255,%r9b			# CF = final borrow
	rcl $1,%rax			# rax = 2*carry + borrow
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	ret
# Two limbs left.
case2:	.byte 0x9e			# sahf: CF = parked carry
	mov 8(%rdx),%r8
	mov %r8,%r11
	adc 8(%rcx),%r8
	mov 16(%rdx),%rbx
	mov %rbx,%r13
	adc 16(%rcx),%rbx
	.byte 0x9f			# lahf: park the final carry
	add $255,%r9b			# CF = parked borrow
	sbb 8(%rcx),%r11
	mov %r11,8(%rsi)
	sbb 16(%rcx),%r13
	setc %r9b			# park the final borrow
	mov %r8,8(%rdi)
	mov %r13,16(%rsi)
	mov %rbx,16(%rdi)
	.byte 0x9e			# sahf: CF = final carry of the sum
	mov $0,%rax			# mov, not xor: CF must survive the clearing of rax
	adc $0,%rax			# rax = carry
	add $255,%r9b			# CF = final borrow
	rcl $1,%rax			# rax = 2*carry + borrow
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	ret
# One limb left, then fall through into the common exit.
case1:	.byte 0x9e			# sahf: CF = parked carry
	mov 16(%rdx),%r8
	mov %r8,%r11
	adc 16(%rcx),%r8
	.byte 0x9f			# lahf: park the final carry
	add $255,%r9b			# CF = parked borrow
	sbb 16(%rcx),%r11
	mov %r11,16(%rsi)
	setc %r9b			# park the final borrow
	mov %r8,16(%rdi)
# Nothing left: fold the parked carry and borrow into the result.
case0:	.byte 0x9e			# sahf: CF = final carry of the sum
	mov $0,%rax			# mov, not xor: CF must survive the clearing of rax
	adc $0,%rax			# rax = carry
	add $255,%r9b			# CF = final borrow
	rcl $1,%rax			# rax = 2*carry + borrow
	pop %r15
	pop %r14
	pop %r13
	pop %r12
	pop %rbp
	pop %rbx
	ret
EPILOGUE()