1dnl  mpn_hamdist
2
3dnl  Copyright 2010 The Code Cavern
4
5dnl  This file is part of the MPIR Library.
6
7dnl  The MPIR Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The MPIR Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
19dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20dnl  Boston, MA 02110-1301, USA.
21
22include(`../config.m4')
23
24C	ret mpn_hamdist(mp_ptr,mp_ptr,mp_size_t)
25C	rax               rdi,   rsi,      rdx
26
C  mpn_hamdist: number of set bits in a[i] XOR b[i], summed over n limbs.
C
C  Inputs    rdi, rsi = the two limb vectors, rdx = limb count n
C  Output    rax      = running total, returned to the caller
C
C  Register roles
C	r8	0x5555555555555555, mask for 2-bit fields
C	r9	0x3333333333333333, mask for 4-bit fields
C	r10	0x0f0f0f0f0f0f0f0f, mask for 8-bit fields
C	r11	0x0101010101010101, multiplier that sums the eight bytes
C	rcx, r12, r14	up to three XORed limbs in flight
C	rbp	scratch
C	rdx	limb index, stepped down by three
C
C  Limbs are taken from the top of the vectors downwards, three at a time.
C  Each limb x is reduced in two steps:
C	x = x - ((x >> 1) & r8)			2-bit fields, each 0..2
C	x = (x & r9) + ((x >> 2) & r9)		4-bit fields, each 0..4
C  The three results are added (at most 12 per field, so nothing carries
C  into a neighbouring field), folded to bytes (at most 24 each) and then
C  summed into the top byte by one multiply with r11 (at most 192).
C  The 0, 1 or 2 limbs left at the bottom are counted one at a time.

ASM_START()
PROLOGUE(mpn_hamdist)
C  r12, r14 and rbp are used as temporaries; they are restored before ret.
	push	%r12
	push	%r14
	push	%rbp
	mov	$0x5555555555555555,%r8
	mov	$0x3333333333333333,%r9
	mov	$0x0f0f0f0f0f0f0f0f,%r10
	mov	$0x0101010101010101,%r11
	xor	%eax,%eax

C  rdx = n-3.  A borrow means fewer than three limbs: only the tail runs.
	sub	$3,%rdx
	jc	.Ltail

C  Fetch and XOR the top three limb pairs into rcx, r12, r14.
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	8(%rdi,%rdx,8),%r12
	xor	8(%rsi,%rdx,8),%r12
	mov	(%rdi,%rdx,8),%r14
	xor	(%rsi,%rdx,8),%r14

C  Step the index again.  A borrow means there is no further full triple
C  to preload, so the three limbs already held are counted without loads.
	sub	$3,%rdx
	jc	.Ldrain3

ALIGN(16)
.Lloop3:
C  First limb: 4-bit counts end up in rcx.
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

C  Second limb: 4-bit counts end up in rbp.
	mov	%r12,%rbp
	shr	$1,%r12
	and	%r8,%r12
	sub	%r12,%rbp
	mov	%rbp,%r12
	shr	$2,%rbp
	and	%r9,%r12
	and	%r9,%rbp
	add	%r12,%rbp

C  Third limb: 4-bit counts end up in r12.
	mov	%r14,%r12
	shr	$1,%r14
	and	%r8,%r14
	sub	%r14,%r12
	mov	%r12,%r14
	shr	$2,%r12
	and	%r9,%r14
	and	%r9,%r12
	add	%r14,%r12

C  rbp = sum of the three 4-bit count vectors, then fold to bytes in r14,
C  sum the bytes with the multiply and add the result to rax.
C  The lines indented one level deeper fetch the next triple; each load
C  is placed after the last use of the register it overwrites.
	add	%rcx,%rbp
	add	%r12,%rbp
		mov	16(%rdi,%rdx,8),%rcx
	mov	%rbp,%r14
	shr	$4,%rbp
	and	%r10,%r14
		xor	16(%rsi,%rdx,8),%rcx
		mov	8(%rdi,%rdx,8),%r12
		xor	8(%rsi,%rdx,8),%r12
	and	%r10,%rbp
	add	%rbp,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
		mov	(%rdi,%rdx,8),%r14
		xor	(%rsi,%rdx,8),%r14
	sub	$3,%rdx
	jnc	.Lloop3

.Ldrain3:
C  Count the last preloaded triple; same reduction as the loop body but
C  with no further loads.
C  First limb: 4-bit counts end up in rcx.
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

C  Second limb: 4-bit counts end up in rbp.
	mov	%r12,%rbp
	shr	$1,%r12
	and	%r8,%r12
	sub	%r12,%rbp
	mov	%rbp,%r12
	shr	$2,%rbp
	and	%r9,%r12
	and	%r9,%rbp
	add	%r12,%rbp

C  Third limb: 4-bit counts end up in r12.
	mov	%r14,%r12
	shr	$1,%r14
	and	%r8,%r14
	sub	%r14,%r12
	mov	%r12,%r14
	shr	$2,%r12
	and	%r9,%r14
	and	%r9,%r12
	add	%r14,%r12

C  Combine the three, fold to bytes, sum the bytes, accumulate.
	add	%rcx,%rbp
	add	%r12,%rbp
	mov	%rbp,%r14
	shr	$4,%rbp
	and	%r10,%r14
	and	%r10,%rbp
	add	%rbp,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax

.Ltail:
C  Here rdx is -3, -2 or -1, meaning 0, 1 or 2 limbs remain at indices
C  0 .. rdx+2.
	cmp	$-2,%rdx
	jl	.Ldone
	jz	.Ltail1

.Ltail2:
C  Two limbs remain: count the one at index rdx+2, then fall through.
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

	mov	%rcx,%r14
	shr	$4,%rcx
	and	%r10,%r14
	and	%r10,%rcx
	add	%rcx,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
	dec	%rdx

.Ltail1:
C  One limb remains; rdx is -2 here, so rdx+2 addresses limb 0.
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

	mov	%rcx,%r14
	shr	$4,%rcx
	and	%r10,%r14
	and	%r10,%rcx
	add	%rcx,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax

.Ldone:
C  Restore the saved registers in reverse push order; the total is in rax.
	pop	%rbp
	pop	%r14
	pop	%r12
	ret
EPILOGUE()
189