dnl  mpn_hamdist

dnl  Copyright 2010 The Code Cavern

dnl  This file is part of the MPIR Library.

dnl  The MPIR Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
dnl  your option) any later version.

dnl  The MPIR Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl  Boston, MA 02110-1301, USA.

include(`../config.m4')

C	ret mpn_hamdist(mp_ptr,mp_ptr,mp_size_t)
C	rax                 rdi,   rsi,      rdx

C Returns in rax the number of bit positions in which the two operands
C differ: every 64-bit limb of the first operand is XORed with the matching
C limb of the second and the set bits of the result are counted.
C
C Syntax: AT&T operand order (source, destination), run through m4.
C
C Register use:
C   rdi, rsi   operand base pointers, never modified
C   rdx        limb index, stepped downwards by 3; limbs are consumed from
C              the highest index towards index 0
C   rax        running bit total (the return value)
C   r8         0x5555... mask, selects every other bit
C   r9         0x3333... mask, selects the low 2 bits of every nibble
C   r10        0x0f0f... mask, selects the low nibble of every byte
C   r11        0x0101... multiplier, sums the eight byte counts into bits 56-63
C   rcx, r12, r14, rbp   scratch
C
C r12, r14 and rbp are pushed on entry and popped again before ret.
C
C Limbs are handled three at a time.  Each XORed limb is reduced to
C per-nibble counts (at most 4 per nibble); the three results are added
C (at most 12 per nibble, still within 4 bits) and only then widened to
C per-byte counts (at most 24) and summed with one imul (at most 192, so
C the total fits in the top byte).  Up to two leftover limbs are counted
C one at a time at case2 and case1.

ASM_START()
PROLOGUE(mpn_hamdist)
push	%r12
push	%r14
push	%rbp
mov	$0x5555555555555555,%r8
mov	$0x3333333333333333,%r9
mov	$0x0f0f0f0f0f0f0f0f,%r10
mov	$0x0101010101010101,%r11
C clear the running total
xor	%eax,%eax
C rdx = n-3; a borrow means fewer than three limbs in total, so go straight
C to the leftover handling with rdx in -3..-1
sub	$3,%rdx
jc	skip
C preload the top three limbs (indices rdx+2, rdx+1, rdx), already XORed
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	8(%rdi,%rdx,8),%r12
	xor	8(%rsi,%rdx,8),%r12
	mov	(%rdi,%rdx,8),%r14
	xor	(%rsi,%rdx,8),%r14
C if there is no further complete group of three, only the preloaded group
C remains to be reduced (at skiplp)
sub	$3,%rdx
jc	skiplp
C Main loop.  On entry rcx, r12, r14 hold three XORed limbs that have not
C been counted yet, and rdx indexes the lowest limb of the next group.
ALIGN(16)
C first limb (rcx): x - ((x>>1) & 0x55..) gives 2-bit counts, then adjacent
C pairs are added to give per-nibble counts; result in rcx
lp:	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

C second limb (r12): same reduction, per-nibble counts end up in rbp
	mov	%r12,%rbp
	shr	$1,%r12
	and	%r8,%r12
	sub	%r12,%rbp
	mov	%rbp,%r12
	shr	$2,%rbp
	and	%r9,%r12
	and	%r9,%rbp
	add	%r12,%rbp

C third limb (r14): same reduction, per-nibble counts end up in r12
	mov	%r14,%r12
	shr	$1,%r14
	and	%r8,%r14
	sub	%r14,%r12
	mov	%r12,%r14
	shr	$2,%r12
	and	%r9,%r14
	and	%r9,%r12
	add	%r14,%r12

C Combine the three per-nibble results in rbp, fold nibbles into bytes in
C r14, then imul by 0x0101.. and shr 56 to get the group total, which is
C added to rax.  The loads and XORs of the next three limbs into rcx, r12
C and r14 are interleaved with this reduction; each of those registers is
C reloaded only after its old value is no longer needed.
	add	%rcx,%rbp
	add	%r12,%rbp
	mov	16(%rdi,%rdx,8),%rcx
	mov	%rbp,%r14
	shr	$4,%rbp
	and	%r10,%r14
	xor	16(%rsi,%rdx,8),%rcx
	mov	8(%rdi,%rdx,8),%r12
	xor	8(%rsi,%rdx,8),%r12
	and	%r10,%rbp
	add	%rbp,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
	mov	(%rdi,%rdx,8),%r14
	xor	(%rsi,%rdx,8),%r14
C loop while another complete group of three lies below the one just loaded
	sub	$3,%rdx
	jnc	lp
C Reduce the last loaded group (rcx, r12, r14) exactly as in the loop body,
C but without loading anything further.
skiplp:
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

	mov	%r12,%rbp
	shr	$1,%r12
	and	%r8,%r12
	sub	%r12,%rbp
	mov	%rbp,%r12
	shr	$2,%rbp
	and	%r9,%r12
	and	%r9,%rbp
	add	%r12,%rbp

	mov	%r14,%r12
	shr	$1,%r14
	and	%r8,%r14
	sub	%r14,%r12
	mov	%r12,%r14
	shr	$2,%r12
	and	%r9,%r14
	and	%r9,%r12
	add	%r14,%r12

	add	%rcx,%rbp
	add	%r12,%rbp
	mov	%rbp,%r14
	shr	$4,%rbp
	and	%r10,%r14
	and	%r10,%rbp
	add	%rbp,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
C Leftover limbs.  Here rdx = (number of limbs still uncounted) - 3, so
C   rdx = -3  nothing left        -> case0
C   rdx = -2  limb 0 left         -> case1
C   rdx = -1  limbs 1 and 0 left  -> case2, which falls through into case1
skip:
	cmp	$-2,%rdx
	jl	case0
	jz	case1
C rdx = -1 here, so 16(base,rdx,8) addresses limb 1
case2:
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

C single limb: per-nibble -> per-byte -> total in the top byte -> add to rax
	mov	%rcx,%r14
	shr	$4,%rcx
	and	%r10,%r14
	and	%r10,%rcx
	add	%rcx,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
C rdx becomes -2 so that the same addressing in case1 reaches limb 0
	dec	%rdx
C rdx = -2 here, so 16(base,rdx,8) addresses limb 0
case1:
	mov	16(%rdi,%rdx,8),%rcx
	xor	16(%rsi,%rdx,8),%rcx
	mov	%rcx,%rbp
	shr	$1,%rcx
	and	%r8,%rcx
	sub	%rcx,%rbp
	mov	%rbp,%rcx
	shr	$2,%rbp
	and	%r9,%rcx
	and	%r9,%rbp
	add	%rbp,%rcx

	mov	%rcx,%r14
	shr	$4,%rcx
	and	%r10,%r14
	and	%r10,%rcx
	add	%rcx,%r14
	imul	%r11,%r14
	shr	$56,%r14
	add	%r14,%rax
C restore the saved registers in reverse push order and return the total
case0:	pop	%rbp
	pop	%r14
	pop	%r12
	ret
EPILOGUE()