// +build amd64,!appengine,!gccgo

// This is a translation of the gcc output of FloodyBerry's pure-C public
// domain siphash implementation at https://github.com/floodyberry/siphash

// This assembly code has been modified from the 64-bit output to the
// experimental 128-bit output.

// Register assignment from afterSwitch onward (last block + finalization):
// SI = v0
// AX = v1
// CX = v2
// DX = v3
//
// Register assignment during setup and in the main loop:
// R9 = v0
// AX = v1
// BX = v2
// CX = v3
//
// Other registers:
// SI  = b_base (until afterSwitch, where it is reused for v0)
// DI  = byte offset into b
// R8  = current 8-byte message word
// R10 = b_len rounded down to a multiple of 8
// R11 = last block: (b_len << 56) | up to 7 trailing bytes
//
// All rotates are written as right rotates; RORQ $n is a left rotate
// by 64-n (0x33 -> 13, 0x30 -> 16, 0x2F -> 17, 0x2B -> 21, 0x20 -> 32).

// func Hash128(k0, k1 uint64, b []byte) (r0 uint64, r1 uint64)
//
// Argument frame (56 bytes, no local frame):
//   k0+0(FP), k1+8(FP)                  key halves
//   b_base+16(FP), b_len+24(FP), +32    slice header (cap is not read)
//   ret+40(FP), ret1+48(FP)             first and second 64-bit result
//
// The TEXT flag 4 is NOSPLIT in Go's textflag.h.
TEXT	·Hash128(SB),4,$0-56
	// Load the key and build the initial state: R9 and BX are constants
	// XORed with k0, AX and CX are constants XORed with k1. AX additionally
	// gets 0xEE XORed in.
	MOVQ	k0+0(FP),CX
	MOVQ	$0x736F6D6570736575,R9
	MOVQ	k1+8(FP),DI
	MOVQ	$0x6C7967656E657261,BX
	MOVQ	$0x646F72616E646F6D,AX
	MOVQ	b_len+24(FP),DX
	XORQ	$0xEE,AX
	MOVQ	DX,R11
	MOVQ	DX,R10
	XORQ	CX,R9
	XORQ	CX,BX
	MOVQ	$0x7465646279746573,CX
	XORQ	DI,AX
	XORQ	DI,CX
	// R11 = b_len << 56: the length ends up in the top byte of the last block.
	SHLQ	$0x38,R11
	// DI is done holding k1; from here it is the byte offset into b.
	XORQ	DI,DI
	MOVQ	b_base+16(FP),SI
	// R10 = b_len &^ 7. If that is zero there is no full 8-byte word.
	ANDQ	$0xFFFFFFFFFFFFFFF8,R10
	JE	afterLoop
	// Exchanging AX with itself is a no-op (presumably alignment padding
	// carried over from the gcc output).
	XCHGQ	AX,AX
loopBody:
	// One iteration per 8-byte word m: v3 ^= m, two interleaved rounds,
	// then v0 ^= m. The offset DI is advanced in the middle of the round.
	MOVQ	0(SI)(DI*1),R8
	ADDQ	AX,R9
	RORQ	$0x33,AX
	XORQ	R9,AX
	RORQ	$0x20,R9
	ADDQ	$0x8,DI
	XORQ	R8,CX
	ADDQ	CX,BX
	RORQ	$0x30,CX
	XORQ	BX,CX
	ADDQ	AX,BX
	RORQ	$0x2F,AX
	ADDQ	CX,R9
	RORQ	$0x2B,CX
	XORQ	BX,AX
	XORQ	R9,CX
	RORQ	$0x20,BX
	ADDQ	AX,R9
	ADDQ	CX,BX
	RORQ	$0x33,AX
	RORQ	$0x30,CX
	XORQ	R9,AX
	XORQ	BX,CX
	RORQ	$0x20,R9
	ADDQ	AX,BX
	ADDQ	CX,R9
	RORQ	$0x2F,AX
	RORQ	$0x2B,CX
	XORQ	BX,AX
	RORQ	$0x20,BX
	XORQ	R9,CX
	XORQ	R8,R9
	// Keep looping while R10 > DI (unsigned), i.e. full words remain.
	CMPQ	R10,DI
	JA	loopBody
afterLoop:
	// DX = b_len - (b_len &^ 7) = number of trailing bytes (0..7).
	SUBQ	R10,DX

	// DX cannot exceed 7 here, so this branch is never taken; it is the
	// range check of the original switch statement.
	CMPQ	DX,$0x7
	JA	afterSwitch

	// no support for jump tables

	CMPQ	DX,$0x7
	JE	sw7

	CMPQ	DX,$0x6
	JE	sw6

	CMPQ	DX,$0x5
	JE	sw5

	CMPQ	DX,$0x4
	JE	sw4

	CMPQ	DX,$0x3
	JE	sw3

	CMPQ	DX,$0x2
	JE	sw2

	CMPQ	DX,$0x1
	JE	sw1

	// DX == 0: no trailing bytes to merge.
	JMP	afterSwitch

	// Each case zero-extends one trailing byte, shifts it to its position
	// and ORs it into R11, then falls through to the next lower case.
	// DX is reused as scratch; the byte count is no longer needed.
sw7:	MOVBQZX	6(SI)(DI*1),DX
	SHLQ	$0x30,DX
	ORQ	DX,R11
sw6:	MOVBQZX	0x5(SI)(DI*1),DX
	SHLQ	$0x28,DX
	ORQ	DX,R11
sw5:	MOVBQZX	0x4(SI)(DI*1),DX
	SHLQ	$0x20,DX
	ORQ	DX,R11
sw4:	MOVBQZX	0x3(SI)(DI*1),DX
	SHLQ	$0x18,DX
	ORQ	DX,R11
sw3:	MOVBQZX	0x2(SI)(DI*1),DX
	SHLQ	$0x10,DX
	ORQ	DX,R11
sw2:	MOVBQZX	0x1(SI)(DI*1),DX
	SHLQ	$0x8,DX
	ORQ	DX,R11
sw1:	MOVBQZX	0(SI)(DI*1),DX
	ORQ	DX,R11
afterSwitch:
	// Process the last block R11: v3 ^= R11, two rounds, v0 ^= R11.
	// The state also migrates to the SI/AX/CX/DX assignment here:
	// SI = AX + R9 (new v0), DX = copy of CX (v3), CX = BX + AX (new v2).
	LEAQ	(AX)(R9*1),SI
	XORQ	R11,CX
	RORQ	$0x33,AX
	ADDQ	CX,BX
	MOVQ	CX,DX
	XORQ	SI,AX
	RORQ	$0x30,DX
	RORQ	$0x20,SI
	LEAQ	0(BX)(AX*1),CX
	XORQ	BX,DX
	RORQ	$0x2F,AX
	ADDQ	DX,SI
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX
	ADDQ	AX,SI
	RORQ	$0x33,AX
	ADDQ	DX,CX
	XORQ	SI,AX
	RORQ	$0x30,DX
	RORQ	$0x20,SI
	XORQ	CX,DX
	ADDQ	AX,CX
	RORQ	$0x2F,AX
	ADDQ	DX,SI
	XORQ	CX,AX
	RORQ	$0x2B,DX
	RORQ	$0x20,CX
	XORQ	SI,DX
	XORQ	R11,SI
	// First finalization: XOR 0xEE into the low byte of v2 (CX), then
	// run four rounds (three here plus the duplicated one further down).
	XORB	$0xEE,CL
	ADDQ	AX,SI
	RORQ	$0x33,AX
	ADDQ	DX,CX
	RORQ	$0x30,DX
	XORQ	SI,AX
	XORQ	CX,DX
	RORQ	$0x20,SI
	ADDQ	AX,CX
	ADDQ	DX,SI
	RORQ	$0x2F,AX
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX
	ADDQ	AX,SI
	ADDQ	DX,CX
	RORQ	$0x33,AX
	RORQ	$0x30,DX
	XORQ	SI,AX
	RORQ	$0x20,SI
	XORQ	CX,DX
	ADDQ	AX,CX
	RORQ	$0x2F,AX
	ADDQ	DX,SI
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX
	ADDQ	AX,SI
	ADDQ	DX,CX
	RORQ	$0x33,AX
	RORQ	$0x30,DX
	XORQ	CX,DX
	XORQ	SI,AX
	RORQ	$0x20,SI
	ADDQ	DX,SI
	ADDQ	AX,CX
	RORQ	$0x2F,AX
	XORQ	CX,AX
	RORQ	$0x2B,DX
	RORQ	$0x20,CX
	XORQ	SI,DX

	// gcc optimized the tail end of this function differently. However,
	// we need to preserve our registers to carry out the second stage of
	// the finalization. This is a duplicate of an earlier finalization
	// round.

	ADDQ	AX,SI
	RORQ	$0x33,AX
	ADDQ	DX,CX
	RORQ	$0x30,DX
	XORQ	SI,AX
	XORQ	CX,DX
	RORQ	$0x20,SI
	ADDQ	AX,CX
	ADDQ	DX,SI
	RORQ	$0x2F,AX
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX

	// Stuff the result into BX instead of AX as gcc had done
	// (AX still holds v1, which the second finalization needs).
	// First result = v0 ^ v1 ^ v2 ^ v3.

	MOVQ	SI,BX
	XORQ	AX,BX
	XORQ	DX,BX
	XORQ	CX,BX
	MOVQ	BX,ret+40(FP)

	// Start the second finalization round: XOR 0xDD into the low byte of
	// v1 (AX), then run four more rounds on the preserved state.

	XORB	$0xDD,AL
	ADDQ	AX,SI
	RORQ	$0x33,AX
	ADDQ	DX,CX
	RORQ	$0x30,DX
	XORQ	SI,AX
	XORQ	CX,DX
	RORQ	$0x20,SI
	ADDQ	AX,CX
	ADDQ	DX,SI
	RORQ	$0x2F,AX
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX
	ADDQ	AX,SI
	ADDQ	DX,CX
	RORQ	$0x33,AX
	RORQ	$0x30,DX
	XORQ	SI,AX
	RORQ	$0x20,SI
	XORQ	CX,DX
	ADDQ	AX,CX
	RORQ	$0x2F,AX
	ADDQ	DX,SI
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX
	ADDQ	AX,SI
	ADDQ	DX,CX
	RORQ	$0x33,AX
	RORQ	$0x30,DX
	XORQ	CX,DX
	XORQ	SI,AX
	RORQ	$0x20,SI
	ADDQ	DX,SI
	ADDQ	AX,CX
	RORQ	$0x2F,AX
	XORQ	CX,AX
	RORQ	$0x2B,DX
	RORQ	$0x20,CX
	XORQ	SI,DX

	ADDQ	AX,SI
	RORQ	$0x33,AX
	ADDQ	DX,CX
	RORQ	$0x30,DX
	XORQ	SI,AX
	XORQ	CX,DX
	RORQ	$0x20,SI
	ADDQ	AX,CX
	ADDQ	DX,SI
	RORQ	$0x2F,AX
	RORQ	$0x2B,DX
	XORQ	CX,AX
	XORQ	SI,DX
	RORQ	$0x20,CX

	// Second result = v0 ^ v1 ^ v2 ^ v3 after the second finalization.
	MOVQ	SI,BX
	XORQ	AX,BX
	XORQ	DX,BX
	XORQ	CX,BX
	MOVQ	BX,ret1+48(FP)

	RET