/ This Source Code Form is subject to the terms of the Mozilla Public
/ License, v. 2.0. If a copy of the MPL was not distributed with this
/ file, You can obtain one at http://mozilla.org/MPL/2.0/.

/ ** ARCFOUR implementation optimized for AMD64.
/ **
/ ** The throughput achieved by this code is about 320 MBytes/sec, on
/ ** a 1.8 GHz AMD Opteron (rev C0) processor.

/ ----------------------------------------------------------------------
/ ARCFOUR(key, len, in, out)
/
/ Syntax: AT&T operand order (op src, dst); '/' starts a comment.
/
/ In:    %rdi = key   state block: 64-bit x at +0, 64-bit y at +8,
/                     table d[] of 8-byte slots starting at +16,
/                     indexed by byte values (0..255)
/        %rsi = len   number of bytes to process
/        %rdx = in    source bytes
/        %rcx = out   destination bytes
/ Out:   out[i] = in[i] ^ keystream byte, for i in [0, len)
/        d[] is permuted in place; the low bytes of key->x and key->y
/        are written back (their upper bytes are left untouched).
/ Saved: %rbx, %rbp (pushed on entry, popped on exit)
/ Uses:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r11, flags
/
/ Register roles inside the function:
/        %rbp = d      %rcx = x      %rdx = y      %rax = tx = d[x]
/        %rsi = in     %rdi = out    %r9  = end marker
/        %rbx = scratch (ty / val)   %r8  = keystream accumulator
/        %r11 = byte counter for the 8-byte inner loop
/
/ NOTE(review): the pointer comparisons below are signed (jl/jle); this
/ matches unsigned ordering as long as in and in+len do not straddle
/ the sign boundary of the address space -- confirm if that matters.
/ ----------------------------------------------------------------------

.text
.align 16
.globl ARCFOUR
.type ARCFOUR,@function
ARCFOUR:
        pushq   %rbp
        pushq   %rbx
        movq    %rdi, %rbp              / key = ARG(key)
        movq    %rsi, %rbx              / rbx = ARG(len)
        movq    %rdx, %rsi              / in = ARG(in)
        movq    %rcx, %rdi              / out = ARG(out)
        movq    (%rbp), %rcx            / x = key->x
        movq    8(%rbp), %rdx           / y = key->y
        addq    $16, %rbp               / d = key->data
        incq    %rcx                    / x++
        andq    $255, %rcx              / x &= 0xff
        / Only %dl is ever updated and stored back, so any stale bits
        / above the low byte of key->y would otherwise survive in %rdx
        / and push d[y] outside the table.
        andq    $255, %rdx              / y &= 0xff
        leaq    -8(%rbx,%rsi), %rbx     / rbx = in+len-8
        movq    %rbx, %r9               / tmp = in+len-8 (rbx is reused as scratch below)
        movq    0(%rbp,%rcx,8), %rax    / tx = d[x]
        cmpq    %rsi, %rbx              / cmp in with in+len-8
        jl      .Lend                   / fewer than 8 bytes: skip the wide loop

.Lstart:
        addq    $8, %rsi                / increment in
        addq    $8, %rdi                / increment out

        / generate the next 8 bytes of the rc4 stream into %r8
        movq    $8, %r11                / byte counter
.Lround:
        addb    %al, %dl                / y += tx
        movl    0(%rbp,%rdx,8), %ebx    / ty = d[y]
        movl    %ebx, 0(%rbp,%rcx,8)    / d[x] = ty
        addb    %al, %bl                / val = ty + tx
        movl    %eax, 0(%rbp,%rdx,8)    / d[y] = tx
        incb    %cl                     / x++ (NEXT ROUND)
        movl    0(%rbp,%rcx,8), %eax    / tx = d[x] (NEXT ROUND)
        movb    0(%rbp,%rbx,8), %r8b    / val = d[val]
        decb    %r11b                   / sets ZF for the jnz below
        rorq    $8, %r8                 / (ror does not change ZF)
        jnz     .Lround
        / after 8 rotations by 8 bits the first generated byte is back
        / in the low byte of %r8, so %r8 lines up with the input bytes

        / xor 8 bytes
        xorq    -8(%rsi), %r8
        cmpq    %r9, %rsi               / cmp in+len-8 with in
        movq    %r8, -8(%rdi)           / store (mov leaves the flags from cmp intact)
        jle     .Lstart                 / jump if (in <= in+len-8)

.Lend:
        addq    $8, %r9                 / tmp = in+len

        / handle the last bytes, one by one
.Ltail:
        cmpq    %rsi, %r9               / cmp in with in+len
        jle     .Lfinished              / jump if (in+len <= in)
        addb    %al, %dl                / y += tx
        movl    0(%rbp,%rdx,8), %ebx    / ty = d[y]
        movl    %ebx, 0(%rbp,%rcx,8)    / d[x] = ty
        addb    %al, %bl                / val = ty + tx
        movl    %eax, 0(%rbp,%rdx,8)    / d[y] = tx
        incb    %cl                     / x++ (NEXT ROUND)
        movl    0(%rbp,%rcx,8), %eax    / tx = d[x] (NEXT ROUND)
        movb    0(%rbp,%rbx,8), %r8b    / val = d[val]
        xorb    (%rsi), %r8b            / xor 1 byte
        movb    %r8b, (%rdi)
        incq    %rsi                    / in++
        incq    %rdi                    / out++
        jmp     .Ltail

.Lfinished:
        decq    %rcx                    / x-- (undo the look-ahead increment)
        movb    %dl, -8(%rbp)           / key->y = y
        movb    %cl, -16(%rbp)          / key->x = x
        popq    %rbx
        popq    %rbp
        ret
.L_ARCFOUR_end:
.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR