/*
 * stream_salsa20_xmm6 / _stream_salsa20_xmm6
 *
 * Keystream-only entry point. It prepares the stack frame, zero-fills the
 * destination buffer and then jumps to ._start, the label defined inside
 * stream_salsa20_xmm6_xor_ic. That shared path XORs (%rsi) into the generated
 * block and stores the result at (%rdi); because this entry sets
 * %rsi = %rdi and has just zeroed the buffer, the XOR leaves the generated
 * block itself in the destination.
 *
 * NOTE(review): the text below is not clean assembler input. Every statement
 * is prefixed with "<number>0ac341f1SConrad Meyer" (this looks like the
 * line-number / revision / author columns of a blame view) and the
 * statements are fused onto one physical line. Strip those prefixes and
 * restore one statement per line before assembling.
 *
 * Registers as consumed here (the order matches the SysV AMD64 integer
 * argument registers; the C prototype is not visible in this file, so the
 * parameter meanings are assumptions to confirm):
 *   %rdi  destination buffer
 *   %rsi  byte count
 *   %rdx  pointer, passed through unchanged; ._start loads the dwords at
 *         offsets 0 and 4 from it (presumably the nonce)
 *   %rcx  pointer, moved to %r10; ._start loads the dwords at offsets
 *         0..28 from it (presumably the key)
 *
 * Steps performed by this stub:
 *   1. %r11 = (%rsp & 31) + 512, then %rsp -= %r11. This reserves at least
 *      512 bytes and leaves %rsp a multiple of 32, which the later movdqa
 *      accesses to the frame depend on.
 *   2. %r11 (the adjustment) is stored at 416(%rsp), and %r12, %r13, %r14,
 *      %r15, %rbx, %rbp at 424..464(%rsp). The code that reloads them is
 *      not in this part of the file; presumably it is at ._done.
 *   3. Register shuffle for the shared path: %r9 = byte count, %rsi = %rdi
 *      (source aliases destination), %r10 = %rcx. "mov %rdi,%rdi" and
 *      "mov %rdx,%rdx" change nothing.
 *   4. If the byte count is 0, jump straight to ._done.
 *   5. Otherwise "rep stosb" with %rax = 0 and %rcx = %r9 writes %r9 zero
 *      bytes at (%rdi); "sub %r9,%rdi" then rewinds %rdi to the buffer
 *      start. The forward fill assumes the direction flag is clear on
 *      entry.
 *   6. 472(%rsp) is set to 0. The shared path reads this 64-bit slot back,
 *      stores its halves into the state at 80(%rsp) and 4+96(%rsp), and
 *      adds 1 to it for each block produced in the 256-byte loop, so this
 *      entry always starts that counter at zero.
 *
 * The tail of this line (from the second ".text" / ".p2align 5" onward) is
 * the start of the symbol declarations for stream_salsa20_xmm6_xor_ic; they
 * continue on the following line.
 */
10ac341f1SConrad Meyer#ifdef HAVE_AMD64_ASM 20ac341f1SConrad Meyer 30ac341f1SConrad Meyer.text 40ac341f1SConrad Meyer.p2align 5 50ac341f1SConrad Meyer 60ac341f1SConrad Meyer#ifdef ASM_HIDE_SYMBOL 70ac341f1SConrad MeyerASM_HIDE_SYMBOL stream_salsa20_xmm6 80ac341f1SConrad MeyerASM_HIDE_SYMBOL _stream_salsa20_xmm6 90ac341f1SConrad Meyer#endif 100ac341f1SConrad Meyer.globl stream_salsa20_xmm6 110ac341f1SConrad Meyer.globl _stream_salsa20_xmm6 120ac341f1SConrad Meyer#ifdef __ELF__ 130ac341f1SConrad Meyer.type stream_salsa20_xmm6, @function 140ac341f1SConrad Meyer.type _stream_salsa20_xmm6, @function 150ac341f1SConrad Meyer#endif 160ac341f1SConrad Meyerstream_salsa20_xmm6: 170ac341f1SConrad Meyer_stream_salsa20_xmm6: 180ac341f1SConrad Meyermov %rsp,%r11 190ac341f1SConrad Meyerand $31,%r11 200ac341f1SConrad Meyeradd $512,%r11 210ac341f1SConrad Meyersub %r11,%rsp 220ac341f1SConrad Meyermovq %r11,416(%rsp) 230ac341f1SConrad Meyermovq %r12,424(%rsp) 240ac341f1SConrad Meyermovq %r13,432(%rsp) 250ac341f1SConrad Meyermovq %r14,440(%rsp) 260ac341f1SConrad Meyermovq %r15,448(%rsp) 270ac341f1SConrad Meyermovq %rbx,456(%rsp) 280ac341f1SConrad Meyermovq %rbp,464(%rsp) 290ac341f1SConrad Meyermov %rsi,%r9 300ac341f1SConrad Meyermov %rdi,%rdi 310ac341f1SConrad Meyermov %rdi,%rsi 320ac341f1SConrad Meyermov %rdx,%rdx 330ac341f1SConrad Meyermov %rcx,%r10 340ac341f1SConrad Meyercmp $0,%r9 350ac341f1SConrad Meyerjbe ._done 360ac341f1SConrad Meyermov $0,%rax 370ac341f1SConrad Meyermov %r9,%rcx 380ac341f1SConrad Meyerrep stosb 390ac341f1SConrad Meyersub %r9,%rdi 400ac341f1SConrad Meyermovq $0,472(%rsp) 410ac341f1SConrad Meyerjmp ._start 420ac341f1SConrad Meyer 430ac341f1SConrad Meyer.text 440ac341f1SConrad Meyer.p2align 5 450ac341f1SConrad Meyer 460ac341f1SConrad Meyer#ifdef ASM_HIDE_SYMBOL 470ac341f1SConrad MeyerASM_HIDE_SYMBOL stream_salsa20_xmm6_xor_ic 480ac341f1SConrad MeyerASM_HIDE_SYMBOL _stream_salsa20_xmm6_xor_ic 490ac341f1SConrad Meyer#endif 500ac341f1SConrad Meyer.globl 
stream_salsa20_xmm6_xor_ic 510ac341f1SConrad Meyer.globl _stream_salsa20_xmm6_xor_ic 520ac341f1SConrad Meyer#ifdef __ELF__ 530ac341f1SConrad Meyer.type stream_salsa20_xmm6_xor_ic, @function 540ac341f1SConrad Meyer.type _stream_salsa20_xmm6_xor_ic, @function 550ac341f1SConrad Meyer#endif 560ac341f1SConrad Meyerstream_salsa20_xmm6_xor_ic: 570ac341f1SConrad Meyer_stream_salsa20_xmm6_xor_ic: 580ac341f1SConrad Meyer 590ac341f1SConrad Meyermov %rsp,%r11 600ac341f1SConrad Meyerand $31,%r11 610ac341f1SConrad Meyeradd $512,%r11 620ac341f1SConrad Meyersub %r11,%rsp 630ac341f1SConrad Meyermovq %r11,416(%rsp) 640ac341f1SConrad Meyermovq %r12,424(%rsp) 650ac341f1SConrad Meyermovq %r13,432(%rsp) 660ac341f1SConrad Meyermovq %r14,440(%rsp) 670ac341f1SConrad Meyermovq %r15,448(%rsp) 680ac341f1SConrad Meyermovq %rbx,456(%rsp) 690ac341f1SConrad Meyermovq %rbp,464(%rsp) 700ac341f1SConrad Meyermov %rdi,%rdi 710ac341f1SConrad Meyermov %rsi,%rsi 720ac341f1SConrad Meyermov %r9,%r10 730ac341f1SConrad Meyermovq %r8,472(%rsp) 740ac341f1SConrad Meyermov %rdx,%r9 750ac341f1SConrad Meyermov %rcx,%rdx 760ac341f1SConrad Meyercmp $0,%r9 770ac341f1SConrad Meyerjbe ._done 780ac341f1SConrad Meyer 790ac341f1SConrad Meyer._start: 800ac341f1SConrad Meyermovl 20(%r10),%ecx 810ac341f1SConrad Meyermovl 0(%r10),%r8d 820ac341f1SConrad Meyermovl 0(%rdx),%eax 830ac341f1SConrad Meyermovl 16(%r10),%r11d 840ac341f1SConrad Meyermovl %ecx,64(%rsp) 850ac341f1SConrad Meyermovl %r8d,4+64(%rsp) 860ac341f1SConrad Meyermovl %eax,8+64(%rsp) 870ac341f1SConrad Meyermovl %r11d,12+64(%rsp) 880ac341f1SConrad Meyermovl 24(%r10),%r8d 890ac341f1SConrad Meyermovl 4(%r10),%eax 900ac341f1SConrad Meyermovl 4(%rdx),%edx 910ac341f1SConrad Meyermovq 472(%rsp),%rcx 920ac341f1SConrad Meyermovl %ecx,80(%rsp) 930ac341f1SConrad Meyermovl %r8d,4+80(%rsp) 940ac341f1SConrad Meyermovl %eax,8+80(%rsp) 950ac341f1SConrad Meyermovl %edx,12+80(%rsp) 960ac341f1SConrad Meyermovl 12(%r10),%edx 970ac341f1SConrad Meyershr $32,%rcx 980ac341f1SConrad 
Meyermovl 28(%r10),%r8d 990ac341f1SConrad Meyermovl 8(%r10),%eax 1000ac341f1SConrad Meyermovl %edx,96(%rsp) 1010ac341f1SConrad Meyermovl %ecx,4+96(%rsp) 1020ac341f1SConrad Meyermovl %r8d,8+96(%rsp) 1030ac341f1SConrad Meyermovl %eax,12+96(%rsp) 1040ac341f1SConrad Meyermov $1634760805,%rdx 1050ac341f1SConrad Meyermov $857760878,%rcx 1060ac341f1SConrad Meyermov $2036477234,%r8 1070ac341f1SConrad Meyermov $1797285236,%rax 1080ac341f1SConrad Meyermovl %edx,112(%rsp) 1090ac341f1SConrad Meyermovl %ecx,4+112(%rsp) 1100ac341f1SConrad Meyermovl %r8d,8+112(%rsp) 1110ac341f1SConrad Meyermovl %eax,12+112(%rsp) 1120ac341f1SConrad Meyercmp $256,%r9 1130ac341f1SConrad Meyerjb ._bytesbetween1and255 1140ac341f1SConrad Meyermovdqa 112(%rsp),%xmm0 1150ac341f1SConrad Meyerpshufd $0x55,%xmm0,%xmm1 1160ac341f1SConrad Meyerpshufd $0xaa,%xmm0,%xmm2 1170ac341f1SConrad Meyerpshufd $0xff,%xmm0,%xmm3 1180ac341f1SConrad Meyerpshufd $0x00,%xmm0,%xmm0 1190ac341f1SConrad Meyermovdqa %xmm1,128(%rsp) 1200ac341f1SConrad Meyermovdqa %xmm2,144(%rsp) 1210ac341f1SConrad Meyermovdqa %xmm3,160(%rsp) 1220ac341f1SConrad Meyermovdqa %xmm0,176(%rsp) 1230ac341f1SConrad Meyermovdqa 64(%rsp),%xmm0 1240ac341f1SConrad Meyerpshufd $0xaa,%xmm0,%xmm1 1250ac341f1SConrad Meyerpshufd $0xff,%xmm0,%xmm2 1260ac341f1SConrad Meyerpshufd $0x00,%xmm0,%xmm3 1270ac341f1SConrad Meyerpshufd $0x55,%xmm0,%xmm0 1280ac341f1SConrad Meyermovdqa %xmm1,192(%rsp) 1290ac341f1SConrad Meyermovdqa %xmm2,208(%rsp) 1300ac341f1SConrad Meyermovdqa %xmm3,224(%rsp) 1310ac341f1SConrad Meyermovdqa %xmm0,240(%rsp) 1320ac341f1SConrad Meyermovdqa 80(%rsp),%xmm0 1330ac341f1SConrad Meyerpshufd $0xff,%xmm0,%xmm1 1340ac341f1SConrad Meyerpshufd $0x55,%xmm0,%xmm2 1350ac341f1SConrad Meyerpshufd $0xaa,%xmm0,%xmm0 1360ac341f1SConrad Meyermovdqa %xmm1,256(%rsp) 1370ac341f1SConrad Meyermovdqa %xmm2,272(%rsp) 1380ac341f1SConrad Meyermovdqa %xmm0,288(%rsp) 1390ac341f1SConrad Meyermovdqa 96(%rsp),%xmm0 1400ac341f1SConrad Meyerpshufd $0x00,%xmm0,%xmm1 1410ac341f1SConrad 
Meyerpshufd $0xaa,%xmm0,%xmm2 1420ac341f1SConrad Meyerpshufd $0xff,%xmm0,%xmm0 1430ac341f1SConrad Meyermovdqa %xmm1,304(%rsp) 1440ac341f1SConrad Meyermovdqa %xmm2,320(%rsp) 1450ac341f1SConrad Meyermovdqa %xmm0,336(%rsp) 1460ac341f1SConrad Meyer 1470ac341f1SConrad Meyer.p2align 4 1480ac341f1SConrad Meyer._bytesatleast256: 1490ac341f1SConrad Meyermovq 472(%rsp),%rdx 1500ac341f1SConrad Meyermov %rdx,%rcx 1510ac341f1SConrad Meyershr $32,%rcx 1520ac341f1SConrad Meyermovl %edx,352(%rsp) 1530ac341f1SConrad Meyermovl %ecx,368(%rsp) 1540ac341f1SConrad Meyeradd $1,%rdx 1550ac341f1SConrad Meyermov %rdx,%rcx 1560ac341f1SConrad Meyershr $32,%rcx 1570ac341f1SConrad Meyermovl %edx,4+352(%rsp) 1580ac341f1SConrad Meyermovl %ecx,4+368(%rsp) 1590ac341f1SConrad Meyeradd $1,%rdx 1600ac341f1SConrad Meyermov %rdx,%rcx 1610ac341f1SConrad Meyershr $32,%rcx 1620ac341f1SConrad Meyermovl %edx,8+352(%rsp) 1630ac341f1SConrad Meyermovl %ecx,8+368(%rsp) 1640ac341f1SConrad Meyeradd $1,%rdx 1650ac341f1SConrad Meyermov %rdx,%rcx 1660ac341f1SConrad Meyershr $32,%rcx 1670ac341f1SConrad Meyermovl %edx,12+352(%rsp) 1680ac341f1SConrad Meyermovl %ecx,12+368(%rsp) 1690ac341f1SConrad Meyeradd $1,%rdx 1700ac341f1SConrad Meyermov %rdx,%rcx 1710ac341f1SConrad Meyershr $32,%rcx 1720ac341f1SConrad Meyermovl %edx,80(%rsp) 1730ac341f1SConrad Meyermovl %ecx,4+96(%rsp) 1740ac341f1SConrad Meyermovq %rdx,472(%rsp) 1750ac341f1SConrad Meyermovq %r9,480(%rsp) 1760ac341f1SConrad Meyermov $20,%rdx 1770ac341f1SConrad Meyermovdqa 128(%rsp),%xmm0 1780ac341f1SConrad Meyermovdqa 144(%rsp),%xmm1 1790ac341f1SConrad Meyermovdqa 160(%rsp),%xmm2 1800ac341f1SConrad Meyermovdqa 320(%rsp),%xmm3 1810ac341f1SConrad Meyermovdqa 336(%rsp),%xmm4 1820ac341f1SConrad Meyermovdqa 192(%rsp),%xmm5 1830ac341f1SConrad Meyermovdqa 208(%rsp),%xmm6 1840ac341f1SConrad Meyermovdqa 240(%rsp),%xmm7 1850ac341f1SConrad Meyermovdqa 256(%rsp),%xmm8 1860ac341f1SConrad Meyermovdqa 272(%rsp),%xmm9 1870ac341f1SConrad Meyermovdqa 288(%rsp),%xmm10 
1880ac341f1SConrad Meyermovdqa 368(%rsp),%xmm11 1890ac341f1SConrad Meyermovdqa 176(%rsp),%xmm12 1900ac341f1SConrad Meyermovdqa 224(%rsp),%xmm13 1910ac341f1SConrad Meyermovdqa 304(%rsp),%xmm14 1920ac341f1SConrad Meyermovdqa 352(%rsp),%xmm15 1930ac341f1SConrad Meyer 1940ac341f1SConrad Meyer.p2align 4 1950ac341f1SConrad Meyer._mainloop1: 1960ac341f1SConrad Meyermovdqa %xmm1,384(%rsp) 1970ac341f1SConrad Meyermovdqa %xmm2,400(%rsp) 1980ac341f1SConrad Meyermovdqa %xmm13,%xmm1 1990ac341f1SConrad Meyerpaddd %xmm12,%xmm1 2000ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2010ac341f1SConrad Meyerpslld $7,%xmm1 2020ac341f1SConrad Meyerpxor %xmm1,%xmm14 2030ac341f1SConrad Meyerpsrld $25,%xmm2 2040ac341f1SConrad Meyerpxor %xmm2,%xmm14 2050ac341f1SConrad Meyermovdqa %xmm7,%xmm1 2060ac341f1SConrad Meyerpaddd %xmm0,%xmm1 2070ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2080ac341f1SConrad Meyerpslld $7,%xmm1 2090ac341f1SConrad Meyerpxor %xmm1,%xmm11 2100ac341f1SConrad Meyerpsrld $25,%xmm2 2110ac341f1SConrad Meyerpxor %xmm2,%xmm11 2120ac341f1SConrad Meyermovdqa %xmm12,%xmm1 2130ac341f1SConrad Meyerpaddd %xmm14,%xmm1 2140ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2150ac341f1SConrad Meyerpslld $9,%xmm1 2160ac341f1SConrad Meyerpxor %xmm1,%xmm15 2170ac341f1SConrad Meyerpsrld $23,%xmm2 2180ac341f1SConrad Meyerpxor %xmm2,%xmm15 2190ac341f1SConrad Meyermovdqa %xmm0,%xmm1 2200ac341f1SConrad Meyerpaddd %xmm11,%xmm1 2210ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2220ac341f1SConrad Meyerpslld $9,%xmm1 2230ac341f1SConrad Meyerpxor %xmm1,%xmm9 2240ac341f1SConrad Meyerpsrld $23,%xmm2 2250ac341f1SConrad Meyerpxor %xmm2,%xmm9 2260ac341f1SConrad Meyermovdqa %xmm14,%xmm1 2270ac341f1SConrad Meyerpaddd %xmm15,%xmm1 2280ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2290ac341f1SConrad Meyerpslld $13,%xmm1 2300ac341f1SConrad Meyerpxor %xmm1,%xmm13 2310ac341f1SConrad Meyerpsrld $19,%xmm2 2320ac341f1SConrad Meyerpxor %xmm2,%xmm13 2330ac341f1SConrad Meyermovdqa %xmm11,%xmm1 2340ac341f1SConrad Meyerpaddd %xmm9,%xmm1 2350ac341f1SConrad 
Meyermovdqa %xmm1,%xmm2 2360ac341f1SConrad Meyerpslld $13,%xmm1 2370ac341f1SConrad Meyerpxor %xmm1,%xmm7 2380ac341f1SConrad Meyerpsrld $19,%xmm2 2390ac341f1SConrad Meyerpxor %xmm2,%xmm7 2400ac341f1SConrad Meyermovdqa %xmm15,%xmm1 2410ac341f1SConrad Meyerpaddd %xmm13,%xmm1 2420ac341f1SConrad Meyermovdqa %xmm1,%xmm2 2430ac341f1SConrad Meyerpslld $18,%xmm1 2440ac341f1SConrad Meyerpxor %xmm1,%xmm12 2450ac341f1SConrad Meyerpsrld $14,%xmm2 2460ac341f1SConrad Meyerpxor %xmm2,%xmm12 2470ac341f1SConrad Meyermovdqa 384(%rsp),%xmm1 2480ac341f1SConrad Meyermovdqa %xmm12,384(%rsp) 2490ac341f1SConrad Meyermovdqa %xmm9,%xmm2 2500ac341f1SConrad Meyerpaddd %xmm7,%xmm2 2510ac341f1SConrad Meyermovdqa %xmm2,%xmm12 2520ac341f1SConrad Meyerpslld $18,%xmm2 2530ac341f1SConrad Meyerpxor %xmm2,%xmm0 2540ac341f1SConrad Meyerpsrld $14,%xmm12 2550ac341f1SConrad Meyerpxor %xmm12,%xmm0 2560ac341f1SConrad Meyermovdqa %xmm5,%xmm2 2570ac341f1SConrad Meyerpaddd %xmm1,%xmm2 2580ac341f1SConrad Meyermovdqa %xmm2,%xmm12 2590ac341f1SConrad Meyerpslld $7,%xmm2 2600ac341f1SConrad Meyerpxor %xmm2,%xmm3 2610ac341f1SConrad Meyerpsrld $25,%xmm12 2620ac341f1SConrad Meyerpxor %xmm12,%xmm3 2630ac341f1SConrad Meyermovdqa 400(%rsp),%xmm2 2640ac341f1SConrad Meyermovdqa %xmm0,400(%rsp) 2650ac341f1SConrad Meyermovdqa %xmm6,%xmm0 2660ac341f1SConrad Meyerpaddd %xmm2,%xmm0 2670ac341f1SConrad Meyermovdqa %xmm0,%xmm12 2680ac341f1SConrad Meyerpslld $7,%xmm0 2690ac341f1SConrad Meyerpxor %xmm0,%xmm4 2700ac341f1SConrad Meyerpsrld $25,%xmm12 2710ac341f1SConrad Meyerpxor %xmm12,%xmm4 2720ac341f1SConrad Meyermovdqa %xmm1,%xmm0 2730ac341f1SConrad Meyerpaddd %xmm3,%xmm0 2740ac341f1SConrad Meyermovdqa %xmm0,%xmm12 2750ac341f1SConrad Meyerpslld $9,%xmm0 2760ac341f1SConrad Meyerpxor %xmm0,%xmm10 2770ac341f1SConrad Meyerpsrld $23,%xmm12 2780ac341f1SConrad Meyerpxor %xmm12,%xmm10 2790ac341f1SConrad Meyermovdqa %xmm2,%xmm0 2800ac341f1SConrad Meyerpaddd %xmm4,%xmm0 2810ac341f1SConrad Meyermovdqa %xmm0,%xmm12 2820ac341f1SConrad Meyerpslld 
$9,%xmm0 2830ac341f1SConrad Meyerpxor %xmm0,%xmm8 2840ac341f1SConrad Meyerpsrld $23,%xmm12 2850ac341f1SConrad Meyerpxor %xmm12,%xmm8 2860ac341f1SConrad Meyermovdqa %xmm3,%xmm0 2870ac341f1SConrad Meyerpaddd %xmm10,%xmm0 2880ac341f1SConrad Meyermovdqa %xmm0,%xmm12 2890ac341f1SConrad Meyerpslld $13,%xmm0 2900ac341f1SConrad Meyerpxor %xmm0,%xmm5 2910ac341f1SConrad Meyerpsrld $19,%xmm12 2920ac341f1SConrad Meyerpxor %xmm12,%xmm5 2930ac341f1SConrad Meyermovdqa %xmm4,%xmm0 2940ac341f1SConrad Meyerpaddd %xmm8,%xmm0 2950ac341f1SConrad Meyermovdqa %xmm0,%xmm12 2960ac341f1SConrad Meyerpslld $13,%xmm0 2970ac341f1SConrad Meyerpxor %xmm0,%xmm6 2980ac341f1SConrad Meyerpsrld $19,%xmm12 2990ac341f1SConrad Meyerpxor %xmm12,%xmm6 3000ac341f1SConrad Meyermovdqa %xmm10,%xmm0 3010ac341f1SConrad Meyerpaddd %xmm5,%xmm0 3020ac341f1SConrad Meyermovdqa %xmm0,%xmm12 3030ac341f1SConrad Meyerpslld $18,%xmm0 3040ac341f1SConrad Meyerpxor %xmm0,%xmm1 3050ac341f1SConrad Meyerpsrld $14,%xmm12 3060ac341f1SConrad Meyerpxor %xmm12,%xmm1 3070ac341f1SConrad Meyermovdqa 384(%rsp),%xmm0 3080ac341f1SConrad Meyermovdqa %xmm1,384(%rsp) 3090ac341f1SConrad Meyermovdqa %xmm4,%xmm1 3100ac341f1SConrad Meyerpaddd %xmm0,%xmm1 3110ac341f1SConrad Meyermovdqa %xmm1,%xmm12 3120ac341f1SConrad Meyerpslld $7,%xmm1 3130ac341f1SConrad Meyerpxor %xmm1,%xmm7 3140ac341f1SConrad Meyerpsrld $25,%xmm12 3150ac341f1SConrad Meyerpxor %xmm12,%xmm7 3160ac341f1SConrad Meyermovdqa %xmm8,%xmm1 3170ac341f1SConrad Meyerpaddd %xmm6,%xmm1 3180ac341f1SConrad Meyermovdqa %xmm1,%xmm12 3190ac341f1SConrad Meyerpslld $18,%xmm1 3200ac341f1SConrad Meyerpxor %xmm1,%xmm2 3210ac341f1SConrad Meyerpsrld $14,%xmm12 3220ac341f1SConrad Meyerpxor %xmm12,%xmm2 3230ac341f1SConrad Meyermovdqa 400(%rsp),%xmm12 3240ac341f1SConrad Meyermovdqa %xmm2,400(%rsp) 3250ac341f1SConrad Meyermovdqa %xmm14,%xmm1 3260ac341f1SConrad Meyerpaddd %xmm12,%xmm1 3270ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3280ac341f1SConrad Meyerpslld $7,%xmm1 3290ac341f1SConrad Meyerpxor %xmm1,%xmm5 
3300ac341f1SConrad Meyerpsrld $25,%xmm2 3310ac341f1SConrad Meyerpxor %xmm2,%xmm5 3320ac341f1SConrad Meyermovdqa %xmm0,%xmm1 3330ac341f1SConrad Meyerpaddd %xmm7,%xmm1 3340ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3350ac341f1SConrad Meyerpslld $9,%xmm1 3360ac341f1SConrad Meyerpxor %xmm1,%xmm10 3370ac341f1SConrad Meyerpsrld $23,%xmm2 3380ac341f1SConrad Meyerpxor %xmm2,%xmm10 3390ac341f1SConrad Meyermovdqa %xmm12,%xmm1 3400ac341f1SConrad Meyerpaddd %xmm5,%xmm1 3410ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3420ac341f1SConrad Meyerpslld $9,%xmm1 3430ac341f1SConrad Meyerpxor %xmm1,%xmm8 3440ac341f1SConrad Meyerpsrld $23,%xmm2 3450ac341f1SConrad Meyerpxor %xmm2,%xmm8 3460ac341f1SConrad Meyermovdqa %xmm7,%xmm1 3470ac341f1SConrad Meyerpaddd %xmm10,%xmm1 3480ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3490ac341f1SConrad Meyerpslld $13,%xmm1 3500ac341f1SConrad Meyerpxor %xmm1,%xmm4 3510ac341f1SConrad Meyerpsrld $19,%xmm2 3520ac341f1SConrad Meyerpxor %xmm2,%xmm4 3530ac341f1SConrad Meyermovdqa %xmm5,%xmm1 3540ac341f1SConrad Meyerpaddd %xmm8,%xmm1 3550ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3560ac341f1SConrad Meyerpslld $13,%xmm1 3570ac341f1SConrad Meyerpxor %xmm1,%xmm14 3580ac341f1SConrad Meyerpsrld $19,%xmm2 3590ac341f1SConrad Meyerpxor %xmm2,%xmm14 3600ac341f1SConrad Meyermovdqa %xmm10,%xmm1 3610ac341f1SConrad Meyerpaddd %xmm4,%xmm1 3620ac341f1SConrad Meyermovdqa %xmm1,%xmm2 3630ac341f1SConrad Meyerpslld $18,%xmm1 3640ac341f1SConrad Meyerpxor %xmm1,%xmm0 3650ac341f1SConrad Meyerpsrld $14,%xmm2 3660ac341f1SConrad Meyerpxor %xmm2,%xmm0 3670ac341f1SConrad Meyermovdqa 384(%rsp),%xmm1 3680ac341f1SConrad Meyermovdqa %xmm0,384(%rsp) 3690ac341f1SConrad Meyermovdqa %xmm8,%xmm0 3700ac341f1SConrad Meyerpaddd %xmm14,%xmm0 3710ac341f1SConrad Meyermovdqa %xmm0,%xmm2 3720ac341f1SConrad Meyerpslld $18,%xmm0 3730ac341f1SConrad Meyerpxor %xmm0,%xmm12 3740ac341f1SConrad Meyerpsrld $14,%xmm2 3750ac341f1SConrad Meyerpxor %xmm2,%xmm12 3760ac341f1SConrad Meyermovdqa %xmm11,%xmm0 3770ac341f1SConrad Meyerpaddd 
%xmm1,%xmm0 3780ac341f1SConrad Meyermovdqa %xmm0,%xmm2 3790ac341f1SConrad Meyerpslld $7,%xmm0 3800ac341f1SConrad Meyerpxor %xmm0,%xmm6 3810ac341f1SConrad Meyerpsrld $25,%xmm2 3820ac341f1SConrad Meyerpxor %xmm2,%xmm6 3830ac341f1SConrad Meyermovdqa 400(%rsp),%xmm2 3840ac341f1SConrad Meyermovdqa %xmm12,400(%rsp) 3850ac341f1SConrad Meyermovdqa %xmm3,%xmm0 3860ac341f1SConrad Meyerpaddd %xmm2,%xmm0 3870ac341f1SConrad Meyermovdqa %xmm0,%xmm12 3880ac341f1SConrad Meyerpslld $7,%xmm0 3890ac341f1SConrad Meyerpxor %xmm0,%xmm13 3900ac341f1SConrad Meyerpsrld $25,%xmm12 3910ac341f1SConrad Meyerpxor %xmm12,%xmm13 3920ac341f1SConrad Meyermovdqa %xmm1,%xmm0 3930ac341f1SConrad Meyerpaddd %xmm6,%xmm0 3940ac341f1SConrad Meyermovdqa %xmm0,%xmm12 3950ac341f1SConrad Meyerpslld $9,%xmm0 3960ac341f1SConrad Meyerpxor %xmm0,%xmm15 3970ac341f1SConrad Meyerpsrld $23,%xmm12 3980ac341f1SConrad Meyerpxor %xmm12,%xmm15 3990ac341f1SConrad Meyermovdqa %xmm2,%xmm0 4000ac341f1SConrad Meyerpaddd %xmm13,%xmm0 4010ac341f1SConrad Meyermovdqa %xmm0,%xmm12 4020ac341f1SConrad Meyerpslld $9,%xmm0 4030ac341f1SConrad Meyerpxor %xmm0,%xmm9 4040ac341f1SConrad Meyerpsrld $23,%xmm12 4050ac341f1SConrad Meyerpxor %xmm12,%xmm9 4060ac341f1SConrad Meyermovdqa %xmm6,%xmm0 4070ac341f1SConrad Meyerpaddd %xmm15,%xmm0 4080ac341f1SConrad Meyermovdqa %xmm0,%xmm12 4090ac341f1SConrad Meyerpslld $13,%xmm0 4100ac341f1SConrad Meyerpxor %xmm0,%xmm11 4110ac341f1SConrad Meyerpsrld $19,%xmm12 4120ac341f1SConrad Meyerpxor %xmm12,%xmm11 4130ac341f1SConrad Meyermovdqa %xmm13,%xmm0 4140ac341f1SConrad Meyerpaddd %xmm9,%xmm0 4150ac341f1SConrad Meyermovdqa %xmm0,%xmm12 4160ac341f1SConrad Meyerpslld $13,%xmm0 4170ac341f1SConrad Meyerpxor %xmm0,%xmm3 4180ac341f1SConrad Meyerpsrld $19,%xmm12 4190ac341f1SConrad Meyerpxor %xmm12,%xmm3 4200ac341f1SConrad Meyermovdqa %xmm15,%xmm0 4210ac341f1SConrad Meyerpaddd %xmm11,%xmm0 4220ac341f1SConrad Meyermovdqa %xmm0,%xmm12 4230ac341f1SConrad Meyerpslld $18,%xmm0 4240ac341f1SConrad Meyerpxor %xmm0,%xmm1 
4250ac341f1SConrad Meyerpsrld $14,%xmm12 4260ac341f1SConrad Meyerpxor %xmm12,%xmm1 4270ac341f1SConrad Meyermovdqa %xmm9,%xmm0 4280ac341f1SConrad Meyerpaddd %xmm3,%xmm0 4290ac341f1SConrad Meyermovdqa %xmm0,%xmm12 4300ac341f1SConrad Meyerpslld $18,%xmm0 4310ac341f1SConrad Meyerpxor %xmm0,%xmm2 4320ac341f1SConrad Meyerpsrld $14,%xmm12 4330ac341f1SConrad Meyerpxor %xmm12,%xmm2 4340ac341f1SConrad Meyermovdqa 384(%rsp),%xmm12 4350ac341f1SConrad Meyermovdqa 400(%rsp),%xmm0 4360ac341f1SConrad Meyersub $2,%rdx 4370ac341f1SConrad Meyerja ._mainloop1 4380ac341f1SConrad Meyer 4390ac341f1SConrad Meyerpaddd 176(%rsp),%xmm12 4400ac341f1SConrad Meyerpaddd 240(%rsp),%xmm7 4410ac341f1SConrad Meyerpaddd 288(%rsp),%xmm10 4420ac341f1SConrad Meyerpaddd 336(%rsp),%xmm4 4430ac341f1SConrad Meyermovd %xmm12,%rdx 4440ac341f1SConrad Meyermovd %xmm7,%rcx 4450ac341f1SConrad Meyermovd %xmm10,%r8 4460ac341f1SConrad Meyermovd %xmm4,%r9 4470ac341f1SConrad Meyerpshufd $0x39,%xmm12,%xmm12 4480ac341f1SConrad Meyerpshufd $0x39,%xmm7,%xmm7 4490ac341f1SConrad Meyerpshufd $0x39,%xmm10,%xmm10 4500ac341f1SConrad Meyerpshufd $0x39,%xmm4,%xmm4 4510ac341f1SConrad Meyerxorl 0(%rsi),%edx 4520ac341f1SConrad Meyerxorl 4(%rsi),%ecx 4530ac341f1SConrad Meyerxorl 8(%rsi),%r8d 4540ac341f1SConrad Meyerxorl 12(%rsi),%r9d 4550ac341f1SConrad Meyermovl %edx,0(%rdi) 4560ac341f1SConrad Meyermovl %ecx,4(%rdi) 4570ac341f1SConrad Meyermovl %r8d,8(%rdi) 4580ac341f1SConrad Meyermovl %r9d,12(%rdi) 4590ac341f1SConrad Meyermovd %xmm12,%rdx 4600ac341f1SConrad Meyermovd %xmm7,%rcx 4610ac341f1SConrad Meyermovd %xmm10,%r8 4620ac341f1SConrad Meyermovd %xmm4,%r9 4630ac341f1SConrad Meyerpshufd $0x39,%xmm12,%xmm12 4640ac341f1SConrad Meyerpshufd $0x39,%xmm7,%xmm7 4650ac341f1SConrad Meyerpshufd $0x39,%xmm10,%xmm10 4660ac341f1SConrad Meyerpshufd $0x39,%xmm4,%xmm4 4670ac341f1SConrad Meyerxorl 64(%rsi),%edx 4680ac341f1SConrad Meyerxorl 68(%rsi),%ecx 4690ac341f1SConrad Meyerxorl 72(%rsi),%r8d 4700ac341f1SConrad Meyerxorl 76(%rsi),%r9d 
4710ac341f1SConrad Meyermovl %edx,64(%rdi) 4720ac341f1SConrad Meyermovl %ecx,68(%rdi) 4730ac341f1SConrad Meyermovl %r8d,72(%rdi) 4740ac341f1SConrad Meyermovl %r9d,76(%rdi) 4750ac341f1SConrad Meyermovd %xmm12,%rdx 4760ac341f1SConrad Meyermovd %xmm7,%rcx 4770ac341f1SConrad Meyermovd %xmm10,%r8 4780ac341f1SConrad Meyermovd %xmm4,%r9 4790ac341f1SConrad Meyerpshufd $0x39,%xmm12,%xmm12 4800ac341f1SConrad Meyerpshufd $0x39,%xmm7,%xmm7 4810ac341f1SConrad Meyerpshufd $0x39,%xmm10,%xmm10 4820ac341f1SConrad Meyerpshufd $0x39,%xmm4,%xmm4 4830ac341f1SConrad Meyerxorl 128(%rsi),%edx 4840ac341f1SConrad Meyerxorl 132(%rsi),%ecx 4850ac341f1SConrad Meyerxorl 136(%rsi),%r8d 4860ac341f1SConrad Meyerxorl 140(%rsi),%r9d 4870ac341f1SConrad Meyermovl %edx,128(%rdi) 4880ac341f1SConrad Meyermovl %ecx,132(%rdi) 4890ac341f1SConrad Meyermovl %r8d,136(%rdi) 4900ac341f1SConrad Meyermovl %r9d,140(%rdi) 4910ac341f1SConrad Meyermovd %xmm12,%rdx 4920ac341f1SConrad Meyermovd %xmm7,%rcx 4930ac341f1SConrad Meyermovd %xmm10,%r8 4940ac341f1SConrad Meyermovd %xmm4,%r9 4950ac341f1SConrad Meyerxorl 192(%rsi),%edx 4960ac341f1SConrad Meyerxorl 196(%rsi),%ecx 4970ac341f1SConrad Meyerxorl 200(%rsi),%r8d 4980ac341f1SConrad Meyerxorl 204(%rsi),%r9d 4990ac341f1SConrad Meyermovl %edx,192(%rdi) 5000ac341f1SConrad Meyermovl %ecx,196(%rdi) 5010ac341f1SConrad Meyermovl %r8d,200(%rdi) 5020ac341f1SConrad Meyermovl %r9d,204(%rdi) 5030ac341f1SConrad Meyerpaddd 304(%rsp),%xmm14 5040ac341f1SConrad Meyerpaddd 128(%rsp),%xmm0 5050ac341f1SConrad Meyerpaddd 192(%rsp),%xmm5 5060ac341f1SConrad Meyerpaddd 256(%rsp),%xmm8 5070ac341f1SConrad Meyermovd %xmm14,%rdx 5080ac341f1SConrad Meyermovd %xmm0,%rcx 5090ac341f1SConrad Meyermovd %xmm5,%r8 5100ac341f1SConrad Meyermovd %xmm8,%r9 5110ac341f1SConrad Meyerpshufd $0x39,%xmm14,%xmm14 5120ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 5130ac341f1SConrad Meyerpshufd $0x39,%xmm5,%xmm5 5140ac341f1SConrad Meyerpshufd $0x39,%xmm8,%xmm8 5150ac341f1SConrad Meyerxorl 16(%rsi),%edx 5160ac341f1SConrad 
Meyerxorl 20(%rsi),%ecx 5170ac341f1SConrad Meyerxorl 24(%rsi),%r8d 5180ac341f1SConrad Meyerxorl 28(%rsi),%r9d 5190ac341f1SConrad Meyermovl %edx,16(%rdi) 5200ac341f1SConrad Meyermovl %ecx,20(%rdi) 5210ac341f1SConrad Meyermovl %r8d,24(%rdi) 5220ac341f1SConrad Meyermovl %r9d,28(%rdi) 5230ac341f1SConrad Meyermovd %xmm14,%rdx 5240ac341f1SConrad Meyermovd %xmm0,%rcx 5250ac341f1SConrad Meyermovd %xmm5,%r8 5260ac341f1SConrad Meyermovd %xmm8,%r9 5270ac341f1SConrad Meyerpshufd $0x39,%xmm14,%xmm14 5280ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 5290ac341f1SConrad Meyerpshufd $0x39,%xmm5,%xmm5 5300ac341f1SConrad Meyerpshufd $0x39,%xmm8,%xmm8 5310ac341f1SConrad Meyerxorl 80(%rsi),%edx 5320ac341f1SConrad Meyerxorl 84(%rsi),%ecx 5330ac341f1SConrad Meyerxorl 88(%rsi),%r8d 5340ac341f1SConrad Meyerxorl 92(%rsi),%r9d 5350ac341f1SConrad Meyermovl %edx,80(%rdi) 5360ac341f1SConrad Meyermovl %ecx,84(%rdi) 5370ac341f1SConrad Meyermovl %r8d,88(%rdi) 5380ac341f1SConrad Meyermovl %r9d,92(%rdi) 5390ac341f1SConrad Meyermovd %xmm14,%rdx 5400ac341f1SConrad Meyermovd %xmm0,%rcx 5410ac341f1SConrad Meyermovd %xmm5,%r8 5420ac341f1SConrad Meyermovd %xmm8,%r9 5430ac341f1SConrad Meyerpshufd $0x39,%xmm14,%xmm14 5440ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 5450ac341f1SConrad Meyerpshufd $0x39,%xmm5,%xmm5 5460ac341f1SConrad Meyerpshufd $0x39,%xmm8,%xmm8 5470ac341f1SConrad Meyerxorl 144(%rsi),%edx 5480ac341f1SConrad Meyerxorl 148(%rsi),%ecx 5490ac341f1SConrad Meyerxorl 152(%rsi),%r8d 5500ac341f1SConrad Meyerxorl 156(%rsi),%r9d 5510ac341f1SConrad Meyermovl %edx,144(%rdi) 5520ac341f1SConrad Meyermovl %ecx,148(%rdi) 5530ac341f1SConrad Meyermovl %r8d,152(%rdi) 5540ac341f1SConrad Meyermovl %r9d,156(%rdi) 5550ac341f1SConrad Meyermovd %xmm14,%rdx 5560ac341f1SConrad Meyermovd %xmm0,%rcx 5570ac341f1SConrad Meyermovd %xmm5,%r8 5580ac341f1SConrad Meyermovd %xmm8,%r9 5590ac341f1SConrad Meyerxorl 208(%rsi),%edx 5600ac341f1SConrad Meyerxorl 212(%rsi),%ecx 5610ac341f1SConrad Meyerxorl 216(%rsi),%r8d 
5620ac341f1SConrad Meyerxorl 220(%rsi),%r9d 5630ac341f1SConrad Meyermovl %edx,208(%rdi) 5640ac341f1SConrad Meyermovl %ecx,212(%rdi) 5650ac341f1SConrad Meyermovl %r8d,216(%rdi) 5660ac341f1SConrad Meyermovl %r9d,220(%rdi) 5670ac341f1SConrad Meyerpaddd 352(%rsp),%xmm15 5680ac341f1SConrad Meyerpaddd 368(%rsp),%xmm11 5690ac341f1SConrad Meyerpaddd 144(%rsp),%xmm1 5700ac341f1SConrad Meyerpaddd 208(%rsp),%xmm6 5710ac341f1SConrad Meyermovd %xmm15,%rdx 5720ac341f1SConrad Meyermovd %xmm11,%rcx 5730ac341f1SConrad Meyermovd %xmm1,%r8 5740ac341f1SConrad Meyermovd %xmm6,%r9 5750ac341f1SConrad Meyerpshufd $0x39,%xmm15,%xmm15 5760ac341f1SConrad Meyerpshufd $0x39,%xmm11,%xmm11 5770ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 5780ac341f1SConrad Meyerpshufd $0x39,%xmm6,%xmm6 5790ac341f1SConrad Meyerxorl 32(%rsi),%edx 5800ac341f1SConrad Meyerxorl 36(%rsi),%ecx 5810ac341f1SConrad Meyerxorl 40(%rsi),%r8d 5820ac341f1SConrad Meyerxorl 44(%rsi),%r9d 5830ac341f1SConrad Meyermovl %edx,32(%rdi) 5840ac341f1SConrad Meyermovl %ecx,36(%rdi) 5850ac341f1SConrad Meyermovl %r8d,40(%rdi) 5860ac341f1SConrad Meyermovl %r9d,44(%rdi) 5870ac341f1SConrad Meyermovd %xmm15,%rdx 5880ac341f1SConrad Meyermovd %xmm11,%rcx 5890ac341f1SConrad Meyermovd %xmm1,%r8 5900ac341f1SConrad Meyermovd %xmm6,%r9 5910ac341f1SConrad Meyerpshufd $0x39,%xmm15,%xmm15 5920ac341f1SConrad Meyerpshufd $0x39,%xmm11,%xmm11 5930ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 5940ac341f1SConrad Meyerpshufd $0x39,%xmm6,%xmm6 5950ac341f1SConrad Meyerxorl 96(%rsi),%edx 5960ac341f1SConrad Meyerxorl 100(%rsi),%ecx 5970ac341f1SConrad Meyerxorl 104(%rsi),%r8d 5980ac341f1SConrad Meyerxorl 108(%rsi),%r9d 5990ac341f1SConrad Meyermovl %edx,96(%rdi) 6000ac341f1SConrad Meyermovl %ecx,100(%rdi) 6010ac341f1SConrad Meyermovl %r8d,104(%rdi) 6020ac341f1SConrad Meyermovl %r9d,108(%rdi) 6030ac341f1SConrad Meyermovd %xmm15,%rdx 6040ac341f1SConrad Meyermovd %xmm11,%rcx 6050ac341f1SConrad Meyermovd %xmm1,%r8 6060ac341f1SConrad Meyermovd %xmm6,%r9 6070ac341f1SConrad 
Meyerpshufd $0x39,%xmm15,%xmm15 6080ac341f1SConrad Meyerpshufd $0x39,%xmm11,%xmm11 6090ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 6100ac341f1SConrad Meyerpshufd $0x39,%xmm6,%xmm6 6110ac341f1SConrad Meyerxorl 160(%rsi),%edx 6120ac341f1SConrad Meyerxorl 164(%rsi),%ecx 6130ac341f1SConrad Meyerxorl 168(%rsi),%r8d 6140ac341f1SConrad Meyerxorl 172(%rsi),%r9d 6150ac341f1SConrad Meyermovl %edx,160(%rdi) 6160ac341f1SConrad Meyermovl %ecx,164(%rdi) 6170ac341f1SConrad Meyermovl %r8d,168(%rdi) 6180ac341f1SConrad Meyermovl %r9d,172(%rdi) 6190ac341f1SConrad Meyermovd %xmm15,%rdx 6200ac341f1SConrad Meyermovd %xmm11,%rcx 6210ac341f1SConrad Meyermovd %xmm1,%r8 6220ac341f1SConrad Meyermovd %xmm6,%r9 6230ac341f1SConrad Meyerxorl 224(%rsi),%edx 6240ac341f1SConrad Meyerxorl 228(%rsi),%ecx 6250ac341f1SConrad Meyerxorl 232(%rsi),%r8d 6260ac341f1SConrad Meyerxorl 236(%rsi),%r9d 6270ac341f1SConrad Meyermovl %edx,224(%rdi) 6280ac341f1SConrad Meyermovl %ecx,228(%rdi) 6290ac341f1SConrad Meyermovl %r8d,232(%rdi) 6300ac341f1SConrad Meyermovl %r9d,236(%rdi) 6310ac341f1SConrad Meyerpaddd 224(%rsp),%xmm13 6320ac341f1SConrad Meyerpaddd 272(%rsp),%xmm9 6330ac341f1SConrad Meyerpaddd 320(%rsp),%xmm3 6340ac341f1SConrad Meyerpaddd 160(%rsp),%xmm2 6350ac341f1SConrad Meyermovd %xmm13,%rdx 6360ac341f1SConrad Meyermovd %xmm9,%rcx 6370ac341f1SConrad Meyermovd %xmm3,%r8 6380ac341f1SConrad Meyermovd %xmm2,%r9 6390ac341f1SConrad Meyerpshufd $0x39,%xmm13,%xmm13 6400ac341f1SConrad Meyerpshufd $0x39,%xmm9,%xmm9 6410ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 6420ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 6430ac341f1SConrad Meyerxorl 48(%rsi),%edx 6440ac341f1SConrad Meyerxorl 52(%rsi),%ecx 6450ac341f1SConrad Meyerxorl 56(%rsi),%r8d 6460ac341f1SConrad Meyerxorl 60(%rsi),%r9d 6470ac341f1SConrad Meyermovl %edx,48(%rdi) 6480ac341f1SConrad Meyermovl %ecx,52(%rdi) 6490ac341f1SConrad Meyermovl %r8d,56(%rdi) 6500ac341f1SConrad Meyermovl %r9d,60(%rdi) 6510ac341f1SConrad Meyermovd %xmm13,%rdx 6520ac341f1SConrad 
Meyermovd %xmm9,%rcx 6530ac341f1SConrad Meyermovd %xmm3,%r8 6540ac341f1SConrad Meyermovd %xmm2,%r9 6550ac341f1SConrad Meyerpshufd $0x39,%xmm13,%xmm13 6560ac341f1SConrad Meyerpshufd $0x39,%xmm9,%xmm9 6570ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 6580ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 6590ac341f1SConrad Meyerxorl 112(%rsi),%edx 6600ac341f1SConrad Meyerxorl 116(%rsi),%ecx 6610ac341f1SConrad Meyerxorl 120(%rsi),%r8d 6620ac341f1SConrad Meyerxorl 124(%rsi),%r9d 6630ac341f1SConrad Meyermovl %edx,112(%rdi) 6640ac341f1SConrad Meyermovl %ecx,116(%rdi) 6650ac341f1SConrad Meyermovl %r8d,120(%rdi) 6660ac341f1SConrad Meyermovl %r9d,124(%rdi) 6670ac341f1SConrad Meyermovd %xmm13,%rdx 6680ac341f1SConrad Meyermovd %xmm9,%rcx 6690ac341f1SConrad Meyermovd %xmm3,%r8 6700ac341f1SConrad Meyermovd %xmm2,%r9 6710ac341f1SConrad Meyerpshufd $0x39,%xmm13,%xmm13 6720ac341f1SConrad Meyerpshufd $0x39,%xmm9,%xmm9 6730ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 6740ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 6750ac341f1SConrad Meyerxorl 176(%rsi),%edx 6760ac341f1SConrad Meyerxorl 180(%rsi),%ecx 6770ac341f1SConrad Meyerxorl 184(%rsi),%r8d 6780ac341f1SConrad Meyerxorl 188(%rsi),%r9d 6790ac341f1SConrad Meyermovl %edx,176(%rdi) 6800ac341f1SConrad Meyermovl %ecx,180(%rdi) 6810ac341f1SConrad Meyermovl %r8d,184(%rdi) 6820ac341f1SConrad Meyermovl %r9d,188(%rdi) 6830ac341f1SConrad Meyermovd %xmm13,%rdx 6840ac341f1SConrad Meyermovd %xmm9,%rcx 6850ac341f1SConrad Meyermovd %xmm3,%r8 6860ac341f1SConrad Meyermovd %xmm2,%r9 6870ac341f1SConrad Meyerxorl 240(%rsi),%edx 6880ac341f1SConrad Meyerxorl 244(%rsi),%ecx 6890ac341f1SConrad Meyerxorl 248(%rsi),%r8d 6900ac341f1SConrad Meyerxorl 252(%rsi),%r9d 6910ac341f1SConrad Meyermovl %edx,240(%rdi) 6920ac341f1SConrad Meyermovl %ecx,244(%rdi) 6930ac341f1SConrad Meyermovl %r8d,248(%rdi) 6940ac341f1SConrad Meyermovl %r9d,252(%rdi) 6950ac341f1SConrad Meyermovq 480(%rsp),%r9 6960ac341f1SConrad Meyersub $256,%r9 6970ac341f1SConrad Meyeradd $256,%rsi 
6980ac341f1SConrad Meyeradd $256,%rdi 6990ac341f1SConrad Meyercmp $256,%r9 7000ac341f1SConrad Meyerjae ._bytesatleast256 7010ac341f1SConrad Meyer 7020ac341f1SConrad Meyercmp $0,%r9 7030ac341f1SConrad Meyerjbe ._done 7040ac341f1SConrad Meyer 7050ac341f1SConrad Meyer._bytesbetween1and255: 7060ac341f1SConrad Meyercmp $64,%r9 7070ac341f1SConrad Meyerjae ._nocopy 7080ac341f1SConrad Meyer 7090ac341f1SConrad Meyermov %rdi,%rdx 7100ac341f1SConrad Meyerleaq 0(%rsp),%rdi 7110ac341f1SConrad Meyermov %r9,%rcx 7120ac341f1SConrad Meyerrep movsb 7130ac341f1SConrad Meyerleaq 0(%rsp),%rdi 7140ac341f1SConrad Meyerleaq 0(%rsp),%rsi 7150ac341f1SConrad Meyer 7160ac341f1SConrad Meyer._nocopy: 7170ac341f1SConrad Meyermovq %r9,480(%rsp) 7180ac341f1SConrad Meyermovdqa 112(%rsp),%xmm0 7190ac341f1SConrad Meyermovdqa 64(%rsp),%xmm1 7200ac341f1SConrad Meyermovdqa 80(%rsp),%xmm2 7210ac341f1SConrad Meyermovdqa 96(%rsp),%xmm3 7220ac341f1SConrad Meyermovdqa %xmm1,%xmm4 7230ac341f1SConrad Meyermov $20,%rcx 7240ac341f1SConrad Meyer 7250ac341f1SConrad Meyer.p2align 4 7260ac341f1SConrad Meyer._mainloop2: 7270ac341f1SConrad Meyerpaddd %xmm0,%xmm4 7280ac341f1SConrad Meyermovdqa %xmm0,%xmm5 7290ac341f1SConrad Meyermovdqa %xmm4,%xmm6 7300ac341f1SConrad Meyerpslld $7,%xmm4 7310ac341f1SConrad Meyerpsrld $25,%xmm6 7320ac341f1SConrad Meyerpxor %xmm4,%xmm3 7330ac341f1SConrad Meyerpxor %xmm6,%xmm3 7340ac341f1SConrad Meyerpaddd %xmm3,%xmm5 7350ac341f1SConrad Meyermovdqa %xmm3,%xmm4 7360ac341f1SConrad Meyermovdqa %xmm5,%xmm6 7370ac341f1SConrad Meyerpslld $9,%xmm5 7380ac341f1SConrad Meyerpsrld $23,%xmm6 7390ac341f1SConrad Meyerpxor %xmm5,%xmm2 7400ac341f1SConrad Meyerpshufd $0x93,%xmm3,%xmm3 7410ac341f1SConrad Meyerpxor %xmm6,%xmm2 7420ac341f1SConrad Meyerpaddd %xmm2,%xmm4 7430ac341f1SConrad Meyermovdqa %xmm2,%xmm5 7440ac341f1SConrad Meyermovdqa %xmm4,%xmm6 7450ac341f1SConrad Meyerpslld $13,%xmm4 7460ac341f1SConrad Meyerpsrld $19,%xmm6 7470ac341f1SConrad Meyerpxor %xmm4,%xmm1 7480ac341f1SConrad Meyerpshufd 
$0x4e,%xmm2,%xmm2 7490ac341f1SConrad Meyerpxor %xmm6,%xmm1 7500ac341f1SConrad Meyerpaddd %xmm1,%xmm5 7510ac341f1SConrad Meyermovdqa %xmm3,%xmm4 7520ac341f1SConrad Meyermovdqa %xmm5,%xmm6 7530ac341f1SConrad Meyerpslld $18,%xmm5 7540ac341f1SConrad Meyerpsrld $14,%xmm6 7550ac341f1SConrad Meyerpxor %xmm5,%xmm0 7560ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 7570ac341f1SConrad Meyerpxor %xmm6,%xmm0 7580ac341f1SConrad Meyerpaddd %xmm0,%xmm4 7590ac341f1SConrad Meyermovdqa %xmm0,%xmm5 7600ac341f1SConrad Meyermovdqa %xmm4,%xmm6 7610ac341f1SConrad Meyerpslld $7,%xmm4 7620ac341f1SConrad Meyerpsrld $25,%xmm6 7630ac341f1SConrad Meyerpxor %xmm4,%xmm1 7640ac341f1SConrad Meyerpxor %xmm6,%xmm1 7650ac341f1SConrad Meyerpaddd %xmm1,%xmm5 7660ac341f1SConrad Meyermovdqa %xmm1,%xmm4 7670ac341f1SConrad Meyermovdqa %xmm5,%xmm6 7680ac341f1SConrad Meyerpslld $9,%xmm5 7690ac341f1SConrad Meyerpsrld $23,%xmm6 7700ac341f1SConrad Meyerpxor %xmm5,%xmm2 7710ac341f1SConrad Meyerpshufd $0x93,%xmm1,%xmm1 7720ac341f1SConrad Meyerpxor %xmm6,%xmm2 7730ac341f1SConrad Meyerpaddd %xmm2,%xmm4 7740ac341f1SConrad Meyermovdqa %xmm2,%xmm5 7750ac341f1SConrad Meyermovdqa %xmm4,%xmm6 7760ac341f1SConrad Meyerpslld $13,%xmm4 7770ac341f1SConrad Meyerpsrld $19,%xmm6 7780ac341f1SConrad Meyerpxor %xmm4,%xmm3 7790ac341f1SConrad Meyerpshufd $0x4e,%xmm2,%xmm2 7800ac341f1SConrad Meyerpxor %xmm6,%xmm3 7810ac341f1SConrad Meyerpaddd %xmm3,%xmm5 7820ac341f1SConrad Meyermovdqa %xmm1,%xmm4 7830ac341f1SConrad Meyermovdqa %xmm5,%xmm6 7840ac341f1SConrad Meyerpslld $18,%xmm5 7850ac341f1SConrad Meyerpsrld $14,%xmm6 7860ac341f1SConrad Meyerpxor %xmm5,%xmm0 7870ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 7880ac341f1SConrad Meyerpxor %xmm6,%xmm0 7890ac341f1SConrad Meyerpaddd %xmm0,%xmm4 7900ac341f1SConrad Meyermovdqa %xmm0,%xmm5 7910ac341f1SConrad Meyermovdqa %xmm4,%xmm6 7920ac341f1SConrad Meyerpslld $7,%xmm4 7930ac341f1SConrad Meyerpsrld $25,%xmm6 7940ac341f1SConrad Meyerpxor %xmm4,%xmm3 7950ac341f1SConrad Meyerpxor %xmm6,%xmm3 
7960ac341f1SConrad Meyerpaddd %xmm3,%xmm5 7970ac341f1SConrad Meyermovdqa %xmm3,%xmm4 7980ac341f1SConrad Meyermovdqa %xmm5,%xmm6 7990ac341f1SConrad Meyerpslld $9,%xmm5 8000ac341f1SConrad Meyerpsrld $23,%xmm6 8010ac341f1SConrad Meyerpxor %xmm5,%xmm2 8020ac341f1SConrad Meyerpshufd $0x93,%xmm3,%xmm3 8030ac341f1SConrad Meyerpxor %xmm6,%xmm2 8040ac341f1SConrad Meyerpaddd %xmm2,%xmm4 8050ac341f1SConrad Meyermovdqa %xmm2,%xmm5 8060ac341f1SConrad Meyermovdqa %xmm4,%xmm6 8070ac341f1SConrad Meyerpslld $13,%xmm4 8080ac341f1SConrad Meyerpsrld $19,%xmm6 8090ac341f1SConrad Meyerpxor %xmm4,%xmm1 8100ac341f1SConrad Meyerpshufd $0x4e,%xmm2,%xmm2 8110ac341f1SConrad Meyerpxor %xmm6,%xmm1 8120ac341f1SConrad Meyerpaddd %xmm1,%xmm5 8130ac341f1SConrad Meyermovdqa %xmm3,%xmm4 8140ac341f1SConrad Meyermovdqa %xmm5,%xmm6 8150ac341f1SConrad Meyerpslld $18,%xmm5 8160ac341f1SConrad Meyerpsrld $14,%xmm6 8170ac341f1SConrad Meyerpxor %xmm5,%xmm0 8180ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 8190ac341f1SConrad Meyerpxor %xmm6,%xmm0 8200ac341f1SConrad Meyerpaddd %xmm0,%xmm4 8210ac341f1SConrad Meyermovdqa %xmm0,%xmm5 8220ac341f1SConrad Meyermovdqa %xmm4,%xmm6 8230ac341f1SConrad Meyerpslld $7,%xmm4 8240ac341f1SConrad Meyerpsrld $25,%xmm6 8250ac341f1SConrad Meyerpxor %xmm4,%xmm1 8260ac341f1SConrad Meyerpxor %xmm6,%xmm1 8270ac341f1SConrad Meyerpaddd %xmm1,%xmm5 8280ac341f1SConrad Meyermovdqa %xmm1,%xmm4 8290ac341f1SConrad Meyermovdqa %xmm5,%xmm6 8300ac341f1SConrad Meyerpslld $9,%xmm5 8310ac341f1SConrad Meyerpsrld $23,%xmm6 8320ac341f1SConrad Meyerpxor %xmm5,%xmm2 8330ac341f1SConrad Meyerpshufd $0x93,%xmm1,%xmm1 8340ac341f1SConrad Meyerpxor %xmm6,%xmm2 8350ac341f1SConrad Meyerpaddd %xmm2,%xmm4 8360ac341f1SConrad Meyermovdqa %xmm2,%xmm5 8370ac341f1SConrad Meyermovdqa %xmm4,%xmm6 8380ac341f1SConrad Meyerpslld $13,%xmm4 8390ac341f1SConrad Meyerpsrld $19,%xmm6 8400ac341f1SConrad Meyerpxor %xmm4,%xmm3 8410ac341f1SConrad Meyerpshufd $0x4e,%xmm2,%xmm2 8420ac341f1SConrad Meyerpxor %xmm6,%xmm3 
8430ac341f1SConrad Meyersub $4,%rcx 8440ac341f1SConrad Meyerpaddd %xmm3,%xmm5 8450ac341f1SConrad Meyermovdqa %xmm1,%xmm4 8460ac341f1SConrad Meyermovdqa %xmm5,%xmm6 8470ac341f1SConrad Meyerpslld $18,%xmm5 8480ac341f1SConrad Meyerpxor %xmm7,%xmm7 8490ac341f1SConrad Meyerpsrld $14,%xmm6 8500ac341f1SConrad Meyerpxor %xmm5,%xmm0 8510ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 8520ac341f1SConrad Meyerpxor %xmm6,%xmm0 8530ac341f1SConrad Meyerja ._mainloop2 8540ac341f1SConrad Meyer 8550ac341f1SConrad Meyerpaddd 112(%rsp),%xmm0 8560ac341f1SConrad Meyerpaddd 64(%rsp),%xmm1 8570ac341f1SConrad Meyerpaddd 80(%rsp),%xmm2 8580ac341f1SConrad Meyerpaddd 96(%rsp),%xmm3 8590ac341f1SConrad Meyermovd %xmm0,%rcx 8600ac341f1SConrad Meyermovd %xmm1,%r8 8610ac341f1SConrad Meyermovd %xmm2,%r9 8620ac341f1SConrad Meyermovd %xmm3,%rax 8630ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 8640ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 8650ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 8660ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 8670ac341f1SConrad Meyerxorl 0(%rsi),%ecx 8680ac341f1SConrad Meyerxorl 48(%rsi),%r8d 8690ac341f1SConrad Meyerxorl 32(%rsi),%r9d 8700ac341f1SConrad Meyerxorl 16(%rsi),%eax 8710ac341f1SConrad Meyermovl %ecx,0(%rdi) 8720ac341f1SConrad Meyermovl %r8d,48(%rdi) 8730ac341f1SConrad Meyermovl %r9d,32(%rdi) 8740ac341f1SConrad Meyermovl %eax,16(%rdi) 8750ac341f1SConrad Meyermovd %xmm0,%rcx 8760ac341f1SConrad Meyermovd %xmm1,%r8 8770ac341f1SConrad Meyermovd %xmm2,%r9 8780ac341f1SConrad Meyermovd %xmm3,%rax 8790ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 8800ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 8810ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 8820ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 8830ac341f1SConrad Meyerxorl 20(%rsi),%ecx 8840ac341f1SConrad Meyerxorl 4(%rsi),%r8d 8850ac341f1SConrad Meyerxorl 52(%rsi),%r9d 8860ac341f1SConrad Meyerxorl 36(%rsi),%eax 8870ac341f1SConrad Meyermovl %ecx,20(%rdi) 8880ac341f1SConrad Meyermovl %r8d,4(%rdi) 8890ac341f1SConrad Meyermovl 
%r9d,52(%rdi) 8900ac341f1SConrad Meyermovl %eax,36(%rdi) 8910ac341f1SConrad Meyermovd %xmm0,%rcx 8920ac341f1SConrad Meyermovd %xmm1,%r8 8930ac341f1SConrad Meyermovd %xmm2,%r9 8940ac341f1SConrad Meyermovd %xmm3,%rax 8950ac341f1SConrad Meyerpshufd $0x39,%xmm0,%xmm0 8960ac341f1SConrad Meyerpshufd $0x39,%xmm1,%xmm1 8970ac341f1SConrad Meyerpshufd $0x39,%xmm2,%xmm2 8980ac341f1SConrad Meyerpshufd $0x39,%xmm3,%xmm3 8990ac341f1SConrad Meyerxorl 40(%rsi),%ecx 9000ac341f1SConrad Meyerxorl 24(%rsi),%r8d 9010ac341f1SConrad Meyerxorl 8(%rsi),%r9d 9020ac341f1SConrad Meyerxorl 56(%rsi),%eax 9030ac341f1SConrad Meyermovl %ecx,40(%rdi) 9040ac341f1SConrad Meyermovl %r8d,24(%rdi) 9050ac341f1SConrad Meyermovl %r9d,8(%rdi) 9060ac341f1SConrad Meyermovl %eax,56(%rdi) 9070ac341f1SConrad Meyermovd %xmm0,%rcx 9080ac341f1SConrad Meyermovd %xmm1,%r8 9090ac341f1SConrad Meyermovd %xmm2,%r9 9100ac341f1SConrad Meyermovd %xmm3,%rax 9110ac341f1SConrad Meyerxorl 60(%rsi),%ecx 9120ac341f1SConrad Meyerxorl 44(%rsi),%r8d 9130ac341f1SConrad Meyerxorl 28(%rsi),%r9d 9140ac341f1SConrad Meyerxorl 12(%rsi),%eax 9150ac341f1SConrad Meyermovl %ecx,60(%rdi) 9160ac341f1SConrad Meyermovl %r8d,44(%rdi) 9170ac341f1SConrad Meyermovl %r9d,28(%rdi) 9180ac341f1SConrad Meyermovl %eax,12(%rdi) 9190ac341f1SConrad Meyermovq 480(%rsp),%r9 9200ac341f1SConrad Meyermovq 472(%rsp),%rcx 9210ac341f1SConrad Meyeradd $1,%rcx 9220ac341f1SConrad Meyermov %rcx,%r8 9230ac341f1SConrad Meyershr $32,%r8 9240ac341f1SConrad Meyermovl %ecx,80(%rsp) 9250ac341f1SConrad Meyermovl %r8d,4+96(%rsp) 9260ac341f1SConrad Meyermovq %rcx,472(%rsp) 9270ac341f1SConrad Meyercmp $64,%r9 9280ac341f1SConrad Meyerja ._bytesatleast65 9290ac341f1SConrad Meyerjae ._bytesatleast64 9300ac341f1SConrad Meyer 9310ac341f1SConrad Meyermov %rdi,%rsi 9320ac341f1SConrad Meyermov %rdx,%rdi 9330ac341f1SConrad Meyermov %r9,%rcx 9340ac341f1SConrad Meyerrep movsb 9350ac341f1SConrad Meyer 9360ac341f1SConrad Meyer._bytesatleast64: 9370ac341f1SConrad Meyer._done: 9380ac341f1SConrad 
Meyermovq 416(%rsp),%r11 9390ac341f1SConrad Meyermovq 424(%rsp),%r12 9400ac341f1SConrad Meyermovq 432(%rsp),%r13 9410ac341f1SConrad Meyermovq 440(%rsp),%r14 9420ac341f1SConrad Meyermovq 448(%rsp),%r15 9430ac341f1SConrad Meyermovq 456(%rsp),%rbx 9440ac341f1SConrad Meyermovq 464(%rsp),%rbp 9450ac341f1SConrad Meyeradd %r11,%rsp 9460ac341f1SConrad Meyerxor %rax,%rax 9470ac341f1SConrad Meyermov %rsi,%rdx 9480ac341f1SConrad Meyerret 9490ac341f1SConrad Meyer 9500ac341f1SConrad Meyer._bytesatleast65: 9510ac341f1SConrad Meyersub $64,%r9 9520ac341f1SConrad Meyeradd $64,%rdi 9530ac341f1SConrad Meyeradd $64,%rsi 9540ac341f1SConrad Meyerjmp ._bytesbetween1and255 9550ac341f1SConrad Meyer 9560ac341f1SConrad Meyer#endif 9570ac341f1SConrad Meyer 9580ac341f1SConrad Meyer#if defined(__linux__) && defined(__ELF__) 9590ac341f1SConrad Meyer.section .note.GNU-stack,"",%progbits 9600ac341f1SConrad Meyer#endif 961