;  PROLOGUE(mpn_lshift1)

;  Copyright 2008 Jason Moxham
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)
;  rax                      rdi     rsi        rdx
;  rax                      rcx     rdx         r8
;
;  Shifts an array of 64-bit limbs left by one bit, lowest limb first.
;  Each limb is computed as limb + limb + CF (adc reg, reg), so the bit
;  that leaves the top of one limb enters the bottom of the next.
;
;  This file uses the second register row above (rcx, rdx, r8):
;    In:    rcx = address the shifted limbs are stored to
;           rdx = address the limbs are loaded from
;           r8  = number of limbs
;    Out:   rax = 0 or 1, the carry left after the last adc
;                 (0 when the limb count is 0)
;    Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags,
;           and the qword at [rsp+TAIL_SLOT]
;
;  Invariant: once CF has been cleared by the cmp below, only mov, lea,
;  inc/dec and adc are executed until the final sbb.  Of those, only adc
;  writes CF, so the carry survives the counter updates and the pointer
;  bumps.  Do not insert add/sub/test/cmp inside the chain.

%include "yasm_mac.inc"

;  Offset from rsp of the scratch qword holding the tail counter.  On entry
;  to a Microsoft x64 function [rsp+8 .. rsp+0x27] is the caller-provided
;  home (shadow) space, so this slot may be written without a frame.
;  NOTE(review): assumes LEAF_PROC does not adjust rsp - confirm against
;  yasm_mac.inc.
%define TAIL_SLOT   0x18

    CPU  Athlon64
    BITS 64

    LEAF_PROC mpn_lshift1
    mov     rax, r8                 ; rax = limb count
    and     r8, 7
    inc     r8                      ; r8 = (count mod 8) + 1, tail counter
    mov     [rsp+TAIL_SLOT], r8     ; park it: r8 is a data register below
    shr     rax, 3                  ; rax = number of full 8-limb groups
    cmp     rax, 0                  ; ZF = no full group; also forces CF = 0
    jz      .tail                   ;   (shr left a stray bit in CF)

    xalign  16
;  Main loop: 8 limbs (64 bytes) per pass, as two groups of four
;  loads / adcs / stores.
.group8:
    mov     r8, [rdx]
    mov     r9, [rdx+8]
    mov     r10, [rdx+16]
    mov     r11, [rdx+24]
    adc     r8, r8
    adc     r9, r9
    adc     r10, r10
    adc     r11, r11
    mov     [rcx], r8
    mov     [rcx+8], r9
    mov     [rcx+16], r10
    mov     [rcx+24], r11
    mov     r8, [rdx+32]
    mov     r9, [rdx+40]
    mov     r10, [rdx+48]
    mov     r11, [rdx+56]
    adc     r8, r8
    adc     r9, r9
    adc     r10, r10
    adc     r11, r11
    mov     [rcx+32], r8
    mov     [rcx+40], r9
    mov     [rcx+48], r10
    mov     [rcx+56], r11
    lea     rcx, [rcx+64]           ; lea: advance pointer, flags untouched
    dec     rax                     ; dec: sets ZF for jnz, leaves CF alone
    lea     rdx, [rdx+64]
    jnz     .group8

;  Tail: 0 to 7 remaining limbs.  The counter was stored as remaining + 1,
;  so each "dec / jz" asks "is there another limb?" before touching it.
.tail:
    mov     rax, [rsp+TAIL_SLOT]
    dec     rax
    jz      .done
; Could still have cache-bank conflicts in this tail part
    mov     r8, [rdx]
    adc     r8, r8
    mov     [rcx], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+8]
    adc     r8, r8
    mov     [rcx+8], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+16]
    adc     r8, r8
    mov     [rcx+16], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+24]
    adc     r8, r8
    mov     [rcx+24], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+32]
    adc     r8, r8
    mov     [rcx+32], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+40]
    adc     r8, r8
    mov     [rcx+40], r8
    dec     rax
    jz      .done
    mov     r8, [rdx+48]            ; seventh tail limb: the counter cannot
    adc     r8, r8                  ; exceed 8, so no further test is needed
    mov     [rcx+48], r8

;  Turn the final carry into the return value.
.done:
    sbb     rax, rax                ; rax = CF ? -1 : 0
    neg     rax                     ; rax = CF ? 1 : 0
    ret

    end