; PROLOGUE(mpn_sublsh1_n)

;  Copyright 2009 Jason Moxham
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
;  mp_limb_t mpn_sublsh1_n(mp_ptr, mp_ptr, mp_ptr, mp_size_t)
;  rax                      rdi     rsi     rdx        rcx
;  rax                      rcx     rdx      r8         r9
;
;  Syntax: YASM/NASM (Intel operand order).  The code reads its arguments
;  from rcx, rdx, r8, r9 (the second register row above) and immediately
;  moves them into the registers of the first row.
;
;  Operation (names rp/up/vp/n are chosen here for the four arguments):
;  for every 64-bit limb i in 0..n-1
;        rp[i] = up[i] - vp[i] - vp[i]
;  evaluated as two independent borrow chains that each subtract vp once.
;  The return value is the sum of the two final borrows (0, 1 or 2).
;
;  Register roles after the set-up code:
;        rdi, rsi, rdx   rp, up, vp, each biased by (n*8 - 56) bytes
;        r8              running limb index; starts negative and counts up
;                        by 4, so the carry out of "add r8, 4" ends the loop
;        rax             borrow of the first chain, kept as 0 / -1
;        r10             borrow of the second chain, kept as 0 / -1
;        r9, rbx, rcx, r11   the limbs currently being processed
;
;  "add reg, 1" on a 0 / -1 mask puts the saved borrow back into CF, and
;  "sbb reg, reg" turns CF back into a 0 / -1 mask.  The mov instructions in
;  between do not modify flags, which the branches below rely on.
;
;  FRAME_PROC / EXIT_PROC / END_PROC / xalign come from yasm_mac.inc and are
;  not visible here; presumably they save and restore reg_save_list and
;  emit the return -- confirm against yasm_mac.inc.

%include "yasm_mac.inc"

%define reg_save_list rbx, rsi, rdi

    CPU  Athlon64
    BITS 64

; Process the four limbs held in r9, rbx, rcx, r11 (loaded from up at limb
; index r8): subtract the four vp limbs at index r8 once per borrow chain,
; refresh both borrow masks, then store the results to rp at index r8.
%macro SUBLSH1_FOUR_LIMBS 0
    add     rax, 1                  ; CF = borrow of chain 1
    sbb     r9,  [rdx+r8*8]
    sbb     rbx, [rdx+r8*8+8]
    sbb     rcx, [rdx+r8*8+16]
    sbb     r11, [rdx+r8*8+24]
    sbb     rax, rax                ; rax = new chain-1 borrow mask
    add     r10, 1                  ; CF = borrow of chain 2
    sbb     r9,  [rdx+r8*8]
    sbb     rbx, [rdx+r8*8+8]
    sbb     rcx, [rdx+r8*8+16]
    sbb     r11, [rdx+r8*8+24]
    sbb     r10, r10                ; r10 = new chain-2 borrow mask
    mov     [rdi+r8*8],    r9
    mov     [rdi+r8*8+8],  rbx
    mov     [rdi+r8*8+16], rcx
    mov     [rdi+r8*8+24], r11
%endmacro

    FRAME_PROC mpn_sublsh1_n, 0, reg_save_list
    mov     rax, r9                 ; rax = n

    ; Bias the three pointers so that [ptr + r8*8 + 32] addresses the first
    ; limb not yet processed once the 4-limb passes are finished.
    lea     rdi, [rcx+rax*8-56]
    lea     rsi, [rdx+rax*8-56]
    lea     rdx, [ r8+rax*8-56]
    mov     rcx, rax

    xor     rax, rax                ; chain-1 borrow mask = 0
    xor     r10, r10                ; chain-2 borrow mask = 0
    mov     r8, 3
    sub     r8, rcx                 ; r8 = 3 - n
    jge     .tail                   ; n <= 3: no 4-limb pass at all
    add     r8, 4                   ; CF set when this is the only 4-limb pass
    mov     r11, [rsi+r8*8+24]
    mov     rcx, [rsi+r8*8+16]
    mov     r9,  [rsi+r8*8]
    mov     rbx, [rsi+r8*8+8]
    jc      .last_four              ; CF still comes from "add r8, 4"

    xalign  16
.loop:
    SUBLSH1_FOUR_LIMBS
    ; Load the next four up limbs; two of them before r8 is advanced
    ; (offsets +56/+48) and two after it (offsets +0/+8).
    mov     r11, [rsi+r8*8+56]
    mov     rcx, [rsi+r8*8+48]
    add     r8, 4
    mov     r9,  [rsi+r8*8]
    mov     rbx, [rsi+r8*8+8]
    jnc     .loop                   ; no carry: another full pass follows

.last_four:
    SUBLSH1_FOUR_LIMBS

    ; Select the tail on r8.  ja takes r8 > 2 and jz takes r8 == 2.  For the
    ; remaining values the low byte of r8 - 2 is 0xFF when r8 == 1 (even
    ; parity, PF = 1) and 0xFE when r8 == 0 (odd parity, PF = 0), so jp
    ; separates those two.
.tail:
    cmp     r8, 2
    ja      .tail_none
    jz      .tail_one
    jp      .tail_two

    ; Three limbs left.
.tail_three:
    mov     rcx, [rsi+r8*8+48]
    mov     r9,  [rsi+r8*8+32]
    mov     rbx, [rsi+r8*8+40]
    add     rax, 1
    sbb     r9,  [rdx+r8*8+32]
    sbb     rbx, [rdx+r8*8+40]
    sbb     rcx, [rdx+r8*8+48]
    sbb     rax, rax
    add     r10, 1
    sbb     r9,  [rdx+r8*8+32]
    sbb     rbx, [rdx+r8*8+40]
    sbb     rcx, [rdx+r8*8+48]
    mov     [rdi+r8*8+32], r9
    mov     [rdi+r8*8+40], rbx
    mov     [rdi+r8*8+48], rcx
    sbb     rax, 0                  ; rax = -(borrow 1) - (borrow 2 in CF)
    neg     rax                     ; return borrow 1 + borrow 2
    EXIT_PROC reg_save_list

    ; Two limbs left.
    xalign  16
.tail_two:
    mov     r9,  [rsi+r8*8+32]
    mov     rbx, [rsi+r8*8+40]
    add     rax, 1
    sbb     r9,  [rdx+r8*8+32]
    sbb     rbx, [rdx+r8*8+40]
    sbb     rax, rax
    add     r10, 1
    sbb     r9,  [rdx+r8*8+32]
    sbb     rbx, [rdx+r8*8+40]
    mov     [rdi+r8*8+32], r9
    mov     [rdi+r8*8+40], rbx
    sbb     rax, 0
    neg     rax
    EXIT_PROC reg_save_list

    ; One limb left.
    xalign  16
.tail_one:
    mov     r9,  [rsi+r8*8+32]
    add     rax, 1
    sbb     r9,  [rdx+r8*8+32]
    sbb     rax, rax
    add     r10, 1
    sbb     r9,  [rdx+r8*8+32]
    mov     [rdi+r8*8+32], r9
    sbb     rax, 0
    neg     rax
    EXIT_PROC reg_save_list

    ; Nothing left: only combine the two saved borrows.
    xalign  16
.tail_none:
    add     r10, 1                  ; CF = borrow of chain 2
.finish:
    sbb     rax, 0
    neg     rax
    END_PROC reg_save_list

    end