
;  mpn_sublsh1_n

;  Copyright 2009 Jason Moxham

;  This file is part of the MPIR Library.
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.

%include 'yasm_mac.inc'

;-----------------------------------------------------------------------
;  ret mpn_sublsh1_n(mp_ptr,mp_ptr,mp_ptr,mp_size_t)
;
;  For every 64-bit limb i in [0, rcx):
;      [rdi][i] = [rsi][i] - 2 * [rdx][i]      (with borrow propagation)
;  The doubling is never formed: the rdx operand is simply subtracted
;  twice, each pass with its own borrow chain.  Between groups of limbs
;  the borrow of pass 1 is parked in rax and the borrow of pass 2 in r10,
;  both as 0 / -1 (sbb reg,reg); "add reg, 1" turns a parked -1 back
;  into CF=1.
;
;  In:    rdi = destination limbs, rsi = minuend limbs,
;         rdx = limbs subtracted twice, rcx = limb count
;         (register order matches the System V AMD64 integer argument
;         order for the prototype above)
;         NOTE(review): limb count is presumably non-negative - confirm
;         with callers.
;  Out:   rax = total borrow out of the top limb (0, 1 or 2)
;  Clobb: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
;         rbx is used as scratch but saved/restored on the stack.
;
;  Indexing: all three pointers are moved to (end - 7 limbs) and r8 runs
;  upward from 7-n in steps of 4.  A full group of four limbs lives at
;  displacements 0..24 from index r8; once r8 has reached 0..3 the
;  remaining 3-r8 limbs live at displacements 32..48.
;-----------------------------------------------------------------------

        GLOBAL_FUNC mpn_sublsh1_n
        push    rbx                             ; callee-saved, used as limb scratch
        lea     rdi, [rdi+rcx*8-56]             ; bias pointers to end - 7 limbs
        lea     rsi, [rsi+rcx*8-56]
        lea     rdx, [rdx+rcx*8-56]
        xor     eax, eax                        ; pass-1 borrow = 0
        xor     r10d, r10d                      ; pass-2 borrow = 0
        mov     r8, 3
        sub     r8, rcx                         ; r8 = 3 - n
        jge     L_tail                          ; n <= 3: no full group of four
        add     r8, 4                           ; r8 = 7 - n; CF set iff r8 is now >= 0
        mov     r11, [rsi+r8*8+24]              ; preload first group (mov keeps CF)
        mov     rcx, [rsi+r8*8+16]
        mov     r9, [rsi+r8*8]
        mov     rbx, [rsi+r8*8+8]
        jc      L_last4                         ; only one full group: skip the loop
        align   16
L_loop4:
        ; pass 1 over the four preloaded limbs
        add     rax, 1                          ; CF = parked pass-1 borrow
        sbb     r9, [rdx+r8*8]
        sbb     rbx, [rdx+r8*8+8]
        sbb     rcx, [rdx+r8*8+16]
        sbb     r11, [rdx+r8*8+24]
        sbb     rax, rax                        ; park pass-1 borrow as 0 / -1
        ; pass 2 over the same limbs
        add     r10, 1                          ; CF = parked pass-2 borrow
        sbb     r9, [rdx+r8*8]
        sbb     rbx, [rdx+r8*8+8]
        sbb     rcx, [rdx+r8*8+16]
        sbb     r11, [rdx+r8*8+24]
        sbb     r10, r10                        ; park pass-2 borrow as 0 / -1
        mov     [rdi+r8*8], r9
        mov     [rdi+r8*8+8], rbx
        mov     [rdi+r8*8+16], rcx
        mov     [rdi+r8*8+24], r11
        ; preload the next group; the first two loads use the old index,
        ; hence displacements 56 / 48 instead of 24 / 16
        mov     r11, [rsi+r8*8+56]
        mov     rcx, [rsi+r8*8+48]
        add     r8, 4                           ; CF set once r8 reaches 0..3
        mov     r9, [rsi+r8*8]
        mov     rbx, [rsi+r8*8+8]
        jnc     L_loop4                         ; CF still from "add r8, 4"
L_last4:
        ; final full group of four (already preloaded), same two passes
        add     rax, 1
        sbb     r9, [rdx+r8*8]
        sbb     rbx, [rdx+r8*8+8]
        sbb     rcx, [rdx+r8*8+16]
        sbb     r11, [rdx+r8*8+24]
        sbb     rax, rax
        add     r10, 1
        sbb     r9, [rdx+r8*8]
        sbb     rbx, [rdx+r8*8+8]
        sbb     rcx, [rdx+r8*8+16]
        sbb     r11, [rdx+r8*8+24]
        sbb     r10, r10
        mov     [rdi+r8*8], r9
        mov     [rdi+r8*8+8], rbx
        mov     [rdi+r8*8+16], rcx
        mov     [rdi+r8*8+24], r11
L_tail:
        ; r8 is 0..3 here and 3 - r8 limbs remain.  One compare serves
        ; four targets: r8=3 -> above, r8=2 -> equal, r8=1 -> r8-2 has low
        ; byte 0xFF (even parity, PF=1), r8=0 -> low byte 0xFE (PF=0).
        cmp     r8, 2
        ja      L_tail0
        jz      L_tail1
        jp      L_tail2
L_tail3:
        ; three limbs left
        mov     rcx, [rsi+r8*8+48]
        mov     r9, [rsi+r8*8+32]
        mov     rbx, [rsi+r8*8+40]
        add     rax, 1                          ; CF = parked pass-1 borrow
        sbb     r9, [rdx+r8*8+32]
        sbb     rbx, [rdx+r8*8+40]
        sbb     rcx, [rdx+r8*8+48]
        sbb     rax, rax                        ; rax = -(pass-1 borrow out)
        add     r10, 1                          ; CF = parked pass-2 borrow
        sbb     r9, [rdx+r8*8+32]
        sbb     rbx, [rdx+r8*8+40]
        sbb     rcx, [rdx+r8*8+48]
        mov     [rdi+r8*8+32], r9               ; stores leave CF untouched
        mov     [rdi+r8*8+40], rbx
        mov     [rdi+r8*8+48], rcx
        sbb     rax, 0                          ; rax = -(borrow1 + borrow2)
        neg     rax                             ; return 0, 1 or 2
        pop     rbx
        ret
        align   16
L_tail2:
        ; two limbs left
        mov     r9, [rsi+r8*8+32]
        mov     rbx, [rsi+r8*8+40]
        add     rax, 1
        sbb     r9, [rdx+r8*8+32]
        sbb     rbx, [rdx+r8*8+40]
        sbb     rax, rax
        add     r10, 1
        sbb     r9, [rdx+r8*8+32]
        sbb     rbx, [rdx+r8*8+40]
        mov     [rdi+r8*8+32], r9
        mov     [rdi+r8*8+40], rbx
        sbb     rax, 0
        neg     rax
        pop     rbx
        ret
        align   16
L_tail1:
        ; one limb left
        mov     r9, [rsi+r8*8+32]
        add     rax, 1
        sbb     r9, [rdx+r8*8+32]
        sbb     rax, rax
        add     r10, 1
        sbb     r9, [rdx+r8*8+32]
        mov     [rdi+r8*8+32], r9
        sbb     rax, 0
        neg     rax
        pop     rbx
        ret
        align   16
L_tail0:
        ; nothing left: rax already holds -(borrow1); fold in borrow2
        add     r10, 1                          ; CF = parked pass-2 borrow
        sbb     rax, 0
        neg     rax
        pop     rbx
        ret
        end