;  PROLOGUE(mpn_subadd_n)

;  Copyright 2009 Jason Moxham
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
;  mp_limb_t mpn_subadd_n(mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t)
;  rax                       rdi     rsi     rdx     rcx         r8
;  rax                       rcx     rdx      r8      r9   [rsp+40]
;
;  The first register row above is the layout the body works with, the
;  second row is where the arguments arrive on entry; the lea block at the
;  top of the procedure moves one onto the other.
;
;  What the code does, by argument position (n = 5th argument, in limbs):
;      arg1[i] = arg2[i] - arg4[i] - arg3[i]      for i = 0 .. n-1
;  The two subtractions are carried as two independent borrow chains.
;  Return value (rax): borrow out of the first chain plus borrow out of the
;  second chain, i.e. 0, 1 or 2.
;
;  Register roles after setup:
;      rdi = arg1 + 8*n - 56     destination
;      rsi = arg2 + 8*n - 56     minuend
;      rdx = arg3 + 8*n - 56     subtracted second
;      rcx = arg4 + 8*n - 56     subtracted first
;      r9  = limb index (negative while full groups of four remain)
;      rax = 0 / -1              saved borrow of the arg4 chain
;      r10 = 0 / -1              saved borrow of the arg3 chain
;      r8, rbx, rbp, r11         the four limbs currently in flight
;
;  FRAME_PROC / EXIT_PROC / END_PROC / xalign / stack_use are supplied by
;  yasm_mac.inc (not shown here); they are presumed to save and restore the
;  registers named in reg_save_list -- confirm against that file.

%include "yasm_mac.inc"

%define reg_save_list rbx, rbp, rsi, rdi

        CPU  Athlon64
        BITS 64

        FRAME_PROC mpn_subadd_n, 0, reg_save_list
        mov     rbx, qword [rsp+stack_use+40]   ; rbx = n (fifth argument)
        lea     rdi, [rcx+rbx*8-56]             ; bias every pointer by 8*n-56 so that
        lea     rsi, [rdx+rbx*8-56]             ;   index 7-n addresses limb 0
        lea     rdx, [r8+rbx*8-56]
        lea     rcx, [r9+rbx*8-56]
        mov     r9, 3
        xor     rax, rax                        ; no borrow pending in either chain
        xor     r10, r10
        sub     r9, rbx                         ; r9 = 3 - n
        jge     .tail                           ; n <= 3: only the tail code is needed
        add     r9, 4                           ; r9 = 7 - n; CF set when n <= 7
        mov     rbp, [rsi+r9*8+16]              ; preload the first group of four limbs
        mov     r11, [rsi+r9*8+24]              ;   (mov leaves the flags untouched, so
        mov     r8, [rsi+r9*8]                  ;    CF from the add is still valid below)
        mov     rbx, [rsi+r9*8+8]
        jc      .last4                          ; exactly one full group: skip the loop

        xalign  16
; Main loop: four limbs per pass.  The group being processed is already in
; r8/rbx/rbp/r11; the next group is loaded before branching back.
.loop:
        add     rax, 1                          ; rax is 0 or -1: CF = saved borrow (chain 1)
        sbb     r8, [rcx+r9*8]
        sbb     rbx, [rcx+r9*8+8]
        sbb     rbp, [rcx+r9*8+16]
        sbb     r11, [rcx+r9*8+24]
        sbb     rax, rax                        ; park chain-1 borrow as 0 / -1
        add     r10, 1                          ; r10 is 0 or -1: CF = saved borrow (chain 2)
        sbb     r8, [rdx+r9*8]
        sbb     rbx, [rdx+r9*8+8]
        sbb     rbp, [rdx+r9*8+16]
        sbb     r11, [rdx+r9*8+24]
        sbb     r10, r10                        ; park chain-2 borrow as 0 / -1
        mov     [rdi+r9*8], r8
        mov     [rdi+r9*8+24], r11
        mov     [rdi+r9*8+8], rbx
        mov     [rdi+r9*8+16], rbp
        mov     rbp, [rsi+r9*8+48]              ; limbs 2 and 3 of the next group
        mov     r11, [rsi+r9*8+56]              ;   (offsets are relative to the old r9)
        add     r9, 4                           ; CF set once r9 wraps to 0..3
        mov     r8, [rsi+r9*8]                  ; limbs 0 and 1 of the next group
        mov     rbx, [rsi+r9*8+8]
        jnc     .loop

; Final full group of four; its limbs are already loaded.
.last4:
        add     rax, 1
        sbb     r8, [rcx+r9*8]
        sbb     rbx, [rcx+r9*8+8]
        sbb     rbp, [rcx+r9*8+16]
        sbb     r11, [rcx+r9*8+24]
        sbb     rax, rax
        add     r10, 1
        sbb     r8, [rdx+r9*8]
        sbb     rbx, [rdx+r9*8+8]
        sbb     rbp, [rdx+r9*8+16]
        sbb     r11, [rdx+r9*8+24]
        sbb     r10, r10
        mov     [rdi+r9*8], r8
        mov     [rdi+r9*8+24], r11
        mov     [rdi+r9*8+8], rbx
        mov     [rdi+r9*8+16], rbp

; Here r9 is 0..3 and 3 - r9 limbs remain.  One compare drives a four-way
; dispatch: r9 = 3 -> above, r9 = 2 -> equal, and for r9 = 1 / 0 the low
; byte of r9 - 2 is 0xFF (even parity, PF = 1) / 0xFE (odd parity, PF = 0).
.tail:
        cmp     r9, 2
        ja      .tail0
        jz      .tail1
        jp      .tail2

; Three limbs left (r9 = 0).
.tail3:
        mov     rbp, [rsi+r9*8+48]
        mov     r8, [rsi+r9*8+32]
        mov     rbx, [rsi+r9*8+40]
        add     rax, 1
        sbb     r8, [rcx+r9*8+32]
        sbb     rbx, [rcx+r9*8+40]
        sbb     rbp, [rcx+r9*8+48]
        sbb     rax, rax                        ; rax = -(chain-1 borrow out)
        add     r10, 1
        sbb     r8, [rdx+r9*8+32]
        sbb     rbx, [rdx+r9*8+40]
        sbb     rbp, [rdx+r9*8+48]
        mov     [rdi+r9*8+32], r8               ; stores do not disturb CF
        mov     [rdi+r9*8+40], rbx
        mov     [rdi+r9*8+48], rbp
        sbb     rax, 0                          ; rax -= chain-2 borrow out
        neg     rax                             ; return the two borrows summed
        EXIT_PROC reg_save_list

        xalign  16
; Two limbs left (r9 = 1).
.tail2:
        mov     r8, [rsi+r9*8+32]
        mov     rbx, [rsi+r9*8+40]
        add     rax, 1
        sbb     r8, [rcx+r9*8+32]
        sbb     rbx, [rcx+r9*8+40]
        sbb     rax, rax
        add     r10, 1
        sbb     r8, [rdx+r9*8+32]
        sbb     rbx, [rdx+r9*8+40]
        mov     [rdi+r9*8+32], r8
        mov     [rdi+r9*8+40], rbx
        sbb     rax, 0
        neg     rax
        EXIT_PROC reg_save_list

        xalign  16
; One limb left (r9 = 2).
.tail1:
        mov     r8, [rsi+r9*8+32]
        add     rax, 1
        sbb     r8, [rcx+r9*8+32]
        sbb     rax, rax
        add     r10, 1
        sbb     r8, [rdx+r9*8+32]
        mov     [rdi+r9*8+32], r8
        sbb     rax, 0
        neg     rax
        EXIT_PROC reg_save_list

        xalign  16
; Nothing left (r9 = 3): both borrows are still parked as 0 / -1.
.tail0:
        add     rax, r10                        ; rax = -(borrow1 + borrow2)
        neg     rax
.done:
        END_PROC reg_save_list

        end