;  AMD64 mpn_divexact_by3c

;  Copyright 2009 Jason Moxham

;  This file is part of the MPIR Library.

;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.

;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.

;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.

;-----------------------------------------------------------------------
;  mpn_divexact_by3c
;
;  Syntax: yasm / NASM (Intel operand order), 64-bit mode.
;
;  (rdi, rdx) = (rsi, rdx)      rcx = carry in
;  rax = carry out
;
;  In:    rdi = destination limb pointer
;         rsi = source limb pointer
;         rdx = number of 64-bit limbs
;         rcx = carry in
;  Out:   rax = carry out
;  Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, flags
;  Stack: not used
;
;  NOTE(review): the argument registers match the System V AMD64 integer
;  argument order - confirm against the build that includes this file.
;
;  NOTE could pass 55555...555 as next param so this would
;  be mpn_divexact_by_ff_over_c , and change imul at end , or
;  drop backwards compatibility and just dump the two imuls
;-----------------------------------------------------------------------

%include 'yasm_mac.inc'

    BITS 64

; (2^64 - 1) / 3; kept in r8 as the per-limb multiplier.
%define ALL_ONES_OVER_3 0x5555555555555555

; One limb of work.  %1 is the index expression added to both the source
; and the destination base pointer.
;   rdx:rax = src limb * r8
;   rcx    -= low half          (borrow left in CF)
;   dst limb = rcx              (mov leaves the flags untouched)
;   rcx    -= high half + CF
%macro DIVEXACT3_STEP 1
        mov     rax, [rsi+%1]
        mul     r8
        sub     rcx, rax
        mov     [rdi+%1], rcx
        sbb     rcx, rdx
%endmacro

    GLOBAL_FUNC mpn_divexact_by3c
        ; Bias both pointers by (n*8 - 24) and run the index r9 upward
        ; from 3 - n, so [ptr + r9*8] starts out addressing limb 0.
        mov     r9d, 3
        lea     rsi, [rsi+rdx*8-24]
        lea     rdi, [rdi+rdx*8-24]
        mov     r8, ALL_ONES_OVER_3
        imul    rcx, r8                 ; scale the incoming carry
        sub     r9, rdx                 ; r9 = 3 - n, borrow iff n > 3
        jnc     by3c_tail               ; n <= 3: the tail code handles it all

        ; Main loop: four limbs per pass, runs while r9 is still negative.
        align   16
by3c_loop4:
        DIVEXACT3_STEP r9*8
        DIVEXACT3_STEP r9*8+8
        DIVEXACT3_STEP r9*8+16
        DIVEXACT3_STEP r9*8+24
        add     r9, 4
        jnc     by3c_loop4              ; no carry out => index still negative

        ; Here 0 <= r9 <= 3 and 3 - r9 limbs remain.
by3c_tail:
        test    r9, 2
        jnz     by3c_tail1              ; bit 1 set: at most one limb left
        DIVEXACT3_STEP r9*8
        DIVEXACT3_STEP r9*8+8
        add     r9, 2

by3c_tail1:
        test    r9, 1
        jnz     by3c_done               ; bit 0 set: nothing left
        DIVEXACT3_STEP r9*8

by3c_done:
        ; below is the same as imul rax, rcx, -3
        lea     rax, [rcx+rcx*2]
        neg     rax
        ret