// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Subtract, z := x - y
// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
//
//    extern uint64_t bignum_sub
//     (uint64_t p, uint64_t *z,
//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
//
// Does the z := x - y operation, truncating modulo p words in general and
// returning a top borrow (0 or 1) in the p'th place, only subtracting input
// words below p (as well as m and n respectively) to get the diff and borrow.
//
// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
        .text

// Register roles. The first six follow the standard x86-64 argument order
// listed above; i is the word index shared by every loop and a is the
// scratch word that also carries the return value.

#define p rdi
#define z rsi
#define m rdx
#define x rcx
#define n r8
#define y r9
#define i r10
#define a rax

// 32-bit view of a, used where a must be zeroed without touching the flags

#define ashort eax



S2N_BN_SYMBOL(bignum_sub):
        _CET_ENDBR

// On Windows, save the two registers we are about to overwrite and move the
// arguments into the standard-ABI registers used by the rest of the code.
// The two pushes lower RSP by 16, so the stack arguments documented at
// [RSP+40] and [RSP+48] are read from [rsp+56] and [rsp+64].

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, [rsp+56]
        mov     r9, [rsp+64]
#endif

// Zero the main index counter for both branches

        xor     i, i

// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
// we'll never need words past the p'th. Can now assume m <= p and n <= p.
// Then compare the modified m and n and branch accordingly

        cmp     p, m
        cmovc   m, p
        cmp     p, n
        cmovc   n, p
        cmp     m, n
        jc      ylonger

// The case where x is longer or of the same size (p >= m >= n)
//
// Turn the sizes into three loop counts: n words where both inputs
// contribute, m - n words where only x contributes, and p - m tail words.
// m is biased by one because its loop decrements before testing (at xtest).
// Neither subtraction borrows (m <= p and n <= m) and "test" clears CF, so
// the borrow chain starts at zero. Within the loops only "sbb" changes CF;
// "mov", "inc", "dec" and the jumps leave it alone, which is what lets the
// borrow propagate from one word to the next.

        sub     p, m
        sub     m, n
        inc     m
        test    n, n
        jz      xtest
xmainloop:
        mov     a, [x+8*i]
        sbb     a, [y+8*i]
        mov     [z+8*i],a
        inc     i
        dec     n
        jnz     xmainloop
        jmp     xtest

// Words of x above the top of y: subtract just the incoming borrow

xtoploop:
        mov     a, [x+8*i]
        sbb     a, 0
        mov     [z+8*i],a
        inc     i
xtest:
        dec     m
        jnz     xtoploop

// Capture the final borrow as a mask: a = 0 if no borrow, all 1s otherwise.
// Any remaining output words above both inputs are filled with that mask,
// and the mask is then negated to give the 0 or 1 return value.

        sbb     a, a
        test    p, p
        jz      tailskip
tailloop:
        mov     [z+8*i],a
        inc     i
        dec     p
        jnz     tailloop
tailskip:
        neg     a
#if WINDOWS_ABI
        pop    rsi
        pop    rdi
#endif
        ret

// The case where y is longer (p >= n > m)
//
// Here the counts are m words where both inputs contribute, n - m words
// where only y contributes (at least one, since n > m), and p - n tail
// words. As above, the subtractions cannot borrow and "test" clears CF.

ylonger:

        sub     p, n
        sub     n, m
        test    m, m
        jz      ytoploop
ymainloop:
        mov     a, [x+8*i]
        sbb     a, [y+8*i]
        mov     [z+8*i],a
        inc     i
        dec     m
        jnz     ymainloop

// Words of y above the top of x: compute 0 - y[i] - borrow. The zero is
// loaded with "mov" rather than "xor" so that the pending borrow in CF
// survives; writing the 32-bit register clears the full 64-bit one.

ytoploop:
        mov     ashort, 0
        sbb     a, [y+8*i]
        mov     [z+8*i],a
        inc     i
        dec     n
        jnz     ytoploop

// Same ending as the other branch: borrow mask, shared tail fill if there
// are output words left, otherwise negate the mask and return directly.

        sbb     a, a
        test    p, p
        jnz     tailloop
        neg     a
#if WINDOWS_ABI
        pop    rsi
        pop    rdi
#endif
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif