xref: /openbsd/lib/libcrypto/bn/arch/amd64/bignum_sub.S (revision 22787c51)
1e0bd33c5Sjsing// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
23b6dd4a6Sjsing//
33b6dd4a6Sjsing// Permission to use, copy, modify, and/or distribute this software for any
43b6dd4a6Sjsing// purpose with or without fee is hereby granted, provided that the above
53b6dd4a6Sjsing// copyright notice and this permission notice appear in all copies.
63b6dd4a6Sjsing//
73b6dd4a6Sjsing// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
83b6dd4a6Sjsing// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
93b6dd4a6Sjsing// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
103b6dd4a6Sjsing// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
113b6dd4a6Sjsing// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
123b6dd4a6Sjsing// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
133b6dd4a6Sjsing// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14e0bd33c5Sjsing
15e0bd33c5Sjsing// ----------------------------------------------------------------------------
16e0bd33c5Sjsing// Subtract, z := x - y
17e0bd33c5Sjsing// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18e0bd33c5Sjsing//
19e0bd33c5Sjsing//    extern uint64_t bignum_sub
20e0bd33c5Sjsing//     (uint64_t p, uint64_t *z,
21e0bd33c5Sjsing//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22e0bd33c5Sjsing//
23e0bd33c5Sjsing// Does the z := x - y operation, truncating modulo p words in general and
24e0bd33c5Sjsing// returning a top borrow (0 or 1) in the p'th place, only subtracting input
25e0bd33c5Sjsing// words below p (as well as m and n respectively) to get the diff and borrow.
26e0bd33c5Sjsing//
27e0bd33c5Sjsing// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28e0bd33c5Sjsing// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29e0bd33c5Sjsing// ----------------------------------------------------------------------------
30e0bd33c5Sjsing
31999c1c14Sjsing#include "s2n_bignum_internal.h"
32e0bd33c5Sjsing
// Assembler setup: Intel operand order without register prefixes, symbol
// export/visibility for bignum_sub, and the code section.
// NOTE(review): the two S2N_BN_SYM_* macros come from s2n_bignum_internal.h;
// presumably they expand to the platform's global/hidden-symbol directives --
// confirm against that header.
33e0bd33c5Sjsing        .intel_syntax noprefix
34e0bd33c5Sjsing        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
35e0bd33c5Sjsing        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
36e0bd33c5Sjsing        .text
37e0bd33c5Sjsing
// Symbolic names for the registers used by bignum_sub. The first six follow
// the standard x86-64 argument order given in the header comment above.
//   p : number of words in the output z (later: count of top fill words)
//   z : pointer to the output words
//   m : number of words in x (later: count of x-only words, biased by +1)
//   x : pointer to the first input
//   n : number of words in y (later: loop down-counter)
//   y : pointer to the second input
38e0bd33c5Sjsing#define p rdi
39e0bd33c5Sjsing#define z rsi
40e0bd33c5Sjsing#define m rdx
41e0bd33c5Sjsing#define x rcx
42e0bd33c5Sjsing#define n r8
43e0bd33c5Sjsing#define y r9
// i : running word index into x, y and z
// a : scratch word for each digit, and finally the returned borrow
44e0bd33c5Sjsing#define i r10
45e0bd33c5Sjsing#define a rax
46e0bd33c5Sjsing
// 32-bit view of a, used where a must be zeroed with a plain mov
47e0bd33c5Sjsing#define ashort eax
48e0bd33c5Sjsing
49e0bd33c5Sjsing
50e0bd33c5Sjsing
// bignum_sub: writes the p-word difference x - y to z and returns the final
// borrow (0 or 1) in rax.
//
// Structure: after clamping m and n to p, the words are processed in three
// phases, all indexed by i:
//   1. "main" loop  - words where both x and y have a digit (min(m,n) words)
//   2. "top" loop   - words where only the longer input has a digit
//   3. "tail" loop  - remaining output words up to p, filled with the
//                     borrow replicated across the word (0 or all ones)
//
// The borrow lives in the carry flag for the whole of phases 1 and 2. This
// works because inc/dec (used for the counters) do not modify CF; only the
// sbb instructions, and the sub/test instructions that deliberately clear it
// before the first sbb, change it. Do not replace inc/dec with add/sub or
// reorder flag-setting instructions here without re-checking the carry chain.
51e0bd33c5SjsingS2N_BN_SYMBOL(bignum_sub):
// NOTE(review): _CET_ENDBR is defined outside this file; presumably it emits
// an indirect-branch landing pad (endbr64) when CET is enabled -- confirm.
52*22787c51Stb	_CET_ENDBR
53e0bd33c5Sjsing
// Windows entry shim: rdi and rsi are callee-saved in the Microsoft x64 ABI
// but are used below as p and z, so save them and move the arguments into the
// standard-ABI registers. n and y are documented at [RSP+40] and [RSP+48] on
// entry; the two pushes lower rsp by 16, hence the +56 and +64 offsets.
54e0bd33c5Sjsing#if WINDOWS_ABI
55e0bd33c5Sjsing        push    rdi
56e0bd33c5Sjsing        push    rsi
57e0bd33c5Sjsing        mov     rdi, rcx
58e0bd33c5Sjsing        mov     rsi, rdx
59e0bd33c5Sjsing        mov     rdx, r8
60e0bd33c5Sjsing        mov     rcx, r9
61e0bd33c5Sjsing        mov     r8, [rsp+56]
62e0bd33c5Sjsing        mov     r9, [rsp+64]
63e0bd33c5Sjsing#endif
64e0bd33c5Sjsing
65e0bd33c5Sjsing// Zero the main index counter for both branches
66e0bd33c5Sjsing
67e0bd33c5Sjsing        xor     i, i
68e0bd33c5Sjsing
69e0bd33c5Sjsing// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70e0bd33c5Sjsing// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71e0bd33c5Sjsing// Then compare the modified m and n and branch accordingly
// (cmp sets CF on an unsigned "less than", so cmovc replaces the size with p
// exactly when p is the smaller one, and jc is taken exactly when m < n.)
72e0bd33c5Sjsing
73e0bd33c5Sjsing        cmp     p, m
74e0bd33c5Sjsing        cmovc   m, p
75e0bd33c5Sjsing        cmp     p, n
76e0bd33c5Sjsing        cmovc   n, p
77e0bd33c5Sjsing        cmp     m, n
78e0bd33c5Sjsing        jc      ylonger
79e0bd33c5Sjsing
80e0bd33c5Sjsing// The case where x is longer or of the same size (p >= m >= n)
// Counter setup: p := p - m (tail fill words), m := (m - n) + 1 (x-only words,
// biased by one because xtest decrements before testing), n unchanged (words
// common to both inputs). The final "test n, n" both detects n = 0 and leaves
// CF = 0, so the first sbb starts with no incoming borrow.
81e0bd33c5Sjsing
82e0bd33c5Sjsing        sub     p, m
83e0bd33c5Sjsing        sub     m, n
84e0bd33c5Sjsing        inc     m
85e0bd33c5Sjsing        test    n, n
86e0bd33c5Sjsing        jz      xtest
// Phase 1: z[i] := x[i] - y[i] - borrow for the n common words
87e0bd33c5Sjsingxmainloop:
88e0bd33c5Sjsing        mov     a, [x+8*i]
89e0bd33c5Sjsing        sbb     a, [y+8*i]
90e0bd33c5Sjsing        mov     [z+8*i],a
91e0bd33c5Sjsing        inc     i
92e0bd33c5Sjsing        dec     n
93e0bd33c5Sjsing        jnz     xmainloop
94e0bd33c5Sjsing        jmp     xtest
// Phase 2: z[i] := x[i] - borrow for the words where y has run out
95e0bd33c5Sjsingxtoploop:
96e0bd33c5Sjsing        mov     a, [x+8*i]
97e0bd33c5Sjsing        sbb     a, 0
98e0bd33c5Sjsing        mov     [z+8*i],a
99e0bd33c5Sjsing        inc     i
// Loop test placed at the bottom so that a zero-length phase 2 is skipped
100e0bd33c5Sjsingxtest:
101e0bd33c5Sjsing        dec     m
102e0bd33c5Sjsing        jnz     xtoploop
// Materialize the borrow: a := 0 if CF = 0, all ones if CF = 1. This is the
// value of every remaining output word.
103e0bd33c5Sjsing        sbb     a, a
104e0bd33c5Sjsing        test    p, p
105e0bd33c5Sjsing        jz      tailskip
// Phase 3 (shared by both cases): fill the remaining p words of z with a
106e0bd33c5Sjsingtailloop:
107e0bd33c5Sjsing        mov     [z+8*i],a
108e0bd33c5Sjsing        inc     i
109e0bd33c5Sjsing        dec     p
110e0bd33c5Sjsing        jnz     tailloop
// Convert the 0 / all-ones mask into the 0 / 1 return value
111e0bd33c5Sjsingtailskip:
112e0bd33c5Sjsing        neg     a
113e0bd33c5Sjsing#if WINDOWS_ABI
114e0bd33c5Sjsing        pop    rsi
115e0bd33c5Sjsing        pop    rdi
116e0bd33c5Sjsing#endif
117e0bd33c5Sjsing        ret
118e0bd33c5Sjsing
119e0bd33c5Sjsing// The case where y is longer (p >= n > m)
// Counter setup: p := p - n (tail fill words), n := n - m (y-only words, at
// least one since n > m), m unchanged (words common to both inputs). As in
// the other case, "test m, m" detects m = 0 and leaves CF = 0.
120e0bd33c5Sjsing
121e0bd33c5Sjsingylonger:
122e0bd33c5Sjsing
123e0bd33c5Sjsing        sub     p, n
124e0bd33c5Sjsing        sub     n, m
125e0bd33c5Sjsing        test    m, m
126e0bd33c5Sjsing        jz      ytoploop
// Phase 1: z[i] := x[i] - y[i] - borrow for the m common words
127e0bd33c5Sjsingymainloop:
128e0bd33c5Sjsing        mov     a, [x+8*i]
129e0bd33c5Sjsing        sbb     a, [y+8*i]
130e0bd33c5Sjsing        mov     [z+8*i],a
131e0bd33c5Sjsing        inc     i
132e0bd33c5Sjsing        dec     m
133e0bd33c5Sjsing        jnz     ymainloop
// Phase 2: z[i] := 0 - y[i] - borrow for the words where x has run out.
// a is zeroed with mov rather than xor because mov leaves the flags (and so
// the pending borrow in CF) untouched; writing eax also clears the upper
// half of rax.
134e0bd33c5Sjsingytoploop:
135e0bd33c5Sjsing        mov     ashort, 0
136e0bd33c5Sjsing        sbb     a, [y+8*i]
137e0bd33c5Sjsing        mov     [z+8*i],a
138e0bd33c5Sjsing        inc     i
139e0bd33c5Sjsing        dec     n
140e0bd33c5Sjsing        jnz     ytoploop
// Materialize the borrow as a 0 / all-ones mask, then either join the shared
// tail loop above (which also performs the return) or return directly.
141e0bd33c5Sjsing        sbb     a, a
142e0bd33c5Sjsing        test    p, p
143e0bd33c5Sjsing        jnz     tailloop
144e0bd33c5Sjsing        neg     a
145e0bd33c5Sjsing#if WINDOWS_ABI
146e0bd33c5Sjsing        pop    rsi
147e0bd33c5Sjsing        pop    rdi
148e0bd33c5Sjsing#endif
149e0bd33c5Sjsing        ret
150e0bd33c5Sjsing
// On Linux/ELF targets, emit an empty .note.GNU-stack section (no flags).
// NOTE(review): by GNU toolchain convention this marks the object as not
// requiring an executable stack -- confirm against the linker documentation.
151e0bd33c5Sjsing#if defined(__linux__) && defined(__ELF__)
152e0bd33c5Sjsing.section .note.GNU-stack,"",%progbits
153e0bd33c5Sjsing#endif
154