// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Multiply-add with single-word multiplier, z := z + c * y
// Inputs c, y[n]; outputs function return (carry-out) and z[k]
//
//    extern uint64_t bignum_cmadd
//     (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
//
// Does the "z := z + c * y" operation where y is n digits and the result z
// is k digits. Truncates the result in general.
//
// The return value is a high/carry word that is meaningful when k = n + 1, or
// more generally when n <= k and the result fits in k + 1 digits. In these
// cases it gives the top digit of the (k + 1)-digit result.
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmadd)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmadd)
        .text

// Register roles once the arguments are in their standard-ABI positions.
//
//   p  (rdi)  output size in digits on entry; after clamping it holds the
//             number of "tail" digits of z beyond the first min(p,n)
//   z  (rsi)  pointer to the in/out bignum
//   c  (r9)   the single-word multiplier, copied out of rdx (MUL clobbers rdx)
//   n  (rcx)  input size in digits, clamped to min(p,n); used as a down-counter
//   x  (r8)   pointer to the input bignum (the fifth argument)
//   i  (r10)  digit index shared by the main loop and the tail loop
//   h  (r11)  running high word / carry word, and finally the return value
//   r  (rbx)  scratch for a saved carry (callee-saved, hence the push/pop)

#define p rdi
#define z rsi
#define c r9
#define n rcx
#define x r8

#define i r10
#define h r11

#define r rbx

// 32-bit views of h and i: a 32-bit MOV zero-extends into the full register
// and, unlike XOR, leaves the flags alone.

#define hshort r11d
#define ishort r10d

S2N_BN_SYMBOL(bignum_cmadd):
        _CET_ENDBR

// Under the Microsoft x64 ABI, save rdi/rsi (callee-saved there) and shuffle
// the arguments into the standard-ABI registers. The fifth argument is at
// [rsp+40] on entry, hence [rsp+56] after the two pushes.

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, [rsp+56]
#endif

// Seems hard to avoid one more register

        push    rbx

// First clamp the input size n := min(p,n) since we can never need to read
// past the p'th term of the input to generate p-digit output.
// Subtract p := p - min(n,p) so it holds the size of the extra tail needed

        cmp     p, n
        cmovc   n, p
        sub     p, n

// Initialize high part h = 0; if n = 0 do nothing but return that zero

        xor     h, h
        test    n, n
        jz      bignum_cmadd_end

// Move c into a safer register as multiplies overwrite rdx

        mov     c, rdx

// Initialization of the loop: 2^64 * CF + [h,z_0'] = z_0 + c * x_0
// The MOV to ishort and the DEC leave CF untouched, so the carry from the
// ADD is still live on entry to the loop (or at hightail when n was 1).

        mov     rax, [x]
        mul     c
        add     [z], rax
        mov     h, rdx
        mov     ishort, 1
        dec     n
        jz      bignum_cmadd_hightail

// Main loop, where we always have CF + previous high part h to add in.
// Each pass: h := h + z_i + CF, with the carry out of that captured in r as
// 0 or all-ones; then [rdx,rax] := c * x_i, the captured carry is added into
// rdx (by subtracting r), and rax + h becomes the new z_i. The carry from
// that last ADD survives INC/DEC and feeds the next pass.

bignum_cmadd_loop:
        adc     h, [z+8*i]
        sbb     r, r
        mov     rax, [x+8*i]
        mul     c
        sub     rdx, r
        add     rax, h
        mov     [z+8*i], rax
        mov     h, rdx
        inc     i
        dec     n
        jnz     bignum_cmadd_loop

// Fold the last pending carry into the high word

bignum_cmadd_hightail:
        adc     h, 0

// Propagate the carry all the way to the end with h as extra carry word.
// If there is no tail (p = 0) then h itself is the return value.

bignum_cmadd_tail:
        test    p, p
        jz      bignum_cmadd_end

// Add h into the first tail digit, then zero h with a flag-preserving MOV so
// that the ADCs below just ripple the carry through the remaining digits.

        add     [z+8*i], h
        mov     hshort, 0
        inc     i
        dec     p
        jz      bignum_cmadd_highend

bignum_cmadd_tloop:
        adc     [z+8*i], h
        inc     i
        dec     p
        jnz     bignum_cmadd_tloop

// h is zero here, so this leaves just the final carry-out in h

bignum_cmadd_highend:

        adc     h, 0

// Return the high/carry word

bignum_cmadd_end:
        mov     rax, h

        pop     rbx
#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
156