1 #include "ed25519-donna-portable-identify.h"
2
3 #define mul32x32_64(a,b) (((uint64_t)(a))*(b))
4
5 /* platform */
6 #if defined(COMPILER_MSVC)
7 #include <intrin.h>
8 #if !defined(_DEBUG)
9 #undef mul32x32_64
10 #define mul32x32_64(a,b) __emulu(a,b)
11 #endif
12 #undef inline
13 #define inline __forceinline
14 #define DONNA_INLINE __forceinline
15 #define DONNA_NOINLINE __declspec(noinline)
16 #define ALIGN(x) __declspec(align(x))
17 #define ROTL32(a,b) _rotl(a,b)
18 #define ROTR32(a,b) _rotr(a,b)
19 #else
20 #include <sys/param.h>
21 #define DONNA_INLINE inline __attribute__((always_inline))
22 #define DONNA_NOINLINE __attribute__((noinline))
23 /* Tor: OSX pollutes the global namespace with an ALIGN macro. */
24 #undef ALIGN
25 #define ALIGN(x) __attribute__((aligned(x)))
26 #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
27 #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
28 #endif
29
/* uint128_t */
/* 64x64 -> 128 bit arithmetic abstraction.  Every branch below defines
 * the same macro "API": mul64x64_128, shr128/shl128 (+ _pair variants),
 * add128, add128_64, lo128, hi128.  A compiler-native 128-bit integer
 * is preferred; otherwise a two-limb {lo,hi} struct is driven by MSVC
 * intrinsics or by GCC x86-64 inline assembly. */
#if defined(CPU_64BITS) && !defined(ED25519_FORCE_32BIT)
#if defined(COMPILER_CLANG) && (COMPILER_CLANG >= 30100)
#define HAVE_NATIVE_UINT128
typedef unsigned __int128 uint128_t;
#elif defined(COMPILER_MSVC)
/* MSVC has no __int128: emulate with a 64-bit limb pair + intrinsics. */
#define HAVE_UINT128
typedef struct uint128_t {
	uint64_t lo, hi;
} uint128_t;
#define mul64x64_128(out,a,b) out.lo = _umul128(a,b,&out.hi);
#define shr128_pair(out,hi,lo,shift) out = __shiftright128(lo, hi, shift);
#define shl128_pair(out,hi,lo,shift) out = __shiftleft128(lo, hi, shift);
#define shr128(out,in,shift) shr128_pair(out, in.hi, in.lo, shift)
#define shl128(out,in,shift) shl128_pair(out, in.hi, in.lo, shift)
/* Carry out of the low limb is detected via unsigned wraparound:
 * after a.lo += b.lo, (a.lo < p) is 1 exactly when the add wrapped. */
#define add128(a,b) { uint64_t p = a.lo; a.lo += b.lo; a.hi += b.hi + (a.lo < p); }
#define add128_64(a,b) { uint64_t p = a.lo; a.lo += b; a.hi += (a.lo < p); }
#define lo128(a) (a.lo)
#define hi128(a) (a.hi)
#elif defined(COMPILER_GCC) && !defined(HAVE_NATIVE_UINT128)
#if defined(__SIZEOF_INT128__)
#define HAVE_NATIVE_UINT128
typedef unsigned __int128 uint128_t;
#elif (COMPILER_GCC >= 40400)
/* Older GCC (>= 4.4) spells the same 128-bit type via TImode. */
#define HAVE_NATIVE_UINT128
typedef unsigned uint128_t __attribute__((mode(TI)));
#elif defined(CPU_X86_64)
/* Last resort on x86-64: limb pair driven by inline assembly
 * (mulq/shrdq/shldq/addq-adcq). */
#define HAVE_UINT128
typedef struct uint128_t {
	uint64_t lo, hi;
} uint128_t;
#define mul64x64_128(out,a,b) __asm__ ("mulq %3" : "=a" (out.lo), "=d" (out.hi) : "a" (a), "rm" (b));
#define shr128_pair(out,hi,lo,shift) __asm__ ("shrdq %2,%1,%0" : "+r" (lo) : "r" (hi), "J" (shift)); out = lo;
#define shl128_pair(out,hi,lo,shift) __asm__ ("shldq %2,%1,%0" : "+r" (hi) : "r" (lo), "J" (shift)); out = hi;
#define shr128(out,in,shift) shr128_pair(out,in.hi, in.lo, shift)
#define shl128(out,in,shift) shl128_pair(out,in.hi, in.lo, shift)
#define add128(a,b) __asm__ ("addq %4,%2; adcq %5,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b.lo), "rm" (b.hi) : "cc");
#define add128_64(a,b) __asm__ ("addq %4,%2; adcq $0,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b) : "cc");
#define lo128(a) (a.lo)
#define hi128(a) (a.hi)
#endif
#endif

/* Native 128-bit type: implement the same macro API with plain C
 * arithmetic on uint128_t. */
#if defined(HAVE_NATIVE_UINT128)
#define HAVE_UINT128
#define mul64x64_128(out,a,b) out = (uint128_t)a * b;
#define shr128_pair(out,hi,lo,shift) out = (uint64_t)((((uint128_t)hi << 64) | lo) >> (shift));
#define shl128_pair(out,hi,lo,shift) out = (uint64_t)(((((uint128_t)hi << 64) | lo) << (shift)) >> 64);
#define shr128(out,in,shift) out = (uint64_t)(in >> (shift));
#define shl128(out,in,shift) out = (uint64_t)((in << shift) >> 64);
#define add128(a,b) a += b;
#define add128_64(a,b) a += (uint64_t)b;
#define lo128(a) ((uint64_t)a)
#define hi128(a) ((uint64_t)(a >> 64))
#endif

#if !defined(HAVE_UINT128)
#error Need a uint128_t implementation!
#endif
#endif
90
/* endian */
#if !defined(ED25519_OPENSSLRNG)
/* Serialize a 32-bit word into 4 bytes, little-endian, independent of
 * host byte order or alignment. */
static inline void U32TO8_LE(unsigned char *p, const uint32_t v) {
	int i;
	for (i = 0; i < 4; i++)
		p[i] = (unsigned char)(v >> (8 * i));
}
#endif
100
101 #if !defined(HAVE_UINT128)
/* Load 4 little-endian bytes as a 32-bit word, independent of host
 * byte order or alignment. */
static inline uint32_t U8TO32_LE(const unsigned char *p) {
	uint32_t v = 0;
	int i;
	for (i = 3; i >= 0; i--)
		v = (v << 8) | (uint32_t)p[i];
	return v;
}
109 #else
/* Load 8 little-endian bytes as a 64-bit word, independent of host
 * byte order or alignment. */
static inline uint64_t U8TO64_LE(const unsigned char *p) {
	uint64_t v = 0;
	int i;
	for (i = 7; i >= 0; i--)
		v = (v << 8) | (uint64_t)p[i];
	return v;
}
121
/* Serialize a 64-bit word into 8 bytes, little-endian, independent of
 * host byte order or alignment. */
static inline void U64TO8_LE(unsigned char *p, const uint64_t v) {
	int i;
	for (i = 0; i < 8; i++)
		p[i] = (unsigned char)(v >> (8 * i));
}
132 #endif
133
/* Tor: Detect and disable inline assembly when clang's AddressSanitizer
 * is present, due to compilation failing because it runs out of registers.
 *
 * The alternative is to annotate `ge25519_scalarmult_base_choose_niels`
 * and selectively disable AddressSanitizer instrumentation, however doing
 * things this way results in a "more sanitized" binary.
 */
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
/* ASan's extra bookkeeping leaves too few registers for the asm paths. */
#define ED25519_NO_INLINE_ASM
#endif
#endif

/* Tor: Force enable SSE2 on 32 bit x86 systems if the compile target
 * architecture supports it. This is not done on x86-64 as the non-SSE2
 * code benchmarks better, at least on Haswell.
 */
#if defined(__SSE2__) && !defined(CPU_X86_64)
/* undef in case it's manually specified... */
#undef ED25519_SSE2
#define ED25519_SSE2
#endif

/* Tor: GCC's Stack Protector freaks out and produces variable length
 * buffer warnings when alignment is requested that is greater than
 * STACK_BOUNDARY (x86 has special code to deal with this for SSE2).
 *
 * Since the only reason things are 16 byte aligned in the first place
 * is for SSE2, only request variable alignment for SSE2 builds.
 *
 * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59674
 */
#if !defined(ED25519_SSE2)
/* Non-SSE2 builds: make ALIGN(x) a no-op. */
#undef ALIGN
#define ALIGN(x)
#endif

#include <stdlib.h>
#include <string.h>
173
174
175