1 /**
2 * @file constant_time.h
3 * @copyright
4 * Copyright (c) 2014 Cryptography Research, Inc. \n
5 * Released under the MIT License. See LICENSE.txt for license information.
6 * @author Mike Hamburg
7 *
8 * @brief Constant-time routines.
9 */
10
11 #ifndef __CONSTANT_TIME_H__
12 #define __CONSTANT_TIME_H__ 1
13
14 #include "word.h"
15 #include <string.h>
16
17 /*
18 * Constant-time operations on hopefully-compile-time-sized memory
19 * regions. Needed for flexibility / demagication: not all fields
20 * have sizes which are multiples of the vector width, necessitating
21 * a change from the Ed448 versions.
22 *
23 * These routines would be much simpler to define at the byte level,
24 * but if not vectorized they would be a significant fraction of the
25 * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of
26 * signing time, vs 6% on Haswell with its fancy AVX2 vectors.
27 *
28 * If the compiler could do a good job of autovectorizing the code,
29 * we could just leave it with the byte definition. But that's unlikely
30 * on most deployed compilers, especially if you consider that pcmpeq[size]
31 * is much faster than moving a scalar to the vector unit (which is what
32 * a naive autovectorizer will do with constant_time_lookup on Intel).
33 *
34 * Instead, we're putting our trust in the loop unroller and unswitcher.
35 */
36
37
38 /**
39 * Unaligned big (vector?) register.
40 */
41 typedef struct {
42 big_register_t unaligned;
43 } __attribute__((packed)) unaligned_br_t;
44
45 /**
46 * Unaligned word register, for architectures where that matters.
47 */
48 typedef struct {
49 word_t unaligned;
50 } __attribute__((packed)) unaligned_word_t;
51
52 /**
53 * @brief Constant-time conditional swap.
54 *
55 * If doswap, then swap elem_bytes between *a and *b.
56 *
57 * *a and *b must not alias. Also, they must be at least as aligned
58 * as their sizes, if the CPU cares about that sort of thing.
59 */
60 static __inline__ void
61 __attribute__((unused,always_inline))
constant_time_cond_swap(void * __restrict__ a_,void * __restrict__ b_,word_t elem_bytes,mask_t doswap)62 constant_time_cond_swap (
63 void *__restrict__ a_,
64 void *__restrict__ b_,
65 word_t elem_bytes,
66 mask_t doswap
67 ) {
68 word_t k;
69 unsigned char *a = (unsigned char *)a_;
70 unsigned char *b = (unsigned char *)b_;
71
72 big_register_t br_mask = br_set_to_mask(doswap);
73 for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
74 if (elem_bytes % sizeof(big_register_t)) {
75 /* unaligned */
76 big_register_t xor =
77 ((unaligned_br_t*)(&a[k]))->unaligned
78 ^ ((unaligned_br_t*)(&b[k]))->unaligned;
79 xor &= br_mask;
80 ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
81 ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
82 } else {
83 /* aligned */
84 big_register_t xor =
85 *((big_register_t*)(&a[k]))
86 ^ *((big_register_t*)(&b[k]));
87 xor &= br_mask;
88 *((big_register_t*)(&a[k])) ^= xor;
89 *((big_register_t*)(&b[k])) ^= xor;
90 }
91 }
92
93 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
94 for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
95 if (elem_bytes % sizeof(word_t)) {
96 /* unaligned */
97 word_t xor =
98 ((unaligned_word_t*)(&a[k]))->unaligned
99 ^ ((unaligned_word_t*)(&b[k]))->unaligned;
100 xor &= doswap;
101 ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
102 ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
103 } else {
104 /* aligned */
105 word_t xor =
106 *((word_t*)(&a[k]))
107 ^ *((word_t*)(&b[k]));
108 xor &= doswap;
109 *((word_t*)(&a[k])) ^= xor;
110 *((word_t*)(&b[k])) ^= xor;
111 }
112 }
113 }
114
115 if (elem_bytes % sizeof(word_t)) {
116 for (; k<elem_bytes; k+=1) {
117 unsigned char xor = a[k] ^ b[k];
118 xor &= doswap;
119 a[k] ^= xor;
120 b[k] ^= xor;
121 }
122 }
123 }
124
125 /**
126 * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
127 *
128 * The table must be at least as aligned as elem_bytes. The output must be word aligned,
129 * and if the input size is vector aligned it must also be vector aligned.
130 *
131 * The table and output must not alias.
132 */
133 static __inline__ void
134 __attribute__((unused,always_inline))
constant_time_lookup(void * __restrict__ out_,const void * table_,word_t elem_bytes,word_t n_table,word_t idx)135 constant_time_lookup (
136 void *__restrict__ out_,
137 const void *table_,
138 word_t elem_bytes,
139 word_t n_table,
140 word_t idx
141 ) {
142 big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
143
144 /* Can't do pointer arithmetic on void* */
145 unsigned char *out = (unsigned char *)out_;
146 const unsigned char *table = (const unsigned char *)table_;
147 word_t j,k;
148
149 memset(out, 0, elem_bytes);
150 for (j=0; j<n_table; j++, big_i-=big_one) {
151 big_register_t br_mask = br_is_zero(big_i);
152 for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
153 if (elem_bytes % sizeof(big_register_t)) {
154 /* unaligned */
155 ((unaligned_br_t *)(out+k))->unaligned
156 |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
157 } else {
158 /* aligned */
159 *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
160 }
161 }
162
163 word_t mask = word_is_zero(idx^j);
164 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
165 for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
166 if (elem_bytes % sizeof(word_t)) {
167 /* input unaligned, output aligned */
168 *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
169 } else {
170 /* aligned */
171 *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
172 }
173 }
174 }
175
176 if (elem_bytes % sizeof(word_t)) {
177 for (; k<elem_bytes; k+=1) {
178 out[k] |= mask & table[k+j*elem_bytes];
179 }
180 }
181 }
182 }
183
184 /**
185 * @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
186 *
187 * The table must be at least as aligned as elem_bytes. The input must be word aligned,
188 * and if the output size is vector aligned it must also be vector aligned.
189 *
190 * The table and input must not alias.
191 */
192 static __inline__ void
193 __attribute__((unused,always_inline))
constant_time_insert(void * __restrict__ table_,const void * in_,word_t elem_bytes,word_t n_table,word_t idx)194 constant_time_insert (
195 void *__restrict__ table_,
196 const void *in_,
197 word_t elem_bytes,
198 word_t n_table,
199 word_t idx
200 ) {
201 big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
202
203 /* Can't do pointer arithmetic on void* */
204 const unsigned char *in = (const unsigned char *)in_;
205 unsigned char *table = (unsigned char *)table_;
206 word_t j,k;
207
208 for (j=0; j<n_table; j++, big_i-=big_one) {
209 big_register_t br_mask = br_is_zero(big_i);
210 for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
211 if (elem_bytes % sizeof(big_register_t)) {
212 /* unaligned */
213 ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
214 = ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
215 | ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
216 } else {
217 /* aligned */
218 *(big_register_t*)(&table[k+j*elem_bytes])
219 = ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
220 | ( *(const big_register_t *)(in+k) & br_mask );
221 }
222 }
223
224 word_t mask = word_is_zero(idx^j);
225 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
226 for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
227 if (elem_bytes % sizeof(word_t)) {
228 /* output unaligned, input aligned */
229 ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
230 = ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
231 | ( *(const word_t *)(in+k) & mask );
232 } else {
233 /* aligned */
234 *(word_t*)(&table[k+j*elem_bytes])
235 = ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
236 | ( *(const word_t *)(in+k) & mask );
237 }
238 }
239 }
240
241 if (elem_bytes % sizeof(word_t)) {
242 for (; k<elem_bytes; k+=1) {
243 table[k+j*elem_bytes]
244 = ( table[k+j*elem_bytes] & ~mask )
245 | ( in[k] & mask );
246 }
247 }
248 }
249 }
250
251 /**
252 * @brief Constant-time a = b&mask.
253 *
254 * The input and output must be at least as aligned as elem_bytes.
255 */
256 static __inline__ void
257 __attribute__((unused,always_inline))
constant_time_mask(void * a_,const void * b_,word_t elem_bytes,mask_t mask)258 constant_time_mask (
259 void * a_,
260 const void *b_,
261 word_t elem_bytes,
262 mask_t mask
263 ) {
264 unsigned char *a = (unsigned char *)a_;
265 const unsigned char *b = (const unsigned char *)b_;
266
267 word_t k;
268 big_register_t br_mask = br_set_to_mask(mask);
269 for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
270 if (elem_bytes % sizeof(big_register_t)) {
271 /* unaligned */
272 ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
273 } else {
274 /* aligned */
275 *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
276 }
277 }
278
279 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
280 for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
281 if (elem_bytes % sizeof(word_t)) {
282 /* unaligned */
283 ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
284 } else {
285 /* aligned */
286 *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
287 }
288 }
289 }
290
291 if (elem_bytes % sizeof(word_t)) {
292 for (; k<elem_bytes; k+=1) {
293 a[k] = mask & b[k];
294 }
295 }
296 }
297
298 /**
299 * @brief Constant-time a = mask ? bTrue : bFalse.
300 *
301 * The input and output must be at least as aligned as alignment_bytes
302 * or their size, whichever is smaller.
303 *
304 * Note that the output is not __restrict__, but if it overlaps either
305 * input, it must be equal and not partially overlap.
306 */
307 static __inline__ void
308 __attribute__((unused,always_inline))
constant_time_select(void * a_,const void * bFalse_,const void * bTrue_,word_t elem_bytes,mask_t mask,size_t alignment_bytes)309 constant_time_select (
310 void *a_,
311 const void *bFalse_,
312 const void *bTrue_,
313 word_t elem_bytes,
314 mask_t mask,
315 size_t alignment_bytes
316 ) {
317 unsigned char *a = (unsigned char *)a_;
318 const unsigned char *bTrue = (const unsigned char *)bTrue_;
319 const unsigned char *bFalse = (const unsigned char *)bFalse_;
320
321 alignment_bytes |= elem_bytes;
322
323 word_t k;
324 big_register_t br_mask = br_set_to_mask(mask);
325 for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
326 if (alignment_bytes % sizeof(big_register_t)) {
327 /* unaligned */
328 ((unaligned_br_t*)(&a[k]))->unaligned =
329 ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
330 | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
331 } else {
332 /* aligned */
333 *(big_register_t *)(a+k) =
334 ( br_mask & *(const big_register_t*)(&bTrue [k]))
335 | (~br_mask & *(const big_register_t*)(&bFalse[k]));
336 }
337 }
338
339 if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
340 for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
341 if (alignment_bytes % sizeof(word_t)) {
342 /* unaligned */
343 ((unaligned_word_t*)(&a[k]))->unaligned =
344 ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
345 | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
346 } else {
347 /* aligned */
348 *(word_t *)(a+k) =
349 ( mask & *(const word_t*)(&bTrue [k]))
350 | (~mask & *(const word_t*)(&bFalse[k]));
351 }
352 }
353 }
354
355 if (elem_bytes % sizeof(word_t)) {
356 for (; k<elem_bytes; k+=1) {
357 a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
358 }
359 }
360 }
361
362 #endif /* __CONSTANT_TIME_H__ */
363