/* poly1305.c  -  Poly1305 internals and generic implementation
 * Copyright (C) 2014,2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
19
20 #include <config.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "types.h"
26 #include "g10lib.h"
27 #include "cipher.h"
28 #include "bufhelp.h"
29 #include "poly1305-internal.h"
30
31 #include "mpi-internal.h"
32 #include "longlong.h"
33
34
/* Forward declaration: the self-test is defined at the end of this file
 * and is invoked from _gcry_poly1305_init on its first call.  It returns
 * NULL on success or a static error string on failure.  */
static const char *selftest (void);
36
37
38 #undef HAVE_ASM_POLY1305_BLOCKS
39
40
41 #undef USE_MPI_64BIT
42 #undef USE_MPI_32BIT
43 #if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64)
44 # define USE_MPI_64BIT 1
45 #elif BYTES_PER_MPI_LIMB == 4
46 # define USE_MPI_32BIT 1
47 #else
48 # error please implement for this limb size.
49 #endif
50
51
/* USE_S390X_ASM indicates whether to enable zSeries code.  It is only set
 * for 8-byte MPI limbs, on s390x with GCC >= 4 and __ARCH__ >= 9, and when
 * configure detected working s390x inline assembly.  */
#undef USE_S390X_ASM
#if BYTES_PER_MPI_LIMB == 8
# if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
#  if defined(HAVE_GCC_INLINE_ASM_S390X)
#   define USE_S390X_ASM 1
#  endif /* HAVE_GCC_INLINE_ASM_S390X */
# endif
#endif
61
62
#ifdef USE_S390X_ASM

/* Tell the generic sections below not to compile their own
 * poly1305_blocks.  */
#define HAVE_ASM_POLY1305_BLOCKS 1

/* Block function implemented outside this file (s390x assembly).  */
extern unsigned int _gcry_poly1305_s390x_blocks1(void *state,
                                                 const byte *buf, size_t len,
                                                 byte high_pad);

/* Thin adapter giving the s390x implementation the same signature as the
 * generic poly1305_blocks: it forwards the accumulator/key state of CTX,
 * the input BUF/LEN and HIGH_PAD unchanged and returns whatever the
 * assembly routine returns (used by callers as a stack burn depth).  */
static unsigned int
poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
                 byte high_pad)
{
  return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad);
}

#endif /* USE_S390X_ASM */
79
80
/* Reset CTX and load the 32-byte one-time KEY.
 *
 * The accumulator h and the count of buffered bytes are cleared.  The first
 * 16 key bytes are read as four little-endian 32-bit words into r, with the
 * Poly1305 clamp applied (top four bits of every word cleared, low two bits
 * of words 1..3 cleared).  The last 16 key bytes are read unmodified into
 * k; poly1305_final adds them to the accumulator to form the tag.  */
static void poly1305_init (poly1305_context_t *ctx,
                           const byte key[POLY1305_KEYLEN])
{
  POLY1305_STATE *st = &ctx->state;

  ctx->leftover = 0;

  /* h = 0 */
  st->h[0] = 0;
  st->h[1] = 0;
  st->h[2] = 0;
  st->h[3] = 0;
  st->h[4] = 0;

  /* r = clamp (key[0..15]) */
  st->r[0] = buf_get_le32(key + 0)  & 0x0fffffff;
  st->r[1] = buf_get_le32(key + 4)  & 0x0ffffffc;
  st->r[2] = buf_get_le32(key + 8)  & 0x0ffffffc;
  st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc;

  /* k = key[16..31] */
  st->k[0] = buf_get_le32(key + 16);
  st->k[1] = buf_get_le32(key + 20);
  st->k[2] = buf_get_le32(key + 24);
  st->k[3] = buf_get_le32(key + 28);
}
104
105
106 #ifdef USE_MPI_64BIT
107
/* ADD_1305_64(A2, A1, A0, B2, B1, B0): three-limb addition with carry
 * propagation, A2:A1:A0 += B2:B1:B0, where index 0 is the least significant
 * 64-bit limb.  The A arguments must be lvalues and are updated in place.
 * A carry out of the top limb is discarded.  Architecture specific inline
 * assembly versions come first; the generic fallback is used when none of
 * them matched.  */

#if defined (__aarch64__) && __GNUC__ >= 4

/* A += B (armv8/aarch64) */
#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
      __asm__ ("adds %0, %3, %0\n" \
               "adcs %1, %4, %1\n" \
               "adc  %2, %5, %2\n" \
               : "+r" (A0), "+r" (A1), "+r" (A2) \
               : "r" (B0), "r" (B1), "r" (B2) \
               : "cc" )

#endif /* __aarch64__ */

#if defined (__x86_64__) && __GNUC__ >= 4

/* A += B (x86-64) */
#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
      __asm__ ("addq %3, %0\n" \
               "adcq %4, %1\n" \
               "adcq %5, %2\n" \
               : "+r" (A0), "+r" (A1), "+r" (A2) \
               : "g" (B0), "g" (B1), "g" (B2) \
               : "cc" )

#endif /* __x86_64__ */

#if defined (__powerpc__) && __GNUC__ >= 4

/* A += B (ppc64) */
#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
      __asm__ ("addc %0, %3, %0\n" \
               "adde %1, %4, %1\n" \
               "adde %2, %5, %2\n" \
               : "+r" (A0), "+r" (A1), "+r" (A2) \
               : "r" (B0), "r" (B1), "r" (B2) \
               : "cc" )

#endif /* __powerpc__ */

#ifndef ADD_1305_64
/* A += B (generic, mpi).  The low limbs are added first to obtain the
 * carry, then the two upper limbs are added as a pair and the carry is
 * folded in with a second pair addition.  */
#  define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \
      u64 carry; \
      add_ssaaaa(carry, A0, 0, A0, 0, B0); \
      add_ssaaaa(A2, A1, A2, A1, B2, B1); \
      add_ssaaaa(A2, A1, A2, A1, 0, carry); \
    } while (0)
#endif
156
/* H = H * R mod 2¹³⁰-5
 *
 * H2:H1:H0 is the accumulator (64-bit limbs, H2 holding the bits from 128
 * upwards) and is updated in place; R1:R0 is the multiplier and R1_MULT5
 * is the caller-supplied value (R1 >> 2) + R1.  Product terms that would
 * land at bit 128 or above are folded back using R1_MULT5, and the bits of
 * the intermediate top limb above bit 1 are folded back as "* 5".  The
 * result is only partially reduced: it fits the three limbs but is not
 * necessarily smaller than 2^130-5.  */
#define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \
    u64 x0_lo, x0_hi, x1_lo, x1_hi; \
    u64 t0_lo, t0_hi, t1_lo, t1_hi; \
    \
    /* x = a * r (partial mod 2^130-5) */ \
    umul_ppmm(x0_hi, x0_lo, H0, R0);  /* h0 * r0 */ \
    umul_ppmm(x1_hi, x1_lo, H0, R1);  /* h0 * r1 */ \
    \
    umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \
    add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \
    umul_ppmm(t1_hi, t1_lo, H1, R0);       /* h1 * r0 */ \
    add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \
    \
    t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \
    t1_hi = H2 * R0;       /* h2 * r0 */ \
    /* From here on H0 temporarily holds the top word and H1 the word \
     * below it.  */ \
    add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \
    \
    /* carry propagation */ \
    H2 = H0 & 3; \
    H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \
    ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
  } while (0)
180
181 #ifndef HAVE_ASM_POLY1305_BLOCKS
182
183 static unsigned int
poly1305_blocks(poly1305_context_t * ctx,const byte * buf,size_t len,byte high_pad)184 poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
185 byte high_pad)
186 {
187 POLY1305_STATE *st = &ctx->state;
188 u64 r0, r1, r1_mult5;
189 u64 h0, h1, h2;
190 u64 m0, m1, m2;
191
192 m2 = high_pad;
193
194 h0 = st->h[0] + ((u64)st->h[1] << 32);
195 h1 = st->h[2] + ((u64)st->h[3] << 32);
196 h2 = st->h[4];
197
198 r0 = st->r[0] + ((u64)st->r[1] << 32);
199 r1 = st->r[2] + ((u64)st->r[3] << 32);
200
201 r1_mult5 = (r1 >> 2) + r1;
202
203 m0 = buf_get_le64(buf + 0);
204 m1 = buf_get_le64(buf + 8);
205 buf += POLY1305_BLOCKSIZE;
206 len -= POLY1305_BLOCKSIZE;
207
208 while (len >= POLY1305_BLOCKSIZE)
209 {
210 /* a = h + m */
211 ADD_1305_64(h2, h1, h0, m2, m1, m0);
212
213 m0 = buf_get_le64(buf + 0);
214 m1 = buf_get_le64(buf + 8);
215
216 /* h = a * r (partial mod 2^130-5) */
217 MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
218
219 buf += POLY1305_BLOCKSIZE;
220 len -= POLY1305_BLOCKSIZE;
221 }
222
223 /* a = h + m */
224 ADD_1305_64(h2, h1, h0, m2, m1, m0);
225
226 /* h = a * r (partial mod 2^130-5) */
227 MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
228
229 st->h[0] = h0;
230 st->h[1] = h0 >> 32;
231 st->h[2] = h1;
232 st->h[3] = h1 >> 32;
233 st->h[4] = h2;
234
235 return 6 * sizeof (void *) + 18 * sizeof (u64);
236 }
237
238 #endif /* !HAVE_ASM_POLY1305_BLOCKS */
239
/* Finish the computation and write the 16-byte tag to MAC (64-bit limb
 * version).
 *
 * Any buffered partial block is terminated with a 1 byte, zero filled to a
 * full block and absorbed with high_pad 0.  The accumulator is then reduced
 * modulo 2^130-5, the second key half k is added modulo 2^128 and the
 * result is stored little-endian.
 *
 * Returns the stack depth the caller should burn.  */
static unsigned int poly1305_final (poly1305_context_t *ctx,
                                    byte mac[POLY1305_TAGLEN])
{
  POLY1305_STATE *st = &ctx->state;
  unsigned int burn = 0;
  u64 u, carry;
  u64 k0, k1;
  u64 h0, h1;
  u64 h2;

  /* process the remaining block */
  if (ctx->leftover)
    {
      ctx->buffer[ctx->leftover++] = 1;
      if (ctx->leftover < POLY1305_BLOCKSIZE)
        {
          memset (&ctx->buffer[ctx->leftover], 0,
                  POLY1305_BLOCKSIZE - ctx->leftover);
          ctx->leftover = POLY1305_BLOCKSIZE;
        }
      burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
    }

  h0 = st->h[0] + ((u64)st->h[1] << 32);
  h1 = st->h[2] + ((u64)st->h[3] << 32);
  h2 = st->h[4];

  k0 = st->k[0] + ((u64)st->k[1] << 32);
  k1 = st->k[2] + ((u64)st->k[3] << 32);

  /* check if h is more than 2^130-5, by adding 5. */
  /* Only the carries matter here; the low words of the sums are dropped. */
  add_ssaaaa(carry, u, 0, h0, 0, 5);
  add_ssaaaa(carry, u, 0, carry, 0, h1);
  u = (carry + h2) >> 2; /* u == 0 or 1 */

  /* minus 2^130-5 ... (+5) */
  /* Branch-free select: add 5 when u == 1, 0 otherwise.  Dropping the bits
   * from 128 upwards then takes care of the 2^130 part.  */
  u = (-u) & 5;
  add_ssaaaa(h1, h0, h1, h0, 0, u);

  /* add high part of key + h */
  add_ssaaaa(h1, h0, h1, h0, k1, k0);
  buf_put_le64(mac + 0, h0);
  buf_put_le64(mac + 8, h1);

  /* burn_stack */
  return 4 * sizeof (void *) + 7 * sizeof (u64) + burn;
}
288 #endif /* USE_MPI_64BIT */
289
290 #ifdef USE_MPI_32BIT
291
/* Helper macros for the 32-bit limb implementation:
 *
 *   UMUL_ADD_32(HI, LO, A, B)   HI:LO += A * B (32x32 -> 64 bit multiply
 *                               accumulated into the 64-bit pair HI:LO).
 *   ADD_1305_32(A4..A0, B4..B0) five-limb addition with carry propagation,
 *                               A += B, index 0 least significant; a carry
 *                               out of the top limb is discarded.
 *
 * The first arguments must be lvalues and are updated in place.
 * Architecture specific inline assembly versions come first; the generic
 * fallbacks are used for whatever has not been defined by then.  */

#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS

/* HI:LO += A * B (arm) */
#define UMUL_ADD_32(HI, LO, A, B) \
      __asm__ ("umlal %1, %0, %4, %5" \
               : "=r" (HI), "=r" (LO) \
               : "0" (HI), "1" (LO), "r" (A), "r" (B) )

/* A += B (arm) */
#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
      __asm__ ("adds %0, %0, %5\n" \
               "adcs %1, %1, %6\n" \
               "adcs %2, %2, %7\n" \
               "adcs %3, %3, %8\n" \
               "adc  %4, %4, %9\n" \
               : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
               : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \
               : "cc" )

#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */

#if defined (__i386__) && __GNUC__ >= 4

/* A += B (i386) */
#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
      __asm__ ("addl %5, %0\n" \
               "adcl %6, %1\n" \
               "adcl %7, %2\n" \
               "adcl %8, %3\n" \
               "adcl %9, %4\n" \
               : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
               : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \
               : "cc" )

#endif /* __i386__ */

#ifndef UMUL_ADD_32
/* HI:LO += A * B (generic, mpi) */
#  define UMUL_ADD_32(HI, LO, A, B) do { \
      u32 t_lo, t_hi; \
      umul_ppmm(t_hi, t_lo, A, B); \
      add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \
    } while (0)
#endif

#ifndef ADD_1305_32
/* A += B (generic, mpi).  Each of the three low limbs is added on its own
 * to capture its carry, which is then folded into the next limb; the two
 * top limbs are added as a pair.  */
#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
      u32 carry0, carry1, carry2; \
      add_ssaaaa(carry0, A0, 0, A0, 0, B0); \
      add_ssaaaa(carry1, A1, 0, A1, 0, B1); \
      add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \
      add_ssaaaa(carry2, A2, 0, A2, 0, B2); \
      add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \
      add_ssaaaa(A4, A3, A4, A3, B4, B3); \
      add_ssaaaa(A4, A3, A4, A3, 0, carry2); \
    } while (0)
#endif
350
/* H = H * R mod 2¹³⁰-5
 *
 * H4..H0 is the accumulator (32-bit limbs, H4 holding the bits from 128
 * upwards) and is updated in place; R3..R0 is the multiplier and
 * Rn_MULT5 are the caller-supplied values (Rn >> 2) + Rn.  Product terms
 * that would land at bit 128 or above are folded back using the _MULT5
 * values, and the bits of the intermediate top limb above bit 1 are folded
 * back as "* 5".  The result is only partially reduced: it fits the five
 * limbs but is not necessarily smaller than 2^130-5.
 *
 * Note that H1..H4 are overwritten with intermediate high words as soon as
 * their original value has been consumed, so the statement order matters.  */
#define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \
                        R3_MULT5, R2_MULT5, R1_MULT5) do { \
    u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \
    u32 t0_lo, t0_hi; \
    \
    /* x = a * r (partial mod 2^130-5) */ \
    umul_ppmm(x0_hi, x0_lo, H0, R0);  /* h0 * r0 */ \
    umul_ppmm(x1_hi, x1_lo, H0, R1);  /* h0 * r1 */ \
    umul_ppmm(x2_hi, x2_lo, H0, R2);  /* h0 * r2 */ \
    umul_ppmm(x3_hi, x3_lo, H0, R3);  /* h0 * r3 */ \
    \
    UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \
    UMUL_ADD_32(x1_hi, x1_lo, H1, R0);       /* h1 * r0 */ \
    UMUL_ADD_32(x2_hi, x2_lo, H1, R1);       /* h1 * r1 */ \
    UMUL_ADD_32(x3_hi, x3_lo, H1, R2);       /* h1 * r2 */ \
    \
    UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \
    UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \
    UMUL_ADD_32(x2_hi, x2_lo, H2, R0);       /* h2 * r0 */ \
    UMUL_ADD_32(x3_hi, x3_lo, H2, R1);       /* h2 * r1 */ \
    \
    UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \
    H1 = x0_hi; \
    UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \
    UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \
    UMUL_ADD_32(x3_hi, x3_lo, H3, R0);       /* h3 * r0 */ \
    \
    t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \
    t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \
    add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \
    add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \
    t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \
    t0_hi = H4 * R0;       /* h4 * r0 */ \
    add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \
    \
    /* carry propagation */ \
    H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \
    H4 = H4 & 3; \
    ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \
  } while (0)
392
393 #ifndef HAVE_ASM_POLY1305_BLOCKS
394
395 static unsigned int
poly1305_blocks(poly1305_context_t * ctx,const byte * buf,size_t len,byte high_pad)396 poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
397 byte high_pad)
398 {
399 POLY1305_STATE *st = &ctx->state;
400 u32 r1_mult5, r2_mult5, r3_mult5;
401 u32 h0, h1, h2, h3, h4;
402 u32 m0, m1, m2, m3, m4;
403
404 m4 = high_pad;
405
406 h0 = st->h[0];
407 h1 = st->h[1];
408 h2 = st->h[2];
409 h3 = st->h[3];
410 h4 = st->h[4];
411
412 r1_mult5 = (st->r[1] >> 2) + st->r[1];
413 r2_mult5 = (st->r[2] >> 2) + st->r[2];
414 r3_mult5 = (st->r[3] >> 2) + st->r[3];
415
416 while (len >= POLY1305_BLOCKSIZE)
417 {
418 m0 = buf_get_le32(buf + 0);
419 m1 = buf_get_le32(buf + 4);
420 m2 = buf_get_le32(buf + 8);
421 m3 = buf_get_le32(buf + 12);
422
423 /* a = h + m */
424 ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0);
425
426 /* h = a * r (partial mod 2^130-5) */
427 MUL_MOD_1305_32(h4, h3, h2, h1, h0,
428 st->r[3], st->r[2], st->r[1], st->r[0],
429 r3_mult5, r2_mult5, r1_mult5);
430
431 buf += POLY1305_BLOCKSIZE;
432 len -= POLY1305_BLOCKSIZE;
433 }
434
435 st->h[0] = h0;
436 st->h[1] = h1;
437 st->h[2] = h2;
438 st->h[3] = h3;
439 st->h[4] = h4;
440
441 return 6 * sizeof (void *) + 28 * sizeof (u32);
442 }
443
444 #endif /* !HAVE_ASM_POLY1305_BLOCKS */
445
/* Finish the computation and write the 16-byte tag to MAC (32-bit limb
 * version).
 *
 * Any buffered partial block is terminated with a 1 byte, zero filled to a
 * full block and absorbed with high_pad 0.  The accumulator is then reduced
 * modulo 2^130-5, the second key half k is added modulo 2^128 and the
 * result is stored little-endian.
 *
 * Returns the stack depth the caller should burn.  */
static unsigned int poly1305_final (poly1305_context_t *ctx,
                                    byte mac[POLY1305_TAGLEN])
{
  POLY1305_STATE *st = &ctx->state;
  unsigned int burn = 0;
  u32 carry, tmp0, tmp1, tmp2, u;
  u32 h4, h3, h2, h1, h0;

  /* process the remaining block */
  if (ctx->leftover)
    {
      ctx->buffer[ctx->leftover++] = 1;
      if (ctx->leftover < POLY1305_BLOCKSIZE)
        {
          memset (&ctx->buffer[ctx->leftover], 0,
                  POLY1305_BLOCKSIZE - ctx->leftover);
          ctx->leftover = POLY1305_BLOCKSIZE;
        }
      burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
    }

  h0 = st->h[0];
  h1 = st->h[1];
  h2 = st->h[2];
  h3 = st->h[3];
  h4 = st->h[4];

  /* check if h is more than 2^130-5, by adding 5. */
  /* Only the carries matter here; the sums in tmp0 are dropped.  */
  add_ssaaaa(carry, tmp0, 0, h0, 0, 5);
  add_ssaaaa(carry, tmp0, 0, carry, 0, h1);
  add_ssaaaa(carry, tmp0, 0, carry, 0, h2);
  add_ssaaaa(carry, tmp0, 0, carry, 0, h3);
  u = (carry + h4) >> 2; /* u == 0 or 1 */

  /* minus 2^130-5 ... (+5) */
  /* Branch-free select: add 5 when u == 1, 0 otherwise.  Dropping the bits
   * from 128 upwards then takes care of the 2^130 part.  */
  u = (-u) & 5;
  add_ssaaaa(carry, h0, 0, h0, 0, u);
  add_ssaaaa(carry, h1, 0, h1, 0, carry);
  add_ssaaaa(carry, h2, 0, h2, 0, carry);
  add_ssaaaa(carry, h3, 0, h3, 0, carry);

  /* add high part of key + h */
  add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]);
  add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]);
  add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0);
  add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]);
  add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1);
  /* The carry out of the top word is not used: the tag is taken modulo
   * 2^128.  */
  add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]);
  h3 += tmp2;

  buf_put_le32(mac + 0, h0);
  buf_put_le32(mac + 4, h1);
  buf_put_le32(mac + 8, h2);
  buf_put_le32(mac + 12, h3);

  /* burn_stack */
  return 4 * sizeof (void *) + 10 * sizeof (u32) + burn;
}
504
505 #endif /* USE_MPI_32BIT */
506
507
508 unsigned int
_gcry_poly1305_update_burn(poly1305_context_t * ctx,const byte * m,size_t bytes)509 _gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m,
510 size_t bytes)
511 {
512 unsigned int burn = 0;
513
514 /* handle leftover */
515 if (ctx->leftover)
516 {
517 size_t want = (POLY1305_BLOCKSIZE - ctx->leftover);
518 if (want > bytes)
519 want = bytes;
520 buf_cpy (ctx->buffer + ctx->leftover, m, want);
521 bytes -= want;
522 m += want;
523 ctx->leftover += want;
524 if (ctx->leftover < POLY1305_BLOCKSIZE)
525 return 0;
526 burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1);
527 ctx->leftover = 0;
528 }
529
530 /* process full blocks */
531 if (bytes >= POLY1305_BLOCKSIZE)
532 {
533 size_t nblks = bytes / POLY1305_BLOCKSIZE;
534 burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1);
535 m += nblks * POLY1305_BLOCKSIZE;
536 bytes -= nblks * POLY1305_BLOCKSIZE;
537 }
538
539 /* store leftover */
540 if (bytes)
541 {
542 buf_cpy (ctx->buffer + ctx->leftover, m, bytes);
543 ctx->leftover += bytes;
544 }
545
546 return burn;
547 }
548
549
550 void
_gcry_poly1305_update(poly1305_context_t * ctx,const byte * m,size_t bytes)551 _gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes)
552 {
553 unsigned int burn;
554
555 burn = _gcry_poly1305_update_burn (ctx, m, bytes);
556
557 if (burn)
558 _gcry_burn_stack (burn);
559 }
560
561
562 void
_gcry_poly1305_finish(poly1305_context_t * ctx,byte mac[POLY1305_TAGLEN])563 _gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN])
564 {
565 unsigned int burn;
566
567 burn = poly1305_final (ctx, mac);
568
569 _gcry_burn_stack (burn);
570 }
571
572
573 gcry_err_code_t
_gcry_poly1305_init(poly1305_context_t * ctx,const byte * key,size_t keylen)574 _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
575 size_t keylen)
576 {
577 static int initialized;
578 static const char *selftest_failed;
579
580 if (!initialized)
581 {
582 initialized = 1;
583 selftest_failed = selftest ();
584 if (selftest_failed)
585 log_error ("Poly1305 selftest failed (%s)\n", selftest_failed);
586 }
587
588 if (keylen != POLY1305_KEYLEN)
589 return GPG_ERR_INV_KEYLEN;
590
591 if (selftest_failed)
592 return GPG_ERR_SELFTEST_FAILED;
593
594 poly1305_init (ctx, key);
595
596 return 0;
597 }
598
599
600 static void
poly1305_auth(byte mac[POLY1305_TAGLEN],const byte * m,size_t bytes,const byte * key)601 poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes,
602 const byte * key)
603 {
604 poly1305_context_t ctx;
605
606 memset (&ctx, 0, sizeof (ctx));
607
608 _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN);
609 _gcry_poly1305_update (&ctx, m, bytes);
610 _gcry_poly1305_finish (&ctx, mac);
611
612 wipememory (&ctx, sizeof (ctx));
613 }
614
615
616 static const char *
selftest(void)617 selftest (void)
618 {
619 /* example from nacl */
620 static const byte nacl_key[POLY1305_KEYLEN] = {
621 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
622 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
623 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
624 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80,
625 };
626
627 static const byte nacl_msg[131] = {
628 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73,
629 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce,
630 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4,
631 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a,
632 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b,
633 0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72,
634 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2,
635 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38,
636 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a,
637 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae,
638 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea,
639 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda,
640 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde,
641 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3,
642 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6,
643 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74,
644 0xe3, 0x55, 0xa5
645 };
646
647 static const byte nacl_mac[16] = {
648 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5,
649 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9
650 };
651
652 /* generates a final value of (2^130 - 2) == 3 */
653 static const byte wrap_key[POLY1305_KEYLEN] = {
654 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
655 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
656 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
657 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
658 };
659
660 static const byte wrap_msg[16] = {
661 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
662 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
663 };
664
665 static const byte wrap_mac[16] = {
666 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
667 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
668 };
669
670 /* mac of the macs of messages of length 0 to 256, where the key and messages
671 * have all their values set to the length
672 */
673 static const byte total_key[POLY1305_KEYLEN] = {
674 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
675 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9,
676 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
677 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
678 };
679
680 static const byte total_mac[16] = {
681 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd,
682 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39
683 };
684
685 poly1305_context_t ctx;
686 poly1305_context_t total_ctx;
687 byte all_key[POLY1305_KEYLEN];
688 byte all_msg[256];
689 byte mac[16];
690 size_t i, j;
691
692 memset (&ctx, 0, sizeof (ctx));
693 memset (&total_ctx, 0, sizeof (total_ctx));
694
695 memset (mac, 0, sizeof (mac));
696 poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key);
697 if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
698 return "Poly1305 test 1 failed.";
699
700 /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so
701 * make sure everything still works varying between them */
702 memset (mac, 0, sizeof (mac));
703 _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN);
704 _gcry_poly1305_update (&ctx, nacl_msg + 0, 32);
705 _gcry_poly1305_update (&ctx, nacl_msg + 32, 64);
706 _gcry_poly1305_update (&ctx, nacl_msg + 96, 16);
707 _gcry_poly1305_update (&ctx, nacl_msg + 112, 8);
708 _gcry_poly1305_update (&ctx, nacl_msg + 120, 4);
709 _gcry_poly1305_update (&ctx, nacl_msg + 124, 2);
710 _gcry_poly1305_update (&ctx, nacl_msg + 126, 1);
711 _gcry_poly1305_update (&ctx, nacl_msg + 127, 1);
712 _gcry_poly1305_update (&ctx, nacl_msg + 128, 1);
713 _gcry_poly1305_update (&ctx, nacl_msg + 129, 1);
714 _gcry_poly1305_update (&ctx, nacl_msg + 130, 1);
715 _gcry_poly1305_finish (&ctx, mac);
716 if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
717 return "Poly1305 test 2 failed.";
718
719 memset (mac, 0, sizeof (mac));
720 poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key);
721 if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0)
722 return "Poly1305 test 3 failed.";
723
724 _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN);
725 for (i = 0; i < 256; i++)
726 {
727 /* set key and message to 'i,i,i..' */
728 for (j = 0; j < sizeof (all_key); j++)
729 all_key[j] = i;
730 for (j = 0; j < i; j++)
731 all_msg[j] = i;
732 poly1305_auth (mac, all_msg, i, all_key);
733 _gcry_poly1305_update (&total_ctx, mac, 16);
734 }
735 _gcry_poly1305_finish (&total_ctx, mac);
736 if (memcmp (total_mac, mac, sizeof (total_mac)) != 0)
737 return "Poly1305 test 4 failed.";
738
739 return NULL;
740 }
741