1 /* chacha.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21 /*
22
23 DESCRIPTION
This library contains an implementation of the ChaCha20 stream cipher.

Based on chacha-ref.c version 20080118
27 D. J. Bernstein
28 Public domain.
29
30 */
31 #ifdef WOLFSSL_ARMASM
32 /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
33
34 #else
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38
39 #include <wolfssl/wolfcrypt/settings.h>
40
41 #if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
42
43 #include <wolfssl/wolfcrypt/chacha.h>
44 #include <wolfssl/wolfcrypt/error-crypt.h>
45 #include <wolfssl/wolfcrypt/logging.h>
46 #include <wolfssl/wolfcrypt/cpuid.h>
47 #ifdef NO_INLINE
48 #include <wolfssl/wolfcrypt/misc.h>
49 #else
50 #define WOLFSSL_MISC_INCLUDED
51 #include <wolfcrypt/src/misc.c>
52 #endif
53
54 #ifdef CHACHA_AEAD_TEST
55 #include <stdio.h>
56 #endif
57
#ifdef USE_INTEL_CHACHA_SPEEDUP
    #include <emmintrin.h>
    #include <immintrin.h>

    /* GCC 4.8 and earlier: build without the AVX2 code path. */
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #endif
    /* clang 3.5 and earlier: likewise build without AVX2. For any newer
     * clang, a NO_AVX2_SUPPORT that is already defined at this point is
     * removed again (presumably to undo the __GNUC__ test above when clang
     * reports an old GCC version -- TODO confirm). */
    #if defined(__clang__) && ((__clang_major__ < 3) || \
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
        #undef NO_AVX2_SUPPORT
    #endif

    #ifndef NO_AVX2_SUPPORT
        #define HAVE_INTEL_AVX2
    #endif

    /* Cached result of cpuid_get_flags(); cpuidFlagsSet becomes non-zero
     * once cpuidFlags has been filled in by wc_Chacha_Process(). */
    static int cpuidFlagsSet = 0;
    static int cpuidFlags = 0;
#endif
82
/* The cipher state is kept as 32-bit words; LITTLE32 converts between the
 * host representation and little-endian (a no-op unless BIG_ENDIAN_ORDER). */
#ifdef BIG_ENDIAN_ORDER
    #define LITTLE32(x) ByteReverseWord32(x)
#else
    #define LITTLE32(x) (x)
#endif

/* Number of rounds */
#define ROUNDS 20

/* Unsigned 32-bit constant / truncate a value to 32 bits. */
#define U32C(v) (v##U)
#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
/* Read one little-endian word at p. NOTE: p is dereferenced as a word32,
 * so it must be suitably aligned for word32 access. */
#define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])

/* Primitive operations used by the quarter round. */
#define ROTATE(v,c) rotlFixed(v, c)
#define XOR(v,w)    ((v) ^ (w))
#define PLUS(v,w)   (U32V((v) + (w)))
#define PLUSONE(v)  (PLUS((v),1))

/* One ChaCha quarter round on words a, b, c, d of a local array named x.
 * Expands to several statements, each already terminated by ';', so it is
 * invoked without a trailing semicolon and must not be used as the
 * unbraced body of an if/for/while. */
#define QUARTERROUND(a,b,c,d) \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
106
107
108 /**
109 * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
110 * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
111 */
wc_Chacha_SetIV(ChaCha * ctx,const byte * inIv,word32 counter)112 int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
113 {
114 word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
115
116
117 if (ctx == NULL || inIv == NULL)
118 return BAD_FUNC_ARG;
119
120 XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
121
122 ctx->left = 0; /* resets state */
123 ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter; /* block counter */
124 ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
125 ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
126 ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
127
128 return 0;
129 }
130
131 /* "expand 32-byte k" as unsigned 32 byte */
132 static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
133 /* "expand 16-byte k" as unsigned 16 byte */
134 static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
135
136 /**
137 * Key setup. 8 word iv (nonce)
138 */
wc_Chacha_SetKey(ChaCha * ctx,const byte * key,word32 keySz)139 int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
140 {
141 const word32* constants;
142 const byte* k;
143
144 #ifdef XSTREAM_ALIGN
145 word32 alignKey[8];
146 #endif
147
148 if (ctx == NULL || key == NULL)
149 return BAD_FUNC_ARG;
150
151 if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
152 return BAD_FUNC_ARG;
153
154 #ifdef XSTREAM_ALIGN
155 if ((wc_ptr_t)key % 4) {
156 WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
157 XMEMCPY(alignKey, key, keySz);
158 k = (byte*)alignKey;
159 }
160 else {
161 k = key;
162 }
163 #else
164 k = key;
165 #endif /* XSTREAM_ALIGN */
166
167 #ifdef CHACHA_AEAD_TEST
168 word32 i;
169 printf("ChaCha key used :\n");
170 for (i = 0; i < keySz; i++) {
171 printf("%02x", key[i]);
172 if ((i + 1) % 8 == 0)
173 printf("\n");
174 }
175 printf("\n\n");
176 #endif
177
178 ctx->X[4] = U8TO32_LITTLE(k + 0);
179 ctx->X[5] = U8TO32_LITTLE(k + 4);
180 ctx->X[6] = U8TO32_LITTLE(k + 8);
181 ctx->X[7] = U8TO32_LITTLE(k + 12);
182 if (keySz == CHACHA_MAX_KEY_SZ) {
183 k += 16;
184 constants = sigma;
185 }
186 else {
187 constants = tau;
188 }
189 ctx->X[ 8] = U8TO32_LITTLE(k + 0);
190 ctx->X[ 9] = U8TO32_LITTLE(k + 4);
191 ctx->X[10] = U8TO32_LITTLE(k + 8);
192 ctx->X[11] = U8TO32_LITTLE(k + 12);
193 ctx->X[ 0] = constants[0];
194 ctx->X[ 1] = constants[1];
195 ctx->X[ 2] = constants[2];
196 ctx->X[ 3] = constants[3];
197 ctx->left = 0; /* resets state */
198
199 return 0;
200 }
201
202 /**
203 * Converts word into bytes with rotations having been done.
204 */
wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],const word32 input[CHACHA_CHUNK_WORDS])205 static WC_INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],
206 const word32 input[CHACHA_CHUNK_WORDS])
207 {
208 word32 x[CHACHA_CHUNK_WORDS];
209 word32 i;
210
211 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
212 x[i] = input[i];
213 }
214
215 for (i = (ROUNDS); i > 0; i -= 2) {
216 QUARTERROUND(0, 4, 8, 12)
217 QUARTERROUND(1, 5, 9, 13)
218 QUARTERROUND(2, 6, 10, 14)
219 QUARTERROUND(3, 7, 11, 15)
220 QUARTERROUND(0, 5, 10, 15)
221 QUARTERROUND(1, 6, 11, 12)
222 QUARTERROUND(2, 7, 8, 13)
223 QUARTERROUND(3, 4, 9, 14)
224 }
225
226 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
227 x[i] = PLUS(x[i], input[i]);
228 }
229
230 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
231 output[i] = LITTLE32(x[i]);
232 }
233 }
234
235
236 #ifdef HAVE_XCHACHA
237
238 /*
239 * wc_HChacha_block - half a ChaCha block, for XChaCha
240 *
241 * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
242 */
wc_HChacha_block(ChaCha * ctx,word32 stream[CHACHA_CHUNK_WORDS/2],int nrounds)243 static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], int nrounds)
244 {
245 word32 x[CHACHA_CHUNK_WORDS];
246 word32 i;
247
248 for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
249 x[i] = ctx->X[i];
250 }
251
252 for (i = nrounds; i > 0; i -= 2) {
253 QUARTERROUND(0, 4, 8, 12)
254 QUARTERROUND(1, 5, 9, 13)
255 QUARTERROUND(2, 6, 10, 14)
256 QUARTERROUND(3, 7, 11, 15)
257 QUARTERROUND(0, 5, 10, 15)
258 QUARTERROUND(1, 6, 11, 12)
259 QUARTERROUND(2, 7, 8, 13)
260 QUARTERROUND(3, 4, 9, 14)
261 }
262
263 for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
264 stream[i] = x[i];
265 for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
266 stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
267 }
268
269 /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
wc_XChacha_SetKey(ChaCha * ctx,const byte * key,word32 keySz,const byte * nonce,word32 nonceSz,word32 counter)270 int wc_XChacha_SetKey(ChaCha *ctx,
271 const byte *key, word32 keySz,
272 const byte *nonce, word32 nonceSz,
273 word32 counter) {
274 word32 k[CHACHA_MAX_KEY_SZ];
275 byte iv[CHACHA_IV_BYTES];
276 int ret;
277
278 if (nonceSz != XCHACHA_NONCE_BYTES)
279 return BAD_FUNC_ARG;
280
281 if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
282 return ret;
283
284 /* form a first chacha IV from the first 16 bytes of the nonce.
285 * the first word is supplied in the "counter" arg, and
286 * the result is a full 128 bit nonceful IV for the one-time block
287 * crypto op that follows.
288 */
289 if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
290 return ret;
291
292 wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
293
294 /* the HChacha output is used as a 256 bit key for the main cipher. */
295 XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
296
297 /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
298 * to form the IV for the main cipher.
299 */
300 XMEMSET(iv, 0, 4);
301 XMEMCPY(iv + 4, nonce + 16, 8);
302
303 if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
304 return ret;
305
306 ForceZero(k, sizeof k);
307 ForceZero(iv, sizeof iv);
308
309 return 0;
310 }
311
312 #endif /* HAVE_XCHACHA */
313
314
315 #ifdef __cplusplus
316 extern "C" {
317 #endif
318
319 extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
320 word32 bytes);
321 extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
322 word32 bytes);
323 extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
324 word32 bytes);
325
326 #ifdef __cplusplus
327 } /* extern "C" */
328 #endif
329
330
331 /**
332 * Encrypt a stream of bytes
333 */
wc_Chacha_encrypt_bytes(ChaCha * ctx,const byte * m,byte * c,word32 bytes)334 static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
335 word32 bytes)
336 {
337 byte* output;
338 word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
339 word32 i;
340
341 /* handle left overs */
342 if (bytes > 0 && ctx->left > 0) {
343 wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */
344 output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left;
345 for (i = 0; i < bytes && i < ctx->left; i++) {
346 c[i] = (byte)(m[i] ^ output[i]);
347 }
348 ctx->left -= i;
349
350 /* Used up all of the stream that was left, increment the counter */
351 if (ctx->left == 0) {
352 ctx->X[CHACHA_MATRIX_CNT_IV] =
353 PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
354 }
355 bytes -= i;
356 c += i;
357 m += i;
358 }
359
360 output = (byte*)temp;
361 while (bytes >= CHACHA_CHUNK_BYTES) {
362 wc_Chacha_wordtobyte(temp, ctx->X);
363 ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
364 for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
365 c[i] = (byte)(m[i] ^ output[i]);
366 }
367 bytes -= CHACHA_CHUNK_BYTES;
368 c += CHACHA_CHUNK_BYTES;
369 m += CHACHA_CHUNK_BYTES;
370 }
371
372 if (bytes) {
373 /* in this case there will always be some left over since bytes is less
374 * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
375 * stream in order for the stream to be recreated on next call */
376 wc_Chacha_wordtobyte(temp, ctx->X);
377 for (i = 0; i < bytes; ++i) {
378 c[i] = m[i] ^ output[i];
379 }
380 ctx->left = CHACHA_CHUNK_BYTES - i;
381 }
382 }
383
384 /**
385 * API to encrypt/decrypt a message of any size.
386 */
wc_Chacha_Process(ChaCha * ctx,byte * output,const byte * input,word32 msglen)387 int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
388 word32 msglen)
389 {
390 if (ctx == NULL || input == NULL || output == NULL)
391 return BAD_FUNC_ARG;
392
393 #ifdef USE_INTEL_CHACHA_SPEEDUP
394 /* handle left overs */
395 if (msglen > 0 && ctx->left > 0) {
396 byte* out;
397 word32 i;
398
399 out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
400 for (i = 0; i < msglen && i < ctx->left; i++) {
401 output[i] = (byte)(input[i] ^ out[i]);
402 }
403 ctx->left -= i;
404
405 msglen -= i;
406 output += i;
407 input += i;
408 }
409
410 if (msglen == 0) {
411 return 0;
412 }
413
414 if (!cpuidFlagsSet) {
415 cpuidFlags = cpuid_get_flags();
416 cpuidFlagsSet = 1;
417 }
418
419 #ifdef HAVE_INTEL_AVX2
420 if (IS_INTEL_AVX2(cpuidFlags)) {
421 SAVE_VECTOR_REGISTERS(return _svr_ret;);
422 chacha_encrypt_avx2(ctx, input, output, msglen);
423 RESTORE_VECTOR_REGISTERS();
424 return 0;
425 }
426 #endif
427 if (IS_INTEL_AVX1(cpuidFlags)) {
428 SAVE_VECTOR_REGISTERS(return _svr_ret;);
429 chacha_encrypt_avx1(ctx, input, output, msglen);
430 RESTORE_VECTOR_REGISTERS();
431 return 0;
432 }
433 else {
434 chacha_encrypt_x64(ctx, input, output, msglen);
435 return 0;
436 }
437 #endif
438 wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
439
440 return 0;
441 }
442
/**
 * Discard the unused remainder of the current keystream block so that the
 * next call to wc_Chacha_Process() starts at the beginning of a fresh block.
 * Does nothing if ctx is NULL or no partially used block is pending.
 *
 * ctx  cipher state; ctx->left is 0 afterwards
 */
void wc_Chacha_purge_current_block(ChaCha* ctx) {
    if (ctx == NULL)
        return;

    if (ctx->left > 0) {
        byte scratch[CHACHA_CHUNK_BYTES];
        XMEMSET(scratch, 0, sizeof(scratch));
        /* ctx->left is the number of keystream bytes still unused in the
         * current block, so consuming exactly that many bytes ends on the
         * block boundary. (Consuming CHACHA_CHUNK_BYTES - ctx->left only
         * ends on the boundary when exactly half of the block was used.) */
        (void)wc_Chacha_Process(ctx, scratch, scratch, ctx->left);
    }
}
450
451 #endif /* HAVE_CHACHA*/
452
453 #endif /* WOLFSSL_ARMASM */
454