1 /* chacha.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20  */
21 /*
22 
23 DESCRIPTION
24 This library contains implementation for the ChaCha20 stream cipher.
25 
26 Based from chacha-ref.c version 20080118
27 D. J. Bernstein
28 Public domain.
29 
30 */
31 #ifdef WOLFSSL_ARMASM
32     /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
33 
34 #else
35 #ifdef HAVE_CONFIG_H
36     #include <config.h>
37 #endif
38 
39 #include <wolfssl/wolfcrypt/settings.h>
40 
41 #if defined(HAVE_CHACHA) && !defined(WOLFSSL_ARMASM)
42 
43 #include <wolfssl/wolfcrypt/chacha.h>
44 #include <wolfssl/wolfcrypt/error-crypt.h>
45 #include <wolfssl/wolfcrypt/logging.h>
46 #include <wolfssl/wolfcrypt/cpuid.h>
47 #ifdef NO_INLINE
48     #include <wolfssl/wolfcrypt/misc.h>
49 #else
50     #define WOLFSSL_MISC_INCLUDED
51     #include <wolfcrypt/src/misc.c>
52 #endif
53 
54 #ifdef CHACHA_AEAD_TEST
55     #include <stdio.h>
56 #endif
57 
#ifdef USE_INTEL_CHACHA_SPEEDUP
    #include <emmintrin.h>
    #include <immintrin.h>

    /* GCC 4.8 and older: force NO_AVX2_SUPPORT so the AVX2 path is not used. */
    #if defined(__GNUC__) && ((__GNUC__ < 4) || \
                              (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #endif
    /* clang 3.5 and older: force NO_AVX2_SUPPORT. For any newer clang the
     * macro is removed again, whether it was set by the GCC check above or
     * by the build configuration.
     * NOTE(review): presumably this undoes the GCC check because clang also
     * defines __GNUC__ with an old version number -- confirm. */
    #if defined(__clang__) && ((__clang_major__ < 3) || \
                               (__clang_major__ == 3 && __clang_minor__ <= 5))
        #undef  NO_AVX2_SUPPORT
        #define NO_AVX2_SUPPORT
    #elif defined(__clang__) && defined(NO_AVX2_SUPPORT)
        #undef NO_AVX2_SUPPORT
    #endif

    /* HAVE_INTEL_AVX2 gates the chacha_encrypt_avx2() call in
     * wc_Chacha_Process(). */
    #ifndef NO_AVX2_SUPPORT
        #define HAVE_INTEL_AVX2
    #endif

    /* CPU feature flags, filled in by wc_Chacha_Process() the first time it
     * reaches the accelerated path; cpuidFlagsSet records that it was done. */
    static int cpuidFlagsSet = 0;
    static int cpuidFlags = 0;
#endif
82 
/* LITTLE32(x): byte-swap x on big-endian builds, pass it through unchanged
 * otherwise. */
#ifdef BIG_ENDIAN_ORDER
    #define LITTLE32(x) ByteReverseWord32(x)
#else
    #define LITTLE32(x) (x)
#endif

/* Number of rounds */
#define ROUNDS  20

/* U32C(v): append the U suffix to a literal.
 * U32V(v): convert v to word32 and mask it to the low 32 bits.
 * U8TO32_LITTLE(p): read one word through a word32* cast of p, then apply
 * LITTLE32.
 * NOTE(review): p is dereferenced as word32*, so a misaligned p is
 * presumably unsafe on strict-alignment targets -- callers must make sure
 * it is suitably aligned. */
#define U32C(v) (v##U)
#define U32V(v) ((word32)(v) & U32C(0xFFFFFFFF))
#define U8TO32_LITTLE(p) LITTLE32(((word32*)(p))[0])

/* 32-bit primitives used by QUARTERROUND: left rotate (via rotlFixed, which
 * is defined outside this file), xor, addition masked to 32 bits, and
 * increment by one. */
#define ROTATE(v,c) rotlFixed(v, c)
#define XOR(v,w)    ((v) ^ (w))
#define PLUS(v,w)   (U32V((v) + (w)))
#define PLUSONE(v)  (PLUS((v),1))

/* One quarter round on the four words x[a], x[b], x[c], x[d].
 * The macro expands to bare statements (each already terminated by ';') and
 * operates on an array named x that must exist in the invoking scope; it is
 * invoked without a trailing semicolon. */
#define QUARTERROUND(a,b,c,d) \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
  x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
  x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
106 
107 
108 /**
109   * Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
110   * uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
111   */
wc_Chacha_SetIV(ChaCha * ctx,const byte * inIv,word32 counter)112 int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
113 {
114     word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
115 
116 
117     if (ctx == NULL || inIv == NULL)
118         return BAD_FUNC_ARG;
119 
120     XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
121 
122     ctx->left = 0; /* resets state */
123     ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;           /* block counter */
124     ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]); /* fixed variable from nonce */
125     ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]); /* counter from nonce */
126     ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]); /* counter from nonce */
127 
128     return 0;
129 }
130 
131 /* "expand 32-byte k" as unsigned 32 byte */
132 static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
133 /* "expand 16-byte k" as unsigned 16 byte */
134 static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
135 
136 /**
137   * Key setup. 8 word iv (nonce)
138   */
wc_Chacha_SetKey(ChaCha * ctx,const byte * key,word32 keySz)139 int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
140 {
141     const word32* constants;
142     const byte*   k;
143 
144 #ifdef XSTREAM_ALIGN
145     word32 alignKey[8];
146 #endif
147 
148     if (ctx == NULL || key == NULL)
149         return BAD_FUNC_ARG;
150 
151     if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
152         return BAD_FUNC_ARG;
153 
154 #ifdef XSTREAM_ALIGN
155     if ((wc_ptr_t)key % 4) {
156         WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
157         XMEMCPY(alignKey, key, keySz);
158         k = (byte*)alignKey;
159     }
160     else {
161         k = key;
162     }
163 #else
164     k = key;
165 #endif /* XSTREAM_ALIGN */
166 
167 #ifdef CHACHA_AEAD_TEST
168     word32 i;
169     printf("ChaCha key used :\n");
170     for (i = 0; i < keySz; i++) {
171         printf("%02x", key[i]);
172         if ((i + 1) % 8 == 0)
173            printf("\n");
174     }
175     printf("\n\n");
176 #endif
177 
178     ctx->X[4] = U8TO32_LITTLE(k +  0);
179     ctx->X[5] = U8TO32_LITTLE(k +  4);
180     ctx->X[6] = U8TO32_LITTLE(k +  8);
181     ctx->X[7] = U8TO32_LITTLE(k + 12);
182     if (keySz == CHACHA_MAX_KEY_SZ) {
183         k += 16;
184         constants = sigma;
185     }
186     else {
187         constants = tau;
188     }
189     ctx->X[ 8] = U8TO32_LITTLE(k +  0);
190     ctx->X[ 9] = U8TO32_LITTLE(k +  4);
191     ctx->X[10] = U8TO32_LITTLE(k +  8);
192     ctx->X[11] = U8TO32_LITTLE(k + 12);
193     ctx->X[ 0] = constants[0];
194     ctx->X[ 1] = constants[1];
195     ctx->X[ 2] = constants[2];
196     ctx->X[ 3] = constants[3];
197     ctx->left = 0; /* resets state */
198 
199     return 0;
200 }
201 
202 /**
203   * Converts word into bytes with rotations having been done.
204   */
wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],const word32 input[CHACHA_CHUNK_WORDS])205 static WC_INLINE void wc_Chacha_wordtobyte(word32 output[CHACHA_CHUNK_WORDS],
206     const word32 input[CHACHA_CHUNK_WORDS])
207 {
208     word32 x[CHACHA_CHUNK_WORDS];
209     word32 i;
210 
211     for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
212         x[i] = input[i];
213     }
214 
215     for (i = (ROUNDS); i > 0; i -= 2) {
216         QUARTERROUND(0, 4,  8, 12)
217         QUARTERROUND(1, 5,  9, 13)
218         QUARTERROUND(2, 6, 10, 14)
219         QUARTERROUND(3, 7, 11, 15)
220         QUARTERROUND(0, 5, 10, 15)
221         QUARTERROUND(1, 6, 11, 12)
222         QUARTERROUND(2, 7,  8, 13)
223         QUARTERROUND(3, 4,  9, 14)
224     }
225 
226     for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
227         x[i] = PLUS(x[i], input[i]);
228     }
229 
230     for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
231         output[i] = LITTLE32(x[i]);
232     }
233 }
234 
235 
236 #ifdef HAVE_XCHACHA
237 
238 /*
239  * wc_HChacha_block - half a ChaCha block, for XChaCha
240  *
241  * see https://tools.ietf.org/html/draft-arciszewski-xchacha-03
242  */
wc_HChacha_block(ChaCha * ctx,word32 stream[CHACHA_CHUNK_WORDS/2],int nrounds)243 static WC_INLINE void wc_HChacha_block(ChaCha* ctx, word32 stream[CHACHA_CHUNK_WORDS/2], int nrounds)
244 {
245     word32 x[CHACHA_CHUNK_WORDS];
246     word32 i;
247 
248     for (i = 0; i < CHACHA_CHUNK_WORDS; i++) {
249         x[i] = ctx->X[i];
250     }
251 
252     for (i = nrounds; i > 0; i -= 2) {
253         QUARTERROUND(0, 4,  8, 12)
254         QUARTERROUND(1, 5,  9, 13)
255         QUARTERROUND(2, 6, 10, 14)
256         QUARTERROUND(3, 7, 11, 15)
257         QUARTERROUND(0, 5, 10, 15)
258         QUARTERROUND(1, 6, 11, 12)
259         QUARTERROUND(2, 7,  8, 13)
260         QUARTERROUND(3, 4,  9, 14)
261     }
262 
263     for (i = 0; i < CHACHA_CHUNK_WORDS/4; ++i)
264         stream[i] = x[i];
265     for (i = CHACHA_CHUNK_WORDS/4; i < CHACHA_CHUNK_WORDS/2; ++i)
266         stream[i] = x[i + CHACHA_CHUNK_WORDS/2];
267 }
268 
269 /* XChaCha -- https://tools.ietf.org/html/draft-arciszewski-xchacha-03 */
wc_XChacha_SetKey(ChaCha * ctx,const byte * key,word32 keySz,const byte * nonce,word32 nonceSz,word32 counter)270 int wc_XChacha_SetKey(ChaCha *ctx,
271                       const byte *key, word32 keySz,
272                       const byte *nonce, word32 nonceSz,
273                       word32 counter) {
274     word32 k[CHACHA_MAX_KEY_SZ];
275     byte iv[CHACHA_IV_BYTES];
276     int ret;
277 
278     if (nonceSz != XCHACHA_NONCE_BYTES)
279         return BAD_FUNC_ARG;
280 
281     if ((ret = wc_Chacha_SetKey(ctx, key, keySz)) < 0)
282         return ret;
283 
284     /* form a first chacha IV from the first 16 bytes of the nonce.
285      * the first word is supplied in the "counter" arg, and
286      * the result is a full 128 bit nonceful IV for the one-time block
287      * crypto op that follows.
288      */
289     if ((ret = wc_Chacha_SetIV(ctx, nonce + 4, U8TO32_LITTLE(nonce))) < 0)
290         return ret;
291 
292     wc_HChacha_block(ctx, k, 20); /* 20 rounds, but keeping half the output. */
293 
294     /* the HChacha output is used as a 256 bit key for the main cipher. */
295     XMEMCPY(&ctx->X[4], k, 8 * sizeof(word32));
296 
297     /* use 8 bytes from the end of the 24 byte nonce, padded up to 12 bytes,
298      * to form the IV for the main cipher.
299      */
300     XMEMSET(iv, 0, 4);
301     XMEMCPY(iv + 4, nonce + 16, 8);
302 
303     if ((ret = wc_Chacha_SetIV(ctx, iv, counter)) < 0)
304         return ret;
305 
306     ForceZero(k, sizeof k);
307     ForceZero(iv, sizeof iv);
308 
309     return 0;
310 }
311 
312 #endif /* HAVE_XCHACHA */
313 
314 
#ifdef __cplusplus
    extern "C" {
#endif

/* Accelerated ChaCha back-ends, defined outside this file and called from
 * wc_Chacha_Process() when USE_INTEL_CHACHA_SPEEDUP is defined. Each one
 * takes the cipher state, the input buffer m, the output buffer c and the
 * number of bytes to process. */
extern void chacha_encrypt_x64(ChaCha* ctx, const byte* m, byte* c,
                               word32 bytes);
extern void chacha_encrypt_avx1(ChaCha* ctx, const byte* m, byte* c,
                                word32 bytes);
extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
                                word32 bytes);

#ifdef __cplusplus
    }  /* extern "C" */
#endif
329 
330 
331 /**
332   * Encrypt a stream of bytes
333   */
wc_Chacha_encrypt_bytes(ChaCha * ctx,const byte * m,byte * c,word32 bytes)334 static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
335                                     word32 bytes)
336 {
337     byte*  output;
338     word32 temp[CHACHA_CHUNK_WORDS]; /* used to make sure aligned */
339     word32 i;
340 
341     /* handle left overs */
342     if (bytes > 0 && ctx->left > 0) {
343         wc_Chacha_wordtobyte(temp, ctx->X); /* recreate the stream */
344         output = (byte*)temp + CHACHA_CHUNK_BYTES - ctx->left;
345         for (i = 0; i < bytes && i < ctx->left; i++) {
346             c[i] = (byte)(m[i] ^ output[i]);
347         }
348         ctx->left -= i;
349 
350         /* Used up all of the stream that was left, increment the counter */
351         if (ctx->left == 0) {
352             ctx->X[CHACHA_MATRIX_CNT_IV] =
353                                           PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
354         }
355         bytes -= i;
356         c += i;
357         m += i;
358     }
359 
360     output = (byte*)temp;
361     while (bytes >= CHACHA_CHUNK_BYTES) {
362         wc_Chacha_wordtobyte(temp, ctx->X);
363         ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
364         for (i = 0; i < CHACHA_CHUNK_BYTES; ++i) {
365             c[i] = (byte)(m[i] ^ output[i]);
366         }
367         bytes -= CHACHA_CHUNK_BYTES;
368         c += CHACHA_CHUNK_BYTES;
369         m += CHACHA_CHUNK_BYTES;
370     }
371 
372     if (bytes) {
373         /* in this case there will always be some left over since bytes is less
374          * than CHACHA_CHUNK_BYTES, so do not increment counter after getting
375          * stream in order for the stream to be recreated on next call */
376         wc_Chacha_wordtobyte(temp, ctx->X);
377         for (i = 0; i < bytes; ++i) {
378             c[i] = m[i] ^ output[i];
379         }
380         ctx->left = CHACHA_CHUNK_BYTES - i;
381     }
382 }
383 
384 /**
385   * API to encrypt/decrypt a message of any size.
386   */
wc_Chacha_Process(ChaCha * ctx,byte * output,const byte * input,word32 msglen)387 int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
388                       word32 msglen)
389 {
390     if (ctx == NULL || input == NULL || output == NULL)
391         return BAD_FUNC_ARG;
392 
393 #ifdef USE_INTEL_CHACHA_SPEEDUP
394     /* handle left overs */
395     if (msglen > 0 && ctx->left > 0) {
396         byte*  out;
397         word32 i;
398 
399         out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
400         for (i = 0; i < msglen && i < ctx->left; i++) {
401             output[i] = (byte)(input[i] ^ out[i]);
402         }
403         ctx->left -= i;
404 
405         msglen -= i;
406         output += i;
407         input += i;
408     }
409 
410     if (msglen == 0) {
411         return 0;
412     }
413 
414     if (!cpuidFlagsSet) {
415         cpuidFlags = cpuid_get_flags();
416         cpuidFlagsSet = 1;
417     }
418 
419     #ifdef HAVE_INTEL_AVX2
420     if (IS_INTEL_AVX2(cpuidFlags)) {
421         SAVE_VECTOR_REGISTERS(return _svr_ret;);
422         chacha_encrypt_avx2(ctx, input, output, msglen);
423         RESTORE_VECTOR_REGISTERS();
424         return 0;
425     }
426     #endif
427     if (IS_INTEL_AVX1(cpuidFlags)) {
428         SAVE_VECTOR_REGISTERS(return _svr_ret;);
429         chacha_encrypt_avx1(ctx, input, output, msglen);
430         RESTORE_VECTOR_REGISTERS();
431         return 0;
432     }
433     else {
434         chacha_encrypt_x64(ctx, input, output, msglen);
435         return 0;
436     }
437 #endif
438     wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
439 
440     return 0;
441 }
442 
/**
  * Discard the unused remainder of the current keystream block, so that the
  * next wc_Chacha_Process() call starts at the beginning of a new block.
  *
  * ctx->left is the number of keystream bytes of the current block that have
  * not been used yet, so exactly that many bytes are processed into a
  * throw-away buffer. Does nothing when ctx is NULL or no block is
  * partially consumed.
  *
  * ctx  cipher state
  */
void wc_Chacha_purge_current_block(ChaCha* ctx) {
    if (ctx != NULL && ctx->left > 0) {
        byte scratch[CHACHA_CHUNK_BYTES];
        XMEMSET(scratch, 0, sizeof(scratch));
        /* consume the ctx->left remaining bytes; the earlier
         * CHACHA_CHUNK_BYTES - ctx->left only reached a block boundary when
         * exactly half of the block had been used */
        (void)wc_Chacha_Process(ctx, scratch, scratch, ctx->left);
    }
}
450 
451 #endif /* HAVE_CHACHA*/
452 
453 #endif /* WOLFSSL_ARMASM */
454