1 /* Copyright (c) 2014, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
// Adapted from the public-domain eSTREAM code by D. Bernstein.
16 
17 #include <openssl/chacha.h>
18 
19 #include <assert.h>
20 #include <string.h>
21 
22 #include <openssl/cpu.h>
23 
24 #include "../internal.h"
25 #include "internal.h"
26 
27 
// U8TO32_LITTLE reads the four bytes p[0..3] as one little-endian 32-bit word:
// p[0] supplies the least significant byte and p[3] the most significant.
// |p| is evaluated four times, so it must not have side effects.
#define U8TO32_LITTLE(p)                                      \
  (((uint32_t)((p)[3]) << 24) | ((uint32_t)((p)[2]) << 16) | \
   ((uint32_t)((p)[1]) << 8) | ((uint32_t)((p)[0])))
31 
// U32TO8_LITTLE stores the 32-bit value |v| into p[0..3] in little-endian byte
// order (least significant byte first).
//
// The body is wrapped in do { ... } while (0) so that the macro, followed by
// the caller's semicolon, is exactly one statement and can be used in an
// unbraced if/else. |v| is parenthesized so that an argument built from
// low-precedence operators (for example a | b) is shifted as a whole. |p| and
// |v| are each evaluated four times, so they must not have side effects.
#define U32TO8_LITTLE(p, v)                 \
  do {                                      \
    (p)[0] = (uint8_t)(((v) >> 0) & 0xff);  \
    (p)[1] = (uint8_t)(((v) >> 8) & 0xff);  \
    (p)[2] = (uint8_t)(((v) >> 16) & 0xff); \
    (p)[3] = (uint8_t)(((v) >> 24) & 0xff); \
  } while (0)
39 
// sigma holds the sixteen ChaCha constant bytes. They spell the ASCII string
// "expand 32-byte k"; each row below is one little-endian state word.
static const uint8_t sigma[16] = {
    'e', 'x', 'p', 'a',
    'n', 'd', ' ', '3',
    '2', '-', 'b', 'y',
    't', 'e', ' ', 'k',
};
43 
// ROTATE rotates |v| left by |n| bits. It is only a true rotation when |v| is
// a 32-bit unsigned value and |n| is in the range 1..31 (shifting a 32-bit
// value by 32 is undefined). |v| and |n| are each evaluated twice.
#define ROTATE(v, n) (((v) >> (32 - (n))) | ((v) << (n)))
45 
// QUARTERROUND applies one ChaCha quarter round to the state words x[a], x[b],
// x[c] and x[d], updating them in place. It operates on an array named |x|
// that must be in scope where the macro is used.
//
// The expansion is a sequence of complete statements, including the final
// semicolon, so call sites write it without a trailing semicolon and must not
// use it as the body of an unbraced if/for/while.
#define QUARTERROUND(a, b, c, d)  \
  x[a] += x[b];                   \
  x[d] = ROTATE(x[d] ^ x[a], 16); \
  x[c] += x[d];                   \
  x[b] = ROTATE(x[b] ^ x[c], 12); \
  x[a] += x[b];                   \
  x[d] = ROTATE(x[d] ^ x[a], 8);  \
  x[c] += x[d];                   \
  x[b] = ROTATE(x[b] ^ x[c], 7);
52 
CRYPTO_hchacha20(uint8_t out[32],const uint8_t key[32],const uint8_t nonce[16])53 void CRYPTO_hchacha20(uint8_t out[32], const uint8_t key[32],
54                       const uint8_t nonce[16]) {
55   uint32_t x[16];
56   int i;
57 
58   x[0] = U8TO32_LITTLE(sigma + 0);
59   x[1] = U8TO32_LITTLE(sigma + 4);
60   x[2] = U8TO32_LITTLE(sigma + 8);
61   x[3] = U8TO32_LITTLE(sigma + 12);
62 
63   x[4] = U8TO32_LITTLE(key + 0);
64   x[5] = U8TO32_LITTLE(key + 4);
65   x[6] = U8TO32_LITTLE(key + 8);
66   x[7] = U8TO32_LITTLE(key + 12);
67 
68   x[8] = U8TO32_LITTLE(key + 16);
69   x[9] = U8TO32_LITTLE(key + 20);
70   x[10] = U8TO32_LITTLE(key + 24);
71   x[11] = U8TO32_LITTLE(key + 28);
72 
73   x[12] = U8TO32_LITTLE(nonce + 0);
74   x[13] = U8TO32_LITTLE(nonce + 4);
75   x[14] = U8TO32_LITTLE(nonce + 8);
76   x[15] = U8TO32_LITTLE(nonce + 12);
77 
78   for (size_t i = 0; i < 20; i += 2) {
79     QUARTERROUND(0, 4, 8, 12)
80     QUARTERROUND(1, 5, 9, 13)
81     QUARTERROUND(2, 6, 10, 14)
82     QUARTERROUND(3, 7, 11, 15)
83     QUARTERROUND(0, 5, 10, 15)
84     QUARTERROUND(1, 6, 11, 12)
85     QUARTERROUND(2, 7, 8, 13)
86     QUARTERROUND(3, 4, 9, 14)
87   }
88 
89   for (i = 0; i < 4; ++i) {
90     U32TO8_LITTLE(out + 4 * i, x[i]);
91     U32TO8_LITTLE(&out[16] + 4 * i, x[12+i]);
92   }
93 }
94 
95 #if defined(CHACHA20_ASM)
96 
// CRYPTO_chacha_20 is the variant built when CHACHA20_ASM is defined. It packs
// |counter| and the three little-endian words of |nonce| into a four-word
// block and hands |in|, |out|, |in_len| and the key to the ChaCha20_ctr32
// assembly routine.
//
// The assert requires that |in| and |out| either be the same pointer or not
// alias each other (as reported by buffers_alias).
void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
                      const uint8_t key[32], const uint8_t nonce[12],
                      uint32_t counter) {
  assert(!buffers_alias(out, in_len, in, in_len) || in == out);

  uint32_t counter_nonce[4];
  counter_nonce[0] = counter;
  counter_nonce[1] = U8TO32_LITTLE(nonce + 0);
  counter_nonce[2] = U8TO32_LITTLE(nonce + 4);
  counter_nonce[3] = U8TO32_LITTLE(nonce + 8);

  const uint32_t *key_ptr = (const uint32_t *)key;
#if !defined(OPENSSL_X86) && !defined(OPENSSL_X86_64)
  // The assembly expects the key to be four-byte aligned, so a misaligned key
  // is first copied, word by word, into an aligned local array.
  uint32_t key_u32[8];
  if ((((uintptr_t)key) & 3) != 0) {
    for (size_t word = 0; word < 8; word++) {
      key_u32[word] = U8TO32_LITTLE(key + 4 * word);
    }
    key_ptr = key_u32;
  }
#endif

  // What the assembly does when the 32-bit block counter overflows is not
  // visible from this file. The portable implementation lets the counter wrap
  // modulo 2^32, so each call here is limited to the bytes that remain before
  // the wrap point and the counter is restarted at zero afterwards. That makes
  // both variants agree regardless of how the assembly treats overflow. When
  // no wrap occurs this is a single call, as before.
  while (in_len > 0) {
    // 64 bytes per block times the number of blocks left before the wrap; at
    // most 2^38, which fits in 64 bits.
    const uint64_t until_wrap = 64 * ((UINT64_C(1) << 32) - counter_nonce[0]);
    const size_t todo = until_wrap < in_len ? (size_t)until_wrap : in_len;

    ChaCha20_ctr32(out, in, todo, key_ptr, counter_nonce);
    in += todo;
    out += todo;
    in_len -= todo;

    // Either everything has been processed and the loop ends, or the call
    // stopped exactly at the wrap point and the counter continues from zero.
    counter_nonce[0] = 0;
  }
}
127 
128 #else
129 
130 // chacha_core performs 20 rounds of ChaCha on the input words in
131 // |input| and writes the 64 output bytes to |output|.
chacha_core(uint8_t output[64],const uint32_t input[16])132 static void chacha_core(uint8_t output[64], const uint32_t input[16]) {
133   uint32_t x[16];
134   int i;
135 
136   OPENSSL_memcpy(x, input, sizeof(uint32_t) * 16);
137   for (i = 20; i > 0; i -= 2) {
138     QUARTERROUND(0, 4, 8, 12)
139     QUARTERROUND(1, 5, 9, 13)
140     QUARTERROUND(2, 6, 10, 14)
141     QUARTERROUND(3, 7, 11, 15)
142     QUARTERROUND(0, 5, 10, 15)
143     QUARTERROUND(1, 6, 11, 12)
144     QUARTERROUND(2, 7, 8, 13)
145     QUARTERROUND(3, 4, 9, 14)
146   }
147 
148   for (i = 0; i < 16; ++i) {
149     x[i] += input[i];
150   }
151   for (i = 0; i < 16; ++i) {
152     U32TO8_LITTLE(output + 4 * i, x[i]);
153   }
154 }
155 
CRYPTO_chacha_20(uint8_t * out,const uint8_t * in,size_t in_len,const uint8_t key[32],const uint8_t nonce[12],uint32_t counter)156 void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
157                       const uint8_t key[32], const uint8_t nonce[12],
158                       uint32_t counter) {
159   assert(!buffers_alias(out, in_len, in, in_len) || in == out);
160 
161   uint32_t input[16];
162   uint8_t buf[64];
163   size_t todo, i;
164 
165   input[0] = U8TO32_LITTLE(sigma + 0);
166   input[1] = U8TO32_LITTLE(sigma + 4);
167   input[2] = U8TO32_LITTLE(sigma + 8);
168   input[3] = U8TO32_LITTLE(sigma + 12);
169 
170   input[4] = U8TO32_LITTLE(key + 0);
171   input[5] = U8TO32_LITTLE(key + 4);
172   input[6] = U8TO32_LITTLE(key + 8);
173   input[7] = U8TO32_LITTLE(key + 12);
174 
175   input[8] = U8TO32_LITTLE(key + 16);
176   input[9] = U8TO32_LITTLE(key + 20);
177   input[10] = U8TO32_LITTLE(key + 24);
178   input[11] = U8TO32_LITTLE(key + 28);
179 
180   input[12] = counter;
181   input[13] = U8TO32_LITTLE(nonce + 0);
182   input[14] = U8TO32_LITTLE(nonce + 4);
183   input[15] = U8TO32_LITTLE(nonce + 8);
184 
185   while (in_len > 0) {
186     todo = sizeof(buf);
187     if (in_len < todo) {
188       todo = in_len;
189     }
190 
191     chacha_core(buf, input);
192     for (i = 0; i < todo; i++) {
193       out[i] = in[i] ^ buf[i];
194     }
195 
196     out += todo;
197     in += todo;
198     in_len -= todo;
199 
200     input[12]++;
201   }
202 }
203 
204 #endif
205