/* $OpenBSD: chacha-merged.c,v 1.9 2019/01/22 00:59:21 dlg Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <sys/types.h>

#include <stdint.h>

#if !defined(__OpenBSD__)
#define __bounded__(x, y, z)
#endif

#define CHACHA_MINKEYLEN	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

struct chacha_ctx {
	u_int input[16];		/* cipher state: constants, key, counter, nonce */
	uint8_t ks[CHACHA_BLOCKLEN];	/* keystream of the last (partial) block */
	uint8_t unused;			/* number of keystream bytes still unused in ks */
};

static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
    u_int kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
    const u_char *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
    u_char *c, u_int bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/* One ChaCha quarter round: add, xor, rotate left by 16, 12, 8 and 7. */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

static inline void
chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

static inline void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;	/* real output pointer while working via tmp */
	u8 tmp[64];		/* bounce buffer for a final block shorter than 64 bytes */
	u_int i;

	if (!bytes)
		return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		if (bytes < 64) {
			/* Short final block: work in tmp, copy out below. */
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

		if (bytes < 64) {
			/* Save the keystream block; bytes..63 remain unused. */
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				/* Copy only the requested bytes back to the real destination. */
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			/* Save the block counter and the count of unused keystream bytes. */
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}

/*
 * HChaCha20: derive a 256-bit subkey from a 256-bit key and a 128-bit
 * nonce by running the 20 ChaCha rounds without the final state addition
 * and keeping state words 0-3 and 12-15.
 */
void
CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
    const unsigned char nonce[16])
{
	uint32_t x[16];
	int i;

	x[0] = U8TO32_LITTLE(sigma + 0);
	x[1] = U8TO32_LITTLE(sigma + 4);
	x[2] = U8TO32_LITTLE(sigma + 8);
	x[3] = U8TO32_LITTLE(sigma + 12);
	x[4] = U8TO32_LITTLE(key + 0);
	x[5] = U8TO32_LITTLE(key + 4);
	x[6] = U8TO32_LITTLE(key + 8);
	x[7] = U8TO32_LITTLE(key + 12);
	x[8] = U8TO32_LITTLE(key + 16);
	x[9] = U8TO32_LITTLE(key + 20);
	x[10] = U8TO32_LITTLE(key + 24);
	x[11] = U8TO32_LITTLE(key + 28);
	x[12] = U8TO32_LITTLE(nonce + 0);
	x[13] = U8TO32_LITTLE(nonce + 4);
	x[14] = U8TO32_LITTLE(nonce + 8);
	x[15] = U8TO32_LITTLE(nonce + 12);

	for (i = 20; i > 0; i -= 2) {
		QUARTERROUND(x[0], x[4], x[8], x[12])
		QUARTERROUND(x[1], x[5], x[9], x[13])
		QUARTERROUND(x[2], x[6], x[10], x[14])
		QUARTERROUND(x[3], x[7], x[11], x[15])
		QUARTERROUND(x[0], x[5], x[10], x[15])
		QUARTERROUND(x[1], x[6], x[11], x[12])
		QUARTERROUND(x[2], x[7], x[8], x[13])
		QUARTERROUND(x[3], x[4], x[9], x[14])
	}

	U32TO8_LITTLE(subkey + 0, x[0]);
	U32TO8_LITTLE(subkey + 4, x[1]);
	U32TO8_LITTLE(subkey + 8, x[2]);
	U32TO8_LITTLE(subkey + 12, x[3]);

	U32TO8_LITTLE(subkey + 16, x[12]);
	U32TO8_LITTLE(subkey + 20, x[13]);
	U32TO8_LITTLE(subkey + 24, x[14]);
	U32TO8_LITTLE(subkey + 28, x[15]);
}
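
#if 0
/*
 * Usage sketch (illustrative only, not compiled; the wrapper name and
 * buffers are hypothetical): a caller would typically install a 256-bit
 * key, then a 64-bit nonce with an optional 64-bit initial counter, and
 * finally stream-encrypt a buffer. Decryption is the same operation.
 */
static void
chacha_example(const u_char key[32], const u_char nonce[CHACHA_NONCELEN],
    const u_char *plaintext, u_char *ciphertext, u_int len)
{
	struct chacha_ctx ctx;

	chacha_keysetup(&ctx, key, 256);	/* key length in bits, not bytes */
	chacha_ivsetup(&ctx, nonce, NULL);	/* NULL counter starts the block counter at 0 */
	chacha_encrypt_bytes(&ctx, plaintext, ciphertext, len);
}
#endif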