/* $OpenBSD: chacha-merged.c,v 1.9 2019/01/22 00:59:21 dlg Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <sys/types.h>

#include <stdint.h>

#define CHACHA_MINKEYLEN	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

struct chacha_ctx {
	u_int input[16];		/* 4x4 matrix of 32-bit state words */
	uint8_t ks[CHACHA_BLOCKLEN];	/* keystream of the last partial block */
	uint8_t unused;			/* unconsumed bytes remaining in ks */
};

/*
 * The __bounded__ attributes are OpenBSD-specific compile-time annotations:
 * __minbytes__ requires the given argument to point to at least the stated
 * number of bytes, and __buffer__ ties a pointer argument to the length
 * argument that follows it.
 */
static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
    u_int kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
    const u_char *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
    u_char *c, u_int bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/* The ChaCha quarter-round: add, xor, then rotate by 16, 12, 8 and 7 bits. */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

static inline void
chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

static inline void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u_int i;

	if (!bytes)
		return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		if (bytes < 64) {
			/* Partial block: work on a 64-byte bounce buffer. */
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

		if (bytes < 64) {
			/* Save the keystream so leftover bytes can be reused. */
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		/* Advance the 64-bit block counter held in words 12 and 13. */
		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				/* Copy the partial block back out of tmp. */
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}
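
/*
 * Illustrative usage sketch (not part of the original source): encrypt or
 * decrypt a buffer with a 256-bit key, a 64-bit nonce and a block counter
 * starting at zero.  The guard macro and function name below are
 * hypothetical, introduced only to show the expected call sequence.
 */
#ifdef CHACHA_USAGE_EXAMPLE
static void
chacha_usage_example(const u8 key[32], const u8 nonce[8],
    const u8 *in, u8 *out, u_int len)
{
	struct chacha_ctx ctx;

	chacha_keysetup(&ctx, key, 256);	/* 256-bit key (recommended) */
	chacha_ivsetup(&ctx, nonce, NULL);	/* NULL counter starts at 0 */
	chacha_encrypt_bytes(&ctx, in, out, len);
}
#endif /* CHACHA_USAGE_EXAMPLE */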

/*
 * HChaCha20: derive a 256-bit subkey by running the 20 ChaCha rounds over
 * the constants, a 256-bit key and a 128-bit input, without the usual final
 * addition of the input state, and taking the first and last rows of the
 * result.
 */
void
CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
    const unsigned char nonce[16])
{
	uint32_t x[16];
	int i;

	x[0] = U8TO32_LITTLE(sigma + 0);
	x[1] = U8TO32_LITTLE(sigma + 4);
	x[2] = U8TO32_LITTLE(sigma + 8);
	x[3] = U8TO32_LITTLE(sigma + 12);
	x[4] = U8TO32_LITTLE(key + 0);
	x[5] = U8TO32_LITTLE(key + 4);
	x[6] = U8TO32_LITTLE(key + 8);
	x[7] = U8TO32_LITTLE(key + 12);
	x[8] = U8TO32_LITTLE(key + 16);
	x[9] = U8TO32_LITTLE(key + 20);
	x[10] = U8TO32_LITTLE(key + 24);
	x[11] = U8TO32_LITTLE(key + 28);
	x[12] = U8TO32_LITTLE(nonce + 0);
	x[13] = U8TO32_LITTLE(nonce + 4);
	x[14] = U8TO32_LITTLE(nonce + 8);
	x[15] = U8TO32_LITTLE(nonce + 12);

	for (i = 20; i > 0; i -= 2) {
		QUARTERROUND(x[0], x[4], x[8], x[12])
		QUARTERROUND(x[1], x[5], x[9], x[13])
		QUARTERROUND(x[2], x[6], x[10], x[14])
		QUARTERROUND(x[3], x[7], x[11], x[15])
		QUARTERROUND(x[0], x[5], x[10], x[15])
		QUARTERROUND(x[1], x[6], x[11], x[12])
		QUARTERROUND(x[2], x[7], x[8], x[13])
		QUARTERROUND(x[3], x[4], x[9], x[14])
	}

	U32TO8_LITTLE(subkey + 0, x[0]);
	U32TO8_LITTLE(subkey + 4, x[1]);
	U32TO8_LITTLE(subkey + 8, x[2]);
	U32TO8_LITTLE(subkey + 12, x[3]);

	U32TO8_LITTLE(subkey + 16, x[12]);
	U32TO8_LITTLE(subkey + 20, x[13]);
	U32TO8_LITTLE(subkey + 24, x[14]);
	U32TO8_LITTLE(subkey + 28, x[15]);
}
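
/*
 * Illustrative sketch (not part of the original source): a caller can
 * combine CRYPTO_hchacha_20() with the functions above to build an
 * XChaCha20-style cipher with a 192-bit nonce: derive a subkey from the
 * key and the first 16 nonce bytes, then run ChaCha20 with that subkey and
 * the remaining 8 nonce bytes.  The guard macro and function name are
 * hypothetical.
 */
#ifdef CHACHA_XCHACHA_EXAMPLE
static void
xchacha_example(const u8 key[32], const u8 nonce[24],
    const u8 *in, u8 *out, u_int len)
{
	struct chacha_ctx ctx;
	u8 subkey[32];

	CRYPTO_hchacha_20(subkey, key, nonce);		/* subkey from nonce[0..15] */
	chacha_keysetup(&ctx, subkey, 256);
	chacha_ivsetup(&ctx, nonce + 16, NULL);		/* remaining 8 nonce bytes */
	chacha_encrypt_bytes(&ctx, in, out, len);
}
#endif /* CHACHA_XCHACHA_EXAMPLE */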