/* $OpenBSD: chacha-merged.c,v 1.10 2021/10/22 17:43:00 tb Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stdint.h>

#if !defined(__OpenBSD__)
#define __bounded__(x, y, z)
#endif

#define CHACHA_MINKEYLEN 	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

typedef uint8_t u8;
typedef uint32_t u32;

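/*
 * input holds the 4x4-word cipher state (constants, key, counter,
 * nonce).  ks caches the most recently generated keystream block and
 * unused counts how many of its trailing bytes are still available,
 * so a caller can consume a partial block incrementally.
 */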
struct chacha_ctx {
	u32 input[16];
	u8 ks[CHACHA_BLOCKLEN];
	u8 unused;
};

static inline void chacha_keysetup(struct chacha_ctx *x, const u8 *k, u32 kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u8 *iv,
    const u8 *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u8 *m,
    u8 *c, u32 bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

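/*
 * One ChaCha quarter round: four add-rotate-xor steps on four state
 * words, with rotation counts 16, 12, 8 and 7.
 */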
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

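/*
 * Load the key into state words 4..11 and the matching constant into
 * words 0..3: sigma for a 256-bit key, tau for a 128-bit key, whose
 * 16 bytes are loaded twice.
 */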
static inline void
chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

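/*
 * Load the 64-bit block counter into state words 12..13 (zero if
 * counter is NULL) and the 64-bit nonce into words 14..15.
 */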
static inline void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

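/*
 * XOR m with the keystream, one 64-byte block per iteration, writing
 * the result to c.  Because the keystream is simply XORed in, the
 * same routine serves for both encryption and decryption.
 */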
static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u32 i;

	if (!bytes)
		return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

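	/*
	 * A final block shorter than 64 bytes is staged through tmp so
	 * that the full-width loads and stores below never touch bytes
	 * outside the caller's buffers.
	 */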
	for (;;) {
		if (bytes < 64) {
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

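		/*
		 * For a final partial block, stash the whole keystream
		 * block; its unconsumed tail stays available to callers
		 * through x->ks and x->unused.
		 */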
		if (bytes < 64) {
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}

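/*
 * HChaCha20: derive a 256-bit subkey from a 256-bit key and a 128-bit
 * nonce by applying the 20-round ChaCha permutation and keeping state
 * words 0..3 and 12..15, with no final feed-forward addition.
 */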
void
CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
    const unsigned char nonce[16])
{
	uint32_t x[16];
	int i;

	x[0] = U8TO32_LITTLE(sigma + 0);
	x[1] = U8TO32_LITTLE(sigma + 4);
	x[2] = U8TO32_LITTLE(sigma + 8);
	x[3] = U8TO32_LITTLE(sigma + 12);
	x[4] = U8TO32_LITTLE(key + 0);
	x[5] = U8TO32_LITTLE(key + 4);
	x[6] = U8TO32_LITTLE(key + 8);
	x[7] = U8TO32_LITTLE(key + 12);
	x[8] = U8TO32_LITTLE(key + 16);
	x[9] = U8TO32_LITTLE(key + 20);
	x[10] = U8TO32_LITTLE(key + 24);
	x[11] = U8TO32_LITTLE(key + 28);
	x[12] = U8TO32_LITTLE(nonce + 0);
	x[13] = U8TO32_LITTLE(nonce + 4);
	x[14] = U8TO32_LITTLE(nonce + 8);
	x[15] = U8TO32_LITTLE(nonce + 12);

	for (i = 20; i > 0; i -= 2) {
		QUARTERROUND(x[0], x[4], x[8], x[12])
		QUARTERROUND(x[1], x[5], x[9], x[13])
		QUARTERROUND(x[2], x[6], x[10], x[14])
		QUARTERROUND(x[3], x[7], x[11], x[15])
		QUARTERROUND(x[0], x[5], x[10], x[15])
		QUARTERROUND(x[1], x[6], x[11], x[12])
		QUARTERROUND(x[2], x[7], x[8], x[13])
		QUARTERROUND(x[3], x[4], x[9], x[14])
	}

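	/* The subkey is state words 0..3 followed by words 12..15. */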
	U32TO8_LITTLE(subkey + 0, x[0]);
	U32TO8_LITTLE(subkey + 4, x[1]);
	U32TO8_LITTLE(subkey + 8, x[2]);
	U32TO8_LITTLE(subkey + 12, x[3]);

	U32TO8_LITTLE(subkey + 16, x[12]);
	U32TO8_LITTLE(subkey + 20, x[13]);
	U32TO8_LITTLE(subkey + 24, x[14]);
	U32TO8_LITTLE(subkey + 28, x[15]);
}
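
/*
 * Minimal usage sketch of the API above; the key, nonce and buffer
 * contents are placeholders a real caller must fill in, and passing
 * NULL for the counter starts it at zero:
 *
 *	struct chacha_ctx ctx;
 *	u8 key[32], nonce[8];
 *	u8 in[100], out[100];
 *
 *	chacha_keysetup(&ctx, key, 256);
 *	chacha_ivsetup(&ctx, nonce, NULL);
 *	chacha_encrypt_bytes(&ctx, in, out, sizeof(in));
 */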