1 /* $OpenBSD: chacha-merged.c,v 1.6 2014/06/24 18:12:09 jsing Exp $ */
2 /*
3 chacha-merged.c version 20080118
4 D. J. Bernstein
5 Public domain.
6 */
7 
8 #include <sys/types.h>
9 
10 #include <stdint.h>
11 
/*
 * Byte sizes used by this ChaCha implementation.
 *
 * CHACHA_MINKEYLEN  shortest key chacha_keysetup() reads (the 128-bit case)
 * CHACHA_NONCELEN   nonce bytes read by chacha_ivsetup()
 * CHACHA_CTRLEN     block-counter bytes read by chacha_ivsetup()
 * CHACHA_STATELEN   nonce and counter together
 * CHACHA_BLOCKLEN   bytes of keystream produced per block
 */
#define CHACHA_MINKEYLEN 	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64
17 
/*
 * Cipher context.
 *
 * input   the 16-word block input: words 0-3 hold the constant and words
 *         4-11 the key (both written by chacha_keysetup()), words 12-13 the
 *         block counter and words 14-15 the nonce (both written by
 *         chacha_ivsetup()).
 * ks      keystream of a block; chacha_encrypt_bytes() fills it in only
 *         when the last block of a call is shorter than CHACHA_BLOCKLEN.
 * unused  set by chacha_encrypt_bytes() to 64 minus the length of the last
 *         block of a call (0 when that block was full).  Presumably the
 *         number of keystream bytes left over in ks; nothing in the code
 *         visible here reads it back -- confirm against callers.
 */
struct chacha_ctx {
	u_int input[16];
	uint8_t ks[CHACHA_BLOCKLEN];
	uint8_t unused;
};
23 
/*
 * Forward declarations, present so the buffer-size attributes can be
 * attached to the three entry points.
 *
 * __bounded__ is a compiler extension (documented for OpenBSD's gcc;
 * other compilers may ignore it with a warning -- verify for your
 * toolchain).  As used here:
 *   __minbytes__, A, N   argument A must point to at least N bytes
 *   __buffer__,   A, L   argument A is a buffer whose length is argument L
 *
 * u_char/u_int in these prototypes are the same types as the u8/u32
 * typedefs used in the definitions further down.
 */
static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
    u_int kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
    const u_char *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
    u_char *c, u_int bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));
35 
/*
 * Short type names used by the implementation below.  u32 is plain
 * unsigned int; results that must stay within 32 bits are masked with
 * U32V() rather than relying on the width of the type.
 */
typedef unsigned char u8;
typedef unsigned int u32;

/* Lets the definitions below refer to the context without the struct tag. */
typedef struct chacha_ctx chacha_ctx;
40 
/* Append the unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Reduce a value to its low 8 bits (U8V) or low 32 bits (U32V). */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  v is evaluated twice, and n
 * must be in 1..31: n == 0 would make the right-shift count 32.  The code
 * in this file only rotates by 16, 12, 8 and 7.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/*
 * Store the 32-bit word v into the four bytes at p, least significant
 * first.  Both p and v are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/*
 * The word operations the cipher is built from: rotate left, exclusive-or,
 * addition modulo 2^32, and increment modulo 2^32.
 */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
68 
/*
 * One quarter round: mixes the four state words a, b, c, d in place using
 * add, xor and rotations by 16, 12, 8 and 7 bits.
 *
 * The expansion is a bare sequence of eight statements that already ends
 * in ';'.  It is therefore invoked without a trailing semicolon and must
 * only appear inside a braced block, never as the sole body of an
 * unbraced if/for/while.  Each argument is evaluated several times, so
 * arguments must be side-effect-free lvalues.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
74 
/*
 * The 16-byte constants chacha_keysetup() loads into input[0..3]: sigma
 * when kbits is 256, tau otherwise.  Each literal is exactly 16 characters
 * long, so the arrays hold no terminating NUL and must not be treated as
 * C strings.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
77 
78 static inline void
79 chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
80 {
81 	const char *constants;
82 
83 	x->input[4] = U8TO32_LITTLE(k + 0);
84 	x->input[5] = U8TO32_LITTLE(k + 4);
85 	x->input[6] = U8TO32_LITTLE(k + 8);
86 	x->input[7] = U8TO32_LITTLE(k + 12);
87 	if (kbits == 256) { /* recommended */
88 		k += 16;
89 		constants = sigma;
90 	} else { /* kbits == 128 */
91 		constants = tau;
92 	}
93 	x->input[8] = U8TO32_LITTLE(k + 0);
94 	x->input[9] = U8TO32_LITTLE(k + 4);
95 	x->input[10] = U8TO32_LITTLE(k + 8);
96 	x->input[11] = U8TO32_LITTLE(k + 12);
97 	x->input[0] = U8TO32_LITTLE(constants + 0);
98 	x->input[1] = U8TO32_LITTLE(constants + 4);
99 	x->input[2] = U8TO32_LITTLE(constants + 8);
100 	x->input[3] = U8TO32_LITTLE(constants + 12);
101 }
102 
103 static inline void
104 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
105 {
106 	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
107 	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
108 	x->input[14] = U8TO32_LITTLE(iv + 0);
109 	x->input[15] = U8TO32_LITTLE(iv + 4);
110 }
111 
112 static inline void
113 chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
114 {
115 	u32 x0, x1, x2, x3, x4, x5, x6, x7;
116 	u32 x8, x9, x10, x11, x12, x13, x14, x15;
117 	u32 j0, j1, j2, j3, j4, j5, j6, j7;
118 	u32 j8, j9, j10, j11, j12, j13, j14, j15;
119 	u8 *ctarget = NULL;
120 	u8 tmp[64];
121 	u_int i;
122 
123 	if (!bytes)
124 		return;
125 
126 	j0 = x->input[0];
127 	j1 = x->input[1];
128 	j2 = x->input[2];
129 	j3 = x->input[3];
130 	j4 = x->input[4];
131 	j5 = x->input[5];
132 	j6 = x->input[6];
133 	j7 = x->input[7];
134 	j8 = x->input[8];
135 	j9 = x->input[9];
136 	j10 = x->input[10];
137 	j11 = x->input[11];
138 	j12 = x->input[12];
139 	j13 = x->input[13];
140 	j14 = x->input[14];
141 	j15 = x->input[15];
142 
143 	for (;;) {
144 		if (bytes < 64) {
145 			for (i = 0; i < bytes; ++i)
146 				tmp[i] = m[i];
147 			m = tmp;
148 			ctarget = c;
149 			c = tmp;
150 		}
151 		x0 = j0;
152 		x1 = j1;
153 		x2 = j2;
154 		x3 = j3;
155 		x4 = j4;
156 		x5 = j5;
157 		x6 = j6;
158 		x7 = j7;
159 		x8 = j8;
160 		x9 = j9;
161 		x10 = j10;
162 		x11 = j11;
163 		x12 = j12;
164 		x13 = j13;
165 		x14 = j14;
166 		x15 = j15;
167 		for (i = 20; i > 0; i -= 2) {
168 			QUARTERROUND(x0, x4, x8, x12)
169 			QUARTERROUND(x1, x5, x9, x13)
170 			QUARTERROUND(x2, x6, x10, x14)
171 			QUARTERROUND(x3, x7, x11, x15)
172 			QUARTERROUND(x0, x5, x10, x15)
173 			QUARTERROUND(x1, x6, x11, x12)
174 			QUARTERROUND(x2, x7, x8, x13)
175 			QUARTERROUND(x3, x4, x9, x14)
176 		}
177 		x0 = PLUS(x0, j0);
178 		x1 = PLUS(x1, j1);
179 		x2 = PLUS(x2, j2);
180 		x3 = PLUS(x3, j3);
181 		x4 = PLUS(x4, j4);
182 		x5 = PLUS(x5, j5);
183 		x6 = PLUS(x6, j6);
184 		x7 = PLUS(x7, j7);
185 		x8 = PLUS(x8, j8);
186 		x9 = PLUS(x9, j9);
187 		x10 = PLUS(x10, j10);
188 		x11 = PLUS(x11, j11);
189 		x12 = PLUS(x12, j12);
190 		x13 = PLUS(x13, j13);
191 		x14 = PLUS(x14, j14);
192 		x15 = PLUS(x15, j15);
193 
194 		if (bytes < 64) {
195 			U32TO8_LITTLE(x->ks + 0, x0);
196 			U32TO8_LITTLE(x->ks + 4, x1);
197 			U32TO8_LITTLE(x->ks + 8, x2);
198 			U32TO8_LITTLE(x->ks + 12, x3);
199 			U32TO8_LITTLE(x->ks + 16, x4);
200 			U32TO8_LITTLE(x->ks + 20, x5);
201 			U32TO8_LITTLE(x->ks + 24, x6);
202 			U32TO8_LITTLE(x->ks + 28, x7);
203 			U32TO8_LITTLE(x->ks + 32, x8);
204 			U32TO8_LITTLE(x->ks + 36, x9);
205 			U32TO8_LITTLE(x->ks + 40, x10);
206 			U32TO8_LITTLE(x->ks + 44, x11);
207 			U32TO8_LITTLE(x->ks + 48, x12);
208 			U32TO8_LITTLE(x->ks + 52, x13);
209 			U32TO8_LITTLE(x->ks + 56, x14);
210 			U32TO8_LITTLE(x->ks + 60, x15);
211 		}
212 
213 		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
214 		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
215 		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
216 		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
217 		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
218 		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
219 		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
220 		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
221 		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
222 		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
223 		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
224 		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
225 		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
226 		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
227 		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
228 		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
229 
230 		j12 = PLUSONE(j12);
231 		if (!j12) {
232 			j13 = PLUSONE(j13);
233 			/*
234 			 * Stopping at 2^70 bytes per nonce is the user's
235 			 * responsibility.
236 			 */
237 		}
238 
239 		U32TO8_LITTLE(c + 0, x0);
240 		U32TO8_LITTLE(c + 4, x1);
241 		U32TO8_LITTLE(c + 8, x2);
242 		U32TO8_LITTLE(c + 12, x3);
243 		U32TO8_LITTLE(c + 16, x4);
244 		U32TO8_LITTLE(c + 20, x5);
245 		U32TO8_LITTLE(c + 24, x6);
246 		U32TO8_LITTLE(c + 28, x7);
247 		U32TO8_LITTLE(c + 32, x8);
248 		U32TO8_LITTLE(c + 36, x9);
249 		U32TO8_LITTLE(c + 40, x10);
250 		U32TO8_LITTLE(c + 44, x11);
251 		U32TO8_LITTLE(c + 48, x12);
252 		U32TO8_LITTLE(c + 52, x13);
253 		U32TO8_LITTLE(c + 56, x14);
254 		U32TO8_LITTLE(c + 60, x15);
255 
256 		if (bytes <= 64) {
257 			if (bytes < 64) {
258 				for (i = 0; i < bytes; ++i)
259 					ctarget[i] = c[i];
260 			}
261 			x->input[12] = j12;
262 			x->input[13] = j13;
263 			x->unused = 64 - bytes;
264 			return;
265 		}
266 		bytes -= 64;
267 		c += 64;
268 		m += 64;
269 	}
270 }
271