1 /*
2  * Copyright (c) 2014-2015 Vincent Hanquez <vincent@snarc.org>
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the author nor the names of his contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <stdint.h>
32 #include <string.h>
33 #include "cryptonite_chacha.h"
34 #include "cryptonite_bitfn.h"
35 #include "cryptonite_align.h"
36 #include <stdio.h>
37 
/*
 * ChaCha quarter round: mixes the four 32-bit words a, b, c, d in place
 * using add / xor / rotate-left steps with rotation amounts 16, 12, 8, 7.
 *
 * Every argument is evaluated several times and is assigned to, so each
 * one must be a side-effect-free lvalue.
 *
 * The body is wrapped in do { } while (0) so that `QR(...);` expands to
 * exactly one statement and stays correct inside an unbraced if/else.
 */
#define QR(a,b,c,d) \
	do { \
		(a) += (b); (d) = rol32((d) ^ (a), 16); \
		(c) += (d); (b) = rol32((b) ^ (c), 12); \
		(a) += (b); (d) = rol32((d) ^ (a),  8); \
		(c) += (d); (b) = rol32((b) ^ (c),  7); \
	} while (0)
43 
/* Non-zero when the address held by PTR is a multiple of 8 bytes (64 bits). */
#define ALIGNED64(PTR) \
	((((uintptr_t)(const void *)(PTR)) & (uintptr_t)7) == 0)
46 
/*
 * 16-byte constants loaded into state words 0..3 by
 * cryptonite_chacha_init_core. The arrays are exactly 16 bytes long, so
 * the string literals' terminating NUL is not stored.
 */
/* Selected when keylen == 32. */
static const uint8_t sigma[16] = "expand 32-byte k";
/* Selected for any other keylen (the 16-byte key case). */
static const uint8_t tau[16] = "expand 16-byte k";
49 
/*
 * Produce one 16-word output block from the state `in`.
 *
 * rounds: round count. Each loop iteration performs a column pass followed
 *         by a diagonal pass and consumes 2 rounds; iteration continues
 *         while the remaining count is positive, so an odd count behaves
 *         like the next even one.
 * out:    receives the 16 result words, each converted with cpu_to_le32.
 * in:     input state; only read, never written here.
 */
static void chacha_core(int rounds, block *out, const cryptonite_chacha_state *in)
{
	uint32_t w[16];
	int left;
	int k;

	/* work on a private copy of the input words */
	for (k = 0; k < 16; k++)
		w[k] = in->d[k];

	left = rounds;
	while (left > 0) {
		/* column pass */
		QR(w[0], w[4], w[8],  w[12]);
		QR(w[1], w[5], w[9],  w[13]);
		QR(w[2], w[6], w[10], w[14]);
		QR(w[3], w[7], w[11], w[15]);

		/* diagonal pass */
		QR(w[0], w[5], w[10], w[15]);
		QR(w[1], w[6], w[11], w[12]);
		QR(w[2], w[7], w[8],  w[13]);
		QR(w[3], w[4], w[9],  w[14]);

		left -= 2;
	}

	/* feed-forward: add the untouched input words to the mixed words */
	for (k = 0; k < 16; k++)
		w[k] += in->d[k];

	/* store only after every read of `in` is complete */
	for (k = 0; k < 16; k++)
		out->d[k] = cpu_to_le32(w[k]);
}
94 
95 /* only 2 valids values are 256 (32) and 128 (16) */
cryptonite_chacha_init_core(cryptonite_chacha_state * st,uint32_t keylen,const uint8_t * key,uint32_t ivlen,const uint8_t * iv)96 void cryptonite_chacha_init_core(cryptonite_chacha_state *st,
97                                  uint32_t keylen, const uint8_t *key,
98                                  uint32_t ivlen, const uint8_t *iv)
99 {
100 	const uint8_t *constants = (keylen == 32) ? sigma : tau;
101 
102 	ASSERT_ALIGNMENT(constants, 4);
103 
104 	st->d[0] = load_le32_aligned(constants + 0);
105 	st->d[1] = load_le32_aligned(constants + 4);
106 	st->d[2] = load_le32_aligned(constants + 8);
107 	st->d[3] = load_le32_aligned(constants + 12);
108 
109 	st->d[4] = load_le32(key + 0);
110 	st->d[5] = load_le32(key + 4);
111 	st->d[6] = load_le32(key + 8);
112 	st->d[7] = load_le32(key + 12);
113 	/* we repeat the key on 128 bits */
114 	if (keylen == 32)
115 		key += 16;
116 	st->d[8] = load_le32(key + 0);
117 	st->d[9] = load_le32(key + 4);
118 	st->d[10] = load_le32(key + 8);
119 	st->d[11] = load_le32(key + 12);
120 	st->d[12] = 0;
121 	switch (ivlen) {
122 	case 8:
123 		st->d[13] = 0;
124 		st->d[14] = load_le32(iv + 0);
125 		st->d[15] = load_le32(iv + 4);
126 		break;
127 	case 12:
128 		st->d[13] = load_le32(iv + 0);
129 		st->d[14] = load_le32(iv + 4);
130 		st->d[15] = load_le32(iv + 8);
131 	default:
132 		return;
133 	}
134 }
135 
/*
 * Reset a context and set it up for a new key / IV.
 *
 * The whole context is zeroed first, which also empties the buffered
 * keystream (prev_len becomes 0), then the round count is recorded and the
 * cipher state is filled by cryptonite_chacha_init_core.
 */
void cryptonite_chacha_init(cryptonite_chacha_context *ctx, uint8_t nb_rounds,
                            uint32_t keylen, const uint8_t *key,
                            uint32_t ivlen, const uint8_t *iv)
{
	cryptonite_chacha_state *state = &ctx->st;

	memset(ctx, 0, sizeof *ctx);
	ctx->nb_rounds = nb_rounds;
	cryptonite_chacha_init_core(state, keylen, key, ivlen, iv);
}
144 
/*
 * XOR `bytes` bytes of `src` with keystream and write the result to `dst`.
 *
 * Keystream left over from an earlier call (ctx->prev, starting at
 * ctx->prev_ofs, ctx->prev_len bytes) is consumed first and wiped as it is
 * used. Fresh 64-byte blocks are then generated as needed; after each
 * block, state word 12 is incremented and, when it wraps to 0, word 13 is
 * incremented as well. If the request ends inside a block, the unused part
 * of that block is saved in ctx->prev for the next call.
 */
void cryptonite_chacha_combine(uint8_t *dst, cryptonite_chacha_context *ctx, const uint8_t *src, uint32_t bytes)
{
	cryptonite_chacha_state *st;
	block ks;
	int n;

	if (bytes == 0)
		return;

	/* step 1: use up keystream buffered by a previous call */
	if (ctx->prev_len > 0) {
		int take = (ctx->prev_len < bytes) ? ctx->prev_len : bytes;

		for (n = 0; n < take; n++)
			dst[n] = src[n] ^ ctx->prev[ctx->prev_ofs + n];
		/* wipe the keystream bytes that were just consumed */
		memset(ctx->prev + ctx->prev_ofs, 0, take);
		ctx->prev_len -= take;
		ctx->prev_ofs += take;
		src += take;
		dst += take;
		bytes -= take;
	}

	if (!bytes)
		return;

	st = &ctx->st;

	/* step 2: whole 64-byte blocks */
	while (bytes >= 64) {
		chacha_core(ctx->nb_rounds, &ks, st);
		/* advance the block counter, carrying into word 13 on wrap */
		if (++st->d[12] == 0)
			st->d[13] += 1;

		for (n = 0; n < 64; n++)
			dst[n] = src[n] ^ ks.b[n];

		bytes -= 64;
		src += 64;
		dst += 64;
	}

	if (bytes == 0)
		return;

	/* step 3: final partial block */
	chacha_core(ctx->nb_rounds, &ks, st);
	if (++st->d[12] == 0)
		st->d[13] += 1;

	n = 0;
	while (n < bytes) {
		dst[n] = src[n] ^ ks.b[n];
		n++;
	}

	/* keep the unused tail of this block, at the same offset, for later */
	ctx->prev_len = 64 - bytes;
	ctx->prev_ofs = n;
	while (n < 64) {
		ctx->prev[n] = ks.b[n];
		n++;
	}
}
203 
/*
 * Write `bytes` bytes of raw keystream to `dst` (no input is XORed in).
 *
 * Keystream left over from an earlier call (ctx->prev, starting at
 * ctx->prev_ofs, ctx->prev_len bytes) is copied out first and wiped as it
 * is used. Fresh 64-byte blocks are then generated as needed; after each
 * block, state word 12 is incremented and, when it wraps to 0, word 13 is
 * incremented as well. If the request ends inside a block, the unused part
 * of that block is saved in ctx->prev for the next call.
 */
void cryptonite_chacha_generate(uint8_t *dst, cryptonite_chacha_context *ctx, uint32_t bytes)
{
	cryptonite_chacha_state *st;
	block ks;
	int direct;
	int n;

	if (bytes == 0)
		return;

	/* step 1: hand out keystream buffered by a previous call */
	if (ctx->prev_len > 0) {
		int take = (ctx->prev_len < bytes) ? ctx->prev_len : bytes;

		for (n = 0; n < take; n++)
			dst[n] = ctx->prev[ctx->prev_ofs + n];
		/* wipe the keystream bytes that were just consumed */
		memset(ctx->prev + ctx->prev_ofs, 0, take);
		ctx->prev_len -= take;
		ctx->prev_ofs += take;
		dst += take;
		bytes -= take;
	}

	if (!bytes)
		return;

	st = &ctx->st;

	/*
	 * step 2: whole 64-byte blocks.
	 * dst only moves in 64-byte steps, so its 8-byte alignment is decided
	 * once: an aligned dst receives each block directly, otherwise the
	 * block goes through the local buffer and is copied byte by byte.
	 */
	direct = ALIGNED64(dst);
	while (bytes >= 64) {
		block *target = direct ? (block *) dst : &ks;

		chacha_core(ctx->nb_rounds, target, st);
		/* advance the block counter, carrying into word 13 on wrap */
		if (++st->d[12] == 0)
			st->d[13] += 1;

		if (!direct) {
			for (n = 0; n < 64; n++)
				dst[n] = ks.b[n];
		}

		bytes -= 64;
		dst += 64;
	}

	if (bytes == 0)
		return;

	/* step 3: final partial block */
	chacha_core(ctx->nb_rounds, &ks, st);
	if (++st->d[12] == 0)
		st->d[13] += 1;

	n = 0;
	while (n < bytes) {
		dst[n] = ks.b[n];
		n++;
	}

	/* keep the unused tail of this block, at the same offset, for later */
	ctx->prev_len = 64 - bytes;
	ctx->prev_ofs = n;
	while (n < 64) {
		ctx->prev[n] = ks.b[n];
		n++;
	}
}
271 
/*
 * Fill `dst` with `bytes` bytes, re-keying the state after every chunk.
 *
 * Each iteration computes one block from `st`, emits up to 16 bytes taken
 * from offset 40 of that block, then re-initialises `st` using block bytes
 * 0..31 as a 32-byte key and bytes 32..39 as an 8-byte IV.
 */
void cryptonite_chacha_random(uint32_t rounds, uint8_t *dst, cryptonite_chacha_state *st, uint32_t bytes)
{
	block out;

	while (bytes > 0) {
		/* full 16-byte chunks, then one shorter chunk for the remainder */
		uint32_t chunk = (bytes >= 16) ? 16 : bytes;

		chacha_core(rounds, &out, st);
		memcpy(dst, out.b + 40, chunk);
		cryptonite_chacha_init_core(st, 32, out.b, 8, out.b + 32);

		dst += chunk;
		bytes -= chunk;
	}
}
289