1 /*
2  * Copyright (c) 2014-2015 Vincent Hanquez <vincent@snarc.org>
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Neither the name of the author nor the names of his contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <stdint.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include "cryptonite_salsa.h"
35 #include "cryptonite_bitfn.h"
36 #include "cryptonite_align.h"
37 
/*
 * Salsa constant strings, spelled out byte by byte: exactly 16 bytes each,
 * with no terminating NUL.  sigma is selected for 32-byte keys, tau for
 * every other key length.
 */
static const uint8_t sigma[16] = {	/* "expand 32-byte k" */
	'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
	'2', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
static const uint8_t tau[16] = {	/* "expand 16-byte k" */
	'e', 'x', 'p', 'a', 'n', 'd', ' ', '1',
	'6', '-', 'b', 'y', 't', 'e', ' ', 'k'
};
40 
/*
 * Salsa quarter-round on four words.  The words are updated in the order
 * b, c, d, a; each one is XORed with a rol32-rotated sum of two of the
 * others (rotation counts 7, 9, 13 and 18).
 *
 * The body is wrapped in do { } while (0) so that `QR(...);` is exactly one
 * statement wherever it is used, and every argument is parenthesized.
 * Arguments are evaluated more than once: pass plain lvalues without side
 * effects.
 */
#define QR(a,b,c,d) \
	do { \
		(b) ^= rol32((a) + (d), 7); \
		(c) ^= rol32((b) + (a), 9); \
		(d) ^= rol32((c) + (b), 13); \
		(a) ^= rol32((d) + (c), 18); \
	} while (0)
46 
/* Non-zero when the address held by PTR is a multiple of 8 bytes (64 bits). */
#define ALIGNED64(PTR) \
	((((uintptr_t)(const void *)(PTR)) & 7u) == 0)
49 
/*
 * Round loop shared by the core functions.  It must be expanded in a scope
 * that already declares `i`, `rounds` and the sixteen words x0..x15.
 *
 * Each iteration applies eight quarter-rounds and counts as two rounds
 * (i decreases by 2), so an odd `rounds` runs as many iterations as
 * rounds + 1 would.  Viewing x0..x15 as a row-major 4x4 matrix, the first
 * four QR calls work down the columns and the last four along the rows.
 */
#define SALSA_CORE_LOOP \
	for (i = rounds; i > 0; i -= 2) { \
		QR (x0,x4,x8,x12); \
		QR (x5,x9,x13,x1); \
		QR (x10,x14,x2,x6); \
		QR (x15,x3,x7,x11); \
		QR (x0,x1,x2,x3); \
		QR (x5,x6,x7,x4); \
		QR (x10,x11,x8,x9); \
		QR (x15,x12,x13,x14); \
	}
61 
/*
 * Compute one output block from the state `in`.
 *
 * The sixteen state words are copied into locals, mixed by SALSA_CORE_LOOP
 * for `rounds` rounds, added back to the original state words, and finally
 * written to `out` through cpu_to_le32().  `in` itself is never modified;
 * callers are responsible for advancing the counter.
 */
static void salsa_core(int rounds, block *out, const cryptonite_salsa_state *in)
{
	uint32_t x0 = in->d[0], x1 = in->d[1], x2 = in->d[2], x3 = in->d[3];
	uint32_t x4 = in->d[4], x5 = in->d[5], x6 = in->d[6], x7 = in->d[7];
	uint32_t x8 = in->d[8], x9 = in->d[9], x10 = in->d[10], x11 = in->d[11];
	uint32_t x12 = in->d[12], x13 = in->d[13], x14 = in->d[14], x15 = in->d[15];
	int i;

	SALSA_CORE_LOOP;

	/* feed-forward: add the untouched input words to the mixed ones */
#define FEED_FORWARD(n) (x##n += in->d[n])
	FEED_FORWARD(0);  FEED_FORWARD(1);  FEED_FORWARD(2);  FEED_FORWARD(3);
	FEED_FORWARD(4);  FEED_FORWARD(5);  FEED_FORWARD(6);  FEED_FORWARD(7);
	FEED_FORWARD(8);  FEED_FORWARD(9);  FEED_FORWARD(10); FEED_FORWARD(11);
	FEED_FORWARD(12); FEED_FORWARD(13); FEED_FORWARD(14); FEED_FORWARD(15);
#undef FEED_FORWARD

	/* only now write the result, once every input word has been read */
#define STORE_WORD(n) (out->d[n] = cpu_to_le32(x##n))
	STORE_WORD(0);  STORE_WORD(1);  STORE_WORD(2);  STORE_WORD(3);
	STORE_WORD(4);  STORE_WORD(5);  STORE_WORD(6);  STORE_WORD(7);
	STORE_WORD(8);  STORE_WORD(9);  STORE_WORD(10); STORE_WORD(11);
	STORE_WORD(12); STORE_WORD(13); STORE_WORD(14); STORE_WORD(15);
#undef STORE_WORD
}
96 
/*
 * In-place Salsa core with a preliminary XOR.
 *
 * First every word of `in` is XORed into `out`; the resulting words are then
 * mixed by SALSA_CORE_LOOP for `rounds` rounds on a local copy, and the
 * mixed words are added onto `out`.  `in` is only read.  No byte-order
 * conversion is performed here: words are used exactly as stored.
 */
void cryptonite_salsa_core_xor(int rounds, block *out, block *in)
{
	uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
	uint32_t x8, x9, x10, x11, x12, x13, x14, x15;
	int i;

	/* out ^= in, keeping a working copy of each combined word */
#define XOR_IN(n) (x##n = (out->d[n] ^= in->d[n]))
	XOR_IN(0);  XOR_IN(1);  XOR_IN(2);  XOR_IN(3);
	XOR_IN(4);  XOR_IN(5);  XOR_IN(6);  XOR_IN(7);
	XOR_IN(8);  XOR_IN(9);  XOR_IN(10); XOR_IN(11);
	XOR_IN(12); XOR_IN(13); XOR_IN(14); XOR_IN(15);
#undef XOR_IN

	SALSA_CORE_LOOP;

	/* feed-forward: out currently holds the pre-mix words */
#define ADD_BACK(n) (out->d[n] += x##n)
	ADD_BACK(0);  ADD_BACK(1);  ADD_BACK(2);  ADD_BACK(3);
	ADD_BACK(4);  ADD_BACK(5);  ADD_BACK(6);  ADD_BACK(7);
	ADD_BACK(8);  ADD_BACK(9);  ADD_BACK(10); ADD_BACK(11);
	ADD_BACK(12); ADD_BACK(13); ADD_BACK(14); ADD_BACK(15);
#undef ADD_BACK
}
116 
117 /* only 2 valid values for keylen are 256 (32) and 128 (16) */
cryptonite_salsa_init_core(cryptonite_salsa_state * st,uint32_t keylen,const uint8_t * key,uint32_t ivlen,const uint8_t * iv)118 void cryptonite_salsa_init_core(cryptonite_salsa_state *st,
119                                 uint32_t keylen, const uint8_t *key,
120                                 uint32_t ivlen, const uint8_t *iv)
121 {
122 	const uint8_t *constants = (keylen == 32) ? sigma : tau;
123 
124 	st->d[0] = load_le32_aligned(constants + 0);
125 	st->d[5] = load_le32_aligned(constants + 4);
126 	st->d[10] = load_le32_aligned(constants + 8);
127 	st->d[15] = load_le32_aligned(constants + 12);
128 
129 	st->d[1] = load_le32(key + 0);
130 	st->d[2] = load_le32(key + 4);
131 	st->d[3] = load_le32(key + 8);
132 	st->d[4] = load_le32(key + 12);
133 	/* we repeat the key on 128 bits */
134 	if (keylen == 32)
135 		key += 16;
136 	st->d[11] = load_le32(key + 0);
137 	st->d[12] = load_le32(key + 4);
138 	st->d[13] = load_le32(key + 8);
139 	st->d[14] = load_le32(key + 12);
140 
141 	st->d[9] = 0;
142 	switch (ivlen) {
143 	case 8:
144 		st->d[6] = load_le32(iv + 0);
145 		st->d[7] = load_le32(iv + 4);
146 		st->d[8] = 0;
147 		break;
148 	case 12:
149 		st->d[6] = load_le32(iv + 0);
150 		st->d[7] = load_le32(iv + 4);
151 		st->d[8] = load_le32(iv + 8);
152 	default:
153 		return;
154 	}
155 }
156 
/*
 * Prepare a streaming context.
 *
 * The whole context is zeroed first, then the cipher state is derived from
 * the key and nonce by cryptonite_salsa_init_core() and the round count is
 * recorded for later keystream generation.  See
 * cryptonite_salsa_init_core() for the accepted keylen / ivlen values.
 */
void cryptonite_salsa_init(cryptonite_salsa_context *ctx, uint8_t nb_rounds,
                           uint32_t keylen, const uint8_t *key,
                           uint32_t ivlen, const uint8_t *iv)
{
	/* start from an all-zero context so that no field is left undefined */
	memset(ctx, 0, sizeof *ctx);

	cryptonite_salsa_init_core(&ctx->st, keylen, key, ivlen, iv);
	ctx->nb_rounds = nb_rounds;
}
165 
/*
 * XOR `bytes` bytes of `src` with keystream and write the result to `dst`.
 *
 * Keystream left over from a previous call (ctx->prev) is consumed first and
 * wiped as it is used.  After that, whole 64-byte blocks are generated with
 * salsa_core(); state words 8 and 9 are advanced as a counter with carry
 * after each block.  When the request ends inside a block, the unused part
 * of that block is saved in ctx->prev for the next call.
 */
void cryptonite_salsa_combine(uint8_t *dst, cryptonite_salsa_context *ctx, const uint8_t *src, uint32_t bytes)
{
	cryptonite_salsa_state *state;
	block ks;
	int k;

	if (bytes == 0)
		return;

	/* spend the keystream bytes saved by an earlier call, if any */
	if (ctx->prev_len > 0) {
		int avail = (ctx->prev_len < bytes) ? ctx->prev_len : bytes;

		k = 0;
		while (k < avail) {
			dst[k] = src[k] ^ ctx->prev[ctx->prev_ofs + k];
			k++;
		}
		/* used keystream must not linger in the context */
		memset(ctx->prev + ctx->prev_ofs, 0, avail);
		ctx->prev_ofs += avail;
		ctx->prev_len -= avail;
		dst += avail;
		src += avail;
		bytes -= avail;

		if (bytes == 0)
			return;
	}

	state = &ctx->st;

	/* whole 64-byte blocks */
	while (bytes >= 64) {
		salsa_core(ctx->nb_rounds, &ks, state);
		/* advance the counter: word 8, carrying into word 9 on wrap */
		if (++state->d[8] == 0)
			state->d[9]++;

		for (k = 0; k < 64; k++)
			dst[k] = src[k] ^ ks.b[k];

		bytes -= 64;
		src += 64;
		dst += 64;
	}

	if (bytes == 0)
		return;

	/* final partial block: one more keystream block is still needed */
	salsa_core(ctx->nb_rounds, &ks, state);
	if (++state->d[8] == 0)
		state->d[9]++;

	for (k = 0; k < bytes; k++)
		dst[k] = src[k] ^ ks.b[k];

	/* keep the unused tail, at the same offsets, for the next call */
	ctx->prev_len = 64 - bytes;
	ctx->prev_ofs = k;
	while (k < 64) {
		ctx->prev[k] = ks.b[k];
		k++;
	}
}
224 
/*
 * Write `bytes` bytes of raw keystream to `dst`.
 *
 * Keystream left over from a previous call (ctx->prev) is handed out first
 * and wiped as it is used.  Whole 64-byte blocks are then produced with
 * salsa_core(): straight into `dst` when it is 8-byte aligned, otherwise
 * through a local block that is copied out.  State words 8 and 9 are
 * advanced as a counter with carry after each block.  When the request ends
 * inside a block, the unused part of that block is saved in ctx->prev.
 */
void cryptonite_salsa_generate(uint8_t *dst, cryptonite_salsa_context *ctx, uint32_t bytes)
{
	cryptonite_salsa_state *state;
	block ks;
	int k;
	int direct;

	if (bytes == 0)
		return;

	/* hand out the keystream bytes saved by an earlier call, if any */
	if (ctx->prev_len > 0) {
		int avail = (ctx->prev_len < bytes) ? ctx->prev_len : bytes;

		k = 0;
		while (k < avail) {
			dst[k] = ctx->prev[ctx->prev_ofs + k];
			k++;
		}
		/* used keystream must not linger in the context */
		memset(ctx->prev + ctx->prev_ofs, 0, avail);
		ctx->prev_ofs += avail;
		ctx->prev_len -= avail;
		dst += avail;
		bytes -= avail;

		if (bytes == 0)
			return;
	}

	state = &ctx->st;

	/*
	 * dst moves in 64-byte steps below, so its 8-byte alignment cannot
	 * change between blocks: test it once.
	 */
	direct = ALIGNED64(dst);

	/* whole 64-byte blocks */
	while (bytes >= 64) {
		if (direct) {
			salsa_core(ctx->nb_rounds, (block *) dst, state);
		} else {
			salsa_core(ctx->nb_rounds, &ks, state);
			memcpy(dst, ks.b, 64);
		}
		/* advance the counter: word 8, carrying into word 9 on wrap */
		if (++state->d[8] == 0)
			state->d[9]++;

		bytes -= 64;
		dst += 64;
	}

	if (bytes == 0)
		return;

	/* final partial block: one more keystream block is still needed */
	salsa_core(ctx->nb_rounds, &ks, state);
	if (++state->d[8] == 0)
		state->d[9]++;

	memcpy(dst, ks.b, bytes);

	/* keep the unused tail, at the same offsets, for the next call */
	ctx->prev_len = 64 - bytes;
	ctx->prev_ofs = bytes;
	for (k = bytes; k < 64; k++)
		ctx->prev[k] = ks.b[k];
}
292 
293