1 /*
2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
27 /* see bearssl_block.h */
28 void
29 br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
30 	const void *key, size_t len)
31 {
32 	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
33 	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
34 }
35 
/*
 * XOR the first len bytes of src into dst, in place:
 * dst[k] ^= src[k] for k = 0 .. len - 1, in increasing order.
 * Nothing is touched when len is zero.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < len; k ++) {
		out[k] ^= in[k];
	}
}
48 
/*
 * CTR-only processing: XOR data with AES-CTR keystream, in place
 * (API: see bearssl_block.h).
 *
 *   ctx    key context; the round count and key schedule are read
 *          from it
 *   ctr    16-byte counter, decoded as four big-endian 32-bit words;
 *          on return it holds the value following the last counter
 *          block that was generated
 *   data   buffer that gets XORed with the keystream
 *   len    number of bytes in data
 *
 * Keystream is produced in batches of up to four counter blocks
 * (64 bytes) per call to the bitslice encryption routine.
 *
 * NOTE(review): len is presumably required to be a multiple of 16
 * (confirm in bearssl_block.h). For a final batch shorter than 64
 * bytes, the number of counter blocks is derived from len >> 2 in
 * steps of four words; a length with (len % 16) in 1..3 therefore
 * leaves the trailing bytes paired with a zero-filled cipher input
 * instead of a counter value.
 */
void
br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
	void *ctr, void *data, size_t len)
{
	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint64_t sk_exp[120];

	/*
	 * Fill sk_exp from the key schedule stored in the context; this
	 * is the form consumed by the bitslice encryption below.
	 */
	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf +  0);
	iv1 = br_dec32be(ivbuf +  4);
	iv2 = br_dec32be(ivbuf +  8);
	iv3 = br_dec32be(ivbuf + 12);

	buf = data;
	while (len > 0) {
		uint64_t q[8];
		uint32_t w[16];
		unsigned char tmp[64];
		int i, j;

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 *
		 * j is the number of 32-bit words of counter input to
		 * prepare for this batch: 16 (four blocks) for a full
		 * batch, len / 4 otherwise.
		 */
		j = (len >= 64) ? 16 : (int)(len >> 2);
		for (i = 0; i < j; i += 4) {
			uint32_t carry;

			w[i + 0] = br_swap32(iv0);
			w[i + 1] = br_swap32(iv1);
			w[i + 2] = br_swap32(iv2);
			w[i + 3] = br_swap32(iv3);

			/*
			 * Increment the 128-bit counter. (x | -x) has its
			 * top bit set exactly when x is non-zero, so each
			 * carry is 1 only when the word just incremented
			 * wrapped to zero; the carry ripples to the upper
			 * words without any branch.
			 */
			iv3 ++;
			carry = ~(iv3 | -iv3) >> 31;
			iv2 += carry;
			carry &= -(~(iv2 | -iv2) >> 31);
			iv1 += carry;
			carry &= -(~(iv1 | -iv1) >> 31);
			iv0 += carry;
		}
		/*
		 * Zero the slots that did not receive a counter block, so
		 * that the cipher input is fully initialised.
		 */
		memset(w + i, 0, (16 - i) * sizeof(uint32_t));

		/*
		 * Block i (words w[4*i .. 4*i+3]) is packed into q[i] and
		 * q[i + 4]; all four slots are then encrypted in a single
		 * call and unpacked back into w.
		 */
		for (i = 0; i < 4; i ++) {
			br_aes_ct64_interleave_in(
				&q[i], &q[i + 4], w + (i << 2));
		}
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct64_ortho(q);
		for (i = 0; i < 4; i ++) {
			br_aes_ct64_interleave_out(
				w + (i << 2), q[i], q[i + 4]);
		}

		/*
		 * Serialise the 16 output words as little-endian bytes:
		 * tmp now holds 64 bytes of keystream.
		 */
		br_range_enc32le(tmp, w, 16);
		if (len <= 64) {
			/* Last batch: use only the bytes still needed. */
			xorbuf(buf, tmp, len);
			break;
		}
		xorbuf(buf, tmp, 64);
		buf += 64;
		len -= 64;
	}

	/* Store the advanced counter back, in big-endian convention. */
	br_enc32be(ivbuf +  0, iv0);
	br_enc32be(ivbuf +  4, iv1);
	br_enc32be(ivbuf +  8, iv2);
	br_enc32be(ivbuf + 12, iv3);
}
127 
128 /* see bearssl_block.h */
129 void
130 br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
131 	void *cbcmac, const void *data, size_t len)
132 {
133 	const unsigned char *buf;
134 	uint32_t cm0, cm1, cm2, cm3;
135 	uint64_t q[8];
136 	uint64_t sk_exp[120];
137 
138 	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
139 
140 	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
141 	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
142 	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
143 	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
144 
145 	buf = data;
146 	memset(q, 0, sizeof q);
147 	while (len > 0) {
148 		uint32_t w[4];
149 
150 		w[0] = cm0 ^ br_dec32le(buf +  0);
151 		w[1] = cm1 ^ br_dec32le(buf +  4);
152 		w[2] = cm2 ^ br_dec32le(buf +  8);
153 		w[3] = cm3 ^ br_dec32le(buf + 12);
154 
155 		br_aes_ct64_interleave_in(&q[0], &q[4], w);
156 		br_aes_ct64_ortho(q);
157 		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
158 		br_aes_ct64_ortho(q);
159 		br_aes_ct64_interleave_out(w, q[0], q[4]);
160 
161 		cm0 = w[0];
162 		cm1 = w[1];
163 		cm2 = w[2];
164 		cm3 = w[3];
165 		buf += 16;
166 		len -= 16;
167 	}
168 
169 	br_enc32le((unsigned char *)cbcmac +  0, cm0);
170 	br_enc32le((unsigned char *)cbcmac +  4, cm1);
171 	br_enc32le((unsigned char *)cbcmac +  8, cm2);
172 	br_enc32le((unsigned char *)cbcmac + 12, cm3);
173 }
174 
/*
 * Combined CTR encryption and CBC-MAC over the produced ciphertext,
 * in place (API: see bearssl_block.h).
 *
 *   ctx     key context; the round count and key schedule are read
 *           from it
 *   ctr     16-byte counter (four big-endian 32-bit words), updated
 *           on return
 *   cbcmac  16-byte running CBC-MAC value (four little-endian 32-bit
 *           words), updated on return
 *   data    plaintext on entry, ciphertext on return
 *   len     number of bytes, consumed 16 at a time
 *
 * NOTE(review): the loop subtracts 16 from len on every round and
 * finishes only when len is exactly 0, so len is assumed to be a
 * multiple of 16 (confirm in bearssl_block.h). When len is 0 on
 * entry the loop is skipped and ctr / cbcmac are rewritten with the
 * values they already had.
 */
void
br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	/*
	 * When encrypting, the CBC-MAC processing must be lagging by
	 * one block, since it operates on the encrypted values, so
	 * it must wait for that encryption to complete.
	 */

	unsigned char *buf;
	unsigned char *ivbuf;
	uint32_t iv0, iv1, iv2, iv3;
	uint32_t cm0, cm1, cm2, cm3;
	uint64_t sk_exp[120];
	uint64_t q[8];
	int first_iter;

	/*
	 * Fill sk_exp from the key schedule stored in the context; this
	 * is the form consumed by the bitslice encryption below.
	 */
	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);

	/*
	 * We keep the counter as four 32-bit values, with big-endian
	 * convention, because that's what is expected for purposes of
	 * incrementing the counter value.
	 */
	ivbuf = ctr;
	iv0 = br_dec32be(ivbuf +  0);
	iv1 = br_dec32be(ivbuf +  4);
	iv2 = br_dec32be(ivbuf +  8);
	iv3 = br_dec32be(ivbuf + 12);

	/*
	 * The current CBC-MAC value is kept in little-endian convention.
	 */
	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
	cm3 = br_dec32le((unsigned char *)cbcmac + 12);

	buf = data;
	first_iter = 1;
	/*
	 * Only two of the bitslice slots are used below; the others are
	 * cleared once here.
	 */
	memset(q, 0, sizeof q);
	while (len > 0) {
		uint32_t w[8], carry;

		/*
		 * The bitslice implementation expects values in
		 * little-endian convention, so we have to byteswap them.
		 */
		w[0] = br_swap32(iv0);
		w[1] = br_swap32(iv1);
		w[2] = br_swap32(iv2);
		w[3] = br_swap32(iv3);

		/*
		 * Increment the 128-bit counter. (x | -x) has its top bit
		 * set exactly when x is non-zero, so each carry is 1 only
		 * when the word just incremented wrapped to zero; the
		 * carry ripples to the upper words without any branch.
		 */
		iv3 ++;
		carry = ~(iv3 | -iv3) >> 31;
		iv2 += carry;
		carry &= -(~(iv2 | -iv2) >> 31);
		iv1 += carry;
		carry &= -(~(iv1 | -iv1) >> 31);
		iv0 += carry;

		/*
		 * The block for CBC-MAC.
		 */
		w[4] = cm0;
		w[5] = cm1;
		w[6] = cm2;
		w[7] = cm3;

		/*
		 * Both blocks go through a single encryption call: the
		 * counter block uses q[0]/q[4], the CBC-MAC block uses
		 * q[1]/q[5].
		 */
		br_aes_ct64_interleave_in(&q[0], &q[4], w);
		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
		br_aes_ct64_ortho(q);
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);

		/*
		 * We do the XOR with the plaintext in 32-bit registers,
		 * so that the values are available for CBC-MAC processing
		 * as well.
		 */
		w[0] ^= br_dec32le(buf +  0);
		w[1] ^= br_dec32le(buf +  4);
		w[2] ^= br_dec32le(buf +  8);
		w[3] ^= br_dec32le(buf + 12);
		br_enc32le(buf +  0, w[0]);
		br_enc32le(buf +  4, w[1]);
		br_enc32le(buf +  8, w[2]);
		br_enc32le(buf + 12, w[3]);

		buf += 16;
		len -= 16;

		/*
		 * We set the cm* values to the block to encrypt in the
		 * next iteration.
		 *
		 * On the first iteration, what was encrypted in the second
		 * slot was the incoming CBC-MAC value itself; that result
		 * (w[4..7]) is not used, and the next block to encrypt is
		 * the incoming value XORed with the first ciphertext block.
		 * On later iterations, w[4..7] is the encryption of the
		 * previous cm* values, and it is XORed with the ciphertext
		 * block that was just produced.
		 */
		if (first_iter) {
			first_iter = 0;
			cm0 ^= w[0];
			cm1 ^= w[1];
			cm2 ^= w[2];
			cm3 ^= w[3];
		} else {
			cm0 = w[0] ^ w[4];
			cm1 = w[1] ^ w[5];
			cm2 = w[2] ^ w[6];
			cm3 = w[3] ^ w[7];
		}

		/*
		 * If this was the last iteration, then compute the
		 * extra block encryption to complete CBC-MAC.
		 */
		if (len == 0) {
			w[0] = cm0;
			w[1] = cm1;
			w[2] = cm2;
			w[3] = cm3;
			br_aes_ct64_interleave_in(&q[0], &q[4], w);
			br_aes_ct64_ortho(q);
			br_aes_ct64_bitslice_encrypt(
				ctx->num_rounds, sk_exp, q);
			br_aes_ct64_ortho(q);
			br_aes_ct64_interleave_out(w, q[0], q[4]);
			cm0 = w[0];
			cm1 = w[1];
			cm2 = w[2];
			cm3 = w[3];
			break;
		}
	}

	/*
	 * Store the advanced counter (big-endian) and the updated
	 * CBC-MAC value (little-endian) back to the caller's buffers.
	 */
	br_enc32be(ivbuf +  0, iv0);
	br_enc32be(ivbuf +  4, iv1);
	br_enc32be(ivbuf +  8, iv2);
	br_enc32be(ivbuf + 12, iv3);
	br_enc32le((unsigned char *)cbcmac +  0, cm0);
	br_enc32le((unsigned char *)cbcmac +  4, cm1);
	br_enc32le((unsigned char *)cbcmac +  8, cm2);
	br_enc32le((unsigned char *)cbcmac + 12, cm3);
}
319 
320 /* see bearssl_block.h */
321 void
322 br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
323 	void *ctr, void *cbcmac, void *data, size_t len)
324 {
325 	unsigned char *buf;
326 	unsigned char *ivbuf;
327 	uint32_t iv0, iv1, iv2, iv3;
328 	uint32_t cm0, cm1, cm2, cm3;
329 	uint64_t sk_exp[120];
330 	uint64_t q[8];
331 
332 	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
333 
334 	/*
335 	 * We keep the counter as four 32-bit values, with big-endian
336 	 * convention, because that's what is expected for purposes of
337 	 * incrementing the counter value.
338 	 */
339 	ivbuf = ctr;
340 	iv0 = br_dec32be(ivbuf +  0);
341 	iv1 = br_dec32be(ivbuf +  4);
342 	iv2 = br_dec32be(ivbuf +  8);
343 	iv3 = br_dec32be(ivbuf + 12);
344 
345 	/*
346 	 * The current CBC-MAC value is kept in little-endian convention.
347 	 */
348 	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
349 	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
350 	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
351 	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
352 
353 	buf = data;
354 	memset(q, 0, sizeof q);
355 	while (len > 0) {
356 		uint32_t w[8], carry;
357 		unsigned char tmp[16];
358 
359 		/*
360 		 * The bitslice implementation expects values in
361 		 * little-endian convention, so we have to byteswap them.
362 		 */
363 		w[0] = br_swap32(iv0);
364 		w[1] = br_swap32(iv1);
365 		w[2] = br_swap32(iv2);
366 		w[3] = br_swap32(iv3);
367 		iv3 ++;
368 		carry = ~(iv3 | -iv3) >> 31;
369 		iv2 += carry;
370 		carry &= -(~(iv2 | -iv2) >> 31);
371 		iv1 += carry;
372 		carry &= -(~(iv1 | -iv1) >> 31);
373 		iv0 += carry;
374 
375 		/*
376 		 * The block for CBC-MAC.
377 		 */
378 		w[4] = cm0 ^ br_dec32le(buf +  0);
379 		w[5] = cm1 ^ br_dec32le(buf +  4);
380 		w[6] = cm2 ^ br_dec32le(buf +  8);
381 		w[7] = cm3 ^ br_dec32le(buf + 12);
382 
383 		br_aes_ct64_interleave_in(&q[0], &q[4], w);
384 		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
385 		br_aes_ct64_ortho(q);
386 		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
387 		br_aes_ct64_ortho(q);
388 		br_aes_ct64_interleave_out(w, q[0], q[4]);
389 		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
390 
391 		br_enc32le(tmp +  0, w[0]);
392 		br_enc32le(tmp +  4, w[1]);
393 		br_enc32le(tmp +  8, w[2]);
394 		br_enc32le(tmp + 12, w[3]);
395 		xorbuf(buf, tmp, 16);
396 		cm0 = w[4];
397 		cm1 = w[5];
398 		cm2 = w[6];
399 		cm3 = w[7];
400 		buf += 16;
401 		len -= 16;
402 	}
403 
404 	br_enc32be(ivbuf +  0, iv0);
405 	br_enc32be(ivbuf +  4, iv1);
406 	br_enc32be(ivbuf +  8, iv2);
407 	br_enc32be(ivbuf + 12, iv3);
408 	br_enc32le((unsigned char *)cbcmac +  0, cm0);
409 	br_enc32le((unsigned char *)cbcmac +  4, cm1);
410 	br_enc32le((unsigned char *)cbcmac +  8, cm2);
411 	br_enc32le((unsigned char *)cbcmac + 12, cm3);
412 }
413 
/*
 * Method table for this implementation (structure declared in
 * bearssl_block.h). br_aes_ct64_ctrcbc_init() stores the address of
 * this table in the context it initialises.
 *
 * The initialiser is positional, so the order of the entries must
 * match the field order of br_block_ctrcbc_class. Each function is
 * declared with the concrete br_aes_ct64_ctrcbc_keys context type and
 * is cast here to a signature whose first parameter is a pointer to
 * the class pointer.
 */
const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
	/* Size, in bytes, of the key context structure. */
	sizeof(br_aes_ct64_ctrcbc_keys),
	/*
	 * NOTE(review): presumably the block size in bytes (16) and its
	 * base-2 logarithm (4) -- confirm against the field order in
	 * bearssl_block.h.
	 */
	16,
	4,
	/* Context initialisation (key schedule). */
	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
		&br_aes_ct64_ctrcbc_init,
	/* Combined CTR encryption + CBC-MAC. */
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct64_ctrcbc_encrypt,
	/* Combined CBC-MAC + CTR decryption. */
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, void *, size_t))
		&br_aes_ct64_ctrcbc_decrypt,
	/* CTR processing only. */
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, void *, size_t))
		&br_aes_ct64_ctrcbc_ctr,
	/* CBC-MAC processing only. */
	(void (*)(const br_block_ctrcbc_class *const *,
		void *, const void *, size_t))
		&br_aes_ct64_ctrcbc_mac
};
434