1 /*
2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "inner.h"
26 
27 /* see bearssl_block.h */
28 void
29 br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
30 	const void *key, size_t len)
31 {
32 	ctx->vtable = &br_aes_ct_ctrcbc_vtable;
33 	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
34 }
35 
36 static void
37 xorbuf(void *dst, const void *src, size_t len)
38 {
39 	unsigned char *d;
40 	const unsigned char *s;
41 
42 	d = dst;
43 	s = src;
44 	while (len -- > 0) {
45 		*d ++ ^= *s ++;
46 	}
47 }
48 
49 /* see bearssl_block.h */
50 void
51 br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
52 	void *ctr, void *data, size_t len)
53 {
54 	unsigned char *buf;
55 	unsigned char *ivbuf;
56 	uint32_t iv0, iv1, iv2, iv3;
57 	uint32_t sk_exp[120];
58 
59 	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
60 
61 	/*
62 	 * We keep the counter as four 32-bit values, with big-endian
63 	 * convention, because that's what is expected for purposes of
64 	 * incrementing the counter value.
65 	 */
66 	ivbuf = ctr;
67 	iv0 = br_dec32be(ivbuf +  0);
68 	iv1 = br_dec32be(ivbuf +  4);
69 	iv2 = br_dec32be(ivbuf +  8);
70 	iv3 = br_dec32be(ivbuf + 12);
71 
72 	buf = data;
73 	while (len > 0) {
74 		uint32_t q[8], carry;
75 		unsigned char tmp[32];
76 
77 		/*
78 		 * The bitslice implementation expects values in
79 		 * little-endian convention, so we have to byteswap them.
80 		 */
81 		q[0] = br_swap32(iv0);
82 		q[2] = br_swap32(iv1);
83 		q[4] = br_swap32(iv2);
84 		q[6] = br_swap32(iv3);
85 		iv3 ++;
86 		carry = ~(iv3 | -iv3) >> 31;
87 		iv2 += carry;
88 		carry &= -(~(iv2 | -iv2) >> 31);
89 		iv1 += carry;
90 		carry &= -(~(iv1 | -iv1) >> 31);
91 		iv0 += carry;
92 		q[1] = br_swap32(iv0);
93 		q[3] = br_swap32(iv1);
94 		q[5] = br_swap32(iv2);
95 		q[7] = br_swap32(iv3);
96 		if (len > 16) {
97 			iv3 ++;
98 			carry = ~(iv3 | -iv3) >> 31;
99 			iv2 += carry;
100 			carry &= -(~(iv2 | -iv2) >> 31);
101 			iv1 += carry;
102 			carry &= -(~(iv1 | -iv1) >> 31);
103 			iv0 += carry;
104 		}
105 
106 		br_aes_ct_ortho(q);
107 		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
108 		br_aes_ct_ortho(q);
109 
110 		br_enc32le(tmp, q[0]);
111 		br_enc32le(tmp + 4, q[2]);
112 		br_enc32le(tmp + 8, q[4]);
113 		br_enc32le(tmp + 12, q[6]);
114 		br_enc32le(tmp + 16, q[1]);
115 		br_enc32le(tmp + 20, q[3]);
116 		br_enc32le(tmp + 24, q[5]);
117 		br_enc32le(tmp + 28, q[7]);
118 
119 		if (len <= 32) {
120 			xorbuf(buf, tmp, len);
121 			break;
122 		}
123 		xorbuf(buf, tmp, 32);
124 		buf += 32;
125 		len -= 32;
126 	}
127 	br_enc32be(ivbuf +  0, iv0);
128 	br_enc32be(ivbuf +  4, iv1);
129 	br_enc32be(ivbuf +  8, iv2);
130 	br_enc32be(ivbuf + 12, iv3);
131 }
132 
133 /* see bearssl_block.h */
134 void
135 br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
136 	void *cbcmac, const void *data, size_t len)
137 {
138 	const unsigned char *buf;
139 	uint32_t cm0, cm1, cm2, cm3;
140 	uint32_t q[8];
141 	uint32_t sk_exp[120];
142 
143 	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
144 
145 	buf = data;
146 	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
147 	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
148 	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
149 	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
150 	q[1] = 0;
151 	q[3] = 0;
152 	q[5] = 0;
153 	q[7] = 0;
154 
155 	while (len > 0) {
156 		q[0] = cm0 ^ br_dec32le(buf +  0);
157 		q[2] = cm1 ^ br_dec32le(buf +  4);
158 		q[4] = cm2 ^ br_dec32le(buf +  8);
159 		q[6] = cm3 ^ br_dec32le(buf + 12);
160 
161 		br_aes_ct_ortho(q);
162 		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
163 		br_aes_ct_ortho(q);
164 
165 		cm0 = q[0];
166 		cm1 = q[2];
167 		cm2 = q[4];
168 		cm3 = q[6];
169 		buf += 16;
170 		len -= 16;
171 	}
172 
173 	br_enc32le((unsigned char *)cbcmac +  0, cm0);
174 	br_enc32le((unsigned char *)cbcmac +  4, cm1);
175 	br_enc32le((unsigned char *)cbcmac +  8, cm2);
176 	br_enc32le((unsigned char *)cbcmac + 12, cm3);
177 }
178 
179 /* see bearssl_block.h */
180 void
181 br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
182 	void *ctr, void *cbcmac, void *data, size_t len)
183 {
184 	/*
185 	 * When encrypting, the CBC-MAC processing must be lagging by
186 	 * one block, since it operates on the encrypted values, so
187 	 * it must wait for that encryption to complete.
188 	 */
189 
190 	unsigned char *buf;
191 	unsigned char *ivbuf;
192 	uint32_t iv0, iv1, iv2, iv3;
193 	uint32_t cm0, cm1, cm2, cm3;
194 	uint32_t sk_exp[120];
195 	int first_iter;
196 
197 	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
198 
199 	/*
200 	 * We keep the counter as four 32-bit values, with big-endian
201 	 * convention, because that's what is expected for purposes of
202 	 * incrementing the counter value.
203 	 */
204 	ivbuf = ctr;
205 	iv0 = br_dec32be(ivbuf +  0);
206 	iv1 = br_dec32be(ivbuf +  4);
207 	iv2 = br_dec32be(ivbuf +  8);
208 	iv3 = br_dec32be(ivbuf + 12);
209 
210 	/*
211 	 * The current CBC-MAC value is kept in little-endian convention.
212 	 */
213 	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
214 	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
215 	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
216 	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
217 
218 	buf = data;
219 	first_iter = 1;
220 	while (len > 0) {
221 		uint32_t q[8], carry;
222 
223 		/*
224 		 * The bitslice implementation expects values in
225 		 * little-endian convention, so we have to byteswap them.
226 		 */
227 		q[0] = br_swap32(iv0);
228 		q[2] = br_swap32(iv1);
229 		q[4] = br_swap32(iv2);
230 		q[6] = br_swap32(iv3);
231 		iv3 ++;
232 		carry = ~(iv3 | -iv3) >> 31;
233 		iv2 += carry;
234 		carry &= -(~(iv2 | -iv2) >> 31);
235 		iv1 += carry;
236 		carry &= -(~(iv1 | -iv1) >> 31);
237 		iv0 += carry;
238 
239 		/*
240 		 * The odd values are used for CBC-MAC.
241 		 */
242 		q[1] = cm0;
243 		q[3] = cm1;
244 		q[5] = cm2;
245 		q[7] = cm3;
246 
247 		br_aes_ct_ortho(q);
248 		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
249 		br_aes_ct_ortho(q);
250 
251 		/*
252 		 * We do the XOR with the plaintext in 32-bit registers,
253 		 * so that the value are available for CBC-MAC processing
254 		 * as well.
255 		 */
256 		q[0] ^= br_dec32le(buf +  0);
257 		q[2] ^= br_dec32le(buf +  4);
258 		q[4] ^= br_dec32le(buf +  8);
259 		q[6] ^= br_dec32le(buf + 12);
260 		br_enc32le(buf +  0, q[0]);
261 		br_enc32le(buf +  4, q[2]);
262 		br_enc32le(buf +  8, q[4]);
263 		br_enc32le(buf + 12, q[6]);
264 
265 		buf += 16;
266 		len -= 16;
267 
268 		/*
269 		 * We set the cm* values to the block to encrypt in the
270 		 * next iteration.
271 		 */
272 		if (first_iter) {
273 			first_iter = 0;
274 			cm0 ^= q[0];
275 			cm1 ^= q[2];
276 			cm2 ^= q[4];
277 			cm3 ^= q[6];
278 		} else {
279 			cm0 = q[0] ^ q[1];
280 			cm1 = q[2] ^ q[3];
281 			cm2 = q[4] ^ q[5];
282 			cm3 = q[6] ^ q[7];
283 		}
284 
285 		/*
286 		 * If this was the last iteration, then compute the
287 		 * extra block encryption to complete CBC-MAC.
288 		 */
289 		if (len == 0) {
290 			q[0] = cm0;
291 			q[2] = cm1;
292 			q[4] = cm2;
293 			q[6] = cm3;
294 			br_aes_ct_ortho(q);
295 			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
296 			br_aes_ct_ortho(q);
297 			cm0 = q[0];
298 			cm1 = q[2];
299 			cm2 = q[4];
300 			cm3 = q[6];
301 			break;
302 		}
303 	}
304 
305 	br_enc32be(ivbuf +  0, iv0);
306 	br_enc32be(ivbuf +  4, iv1);
307 	br_enc32be(ivbuf +  8, iv2);
308 	br_enc32be(ivbuf + 12, iv3);
309 	br_enc32le((unsigned char *)cbcmac +  0, cm0);
310 	br_enc32le((unsigned char *)cbcmac +  4, cm1);
311 	br_enc32le((unsigned char *)cbcmac +  8, cm2);
312 	br_enc32le((unsigned char *)cbcmac + 12, cm3);
313 }
314 
315 /* see bearssl_block.h */
316 void
317 br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
318 	void *ctr, void *cbcmac, void *data, size_t len)
319 {
320 	unsigned char *buf;
321 	unsigned char *ivbuf;
322 	uint32_t iv0, iv1, iv2, iv3;
323 	uint32_t cm0, cm1, cm2, cm3;
324 	uint32_t sk_exp[120];
325 
326 	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
327 
328 	/*
329 	 * We keep the counter as four 32-bit values, with big-endian
330 	 * convention, because that's what is expected for purposes of
331 	 * incrementing the counter value.
332 	 */
333 	ivbuf = ctr;
334 	iv0 = br_dec32be(ivbuf +  0);
335 	iv1 = br_dec32be(ivbuf +  4);
336 	iv2 = br_dec32be(ivbuf +  8);
337 	iv3 = br_dec32be(ivbuf + 12);
338 
339 	/*
340 	 * The current CBC-MAC value is kept in little-endian convention.
341 	 */
342 	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
343 	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
344 	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
345 	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
346 
347 	buf = data;
348 	while (len > 0) {
349 		uint32_t q[8], carry;
350 		unsigned char tmp[16];
351 
352 		/*
353 		 * The bitslice implementation expects values in
354 		 * little-endian convention, so we have to byteswap them.
355 		 */
356 		q[0] = br_swap32(iv0);
357 		q[2] = br_swap32(iv1);
358 		q[4] = br_swap32(iv2);
359 		q[6] = br_swap32(iv3);
360 		iv3 ++;
361 		carry = ~(iv3 | -iv3) >> 31;
362 		iv2 += carry;
363 		carry &= -(~(iv2 | -iv2) >> 31);
364 		iv1 += carry;
365 		carry &= -(~(iv1 | -iv1) >> 31);
366 		iv0 += carry;
367 
368 		/*
369 		 * The odd values are used for CBC-MAC.
370 		 */
371 		q[1] = cm0 ^ br_dec32le(buf +  0);
372 		q[3] = cm1 ^ br_dec32le(buf +  4);
373 		q[5] = cm2 ^ br_dec32le(buf +  8);
374 		q[7] = cm3 ^ br_dec32le(buf + 12);
375 
376 		br_aes_ct_ortho(q);
377 		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
378 		br_aes_ct_ortho(q);
379 
380 		br_enc32le(tmp +  0, q[0]);
381 		br_enc32le(tmp +  4, q[2]);
382 		br_enc32le(tmp +  8, q[4]);
383 		br_enc32le(tmp + 12, q[6]);
384 		xorbuf(buf, tmp, 16);
385 		cm0 = q[1];
386 		cm1 = q[3];
387 		cm2 = q[5];
388 		cm3 = q[7];
389 		buf += 16;
390 		len -= 16;
391 	}
392 
393 	br_enc32be(ivbuf +  0, iv0);
394 	br_enc32be(ivbuf +  4, iv1);
395 	br_enc32be(ivbuf +  8, iv2);
396 	br_enc32be(ivbuf + 12, iv3);
397 	br_enc32le((unsigned char *)cbcmac +  0, cm0);
398 	br_enc32le((unsigned char *)cbcmac +  4, cm1);
399 	br_enc32le((unsigned char *)cbcmac +  8, cm2);
400 	br_enc32le((unsigned char *)cbcmac + 12, cm3);
401 }
402 
403 /* see bearssl_block.h */
404 const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
405 	sizeof(br_aes_ct_ctrcbc_keys),
406 	16,
407 	4,
408 	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
409 		&br_aes_ct_ctrcbc_init,
410 	(void (*)(const br_block_ctrcbc_class *const *,
411 		void *, void *, void *, size_t))
412 		&br_aes_ct_ctrcbc_encrypt,
413 	(void (*)(const br_block_ctrcbc_class *const *,
414 		void *, void *, void *, size_t))
415 		&br_aes_ct_ctrcbc_decrypt,
416 	(void (*)(const br_block_ctrcbc_class *const *,
417 		void *, void *, size_t))
418 		&br_aes_ct_ctrcbc_ctr,
419 	(void (*)(const br_block_ctrcbc_class *const *,
420 		void *, const void *, size_t))
421 		&br_aes_ct_ctrcbc_mac
422 };
423