/*
 * Copyright (c) 2002, 2003, 2009 Bob Deblier
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
19 
/*!\file aes.c
 * \brief AES block cipher, as specified by NIST FIPS 197.
 *
 * The table lookup method was inspired by Brian Gladman's AES implementation,
 * which is much more readable than the standard code.
 *
 * \author Bob Deblier <bob.deblier@telenet.be>
 * \ingroup BC_aes_m BC_m
 */
29 
30 #define BEECRYPT_DLL_EXPORT
31 
32 #if HAVE_CONFIG_H
33 # include "config.h"
34 #endif
35 
36 #ifdef OPTIMIZE_MMX
37 # include <mmintrin.h>
38 #endif
39 
40 #include "beecrypt/aes.h"
41 
42 #if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN)
43 # if (BYTE_ORDER != BIG_ENDIAN) && (BYTE_ORDER != LITTLE_ENDIAN)
44 #  error unsupported endian-ness.
45 # endif
46 #endif
47 
48 #if WORDS_BIGENDIAN
49 # include "beecrypt/aes_be.h"
50 #else
51 #  include "beecrypt/aes_le.h"
52 #endif
53 
/* Prototypes for the optional mode-level routines (ECB, CBC, CTR) that are
 * implemented outside this file. Each one is declared only when its ASM_*
 * macro is defined, and in that case it is referenced from the `aes`
 * descriptor; otherwise the descriptor slot is left as a null pointer.
 * NOTE(review): the trailing unsigned int is presumably a block count - confirm
 * against the assembler sources.
 */
#ifdef ASM_AESENCRYPTECB
extern int aesEncryptECB(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTECB
extern int aesDecryptECB(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESENCRYPTCBC
extern int aesEncryptCBC(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTCBC
extern int aesDecryptCBC(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESENCRYPTCTR
extern int aesEncryptCTR(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTCTR
extern int aesDecryptCTR(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif
77 
78 const blockCipher aes = {
79 	.name = "AES",
80 	.paramsize = sizeof(aesParam),
81 	.blocksize = 16,
82 	.keybitsmin = 128,
83 	.keybitsmax = 256,
84 	.keybitsinc = 64,
85 	.setup = (blockCipherSetup) aesSetup,
86 	.setiv = (blockCipherSetIV) aesSetIV,
87 	.setctr = (blockCipherSetCTR) aesSetCTR,
88 	.getfb = (blockCipherFeedback) aesFeedback,
89 	.raw =
90 	{
91 		.encrypt = (blockCipherRawcrypt) aesEncrypt,
92 		.decrypt = (blockCipherRawcrypt) aesDecrypt
93 	},
94 	.ecb =
95 	{
96 		#ifdef ASM_AESENCRYPTECB
97 		.encrypt = (blockCipherModcrypt) aesEncryptECB,
98 		#else
99 		.encrypt = (blockCipherModcrypt) 0,
100 		#endif
101 		#ifdef ASM_AESDECRYPTECB
102 		.decrypt = (blockCipherModcrypt) aesDecryptECB,
103 		#else
104 		.decrypt = (blockCipherModcrypt) 0,
105 		#endif
106 	},
107 	.cbc =
108 	{
109 		#ifdef ASM_AESENCRYPTCBC
110 		.encrypt = (blockCipherModcrypt) aesEncryptCBC,
111 		#else
112 		.encrypt = (blockCipherModcrypt) 0,
113 		#endif
114 		#ifdef ASM_AESDECRYPTCBC
115 		.decrypt = (blockCipherModcrypt) aesDecryptCBC,
116 		#else
117 		.decrypt = (blockCipherModcrypt) 0
118 		#endif
119 	},
120 	.ctr =
121 	{
122 		#ifdef ASM_AESENCRYPTCTR
123 		.encrypt = (blockCipherModcrypt) aesEncryptCTR,
124 		#else
125 		.encrypt = (blockCipherModcrypt) 0,
126 		#endif
127 		#ifdef ASM_AESDECRYPTCTR
128 		.decrypt = (blockCipherModcrypt) aesDecryptCTR,
129 		#else
130 		.decrypt = (blockCipherModcrypt) 0
131 		#endif
132 	}
133 };
134 
aesSetup(aesParam * ap,const byte * key,size_t keybits,cipherOperation op)135 int aesSetup(aesParam* ap, const byte* key, size_t keybits, cipherOperation op)
136 {
137 	if ((op != ENCRYPT) && (op != DECRYPT))
138 		return -1;
139 
140 	if (((keybits & 63) == 0) && (keybits >= 128) && (keybits <= 256))
141 	{
142 		register uint32_t* rk, t, i, j;
143 
144 		/* clear fdback/iv */
145 		ap->fdback[0] = 0;
146 		ap->fdback[1] = 0;
147 		ap->fdback[2] = 0;
148 		ap->fdback[3] = 0;
149 
150 		ap->nr = 6 + (keybits >> 5);
151 
152 		rk = ap->k;
153 
154 		memcpy(rk, key, keybits >> 3);
155 
156 		i = 0;
157 
158 		if (keybits == 128)
159 		{
160 			while (1)
161 			{
162 				t = rk[3];
163 				#if WORDS_BIGENDIAN
164 				t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
165 					(_ae4[(t >>  8) & 0xff] & 0x00ff0000) ^
166 					(_ae4[(t      ) & 0xff] & 0x0000ff00) ^
167 					(_ae4[(t >> 24)       ] & 0x000000ff) ^
168 					 _arc[i];
169 				#else
170 				t = (_ae4[(t >>  8) & 0xff] & 0x000000ff) ^
171 					(_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
172 					(_ae4[(t >> 24)       ] & 0x00ff0000) ^
173 					(_ae4[(t      ) & 0xff] & 0xff000000) ^
174 					 _arc[i];
175 				#endif
176 				rk[4] = (t ^= rk[0]);
177 				rk[5] = (t ^= rk[1]);
178 				rk[6] = (t ^= rk[2]);
179 				rk[7] = (t ^= rk[3]);
180 				if (++i == 10)
181 					break;
182 				rk += 4;
183 			}
184 		}
185 		else if (keybits == 192)
186 		{
187 			while (1)
188 			{
189 				t = rk[5];
190 				#if WORDS_BIGENDIAN
191 				t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
192 					(_ae4[(t >>  8) & 0xff] & 0x00ff0000) ^
193 					(_ae4[(t      ) & 0xff] & 0x0000ff00) ^
194 					(_ae4[(t >> 24)       ] & 0x000000ff) ^
195 					 _arc[i];
196 				#else
197 				t = (_ae4[(t >>  8) & 0xff] & 0x000000ff) ^
198 					(_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
199 					(_ae4[(t >> 24)       ] & 0x00ff0000) ^
200 					(_ae4[(t      ) & 0xff] & 0xff000000) ^
201 					 _arc[i];
202 				#endif
203 				rk[6] = (t ^= rk[0]);
204 				rk[7] = (t ^= rk[1]);
205 				rk[8] = (t ^= rk[2]);
206 				rk[9] = (t ^= rk[3]);
207 				if (++i == 8)
208 					break;
209 				rk[10] = (t ^= rk[4]);
210 				rk[11] = (t ^= rk[5]);
211 				rk += 6;
212 			}
213 		}
214 		else if (keybits == 256)
215 		{
216 			while (1)
217 			{
218 				t = rk[7];
219 				#if WORDS_BIGENDIAN
220 				t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
221 					(_ae4[(t >>  8) & 0xff] & 0x00ff0000) ^
222 					(_ae4[(t      ) & 0xff] & 0x0000ff00) ^
223 					(_ae4[(t >> 24)       ] & 0x000000ff) ^
224 					 _arc[i];
225 				#else
226 				t = (_ae4[(t >>  8) & 0xff] & 0x000000ff) ^
227 					(_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
228 					(_ae4[(t >> 24)       ] & 0x00ff0000) ^
229 					(_ae4[(t      ) & 0xff] & 0xff000000) ^
230 					 _arc[i];
231 				#endif
232 				rk[8] = (t ^= rk[0]);
233 				rk[9] = (t ^= rk[1]);
234 				rk[10] = (t ^= rk[2]);
235 				rk[11] = (t ^= rk[3]);
236 				if (++i == 7)
237 					break;
238 				#if WORDS_BIGENDIAN
239 				t = (_ae4[(t >> 24)       ] & 0xff000000) ^
240 					(_ae4[(t >> 16) & 0xff] & 0x00ff0000) ^
241 					(_ae4[(t >>  8) & 0xff] & 0x0000ff00) ^
242 					(_ae4[(t      ) & 0xff] & 0x000000ff);
243 				#else
244 				t = (_ae4[(t      ) & 0xff] & 0x000000ff) ^
245 					(_ae4[(t >>  8) & 0xff] & 0x0000ff00) ^
246 					(_ae4[(t >> 16) & 0xff] & 0x00ff0000) ^
247 					(_ae4[(t >> 24)       ] & 0xff000000);
248 				#endif
249 				rk[12] = (t ^= rk[4]);
250 				rk[13] = (t ^= rk[5]);
251 				rk[14] = (t ^= rk[6]);
252 				rk[15] = (t ^= rk[7]);
253 				rk += 8;
254 			}
255 		}
256 
257 		if (op == DECRYPT)
258 		{
259 			rk = ap->k;
260 
261 			for (i = 0, j = (ap->nr << 2); i < j; i += 4, j -= 4)
262 			{
263 				t = rk[i  ]; rk[i  ] = rk[j  ]; rk[j  ] = t;
264 				t = rk[i+1]; rk[i+1] = rk[j+1]; rk[j+1] = t;
265 				t = rk[i+2]; rk[i+2] = rk[j+2]; rk[j+2] = t;
266 				t = rk[i+3]; rk[i+3] = rk[j+3]; rk[j+3] = t;
267 			}
268 			for (i = 1; i < ap->nr; i++)
269 			{
270 				rk += 4;
271 				#if WORDS_BIGENDIAN
272 				rk[0] =
273 					_ad0[_ae4[(rk[0] >> 24)       ] & 0xff] ^
274 					_ad1[_ae4[(rk[0] >> 16) & 0xff] & 0xff] ^
275 					_ad2[_ae4[(rk[0] >>  8) & 0xff] & 0xff] ^
276 					_ad3[_ae4[(rk[0]      ) & 0xff] & 0xff];
277 				rk[1] =
278 					_ad0[_ae4[(rk[1] >> 24)       ] & 0xff] ^
279 					_ad1[_ae4[(rk[1] >> 16) & 0xff] & 0xff] ^
280 					_ad2[_ae4[(rk[1] >>  8) & 0xff] & 0xff] ^
281 					_ad3[_ae4[(rk[1]      ) & 0xff] & 0xff];
282 				rk[2] =
283 					_ad0[_ae4[(rk[2] >> 24)       ] & 0xff] ^
284 					_ad1[_ae4[(rk[2] >> 16) & 0xff] & 0xff] ^
285 					_ad2[_ae4[(rk[2] >>  8) & 0xff] & 0xff] ^
286 					_ad3[_ae4[(rk[2]      ) & 0xff] & 0xff];
287 				rk[3] =
288 					_ad0[_ae4[(rk[3] >> 24)       ] & 0xff] ^
289 					_ad1[_ae4[(rk[3] >> 16) & 0xff] & 0xff] ^
290 					_ad2[_ae4[(rk[3] >>  8) & 0xff] & 0xff] ^
291 					_ad3[_ae4[(rk[3]      ) & 0xff] & 0xff];
292 				#else
293 				rk[0] =
294 					_ad0[_ae4[(rk[0]      ) & 0xff] & 0xff] ^
295 					_ad1[_ae4[(rk[0] >>  8) & 0xff] & 0xff] ^
296 					_ad2[_ae4[(rk[0] >> 16) & 0xff] & 0xff] ^
297 					_ad3[_ae4[(rk[0] >> 24)       ] & 0xff];
298 				rk[1] =
299 					_ad0[_ae4[(rk[1]      ) & 0xff] & 0xff] ^
300 					_ad1[_ae4[(rk[1] >>  8) & 0xff] & 0xff] ^
301 					_ad2[_ae4[(rk[1] >> 16) & 0xff] & 0xff] ^
302 					_ad3[_ae4[(rk[1] >> 24)       ] & 0xff];
303 				rk[2] =
304 					_ad0[_ae4[(rk[2]      ) & 0xff] & 0xff] ^
305 					_ad1[_ae4[(rk[2] >>  8) & 0xff] & 0xff] ^
306 					_ad2[_ae4[(rk[2] >> 16) & 0xff] & 0xff] ^
307 					_ad3[_ae4[(rk[2] >> 24)       ] & 0xff];
308 				rk[3] =
309 					_ad0[_ae4[(rk[3]      ) & 0xff] & 0xff] ^
310 					_ad1[_ae4[(rk[3] >>  8) & 0xff] & 0xff] ^
311 					_ad2[_ae4[(rk[3] >> 16) & 0xff] & 0xff] ^
312 					_ad3[_ae4[(rk[3] >> 24)       ] & 0xff];
313 				#endif
314 			}
315 		}
316 		return 0;
317 	}
318 	return -1;
319 }
320 
321 #ifndef ASM_AESSETIV
aesSetIV(aesParam * ap,const byte * iv)322 int aesSetIV(aesParam* ap, const byte* iv)
323 {
324 	if (iv)
325 		memcpy(ap->fdback, iv, 16);
326 	else
327 		memset(ap->fdback, 0, 16);
328 
329 	return 0;
330 }
331 #endif
332 
333 #ifndef ASM_AESSETCTR
aesSetCTR(aesParam * ap,const byte * nivz,size_t counter)334 int aesSetCTR(aesParam* ap, const byte* nivz, size_t counter)
335 {
336 	unsigned int blockwords = MP_BYTES_TO_WORDS(16);
337 
338 	if (nivz)
339 	{
340 		mpw tmp[MP_BYTES_TO_WORDS(16)];
341 
342 		os2ip((mpw*) ap->fdback, blockwords, nivz, 16);
343 		mpsetws(blockwords, tmp, counter);
344 		mpadd(blockwords, (mpw*) ap->fdback, tmp);
345 	}
346 	else
347 		mpsetws(blockwords, (mpw*) ap->fdback, counter);
348 
349 	return 0;
350 }
351 #endif
352 
353 #ifndef ASM_AESENCRYPT
aesEncrypt(aesParam * ap,uint32_t * dst,const uint32_t * src)354 int aesEncrypt(aesParam* ap, uint32_t* dst, const uint32_t* src)
355 {
356 	#if defined (OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
357 	register __m64 s0, s1, s2, s3;
358 	register __m64 t0, t1, t2, t3;
359 	register uint32_t i0, i1, i2, i3;
360 	#else
361 	register uint32_t s0, s1, s2, s3;
362 	register uint32_t t0, t1, t2, t3;
363 	#endif
364 	register uint32_t* rk = ap->k;
365 
366 	#if defined (OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
367 	s0 = _mm_cvtsi32_si64(src[0] ^ rk[0]);
368 	s1 = _mm_cvtsi32_si64(src[1] ^ rk[1]);
369 	s2 = _mm_cvtsi32_si64(src[2] ^ rk[2]);
370 	s3 = _mm_cvtsi32_si64(src[3] ^ rk[3]);
371 	#else
372 	s0 = src[0] ^ rk[0];
373 	s1 = src[1] ^ rk[1];
374 	s2 = src[2] ^ rk[2];
375 	s3 = src[3] ^ rk[3];
376 	#endif
377 
378 	etfs(4);		/* round 1 */
379 	esft(8);		/* round 2 */
380 	etfs(12);		/* round 3 */
381 	esft(16);		/* round 4 */
382 	etfs(20);		/* round 5 */
383 	esft(24);		/* round 6 */
384 	etfs(28);		/* round 7 */
385 	esft(32);		/* round 8 */
386 	etfs(36);		/* round 9 */
387 
388 	if (ap->nr > 10)
389 	{
390 		esft(40);	/* round 10 */
391 		etfs(44);	/* round 11 */
392 		if (ap->nr > 12)
393 		{
394 			esft(48);	/* round 12 */
395 			etfs(52);	/* round 13 */
396 		}
397 	}
398 
399 	rk += (ap->nr << 2);
400 
401 	elr(); /* last round */
402 
403 	#if defined(OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
404 	dst[0] = _mm_cvtsi64_si32(s0);
405 	dst[1] = _mm_cvtsi64_si32(s1);
406 	dst[2] = _mm_cvtsi64_si32(s2);
407 	dst[3] = _mm_cvtsi64_si32(s3);
408 	#else
409 	dst[0] = s0;
410 	dst[1] = s1;
411 	dst[2] = s2;
412 	dst[3] = s3;
413 	#endif
414 
415 	return 0;
416 }
417 #endif
418 
419 #ifndef ASM_AESDECRYPT
aesDecrypt(aesParam * ap,uint32_t * dst,const uint32_t * src)420 int aesDecrypt(aesParam* ap, uint32_t* dst, const uint32_t* src)
421 {
422 	register uint32_t s0, s1, s2, s3;
423 	register uint32_t t0, t1, t2, t3;
424 	register uint32_t* rk = ap->k;
425 
426 	s0 = src[0] ^ rk[0];
427 	s1 = src[1] ^ rk[1];
428 	s2 = src[2] ^ rk[2];
429 	s3 = src[3] ^ rk[3];
430 
431 	dtfs(4);		/* round 1 */
432 	dsft(8);		/* round 2 */
433 	dtfs(12);		/* round 3 */
434 	dsft(16);		/* round 4 */
435 	dtfs(20);		/* round 5 */
436 	dsft(24);		/* round 6 */
437 	dtfs(28);		/* round 7 */
438 	dsft(32);		/* round 8 */
439 	dtfs(36);		/* round 9 */
440 
441 	if (ap->nr > 10)
442 	{
443 		dsft(40);	/* round 10 */
444 		dtfs(44);	/* round 11 */
445 		if (ap->nr > 12)
446 		{
447 			dsft(48);	/* round 12 */
448 			dtfs(52);	/* round 13 */
449 		}
450 	}
451 
452 	rk += (ap->nr << 2);
453 
454 	dlr(); /* last round */
455 
456 	dst[0] = s0;
457 	dst[1] = s1;
458 	dst[2] = s2;
459 	dst[3] = s3;
460 
461 	return 0;
462 }
463 #endif
464 
/*!\brief Gives access to the feedback buffer of a parameter block.
 *
 * \param ap parameter block.
 * \return pointer to ap->fdback; it refers to storage inside *ap, so it is
 *         only usable for as long as *ap is.
 */
uint32_t* aesFeedback(aesParam* ap)
{
	return ap->fdback;
}
469