1 /*
2 * Copyright (c) 2002, 2003, 2009 Bob Deblier
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20 /*!\file aes.c
21 * \brief AES block cipher, as specified by NIST FIPS 197.
22 *
23 * The table lookup method was inspired by Brian Gladman's AES implementation,
24 * which is much more readable than the standard code.
25 *
26 * \author Bob Deblier <bob.deblier@telenet.be>
27 * \ingroup BC_aes_m BC_m
28 */
29
30 #define BEECRYPT_DLL_EXPORT
31
32 #if HAVE_CONFIG_H
33 # include "config.h"
34 #endif
35
36 #ifdef OPTIMIZE_MMX
37 # include <mmintrin.h>
38 #endif
39
40 #include "beecrypt/aes.h"
41
42 #if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && defined(LITTLE_ENDIAN)
43 # if (BYTE_ORDER != BIG_ENDIAN) && (BYTE_ORDER != LITTLE_ENDIAN)
44 # error unsupported endian-ness.
45 # endif
46 #endif
47
48 #if WORDS_BIGENDIAN
49 # include "beecrypt/aes_be.h"
50 #else
51 # include "beecrypt/aes_le.h"
52 #endif
53
/*
 * Prototypes for the optional whole-mode routines (ECB, CBC, CTR).
 * Each one is declared only when its ASM_* macro is defined; no definition
 * exists in this file, so the symbol has to be supplied by another object
 * (the ASM_ prefix suggests an assembler implementation).
 */
#ifdef ASM_AESENCRYPTECB
extern int aesEncryptECB(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTECB
extern int aesDecryptECB(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESENCRYPTCBC
extern int aesEncryptCBC(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTCBC
extern int aesDecryptCBC(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESENCRYPTCTR
extern int aesEncryptCTR(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif

#ifdef ASM_AESDECRYPTCTR
extern int aesDecryptCTR(aesParam*, uint32_t*, const uint32_t*, unsigned int);
#endif
77
78 const blockCipher aes = {
79 .name = "AES",
80 .paramsize = sizeof(aesParam),
81 .blocksize = 16,
82 .keybitsmin = 128,
83 .keybitsmax = 256,
84 .keybitsinc = 64,
85 .setup = (blockCipherSetup) aesSetup,
86 .setiv = (blockCipherSetIV) aesSetIV,
87 .setctr = (blockCipherSetCTR) aesSetCTR,
88 .getfb = (blockCipherFeedback) aesFeedback,
89 .raw =
90 {
91 .encrypt = (blockCipherRawcrypt) aesEncrypt,
92 .decrypt = (blockCipherRawcrypt) aesDecrypt
93 },
94 .ecb =
95 {
96 #ifdef ASM_AESENCRYPTECB
97 .encrypt = (blockCipherModcrypt) aesEncryptECB,
98 #else
99 .encrypt = (blockCipherModcrypt) 0,
100 #endif
101 #ifdef ASM_AESDECRYPTECB
102 .decrypt = (blockCipherModcrypt) aesDecryptECB,
103 #else
104 .decrypt = (blockCipherModcrypt) 0,
105 #endif
106 },
107 .cbc =
108 {
109 #ifdef ASM_AESENCRYPTCBC
110 .encrypt = (blockCipherModcrypt) aesEncryptCBC,
111 #else
112 .encrypt = (blockCipherModcrypt) 0,
113 #endif
114 #ifdef ASM_AESDECRYPTCBC
115 .decrypt = (blockCipherModcrypt) aesDecryptCBC,
116 #else
117 .decrypt = (blockCipherModcrypt) 0
118 #endif
119 },
120 .ctr =
121 {
122 #ifdef ASM_AESENCRYPTCTR
123 .encrypt = (blockCipherModcrypt) aesEncryptCTR,
124 #else
125 .encrypt = (blockCipherModcrypt) 0,
126 #endif
127 #ifdef ASM_AESDECRYPTCTR
128 .decrypt = (blockCipherModcrypt) aesDecryptCTR,
129 #else
130 .decrypt = (blockCipherModcrypt) 0
131 #endif
132 }
133 };
134
aesSetup(aesParam * ap,const byte * key,size_t keybits,cipherOperation op)135 int aesSetup(aesParam* ap, const byte* key, size_t keybits, cipherOperation op)
136 {
137 if ((op != ENCRYPT) && (op != DECRYPT))
138 return -1;
139
140 if (((keybits & 63) == 0) && (keybits >= 128) && (keybits <= 256))
141 {
142 register uint32_t* rk, t, i, j;
143
144 /* clear fdback/iv */
145 ap->fdback[0] = 0;
146 ap->fdback[1] = 0;
147 ap->fdback[2] = 0;
148 ap->fdback[3] = 0;
149
150 ap->nr = 6 + (keybits >> 5);
151
152 rk = ap->k;
153
154 memcpy(rk, key, keybits >> 3);
155
156 i = 0;
157
158 if (keybits == 128)
159 {
160 while (1)
161 {
162 t = rk[3];
163 #if WORDS_BIGENDIAN
164 t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
165 (_ae4[(t >> 8) & 0xff] & 0x00ff0000) ^
166 (_ae4[(t ) & 0xff] & 0x0000ff00) ^
167 (_ae4[(t >> 24) ] & 0x000000ff) ^
168 _arc[i];
169 #else
170 t = (_ae4[(t >> 8) & 0xff] & 0x000000ff) ^
171 (_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
172 (_ae4[(t >> 24) ] & 0x00ff0000) ^
173 (_ae4[(t ) & 0xff] & 0xff000000) ^
174 _arc[i];
175 #endif
176 rk[4] = (t ^= rk[0]);
177 rk[5] = (t ^= rk[1]);
178 rk[6] = (t ^= rk[2]);
179 rk[7] = (t ^= rk[3]);
180 if (++i == 10)
181 break;
182 rk += 4;
183 }
184 }
185 else if (keybits == 192)
186 {
187 while (1)
188 {
189 t = rk[5];
190 #if WORDS_BIGENDIAN
191 t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
192 (_ae4[(t >> 8) & 0xff] & 0x00ff0000) ^
193 (_ae4[(t ) & 0xff] & 0x0000ff00) ^
194 (_ae4[(t >> 24) ] & 0x000000ff) ^
195 _arc[i];
196 #else
197 t = (_ae4[(t >> 8) & 0xff] & 0x000000ff) ^
198 (_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
199 (_ae4[(t >> 24) ] & 0x00ff0000) ^
200 (_ae4[(t ) & 0xff] & 0xff000000) ^
201 _arc[i];
202 #endif
203 rk[6] = (t ^= rk[0]);
204 rk[7] = (t ^= rk[1]);
205 rk[8] = (t ^= rk[2]);
206 rk[9] = (t ^= rk[3]);
207 if (++i == 8)
208 break;
209 rk[10] = (t ^= rk[4]);
210 rk[11] = (t ^= rk[5]);
211 rk += 6;
212 }
213 }
214 else if (keybits == 256)
215 {
216 while (1)
217 {
218 t = rk[7];
219 #if WORDS_BIGENDIAN
220 t = (_ae4[(t >> 16) & 0xff] & 0xff000000) ^
221 (_ae4[(t >> 8) & 0xff] & 0x00ff0000) ^
222 (_ae4[(t ) & 0xff] & 0x0000ff00) ^
223 (_ae4[(t >> 24) ] & 0x000000ff) ^
224 _arc[i];
225 #else
226 t = (_ae4[(t >> 8) & 0xff] & 0x000000ff) ^
227 (_ae4[(t >> 16) & 0xff] & 0x0000ff00) ^
228 (_ae4[(t >> 24) ] & 0x00ff0000) ^
229 (_ae4[(t ) & 0xff] & 0xff000000) ^
230 _arc[i];
231 #endif
232 rk[8] = (t ^= rk[0]);
233 rk[9] = (t ^= rk[1]);
234 rk[10] = (t ^= rk[2]);
235 rk[11] = (t ^= rk[3]);
236 if (++i == 7)
237 break;
238 #if WORDS_BIGENDIAN
239 t = (_ae4[(t >> 24) ] & 0xff000000) ^
240 (_ae4[(t >> 16) & 0xff] & 0x00ff0000) ^
241 (_ae4[(t >> 8) & 0xff] & 0x0000ff00) ^
242 (_ae4[(t ) & 0xff] & 0x000000ff);
243 #else
244 t = (_ae4[(t ) & 0xff] & 0x000000ff) ^
245 (_ae4[(t >> 8) & 0xff] & 0x0000ff00) ^
246 (_ae4[(t >> 16) & 0xff] & 0x00ff0000) ^
247 (_ae4[(t >> 24) ] & 0xff000000);
248 #endif
249 rk[12] = (t ^= rk[4]);
250 rk[13] = (t ^= rk[5]);
251 rk[14] = (t ^= rk[6]);
252 rk[15] = (t ^= rk[7]);
253 rk += 8;
254 }
255 }
256
257 if (op == DECRYPT)
258 {
259 rk = ap->k;
260
261 for (i = 0, j = (ap->nr << 2); i < j; i += 4, j -= 4)
262 {
263 t = rk[i ]; rk[i ] = rk[j ]; rk[j ] = t;
264 t = rk[i+1]; rk[i+1] = rk[j+1]; rk[j+1] = t;
265 t = rk[i+2]; rk[i+2] = rk[j+2]; rk[j+2] = t;
266 t = rk[i+3]; rk[i+3] = rk[j+3]; rk[j+3] = t;
267 }
268 for (i = 1; i < ap->nr; i++)
269 {
270 rk += 4;
271 #if WORDS_BIGENDIAN
272 rk[0] =
273 _ad0[_ae4[(rk[0] >> 24) ] & 0xff] ^
274 _ad1[_ae4[(rk[0] >> 16) & 0xff] & 0xff] ^
275 _ad2[_ae4[(rk[0] >> 8) & 0xff] & 0xff] ^
276 _ad3[_ae4[(rk[0] ) & 0xff] & 0xff];
277 rk[1] =
278 _ad0[_ae4[(rk[1] >> 24) ] & 0xff] ^
279 _ad1[_ae4[(rk[1] >> 16) & 0xff] & 0xff] ^
280 _ad2[_ae4[(rk[1] >> 8) & 0xff] & 0xff] ^
281 _ad3[_ae4[(rk[1] ) & 0xff] & 0xff];
282 rk[2] =
283 _ad0[_ae4[(rk[2] >> 24) ] & 0xff] ^
284 _ad1[_ae4[(rk[2] >> 16) & 0xff] & 0xff] ^
285 _ad2[_ae4[(rk[2] >> 8) & 0xff] & 0xff] ^
286 _ad3[_ae4[(rk[2] ) & 0xff] & 0xff];
287 rk[3] =
288 _ad0[_ae4[(rk[3] >> 24) ] & 0xff] ^
289 _ad1[_ae4[(rk[3] >> 16) & 0xff] & 0xff] ^
290 _ad2[_ae4[(rk[3] >> 8) & 0xff] & 0xff] ^
291 _ad3[_ae4[(rk[3] ) & 0xff] & 0xff];
292 #else
293 rk[0] =
294 _ad0[_ae4[(rk[0] ) & 0xff] & 0xff] ^
295 _ad1[_ae4[(rk[0] >> 8) & 0xff] & 0xff] ^
296 _ad2[_ae4[(rk[0] >> 16) & 0xff] & 0xff] ^
297 _ad3[_ae4[(rk[0] >> 24) ] & 0xff];
298 rk[1] =
299 _ad0[_ae4[(rk[1] ) & 0xff] & 0xff] ^
300 _ad1[_ae4[(rk[1] >> 8) & 0xff] & 0xff] ^
301 _ad2[_ae4[(rk[1] >> 16) & 0xff] & 0xff] ^
302 _ad3[_ae4[(rk[1] >> 24) ] & 0xff];
303 rk[2] =
304 _ad0[_ae4[(rk[2] ) & 0xff] & 0xff] ^
305 _ad1[_ae4[(rk[2] >> 8) & 0xff] & 0xff] ^
306 _ad2[_ae4[(rk[2] >> 16) & 0xff] & 0xff] ^
307 _ad3[_ae4[(rk[2] >> 24) ] & 0xff];
308 rk[3] =
309 _ad0[_ae4[(rk[3] ) & 0xff] & 0xff] ^
310 _ad1[_ae4[(rk[3] >> 8) & 0xff] & 0xff] ^
311 _ad2[_ae4[(rk[3] >> 16) & 0xff] & 0xff] ^
312 _ad3[_ae4[(rk[3] >> 24) ] & 0xff];
313 #endif
314 }
315 }
316 return 0;
317 }
318 return -1;
319 }
320
321 #ifndef ASM_AESSETIV
aesSetIV(aesParam * ap,const byte * iv)322 int aesSetIV(aesParam* ap, const byte* iv)
323 {
324 if (iv)
325 memcpy(ap->fdback, iv, 16);
326 else
327 memset(ap->fdback, 0, 16);
328
329 return 0;
330 }
331 #endif
332
333 #ifndef ASM_AESSETCTR
aesSetCTR(aesParam * ap,const byte * nivz,size_t counter)334 int aesSetCTR(aesParam* ap, const byte* nivz, size_t counter)
335 {
336 unsigned int blockwords = MP_BYTES_TO_WORDS(16);
337
338 if (nivz)
339 {
340 mpw tmp[MP_BYTES_TO_WORDS(16)];
341
342 os2ip((mpw*) ap->fdback, blockwords, nivz, 16);
343 mpsetws(blockwords, tmp, counter);
344 mpadd(blockwords, (mpw*) ap->fdback, tmp);
345 }
346 else
347 mpsetws(blockwords, (mpw*) ap->fdback, counter);
348
349 return 0;
350 }
351 #endif
352
353 #ifndef ASM_AESENCRYPT
aesEncrypt(aesParam * ap,uint32_t * dst,const uint32_t * src)354 int aesEncrypt(aesParam* ap, uint32_t* dst, const uint32_t* src)
355 {
356 #if defined (OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
357 register __m64 s0, s1, s2, s3;
358 register __m64 t0, t1, t2, t3;
359 register uint32_t i0, i1, i2, i3;
360 #else
361 register uint32_t s0, s1, s2, s3;
362 register uint32_t t0, t1, t2, t3;
363 #endif
364 register uint32_t* rk = ap->k;
365
366 #if defined (OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
367 s0 = _mm_cvtsi32_si64(src[0] ^ rk[0]);
368 s1 = _mm_cvtsi32_si64(src[1] ^ rk[1]);
369 s2 = _mm_cvtsi32_si64(src[2] ^ rk[2]);
370 s3 = _mm_cvtsi32_si64(src[3] ^ rk[3]);
371 #else
372 s0 = src[0] ^ rk[0];
373 s1 = src[1] ^ rk[1];
374 s2 = src[2] ^ rk[2];
375 s3 = src[3] ^ rk[3];
376 #endif
377
378 etfs(4); /* round 1 */
379 esft(8); /* round 2 */
380 etfs(12); /* round 3 */
381 esft(16); /* round 4 */
382 etfs(20); /* round 5 */
383 esft(24); /* round 6 */
384 etfs(28); /* round 7 */
385 esft(32); /* round 8 */
386 etfs(36); /* round 9 */
387
388 if (ap->nr > 10)
389 {
390 esft(40); /* round 10 */
391 etfs(44); /* round 11 */
392 if (ap->nr > 12)
393 {
394 esft(48); /* round 12 */
395 etfs(52); /* round 13 */
396 }
397 }
398
399 rk += (ap->nr << 2);
400
401 elr(); /* last round */
402
403 #if defined(OPTIMIZE_MMX) && (defined(OPTIMIZE_I586) || defined(OPTIMIZE_I686))
404 dst[0] = _mm_cvtsi64_si32(s0);
405 dst[1] = _mm_cvtsi64_si32(s1);
406 dst[2] = _mm_cvtsi64_si32(s2);
407 dst[3] = _mm_cvtsi64_si32(s3);
408 #else
409 dst[0] = s0;
410 dst[1] = s1;
411 dst[2] = s2;
412 dst[3] = s3;
413 #endif
414
415 return 0;
416 }
417 #endif
418
419 #ifndef ASM_AESDECRYPT
aesDecrypt(aesParam * ap,uint32_t * dst,const uint32_t * src)420 int aesDecrypt(aesParam* ap, uint32_t* dst, const uint32_t* src)
421 {
422 register uint32_t s0, s1, s2, s3;
423 register uint32_t t0, t1, t2, t3;
424 register uint32_t* rk = ap->k;
425
426 s0 = src[0] ^ rk[0];
427 s1 = src[1] ^ rk[1];
428 s2 = src[2] ^ rk[2];
429 s3 = src[3] ^ rk[3];
430
431 dtfs(4); /* round 1 */
432 dsft(8); /* round 2 */
433 dtfs(12); /* round 3 */
434 dsft(16); /* round 4 */
435 dtfs(20); /* round 5 */
436 dsft(24); /* round 6 */
437 dtfs(28); /* round 7 */
438 dsft(32); /* round 8 */
439 dtfs(36); /* round 9 */
440
441 if (ap->nr > 10)
442 {
443 dsft(40); /* round 10 */
444 dtfs(44); /* round 11 */
445 if (ap->nr > 12)
446 {
447 dsft(48); /* round 12 */
448 dtfs(52); /* round 13 */
449 }
450 }
451
452 rk += (ap->nr << 2);
453
454 dlr(); /* last round */
455
456 dst[0] = s0;
457 dst[1] = s1;
458 dst[2] = s2;
459 dst[3] = s3;
460
461 return 0;
462 }
463 #endif
464
/*!\brief Gives access to the feedback buffer of \a ap.
 * \param ap parameter block
 * \return pointer to ap->fdback; it points into \a ap, so it is only valid
 *         as long as \a ap is
 */
uint32_t* aesFeedback(aesParam* ap)
{
	return ap->fdback;
}
469