/*
 *  FIPS-197 compliant AES implementation
 *
 *  Copyright (C) 2011, Con Kolivas <kernel@kolivas.org>
 *  Copyright (C) 2006-2010, Brainspark B.V.
 *
 *  This file is part of PolarSSL (http://www.polarssl.org)
 *  Lead Maintainer: Paul Bakker <polarssl_maintainer at polarssl.org>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
/*
 *  The AES block cipher was designed by Vincent Rijmen and Joan Daemen.
 *
 *  http://csrc.nist.gov/encryption/aes/rijndael/Rijndael.pdf
 *  http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
 */
32 
33 #include "aes.h"
34 
35 #include <string.h>
36 
/*
 * 32-bit integer manipulation macros (little endian)
 */
#ifndef GET_ULONG_LE
/*
 * Load the four bytes b[i] .. b[i+3] into n, least significant byte first.
 *
 * The body is wrapped in do { } while( 0 ) so that the macro plus its
 * trailing ';' is exactly one statement and can be used as the body of an
 * unbraced if/else.  b and i are evaluated four times each, so do not pass
 * expressions with side effects.
 */
#define GET_ULONG_LE(n,b,i)                             \
do {                                                    \
    (n) = ( (unsigned long) (b)[(i)    ]       )        \
        | ( (unsigned long) (b)[(i) + 1] <<  8 )        \
        | ( (unsigned long) (b)[(i) + 2] << 16 )        \
        | ( (unsigned long) (b)[(i) + 3] << 24 );       \
} while( 0 )
#endif
49 
#ifndef PUT_ULONG_LE
/*
 * Store the low 32 bits of n into b[i] .. b[i+3], least significant byte
 * first.
 *
 * The body is wrapped in do { } while( 0 ) so that the macro plus its
 * trailing ';' is exactly one statement and can be used as the body of an
 * unbraced if/else.  n, b and i are evaluated four times each, so do not
 * pass expressions with side effects.
 */
#define PUT_ULONG_LE(n,b,i)                             \
do {                                                    \
    (b)[(i)    ] = (unsigned char) ( (n)       );       \
    (b)[(i) + 1] = (unsigned char) ( (n) >>  8 );       \
    (b)[(i) + 2] = (unsigned char) ( (n) >> 16 );       \
    (b)[(i) + 3] = (unsigned char) ( (n) >> 24 );       \
} while( 0 )
#endif
59 
/*
 * Lookup tables.  All of them start out zeroed and are filled in by
 * aes_gen_tables().
 *
 *   FSb / RSb            forward and reverse S-box (RSb[FSb[i]] == i)
 *   FT0 / RT0            forward and reverse round tables
 *   FT1..FT3 / RT1..RT3  the preceding table rotated left by 8 bits
 *   RCON                 the ten round constants used by the key schedule
 */
static unsigned char FSb[256], RSb[256];

static unsigned long FT0[256], FT1[256], FT2[256], FT3[256];
static unsigned long RT0[256], RT1[256], RT2[256], RT3[256];

static unsigned long RCON[10];
82 
/*
 * Tables generation code
 *
 * ROTL8(x)  rotate the low 32 bits of x left by 8 bits.
 * XTIME(x)  shift x left by one and XOR in 0x1B when bit 7 of x was set;
 *           callers mask the result with 0xFF.
 * MUL(x,y)  GF(2^8) product of x and y via log/antilog lookup.  It reads
 *           the arrays named `pow` and `log` that must be in scope at the
 *           point of use.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * can be combined with other operators without precedence surprises.
 * Arguments are evaluated more than once.
 */
#define ROTL8(x) ( ( ( (x) << 8 ) & 0xFFFFFFFF ) | ( (x) >> 24 ) )
#define XTIME(x) ( ( (x) << 1 ) ^ ( ( (x) & 0x80 ) ? 0x1B : 0x00 ) )
#define MUL(x,y) ( ( (x) && (y) ) ? pow[(log[(x)]+log[(y)]) % 255] : 0 )

/* Set to 1 once the lookup tables have been generated. */
static int aes_init_done = 0;
91 
aes_gen_tables(void)92 static void aes_gen_tables( void )
93 {
94     int i, x, y, z;
95     int pow[256];
96     int log[256];
97 
98     /*
99      * compute pow and log tables over GF(2^8)
100      */
101     for( i = 0, x = 1; i < 256; i++ )
102     {
103         pow[i] = x;
104         log[x] = i;
105         x = ( x ^ XTIME( x ) ) & 0xFF;
106     }
107 
108     /*
109      * calculate the round constants
110      */
111     for( i = 0, x = 1; i < 10; i++ )
112     {
113         RCON[i] = (unsigned long) x;
114         x = XTIME( x ) & 0xFF;
115     }
116 
117     /*
118      * generate the forward and reverse S-boxes
119      */
120     FSb[0x00] = 0x63;
121     RSb[0x63] = 0x00;
122 
123     for( i = 1; i < 256; i++ )
124     {
125         x = pow[255 - log[i]];
126 
127         y  = x; y = ( (y << 1) | (y >> 7) ) & 0xFF;
128         x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF;
129         x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF;
130         x ^= y; y = ( (y << 1) | (y >> 7) ) & 0xFF;
131         x ^= y ^ 0x63;
132 
133         FSb[i] = (unsigned char) x;
134         RSb[x] = (unsigned char) i;
135     }
136 
137     /*
138      * generate the forward and reverse tables
139      */
140     for( i = 0; i < 256; i++ )
141     {
142         x = FSb[i];
143         y = XTIME( x ) & 0xFF;
144         z =  ( y ^ x ) & 0xFF;
145 
146         FT0[i] = ( (unsigned long) y       ) ^
147                  ( (unsigned long) x <<  8 ) ^
148                  ( (unsigned long) x << 16 ) ^
149                  ( (unsigned long) z << 24 );
150 
151         FT1[i] = ROTL8( FT0[i] );
152         FT2[i] = ROTL8( FT1[i] );
153         FT3[i] = ROTL8( FT2[i] );
154 
155         x = RSb[i];
156 
157         RT0[i] = ( (unsigned long) MUL( 0x0E, x )       ) ^
158                  ( (unsigned long) MUL( 0x09, x ) <<  8 ) ^
159                  ( (unsigned long) MUL( 0x0D, x ) << 16 ) ^
160                  ( (unsigned long) MUL( 0x0B, x ) << 24 );
161 
162         RT1[i] = ROTL8( RT0[i] );
163         RT2[i] = ROTL8( RT1[i] );
164         RT3[i] = ROTL8( RT2[i] );
165     }
166 }
167 
168 /*
169  * AES key schedule (encryption)
170  */
aes_setkey_enc(aes_context * ctx,const unsigned char * key,int keysize)171 int aes_setkey_enc( aes_context *ctx, const unsigned char *key, int keysize )
172 {
173     int i;
174     unsigned long *RK;
175 
176 #if !defined(POLARSSL_AES_ROM_TABLES)
177     if( aes_init_done == 0 )
178     {
179         aes_gen_tables();
180         aes_init_done = 1;
181     }
182 #endif
183 
184     switch( keysize )
185     {
186         case 128: ctx->nr = 10; break;
187         case 192: ctx->nr = 12; break;
188         case 256: ctx->nr = 14; break;
189         default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH );
190     }
191 
192 #if defined(PADLOCK_ALIGN16)
193     ctx->rk = RK = PADLOCK_ALIGN16( ctx->buf );
194 #else
195     ctx->rk = RK = ctx->buf;
196 #endif
197 
198     for( i = 0; i < (keysize >> 5); i++ )
199     {
200         GET_ULONG_LE( RK[i], key, i << 2 );
201     }
202 
203     switch( ctx->nr )
204     {
205         case 10:
206 
207             for( i = 0; i < 10; i++, RK += 4 )
208             {
209                 RK[4]  = RK[0] ^ RCON[i] ^
210                 ( (unsigned long) FSb[ ( RK[3] >>  8 ) & 0xFF ]       ) ^
211                 ( (unsigned long) FSb[ ( RK[3] >> 16 ) & 0xFF ] <<  8 ) ^
212                 ( (unsigned long) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^
213                 ( (unsigned long) FSb[ ( RK[3]       ) & 0xFF ] << 24 );
214 
215                 RK[5]  = RK[1] ^ RK[4];
216                 RK[6]  = RK[2] ^ RK[5];
217                 RK[7]  = RK[3] ^ RK[6];
218             }
219             break;
220 
221         case 12:
222 
223             for( i = 0; i < 8; i++, RK += 6 )
224             {
225                 RK[6]  = RK[0] ^ RCON[i] ^
226                 ( (unsigned long) FSb[ ( RK[5] >>  8 ) & 0xFF ]       ) ^
227                 ( (unsigned long) FSb[ ( RK[5] >> 16 ) & 0xFF ] <<  8 ) ^
228                 ( (unsigned long) FSb[ ( RK[5] >> 24 ) & 0xFF ] << 16 ) ^
229                 ( (unsigned long) FSb[ ( RK[5]       ) & 0xFF ] << 24 );
230 
231                 RK[7]  = RK[1] ^ RK[6];
232                 RK[8]  = RK[2] ^ RK[7];
233                 RK[9]  = RK[3] ^ RK[8];
234                 RK[10] = RK[4] ^ RK[9];
235                 RK[11] = RK[5] ^ RK[10];
236             }
237             break;
238 
239         case 14:
240 
241             for( i = 0; i < 7; i++, RK += 8 )
242             {
243                 RK[8]  = RK[0] ^ RCON[i] ^
244                 ( (unsigned long) FSb[ ( RK[7] >>  8 ) & 0xFF ]       ) ^
245                 ( (unsigned long) FSb[ ( RK[7] >> 16 ) & 0xFF ] <<  8 ) ^
246                 ( (unsigned long) FSb[ ( RK[7] >> 24 ) & 0xFF ] << 16 ) ^
247                 ( (unsigned long) FSb[ ( RK[7]       ) & 0xFF ] << 24 );
248 
249                 RK[9]  = RK[1] ^ RK[8];
250                 RK[10] = RK[2] ^ RK[9];
251                 RK[11] = RK[3] ^ RK[10];
252 
253                 RK[12] = RK[4] ^
254                 ( (unsigned long) FSb[ ( RK[11]       ) & 0xFF ]       ) ^
255                 ( (unsigned long) FSb[ ( RK[11] >>  8 ) & 0xFF ] <<  8 ) ^
256                 ( (unsigned long) FSb[ ( RK[11] >> 16 ) & 0xFF ] << 16 ) ^
257                 ( (unsigned long) FSb[ ( RK[11] >> 24 ) & 0xFF ] << 24 );
258 
259                 RK[13] = RK[5] ^ RK[12];
260                 RK[14] = RK[6] ^ RK[13];
261                 RK[15] = RK[7] ^ RK[14];
262             }
263             break;
264 
265         default:
266 
267             break;
268     }
269 
270     return( 0 );
271 }
272 
273 /*
274  * AES key schedule (decryption)
275  */
aes_setkey_dec(aes_context * ctx,const unsigned char * key,int keysize)276 int aes_setkey_dec( aes_context *ctx, const unsigned char *key, int keysize )
277 {
278     int i, j;
279     aes_context cty;
280     unsigned long *RK;
281     unsigned long *SK;
282     int ret;
283 
284     switch( keysize )
285     {
286         case 128: ctx->nr = 10; break;
287         case 192: ctx->nr = 12; break;
288         case 256: ctx->nr = 14; break;
289         default : return( POLARSSL_ERR_AES_INVALID_KEY_LENGTH );
290     }
291 
292 #if defined(PADLOCK_ALIGN16)
293     ctx->rk = RK = PADLOCK_ALIGN16( ctx->buf );
294 #else
295     ctx->rk = RK = ctx->buf;
296 #endif
297 
298     ret = aes_setkey_enc( &cty, key, keysize );
299     if( ret != 0 )
300         return( ret );
301 
302     SK = cty.rk + cty.nr * 4;
303 
304     *RK++ = *SK++;
305     *RK++ = *SK++;
306     *RK++ = *SK++;
307     *RK++ = *SK++;
308 
309     for( i = ctx->nr - 1, SK -= 8; i > 0; i--, SK -= 8 )
310     {
311         for( j = 0; j < 4; j++, SK++ )
312         {
313             *RK++ = RT0[ FSb[ ( *SK       ) & 0xFF ] ] ^
314                     RT1[ FSb[ ( *SK >>  8 ) & 0xFF ] ] ^
315                     RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^
316                     RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ];
317         }
318     }
319 
320     *RK++ = *SK++;
321     *RK++ = *SK++;
322     *RK++ = *SK++;
323     *RK++ = *SK++;
324 
325     memset( &cty, 0, sizeof( aes_context ) );
326 
327     return( 0 );
328 }
329 
/*
 * One forward table round.
 *
 * Each output word X0..X3 is the next round-key word (*RK++) XORed with
 * one lookup in each of FT0..FT3, indexed by bytes 0..3 of four different
 * input words Y0..Y3.
 *
 * Requires a pointer named RK in the calling scope and advances it by four
 * words.  Outputs are written one after another while the inputs are still
 * being read, so X0..X3 must not name the same variables as Y0..Y3.
 *
 * Wrapped in do { } while( 0 ) so the macro plus ';' is a single statement.
 */
#define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3)             \
do {                                                    \
    (X0) = *RK++ ^ FT0[ ( (Y0)       ) & 0xFF ] ^       \
                   FT1[ ( (Y1) >>  8 ) & 0xFF ] ^       \
                   FT2[ ( (Y2) >> 16 ) & 0xFF ] ^       \
                   FT3[ ( (Y3) >> 24 ) & 0xFF ];        \
                                                        \
    (X1) = *RK++ ^ FT0[ ( (Y1)       ) & 0xFF ] ^       \
                   FT1[ ( (Y2) >>  8 ) & 0xFF ] ^       \
                   FT2[ ( (Y3) >> 16 ) & 0xFF ] ^       \
                   FT3[ ( (Y0) >> 24 ) & 0xFF ];        \
                                                        \
    (X2) = *RK++ ^ FT0[ ( (Y2)       ) & 0xFF ] ^       \
                   FT1[ ( (Y3) >>  8 ) & 0xFF ] ^       \
                   FT2[ ( (Y0) >> 16 ) & 0xFF ] ^       \
                   FT3[ ( (Y1) >> 24 ) & 0xFF ];        \
                                                        \
    (X3) = *RK++ ^ FT0[ ( (Y3)       ) & 0xFF ] ^       \
                   FT1[ ( (Y0) >>  8 ) & 0xFF ] ^       \
                   FT2[ ( (Y1) >> 16 ) & 0xFF ] ^       \
                   FT3[ ( (Y2) >> 24 ) & 0xFF ];        \
} while( 0 )
352 
/*
 * One reverse table round.
 *
 * Each output word X0..X3 is the next round-key word (*RK++) XORed with
 * one lookup in each of RT0..RT3, indexed by bytes 0..3 of four different
 * input words Y0..Y3 (the word order is the mirror image of AES_FROUND).
 *
 * Requires a pointer named RK in the calling scope and advances it by four
 * words.  Outputs are written one after another while the inputs are still
 * being read, so X0..X3 must not name the same variables as Y0..Y3.
 *
 * Wrapped in do { } while( 0 ) so the macro plus ';' is a single statement.
 */
#define AES_RROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3)             \
do {                                                    \
    (X0) = *RK++ ^ RT0[ ( (Y0)       ) & 0xFF ] ^       \
                   RT1[ ( (Y3) >>  8 ) & 0xFF ] ^       \
                   RT2[ ( (Y2) >> 16 ) & 0xFF ] ^       \
                   RT3[ ( (Y1) >> 24 ) & 0xFF ];        \
                                                        \
    (X1) = *RK++ ^ RT0[ ( (Y1)       ) & 0xFF ] ^       \
                   RT1[ ( (Y0) >>  8 ) & 0xFF ] ^       \
                   RT2[ ( (Y3) >> 16 ) & 0xFF ] ^       \
                   RT3[ ( (Y2) >> 24 ) & 0xFF ];        \
                                                        \
    (X2) = *RK++ ^ RT0[ ( (Y2)       ) & 0xFF ] ^       \
                   RT1[ ( (Y1) >>  8 ) & 0xFF ] ^       \
                   RT2[ ( (Y0) >> 16 ) & 0xFF ] ^       \
                   RT3[ ( (Y3) >> 24 ) & 0xFF ];        \
                                                        \
    (X3) = *RK++ ^ RT0[ ( (Y3)       ) & 0xFF ] ^       \
                   RT1[ ( (Y2) >>  8 ) & 0xFF ] ^       \
                   RT2[ ( (Y1) >> 16 ) & 0xFF ] ^       \
                   RT3[ ( (Y0) >> 24 ) & 0xFF ];        \
} while( 0 )
375 
376 /*
377  * AES-ECB block encryption/decryption
378  */
aes_crypt_ecb(aes_context * ctx,int mode,const unsigned char input[16],unsigned char output[16])379 int aes_crypt_ecb( aes_context *ctx,
380                     int mode,
381                     const unsigned char input[16],
382                     unsigned char output[16] )
383 {
384     int i;
385     unsigned long *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;
386 
387 #if defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86)
388     if( padlock_supports( PADLOCK_ACE ) )
389     {
390         if( padlock_xcryptecb( ctx, mode, input, output ) == 0 )
391             return( 0 );
392 
393         // If padlock data misaligned, we just fall back to
394         // unaccelerated mode
395         //
396     }
397 #endif
398 
399     RK = ctx->rk;
400 
401     GET_ULONG_LE( X0, input,  0 ); X0 ^= *RK++;
402     GET_ULONG_LE( X1, input,  4 ); X1 ^= *RK++;
403     GET_ULONG_LE( X2, input,  8 ); X2 ^= *RK++;
404     GET_ULONG_LE( X3, input, 12 ); X3 ^= *RK++;
405 
406     if( mode == AES_DECRYPT )
407     {
408         for( i = (ctx->nr >> 1) - 1; i > 0; i-- )
409         {
410             AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
411             AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
412         }
413 
414         AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
415 
416         X0 = *RK++ ^ \
417                 ( (unsigned long) RSb[ ( Y0       ) & 0xFF ]       ) ^
418                 ( (unsigned long) RSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
419                 ( (unsigned long) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
420                 ( (unsigned long) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );
421 
422         X1 = *RK++ ^ \
423                 ( (unsigned long) RSb[ ( Y1       ) & 0xFF ]       ) ^
424                 ( (unsigned long) RSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
425                 ( (unsigned long) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
426                 ( (unsigned long) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );
427 
428         X2 = *RK++ ^ \
429                 ( (unsigned long) RSb[ ( Y2       ) & 0xFF ]       ) ^
430                 ( (unsigned long) RSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
431                 ( (unsigned long) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
432                 ( (unsigned long) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );
433 
434         X3 = *RK++ ^ \
435                 ( (unsigned long) RSb[ ( Y3       ) & 0xFF ]       ) ^
436                 ( (unsigned long) RSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
437                 ( (unsigned long) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
438                 ( (unsigned long) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );
439     }
440     else /* AES_ENCRYPT */
441     {
442         for( i = (ctx->nr >> 1) - 1; i > 0; i-- )
443         {
444             AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
445             AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 );
446         }
447 
448         AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
449 
450         X0 = *RK++ ^ \
451                 ( (unsigned long) FSb[ ( Y0       ) & 0xFF ]       ) ^
452                 ( (unsigned long) FSb[ ( Y1 >>  8 ) & 0xFF ] <<  8 ) ^
453                 ( (unsigned long) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^
454                 ( (unsigned long) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 );
455 
456         X1 = *RK++ ^ \
457                 ( (unsigned long) FSb[ ( Y1       ) & 0xFF ]       ) ^
458                 ( (unsigned long) FSb[ ( Y2 >>  8 ) & 0xFF ] <<  8 ) ^
459                 ( (unsigned long) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^
460                 ( (unsigned long) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 );
461 
462         X2 = *RK++ ^ \
463                 ( (unsigned long) FSb[ ( Y2       ) & 0xFF ]       ) ^
464                 ( (unsigned long) FSb[ ( Y3 >>  8 ) & 0xFF ] <<  8 ) ^
465                 ( (unsigned long) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^
466                 ( (unsigned long) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 );
467 
468         X3 = *RK++ ^ \
469                 ( (unsigned long) FSb[ ( Y3       ) & 0xFF ]       ) ^
470                 ( (unsigned long) FSb[ ( Y0 >>  8 ) & 0xFF ] <<  8 ) ^
471                 ( (unsigned long) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^
472                 ( (unsigned long) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 );
473     }
474 
475     PUT_ULONG_LE( X0, output,  0 );
476     PUT_ULONG_LE( X1, output,  4 );
477     PUT_ULONG_LE( X2, output,  8 );
478     PUT_ULONG_LE( X3, output, 12 );
479 
480     return( 0 );
481 }
482 
483 /*
484  * AES-CBC buffer encryption/decryption
485  */
aes_crypt_cbc(aes_context * ctx,int mode,long long int length,unsigned char iv[16],const unsigned char * input,unsigned char * output)486 int aes_crypt_cbc( aes_context *ctx,
487                     int mode,
488                     long long int length,
489                     unsigned char iv[16],
490                     const unsigned char *input,
491                     unsigned char *output )
492 {
493     int i;
494     unsigned char temp[16];
495 
496     if( length % 16 )
497         return( POLARSSL_ERR_AES_INVALID_INPUT_LENGTH );
498 
499 #if defined(POLARSSL_PADLOCK_C) && defined(POLARSSL_HAVE_X86)
500     if( padlock_supports( PADLOCK_ACE ) )
501     {
502         if( padlock_xcryptcbc( ctx, mode, length, iv, input, output ) == 0 )
503             return( 0 );
504 
505         // If padlock data misaligned, we just fall back to
506         // unaccelerated mode
507         //
508     }
509 #endif
510 
511     if( mode == AES_DECRYPT )
512     {
513         while( length > 0 )
514         {
515             memcpy( temp, input, 16 );
516             aes_crypt_ecb( ctx, mode, input, output );
517 
518             for( i = 0; i < 16; i++ )
519                 output[i] = (unsigned char)( output[i] ^ iv[i] );
520 
521             memcpy( iv, temp, 16 );
522 
523             input  += 16;
524             output += 16;
525             length -= 16;
526         }
527     }
528     else
529     {
530         while( length > 0 )
531         {
532             for( i = 0; i < 16; i++ )
533                 output[i] = (unsigned char)( input[i] ^ iv[i] );
534 
535             aes_crypt_ecb( ctx, mode, output, output );
536             memcpy( iv, output, 16 );
537 
538             input  += 16;
539             output += 16;
540             length -= 16;
541         }
542     }
543 
544     return( 0 );
545 }
546