1 /*
2  * Copyright (c) 2007-2016, Cameron Rich
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice,
10  *   this list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  *   this list of conditions and the following disclaimer in the documentation
13  *   and/or other materials provided with the distribution.
14  * * Neither the name of the axTLS project nor the names of its contributors
15  *   may be used to endorse or promote products derived from this software
16  *   without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /**
32  * AES implementation - this is a small code version. There are much faster
33  * versions around but they are much larger in size (i.e. they use large
34  * submix tables).
35  */
36 
37 #include <string.h>
38 #include "os_port.h"
39 #include "crypto.h"
40 
41 #define rot1(x) (((x) << 24) | ((x) >> 8))
42 #define rot2(x) (((x) << 16) | ((x) >> 16))
43 #define rot3(x) (((x) <<  8) | ((x) >> 24))
44 
45 /*
46  * This cute trick does 4 'mul by two' at once.  Stolen from
47  * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
48  * a standard graphics trick
49  * The key to this is that we need to xor with 0x1b if the top bit is set.
50  * a 1xxx xxxx   0xxx 0xxx First we mask the 7bit,
51  * b 1000 0000   0000 0000 then we shift right by 7 putting the 7bit in 0bit,
52  * c 0000 0001   0000 0000 we then subtract (c) from (b)
53  * d 0111 1111   0000 0000 and now we and with our mask
54  * e 0001 1011   0000 0000
55  */
56 #define mt  0x80808080
57 #define ml  0x7f7f7f7f
58 #define mh  0xfefefefe
59 #define mm  0x1b1b1b1b
60 #define mul2(x,t)	((t)=((x)&mt), \
61 			((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))
62 
63 #define inv_mix_col(x,f2,f4,f8,f9) (\
64 			(f2)=mul2(x,f2), \
65 			(f4)=mul2(f2,f4), \
66 			(f8)=mul2(f4,f8), \
67 			(f9)=(x)^(f8), \
68 			(f8)=((f2)^(f4)^(f8)), \
69 			(f2)^=(f9), \
70 			(f4)^=(f9), \
71 			(f8)^=rot3(f2), \
72 			(f8)^=rot2(f4), \
73 			(f8)^rot1(f9))
74 
75 /*
76  * AES S-box
77  */
78 static const uint8_t aes_sbox[256] =
79 {
80 	0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
81 	0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
82 	0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
83 	0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
84 	0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
85 	0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
86 	0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
87 	0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
88 	0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
89 	0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
90 	0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
91 	0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
92 	0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
93 	0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
94 	0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
95 	0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
96 	0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
97 	0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
98 	0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
99 	0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
100 	0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
101 	0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
102 	0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
103 	0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
104 	0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
105 	0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
106 	0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
107 	0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
108 	0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
109 	0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
110 	0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
111 	0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
112 };
113 
114 /*
115  * AES is-box
116  */
117 static const uint8_t aes_isbox[256] =
118 {
119     0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
120     0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
121     0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
122     0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
123     0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
124     0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
125     0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
126     0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
127     0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
128     0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
129     0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
130     0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
131     0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
132     0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
133     0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
134     0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
135     0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
136     0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
137     0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
138     0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
139     0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
140     0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
141     0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
142     0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
143     0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
144     0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
145     0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
146     0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
147     0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
148     0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
149     0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
150     0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
151 };
152 
153 static const unsigned char Rcon[30]=
154 {
155 	0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
156 	0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
157 	0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
158 	0xb3,0x7d,0xfa,0xef,0xc5,0x91,
159 };
160 
161 /* ----- static functions ----- */
162 
163 /* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
164    x^8+x^4+x^3+x+1 */
AES_xtime(uint32_t x)165 static unsigned char AES_xtime(uint32_t x)
166 {
167 	return (x&0x80) ? (x<<1)^0x1b : x<<1;
168 }
169 
170 /**
171  * Set up AES with the key/iv and cipher size.
172  */
AES_set_key(AES_CTX * ctx,const uint8_t * key,const uint8_t * iv,AES_MODE mode)173 void AES_set_key(AES_CTX *ctx, const uint8_t *key,
174         const uint8_t *iv, AES_MODE mode)
175 {
176     int i, ii;
177     uint32_t *W, tmp, tmp2;
178     const unsigned char *ip;
179     int words;
180 
181     switch (mode)
182     {
183         case AES_MODE_128:
184             i = 10;
185             words = 4;
186             break;
187 
188         case AES_MODE_256:
189             i = 14;
190             words = 8;
191             break;
192 
193         default:        /* fail silently */
194             return;
195     }
196 
197     ctx->rounds = i;
198     ctx->key_size = words;
199     W = ctx->ks;
200     for (i = 0; i < words; i+=2)
201     {
202         W[i+0]=	((uint32_t)key[ 0]<<24)|
203             ((uint32_t)key[ 1]<<16)|
204             ((uint32_t)key[ 2]<< 8)|
205             ((uint32_t)key[ 3]    );
206         W[i+1]=	((uint32_t)key[ 4]<<24)|
207             ((uint32_t)key[ 5]<<16)|
208             ((uint32_t)key[ 6]<< 8)|
209             ((uint32_t)key[ 7]    );
210         key += 8;
211     }
212 
213     ip = Rcon;
214     ii = 4 * (ctx->rounds+1);
215     for (i = words; i<ii; i++)
216     {
217         tmp = W[i-1];
218 
219         if ((i % words) == 0)
220         {
221             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
222             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
223             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
224             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
225             tmp=tmp2^(((unsigned int)*ip)<<24);
226             ip++;
227         }
228 
229         if ((words == 8) && ((i % words) == 4))
230         {
231             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
232             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
233             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
234             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
235             tmp=tmp2;
236         }
237 
238         W[i]=W[i-words]^tmp;
239     }
240 
241     /* copy the iv across */
242     if (iv)
243     {
244         memcpy(ctx->iv, iv, 16);
245     }
246 }
247 
248 /**
249  * Change a key for decryption.
250  */
AES_convert_key(AES_CTX * ctx)251 void AES_convert_key(AES_CTX *ctx)
252 {
253     int i;
254     uint32_t *k,w,t1,t2,t3,t4;
255 
256     k = ctx->ks;
257     k += 4;
258 
259     for (i= ctx->rounds*4; i > 4; i--)
260     {
261         w= *k;
262         w = inv_mix_col(w,t1,t2,t3,t4);
263         *k++ =w;
264     }
265 }
266 
267 /**
268  * Encrypt a byte sequence (with a block size 16) using the AES cipher.
269  */
AES_cbc_encrypt(AES_CTX * ctx,const uint8_t * msg,uint8_t * out,int length)270 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
271 {
272     int i;
273     uint32_t tin[4], tout[4], iv[4];
274 
275     memcpy(iv, ctx->iv, AES_IV_SIZE);
276     for (i = 0; i < 4; i++)
277         tout[i] = ntohl(iv[i]);
278 
279     for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
280     {
281         uint32_t msg_32[4];
282         uint32_t out_32[4];
283         memcpy(msg_32, msg, AES_BLOCKSIZE);
284         msg += AES_BLOCKSIZE;
285 
286         for (i = 0; i < 4; i++)
287             tin[i] = ntohl(msg_32[i])^tout[i];
288 
289         AES_encrypt(ctx, tin);
290 
291         for (i = 0; i < 4; i++)
292         {
293             tout[i] = tin[i];
294             out_32[i] = htonl(tout[i]);
295         }
296 
297         memcpy(out, out_32, AES_BLOCKSIZE);
298         out += AES_BLOCKSIZE;
299     }
300 
301     for (i = 0; i < 4; i++)
302         iv[i] = htonl(tout[i]);
303     memcpy(ctx->iv, iv, AES_IV_SIZE);
304 }
305 
306 /**
307  * Decrypt a byte sequence (with a block size 16) using the AES cipher.
308  */
AES_cbc_decrypt(AES_CTX * ctx,const uint8_t * msg,uint8_t * out,int length)309 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
310 {
311     int i;
312     uint32_t tin[4], xor[4], tout[4], data[4], iv[4];
313 
314     memcpy(iv, ctx->iv, AES_IV_SIZE);
315     for (i = 0; i < 4; i++)
316         xor[i] = ntohl(iv[i]);
317 
318     for (length -= 16; length >= 0; length -= 16)
319     {
320         uint32_t msg_32[4];
321         uint32_t out_32[4];
322         memcpy(msg_32, msg, AES_BLOCKSIZE);
323         msg += AES_BLOCKSIZE;
324 
325         for (i = 0; i < 4; i++)
326         {
327             tin[i] = ntohl(msg_32[i]);
328             data[i] = tin[i];
329         }
330 
331         AES_decrypt(ctx, data);
332 
333         for (i = 0; i < 4; i++)
334         {
335             tout[i] = data[i]^xor[i];
336             xor[i] = tin[i];
337             out_32[i] = htonl(tout[i]);
338         }
339 
340         memcpy(out, out_32, AES_BLOCKSIZE);
341         out += AES_BLOCKSIZE;
342     }
343 
344     for (i = 0; i < 4; i++)
345         iv[i] = htonl(xor[i]);
346     memcpy(ctx->iv, iv, AES_IV_SIZE);
347 }
348 
349 /**
350  * Encrypt a single block (16 bytes) of data
351  */
AES_encrypt(const AES_CTX * ctx,uint32_t * data)352 void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
353 {
354     /* To make this code smaller, generate the sbox entries on the fly.
355      * This will have a really heavy effect upon performance.
356      */
357     uint32_t tmp[4];
358     uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
359     int curr_rnd;
360     int rounds = ctx->rounds;
361     const uint32_t *k = ctx->ks;
362 
363     /* Pre-round key addition */
364     for (row = 0; row < 4; row++)
365         data[row] ^= *(k++);
366 
367     /* Encrypt one block. */
368     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
369     {
370         /* Perform ByteSub and ShiftRow operations together */
371         for (row = 0; row < 4; row++)
372         {
373             a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
374             a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
375             a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
376             a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
377 
378             /* Perform MixColumn iff not last round */
379             if (curr_rnd < (rounds - 1))
380             {
381                 tmp1 = a0 ^ a1 ^ a2 ^ a3;
382                 old_a0 = a0;
383                 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
384                 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
385                 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
386                 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
387             }
388 
389             tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
390         }
391 
392         /* KeyAddition - note that it is vital that this loop is separate from
393            the MixColumn operation, which must be atomic...*/
394         for (row = 0; row < 4; row++)
395             data[row] = tmp[row] ^ *(k++);
396     }
397 }
398 
399 /**
400  * Decrypt a single block (16 bytes) of data
401  */
AES_decrypt(const AES_CTX * ctx,uint32_t * data)402 void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
403 {
404     uint32_t tmp[4];
405     uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
406     uint32_t a0, a1, a2, a3, row;
407     int curr_rnd;
408     int rounds = ctx->rounds;
409     const uint32_t *k = ctx->ks + ((rounds+1)*4);
410 
411     /* pre-round key addition */
412     for (row=4; row > 0;row--)
413         data[row-1] ^= *(--k);
414 
415     /* Decrypt one block */
416     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
417     {
418         /* Perform ByteSub and ShiftRow operations together */
419         for (row = 4; row > 0; row--)
420         {
421             a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
422             a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
423             a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
424             a3 = aes_isbox[(data[row%4])&0xFF];
425 
426             /* Perform MixColumn iff not last round */
427             if (curr_rnd<(rounds-1))
428             {
429                 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
430                    are quite large compared to encryption; this
431                    operation slows decryption down noticeably. */
432                 xt0 = AES_xtime(a0^a1);
433                 xt1 = AES_xtime(a1^a2);
434                 xt2 = AES_xtime(a2^a3);
435                 xt3 = AES_xtime(a3^a0);
436                 xt4 = AES_xtime(xt0^xt1);
437                 xt5 = AES_xtime(xt1^xt2);
438                 xt6 = AES_xtime(xt4^xt5);
439 
440                 xt0 ^= a1^a2^a3^xt4^xt6;
441                 xt1 ^= a0^a2^a3^xt5^xt6;
442                 xt2 ^= a0^a1^a3^xt4^xt6;
443                 xt3 ^= a0^a1^a2^xt5^xt6;
444                 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
445             }
446             else
447                 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
448         }
449 
450         for (row = 4; row > 0; row--)
451             data[row-1] = tmp[row-1] ^ *(--k);
452     }
453 }
454