1 /*
2 * AES Block cipher
3 * (c) 2005-2008 Axiomatic Systems,LLC
4 * Portions (c) 2001, Dr Brian Gladman (see below)
5 */
6 
7 /*
8 -------------------------------------------------------------------------
9 Copyright (c) 2001, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
10 All rights reserved.
11 
12 LICENSE TERMS
13 
14 The free distribution and use of this software in both source and binary
15 form is allowed (with or without changes) provided that:
16 
17 1. distributions of this source code include the above copyright
18 notice, this list of conditions and the following disclaimer;
19 
20 2. distributions in binary form include the above copyright
21 notice, this list of conditions and the following disclaimer
22 in the documentation and/or other associated materials;
23 
24 3. the copyright holder's name is not used to endorse products
25 built using this software without specific written permission.
26 
27 DISCLAIMER
28 
29 This software is provided 'as is' with no explicit or implied warranties
30 in respect of its properties, including, but not limited to, correctness
31 and fitness for purpose.
32 -------------------------------------------------------------------------
33 Issue Date: 29/07/2002
34 */
35 
36 /*----------------------------------------------------------------------
37 |   includes
38 +---------------------------------------------------------------------*/
39 #include "Ap4AesBlockCipher.h"
40 #include "Ap4Results.h"
41 #include "Ap4Utils.h"
42 
43 /*----------------------------------------------------------------------
44 |   AES types
45 +---------------------------------------------------------------------*/
46 typedef AP4_UI32     aes_32t;
47 typedef AP4_UI08     aes_08t;
48 typedef unsigned int aes_rval;
49 struct aes_ctx                     // the AES context for encryption
50 {   aes_32t    k_sch[4*AP4_AES_BLOCK_SIZE];   // the encryption key schedule
51     aes_32t    n_rnd;              // the number of cipher rounds
52     aes_32t    n_blk;              // the number of bytes in the state
53 };
54 #define aes_bad      0             // bad function return value
55 #define aes_good     1             // good function return value
56 
57 /*----------------------------------------------------------------------
58 |   build options
59 +---------------------------------------------------------------------*/
/* Feature switches: each macro is defined (without a value) to request the
   corresponding part of the implementation.  They are tested with defined()
   further down when deciding which lookup tables are needed. */
#define ENCRYPTION_KEY_SCHEDULE
#define ENCRYPTION
#define DECRYPTION_KEY_SCHEDULE
#define DECRYPTION
/* The block size is fixed at compile time, so the block-size validity check
   and the column count nc below both work from this constant. */
#define BLOCK_SIZE AP4_AES_BLOCK_SIZE
65 
66 /*----------------------------------------------------------------------
67 |   options
68 +---------------------------------------------------------------------*/
69 /*  START OF CONFIGURATION OPTIONS
70 
71     USE OF DEFINES
72 
73     Later in this section there are a number of defines that control the
74     operation of the code.  In each section, the purpose of each define is
75     explained so that the relevant form can be included or excluded by
76     setting either 1's or 0's respectively on the branches of the related
77     #if clauses.
78 */
79 
80 /*  1. BYTE ORDER IN 32-BIT WORDS
81 
82     To obtain the highest speed on processors with 32-bit words, this code
83     needs to determine the order in which bytes are packed into such words.
84     The following block of code is an attempt to capture the most obvious
    ways in which various environments define byte order. It may well fail,
86     in which case the definitions will need to be set by editing at the
87     points marked **** EDIT HERE IF NECESSARY **** below.
88 */
/* Local tags for the two supported byte orders.  They are only ever compared
   with == in #if tests, so the values just need to be distinct. */
#define AES_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */
#define AES_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */

/* The byte order is not detected here: AP4_PLATFORM_BYTE_ORDER must already
   be defined (presumably by one of the headers included above -- confirm),
   otherwise compilation stops with an error. */
#if !defined(AP4_PLATFORM_BYTE_ORDER)
#  error AP4_PLATFORM_BYTE_ORDER is not set
#endif

/* Translate the AP4 setting into PLATFORM_BYTE_ORDER, expressed with the
   local tags; any other value is rejected. */
#if AP4_PLATFORM_BYTE_ORDER == AP4_PLATFORM_BYTE_ORDER_BIG_ENDIAN
#define PLATFORM_BYTE_ORDER AES_BIG_ENDIAN
#elif AP4_PLATFORM_BYTE_ORDER == AP4_PLATFORM_BYTE_ORDER_LITTLE_ENDIAN
#define PLATFORM_BYTE_ORDER AES_LITTLE_ENDIAN
#else
#error unsupported value for AP4_PLATFORM_BYTE_ORDER
#endif
103 
104 
105 
106 /*  2. BYTE ORDER WITHIN 32 BIT WORDS
107 
108     The fundamental data processing units in Rijndael are 8-bit bytes. The
109     input, output and key input are all enumerated arrays of bytes in which
110     bytes are numbered starting at zero and increasing to one less than the
111     number of bytes in the array in question. This enumeration is only used
112     for naming bytes and does not imply any adjacency or order relationship
113     from one byte to another. When these inputs and outputs are considered
114     as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
115     byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
116     In this implementation bits are numbered from 0 to 7 starting at the
117     numerically least significant end of each byte (bit n represents 2^n).
118 
119     However, Rijndael can be implemented more efficiently using 32-bit
120     words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
121     into word[n]. While in principle these bytes can be assembled into words
122     in any positions, this implementation only supports the two formats in
123     which bytes in adjacent positions within words also have adjacent byte
124     numbers. This order is called big-endian if the lowest numbered bytes
125     in words have the highest numeric significance and little-endian if the
126     opposite applies.
127 
128     This code can work in either order irrespective of the order used by the
129     machine on which it runs. Normally the internal byte order will be set
130     to the order of the processor on which the code is to be run but this
131     define can be used to reverse this in special situations
132 */
/* "#if 1" makes the internal byte order follow the platform byte order.
   If it is changed to "#if 0" the #elif tests are evaluated instead; since
   AES_LITTLE_ENDIAN is always defined earlier in this file, the first #elif
   is then always taken, so selecting big-endian also requires disabling
   that branch. */
#if 1
#define INTERNAL_BYTE_ORDER PLATFORM_BYTE_ORDER
#elif defined(AES_LITTLE_ENDIAN)
#define INTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN
#elif defined(AES_BIG_ENDIAN)
#define INTERNAL_BYTE_ORDER AES_BIG_ENDIAN
#endif
140 
141 /*  3. FAST INPUT/OUTPUT OPERATIONS.
142 
143     On some machines it is possible to improve speed by transferring the
144     bytes in the input and output arrays to and from the internal 32-bit
145     variables by addressing these arrays as if they are arrays of 32-bit
146     words.  On some machines this will always be possible but there may
147     be a large performance penalty if the byte arrays are not aligned on
148     the normal word boundaries. On other machines this technique will
149     lead to memory access errors when such 32-bit word accesses are not
150     properly aligned. The option SAFE_IO avoids such problems but will
151     often be slower on those machines that support misaligned access
152     (especially so if care is taken to align the input  and output byte
153     arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
154     assumed that access to byte arrays as if they are arrays of 32-bit
155     words will not cause problems when such accesses are misaligned.
156 */
/* Enabled: word_in/word_out (defined further down) then move data one byte
   at a time instead of casting the byte pointer to a 32-bit word pointer. */
#if 1
#define SAFE_IO
#endif
160 
161 /*  4. LOOP UNROLLING
162 
    The code for encryption and decryption cycles through a number of rounds
164     that can be implemented either in a loop or by expanding the code into a
165     long sequence of instructions, the latter producing a larger program but
166     one that will often be much faster. The latter is called loop unrolling.
167     There are also potential speed advantages in expanding two iterations in
168     a loop with half the number of iterations, which is called partial loop
169     unrolling.  The following options allow partial or full loop unrolling
170     to be set independently for encryption and decryption
171 */
/* Encryption unrolling: both leading tests are 0, so NONE is selected.
   Turn the first test into 1 for FULL, or the second for PARTIAL.  The
   names FULL, PARTIAL and NONE receive their numeric values later in the
   file. */
#if 0
#define ENC_UNROLL  FULL
#elif 0
#define ENC_UNROLL  PARTIAL
#else
#define ENC_UNROLL  NONE
#endif

/* Decryption unrolling: same scheme, also set to NONE. */
#if 0
#define DEC_UNROLL  FULL
#elif 0
#define DEC_UNROLL  PARTIAL
#else
#define DEC_UNROLL  NONE
#endif
187 
188 /*  5. FIXED OR DYNAMIC TABLES
189 
    When this section is included the tables used by the code are compiled
191     statically into the binary file.  Otherwise they are computed once when
192     the code is first used.
193 */
/* Enabled: the lookup tables are emitted as static const initialised arrays
   (see the table definitions guarded by FIXED_TABLES below). */
#if 1
#define FIXED_TABLES
#endif
197 
198 /*  6. FAST FINITE FIELD OPERATIONS
199 
200     If this section is included, tables are used to provide faster finite
201     field arithmetic (this has no effect if FIXED_TABLES is defined).
202 */
/* Enabled.  In the visible part of this file FF_TABLES is only consulted
   together with FIXED_TABLES, when deciding whether the f2()..fe()
   arithmetic macros are needed. */
#if 1
#define FF_TABLES
#endif
206 
207 /*  7. INTERNAL STATE VARIABLE FORMAT
208 
209     The internal state of Rijndael is stored in a number of local 32-bit
    word variables which can be defined either as an array or as individual
    named variables. Include this section if you want to store these local
212     variables in arrays. Otherwise individual local variables will be used.
213 */
/* Enabled: the state is held in arrays, which selects the x[c] form of the
   s(x,c) accessor defined below. */
#if 1
#define ARRAYS
#endif
217 
218 /* In this implementation the columns of the state array are each held in
219    32-bit words. The state array can be held in various ways: in an array
220    of words, in a number of individual word variables or in a number of
221    processor registers. The following define maps a variable name x and
222    a column number c to the way the state array variable is to be held.
223    The first define below maps the state into an array x[c] whereas the
224    second form maps the state into a number of individual variables x0,
   x1, etc.  Another form could map individual state columns to machine
226    register names.
227 */
228 
/* s(x,c): access column c of state variable x.
   With ARRAYS this is the array element x[c]; otherwise the column number
   is token-pasted onto the name, so s(x,0) names the variable x0 and c must
   be a literal digit. */
#if defined(ARRAYS)
#define s(x,c) x[c]
#else
#define s(x,c) x##c
#endif
234 
235 /*  8. VARIABLE BLOCK SIZE SPEED
236 
237     This section is only relevant if you wish to use the variable block
238     length feature of the code.  Include this section if you place more
239     emphasis on speed rather than code size.
240 */
/* Disabled: FAST_VARIABLE is left undefined.  BLOCK_SIZE is fixed earlier in
   this file, so the variable block length feature is not in use here. */
#if 0
#define FAST_VARIABLE
#endif
244 
245 /*  9. INTERNAL TABLE CONFIGURATION
246 
247     This cipher proceeds by repeating in a number of cycles known as 'rounds'
248     which are implemented by a round function which can optionally be speeded
249     up using tables.  The basic tables are each 256 32-bit words, with either
250     one or four tables being required for each round function depending on
251     how much speed is required. The encryption and decryption round functions
    are different and the last encryption and decryption round functions are
253     different again making four different round functions in all.
254 
255     This means that:
256       1. Normal encryption and decryption rounds can each use either 0, 1
257          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
258       2. The last encryption and decryption rounds can also use either 0, 1
259          or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
260 
261     Include or exclude the appropriate definitions below to set the number
262     of tables used by this implementation.
263 */
264 
/* Table count for each of the four round functions.  In every group the
   first true test wins: "#if 1" gives FOUR_TABLES; change it to 0 and the
   following "#elif 0" to 1 for ONE_TABLE; set both to 0 for NO_TABLES.
   As shipped, all four groups select FOUR_TABLES. */
#if 1   /* set tables for the normal encryption round */
#define ENC_ROUND   FOUR_TABLES
#elif 0
#define ENC_ROUND   ONE_TABLE
#else
#define ENC_ROUND   NO_TABLES
#endif

#if 1   /* set tables for the last encryption round */
#define LAST_ENC_ROUND  FOUR_TABLES
#elif 0
#define LAST_ENC_ROUND  ONE_TABLE
#else
#define LAST_ENC_ROUND  NO_TABLES
#endif

#if 1   /* set tables for the normal decryption round */
#define DEC_ROUND   FOUR_TABLES
#elif 0
#define DEC_ROUND   ONE_TABLE
#else
#define DEC_ROUND   NO_TABLES
#endif

#if 1   /* set tables for the last decryption round */
#define LAST_DEC_ROUND  FOUR_TABLES
#elif 0
#define LAST_DEC_ROUND  ONE_TABLE
#else
#define LAST_DEC_ROUND  NO_TABLES
#endif
296 
297 /*  The decryption key schedule can be speeded up with tables in the same
298     way that the round functions can.  Include or exclude the following
299     defines to set this requirement.
300 */
/* Table count for the key schedule, chosen with the same first-true-test
   scheme as the round options; FOUR_TABLES is selected. */
#if 1
#define KEY_SCHED   FOUR_TABLES
#elif 0
#define KEY_SCHED   ONE_TABLE
#else
#define KEY_SCHED   NO_TABLES
#endif
308 
309 /* END OF CONFIGURATION OPTIONS */
310 
/* Numeric values behind the symbolic option names used above.  They are
   defined after the options that mention them; this works because the
   option macros are only expanded when the #if tests below evaluate them. */
#define NO_TABLES   0   /* DO NOT CHANGE */
#define ONE_TABLE   1   /* DO NOT CHANGE */
#define FOUR_TABLES 4   /* DO NOT CHANGE */
#define NONE        0   /* DO NOT CHANGE */
#define PARTIAL     1   /* DO NOT CHANGE */
#define FULL        2   /* DO NOT CHANGE */
317 
/* A fixed block size must be a multiple of 4 bytes in the range 16..32. */
#if defined(BLOCK_SIZE) && ((BLOCK_SIZE & 3) || BLOCK_SIZE < 16 || BLOCK_SIZE > 32)
#error An illegal block size has been specified.
#endif

/* RC_LENGTH: number of round-constant entries.
   Without a fixed block size it is 29; with one it is computed from
   BLOCK_SIZE.  The computed form is wrapped in parentheses so that it stays
   a single value when used inside a larger expression (for example
   2 * RC_LENGTH or RC_LENGTH - 1). */
#if !defined(BLOCK_SIZE)
#define RC_LENGTH    29
#else
#define RC_LENGTH   (5 * BLOCK_SIZE / 4 - (BLOCK_SIZE == 16 ? 10 : 11))
#endif
327 
328 /* Disable at least some poor combinations of options */
329 
/* The last encryption round may not use more tables than the normal round:
   no tables in the normal round forces none in the last round, and one
   table in the normal round caps the last round at one table. */
#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
#undef  LAST_ENC_ROUND
#define LAST_ENC_ROUND  NO_TABLES
#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
#undef  LAST_ENC_ROUND
#define LAST_ENC_ROUND  ONE_TABLE
#endif

/* Unrolling is switched off when the encryption round uses no tables. */
#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
#undef  ENC_UNROLL
#define ENC_UNROLL  NONE
#endif

/* Same limit for the last decryption round ... */
#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
#undef  LAST_DEC_ROUND
#define LAST_DEC_ROUND  NO_TABLES
#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
#undef  LAST_DEC_ROUND
#define LAST_DEC_ROUND  ONE_TABLE
#endif

/* ... and the same unrolling restriction for decryption. */
#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
#undef  DEC_UNROLL
#define DEC_UNROLL  NONE
#endif
355 
356 /*  upr(x,n):  rotates bytes within words by n positions, moving bytes to
357                higher index positions with wrap around into low positions
358     ups(x,n):  moves bytes by n positions to higher index positions in
359                words but without wrap around
360     bval(x,n): extracts a byte from a word
361 
362     NOTE:      The definitions given here are intended only for use with
363                unsigned variables and with shift counts that are compile
364                time constants
365 */
366 
/* Little-endian internal order: byte 0 is the least significant byte of a
   word, so moving bytes to higher index positions is a left shift. */
#if (INTERNAL_BYTE_ORDER == AES_LITTLE_ENDIAN)
#if defined(_MSC_VER)
#define upr(x,n)        _lrotl((aes_32t)(x), 8 * (n))
#else
#define upr(x,n)        (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n))))
#endif
#define ups(x,n)        ((aes_32t)(x) << (8 * (n)))
#define bval(x,n)       ((aes_08t)((x) >> (8 * (n))))
#define bytes2word(b0, b1, b2, b3)  \
        (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0))
#endif

/* Big-endian internal order: byte 0 is the most significant byte of a word,
   so moving bytes to higher index positions is a right shift.
   ups() previously carried an unbalanced closing parenthesis, which made any
   use of it a syntax error; upr() now parenthesizes its left-shift count in
   the same way as the little-endian definition. */
#if (INTERNAL_BYTE_ORDER == AES_BIG_ENDIAN)
#define upr(x,n)        (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n))))
#define ups(x,n)        ((aes_32t)(x) >> (8 * (n)))
#define bval(x,n)       ((aes_08t)((x) >> (24 - 8 * (n))))
#define bytes2word(b0, b1, b2, b3)  \
        (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3))
#endif
386 
/* word_in(x):    read a 32-bit word, in internal byte order, from byte
                  pointer x
   word_out(x,v): write word v, in internal byte order, to byte pointer x

   Three variants follow: byte-wise access (SAFE_IO), a direct word access
   when internal and platform byte orders agree, and a byte-swapped word
   access when they differ. */
#if defined(SAFE_IO)

/* Byte-wise variant.  Note that word_out expands to a brace-enclosed block
   of four assignments, not to an expression. */
#define word_in(x)      bytes2word((x)[0], (x)[1], (x)[2], (x)[3])
#define word_out(x,v)   { (x)[0] = bval(v,0); (x)[1] = bval(v,1);   \
                          (x)[2] = bval(v,2); (x)[3] = bval(v,3);   }

#elif (INTERNAL_BYTE_ORDER == PLATFORM_BYTE_ORDER)

/* Direct variant: the byte pointer is cast to a word pointer. */
#define word_in(x)      *(aes_32t*)(x)
#define word_out(x,v)   *(aes_32t*)(x) = (v)

#else

/* Swapped variant: supply bswap_32 (and _lrotl when not building with
   MSVC) if they are not already available, then swap on every access. */
#if !defined(bswap_32)
#if !defined(_MSC_VER)
#define _lrotl(x,n)     ((((aes_32t)(x)) <<  n) | (((aes_32t)(x)) >> (32 - n)))
#endif
#define bswap_32(x)     ((_lrotl((x),8) & 0x00ff00ff) | (_lrotl((x),24) & 0xff00ff00))
#endif

#define word_in(x)      bswap_32(*(aes_32t*)(x))
#define word_out(x,v)   *(aes_32t*)(x) = bswap_32(v)

#endif
411 
412 /* the finite field modular polynomial and elements */
413 
/* 0x011b encodes x^8 + x^4 + x^3 + x + 1; BPOLY is the same polynomial
   with the x^8 term dropped */
#define BPOLY     0x1b
#define WPOLY   0x011b

/* FFmulX(w): multiply each of the four bytes packed in w by 'x' {02} in
   GF(2^8), all in parallel.  m2 keeps the low seven bits of every byte so
   the left shift cannot spill into the neighbouring byte; m1 isolates the
   top bit of every byte, which after shifting down to bit 0 selects whether
   BPOLY is folded back into that byte. */
#define m2  0x7f7f7f7f
#define m1  0x80808080
#define FFmulX(w)  ((((w) & m2) << 1) ^ ((((w) & m1) >> 7) * BPOLY))
422 
423 /* The following defines provide alternative definitions of FFmulX that might
424    give improved performance if a fast 32-bit multiply is not available. Note
425    that a temporary variable u needs to be defined where FFmulX is used.
426 
427 #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
428 #define m4  (0x01010101 * BPOLY)
429 #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
430 */
431 
432 /* Work out which tables are needed for the different options   */
433 
/* When AES_ASM is defined, all four round functions are forced to use
   FOUR_TABLES, replacing whatever was configured above.  KEY_SCHED is
   treated differently: its #define sits inside the #ifdef, so it is only
   forced to FOUR_TABLES when it was already defined. */
#ifdef  AES_ASM
#ifdef  ENC_ROUND
#undef  ENC_ROUND
#endif
#define ENC_ROUND   FOUR_TABLES
#ifdef  LAST_ENC_ROUND
#undef  LAST_ENC_ROUND
#endif
#define LAST_ENC_ROUND  FOUR_TABLES
#ifdef  DEC_ROUND
#undef  DEC_ROUND
#endif
#define DEC_ROUND   FOUR_TABLES
#ifdef  LAST_DEC_ROUND
#undef  LAST_DEC_ROUND
#endif
#define LAST_DEC_ROUND  FOUR_TABLES
#ifdef  KEY_SCHED
#undef  KEY_SCHED
#define KEY_SCHED   FOUR_TABLES
#endif
#endif
456 
/* Encryption tables.  The normal round selects FT1_SET or FT4_SET (ft_tab
   with one or four sub-tables) and the last round FL1_SET or FL4_SET
   (fl_tab); a round configured without tables requests the plain S-box via
   SBX_SET instead. */
#if defined(ENCRYPTION) || defined(AES_ASM)
#if ENC_ROUND == ONE_TABLE
#define FT1_SET
#elif ENC_ROUND == FOUR_TABLES
#define FT4_SET
#else
#define SBX_SET
#endif
#if LAST_ENC_ROUND == ONE_TABLE
#define FL1_SET
#elif LAST_ENC_ROUND == FOUR_TABLES
#define FL4_SET
#elif !defined(SBX_SET)
#define SBX_SET
#endif
#endif
473 
/* Decryption tables, chosen the same way: IT1_SET / IT4_SET for the normal
   round, IL1_SET / IL4_SET for the last round, and ISB_SET (the plain
   inverse S-box) for a round configured without tables. */
#if defined(DECRYPTION) || defined(AES_ASM)
#if DEC_ROUND == ONE_TABLE
#define IT1_SET
#elif DEC_ROUND == FOUR_TABLES
#define IT4_SET
#else
#define ISB_SET
#endif
#if LAST_DEC_ROUND == ONE_TABLE
#define IL1_SET
#elif LAST_DEC_ROUND == FOUR_TABLES
#define IL4_SET
#elif !defined(ISB_SET)
#define ISB_SET
#endif
#endif
490 
/* Key schedule tables: LS*_SET is consulted by ls_box and IM*_SET by
   inv_mcol (both defined below).  With no key schedule tables the plain
   S-box is requested, unless SBX_SET is already defined. */
#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE)
#if KEY_SCHED == ONE_TABLE
#define LS1_SET
#define IM1_SET
#elif KEY_SCHED == FOUR_TABLES
#define LS4_SET
#define IM4_SET
#elif !defined(SBX_SET)
#define SBX_SET
#endif
#endif
502 
/* prefx: storage-class prefix for table declarations -- "static const" for
   fixed tables, "extern" otherwise.  In the non-fixed case tab_init and
   gen_tabs() are also declared; their definitions are not in the visible
   part of this file.  NOTE(review): in the visible part, prefx is only
   referenced by the commented-out declarations that follow. */
#ifdef  FIXED_TABLES
#define prefx   static const
#else
#define prefx   extern
extern aes_08t  tab_init;
void gen_tabs(void);
#endif
510 
511 //prefx aes_32t  rcon_tab[29];
512 //
513 //#ifdef  SBX_SET
514 //prefx aes_08t s_box[256];
515 //#endif
516 //
517 //#ifdef  ISB_SET
518 //prefx aes_08t inv_s_box[256];
519 //#endif
520 //
521 //#ifdef  FT1_SET
522 //prefx aes_32t ft_tab[256];
523 //#endif
524 //
525 //#ifdef  FT4_SET
526 //prefx aes_32t ft_tab[4][256];
527 //#endif
528 //
529 //#ifdef  FL1_SET
530 //prefx aes_32t fl_tab[256];
531 //#endif
532 //
533 //#ifdef  FL4_SET
534 //prefx aes_32t fl_tab[4][256];
535 //#endif
536 //
537 //#ifdef  IT1_SET
538 //prefx aes_32t it_tab[256];
539 //#endif
540 //
541 //#ifdef  IT4_SET
542 //prefx aes_32t it_tab[4][256];
543 //#endif
544 //
545 //#ifdef  IL1_SET
546 //prefx aes_32t il_tab[256];
547 //#endif
548 //
549 //#ifdef  IL4_SET
550 //prefx aes_32t il_tab[4][256];
551 //#endif
552 //
553 //#ifdef  LS1_SET
554 //#ifdef  FL1_SET
555 //#undef  LS1_SET
556 //#else
557 //prefx aes_32t ls_tab[256];
558 //#endif
559 //#endif
560 //
561 //#ifdef  LS4_SET
562 //#ifdef  FL4_SET
563 //#undef  LS4_SET
564 //#else
565 //prefx aes_32t ls_tab[4][256];
566 //#endif
567 //#endif
568 //
569 //#ifdef  IM1_SET
570 //prefx aes_32t im_tab[256];
571 //#endif
572 //
573 //#ifdef  IM4_SET
574 //prefx aes_32t im_tab[4][256];
575 //#endif
576 
577 /* Set the number of columns in nc.  Note that it is important
578    that nc is a constant which is known at compile time if the
579    highest speed version of the code is needed.
580 */
581 
/* nc: number of 32-bit columns in the state (block size in bytes / 4).
   With a fixed BLOCK_SIZE this is a compile-time constant; otherwise it is
   read from the context, which requires a pointer named cx to be in scope
   wherever nc is used. */
#if defined(BLOCK_SIZE)
#define nc  (BLOCK_SIZE >> 2)
#else
#define nc  (cx->n_blk >> 2)
#endif
587 
588 /* generic definitions of Rijndael macros that use tables    */
589 
/* Shared conventions for the three lookup helpers below:
     vf(w,r,c)  yields the word from which byte r of column c is taken
     rf(r,c)    yields the index of the byte to extract from that word
   Rows 0..3 are always handled in that order. */

/* no_table: push the four selected bytes through the byte table sbox and
   pack the results back into a word. */
#define no_table(w,sbox,vf,rf,c)                \
    bytes2word(sbox[bval(vf(w,0,c),rf(0,c))],   \
               sbox[bval(vf(w,1,c),rf(1,c))],   \
               sbox[bval(vf(w,2,c),rf(2,c))],   \
               sbox[bval(vf(w,3,c),rf(3,c))])

/* one_table: a single word table tbl serves all four rows; the entries for
   rows 1..3 are passed through rot(entry, row) before being XORed in. */
#define one_table(w,rot,tbl,vf,rf,c)            \
    (      tbl[bval(vf(w,0,c),rf(0,c))]         \
     ^ rot(tbl[bval(vf(w,1,c),rf(1,c))],1)      \
     ^ rot(tbl[bval(vf(w,2,c),rf(2,c))],2)      \
     ^ rot(tbl[bval(vf(w,3,c),rf(3,c))],3))

/* four_tables: row r has its own sub-table tbl[r]; the four entries are
   XORed together. */
#define four_tables(w,tbl,vf,rf,c)              \
    (  tbl[0][bval(vf(w,0,c),rf(0,c))]          \
     ^ tbl[1][bval(vf(w,1,c),rf(1,c))]          \
     ^ tbl[2][bval(vf(w,2,c),rf(2,c))]          \
     ^ tbl[3][bval(vf(w,3,c),rf(3,c))])
607 
/* Selector helpers passed as the vf/rf arguments of the table macros:
     vf1(x,r,c)  always uses the word x itself
     rf1(r,c)    byte index equals the row number r
     rf2(r,c)    byte index is (r - c) reduced to the range 0..3
   The parameters of rf2 are parenthesized so that expression arguments
   (for example rf2(0, i + 1)) are subtracted as a whole. */
#define vf1(x,r,c)  (x)
#define rf1(r,c)    (r)
#define rf2(r,c)    (((r) - (c)) & 3)
611 
612 /* perform forward and inverse column mix operation on four bytes in long word x in */
613 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */
614 
/* fwd_mcol(x): forward column mix of the four bytes packed in word x.
   dec_fmvars expands to whatever declarations fwd_mcol needs at its point
   of use: nothing for the table variants, the temporaries f1 and f2 for
   the arithmetic variant.  FM4_SET and FM1_SET are not defined anywhere in
   the visible part of this file, in which case the arithmetic variant is
   the one in effect. */
#define dec_fmvars
#if defined(FM4_SET)    /* not currently used */
#define fwd_mcol(x)     four_tables(x,fm_tab,vf1,rf1,0)
#elif defined(FM1_SET)  /* not currently used */
#define fwd_mcol(x)     one_table(x,upr,fm_tab,vf1,rf1,0)
#else
#undef  dec_fmvars
#define dec_fmvars      aes_32t f1, f2;
/* f1 holds x and f2 its FFmulX double; the comma expression evaluates to the
   final XOR of f2 with three byte-rotated terms. */
#define fwd_mcol(x)     (f1 = (x), f2 = FFmulX(f1), f2 ^ upr(f1 ^ f2, 3) ^ upr(f1, 2) ^ upr(f1, 1))
#endif
625 
/* inv_mcol(x): inverse column mix of the four bytes packed in word x.
   dec_imvars plays the same role as dec_fmvars: empty for the table
   variants (which look up im_tab), four temporaries for the arithmetic
   variant.  With KEY_SCHED set to FOUR_TABLES, IM4_SET is defined and the
   four-table variant is selected. */
#define dec_imvars
#if defined(IM4_SET)
#define inv_mcol(x)     four_tables(x,im_tab,vf1,rf1,0)
#elif defined(IM1_SET)
#define inv_mcol(x)     one_table(x,upr,im_tab,vf1,rf1,0)
#else
#undef  dec_imvars
#define dec_imvars      aes_32t    f2, f4, f8, f9;
/* f2, f4 and f8 are obtained by applying FFmulX one, two and three times to
   x; f9 starts as x and is then XORed with f8.  The value of the whole
   expression is the final compound assignment to f2. */
#define inv_mcol(x) \
    (f9 = (x), f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
    f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
#endif
638 
/* ls_box(x,c): table lookup on the four bytes of word x, with the byte
   index for each row chosen by rf2(r,c).  The first table set that is
   defined wins, in this order: fl_tab (four tables), ls_tab (four tables),
   fl_tab (one table), ls_tab (one table), and finally the plain s_box.
   The fl_tab variants come first, so no separate ls_tab is needed when the
   last-round encryption tables are present. */
#if defined(FL4_SET)
#define ls_box(x,c)     four_tables(x,fl_tab,vf1,rf2,c)
#elif   defined(LS4_SET)
#define ls_box(x,c)     four_tables(x,ls_tab,vf1,rf2,c)
#elif defined(FL1_SET)
#define ls_box(x,c)     one_table(x,upr,fl_tab,vf1,rf2,c)
#elif defined(LS1_SET)
#define ls_box(x,c)     one_table(x,upr,ls_tab,vf1,rf2,c)
#else
#define ls_box(x,c)     no_table(x,s_box,vf1,rf2,c)
#endif
650 
651 /*----------------------------------------------------------------------
652 |   tables
653 +---------------------------------------------------------------------*/
654 #if defined(FIXED_TABLES) || !defined(FF_TABLES)
655 
656 /*  finite field arithmetic operations */
657 
658 #define f2(x)   ((x<<1) ^ (((x>>7) & 1) * WPOLY))
659 #define f4(x)   ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
660 #define f8(x)   ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \
661                         ^ (((x>>5) & 4) * WPOLY))
662 #define f3(x)   (f2(x) ^ x)
663 #define f9(x)   (f8(x) ^ x)
664 #define fb(x)   (f8(x) ^ f2(x) ^ x)
665 #define fd(x)   (f8(x) ^ f4(x) ^ x)
666 #define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
667 
668 #endif
669 
#if defined(FIXED_TABLES)

/* sb_data(w): the 256 S-box values in index order, each wrapped in the
   caller-supplied macro w().  The same list therefore produces the plain
   byte table (w = h0) as well as the word tables built with u0..u3 and
   w0..w3 further down. */
#define sb_data(w) \
    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16)
705 
/* isb_data(w): the 256 inverse S-box values in index order, each wrapped in
   the caller-supplied macro w() (h0 for the plain byte table, v0 for the
   inverse round word table).
   The list ends without a trailing comma, like sb_data and mm_data, so the
   expansion is a well-formed comma-separated list wherever it is used and
   not only inside a brace initializer. */
#define isb_data(w) \
    w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
    w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
    w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
    w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
    w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
    w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
    w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
    w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
    w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
    w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
    w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
    w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
    w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
    w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
    w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
    w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
    w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
    w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
    w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
    w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
    w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
    w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
    w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
    w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
    w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
    w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
    w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
    w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
    w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
    w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
    w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
    w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d)
739 
/* mm_data(w): the byte values 0x00..0xff in ascending order, each wrapped in
   the caller-supplied macro w(), giving a table whose entry i is w(i).
   NOTE(review): no use of mm_data appears in the visible part of this file;
   presumably it builds im_tab together with v0..v3 -- confirm. */
#define mm_data(w) \
    w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
    w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
    w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
    w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
    w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
    w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
    w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
    w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
    w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
    w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
    w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
    w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
    w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
    w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
    w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
    w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
    w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
    w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
    w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
    w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
    w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
    w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
    w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
    w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
    w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
    w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
    w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
    w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
    w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
    w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
    w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
    w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff)
773 
/* h0(x): identity wrapper, used to generate the plain byte tables */
#define h0(x)   (x)

/*  These defines are used to ensure tables are generated in the
    right format depending on the internal byte order required
*/

/* wN(p): a word with byte p in byte position N and zero elsewhere; the
   mapping of byte positions to bits is decided by bytes2word */
#define w0(p)   bytes2word(p, 0, 0, 0)
#define w1(p)   bytes2word(0, p, 0, 0)
#define w2(p)   bytes2word(0, 0, p, 0)
#define w3(p)   bytes2word(0, 0, 0, p)
785 /*  Number of elements required in this table for different
786     block and key lengths is:
787 
788     Rcon Table      key length (bytes)
789     Length          16  20  24  28  32
790                 ---------------------
791     block     16 |  10   9   8   7   7
792     length    20 |  14  11  10   9   9
793     (bytes)   24 |  19  15  12  11  11
794               28 |  24  19  16  13  13
795               32 |  29  23  19  17  14
796 
797     this table can be a table of bytes if the key schedule
798     code is adjusted accordingly
799 */
800 
/*  Word packings for the forward round tables: u0 passes
    (f2(p), p, p, f3(p)) to bytes2word() and u1..u3 are the same four
    values rotated one position further each time.  f2()/f3() are
    presumably the finite field multiplies by 2 and 3 - confirm
    against their definition for the fixed-table build.
*/
#define u0(p)   bytes2word(f2(p), p, p, f3(p))
#define u1(p)   bytes2word(f3(p), f2(p), p, p)
#define u2(p)   bytes2word(p, f3(p), f2(p), p)
#define u3(p)   bytes2word(p, p, f3(p), f2(p))

/*  Word packings for the inverse tables: v0 passes
    (fe(p), f9(p), fd(p), fb(p)) and v1..v3 rotate those values in
    the same way as u1..u3.
*/
#define v0(p)   bytes2word(fe(p), f9(p), fd(p), fb(p))
#define v1(p)   bytes2word(fb(p), fe(p), f9(p), fd(p))
#define v2(p)   bytes2word(fd(p), fb(p), fe(p), f9(p))
#define v3(p)   bytes2word(f9(p), fd(p), fb(p), fe(p))
810 
811 static const aes_32t rcon_tab[29] =
812 {
813     w0(0x01), w0(0x02), w0(0x04), w0(0x08),
814     w0(0x10), w0(0x20), w0(0x40), w0(0x80),
815     w0(0x1b), w0(0x36), w0(0x6c), w0(0xd8),
816     w0(0xab), w0(0x4d), w0(0x9a), w0(0x2f),
817     w0(0x5e), w0(0xbc), w0(0x63), w0(0xc6),
818     w0(0x97), w0(0x35), w0(0x6a), w0(0xd4),
819     w0(0xb3), w0(0x7d), w0(0xfa), w0(0xef),
820     w0(0xc5)
821 };
822 
/*  Fixed (compile time) tables.  Each table is built by expanding a
    data macro (sb_data, isb_data or mm_data) with one of the packing
    macros above; the *_SET macros select which tables exist and
    whether a table has one or four 256-entry rows.
*/

/* S-box and inverse S-box as plain bytes */
#ifdef  SBX_SET
static const aes_08t s_box[256] = { sb_data(h0) };
#endif
#ifdef  ISB_SET
static const aes_08t inv_s_box[256] = { isb_data(h0) };
#endif

/* ft_tab: sb_data packed with u0 (one row) or u0..u3 (four rows) */
#ifdef  FT1_SET
static const aes_32t ft_tab[256] = { sb_data(u0) };
#endif
#ifdef  FT4_SET
static const aes_32t ft_tab[4][256] =
    { {  sb_data(u0) }, {  sb_data(u1) }, {  sb_data(u2) }, {  sb_data(u3) } };
#endif

/* fl_tab: sb_data packed with w0 (one row) or w0..w3 (four rows) */
#ifdef  FL1_SET
static const aes_32t fl_tab[256] = { sb_data(w0) };
#endif
#ifdef  FL4_SET
static const aes_32t fl_tab[4][256] =
    { {  sb_data(w0) }, {  sb_data(w1) }, {  sb_data(w2) }, {  sb_data(w3) } };
#endif

/* it_tab: isb_data packed with v0 (one row) or v0..v3 (four rows) */
#ifdef  IT1_SET
static const aes_32t it_tab[256] = { isb_data(v0) };
#endif
#ifdef  IT4_SET
static const aes_32t it_tab[4][256] =
    { { isb_data(v0) }, { isb_data(v1) }, { isb_data(v2) }, { isb_data(v3) } };
#endif

/* il_tab: isb_data packed with w0 (one row) or w0..w3 (four rows) */
#ifdef  IL1_SET
static const aes_32t il_tab[256] = { isb_data(w0) };
#endif
#ifdef  IL4_SET
static const aes_32t il_tab[4][256] =
    { { isb_data(w0) }, { isb_data(w1) }, { isb_data(w2) }, { isb_data(w3) } };
#endif

/* ls_tab: same contents as fl_tab; the four-row form is commented
   out, so defining LS4_SET creates no table in this branch */
#ifdef  LS1_SET
static const aes_32t ls_tab[256] = { sb_data(w0) };
#endif
#ifdef  LS4_SET
/* GBG: unused?
static const aes_32t ls_tab[4][256] =
    { {  sb_data(w0) }, {  sb_data(w1) }, {  sb_data(w2) }, {  sb_data(w3) } };
*/
#endif

/* im_tab: mm_data packed with v0 (one row) or v0..v3 (four rows) */
#ifdef  IM1_SET
static const aes_32t im_tab[256] = { mm_data(v0) };
#endif
#ifdef  IM4_SET
static const aes_32t im_tab[4][256] =
    { {  mm_data(v0) }, {  mm_data(v1) }, {  mm_data(v2) }, {  mm_data(v3) } };
#endif
879 
880 #else   /* dynamic table generation */
881 
/* Zero until gen_tabs() has filled the tables below; gen_tabs() sets
   it to 1 as its last step and the key set-up routines test it */
aes_08t tab_init = 0;

/* NOTE(review): this defines the keyword 'const' away for the rest of
   the translation unit, so later declarations written with 'const'
   lose the qualifier - presumably so that code referring to the
   tables as const still compiles against the writable arrays below;
   confirm before relying on const anywhere after this point */
#define const

/* Round constants; RC_LENGTH entries are written by gen_tabs() */
static aes_32t  rcon_tab[RC_LENGTH];

/* Writable storage with the same names and shapes as the fixed
   tables; which arrays exist is selected by the *_SET macros */
#ifdef  SBX_SET
aes_08t s_box[256];
#endif
#ifdef  ISB_SET
aes_08t inv_s_box[256];
#endif

#ifdef  FT1_SET
aes_32t ft_tab[256];
#endif
#ifdef  FT4_SET
aes_32t ft_tab[4][256];
#endif

#ifdef  FL1_SET
aes_32t fl_tab[256];
#endif
#ifdef  FL4_SET
aes_32t fl_tab[4][256];
#endif

#ifdef  IT1_SET
aes_32t it_tab[256];
#endif
#ifdef  IT4_SET
aes_32t it_tab[4][256];
#endif

#ifdef  IL1_SET
aes_32t il_tab[256];
#endif
#ifdef  IL4_SET
aes_32t il_tab[4][256];
#endif

#ifdef  LS1_SET
aes_32t ls_tab[256];
#endif
#ifdef  LS4_SET
aes_32t ls_tab[4][256];
#endif

#ifdef  IM1_SET
aes_32t im_tab[256];
#endif
#ifdef  IM4_SET
aes_32t im_tab[4][256];
#endif
936 
937 #if !defined(FF_TABLES)
938 
939 /*  Generate the tables for the dynamic table option
940 
    It will generally be sensible to use tables to compute finite
    field multiplies and inverses but where memory is scarce this
    code might sometimes be better. But it only has effect during
    initialisation so it's pretty unimportant in overall terms.
945 */
946 
947 /*  return 2 ^ (n - 1) where n is the bit number of the highest bit
948     set in x with x in the range 1 < x < 0x00000200.   This form is
949     used so that locals within fi can be bytes rather than words
950 */
951 
hibit(const aes_32t x)952 static aes_08t hibit(const aes_32t x)
953 {   aes_08t r = (aes_08t)((x >> 1) | (x >> 2));
954 
955     r |= (r >> 2);
956     r |= (r >> 4);
957     return (r + 1) >> 1;
958 }
959 
960 /* return the inverse of the finite field element x */
961 
fi(const aes_08t x)962 static aes_08t fi(const aes_08t x)
963 {   aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
964 
965     if(x < 2) return x;
966 
967     for(;;)
968     {
969         if(!n1) return v1;
970 
971         while(n2 >= n1)
972         {
973             n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
974         }
975 
976         if(!n2) return v2;
977 
978         while(n1 >= n2)
979         {
980             n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
981         }
982     }
983 }
984 
985 #else
986 
/* define the finite field multiplies required for Rijndael */

/*  Each macro looks the product up as pow[log[x] + k] and maps 0 to
    0.  pow[] and log[] are the local arrays that gen_tabs() builds
    before using these macros; pow[] holds a second copy of its
    values at offset 255, so the sum needs no reduction.  k is 0x01
    for f3 because gen_tabs() generates the tables from 0x03; the
    other constants are presumably the logs of 0x02, 0x09, 0x0b, 0x0d
    and 0x0e - confirm.  fi(x) is the inverse, pow[255 - log[x]].
    The argument x is evaluated more than once.
*/
#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
#define fi(x) ((x) ?   pow[255 - log[x]]: 0)
996 
997 #endif
998 
/* The forward and inverse affine transformations used in the S-box */

/*  Both macros use, and overwrite, a variable named w that must
    already exist in the calling scope (gen_tabs() declares it as
    aes_32t).  The value of each macro is the final operand of its
    comma expression: the low byte of w ^ (w >> 8) XORed with 0x63
    (forward) or 0x05 (inverse).  The argument x is not parenthesised
    in the cast, so pass a simple or parenthesised expression.
*/

#define fwd_affine(x) \
    (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8)))

#define inv_affine(x) \
    (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8)))
1006 
/*  Fill rcon_tab and every S-box / round table enabled by the *_SET
    macros, then set tab_init to 1.  The key set-up routines call this
    when tab_init is still zero.  i is the loop index; w is a scratch
    word that is also overwritten by fwd_affine() and inv_affine().
*/
void gen_tabs(void)
{   aes_32t  i, w;

#if defined(FF_TABLES)

    aes_08t  pow[512], log[256];

    /*  log and power tables for GF(2^8) finite field with
        WPOLY as modular polynomial - the simplest primitive
        root is 0x03, used here to generate the tables
    */

    i = 0; w = 1;
    do
    {
        pow[i] = (aes_08t)w;
        /* second copy, 255 places on, so that the f2()..fe() macros
           can index pow[log[x] + k] without reducing the sum */
        pow[i + 255] = (aes_08t)w;
        log[w] = (aes_08t)i++;
        /* w = w * 3: w XOR (w * 2), folding in WPOLY when bit 7 of w
           is set */
        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);
    }
    while (w != 1);

#endif

    /* round constants: start at 1 and apply f2() for each entry */
    for(i = 0, w = 1; i < RC_LENGTH; ++i)
    {
        rcon_tab[i] = bytes2word(w, 0, 0, 0);
        w = f2(w);
    }

    for(i = 0; i < 256; ++i)
    {   aes_08t    b;

        /* forward S-box value for i: field inverse, then affine map */
        b = fwd_affine(fi((aes_08t)i));
        w = bytes2word(f2(b), b, b, f3(b));

#ifdef  SBX_SET
        s_box[i] = b;
#endif

#ifdef  FT1_SET                 /* tables for a normal encryption round */
        ft_tab[i] = w;
#endif
#ifdef  FT4_SET
        ft_tab[0][i] = w;
        ft_tab[1][i] = upr(w,1);
        ft_tab[2][i] = upr(w,2);
        ft_tab[3][i] = upr(w,3);
#endif
        w = bytes2word(b, 0, 0, 0);

#ifdef  FL1_SET                 /* tables for last encryption round (may also   */
        fl_tab[i] = w;          /* be used in the key schedule)                 */
#endif
#ifdef  FL4_SET
        fl_tab[0][i] = w;
        fl_tab[1][i] = upr(w,1);
        fl_tab[2][i] = upr(w,2);
        fl_tab[3][i] = upr(w,3);
#endif

#ifdef  LS1_SET                 /* table for key schedule if fl_tab above is    */
        ls_tab[i] = w;          /* not of the required form                     */
#endif
#ifdef  LS4_SET
        ls_tab[0][i] = w;
        ls_tab[1][i] = upr(w,1);
        ls_tab[2][i] = upr(w,2);
        ls_tab[3][i] = upr(w,3);
#endif

        /* inverse S-box value for i: inverse affine map, then field
           inverse */
        b = fi(inv_affine((aes_08t)i));
        w = bytes2word(fe(b), f9(b), fd(b), fb(b));

        /* note: im_tab is indexed by b (the byte w was built from),
           not by the loop index i */
#ifdef  IM1_SET                 /* tables for the inverse mix column operation  */
        im_tab[b] = w;
#endif
#ifdef  IM4_SET
        im_tab[0][b] = w;
        im_tab[1][b] = upr(w,1);
        im_tab[2][b] = upr(w,2);
        im_tab[3][b] = upr(w,3);
#endif

#ifdef  ISB_SET
        inv_s_box[i] = b;
#endif
#ifdef  IT1_SET                 /* tables for a normal decryption round */
        it_tab[i] = w;
#endif
#ifdef  IT4_SET
        it_tab[0][i] = w;
        it_tab[1][i] = upr(w,1);
        it_tab[2][i] = upr(w,2);
        it_tab[3][i] = upr(w,3);
#endif
        w = bytes2word(b, 0, 0, 0);
#ifdef  IL1_SET                 /* tables for last decryption round */
        il_tab[i] = w;
#endif
#ifdef  IL4_SET
        il_tab[0][i] = w;
        il_tab[1][i] = upr(w,1);
        il_tab[2][i] = upr(w,2);
        il_tab[3][i] = upr(w,3);
#endif
    }

    /* mark the tables as ready so callers do not rebuild them */
    tab_init = 1;
}
1117 
1118 #endif
1119 
1120 /*----------------------------------------------------------------------
1121 |   key schedule
1122 +---------------------------------------------------------------------*/
1123 #if !defined(BLOCK_SIZE)
1124 
aes_blk_len(unsigned int blen,aes_ctx cx[1])1125 static aes_rval aes_blk_len(unsigned int blen, aes_ctx cx[1])
1126 {
1127 #if !defined(FIXED_TABLES)
1128     if(!tab_init) gen_tabs();
1129 #endif
1130 
1131     if((blen & 7) || blen < 16 || blen > 32)
1132     {
1133         cx->n_blk = 0; return aes_bad;
1134     }
1135 
1136     cx->n_blk = blen;
1137     return aes_good;
1138 }
1139 
1140 #endif
1141 
1142 /* Initialise the key schedule from the user supplied key. The key
1143    length is now specified in bytes - 16, 24 or 32 as appropriate.
1144    This corresponds to bit lengths of 128, 192 and 256 bits, and
1145    to Nk values of 4, 6 and 8 respectively.
1146 
1147    The following macros implement a single cycle in the key
1148    schedule generation process. The number of cycles needed
1149    for each cx->n_col and nk value is:
1150 
1151     nk =             4  5  6  7  8
1152     ------------------------------
1153     cx->n_col = 4   10  9  8  7  7
1154     cx->n_col = 5   14 11 10  9  9
1155     cx->n_col = 6   19 15 12 11 11
    cx->n_col = 7   24 19 16 13 13
1157     cx->n_col = 8   29 23 19 17 14
1158 */
1159 
/*  keN(k,i) performs cycle i of the schedule for an N-word key: the
    running words in ss[] (declared by the calling function) are
    updated in place and stored at k[N*(i)+N] onwards.  The first word
    of each cycle mixes in ls_box() of the last running word and
    rcon_tab[i].  kelN(k,i) is the form used for the final cycle in
    the unrolled key set-up code.
*/
#define ke4(k,i) \
{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
}
/* identical to ke4: a four-word cycle has nothing to omit */
#define kel4(k,i) \
{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
}

#define ke6(k,i) \
{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
    k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \
}
/* final cycle: only the first four of the six words are produced */
#define kel6(k,i) \
{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
}

/* the fifth word of an eight-word cycle gets an extra ls_box(ss[3],0) */
#define ke8(k,i) \
{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \
    k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \
}
/* final cycle: only the first four of the eight words are produced */
#define kel8(k,i) \
{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
}
1189 
1190 #if defined(ENCRYPTION_KEY_SCHEDULE)
1191 
aes_enc_key(const unsigned char in_key[],unsigned int klen,aes_ctx cx[1])1192 static aes_rval aes_enc_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1])
1193 {   aes_32t    ss[8];
1194 
1195 #if !defined(FIXED_TABLES)
1196     if(!tab_init) gen_tabs();
1197 #endif
1198 
1199 #if !defined(BLOCK_SIZE)
1200     if(!cx->n_blk) cx->n_blk = 16;
1201 #else
1202     cx->n_blk = BLOCK_SIZE;
1203 #endif
1204 
1205     cx->n_blk = (cx->n_blk & ~3) | 1;
1206 
1207     cx->k_sch[0] = ss[0] = word_in(in_key     );
1208     cx->k_sch[1] = ss[1] = word_in(in_key +  4);
1209     cx->k_sch[2] = ss[2] = word_in(in_key +  8);
1210     cx->k_sch[3] = ss[3] = word_in(in_key + 12);
1211 
1212 #if (BLOCK_SIZE == 16) && (ENC_UNROLL != NONE)
1213 
1214     switch(klen)
1215     {
1216     case 16:    ke4(cx->k_sch, 0); ke4(cx->k_sch, 1);
1217                 ke4(cx->k_sch, 2); ke4(cx->k_sch, 3);
1218                 ke4(cx->k_sch, 4); ke4(cx->k_sch, 5);
1219                 ke4(cx->k_sch, 6); ke4(cx->k_sch, 7);
1220                 ke4(cx->k_sch, 8); kel4(cx->k_sch, 9);
1221                 cx->n_rnd = 10; break;
1222     case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1223                 cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1224                 ke6(cx->k_sch, 0); ke6(cx->k_sch, 1);
1225                 ke6(cx->k_sch, 2); ke6(cx->k_sch, 3);
1226                 ke6(cx->k_sch, 4); ke6(cx->k_sch, 5);
1227                 ke6(cx->k_sch, 6); kel6(cx->k_sch, 7);
1228                 cx->n_rnd = 12; break;
1229     case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1230                 cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1231                 cx->k_sch[6] = ss[6] = word_in(in_key + 24);
1232                 cx->k_sch[7] = ss[7] = word_in(in_key + 28);
1233                 ke8(cx->k_sch, 0); ke8(cx->k_sch, 1);
1234                 ke8(cx->k_sch, 2); ke8(cx->k_sch, 3);
1235                 ke8(cx->k_sch, 4); ke8(cx->k_sch, 5);
1236                 kel8(cx->k_sch, 6);
1237                 cx->n_rnd = 14; break;
1238     default:    cx->n_rnd = 0; return aes_bad;
1239     }
1240 #else
1241     {   aes_32t i, l;
1242         cx->n_rnd = ((klen >> 2) > nc ? (klen >> 2) : nc) + 6;
1243         l = (nc * cx->n_rnd + nc - 1) / (klen >> 2);
1244 
1245         switch(klen)
1246         {
1247         case 16:    for(i = 0; i < l; ++i)
1248                         ke4(cx->k_sch, i);
1249                     break;
1250         case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1251                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1252                     for(i = 0; i < l; ++i)
1253                         ke6(cx->k_sch, i);
1254                     break;
1255         case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1256                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1257                     cx->k_sch[6] = ss[6] = word_in(in_key + 24);
1258                     cx->k_sch[7] = ss[7] = word_in(in_key + 28);
1259                     for(i = 0; i < l; ++i)
1260                         ke8(cx->k_sch,  i);
1261                     break;
1262         default:    cx->n_rnd = 0; return aes_bad;
1263         }
1264     }
1265 #endif
1266 
1267     return aes_good;
1268 }
1269 
1270 #endif
1271 
1272 #if defined(DECRYPTION_KEY_SCHEDULE)
1273 
/*  Helpers for the decryption key schedule.  When decryption rounds
    use tables, ff(x) applies inv_mcol() to a schedule word and d_vars
    expands to dec_imvars (presumably the temporaries inv_mcol()
    needs - confirm against its definition).  Otherwise ff(x) is the
    identity and d_vars expands to nothing.
*/
#if (DEC_ROUND != NO_TABLES)
#define d_vars  dec_imvars
#define ff(x)   inv_mcol(x)
#else
#define ff(x)   (x)
#define d_vars
#endif
1281 
/*  Decryption schedule cycles for a four-word key.  aes_dec_key()
    uses kdf4 for cycle 0, kd4 for the cycles in between and kdl4 for
    the last cycle.  All three work on ss[] declared by the caller;
    the first variant also uses ss[4] as a temporary.  The '#if 1'
    selects the first variant; the '#else' variant is compiled out.
*/
#if 1
#define kdf4(k,i) \
{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \
    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; \
    ss[4] ^= k[4*(i)];   k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \
    ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \
}
#define kd4(k,i) \
{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
    k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
    k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
}
#define kdl4(k,i) \
{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; ss[i % 4] ^= ss[4]; \
    k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \
    k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \
}
#else
#define kdf4(k,i) \
{   ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \
    ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \
}
#define kd4(k,i) \
{   ss[4] = ls_box(ss[3],3) ^ rcon_tab[i]; \
    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \
    ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \
    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \
    ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \
}
#define kdl4(k,i) \
{   ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \
    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \
}
#endif
1316 
/*  Decryption schedule cycles for a six-word key.  kdf6 (first cycle)
    stores ff() of each running word; kd6 (middle cycles) uses ss[6]
    as a temporary and builds each new word from the word six places
    back in k[]; kdl6 (last cycle) stores the first four running words
    without ff().
*/
#define kdf6(k,i) \
{   ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \
    ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \
    ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \
}
#define kd6(k,i) \
{   ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
    ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
    ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
    ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
    ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
}
#define kdl6(k,i) \
{   ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \
    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[3]; \
}
1335 
/*  Decryption schedule cycles for an eight-word key.  kdf8 (first
    cycle) stores ff() of each running word; kd8 (middle cycles)
    declares its own temporary g, so it must be expanded where a
    declaration is allowed; kdl8 (last cycle) stores the first four
    running words without ff().  As in ke8, the fifth word of a cycle
    mixes in ls_box(ss[3],0).
*/
#define kdf8(k,i) \
{   ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \
    ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \
    ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \
    ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \
}
#define kd8(k,i) \
{   aes_32t g = ls_box(ss[7],3) ^ rcon_tab[i]; \
    ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \
    ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \
    ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \
    ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \
    g = ls_box(ss[3],0); \
    ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \
    ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \
    ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \
    ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \
}
#define kdl8(k,i) \
{   ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \
    ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \
}
1358 
aes_dec_key(const unsigned char in_key[],unsigned int klen,aes_ctx cx[1])1359 static aes_rval aes_dec_key(const unsigned char in_key[], unsigned int klen, aes_ctx cx[1])
1360 {   aes_32t    ss[8];
1361     d_vars
1362 
1363 #if !defined(FIXED_TABLES)
1364     if(!tab_init) gen_tabs();
1365 #endif
1366 
1367 #if !defined(BLOCK_SIZE)
1368     if(!cx->n_blk) cx->n_blk = 16;
1369 #else
1370     cx->n_blk = BLOCK_SIZE;
1371 #endif
1372 
1373     cx->n_blk = (cx->n_blk & ~3) | 2;
1374 
1375     cx->k_sch[0] = ss[0] = word_in(in_key     );
1376     cx->k_sch[1] = ss[1] = word_in(in_key +  4);
1377     cx->k_sch[2] = ss[2] = word_in(in_key +  8);
1378     cx->k_sch[3] = ss[3] = word_in(in_key + 12);
1379 
1380 #if (BLOCK_SIZE == 16) && (DEC_UNROLL != NONE)
1381 
1382     switch(klen)
1383     {
1384     case 16:    kdf4(cx->k_sch, 0); kd4(cx->k_sch, 1);
1385                 kd4(cx->k_sch, 2); kd4(cx->k_sch, 3);
1386                 kd4(cx->k_sch, 4); kd4(cx->k_sch, 5);
1387                 kd4(cx->k_sch, 6); kd4(cx->k_sch, 7);
1388                 kd4(cx->k_sch, 8); kdl4(cx->k_sch, 9);
1389                 cx->n_rnd = 10; break;
1390     case 24:    cx->k_sch[4] = ff(ss[4] = word_in(in_key + 16));
1391                 cx->k_sch[5] = ff(ss[5] = word_in(in_key + 20));
1392                 kdf6(cx->k_sch, 0); kd6(cx->k_sch, 1);
1393                 kd6(cx->k_sch, 2); kd6(cx->k_sch, 3);
1394                 kd6(cx->k_sch, 4); kd6(cx->k_sch, 5);
1395                 kd6(cx->k_sch, 6); kdl6(cx->k_sch, 7);
1396                 cx->n_rnd = 12; break;
1397     case 32:    cx->k_sch[4] = ff(ss[4] = word_in(in_key + 16));
1398                 cx->k_sch[5] = ff(ss[5] = word_in(in_key + 20));
1399                 cx->k_sch[6] = ff(ss[6] = word_in(in_key + 24));
1400                 cx->k_sch[7] = ff(ss[7] = word_in(in_key + 28));
1401                 kdf8(cx->k_sch, 0); kd8(cx->k_sch, 1);
1402                 kd8(cx->k_sch, 2); kd8(cx->k_sch, 3);
1403                 kd8(cx->k_sch, 4); kd8(cx->k_sch, 5);
1404                 kdl8(cx->k_sch, 6);
1405                 cx->n_rnd = 14; break;
1406     default:    cx->n_rnd = 0; return aes_bad;
1407     }
1408 #else
1409     {   aes_32t i, l;
1410         cx->n_rnd = ((klen >> 2) > nc ? (klen >> 2) : nc) + 6;
1411         l = (nc * cx->n_rnd + nc - 1) / (klen >> 2);
1412 
1413         switch(klen)
1414         {
1415         case 16:
1416                     for(i = 0; i < l; ++i)
1417                         ke4(cx->k_sch, i);
1418                     break;
1419         case 24:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1420                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1421                     for(i = 0; i < l; ++i)
1422                         ke6(cx->k_sch, i);
1423                     break;
1424         case 32:    cx->k_sch[4] = ss[4] = word_in(in_key + 16);
1425                     cx->k_sch[5] = ss[5] = word_in(in_key + 20);
1426                     cx->k_sch[6] = ss[6] = word_in(in_key + 24);
1427                     cx->k_sch[7] = ss[7] = word_in(in_key + 28);
1428                     for(i = 0; i < l; ++i)
1429                         ke8(cx->k_sch,  i);
1430                     break;
1431         default:    cx->n_rnd = 0; return aes_bad;
1432         }
1433 #if (DEC_ROUND != NO_TABLES)
1434         for(i = nc; i < nc * cx->n_rnd; ++i)
1435             cx->k_sch[i] = inv_mcol(cx->k_sch[i]);
1436 #endif
1437     }
1438 #endif
1439 
1440     return aes_good;
1441 }
1442 
1443 #endif
1444 
1445 /*----------------------------------------------------------------------
1446 |   cipher
1447 +---------------------------------------------------------------------*/
/* placeholder value that the b04..b17 names are defined to below, so
   that unused branches of fwd_var/inv_var need no real variable */
#define unused  77  /* Sunset Strip */

/* si: load column c of the state y from input bytes x, XORed with
       key word k[c]
   so: store column c of the state x to output bytes y
   (x and c are not parenthesised inside 'x + 4 * c') */
#define si(y,x,k,c) s(y,c) = word_in(x + 4 * c) ^ k[c]
#define so(y,x,c)   word_out(y + 4 * c, s(x,c))
1452 
/*  State handling macros, specialised by block size:

      locals(y,x)      declares the two state buffers, as arrays when
                       ARRAYS is defined and as separate variables
                       otherwise
      l_copy(y,x)      copies every column of x to y
      state_in(y,x,k)  si() for every column
      state_out(y,x)   so() for every column
      round(rm,y,x,k)  applies the round macro rm to every column

    Blocks of 16 and 24 bytes have 4 and 6 columns; the final branch
    has 8, fixed when BLOCK_SIZE is 32 and otherwise chosen at run
    time by switching on nc.
*/
#if BLOCK_SIZE == 16

#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
 /*
   the following defines prevent the compiler requiring the declaration
   of generated but unused variables in the fwd_var and inv_var macros
 */
#define b04 unused
#define b05 unused
#define b06 unused
#define b07 unused
#define b14 unused
#define b15 unused
#define b16 unused
#define b17 unused
#endif
#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
                        s(y,2) = s(x,2); s(y,3) = s(x,3);
#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)

#elif BLOCK_SIZE == 24

#if defined(ARRAYS)
#define locals(y,x)     x[6],y[6]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,x##4,x##5, \
                        y##0,y##1,y##2,y##3,y##4,y##5
#define b06 unused
#define b07 unused
#define b16 unused
#define b17 unused
#endif
#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
                        s(y,2) = s(x,2); s(y,3) = s(x,3); \
                        s(y,4) = s(x,4); s(y,5) = s(x,5);
#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
                        si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); \
                        so(y,x,3); so(y,x,4); so(y,x,5)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
                        rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
#else

#if defined(ARRAYS)
#define locals(y,x)     x[8],y[8]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
                        y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
#endif
#define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
                        s(y,2) = s(x,2); s(y,3) = s(x,3); \
                        s(y,4) = s(x,4); s(y,5) = s(x,5); \
                        s(y,6) = s(x,6); s(y,7) = s(x,7);

#if BLOCK_SIZE == 32

#define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
                        si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
#define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
                        so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
                        rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
#else

/* variable block size: the cases fall through on purpose, so that
   case 8 handles columns 7..0, case 6 columns 5..0 and case 4
   columns 3..0; any other value of nc does nothing */
#define state_in(y,x,k) \
switch(nc) \
{   case 8: si(y,x,k,7); si(y,x,k,6); \
    case 6: si(y,x,k,5); si(y,x,k,4); \
    case 4: si(y,x,k,3); si(y,x,k,2); \
            si(y,x,k,1); si(y,x,k,0); \
}

#define state_out(y,x) \
switch(nc) \
{   case 8: so(y,x,7); so(y,x,6); \
    case 6: so(y,x,5); so(y,x,4); \
    case 4: so(y,x,3); so(y,x,2); \
            so(y,x,1); so(y,x,0); \
}

#if defined(FAST_VARIABLE)

/* each case lists all of its columns and ends with a break instead of
   falling through */
#define round(rm,y,x,k) \
switch(nc) \
{   case 8: rm(y,x,k,7); rm(y,x,k,6); \
            rm(y,x,k,5); rm(y,x,k,4); \
            rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
    case 6: rm(y,x,k,5); rm(y,x,k,4); \
            rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
    case 4: rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
            break; \
}
#else

/* fall-through form, as for state_in and state_out */
#define round(rm,y,x,k) \
switch(nc) \
{   case 8: rm(y,x,k,7); rm(y,x,k,6); \
    case 6: rm(y,x,k,5); rm(y,x,k,4); \
    case 4: rm(y,x,k,3); rm(y,x,k,2); \
            rm(y,x,k,1); rm(y,x,k,0); \
}

#endif

#endif
#endif
1569 
1570 #if defined(ENCRYPTION)
1571 
1572 /* I am grateful to Frank Yellin for the following construction
1573    (and that for decryption) which, given the column (c) of the
1574    output state variable, gives the input state variables which
1575    are needed in its computation for each row (r) of the state.
1576 
1577    For the fixed block size options, compilers should be able to
1578    reduce this complex expression (and the equivalent one for
1579    decryption) to a static variable reference at compile time.
1580    But for variable block size code, there will be some limbs on
1581    which conditional clauses will be returned.
1582 */
1583 
/* fwd_var(x,r,c): x = input state, r = row (0 to 3), c = output
   column.  The value is the column of x that supplies row r of output
   column c.  Row 0 uses column c itself; rows 1, 2 and 3 use the
   column that is 1, 2 and 3 places further on when nc is 4 or 6, and
   1, 3 and 4 places further on when nc is 8, wrapping round at nc.
   r and c are not parenthesised, so pass simple expressions.
*/

#define fwd_var(x,r,c)\
 ( r == 0 ?           \
    ( c == 0 ? s(x,0) \
    : c == 1 ? s(x,1) \
    : c == 2 ? s(x,2) \
    : c == 3 ? s(x,3) \
    : c == 4 ? s(x,4) \
    : c == 5 ? s(x,5) \
    : c == 6 ? s(x,6) \
    :          s(x,7))\
 : r == 1 ?           \
    ( c == 0 ? s(x,1) \
    : c == 1 ? s(x,2) \
    : c == 2 ? s(x,3) \
    : c == 3 ? nc == 4 ? s(x,0) : s(x,4) \
    : c == 4 ? s(x,5) \
    : c == 5 ? nc == 8 ? s(x,6) : s(x,0) \
    : c == 6 ? s(x,7) \
    :          s(x,0))\
 : r == 2 ?           \
    ( c == 0 ? nc == 8 ? s(x,3) : s(x,2) \
    : c == 1 ? nc == 8 ? s(x,4) : s(x,3) \
    : c == 2 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
    : c == 3 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
    : c == 4 ? nc == 8 ? s(x,7) : s(x,0) \
    : c == 5 ? nc == 8 ? s(x,0) : s(x,1) \
    : c == 6 ? s(x,1) \
    :          s(x,2))\
 :                    \
    ( c == 0 ? nc == 8 ? s(x,4) : s(x,3) \
    : c == 1 ? nc == 4 ? s(x,0) : nc == 8 ? s(x,5) : s(x,4) \
    : c == 2 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,6) : s(x,5) \
    : c == 3 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,7) : s(x,0) \
    : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
    : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
    : c == 6 ? s(x,2) \
    :          s(x,3)))
1625 
/*  fwd_rnd(y,x,k,c) computes column c of a normal encryption round
    from state x into state y using round key k.  The form depends on
    which ft_tab variant exists; with neither, the S-box is applied
    and fwd_mcol() does the column mix.  The table forms do not call
    fwd_mcol(), so dec_fmvars is redefined as empty for them.
*/
#if defined(FT4_SET)
#undef  dec_fmvars
#define dec_fmvars
#define fwd_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
#elif defined(FT1_SET)
#undef  dec_fmvars
#define dec_fmvars
#define fwd_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
#else
#define fwd_rnd(y,x,k,c)    s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
#endif

/*  fwd_lrnd(y,x,k,c) is the same for the last round, using fl_tab
    when it exists and otherwise the S-box alone with no fwd_mcol().
*/
#if defined(FL4_SET)
#define fwd_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
#else
#define fwd_lrnd(y,x,k,c)   s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
#endif
1645 
/*  Encrypt one block.

    in_blk   the input block
    out_blk  receives the encrypted block
    cx       a context prepared by aes_enc_key()

    Returns aes_bad, without writing out_blk, when bit 0 of cx->n_blk
    is clear (aes_enc_key() sets that bit); otherwise aes_good.  The
    state alternates between the two local buffers b0 and b1, and kp
    walks through the key schedule nc words at a time.
*/
static aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
{   aes_32t        locals(b0, b1);
    const aes_32t  *kp = cx->k_sch;
    dec_fmvars  /* declare variables for fwd_mcol() if needed */

    if(!(cx->n_blk & 1)) return aes_bad;

    /* load the input and XOR in the first round key */
    state_in(b0, in_blk, kp);

#if (ENC_UNROLL == FULL)

    /* position kp so that 'case 10' below starts at the second round
       key whatever the number of rounds */
    kp += (cx->n_rnd - 9) * nc;

    /* the cases fall through on purpose: 14 and 12 round schedules
       run extra rounds before joining the 10 round sequence.
       NOTE(review): there is no default case, so any other n_rnd
       (including the 0 left by a failed key set-up) runs no rounds
       and the function still returns aes_good */
    switch(cx->n_rnd)
    {
    case 14:    round(fwd_rnd,  b1, b0, kp - 4 * nc);
                round(fwd_rnd,  b0, b1, kp - 3 * nc);
    case 12:    round(fwd_rnd,  b1, b0, kp - 2 * nc);
                round(fwd_rnd,  b0, b1, kp -     nc);
    case 10:    round(fwd_rnd,  b1, b0, kp         );
                round(fwd_rnd,  b0, b1, kp +     nc);
                round(fwd_rnd,  b1, b0, kp + 2 * nc);
                round(fwd_rnd,  b0, b1, kp + 3 * nc);
                round(fwd_rnd,  b1, b0, kp + 4 * nc);
                round(fwd_rnd,  b0, b1, kp + 5 * nc);
                round(fwd_rnd,  b1, b0, kp + 6 * nc);
                round(fwd_rnd,  b0, b1, kp + 7 * nc);
                round(fwd_rnd,  b1, b0, kp + 8 * nc);
                round(fwd_lrnd, b0, b1, kp + 9 * nc);
    }
#else

#if (ENC_UNROLL == PARTIAL)
    /* two rounds per iteration, then one more normal round; the block
       opened here is closed after the shared last round below */
    {   aes_32t    rnd;
        for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
        {
            kp += nc;
            round(fwd_rnd, b1, b0, kp);
            kp += nc;
            round(fwd_rnd, b0, b1, kp);
        }
        kp += nc;
        round(fwd_rnd,  b1, b0, kp);
#else
    /* one round per iteration, swapping the source and destination
       pointers each time (p0 and p1 point at b0 and b1, so this form
       needs the array version of locals()) */
    {   aes_32t    rnd, *p0 = b0, *p1 = b1, *pt;
        for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
        {
            kp += nc;
            round(fwd_rnd, p1, p0, kp);
            pt = p0, p0 = p1, p1 = pt;
        }
#endif
        /* last round, shared by both loop forms: b1 -> b0 */
        kp += nc;
        round(fwd_lrnd, b0, b1, kp);
    }
#endif

    state_out(out_blk, b0);
    return aes_good;
}
1706 
1707 #endif
1708 
1709 #if defined(DECRYPTION)
1710 
/*
 * inv_var(x,r,c): select the word of state x that supplies row r of output
 * column c in a decryption round.
 *
 * For row 0 the word is column c itself. For the other rows the selected
 * column is c minus a per-row offset, wrapping around at nc columns:
 *   row 1: offset 1
 *   row 2: offset 2, or 3 when nc == 8
 *   row 3: offset 3, or 4 when nc == 8
 * Entries that test neither nc == 4 nor nc == 8 cover the remaining block
 * width (presumably nc == 6 -- confirm against the definition of nc).
 * r and c are expected to be constants so that the conditional chain
 * collapses to a single s(x,n) reference.
 */
#define inv_var(x,r,c) \
 ( r == 0 ?           \
    ( c == 0 ? s(x,0) \
    : c == 1 ? s(x,1) \
    : c == 2 ? s(x,2) \
    : c == 3 ? s(x,3) \
    : c == 4 ? s(x,4) \
    : c == 5 ? s(x,5) \
    : c == 6 ? s(x,6) \
    :          s(x,7))\
 : r == 1 ?           \
    ( c == 0 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,7) : s(x,5) \
    : c == 1 ? s(x,0) \
    : c == 2 ? s(x,1) \
    : c == 3 ? s(x,2) \
    : c == 4 ? s(x,3) \
    : c == 5 ? s(x,4) \
    : c == 6 ? s(x,5) \
    :          s(x,6))\
 : r == 2 ?           \
    ( c == 0 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
    : c == 1 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
    : c == 2 ? nc == 8 ? s(x,7) : s(x,0) \
    : c == 3 ? nc == 8 ? s(x,0) : s(x,1) \
    : c == 4 ? nc == 8 ? s(x,1) : s(x,2) \
    : c == 5 ? nc == 8 ? s(x,2) : s(x,3) \
    : c == 6 ? s(x,3) \
    :          s(x,4))\
 :                    \
    ( c == 0 ? nc == 4 ? s(x,1) : nc == 8 ? s(x,4) : s(x,3) \
    : c == 1 ? nc == 4 ? s(x,2) : nc == 8 ? s(x,5) : s(x,4) \
    : c == 2 ? nc == 4 ? s(x,3) : nc == 8 ? s(x,6) : s(x,5) \
    : c == 3 ? nc == 8 ? s(x,7) : s(x,0) \
    : c == 4 ? nc == 8 ? s(x,0) : s(x,1) \
    : c == 5 ? nc == 8 ? s(x,1) : s(x,2) \
    : c == 6 ? s(x,2) \
    :          s(x,3)))
1748 
/*
 * Inverse (decryption) round macros.
 *
 * inv_rnd(y,x,k,c) assigns word c of state y from state x and the round key
 * words k; inv_lrnd(y,x,k,c) does the same for the last round. The expansion
 * is chosen by the table configuration macros tested below: a four-table
 * lookup, a one-table lookup, or no round table at all (inv_s_box only).
 */
#if defined(IT4_SET)
/* table-driven round: inv_mcol() is not called, so dec_imvars declares nothing */
#undef  dec_imvars
#define dec_imvars
#define inv_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
#elif defined(IT1_SET)
/* table-driven round: inv_mcol() is not called, so dec_imvars declares nothing */
#undef  dec_imvars
#define dec_imvars
#define inv_rnd(y,x,k,c)    s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
#else
/* no round table: the key is added before inv_mcol() is applied, unlike the
   forward round where the key is added after fwd_mcol() */
#define inv_rnd(y,x,k,c)    s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
#endif

/* last round: same structure as above, but without the inv_mcol() step */
#if defined(IL4_SET)
#define inv_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
#else
#define inv_lrnd(y,x,k,c)   s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
#endif
1768 
/*
 * Decrypt one block.
 *
 * in_blk  : input block, read through state_in()
 * out_blk : output block, written through state_out()
 * cx      : key context; k_sch supplies the round key words and n_rnd the
 *           number of rounds
 *
 * The key schedule is walked backwards, from k_sch + nc * n_rnd down to k_sch.
 *
 * Returns aes_bad (leaving out_blk untouched) when bit 1 of cx->n_blk is
 * clear, aes_good otherwise.
 * NOTE(review): bit 1 of n_blk presumably marks a context set up with a
 * decryption key schedule -- confirm against the key setup code.
 */
static aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])
{   aes_32t        locals(b0, b1);
    const aes_32t  *kp = cx->k_sch + nc * cx->n_rnd;
    dec_imvars  /* declare variables for inv_mcol() if needed */

    if(!(cx->n_blk & 2)) return aes_bad;

    /* load the input into b0 using the round key words at the end of the
       schedule (presumably the initial key addition -- see state_in) */
    state_in(b0, in_blk, kp);

#if (DEC_UNROLL == FULL)

    /* position kp so that the last ten rounds always use kp .. kp - 9 * nc;
       the extra rounds of the 12 and 14 round variants use positive offsets */
    kp = cx->k_sch + 9 * nc;
    /* the cases deliberately fall through: 14 rounds run everything, 12 rounds
       skip the first two, 10 rounds skip the first four; any other n_rnd value
       runs no round at all */
    switch(cx->n_rnd)
    {
    case 14:    round(inv_rnd,  b1, b0, kp + 4 * nc);
                round(inv_rnd,  b0, b1, kp + 3 * nc);
    case 12:    round(inv_rnd,  b1, b0, kp + 2 * nc);
                round(inv_rnd,  b0, b1, kp + nc    );
    case 10:    round(inv_rnd,  b1, b0, kp         );
                round(inv_rnd,  b0, b1, kp -     nc);
                round(inv_rnd,  b1, b0, kp - 2 * nc);
                round(inv_rnd,  b0, b1, kp - 3 * nc);
                round(inv_rnd,  b1, b0, kp - 4 * nc);
                round(inv_rnd,  b0, b1, kp - 5 * nc);
                round(inv_rnd,  b1, b0, kp - 6 * nc);
                round(inv_rnd,  b0, b1, kp - 7 * nc);
                round(inv_rnd,  b1, b0, kp - 8 * nc);
                round(inv_lrnd, b0, b1, kp - 9 * nc);
    }
#else

#if (DEC_UNROLL == PARTIAL)
    /* two rounds per iteration (b0 -> b1 -> b0), then one more normal round
       into b1, so that the shared last round below reads b1 and writes b0 */
    {   aes_32t    rnd;
        for(rnd = 0; rnd < (cx->n_rnd >> 1) - 1; ++rnd)
        {
            kp -= nc;
            round(inv_rnd, b1, b0, kp);
            kp -= nc;
            round(inv_rnd, b0, b1, kp);
        }
        kp -= nc;
        round(inv_rnd, b1, b0, kp);
#else
    /* one round per iteration, swapping the source and destination pointers;
       with an even n_rnd the n_rnd - 1 normal rounds leave the state in b1 */
    {   aes_32t    rnd, *p0 = b0, *p1 = b1, *pt;
        for(rnd = 0; rnd < cx->n_rnd - 1; ++rnd)
        {
            kp -= nc;
            round(inv_rnd, p1, p0, kp);
            pt = p0, p0 = p1, p1 = pt;
        }
#endif
        /* last round, shared by both loop variants: b1 -> b0 */
        kp -= nc;
        round(inv_lrnd, b0, b1, kp);
    }
#endif

    state_out(out_blk, b0);
    return aes_good;
}
1828 
1829 #endif
1830 
1831 /*----------------------------------------------------------------------
1832 |   AP4_AesCbcBlockCipher
1833 +---------------------------------------------------------------------*/
1834 class AP4_AesCbcBlockCipher : public AP4_AesBlockCipher
1835 {
1836 public:
1837     AP4_AesCbcBlockCipher(CipherDirection direction,
1838                           aes_ctx*        context) :
1839         AP4_AesBlockCipher(direction, CBC, context) {}
1840 
1841     // AP4_BlockCipher methods
1842     virtual AP4_Result Process(const AP4_UI08* input,
1843                                AP4_Size        input_size,
1844                                AP4_UI08*       output,
1845                                const AP4_UI08* iv);
1846 };
1847 
1848 /*----------------------------------------------------------------------
1849 |   AP4_AesCbcBlockCipher::Process
1850 +---------------------------------------------------------------------*/
1851 AP4_Result
1852 AP4_AesCbcBlockCipher::Process(const AP4_UI08* input,
1853                                AP4_Size        input_size,
1854                                AP4_UI08*       output,
1855                                const AP4_UI08* iv)
1856 {
1857     // check the parameters
1858     if (input_size%AP4_AES_BLOCK_SIZE) {
1859         return AP4_ERROR_INVALID_PARAMETERS;
1860     }
1861 
1862     // setup the chaining block from the IV
1863     AP4_UI08 chaining_block[AP4_AES_BLOCK_SIZE];
1864     if (iv) {
1865         AP4_CopyMemory(chaining_block, iv, AP4_AES_BLOCK_SIZE);
1866     } else {
1867         AP4_SetMemory(chaining_block, 0, AP4_AES_BLOCK_SIZE);
1868     }
1869 
1870     // process all blocks
1871     unsigned int block_count = input_size/AP4_AES_BLOCK_SIZE;
1872     if (m_Direction == ENCRYPT) {
1873         for (unsigned int i=0; i<block_count; i++) {
1874             AP4_UI08 block[AP4_AES_BLOCK_SIZE];
1875             for (unsigned int j=0; j<AP4_AES_BLOCK_SIZE; j++) {
1876                 block[j] = input[j] ^ chaining_block[j];
1877             }
1878             aes_enc_blk(block, output, m_Context);
1879             AP4_CopyMemory(chaining_block, output, AP4_AES_BLOCK_SIZE);
1880             input  += AP4_AES_BLOCK_SIZE;
1881             output += AP4_AES_BLOCK_SIZE;
1882         }
1883     } else {
1884         for (unsigned int i=0; i<block_count; i++) {
1885             aes_dec_blk(input, output, m_Context);
1886             for (unsigned int j=0; j<AP4_AES_BLOCK_SIZE; j++) {
1887                 output[j] ^= chaining_block[j];
1888             }
1889             AP4_CopyMemory(chaining_block, input, AP4_AES_BLOCK_SIZE);
1890             input  += AP4_AES_BLOCK_SIZE;
1891             output += AP4_AES_BLOCK_SIZE;
1892         }
1893     }
1894 
1895     return AP4_SUCCESS;
1896 }
1897 
1898 /*----------------------------------------------------------------------
1899 |   AP4_AesCtrBlockCipher
1900 +---------------------------------------------------------------------*/
1901 class AP4_AesCtrBlockCipher : public AP4_AesBlockCipher
1902 {
1903 public:
1904     AP4_AesCtrBlockCipher(CipherDirection direction,
1905                           unsigned int    /*counter_size*/,
1906                           aes_ctx*        context) :
1907         AP4_AesBlockCipher(direction, CTR, context)
1908         /*m_CounterSize(counter_size)*/ {}
1909 
1910     // AP4_BlockCipher methods
1911     virtual AP4_Result Process(const AP4_UI08* input,
1912                                AP4_Size        input_size,
1913                                AP4_UI08*       output,
1914                                const AP4_UI08* iv);
1915 
1916 //private:
1917 //    unsigned int m_CounterSize;
1918 };
1919 
1920 /*----------------------------------------------------------------------
1921 |   AP4_AesCtrBlockCipher::Process
1922 +---------------------------------------------------------------------*/
1923 AP4_Result
1924 AP4_AesCtrBlockCipher::Process(const AP4_UI08* input,
1925                                AP4_Size        input_size,
1926                                AP4_UI08*       output,
1927                                const AP4_UI08* iv)
1928 {
1929     // copy the iv into the counter
1930     AP4_UI08 counter[AP4_AES_BLOCK_SIZE];
1931     if (iv) {
1932         AP4_CopyMemory(counter, iv, AP4_AES_BLOCK_SIZE);
1933     } else {
1934         AP4_SetMemory(counter, 0, AP4_AES_BLOCK_SIZE);
1935     }
1936 
1937     // process all blocks
1938     while (input_size) {
1939         AP4_UI08 block[AP4_AES_BLOCK_SIZE];
1940         aes_enc_blk(counter, block, m_Context);
1941         unsigned int chunk = input_size>=AP4_AES_BLOCK_SIZE?AP4_AES_BLOCK_SIZE:input_size;
1942         for (unsigned int j=0; j<chunk; j++) {
1943             output[j] = input[j]^block[j];
1944         }
1945         input_size -= chunk;
1946         if (input_size) {
1947             // increment the counter
1948             for (int x=AP4_AES_BLOCK_SIZE-1; x; --x) {
1949                 if (counter[x] == 255) {
1950                     counter[x] = 0;
1951                     continue;
1952                 } else {
1953                     ++counter[x];
1954                     break;
1955                 }
1956             }
1957 
1958             // move to the next block
1959             input  += AP4_AES_BLOCK_SIZE;
1960             output += AP4_AES_BLOCK_SIZE;
1961         }
1962     }
1963     return AP4_SUCCESS;
1964 }
1965 
1966 /*----------------------------------------------------------------------
1967 |   AP4_AesBlockCipher::Create
1968 +---------------------------------------------------------------------*/
1969 AP4_Result
1970 AP4_AesBlockCipher::Create(const AP4_UI08*      key,
1971                            CipherDirection      direction,
1972                            CipherMode           mode,
1973                            const void*          mode_params,
1974                            AP4_AesBlockCipher*& cipher)
1975 {
1976     cipher = NULL;
1977 
1978     aes_ctx* context = new aes_ctx();
1979 
1980     switch (mode) {
1981         case AP4_BlockCipher::CBC:
1982             if (direction == AP4_BlockCipher::ENCRYPT) {
1983                 aes_enc_key(key, AP4_AES_KEY_LENGTH, context);
1984             } else {
1985                 aes_dec_key(key, AP4_AES_KEY_LENGTH, context);
1986             }
1987             cipher = new AP4_AesCbcBlockCipher(direction, context);
1988             break;
1989 
1990         case AP4_BlockCipher::CTR: {
1991             aes_enc_key(key, AP4_AES_KEY_LENGTH, context);
1992             const AP4_BlockCipher::CtrParams* ctr_params = (const AP4_BlockCipher::CtrParams*)mode_params;
1993             unsigned int counter_size = 16;
1994             if (ctr_params) {
1995                 counter_size = ctr_params->counter_size;
1996             }
1997             cipher = new AP4_AesCtrBlockCipher(direction, counter_size, context);
1998             break;
1999         }
2000 
2001         default:
2002             return AP4_ERROR_INVALID_PARAMETERS;
2003     }
2004 
2005     return AP4_SUCCESS;
2006 }
2007 
2008 /*----------------------------------------------------------------------
2009 |   AP4_AesBlockCipher::~AP4_AesBlockCipher
2010 +---------------------------------------------------------------------*/
// Destructor: the cipher owns its key context (allocated with new in
// AP4_AesBlockCipher::Create) and releases it here.
AP4_AesBlockCipher::~AP4_AesBlockCipher()
{
    delete m_Context;
}
2015