1 /* 2 --------------------------------------------------------------------------- 3 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. 4 5 The redistribution and use of this software (with or without changes) 6 is allowed without the payment of fees or royalties provided that: 7 8 source code distributions include the above copyright notice, this 9 list of conditions and the following disclaimer; 10 11 binary distributions include the above copyright notice, this list 12 of conditions and the following disclaimer in their documentation. 13 14 This software is provided 'as is' with no explicit or implied warranties 15 in respect of its operation, including, but not limited to, correctness 16 and fitness for purpose. 17 --------------------------------------------------------------------------- 18 Issue Date: 20/12/2007 19 20 This file contains the compilation options for AES (Rijndael) and code 21 that is common across encryption, key scheduling and table generation. 22 23 OPERATION 24 25 These source code files implement the AES algorithm Rijndael designed by 26 Joan Daemen and Vincent Rijmen. This version is designed for the standard 27 block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 28 and 32 bytes). 29 30 This version is designed for flexibility and speed using operations on 31 32-bit words rather than operations on bytes. It can be compiled with 32 either big or little endian internal byte order but is faster when the 33 native byte order for the processor is used. 34 35 THE CIPHER INTERFACE 36 37 The cipher interface is implemented as an array of bytes in which lower 38 AES bit sequence indexes map to higher numeric significance within bytes. 39 40 uint8_t (an unsigned 8-bit type) 41 uint32_t (an unsigned 32-bit type) 42 struct aes_encrypt_ctx (structure for the cipher encryption context) 43 struct aes_decrypt_ctx (structure for the cipher decryption context) 44 AES_RETURN the function return type 45 46 C subroutine calls: 47 48 AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); 49 AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); 50 AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); 51 AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, 52 const aes_encrypt_ctx cx[1]); 53 54 AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); 55 AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); 56 AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); 57 AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, 58 const aes_decrypt_ctx cx[1]); 59 60 IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that 61 you call aes_init() before AES is used so that the tables are initialised. 62 63 C++ aes class subroutines: 64 65 Class AESencrypt for encryption 66 67 Construtors: 68 AESencrypt(void) 69 AESencrypt(const unsigned char *key) - 128 bit key 70 Members: 71 AES_RETURN key128(const unsigned char *key) 72 AES_RETURN key192(const unsigned char *key) 73 AES_RETURN key256(const unsigned char *key) 74 AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const 75 76 Class AESdecrypt for encryption 77 Construtors: 78 AESdecrypt(void) 79 AESdecrypt(const unsigned char *key) - 128 bit key 80 Members: 81 AES_RETURN key128(const unsigned char *key) 82 AES_RETURN key192(const unsigned char *key) 83 AES_RETURN key256(const unsigned char *key) 84 AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const 85 */ 86 87 #if !defined( _AESOPT_H ) 88 #define _AESOPT_H 89 90 #if defined( __cplusplus ) 91 #include "aescpp.h" 92 #else 93 #include "aes.h" 94 #endif 95 96 /* PLATFORM SPECIFIC INCLUDES */ 97 98 #include "brg_endian.h" 99 100 /* CONFIGURATION - THE USE OF DEFINES 101 102 Later in this section there are a number of defines that control the 103 operation of the code. In each section, the purpose of each define is 104 explained so that the relevant form can be included or excluded by 105 setting either 1's or 0's respectively on the branches of the related 106 #if clauses. The following local defines should not be changed. 107 */ 108 109 #define ENCRYPTION_IN_C 1 110 #define DECRYPTION_IN_C 2 111 #define ENC_KEYING_IN_C 4 112 #define DEC_KEYING_IN_C 8 113 114 #define NO_TABLES 0 115 #define ONE_TABLE 1 116 #define FOUR_TABLES 4 117 #define NONE 0 118 #define PARTIAL 1 119 #define FULL 2 120 121 /* --- START OF USER CONFIGURED OPTIONS --- */ 122 123 /* 1. BYTE ORDER WITHIN 32 BIT WORDS 124 125 The fundamental data processing units in Rijndael are 8-bit bytes. The 126 input, output and key input are all enumerated arrays of bytes in which 127 bytes are numbered starting at zero and increasing to one less than the 128 number of bytes in the array in question. This enumeration is only used 129 for naming bytes and does not imply any adjacency or order relationship 130 from one byte to another. When these inputs and outputs are considered 131 as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to 132 byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. 133 In this implementation bits are numbered from 0 to 7 starting at the 134 numerically least significant end of each byte (bit n represents 2^n). 135 136 However, Rijndael can be implemented more efficiently using 32-bit 137 words by packing bytes into words so that bytes 4*n to 4*n+3 are placed 138 into word[n]. While in principle these bytes can be assembled into words 139 in any positions, this implementation only supports the two formats in 140 which bytes in adjacent positions within words also have adjacent byte 141 numbers. This order is called big-endian if the lowest numbered bytes 142 in words have the highest numeric significance and little-endian if the 143 opposite applies. 144 145 This code can work in either order irrespective of the order used by the 146 machine on which it runs. Normally the internal byte order will be set 147 to the order of the processor on which the code is to be run but this 148 define can be used to reverse this in special situations 149 150 WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. 151 This define will hence be redefined later (in section 4) if necessary 152 */ 153 154 #if 1 155 # define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER 156 #elif 0 157 # define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN 158 #elif 0 159 # define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN 160 #else 161 # error The algorithm byte order is not defined 162 #endif 163 164 /* 2. Intel AES AND VIA ACE SUPPORT */ 165 166 #if defined( __GNUC__ ) && defined( __i386__ ) \ 167 || defined(_WIN32) && defined(_M_IX86) \ 168 && !(defined(_WIN64) || defined(_WIN32_WCE) || defined(_MSC_VER) && (_MSC_VER <= 800)) 169 # define VIA_ACE_POSSIBLE 170 #endif 171 172 /* Define this option if support for the Intel AESNI is required (not 173 currently available with GCC). If AESNI is known to be present, then 174 defining ASSUME_INTEL_AES_VIA_PRESENT will replace the ordinary 175 encryption/decryption. If USE_INTEL_AES_IF_PRESENT is defined then 176 AESNI will be used if it is detected (both present and enabled). 177 178 AESNI uses a decryption key schedule with the first decryption 179 round key at the high end of the key scedule with the following 180 round keys at lower positions in memory. So AES_REV_DKS must NOT 181 be defined when AESNI will be used. ALthough it is unlikely that 182 assembler code will be used with an AESNI build, if it is then 183 AES_REV_DKS must NOT be defined when such assembler files are 184 built 185 */ 186 #if 0 && defined( _WIN64 ) && defined( _MSC_VER ) 187 # define INTEL_AES_POSSIBLE 188 #endif 189 190 #if defined( INTEL_AES_POSSIBLE ) && !defined( USE_INTEL_AES_IF_PRESENT ) 191 # define USE_INTEL_AES_IF_PRESENT 192 #endif 193 194 /* Define this option if support for the VIA ACE is required. This uses 195 inline assembler instructions and is only implemented for the Microsoft, 196 Intel and GCC compilers. If VIA ACE is known to be present, then defining 197 ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption 198 code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if 199 it is detected (both present and enabled) but the normal AES code will 200 also be present. 201 202 When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte 203 aligned; other input/output buffers do not need to be 16 byte aligned 204 but there are very large performance gains if this can be arranged. 205 VIA ACE also requires the decryption key schedule to be in reverse 206 order (which later checks below ensure). 207 208 AES_REV_DKS must be set for assembler code used with a VIA ACE build 209 */ 210 211 #if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT ) 212 # define USE_VIA_ACE_IF_PRESENT 213 #endif 214 215 #if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT ) 216 # define ASSUME_VIA_ACE_PRESENT 217 # endif 218 219 /* 3. ASSEMBLER SUPPORT 220 221 This define (which can be on the command line) enables the use of the 222 assembler code routines for encryption, decryption and key scheduling 223 as follows: 224 225 ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for 226 encryption and decryption and but with key scheduling in C 227 ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for 228 encryption, decryption and key scheduling 229 ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for 230 encryption and decryption and but with key scheduling in C 231 ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for 232 encryption and decryption and but with key scheduling in C 233 234 Change one 'if 0' below to 'if 1' to select the version or define 235 as a compilation option. 236 */ 237 238 #if 0 && !defined( ASM_X86_V1C ) 239 # define ASM_X86_V1C 240 #elif 0 && !defined( ASM_X86_V2 ) 241 # define ASM_X86_V2 242 #elif 0 && !defined( ASM_X86_V2C ) 243 # define ASM_X86_V2C 244 #elif 0 && !defined( ASM_AMD64_C ) 245 # define ASM_AMD64_C 246 #endif 247 248 #if (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \ 249 && !defined( _M_IX86 ) || defined( ASM_AMD64_C ) && !defined( _M_X64 ) 250 # error Assembler code is only available for x86 and AMD64 systems 251 #endif 252 253 /* 4. FAST INPUT/OUTPUT OPERATIONS. 254 255 On some machines it is possible to improve speed by transferring the 256 bytes in the input and output arrays to and from the internal 32-bit 257 variables by addressing these arrays as if they are arrays of 32-bit 258 words. On some machines this will always be possible but there may 259 be a large performance penalty if the byte arrays are not aligned on 260 the normal word boundaries. On other machines this technique will 261 lead to memory access errors when such 32-bit word accesses are not 262 properly aligned. The option SAFE_IO avoids such problems but will 263 often be slower on those machines that support misaligned access 264 (especially so if care is taken to align the input and output byte 265 arrays on 32-bit word boundaries). If SAFE_IO is not defined it is 266 assumed that access to byte arrays as if they are arrays of 32-bit 267 words will not cause problems when such accesses are misaligned. 268 */ 269 #if 1 && !defined( _MSC_VER ) 270 # define SAFE_IO 271 #endif 272 273 /* 5. LOOP UNROLLING 274 275 The code for encryption and decrytpion cycles through a number of rounds 276 that can be implemented either in a loop or by expanding the code into a 277 long sequence of instructions, the latter producing a larger program but 278 one that will often be much faster. The latter is called loop unrolling. 279 There are also potential speed advantages in expanding two iterations in 280 a loop with half the number of iterations, which is called partial loop 281 unrolling. The following options allow partial or full loop unrolling 282 to be set independently for encryption and decryption 283 */ 284 #if 1 285 # define ENC_UNROLL FULL 286 #elif 0 287 # define ENC_UNROLL PARTIAL 288 #else 289 # define ENC_UNROLL NONE 290 #endif 291 292 #if 1 293 # define DEC_UNROLL FULL 294 #elif 0 295 # define DEC_UNROLL PARTIAL 296 #else 297 # define DEC_UNROLL NONE 298 #endif 299 300 #if 1 301 # define ENC_KS_UNROLL 302 #endif 303 304 #if 1 305 # define DEC_KS_UNROLL 306 #endif 307 308 /* 6. FAST FINITE FIELD OPERATIONS 309 310 If this section is included, tables are used to provide faster finite 311 field arithmetic (this has no effect if FIXED_TABLES is defined). 312 */ 313 #if 1 314 # define FF_TABLES 315 #endif 316 317 /* 7. INTERNAL STATE VARIABLE FORMAT 318 319 The internal state of Rijndael is stored in a number of local 32-bit 320 word varaibles which can be defined either as an array or as individual 321 names variables. Include this section if you want to store these local 322 varaibles in arrays. Otherwise individual local variables will be used. 323 */ 324 #if 1 325 # define ARRAYS 326 #endif 327 328 /* 8. FIXED OR DYNAMIC TABLES 329 330 When this section is included the tables used by the code are compiled 331 statically into the binary file. Otherwise the subroutine aes_init() 332 must be called to compute them before the code is first used. 333 */ 334 #if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) 335 # define FIXED_TABLES 336 #endif 337 338 /* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES 339 340 In some systems it is better to mask longer values to extract bytes 341 rather than using a cast. This option allows this choice. 342 */ 343 #if 0 344 # define to_byte(x) ((uint8_t)(x)) 345 #else 346 # define to_byte(x) ((x) & 0xff) 347 #endif 348 349 /* 10. TABLE ALIGNMENT 350 351 On some sytsems speed will be improved by aligning the AES large lookup 352 tables on particular boundaries. This define should be set to a power of 353 two giving the desired alignment. It can be left undefined if alignment 354 is not needed. This option is specific to the Microsft VC++ compiler - 355 it seems to sometimes cause trouble for the VC++ version 6 compiler. 356 */ 357 358 #if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) 359 # define TABLE_ALIGN 32 360 #endif 361 362 /* 11. REDUCE CODE AND TABLE SIZE 363 364 This replaces some expanded macros with function calls if AES_ASM_V2 or 365 AES_ASM_V2C are defined 366 */ 367 368 #if 0 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) 369 # define REDUCE_CODE_SIZE 370 #endif 371 372 /* 12. TABLE OPTIONS 373 374 This cipher proceeds by repeating in a number of cycles known as 'rounds' 375 which are implemented by a round function which can optionally be speeded 376 up using tables. The basic tables are each 256 32-bit words, with either 377 one or four tables being required for each round function depending on 378 how much speed is required. The encryption and decryption round functions 379 are different and the last encryption and decrytpion round functions are 380 different again making four different round functions in all. 381 382 This means that: 383 1. Normal encryption and decryption rounds can each use either 0, 1 384 or 4 tables and table spaces of 0, 1024 or 4096 bytes each. 385 2. The last encryption and decryption rounds can also use either 0, 1 386 or 4 tables and table spaces of 0, 1024 or 4096 bytes each. 387 388 Include or exclude the appropriate definitions below to set the number 389 of tables used by this implementation. 390 */ 391 392 #if 1 /* set tables for the normal encryption round */ 393 # define ENC_ROUND FOUR_TABLES 394 #elif 0 395 # define ENC_ROUND ONE_TABLE 396 #else 397 # define ENC_ROUND NO_TABLES 398 #endif 399 400 #if 1 /* set tables for the last encryption round */ 401 # define LAST_ENC_ROUND FOUR_TABLES 402 #elif 0 403 # define LAST_ENC_ROUND ONE_TABLE 404 #else 405 # define LAST_ENC_ROUND NO_TABLES 406 #endif 407 408 #if 1 /* set tables for the normal decryption round */ 409 # define DEC_ROUND FOUR_TABLES 410 #elif 0 411 # define DEC_ROUND ONE_TABLE 412 #else 413 # define DEC_ROUND NO_TABLES 414 #endif 415 416 #if 1 /* set tables for the last decryption round */ 417 # define LAST_DEC_ROUND FOUR_TABLES 418 #elif 0 419 # define LAST_DEC_ROUND ONE_TABLE 420 #else 421 # define LAST_DEC_ROUND NO_TABLES 422 #endif 423 424 /* The decryption key schedule can be speeded up with tables in the same 425 way that the round functions can. Include or exclude the following 426 defines to set this requirement. 427 */ 428 #if 1 429 # define KEY_SCHED FOUR_TABLES 430 #elif 0 431 # define KEY_SCHED ONE_TABLE 432 #else 433 # define KEY_SCHED NO_TABLES 434 #endif 435 436 /* ---- END OF USER CONFIGURED OPTIONS ---- */ 437 438 /* VIA ACE support is only available for VC++ and GCC */ 439 440 #if !defined( _MSC_VER ) && !defined( __GNUC__ ) 441 # if defined( ASSUME_VIA_ACE_PRESENT ) 442 # undef ASSUME_VIA_ACE_PRESENT 443 # endif 444 # if defined( USE_VIA_ACE_IF_PRESENT ) 445 # undef USE_VIA_ACE_IF_PRESENT 446 # endif 447 #endif 448 449 #if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) 450 # define USE_VIA_ACE_IF_PRESENT 451 #endif 452 453 /* define to reverse decryption key schedule */ 454 #if 1 || defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) 455 # define AES_REV_DKS 456 #endif 457 458 /* Intel AESNI uses a decryption key schedule in the encryption order */ 459 #if defined( USE_INTEL_AES_IF_PRESENT ) && defined ( AES_REV_DKS ) 460 # undef AES_REV_DKS 461 #endif 462 463 /* Assembler support requires the use of platform byte order */ 464 465 #if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \ 466 && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) 467 # undef ALGORITHM_BYTE_ORDER 468 # define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER 469 #endif 470 471 /* In this implementation the columns of the state array are each held in 472 32-bit words. The state array can be held in various ways: in an array 473 of words, in a number of individual word variables or in a number of 474 processor registers. The following define maps a variable name x and 475 a column number c to the way the state array variable is to be held. 476 The first define below maps the state into an array x[c] whereas the 477 second form maps the state into a number of individual variables x0, 478 x1, etc. Another form could map individual state colums to machine 479 register names. 480 */ 481 482 #if defined( ARRAYS ) 483 # define s(x,c) x[c] 484 #else 485 # define s(x,c) x##c 486 #endif 487 488 /* This implementation provides subroutines for encryption, decryption 489 and for setting the three key lengths (separately) for encryption 490 and decryption. Since not all functions are needed, masks are set 491 up here to determine which will be implemented in C 492 */ 493 494 #if !defined( AES_ENCRYPT ) 495 # define EFUNCS_IN_C 0 496 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ 497 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 498 # define EFUNCS_IN_C ENC_KEYING_IN_C 499 #elif !defined( ASM_X86_V2 ) 500 # define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) 501 #else 502 # define EFUNCS_IN_C 0 503 #endif 504 505 #if !defined( AES_DECRYPT ) 506 # define DFUNCS_IN_C 0 507 #elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \ 508 || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) 509 # define DFUNCS_IN_C DEC_KEYING_IN_C 510 #elif !defined( ASM_X86_V2 ) 511 # define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) 512 #else 513 # define DFUNCS_IN_C 0 514 #endif 515 516 #define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) 517 518 /* END OF CONFIGURATION OPTIONS */ 519 520 #define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) 521 522 /* Disable or report errors on some combinations of options */ 523 524 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES 525 # undef LAST_ENC_ROUND 526 # define LAST_ENC_ROUND NO_TABLES 527 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES 528 # undef LAST_ENC_ROUND 529 # define LAST_ENC_ROUND ONE_TABLE 530 #endif 531 532 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE 533 # undef ENC_UNROLL 534 # define ENC_UNROLL NONE 535 #endif 536 537 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES 538 # undef LAST_DEC_ROUND 539 # define LAST_DEC_ROUND NO_TABLES 540 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES 541 # undef LAST_DEC_ROUND 542 # define LAST_DEC_ROUND ONE_TABLE 543 #endif 544 545 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE 546 # undef DEC_UNROLL 547 # define DEC_UNROLL NONE 548 #endif 549 550 #if defined( bswap32 ) 551 # define aes_sw32 bswap32 552 #elif defined( bswap_32 ) 553 # define aes_sw32 bswap_32 554 #else 555 # define brot(x,n) (((uint32_t)(x) << n) | ((uint32_t)(x) >> (32 - n))) 556 # define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) 557 #endif 558 559 /* upr(x,n): rotates bytes within words by n positions, moving bytes to 560 higher index positions with wrap around into low positions 561 ups(x,n): moves bytes by n positions to higher index positions in 562 words but without wrap around 563 bval(x,n): extracts a byte from a word 564 565 WARNING: The definitions given here are intended only for use with 566 unsigned variables and with shift counts that are compile 567 time constants 568 */ 569 570 #if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) 571 # define upr(x,n) (((uint32_t)(x) << (8 * (n))) | ((uint32_t)(x) >> (32 - 8 * (n)))) 572 # define ups(x,n) ((uint32_t) (x) << (8 * (n))) 573 # define bval(x,n) to_byte((x) >> (8 * (n))) 574 # define bytes2word(b0, b1, b2, b3) \ 575 (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) 576 #endif 577 578 #if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) 579 # define upr(x,n) (((uint32_t)(x) >> (8 * (n))) | ((uint32_t)(x) << (32 - 8 * (n)))) 580 # define ups(x,n) ((uint32_t) (x) >> (8 * (n))) 581 # define bval(x,n) to_byte((x) >> (24 - 8 * (n))) 582 # define bytes2word(b0, b1, b2, b3) \ 583 (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | ((uint32_t)(b2) << 8) | (b3)) 584 #endif 585 586 #if defined( SAFE_IO ) 587 # define word_in(x,c) bytes2word(((const uint8_t*)(x)+4*c)[0], ((const uint8_t*)(x)+4*c)[1], \ 588 ((const uint8_t*)(x)+4*c)[2], ((const uint8_t*)(x)+4*c)[3]) 589 # define word_out(x,c,v) { ((uint8_t*)(x)+4*c)[0] = bval(v,0); ((uint8_t*)(x)+4*c)[1] = bval(v,1); \ 590 ((uint8_t*)(x)+4*c)[2] = bval(v,2); ((uint8_t*)(x)+4*c)[3] = bval(v,3); } 591 #elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) 592 # define word_in(x,c) (*((uint32_t*)(x)+(c))) 593 # define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v)) 594 #else 595 # define word_in(x,c) aes_sw32(*((uint32_t*)(x)+(c))) 596 # define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = aes_sw32(v)) 597 #endif 598 599 /* the finite field modular polynomial and elements */ 600 601 #define WPOLY 0x011b 602 #define BPOLY 0x1b 603 604 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ 605 606 #define gf_c1 0x80808080 607 #define gf_c2 0x7f7f7f7f 608 #define gf_mulx(x) ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY)) 609 610 /* The following defines provide alternative definitions of gf_mulx that might 611 give improved performance if a fast 32-bit multiply is not available. Note 612 that a temporary variable u needs to be defined where gf_mulx is used. 613 614 #define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6)) 615 #define gf_c4 (0x01010101 * BPOLY) 616 #define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4) 617 */ 618 619 /* Work out which tables are needed for the different options */ 620 621 #if defined( ASM_X86_V1C ) 622 # if defined( ENC_ROUND ) 623 # undef ENC_ROUND 624 # endif 625 # define ENC_ROUND FOUR_TABLES 626 # if defined( LAST_ENC_ROUND ) 627 # undef LAST_ENC_ROUND 628 # endif 629 # define LAST_ENC_ROUND FOUR_TABLES 630 # if defined( DEC_ROUND ) 631 # undef DEC_ROUND 632 # endif 633 # define DEC_ROUND FOUR_TABLES 634 # if defined( LAST_DEC_ROUND ) 635 # undef LAST_DEC_ROUND 636 # endif 637 # define LAST_DEC_ROUND FOUR_TABLES 638 # if defined( KEY_SCHED ) 639 # undef KEY_SCHED 640 # define KEY_SCHED FOUR_TABLES 641 # endif 642 #endif 643 644 #if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) 645 # if ENC_ROUND == ONE_TABLE 646 # define FT1_SET 647 # elif ENC_ROUND == FOUR_TABLES 648 # define FT4_SET 649 # else 650 # define SBX_SET 651 # endif 652 # if LAST_ENC_ROUND == ONE_TABLE 653 # define FL1_SET 654 # elif LAST_ENC_ROUND == FOUR_TABLES 655 # define FL4_SET 656 # elif !defined( SBX_SET ) 657 # define SBX_SET 658 # endif 659 #endif 660 661 #if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) 662 # if DEC_ROUND == ONE_TABLE 663 # define IT1_SET 664 # elif DEC_ROUND == FOUR_TABLES 665 # define IT4_SET 666 # else 667 # define ISB_SET 668 # endif 669 # if LAST_DEC_ROUND == ONE_TABLE 670 # define IL1_SET 671 # elif LAST_DEC_ROUND == FOUR_TABLES 672 # define IL4_SET 673 # elif !defined(ISB_SET) 674 # define ISB_SET 675 # endif 676 #endif 677 678 #if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 679 # if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)) 680 # if KEY_SCHED == ONE_TABLE 681 # if !defined( FL1_SET ) && !defined( FL4_SET ) 682 # define LS1_SET 683 # endif 684 # elif KEY_SCHED == FOUR_TABLES 685 # if !defined( FL4_SET ) 686 # define LS4_SET 687 # endif 688 # elif !defined( SBX_SET ) 689 # define SBX_SET 690 # endif 691 # endif 692 # if (FUNCS_IN_C & DEC_KEYING_IN_C) 693 # if KEY_SCHED == ONE_TABLE 694 # define IM1_SET 695 # elif KEY_SCHED == FOUR_TABLES 696 # define IM4_SET 697 # elif !defined( SBX_SET ) 698 # define SBX_SET 699 # endif 700 # endif 701 #endif 702 703 /* generic definitions of Rijndael macros that use tables */ 704 705 #define no_table(x,box,vf,rf,c) bytes2word( \ 706 box[bval(vf(x,0,c),rf(0,c))], \ 707 box[bval(vf(x,1,c),rf(1,c))], \ 708 box[bval(vf(x,2,c),rf(2,c))], \ 709 box[bval(vf(x,3,c),rf(3,c))]) 710 711 #define one_table(x,op,tab,vf,rf,c) \ 712 ( tab[bval(vf(x,0,c),rf(0,c))] \ 713 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ 714 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ 715 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) 716 717 #define four_tables(x,tab,vf,rf,c) \ 718 ( tab[0][bval(vf(x,0,c),rf(0,c))] \ 719 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ 720 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ 721 ^ tab[3][bval(vf(x,3,c),rf(3,c))]) 722 723 #define vf1(x,r,c) (x) 724 #define rf1(r,c) (r) 725 #define rf2(r,c) ((8+r-c)&3) 726 727 /* perform forward and inverse column mix operation on four bytes in long word x in */ 728 /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ 729 730 #if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) 731 732 #if defined( FM4_SET ) /* not currently used */ 733 # define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) 734 #elif defined( FM1_SET ) /* not currently used */ 735 # define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) 736 #else 737 # define dec_fmvars uint32_t g2 738 # define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) 739 #endif 740 741 #if defined( IM4_SET ) 742 # define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) 743 #elif defined( IM1_SET ) 744 # define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) 745 #else 746 # define dec_imvars uint32_t g2, g4, g9 747 # define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ 748 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) 749 #endif 750 751 #if defined( FL4_SET ) 752 # define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) 753 #elif defined( LS4_SET ) 754 # define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) 755 #elif defined( FL1_SET ) 756 # define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) 757 #elif defined( LS1_SET ) 758 # define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) 759 #else 760 # define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) 761 #endif 762 763 #endif 764 765 #if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) 766 # define ISB_SET 767 #endif 768 769 #endif 770