/*******************************************************************************
  Copyright (c) 2009-2020, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice,
        this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its contributors
        may be used to endorse or promote products derived from this software
        without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/

/**
 ******************************************************************************
 * @file zuc_internal.h
 *
 * @description
 *      This header file defines the internal API's and data types for the
 *      3GPP algorithm ZUC.
35 * 36 *****************************************************************************/ 37 38 #ifndef ZUC_INTERNAL_H_ 39 #define ZUC_INTERNAL_H_ 40 41 #include <stdio.h> 42 #include <stdint.h> 43 44 #include "include/ipsec_ooo_mgr.h" 45 #include "intel-ipsec-mb.h" 46 #include "immintrin.h" 47 #include "include/wireless_common.h" 48 49 /* 64 bytes of Keystream will be generated */ 50 #define ZUC_KEYSTR_LEN (64) 51 #define NUM_LFSR_STATES (16) 52 #define ZUC_WORD_BITS (32) 53 #define ZUC_WORD_BYTES (ZUC_WORD_BITS / 8) 54 55 /* Range of input data for ZUC is from 1 to 65504 bits */ 56 #define ZUC_MIN_BITLEN 1 57 #define ZUC_MAX_BITLEN 65504 58 #define ZUC_MIN_BYTELEN 1 59 #define ZUC_MAX_BYTELEN (ZUC_MAX_BITLEN / 8) 60 61 #ifdef DEBUG 62 #ifdef _WIN32 63 #define DEBUG_PRINT(_fmt, ...) \ 64 fprintf(stderr, "%s()::%d " _fmt , __FUNCTION__, __LINE__, __VA_ARGS__) 65 #else 66 #define DEBUG_PRINT(_fmt, ...) \ 67 fprintf(stderr, "%s()::%d " _fmt , __func__, __LINE__, __VA_ARGS__) 68 #endif 69 #else 70 #define DEBUG_PRINT(_fmt, ...) 71 #endif 72 73 /** 74 ****************************************************************************** 75 * @description 76 * Macro will loop through keystream of length 64bytes and xor with the 77 * input buffer placing the result in the output buffer. 
78 * KeyStream bytes must be swapped on 32bit boundary before this operation 79 * 80 *****************************************************************************/ 81 #define ZUC_XOR_KEYSTREAM(pIn64, pOut64, pKeyStream64) \ 82 { \ 83 int i =0; \ 84 union SwapBytes_t { \ 85 uint64_t l64; \ 86 uint32_t w32[2]; \ 87 }swapBytes; \ 88 /* loop through the key stream and xor 64 bits at a time */ \ 89 for(i =0; i < ZUC_KEYSTR_LEN/8; i++) { \ 90 swapBytes.l64 = *pKeyStream64++; \ 91 swapBytes.w32[0] = bswap4(swapBytes.w32[0]); \ 92 swapBytes.w32[1] = bswap4(swapBytes.w32[1]); \ 93 *pOut64++ = *pIn64++ ^ swapBytes.l64; \ 94 } \ 95 } 96 97 /** 98 ***************************************************************************** 99 * @description 100 * Packed structure to store the ZUC state for 4 packets. * 101 *****************************************************************************/ 102 typedef struct zuc_state_4_s { 103 uint32_t lfsrState[16][4]; 104 /**< State registers of the LFSR */ 105 uint32_t fR1[4]; 106 /**< register of F */ 107 uint32_t fR2[4]; 108 /**< register of F */ 109 uint32_t bX0[4]; 110 /**< Output X0 of the bit reorganization for 4 packets */ 111 uint32_t bX1[4]; 112 /**< Output X1 of the bit reorganization for 4 packets */ 113 uint32_t bX2[4]; 114 /**< Output X2 of the bit reorganization for 4 packets */ 115 uint32_t bX3[4]; 116 /**< Output X3 of the bit reorganization for 4 packets */ 117 } ZucState4_t; 118 119 /** 120 ***************************************************************************** 121 * @description 122 * Packed structure to store the ZUC state for 8 packets. 
* 123 *****************************************************************************/ 124 typedef struct zuc_state_8_s { 125 uint32_t lfsrState[16][8]; 126 /**< State registers of the LFSR */ 127 uint32_t fR1[8]; 128 /**< register of F */ 129 uint32_t fR2[8]; 130 /**< register of F */ 131 uint32_t bX0[8]; 132 /**< Output X0 of the bit reorganization for 8 packets */ 133 uint32_t bX1[8]; 134 /**< Output X1 of the bit reorganization for 8 packets */ 135 uint32_t bX2[8]; 136 /**< Output X2 of the bit reorganization for 8 packets */ 137 uint32_t bX3[8]; 138 /**< Output X3 of the bit reorganization for 8 packets */ 139 } ZucState8_t; 140 141 /** 142 ***************************************************************************** 143 * @description 144 * Packed structure to store the ZUC state for a single packet. * 145 *****************************************************************************/ 146 typedef struct zuc_state_s { 147 uint32_t lfsrState[16]; 148 /**< State registers of the LFSR */ 149 uint32_t fR1; 150 /**< register of F */ 151 uint32_t fR2; 152 /**< register of F */ 153 uint32_t bX0; 154 /**< Output X0 of the bit reorganization */ 155 uint32_t bX1; 156 /**< Output X1 of the bit reorganization */ 157 uint32_t bX2; 158 /**< Output X2 of the bit reorganization */ 159 uint32_t bX3; 160 /**< Output X3 of the bit reorganization */ 161 } ZucState_t; 162 163 /** 164 ***************************************************************************** 165 * @description 166 * Structure to store pointers to the 4 keys to be used as input to 167 * @ref asm_ZucInitialization_4 and @ref asm_ZucGenKeystream64B_4 168 *****************************************************************************/ 169 typedef struct zuc_key_4_s { 170 const uint8_t *pKeys[4]; 171 /**< Array of pointers to 128-bit keys for the 4 packets */ 172 } ZucKey4_t; 173 174 /** 175 ***************************************************************************** 176 * @description 177 * Structure to store pointers 
to the 4 IV's to be used as input to 178 * @ref asm_ZucInitialization_4 and @ref asm_ZucGenKeystream64B_4 179 *****************************************************************************/ 180 typedef struct zuc_iv_4_s { 181 const uint8_t *pIvs[4]; 182 /**< Array of pointers to 128-bit IV's for the 4 packets */ 183 } ZucIv4_t; 184 185 /** 186 ***************************************************************************** 187 * @description 188 * Structure to store pointers to the 8 keys to be used as input to 189 * @ref asm_ZucInitialization_8 and @ref asm_ZucGenKeystream64B_8 190 *****************************************************************************/ 191 typedef struct zuc_key_8_s { 192 const uint8_t *pKeys[8]; 193 /**< Array of pointers to 128-bit keys for the 8 packets */ 194 } ZucKey8_t; 195 196 /** 197 ***************************************************************************** 198 * @description 199 * Structure to store pointers to the 8 IV's to be used as input to 200 * @ref asm_ZucInitialization_8 and @ref asm_ZucGenKeystream64B_8 201 *****************************************************************************/ 202 typedef struct zuc_iv_8_s { 203 const uint8_t *pIvs[8]; 204 /**< Array of pointers to 128-bit IV's for the 8 packets */ 205 } ZucIv8_t; 206 207 /** 208 ***************************************************************************** 209 * @description 210 * Structure to store pointers to the 16 keys to be used as input to 211 * @ref asm_ZucInitialization_16 and @ref asm_ZucGenKeystream64B_16 212 *****************************************************************************/ 213 typedef struct zuc_key_16_s { 214 const uint8_t *pKeys[16]; 215 /**< Array of pointers to 128-bit keys for the 16 packets */ 216 } ZucKey16_t; 217 218 /** 219 ***************************************************************************** 220 * @description 221 * Structure to store pointers to the 16 IV's to be used as input to 222 * @ref asm_ZucInitialization_16 and @ref 
asm_ZucGenKeystream64B_16 223 *****************************************************************************/ 224 typedef struct zuc_iv_16_s { 225 const uint8_t *pIvs[16]; 226 /**< Array of pointers to 128-bit IV's for the 16 packets */ 227 } ZucIv16_t; 228 229 /** 230 ****************************************************************************** 231 * 232 * @description 233 * Definition of the external function that implements the initialization 234 * stage of the ZUC algorithm. The function will initialize the state 235 * for a single packet operation. 236 * 237 * @param[in] pKey Pointer to the 128-bit initial key that 238 * will be used when initializing the ZUC 239 * state. 240 * @param[in] pIv Pointer to the 128-bit initial vector that 241 * will be used when initializing the ZUC 242 * state. 243 * @param[in,out] pState Pointer to a ZUC state structure of type 244 * @ref ZucState_t that will be populated 245 * with the initialized ZUC state. 246 * 247 * @pre 248 * None 249 * 250 *****************************************************************************/ 251 IMB_DLL_LOCAL void asm_ZucInitialization_sse(const void *pKey, 252 const void *pIv, 253 ZucState_t *pState); 254 255 IMB_DLL_LOCAL void asm_ZucInitialization_sse_no_aesni(const void *pKey, 256 const void *pIv, 257 ZucState_t *pState); 258 259 IMB_DLL_LOCAL void asm_ZucInitialization_avx(const void *pKey, 260 const void *pIv, 261 ZucState_t *pState); 262 263 /** 264 ****************************************************************************** 265 * @description 266 * Definition of the external function that implements the initialization 267 * stage of the ZUC algorithm for 4 packets. The function will initialize 268 * the state for 4 individual packets. 269 * 270 * @param[in] pKey Pointer to an array of 128-bit initial keys 271 * that will be used when initializing the ZUC 272 * state. 
273 * @param[in] pIv Pointer to an array of 128-bit initial 274 * vectors that will be used when initializing 275 * the ZUC state. 276 * @param[in,out] pState Pointer to a ZUC state structure of type 277 * @ref ZucState4_t that will be populated 278 * with the initialized ZUC state. 279 * 280 * @pre 281 * None 282 * 283 *****************************************************************************/ 284 IMB_DLL_LOCAL void asm_ZucInitialization_4_sse(ZucKey4_t *pKeys, 285 ZucIv4_t *pIvs, 286 ZucState4_t *pState); 287 288 IMB_DLL_LOCAL void asm_ZucInitialization_4_sse_no_aesni(ZucKey4_t *pKeys, 289 ZucIv4_t *pIvs, 290 ZucState4_t *pState); 291 292 IMB_DLL_LOCAL void asm_ZucInitialization_4_gfni_sse(ZucKey4_t *pKeys, 293 ZucIv4_t *pIvs, 294 ZucState4_t *pState); 295 296 IMB_DLL_LOCAL void asm_ZucInitialization_4_avx(ZucKey4_t *pKeys, 297 ZucIv4_t *pIvs, 298 ZucState4_t *pState); 299 300 /** 301 ****************************************************************************** 302 * @description 303 * Definition of the external function that implements the initialization 304 * stage of the ZUC algorithm for 8 packets. The function will initialize 305 * the state for 8 individual packets. 306 * 307 * @param[in] pKey Pointer to an array of 128-bit initial keys 308 * that will be used when initializing the ZUC 309 * state. 310 * @param[in] pIv Pointer to an array of 128-bit initial 311 * vectors that will be used when initializing 312 * the ZUC state. 313 * @param[in,out] pState Pointer to a ZUC state structure of type 314 * @ref ZucState8_t that will be populated 315 * with the initialized ZUC state. 
316 * 317 * @pre 318 * None 319 * 320 *****************************************************************************/ 321 IMB_DLL_LOCAL void asm_ZucInitialization_8_avx2(ZucKey8_t *pKeys, 322 ZucIv8_t *pIvs, 323 ZucState8_t *pState); 324 325 /** 326 ****************************************************************************** 327 * @description 328 * Definition of the external function that implements the initialization 329 * stage of the ZUC algorithm for 16 packets. The function will initialize 330 * the state for 16 individual packets. 331 * 332 * @param[in] pKey Pointer to an array of 128-bit initial keys 333 * that will be used when initializing the ZUC 334 * state. 335 * @param[in] pIv Pointer to an array of 128-bit initial 336 * vectors that will be used when initializing 337 * the ZUC state. 338 * @param[in,out] pState Pointer to a ZUC state structure of type 339 * @ref ZucState16_t that will be populated 340 * with the initialized ZUC state. 341 * 342 * @pre 343 * None 344 * 345 *****************************************************************************/ 346 IMB_DLL_LOCAL void asm_ZucInitialization_16_avx512(ZucKey16_t *pKeys, 347 ZucIv16_t *pIvs, 348 ZucState16_t *pState, 349 const uint16_t lane_mask); 350 351 IMB_DLL_LOCAL void asm_ZucInitialization_16_gfni_avx512(ZucKey16_t *pKeys, 352 ZucIv16_t *pIvs, 353 ZucState16_t *pState, 354 const uint16_t lane_mask); 355 356 /** 357 ****************************************************************************** 358 * 359 * @description 360 * Definition of the external function that implements the working 361 * stage of the ZUC algorithm. The function will generate 64 bytes of 362 * keystream. 363 * 364 * @param[in,out] pKeystream Pointer to an input buffer that will 365 * contain the generated keystream. 366 367 * @param[in] pState Pointer to a ZUC state structure of type 368 * @ref ZucState_t 369 * 370 * @pre 371 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 372 * state. 
373 * 374 *****************************************************************************/ 375 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_avx(uint32_t *pKeystream, 376 ZucState_t *pState); 377 378 /** 379 ****************************************************************************** 380 * 381 * @description 382 * Definition of the external function that implements the working 383 * stage of the ZUC algorithm. The function will generate 32 bytes of 384 * keystream. 385 * 386 * @param[in,out] pKeystream Pointer to an input buffer that will 387 * contain the generated keystream. 388 389 * @param[in] pState Pointer to a ZUC state structure of type 390 * @ref ZucState_t 391 * 392 * @pre 393 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 394 * state. 395 * 396 *****************************************************************************/ 397 IMB_DLL_LOCAL void asm_ZucGenKeystream32B_avx(uint32_t *pKeystream, 398 ZucState_t *pState); 399 400 /** 401 ****************************************************************************** 402 * 403 * @description 404 * Definition of the external function that implements the working 405 * stage of the ZUC algorithm. The function will generate 16 bytes of 406 * keystream. 407 * 408 * @param[in,out] pKeystream Pointer to an input buffer that will 409 * contain the generated keystream. 410 411 * @param[in] pState Pointer to a ZUC state structure of type 412 * @ref ZucState_t 413 * 414 * @pre 415 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 416 * state. 
417 * 418 *****************************************************************************/ 419 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_avx(uint32_t *pKeystream, 420 ZucState_t *pState); 421 422 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_sse(uint32_t *pKeystream, 423 ZucState_t *pState); 424 425 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_sse_no_aesni(uint32_t *pKeystream, 426 ZucState_t *pState); 427 428 /** 429 ****************************************************************************** 430 * 431 * @description 432 * Definition of the external function that implements the working 433 * stage of the ZUC algorithm. The function will generate 8 bytes of 434 * keystream. 435 * 436 * @param[in,out] pKeystream Pointer to an input buffer that will 437 * contain the generated keystream. 438 439 * @param[in] pState Pointer to a ZUC state structure of type 440 * @ref ZucState_t 441 * 442 * @pre 443 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 444 * state. 445 * 446 *****************************************************************************/ 447 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_sse(void *pKeystream, 448 ZucState_t *pState); 449 450 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_sse_no_aesni(void *pKeystream, 451 ZucState_t *pState); 452 453 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_avx(void *pKeystream, 454 ZucState_t *pState); 455 456 /** 457 ****************************************************************************** 458 * 459 * @description 460 * Definition of the external function that implements the working 461 * stage of the ZUC algorithm. The function will generate N*4 bytes of 462 * keystream, being N the number of rounds specified 463 * in the numRounds parameter (from 1 to 16 rounds, 464 * equal to from 4 to 64 bytes) 465 * 466 * @param[in,out] pKeystream Pointer to an input buffer that will 467 * contain the generated keystream. 
468 469 * @param[in] pState Pointer to a ZUC state structure of type 470 * @ref ZucState_t 471 * 472 * @param[in] numRounds Number of 4-byte rounds (1 to 16 rounds) 473 * 474 * @pre 475 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 476 * state. 477 * 478 *****************************************************************************/ 479 IMB_DLL_LOCAL void asm_ZucGenKeystream_sse(void *pKeystream, 480 ZucState_t *pState, 481 uint64_t numRounds); 482 483 IMB_DLL_LOCAL void asm_ZucGenKeystream_sse_no_aesni(void *pKeystream, 484 ZucState_t *pState, 485 uint64_t numRounds); 486 487 IMB_DLL_LOCAL void asm_ZucGenKeystream_avx(void *pKeystream, 488 ZucState_t *pState, 489 uint64_t numRounds); 490 491 /** 492 ****************************************************************************** 493 * 494 * @description 495 * Definition of the external function that implements the working 496 * stage of the ZUC algorithm. The function will generate 16 bytes of 497 * keystream for four packets in parallel. 498 * 499 * @param[in] pState Pointer to a ZUC state structure of type 500 * @ref ZucState4_t 501 * 502 * @param[in,out] pKeyStr Array of pointers to 4 input buffers that 503 * will contain the generated keystream for 504 * these 4 packets. 505 * 506 * @pre 507 * A successful call to @ref asm_ZucInitialization_4 to initialize the ZUC 508 * state. 
509 * 510 *****************************************************************************/ 511 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_sse(ZucState4_t *pState, 512 uint32_t *pKeyStr[4]); 513 514 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_sse_no_aesni(ZucState4_t *pState, 515 uint32_t *pKeyStr[4]); 516 517 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_gfni_sse(ZucState4_t *pState, 518 uint32_t *pKeyStr[4]); 519 520 IMB_DLL_LOCAL void asm_ZucGenKeystream16B_4_avx(ZucState4_t *pState, 521 uint32_t *pKeyStr[4]); 522 523 /** 524 ****************************************************************************** 525 * 526 * @description 527 * Definition of the external function that implements the working 528 * stage of the ZUC algorithm. The function will generate 32 bytes of 529 * keystream for eight packets in parallel. 530 * 531 * @param[in] pState Pointer to a ZUC state structure of type 532 * @ref ZucState8_t 533 * 534 * @param[in,out] pKeyStr Array of pointers to 8 input buffers that 535 * will contain the generated keystream for 536 * these 8 packets. 537 * 538 * @pre 539 * A successful call to @ref asm_ZucInitialization_8 to initialize the ZUC 540 * state. 541 * 542 *****************************************************************************/ 543 IMB_DLL_LOCAL void asm_ZucGenKeystream32B_8_avx2(ZucState8_t *pState, 544 uint32_t *pKeyStr[8]); 545 546 /** 547 ****************************************************************************** 548 * 549 * @description 550 * Definition of the external function that implements the working 551 * stage of the ZUC algorithm. The function will generate 64 bytes of 552 * keystream for four packets in parallel. 553 * 554 * @param[in] pState Pointer to a ZUC state structure of type 555 * @ref ZucState16_t 556 * 557 * @param[in,out] pKeyStr Array of pointers to 16 input buffers 558 * that will contain the generated keystream 559 * for these 16 packets. 
560 * 561 * @pre 562 * A successful call to @ref asm_ZucInitialization_4 to initialize the ZUC 563 * state. 564 * 565 *****************************************************************************/ 566 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_16_avx512(ZucState16_t *pState, 567 uint32_t *pKeyStr[16]); 568 569 IMB_DLL_LOCAL void asm_ZucGenKeystream64B_16_gfni_avx512(ZucState16_t *pState, 570 uint32_t *pKeyStr[16]); 571 /** 572 ****************************************************************************** 573 * 574 * @description 575 * Definition of the external function that implements the working 576 * stage of the ZUC algorithm. The function will generate 8 bytes of 577 * keystream for four packets in parallel. 578 * 579 * @param[in] pState Pointer to a ZUC state structure of type 580 * @ref ZucState4_t 581 * 582 * @param[in,out] pKeyStr Array of pointers to 4 input buffers that 583 * will contain the generated keystream for 584 * these 4 packets. 585 * 586 * @pre 587 * A successful call to @ref asm_ZucInitialization_4 to initialize the ZUC 588 * state. 589 * 590 *****************************************************************************/ 591 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_sse(ZucState4_t *pState, 592 uint32_t *pKeyStr[4]); 593 594 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_sse_no_aesni(ZucState4_t *pState, 595 uint32_t *pKeyStr[4]); 596 597 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_gfni_sse(ZucState4_t *pState, 598 uint32_t *pKeyStr[4]); 599 600 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_4_avx(ZucState4_t *pState, 601 uint32_t *pKeyStr[4]); 602 603 /** 604 ****************************************************************************** 605 * 606 * @description 607 * Definition of the external function that implements the working 608 * stage of the ZUC algorithm. The function will generate 8 bytes of 609 * keystream for eight packets in parallel. 
610 * 611 * @param[in] pState Pointer to a ZUC state structure of type 612 * @ref ZucState8_t 613 * 614 * @param[in,out] pKeyStr Array of pointers to 8 input buffers that 615 * will contain the generated keystream for 616 * these 8 packets. 617 * 618 * @pre 619 * A successful call to @ref asm_ZucInitialization_8 to initialize the ZUC 620 * state. 621 * 622 *****************************************************************************/ 623 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_8_avx2(ZucState8_t *pState, 624 uint32_t *pKeyStr[8]); 625 626 /** 627 ****************************************************************************** 628 * 629 * @description 630 * Definition of the external function that implements the working 631 * stage of the ZUC algorithm. The function will generate 8 bytes of 632 * keystream for sixteen packets in parallel. 633 * 634 * @param[in] pState Pointer to a ZUC state structure of type 635 * @ref ZucState16_t 636 * 637 * @param[in,out] pKeyStr Array of pointers to 16 input buffers 638 * that will contain the generated keystream 639 * for these 16 packets. 640 * 641 * @pre 642 * A successful call to @ref asm_ZucInitialization_16 to initialize the ZUC 643 * state. 644 * 645 *****************************************************************************/ 646 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_16_avx512(ZucState16_t *pState, 647 uint32_t *pKeyStr[16], 648 const uint32_t lane_mask); 649 650 IMB_DLL_LOCAL void asm_ZucGenKeystream8B_16_gfni_avx512(ZucState16_t *pState, 651 uint32_t *pKeyStr[16], 652 const uint32_t lane_mask); 653 654 /** 655 ****************************************************************************** 656 * 657 * @description 658 * Definition of the external function that implements the working 659 * stage of the ZUC algorithm. The function will generate N*4 bytes of 660 * keystream for sixteen packets in parallel. 
661 * 662 * @param[in] pState Pointer to a ZUC state structure of type 663 * @ref ZucState16_t 664 * 665 * @param[in,out] pKeyStr Array of pointers to 16 input buffers 666 * that will contain the generated keystream 667 * for these 16 packets. 668 * 669 * 670 * @param[in] numRounds Number of 4-byte rounds (1 to 16 rounds) 671 * 672 * @pre 673 * A successful call to @ref asm_ZucInitialization to initialize the ZUC 674 * state. 675 * 676 *****************************************************************************/ 677 IMB_DLL_LOCAL void asm_ZucGenKeystream_16_avx512(ZucState16_t *pState, 678 uint32_t *pKstr[16], 679 const uint64_t numRounds); 680 681 IMB_DLL_LOCAL void asm_ZucGenKeystream_16_gfni_avx512(ZucState16_t *pState, 682 uint32_t *pKstr[16], 683 const uint64_t numRounds); 684 /** 685 ****************************************************************************** 686 * 687 * @description 688 * Definition of the external function that implements the working 689 * stage of the ZUC algorithm. The function will generate a multiple of 690 * 4 bytes of keystream for 4 packets in parallel and will XOR this 691 * keystream with the input text, producing output of up to the minimum 692 * length of all bytes, rounded up to the nearest multiple of 4 bytes. 693 * "lengths" array is updated after the function call, with the remaining 694 * bytes to encrypt. 695 * 696 * @param[in] pState Pointer to a ZUC state structure of type 697 * @ref ZucState4_t 698 * 699 * @param[in] pIn Array of pointers to 4 input buffers. 700 * @param[out] pOut Array of pointers to 4 output buffers. 701 * @param[in/out] lengths Remaining length of buffers to encrypt 702 * @param[in] minLength Common length for all buffers to encrypt 703 * 704 * @pre 705 * A successful call to @ref asm_ZucInitialization_4 to initialize the ZUC 706 * state. 
707 * 708 *****************************************************************************/ 709 IMB_DLL_LOCAL void asm_ZucCipher_4_sse(ZucState4_t *pState, 710 const uint64_t *pIn[4], 711 uint64_t *pOut[4], 712 uint16_t lengths[4], 713 const uint64_t minLength); 714 715 IMB_DLL_LOCAL void asm_ZucCipher_4_sse_no_aesni(ZucState4_t *pState, 716 const uint64_t *pIn[4], 717 uint64_t *pOut[4], 718 uint16_t lengths[4], 719 const uint64_t minLength); 720 721 IMB_DLL_LOCAL void asm_ZucCipher_4_gfni_sse(ZucState4_t *pState, 722 const uint64_t *pIn[4], 723 uint64_t *pOut[4], 724 uint16_t lengths[4], 725 const uint64_t minLength); 726 727 IMB_DLL_LOCAL void asm_ZucCipher_4_avx(ZucState4_t *pState, 728 const uint64_t *pIn[4], 729 uint64_t *pOut[4], 730 uint16_t lengths[4], 731 const uint64_t minLength); 732 733 /** 734 ****************************************************************************** 735 * 736 * @description 737 * Definition of the external function that implements the working 738 * stage of the ZUC algorithm. The function will generate a multiple of 739 * 4 bytes of keystream for 8 packets in parallel and will XOR this 740 * keystream with the input text, producing output of up to the minimum 741 * length of all bytes, rounded up to the nearest multiple of 4 bytes. 742 * "lengths" array is updated after the function call, with the remaining 743 * bytes to encrypt. 744 * 745 * @param[in] pState Pointer to a ZUC state structure of type 746 * @ref ZucState8_t 747 * 748 * @param[in] pIn Array of pointers to 8 input buffers. 749 * @param[out] pOut Array of pointers to 8 output buffers. 750 * @param[in/out] lengths Remaining length of buffers to encrypt 751 * @param[in] minLength Common length for all buffers to encrypt 752 * 753 * @pre 754 * A successful call to @ref asm_ZucInitialization_8 to initialize the ZUC 755 * state. 
756 * 757 *****************************************************************************/ 758 IMB_DLL_LOCAL void asm_ZucCipher_8_avx2(ZucState8_t *pState, 759 const uint64_t *pIn[8], 760 uint64_t *pOut[8], 761 const uint16_t lengths[8], 762 const uint64_t minLength); 763 764 /** 765 ****************************************************************************** 766 * 767 * @description 768 * Definition of the external function that implements the working 769 * stage of the ZUC algorithm. The function will generate a multiple of 770 * 4 bytes of keystream for sixteen packets in parallel and will XOR this 771 * keystream with the input text, producing output of up to the minimum 772 * length of all bytes, rounded up to the nearest multiple of 4 bytes. 773 * "lengths" array is updated after the function call, with the remaining 774 * bytes to encrypt. 775 * 776 * @param[in] pState Pointer to a ZUC state structure of type 777 * @ref ZucState16_t 778 * @param[in] pIn Array of pointers to 16 input buffers. 779 * @param[out] pOut Array of pointers to 16 output buffers. 780 * @param[in/out] lengths Remaining length of buffers to encrypt 781 * @param[in] minLength Common length for all buffers to encrypt 782 * 783 * @pre 784 * A successful call to @ref asm_ZucInitialization_16 to initialize the ZUC 785 * state. 
786 * 787 *****************************************************************************/ 788 IMB_DLL_LOCAL void asm_ZucCipher_16_avx512(ZucState16_t *pState, 789 const uint64_t *pIn[16], 790 uint64_t *pOut[16], 791 const uint16_t lengths[16], 792 const uint64_t minLength); 793 794 IMB_DLL_LOCAL void asm_ZucCipher_16_gfni_avx512(ZucState16_t *pState, 795 const uint64_t *pIn[16], 796 uint64_t *pOut[16], 797 const uint16_t lengths[16], 798 const uint64_t minLength); 799 800 /** 801 ****************************************************************************** 802 * @description 803 * Definition of the external function to update the authentication tag 804 * based on keystream and data (SSE variant) 805 * 806 * @param[in] T Authentication tag 807 * 808 * @param[in] ks Pointer to key stream 809 * 810 * @param[in] data Pointer to the data 811 * 812 * @pre 813 * None 814 * 815 *****************************************************************************/ 816 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BSSE(uint32_t T, const void *ks, 817 const void *data); 818 819 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BSSE_no_aesni(uint32_t T, const void *ks, 820 const void *data); 821 822 /** 823 ****************************************************************************** 824 * @description 825 * Definition of the external function to return the authentication 826 * update value to be XOR'ed with current authentication tag (SSE variant) 827 * 828 * @param[in] ks Pointer to key stream 829 * 830 * @param[in] data Pointer to the data 831 * 832 * @param[in] n_words Number of data bits to be processed 833 * 834 * @pre 835 * None 836 * 837 *****************************************************************************/ 838 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderSSE(const void *ks, const void *data, 839 const uint64_t n_words); 840 841 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderSSE_no_aesni(const void *ks, 842 const void *data, 843 const uint64_t n_words); 844 845 /** 846 
****************************************************************************** 847 * @description 848 * Definition of the external function to update the authentication tag 849 * based on keystream and data (AVX variant) 850 * 851 * @param[in] T Authentication tag 852 * 853 * @param[in] ks Pointer to key stream 854 * 855 * @param[in] data Pointer to the data 856 * 857 * @pre 858 * None 859 * 860 *****************************************************************************/ 861 IMB_DLL_LOCAL uint32_t asm_Eia3Round64BAVX(uint32_t T, const void *ks, 862 const void *data); 863 864 IMB_DLL_LOCAL void asm_Eia3Round64BAVX512_16(uint32_t *T, 865 const void * const *ks, 866 const void **data, 867 uint16_t *len); 868 869 IMB_DLL_LOCAL void asm_Eia3Round64B_16_VPCLMUL(uint32_t *T, 870 const void * const *ks, 871 const void **data, 872 uint16_t *len); 873 874 IMB_DLL_LOCAL uint32_t asm_Eia3Round32BAVX(uint32_t T, const void *ks, 875 const void *data); 876 877 IMB_DLL_LOCAL uint32_t asm_Eia3Round16BAVX(uint32_t T, const void *ks, 878 const void *data); 879 880 IMB_DLL_LOCAL void asm_Eia3Round64BAVX512(uint32_t *T, const void *ks, 881 const void *data); 882 883 /** 884 ****************************************************************************** 885 * @description 886 * Definition of the external function to return the authentication 887 * update value to be XOR'ed with current authentication tag (AVX variant) 888 * 889 * @param[in] ks Pointer to key stream 890 * 891 * @param[in] data Pointer to the data 892 * 893 * @param[in] n_words Number of data bits to be processed 894 * 895 * @pre 896 * None 897 * 898 *****************************************************************************/ 899 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderAVX(const void *ks, const void *data, 900 const uint64_t n_words); 901 902 /** 903 ****************************************************************************** 904 * @description 905 * Definition of the external function to return the final 
authentication 906 * tag of the message. 907 * 908 * @param[in/out] T Pointer to authentication tag to be updated 909 * 910 * @param[in] ks Pointer to key stream 911 * 912 * @param[in] data Pointer to the data 913 * 914 * @param[in] n_bits Number of data bits to be processed 915 * 916 * @pre 917 * None 918 * 919 *****************************************************************************/ 920 IMB_DLL_LOCAL void asm_Eia3RemainderAVX512(uint32_t *T, const void *ks, 921 const void *data, 922 const uint64_t n_bits); 923 924 IMB_DLL_LOCAL uint32_t asm_Eia3RemainderAVX512_16(uint32_t *T, 925 const void * const ks, 926 const void **data, 927 uint16_t *lens, 928 const uint64_t commonBits); 929 930 IMB_DLL_LOCAL 931 void zuc_eia3_4_buffer_job_gfni_sse(const void * const pKey[4], 932 const void * const pIv[4], 933 const void * const pBufferIn[4], 934 uint32_t *pMacI[4], 935 const uint16_t lengthInBits[4], 936 const void * const job_in_lane[4]); 937 938 IMB_DLL_LOCAL 939 void zuc_eia3_4_buffer_job_no_gfni_sse(const void * const pKey[4], 940 const void * const pIv[4], 941 const void * const pBufferIn[4], 942 uint32_t *pMacI[4], 943 const uint16_t lengthInBits[4], 944 const void * const job_in_lane[4]); 945 946 IMB_DLL_LOCAL 947 void zuc_eia3_4_buffer_job_sse_no_aesni(const void * const pKey[4], 948 const void * const pIv[4], 949 const void * const pBufferIn[4], 950 uint32_t *pMacI[4], 951 const uint16_t lengthInBits[4], 952 const void * const job_in_lane[4]); 953 954 IMB_DLL_LOCAL 955 void zuc_eia3_4_buffer_job_avx(const void * const pKey[4], 956 const void * const pIv[4], 957 const void * const pBufferIn[4], 958 uint32_t *pMacI[4], 959 const uint16_t lengthInBits[4], 960 const void * const job_in_lane[4]); 961 962 IMB_DLL_LOCAL 963 void zuc_eia3_8_buffer_job_avx2(const void * const pKey[8], 964 const void * const pIv[8], 965 const void * const pBufferIn[8], 966 uint32_t *pMacI[8], 967 const uint16_t lengthInBits[8], 968 const void * const job_in_lane[8]); 969 970 
IMB_DLL_LOCAL 971 void zuc_eia3_16_buffer_job_no_gfni_avx512(MB_MGR_ZUC_OOO *ooo); 972 973 IMB_DLL_LOCAL 974 void zuc_eia3_16_buffer_job_gfni_avx512(MB_MGR_ZUC_OOO *ooo); 975 976 /* the s-boxes */ 977 extern const uint8_t S0[256]; 978 extern const uint8_t S1[256]; 979 980 void zuc_eea3_1_buffer_sse(const void *pKey, const void *pIv, 981 const void *pBufferIn, void *pBufferOut, 982 const uint32_t lengthInBytes); 983 984 void zuc_eea3_4_buffer_sse(const void * const pKey[4], 985 const void * const pIv[4], 986 const void * const pBufferIn[4], 987 void *pBufferOut[4], 988 const uint32_t lengthInBytes[4]); 989 990 void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[], 991 const void * const pBufferIn[], void *pBufferOut[], 992 const uint32_t lengthInBytes[], 993 const uint32_t numBuffers); 994 995 void zuc_eia3_1_buffer_sse(const void *pKey, const void *pIv, 996 const void *pBufferIn, const uint32_t lengthInBits, 997 uint32_t *pMacI); 998 999 void zuc_eia3_n_buffer_sse(const void * const pKey[], 1000 const void * const pIv[], 1001 const void * const pBufferIn[], 1002 const uint32_t lengthInBits[], 1003 uint32_t *pMacI[], 1004 const uint32_t numBuffers); 1005 1006 void zuc_eia3_n_buffer_gfni_sse(const void * const pKey[], 1007 const void * const pIv[], 1008 const void * const pBufferIn[], 1009 const uint32_t lengthInBits[], 1010 uint32_t *pMacI[], 1011 const uint32_t numBuffers); 1012 1013 void zuc_eea3_1_buffer_sse_no_aesni(const void *pKey, const void *pIv, 1014 const void *pBufferIn, void *pBufferOut, 1015 const uint32_t lengthInBytes); 1016 1017 void zuc_eea3_4_buffer_sse_no_aesni(const void * const pKey[4], 1018 const void * const pIv[4], 1019 const void * const pBufferIn[4], 1020 void *pBufferOut[4], 1021 const uint32_t lengthInBytes[4]); 1022 1023 void zuc_eea3_n_buffer_sse_no_aesni(const void * const pKey[], 1024 const void * const pIv[], 1025 const void * const pBufferIn[], 1026 void *pBufferOut[], 1027 const uint32_t lengthInBytes[], 
1028 const uint32_t numBuffers); 1029 1030 void zuc_eea3_4_buffer_gfni_sse(const void * const pKey[4], 1031 const void * const pIv[4], 1032 const void * const pBufferIn[4], 1033 void *pBufferOut[4], 1034 const uint32_t lengthInBytes[4]); 1035 1036 void zuc_eea3_n_buffer_gfni_sse(const void * const pKey[], 1037 const void * const pIv[], 1038 const void * const pBufferIn[], 1039 void *pBufferOut[], 1040 const uint32_t lengthInBytes[], 1041 const uint32_t numBuffers); 1042 1043 void zuc_eia3_1_buffer_sse_no_aesni(const void *pKey, const void *pIv, 1044 const void *pBufferIn, 1045 const uint32_t lengthInBits, 1046 uint32_t *pMacI); 1047 1048 void zuc_eia3_n_buffer_sse_no_aesni(const void * const pKey[], 1049 const void * const pIv[], 1050 const void * const pBufferIn[], 1051 const uint32_t lengthInBits[], 1052 uint32_t *pMacI[], 1053 const uint32_t numBuffers); 1054 1055 void zuc_eea3_1_buffer_avx(const void *pKey, const void *pIv, 1056 const void *pBufferIn, void *pBufferOut, 1057 const uint32_t lengthInBytes); 1058 1059 void zuc_eea3_4_buffer_avx(const void * const pKey[4], 1060 const void * const pIv[4], 1061 const void * const pBufferIn[4], 1062 void *pBufferOut[4], 1063 const uint32_t lengthInBytes[4]); 1064 1065 void zuc_eea3_n_buffer_avx(const void * const pKey[], const void * const pIv[], 1066 const void * const pBufferIn[], void *pBufferOut[], 1067 const uint32_t lengthInBytes[], 1068 const uint32_t numBuffers); 1069 1070 void zuc_eia3_1_buffer_avx(const void *pKey, const void *pIv, 1071 const void *pBufferIn, const uint32_t lengthInBits, 1072 uint32_t *pMacI); 1073 1074 void zuc_eia3_n_buffer_avx(const void * const pKey[], 1075 const void * const pIv[], 1076 const void * const pBufferIn[], 1077 const uint32_t lengthInBits[], 1078 uint32_t *pMacI[], 1079 const uint32_t numBuffers); 1080 1081 1082 void zuc_eea3_1_buffer_avx2(const void *pKey, const void *pIv, 1083 const void *pBufferIn, void *pBufferOut, 1084 const uint32_t lengthInBytes); 1085 1086 void 
zuc_eea3_n_buffer_avx2(const void * const pKey[], const void * const pIv[], 1087 const void * const pBufferIn[], void *pBufferOut[], 1088 const uint32_t lengthInBytes[], 1089 const uint32_t numBuffers); 1090 1091 void zuc_eia3_1_buffer_avx2(const void *pKey, const void *pIv, 1092 const void *pBufferIn, const uint32_t lengthInBits, 1093 uint32_t *pMacI); 1094 1095 void zuc_eia3_n_buffer_avx2(const void * const pKey[], 1096 const void * const pIv[], 1097 const void * const pBufferIn[], 1098 const uint32_t lengthInBits[], 1099 uint32_t *pMacI[], 1100 const uint32_t numBuffers); 1101 1102 void zuc_eea3_1_buffer_avx512(const void *pKey, const void *pIv, 1103 const void *pBufferIn, void *pBufferOut, 1104 const uint32_t lengthInBytes); 1105 1106 void zuc_eea3_n_buffer_avx512(const void * const pKey[], 1107 const void * const pIv[], 1108 const void * const pBufferIn[], 1109 void *pBufferOut[], 1110 const uint32_t lengthInBytes[], 1111 const uint32_t numBuffers); 1112 1113 void zuc_eea3_n_buffer_gfni_avx512(const void * const pKey[], 1114 const void * const pIv[], 1115 const void * const pBufferIn[], 1116 void *pBufferOut[], 1117 const uint32_t lengthInBytes[], 1118 const uint32_t numBuffers); 1119 1120 void zuc_eia3_1_buffer_avx512(const void *pKey, const void *pIv, 1121 const void *pBufferIn, 1122 const uint32_t lengthInBits, 1123 uint32_t *pMacI); 1124 1125 void zuc_eia3_n_buffer_avx512(const void * const pKey[], 1126 const void * const pIv[], 1127 const void * const pBufferIn[], 1128 const uint32_t lengthInBits[], 1129 uint32_t *pMacI[], 1130 const uint32_t numBuffers); 1131 1132 void zuc_eia3_n_buffer_gfni_avx512(const void * const pKey[], 1133 const void * const pIv[], 1134 const void * const pBufferIn[], 1135 const uint32_t lengthInBits[], 1136 uint32_t *pMacI[], 1137 const uint32_t numBuffers); 1138 1139 /* Internal API */ 1140 IMB_DLL_LOCAL 1141 void _zuc_eea3_4_buffer_avx(const void * const pKey[4], 1142 const void * const pIv[4], 1143 const void * const 
pBufferIn[4], 1144 void *pBufferOut[4], 1145 const uint32_t length[4]); 1146 1147 IMB_DLL_LOCAL 1148 void _zuc_eia3_4_buffer_avx(const void * const pKey[4], 1149 const void * const pIv[4], 1150 const void * const pBufferIn[4], 1151 const uint32_t lengthInBits[4], 1152 uint32_t *pMacI[4]); 1153 1154 IMB_DLL_LOCAL 1155 void _zuc_eea3_8_buffer_avx2(const void * const pKey[8], 1156 const void * const pIv[8], 1157 const void * const pBufferIn[8], 1158 void *pBufferOut[8], 1159 const uint32_t length[8]); 1160 1161 IMB_DLL_LOCAL 1162 void _zuc_eia3_8_buffer_avx2(const void * const pKey[8], 1163 const void * const pIv[8], 1164 const void * const pBufferIn[8], 1165 const uint32_t lengthInBits[8], 1166 uint32_t *pMacI[8]); 1167 1168 #endif /* ZUC_INTERNAL_H_ */ 1169 1170