1;; 2;; Copyright (c) 2009-2020, Intel Corporation 3;; 4;; Redistribution and use in source and binary forms, with or without 5;; modification, are permitted provided that the following conditions are met: 6;; 7;; * Redistributions of source code must retain the above copyright notice, 8;; this list of conditions and the following disclaimer. 9;; * Redistributions in binary form must reproduce the above copyright 10;; notice, this list of conditions and the following disclaimer in the 11;; documentation and/or other materials provided with the distribution. 12;; * Neither the name of Intel Corporation nor the names of its contributors 13;; may be used to endorse or promote products derived from this software 14;; without specific prior written permission. 15;; 16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;

%include "include/os.asm"
%include "include/reg_sizes.asm"
%include "include/zuc_sbox.inc"

section .data
default rel

;; Sixteen 15-bit constants; key_expand places EK_d[i] in bits 22:8 of LFSR word i.
EK_d:
dw      0x44D7, 0x26BC, 0x626B, 0x135E, 0x5789, 0x35E2, 0x7135, 0x09AF,
dw      0x4D78, 0x2F13, 0x6BC4, 0x1AF1, 0x5E26, 0x3C4D, 0x789A, 0x47AC

;; Byte-select masks used by NONLIN_FUN to merge the two S-box results.
;; They are consumed as 128-bit memory operands (pand/vpand), so a full
;; 16 bytes must be defined for each of them; only the low 64 bits of the
;; result are read back afterwards.
align 16
mask_S0:
dq      0xff00ff00ff00ff00, 0xff00ff00ff00ff00

align 16
mask_S1:
dq      0x00ff00ff00ff00ff, 0x00ff00ff00ff00ff

%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

section .text

;; Layout of the state block addressed through pState (32-bit words):
;;   words  0..15 : LFSR registers
;;   word   16    : F_R1
;;   word   17    : F_R2
;;   words 18..21 : BRC_X0..BRC_X3
%define OFFSET_FR1      (16*4)
%define OFFSET_FR2      (17*4)
%define OFFSET_BRC_X0   (18*4)
%define OFFSET_BRC_X1   (19*4)
%define OFFSET_BRC_X2   (20*4)
%define OFFSET_BRC_X3   (21*4)

;
; BITS_REORG()
;
; Bit reorganization: builds the four 32-bit words X0..X3 from the LFSR.
; The LFSR is addressed as a circular buffer; %1 is added (mod 16) to every
; register index, so no data is moved between rounds.
;
; params
;       %1  - round number
; in
;       rsi - pointer to the 16 LFSR words
; uses
;       eax, ebx, ecx, edx
; return
;       r12d = BRC_X0, r13d = BRC_X1, r14d = BRC_X2, r15d = BRC_X3
;
%macro  BITS_REORG 1
        ;
        ; r12d = LFSR_S15
        ; eax  = LFSR_S14
        ; r13d = LFSR_S11
        ; ebx  = LFSR_S9
        ; r14d = LFSR_S7
        ; ecx  = LFSR_S5
        ; r15d = LFSR_S2
        ; edx  = LFSR_S0

        mov     r12d, [rsi + ((15 + %1) % 16)*4]
        mov     eax,  [rsi + ((14 + %1) % 16)*4]
        mov     r13d, [rsi + ((11 + %1) % 16)*4]
        mov     ebx,  [rsi + (( 9 + %1) % 16)*4]
        mov     r14d, [rsi + (( 7 + %1) % 16)*4]
        mov     ecx,  [rsi + (( 5 + %1) % 16)*4]
        mov     r15d, [rsi + (( 2 + %1) % 16)*4]
        mov     edx,  [rsi + (( 0 + %1) % 16)*4]

        shr     r12d, 15                ; keep bits 30:15 of S15
        shl     eax, 16                 ; low 16 bits of S14 to the top
        shl     ebx, 1                  ; drop the unused bit 31 so that
        shl     ecx, 1                  ; bits 30:15 become the top 16 bits
        shl     edx, 1
        shld    r12d, eax, 16           ; BRC_X0
        shld    r13d, ebx, 16           ; BRC_X1
        shld    r14d, ecx, 16           ; BRC_X2
        shld    r15d, edx, 16           ; BRC_X3
%endmacro

;
; NONLIN_FUN()
;
; Nonlinear function F: optionally produces W and updates F_R1/F_R2.
;
; params
;       %1 - calculate W if 1
;       %2 - SSE / SSE_NO_AESNI / AVX (selects the S-box macros)
; in
;       r10d = F_R1, r11d = F_R2
;       r12d = BRC_X0, r13d = BRC_X1, r14d = BRC_X2
; uses
;       eax (only when %1 == 1), ebx, ecx, rdx, r8d, r9d
;       xmm0, xmm1 and xmm2-xmm4 (handed to the S-box macros as temporaries)
;       NOTE(review): the S-box macros come from zuc_sbox.inc; any further
;       clobbers they have are not visible here. Callers reload rsi after
;       this macro.
; return
;       eax  = W value (only when %1 == 1)
;       r10d = F_R1
;       r11d = F_R2
;
%macro  NONLIN_FUN 2
%define %%CALC_W        %1      ; [in] Calculate W if 1
%define %%ARCH          %2      ; [in] SSE/SSE_NO_AESNI/AVX

%if (%%CALC_W == 1)
        mov     eax, r12d
        xor     eax, r10d
        add     eax, r11d               ; W = (BRC_X0 ^ F_R1) + F_R2
%endif

        add     r10d, r13d              ; W1 = F_R1 + BRC_X1
        xor     r11d, r14d              ; W2 = F_R2 ^ BRC_X2

        mov     rdx, r10
        shld    edx, r11d, 16           ; P = (W1 << 16) | (W2 >> 16)
        shld    r11d, r10d, 16          ; Q = (W2 << 16) | (W1 >> 16)

        mov     ebx, edx
        mov     ecx, edx
        mov     r8d, edx
        mov     r9d, edx

        rol     ebx, 2
        rol     ecx, 10
        rol     r8d, 18
        rol     r9d, 24
        xor     edx, ebx
        xor     edx, ecx
        xor     edx, r8d
        xor     edx, r9d                ; U = L1(P) = EDX, hi(RDX)=0

        mov     ebx, r11d
        mov     ecx, r11d
        mov     r8d, r11d
        mov     r9d, r11d
        rol     ebx, 8
        rol     ecx, 14
        rol     r8d, 22
        rol     r9d, 30
        xor     r11d, ebx
        xor     r11d, ecx
        xor     r11d, r8d
        xor     r11d, r9d               ; V = L2(Q) = R11D, hi(R11)=0

        shl     r11, 32
        xor     rdx, r11                ; V || U

        ; Both S-boxes are evaluated on all 8 bytes; the masks then pick
        ; the S0 result for odd bytes and the S1 result for even bytes.
%ifidn %%ARCH, SSE
        movq    xmm0, rdx
        movdqa  xmm1, xmm0
        S0_comput_SSE xmm1, xmm2, xmm3, 0
        S1_comput_SSE xmm0, xmm2, xmm3, xmm4, 0

        pand    xmm0, [rel mask_S1]
        pand    xmm1, [rel mask_S0]

        pxor    xmm0, xmm1
        movd    r10d, xmm0              ; F_R1
        pextrd  r11d, xmm0, 1           ; F_R2
%elifidn %%ARCH, SSE_NO_AESNI
        movq    xmm0, rdx
        movdqa  xmm1, xmm0
        S0_comput_SSE xmm1, xmm2, xmm3, 0
        S1_comput_SSE_NO_AESNI xmm0, xmm2, xmm3, xmm4

        pand    xmm0, [rel mask_S1]
        pand    xmm1, [rel mask_S0]

        pxor    xmm0, xmm1
        movd    r10d, xmm0              ; F_R1
        pextrd  r11d, xmm0, 1           ; F_R2
%else
        vmovq   xmm0, rdx
        vmovdqa xmm1, xmm0
        S0_comput_AVX xmm1, xmm2, xmm3
        S1_comput_AVX xmm0, xmm2, xmm3, xmm4

        vpand   xmm0, xmm0, [rel mask_S1]
        vpand   xmm1, xmm1, [rel mask_S0]

        vpxor   xmm0, xmm0, xmm1
        vmovd   r10d, xmm0              ; F_R1
        vpextrd r11d, xmm0, 1           ; F_R2
%endif

%endmacro

;
; LFSR_UPDT()
;
; One LFSR step. Computes
;       S16 = rax + (1 + 2^8)*S0 + 2^20*S4 + 2^21*S10 + 2^17*S13 + 2^15*S15
; modulo (2^31 - 1) and stores it over the slot of the current S0
; (circular buffer, see BITS_REORG).
;
; params
;       %1 - round number
; in
;       rsi - pointer to the 16 LFSR words
;       rax - value added to the feedback (ZERO or W >> 1)
; uses
;       rax, rbx, rcx, rdx, r8, r9
; return
;       eax = new LFSR word (also written to memory)
;
%macro  LFSR_UPDT 1
        ;
        ; ebx = LFSR_S0
        ; ecx = LFSR_S4
        ; edx = LFSR_S10
        ; r8d = LFSR_S13
        ; r9d = LFSR_S15
        ;lea    rsi, [LFSR_STA] ; moved to calling function

        mov     ebx, [rsi + (( 0 + %1) % 16)*4]
        mov     ecx, [rsi + (( 4 + %1) % 16)*4]
        mov     edx, [rsi + ((10 + %1) % 16)*4]
        mov     r8d, [rsi + ((13 + %1) % 16)*4]
        mov     r9d, [rsi + ((15 + %1) % 16)*4]

        ; Calculate 64-bit LFSR feedback (32-bit loads above zero-extend,
        ; so the 64-bit sum cannot overflow)
        add     rax, rbx
        shl     rbx, 8
        shl     rcx, 20
        shl     rdx, 21
        shl     r8, 17
        shl     r9, 15
        add     rax, rbx
        add     rax, rcx
        add     rax, rdx
        add     rax, r8
        add     rax, r9

        ; Reduce it to 31-bit value: fold the high part onto the low 31 bits
        mov     rbx, rax
        and     rax, 0x7FFFFFFF
        shr     rbx, 31
        add     rax, rbx

        ; Final conditional subtraction of (2^31 - 1).
        ; Subtract only when the folded value is strictly greater than
        ; 2^31 - 1: a feedback congruent to 0 must be stored as 2^31 - 1,
        ; never as 0 (ZUC specification: "if s16 = 0 then s16 = 2^31 - 1").
        ; BITS_REORG extracts raw bits, so 0 and 2^31 - 1 are not
        ; interchangeable even though they are congruent.
        mov     rbx, rax
        sub     rbx, 0x7FFFFFFF
        cmovg   rax, rbx

        ; LFSR_S16 = (LFSR_S15++) = eax
        mov     [rsi + (( 0 + %1) % 16)*4], eax
%endmacro


;
; make_u31()
;
; Packs three fields into one 31-bit LFSR word:
;       %1 = (%2[7:0] << 23) | (%3[14:0] << 8) | %4[7:0]
;
; params
;       %1 - [out] 32-bit result register
;       %2 - [in]  key byte
;       %3 - [in]  15-bit constant
;       %4 - [in]  IV byte
;
%macro  make_u31 4

%define %%Rt    %1
%define %%Ke    %2
%define %%Ek    %3
%define %%Iv    %4
        xor     %%Rt, %%Rt
        shrd    %%Rt, %%Iv, 8
        shrd    %%Rt, %%Ek, 15
        shrd    %%Rt, %%Ke, 9
%endmacro


;
; key_expand()
;
; Builds LFSR words %1 and %1+1 from the key, the EK_d table and the IV.
;
; params
;       %1 - index of the first of the two words
; in
;       pKe - key pointer, pIv - IV pointer (defined by ZUC_INIT)
;       rbx - pointer to EK_d
;       rax - pointer to the LFSR words
; uses
;       r8d - r15d
;
%macro  key_expand 1
        movzx   r8d, byte [pKe + (%1 + 0)]
        movzx   r9d, word [rbx + ((%1 + 0)*2)]
        movzx   r10d, byte [pIv + (%1 + 0)]
        make_u31 r11d, r8d, r9d, r10d
        mov     [rax + ((%1 + 0)*4)], r11d

        movzx   r12d, byte [pKe + (%1 + 1)]
        movzx   r13d, word [rbx + ((%1 + 1)*2)]
        movzx   r14d, byte [pIv + (%1 + 1)]
        make_u31 r15d, r12d, r13d, r14d
        mov     [rax + ((%1 + 1)*4)], r15d
%endmacro

;
; ZUC_INIT()
;
; Body of the initialization entry points: loads key and IV into the LFSR,
; runs 32 initialization rounds plus one keystream-mode round whose output
; is discarded, then stores F_R1/F_R2/BRC_X0..3 into the state.
; The macro contains its own prologue and epilogue; the caller adds `ret`.
;
; params
;       %1 - SSE / SSE_NO_AESNI / AVX
;
; Stack frame (relative to rbp):
;       [rbp -  8 .. -40]  rbx, r12, r13, r14, r15
;       [rbp - 48], [-56]  rdi, rsi (Windows only, callee-saved there)
;       [rbp - 64]         pState
;
%macro ZUC_INIT 1
%define %%ARCH  %1      ; [in] SSE/SSE_NO_AESNI/AVX

%ifdef LINUX
        %define pKe     rdi
        %define pIv     rsi
        %define pState  rdx
%else
        %define pKe     rcx
        %define pIv     rdx
        %define pState  r8
%endif

        ; save the base pointer
        push    rbp

        ; set up the frame and reserve stack space for saved registers
        ; and the pState pointer
        mov     rbp, rsp
        sub     rsp, 64

        ; Save non-volatile registers
        mov     [rbp - 8],  rbx
        mov     [rbp - 16], r12
        mov     [rbp - 24], r13
        mov     [rbp - 32], r14
        mov     [rbp - 40], r15
%ifndef LINUX
        mov     [rbp - 48], rdi
        mov     [rbp - 56], rsi
%endif

        lea     rbx, [rel EK_d]         ; load pointer to D
        lea     rax, [pState]           ; load pointer to pState
        mov     [rbp - 64], pState      ; save pointer to pState
                                        ; (key_expand overwrites r8 on Windows)

        ; Expand key
        key_expand 0
        key_expand 2
        key_expand 4
        key_expand 6
        key_expand 8
        key_expand 10
        key_expand 12
        key_expand 14

        ; Set R1 and R2 to zero
        xor     r10, r10
        xor     r11, r11

        ; Shift LFSR 32-times, update state variables
%assign N 0
%rep 32
        mov     rsi, [rbp - 64]         ; rsi = pState

        BITS_REORG N

        NONLIN_FUN 1, %%ARCH
        shr     eax, 1                  ; feedback input is W >> 1

        mov     rsi, [rbp - 64]         ; re-load pState

        LFSR_UPDT N

%assign N N+1
%endrep

        ; And once more, initial round from keygen phase = 33 times
        mov     rsi, [rbp - 64]         ; rsi = pState

        BITS_REORG 0
        NONLIN_FUN 0, %%ARCH            ; W is not needed for this round
        xor     eax, eax                ; keystream mode: no W fed back

        mov     rsi, [rbp - 64]         ; re-load pState

        LFSR_UPDT 0

        mov     rsi, [rbp - 64]         ; re-load pState

        ; Save ZUC's state variables
        mov     [rsi + OFFSET_FR1],    r10d
        mov     [rsi + OFFSET_FR2],    r11d
        mov     [rsi + OFFSET_BRC_X0], r12d
        mov     [rsi + OFFSET_BRC_X1], r13d
        mov     [rsi + OFFSET_BRC_X2], r14d
        mov     [rsi + OFFSET_BRC_X3], r15d

        ; Restore non-volatile registers
        mov     rbx, [rbp - 8]
        mov     r12, [rbp - 16]
        mov     r13, [rbp - 24]
        mov     r14, [rbp - 32]
        mov     r15, [rbp - 40]
%ifndef LINUX
        mov     rdi, [rbp - 48]
        mov     rsi, [rbp - 56]
%endif

        ; restore stack and base pointer
        mov     rsp, rbp
        pop     rbp

%endmacro

;
; ZUC_KEYGEN()
;
; Generate N*4 bytes of keystream
; for a single buffer (where N is number of rounds).
; The macro contains its own prologue and epilogue; the caller adds `ret`.
;
; Rounds are numbered from 1 because initialization has already advanced
; the circular LFSR by 33 (= 1 mod 16) positions. After 4 or 8 rounds the
; LFSR words are rotated in memory so that the next call can start from
; round 1 again; after 16 rounds no rotation is needed.
;
; params
;       %1 - SSE / SSE_NO_AESNI / AVX
;       %2 - number of 4-byte rounds (2, 4, 8 or 16)
;
; Stack frame (relative to rbp):
;       [rbp -  8 .. -40]  rbx, r12, r13, r14, r15
;       [rbp - 48], [-56]  rdi, rsi (Windows only)
;       [rbp - 64]         current keystream write pointer
;       [rbp - 72]         pState
;
; NOTE: for 4 and 8 rounds the state is accessed with aligned 16-byte
; moves, so pState must be 16-byte aligned for those variants.
;
%macro ZUC_KEYGEN 2
%define %%ARCH          %1      ; [in] SSE/SSE_NO_AESNI/AVX
%define %%NUM_ROUNDS    %2      ; [in] Number of 4-byte rounds

%ifdef LINUX
        %define pKS     rdi
        %define pState  rsi
%else
        %define pKS     rcx
        %define pState  rdx
%endif

%ifidn %%ARCH, AVX
%define %%MOVDQA vmovdqa
%else
%define %%MOVDQA movdqa
%endif

        ; save the base pointer
        push    rbp

        ; set up the frame and reserve stack space for saved registers
        ; and the two pointers
        mov     rbp, rsp
        sub     rsp, 72

        ; Save non-volatile registers
        mov     [rbp - 8],  rbx
        mov     [rbp - 16], r12
        mov     [rbp - 24], r13
        mov     [rbp - 32], r14
        mov     [rbp - 40], r15
%ifndef LINUX
        mov     [rbp - 48], rdi
        mov     [rbp - 56], rsi
%endif

        ; Load input keystream pointer parameter in RAX
        mov     rax, pKS

        ; Restore ZUC's state variables
        mov     r10d, [pState + OFFSET_FR1]
        mov     r11d, [pState + OFFSET_FR2]
        mov     r12d, [pState + OFFSET_BRC_X0]
        mov     r13d, [pState + OFFSET_BRC_X1]
        mov     r14d, [pState + OFFSET_BRC_X2]
        mov     r15d, [pState + OFFSET_BRC_X3]

        ; Store keystream pointer
        mov     [rbp - 64], rax

        ; Store ZUC State Pointer
        mov     [rbp - 72], pState

        ; Generate N*4B of keystream in N rounds
%assign N 1
%rep %%NUM_ROUNDS

        mov     rsi, [rbp - 72]         ; rsi = pState

        BITS_REORG N
        NONLIN_FUN 1, %%ARCH

        ; Store the keystream
        mov     rbx, [rbp - 64]         ; load *pkeystream
        xor     eax, r15d               ; Z = W ^ BRC_X3
        mov     [rbx], eax
        add     rbx, 4                  ; increment the pointer
        mov     [rbp - 64], rbx         ; save pkeystream

        xor     eax, eax                ; keystream mode: no W fed back

        mov     rsi, [rbp - 72]         ; re-load pState

        LFSR_UPDT N

%assign N N+1
%endrep

;; Reorder LFSR registers, as not all 16 rounds have been completed
;; (if the number of rounds is not 4, 8 or 16, the only possible case is 2,
;; and in that case the LFSR does not have to be reordered, as that function
;; call is done at the end of the algorithm).
%if (%%NUM_ROUNDS == 8)
        ; rotate the 16 words by 8 positions: swap the two 32-byte halves
        %%MOVDQA xmm0, [rsi]
        %%MOVDQA xmm1, [rsi+16]
        %%MOVDQA xmm2, [rsi+32]
        %%MOVDQA xmm3, [rsi+48]

        %%MOVDQA [rsi], xmm2
        %%MOVDQA [rsi+16], xmm3
        %%MOVDQA [rsi+32], xmm0
        %%MOVDQA [rsi+48], xmm1
%elif (%%NUM_ROUNDS == 4)
        ; rotate the 16 words by 4 positions
        %%MOVDQA xmm0, [rsi]
        %%MOVDQA xmm1, [rsi+16]
        %%MOVDQA xmm2, [rsi+32]
        %%MOVDQA xmm3, [rsi+48]

        %%MOVDQA [rsi], xmm1
        %%MOVDQA [rsi+16], xmm2
        %%MOVDQA [rsi+32], xmm3
        %%MOVDQA [rsi+48], xmm0
%endif

        mov     rsi, [rbp - 72]         ; load pState

        ; Save ZUC's state variables
        mov     [rsi + OFFSET_FR1],    r10d
        mov     [rsi + OFFSET_FR2],    r11d
        mov     [rsi + OFFSET_BRC_X0], r12d
        mov     [rsi + OFFSET_BRC_X1], r13d
        mov     [rsi + OFFSET_BRC_X2], r14d
        mov     [rsi + OFFSET_BRC_X3], r15d

        ; Restore non-volatile registers
        mov     rbx, [rbp - 8]
        mov     r12, [rbp - 16]
        mov     r13, [rbp - 24]
        mov     r14, [rbp - 32]
        mov     r15, [rbp - 40]
%ifndef LINUX
        mov     rdi, [rbp - 48]
        mov     rsi, [rbp - 56]
%endif

        mov     rsp, rbp
        pop     rbp

%endmacro

;
; ZUC_KEYGEN_VAR()
;
; Generate N*4 bytes of keystream for a single buffer
; (where N is number of rounds, being 16 rounds the maximum).
; N is a run-time argument; N == 0 generates nothing and leaves the state
; untouched. The LFSR words are NOT reordered afterwards, so this is meant
; to be the last keystream call on a given state.
; The macro contains its own prologue and epilogue; the caller adds `ret`.
;
; params
;       %1 - SSE / SSE_NO_AESNI / AVX
;
; Stack frame (relative to rbp):
;       [rbp -  8 .. -40]  rbx, r12, r13, r14, r15
;       [rbp - 48], [-56]  rdi, rsi (Windows only)
;       [rbp - 64]         current keystream write pointer
;       [rbp - 72]         pState
;       [rbp - 80]         remaining number of rounds
;
%macro ZUC_KEYGEN_VAR 1
%define %%ARCH  %1      ; [in] SSE/SSE_NO_AESNI/AVX

%ifdef LINUX
        %define pKS     rdi
        %define pState  rsi
        %define nRounds rdx
%else
        %define pKS     rcx
        %define pState  rdx
        %define nRounds r8
%endif

%define MAX_ROUNDS 16
        ; save the base pointer
        push    rbp

        ; set up the frame and reserve stack space for saved registers
        ; and locals
        mov     rbp, rsp
        sub     rsp, 80

        ; Save non-volatile registers
        mov     [rbp - 8],  rbx
        mov     [rbp - 16], r12
        mov     [rbp - 24], r13
        mov     [rbp - 32], r14
        mov     [rbp - 40], r15
%ifndef LINUX
        mov     [rbp - 48], rdi
        mov     [rbp - 56], rsi
%endif

        mov     [rbp - 80], nRounds

        ; Load input keystream pointer parameter in RAX
        mov     rax, pKS

        ; Restore ZUC's state variables
        mov     r10d, [pState + OFFSET_FR1]
        mov     r11d, [pState + OFFSET_FR2]
        mov     r12d, [pState + OFFSET_BRC_X0]
        mov     r13d, [pState + OFFSET_BRC_X1]
        mov     r14d, [pState + OFFSET_BRC_X2]
        mov     r15d, [pState + OFFSET_BRC_X3]

        ; Store keystream pointer
        mov     [rbp - 64], rax

        ; Store ZUC State Pointer
        mov     [rbp - 72], pState

        ; Nothing to do for zero rounds. Without this check the counter
        ; below would wrap around and all MAX_ROUNDS rounds would run,
        ; writing 64 bytes to the keystream buffer. The state variables
        ; written back at the exit label are the values just loaded.
        cmp     qword [rbp - 80], 0
        je      %%exit_loop

        ; Generate N*4B of keystream in N rounds
%assign N 1
%rep MAX_ROUNDS

        mov     rsi, [rbp - 72]         ; rsi = pState

        BITS_REORG N
        NONLIN_FUN 1, %%ARCH

        ; Store the keystream
        mov     rbx, [rbp - 64]         ; load *pkeystream
        xor     eax, r15d               ; Z = W ^ BRC_X3
        mov     [rbx], eax
        add     rbx, 4                  ; increment the pointer
        mov     [rbp - 64], rbx         ; save pkeystream

        xor     eax, eax                ; keystream mode: no W fed back

        mov     rsi, [rbp - 72]         ; re-load pState

        LFSR_UPDT N

        dec     qword [rbp - 80]        ; numRounds - 1
        jz      %%exit_loop
%assign N N+1
%endrep

%%exit_loop:
        mov     rsi, [rbp - 72]         ; load pState

        ; Save ZUC's state variables
        mov     [rsi + OFFSET_FR1],    r10d
        mov     [rsi + OFFSET_FR2],    r11d
        mov     [rsi + OFFSET_BRC_X0], r12d
        mov     [rsi + OFFSET_BRC_X1], r13d
        mov     [rsi + OFFSET_BRC_X2], r14d
        mov     [rsi + OFFSET_BRC_X3], r15d

        ; Restore non-volatile registers
        mov     rbx, [rbp - 8]
        mov     r12, [rbp - 16]
        mov     r13, [rbp - 24]
        mov     r14, [rbp - 32]
        mov     r15, [rbp - 40]
%ifndef LINUX
        mov     rdi, [rbp - 48]
        mov     rsi, [rbp - 56]
%endif

        mov     rsp, rbp
        pop     rbp

%endmacro

;;
;; void asm_ZucInitialization_sse(uint8_t *pKey, uint8_t *pIV, uint32_t *pState)
;;
;; WIN64
;;      RCX - pKey
;;      RDX - pIV
;;      R8  - pState
;; LIN64
;;      RDI - pKey
;;      RSI - pIV
;;      RDX - pState
;;
align 16
MKGLOBAL(asm_ZucInitialization_sse,function,internal)
asm_ZucInitialization_sse:

        ZUC_INIT SSE

        ret

;;
;; void asm_ZucInitialization_sse_no_aesni(uint8_t *pKey, uint8_t *pIV,
;;                                         uint32_t *pState)
;;
;; WIN64
;;      RCX - pKey
;;      RDX - pIV
;;      R8  - pState
;; LIN64
;;      RDI - pKey
;;      RSI - pIV
;;      RDX - pState
;;
align 16
MKGLOBAL(asm_ZucInitialization_sse_no_aesni,function,internal)
asm_ZucInitialization_sse_no_aesni:

        ZUC_INIT SSE_NO_AESNI

        ret

;;
;; void asm_ZucInitialization_avx(uint8_t *pKey, uint8_t *pIV, uint32_t *pState)
;;
;; WIN64
;;      RCX - pKey
;;      RDX - pIV
;;      R8  - pState
;; LIN64
;;      RDI - pKey
;;      RSI - pIV
;;      RDX - pState
;;
align 16
MKGLOBAL(asm_ZucInitialization_avx,function,internal)
asm_ZucInitialization_avx:

        ZUC_INIT AVX

        ret

;;
;; void asm_ZucGenKeystream8B_sse(void *pKeystream, ZucState_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream8B_sse,function,internal)
asm_ZucGenKeystream8B_sse:

        ZUC_KEYGEN SSE, 2

        ret

;;
;; void asm_ZucGenKeystream8B_sse_no_aesni(void *pKeystream, ZucState_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream8B_sse_no_aesni,function,internal)
asm_ZucGenKeystream8B_sse_no_aesni:

        ZUC_KEYGEN SSE_NO_AESNI, 2

        ret

;;
;; void asm_ZucGenKeystream8B_avx(void *pKeystream, ZucState_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream8B_avx,function,internal)
asm_ZucGenKeystream8B_avx:

        ZUC_KEYGEN AVX, 2

        ret

;;
;; void asm_ZucGenKeystream16B_sse(uint32_t *pKeystream, uint32_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream16B_sse,function,internal)
asm_ZucGenKeystream16B_sse:

        ZUC_KEYGEN SSE, 4

        ret

;;
;; void asm_ZucGenKeystream16B_sse_no_aesni(uint32_t *pKeystream, uint32_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream16B_sse_no_aesni,function,internal)
asm_ZucGenKeystream16B_sse_no_aesni:

        ZUC_KEYGEN SSE_NO_AESNI, 4

        ret

;;
;; void asm_ZucGenKeystream64B_avx(uint32_t *pKeystream, uint32_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream64B_avx,function,internal)
asm_ZucGenKeystream64B_avx:

        ZUC_KEYGEN AVX, 16

        ret

;;
;; void asm_ZucGenKeystream32B_avx(uint32_t *pKeystream, uint32_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream32B_avx,function,internal)
asm_ZucGenKeystream32B_avx:

        ZUC_KEYGEN AVX, 8

        ret

;;
;; void asm_ZucGenKeystream16B_avx(uint32_t *pKeystream, uint32_t *pState);
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream16B_avx,function,internal)
asm_ZucGenKeystream16B_avx:

        ZUC_KEYGEN AVX, 4

        ret

;;
;; void asm_ZucGenKeystream_sse(uint32_t *pKeystream, uint32_t *pState,
;;                              uint64_t numRounds);
;;
;; Generates numRounds * 4 bytes of keystream (SSE variant). The body is the
;; ZUC_KEYGEN_VAR macro, which unrolls at most 16 rounds per call, so callers
;; are expected to pass numRounds in the range 1..16.
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;;      R8  - NROUNDS (number of 4B rounds)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;      RDX - NROUNDS (number of 4B rounds)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream_sse,function,internal)
asm_ZucGenKeystream_sse:

        ZUC_KEYGEN_VAR SSE

        ret

;;
;; void asm_ZucGenKeystream_sse_no_aesni(uint32_t *pKeystream, uint32_t *pState,
;;                                       uint64_t numRounds);
;;
;; Same as asm_ZucGenKeystream_sse, built with the SSE_NO_AESNI variant of
;; ZUC_KEYGEN_VAR.
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;;      R8  - NROUNDS (number of 4B rounds)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;      RDX - NROUNDS (number of 4B rounds)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream_sse_no_aesni,function,internal)
asm_ZucGenKeystream_sse_no_aesni:

        ZUC_KEYGEN_VAR SSE_NO_AESNI

        ret

;;
;; void asm_ZucGenKeystream_avx(uint32_t *pKeystream, uint32_t *pState,
;;                              uint64_t numRounds);
;;
;; Same as asm_ZucGenKeystream_sse, built with the AVX variant of
;; ZUC_KEYGEN_VAR.
;;
;; WIN64
;;      RCX - KS (key stream pointer)
;;      RDX - STATE (state pointer)
;;      R8  - NROUNDS (number of 4B rounds)
;; LIN64
;;      RDI - KS (key stream pointer)
;;      RSI - STATE (state pointer)
;;      RDX - NROUNDS (number of 4B rounds)
;;
align 16
MKGLOBAL(asm_ZucGenKeystream_avx,function,internal)
asm_ZucGenKeystream_avx:

        ZUC_KEYGEN_VAR AVX

        ret
