; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at http:
; //mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com


.MODEL FLAT, C
.XMM

.DATA
ALIGN 16
; Constants read by the key-schedule routines of this file.  Every table is
; one dword replicated across the four lanes of a 16-byte vector.
;   Lmask / Lmask192 / Lmask256 : pshufb control masks
;   Lcon1 / Lcon2               : starting values (1 and 1bh) of the vector
;                                 that is fed to aesenclast as the round key
Lmask       dd 4 DUP (0c0f0e0dh)
Lmask192    dd 4 DUP (004070605h)
Lmask256    dd 4 DUP (00c0f0e0dh)
Lcon1       dd 4 DUP (1)
Lcon2       dd 4 DUP (1bh)

.CODE

; Register roles used by the bulk-cipher macros.
ctx         textequ <ecx>       ; context base; round key k is at [k*16 + ctx]
output      textequ <edx>       ; destination cursor
input       textequ <eax>       ; source cursor
inputLen    textequ <edi>       ; bytes still to be processed


;------------------------------------------------------------------------------
; aes_x7 insn, rkIdx
;   Loads the 16-byte round key at [rkIdx*16 + ctx] into xmm7 and applies
;   insn with that key to each of xmm0..xmm6, in that order.
;   Clobbers xmm7.
;------------------------------------------------------------------------------
aes_x7 MACRO insn, rkIdx
        movdqu  xmm7, [rkIdx*16 + ctx]
    FOR blkreg, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        insn    blkreg, xmm7
    ENDM
ENDM

; One middle encryption round on the seven blocks held in xmm0..xmm6.
aes_rnd MACRO rkIdx
        aes_x7  aesenc, rkIdx
ENDM

; Final encryption round on the seven blocks held in xmm0..xmm6.
aes_last_rnd MACRO rkIdx
        aes_x7  aesenclast, rkIdx
ENDM

; One middle decryption round on the seven blocks held in xmm0..xmm6.
aes_dec_rnd MACRO rkIdx
        aes_x7  aesdec, rkIdx
ENDM

; Final decryption round on the seven blocks held in xmm0..xmm6.
aes_dec_last_rnd MACRO rkIdx
        aes_x7  aesdeclast, rkIdx
ENDM


;------------------------------------------------------------------------------
; gen_aes_ecb_func enc, rnds
;   Emits the complete body (including ret) of an ECB routine.
;     enc  = 1 selects aesenc/aesenclast, any other value selects
;            aesdec/aesdeclast.
;     rnds = index of the last round key (10, 12 or 14 in this file).
;
;   Stack arguments, counted in dwords above the return address:
;     0 -> ctx, 1 -> output, 4 -> input, 5 -> inputLen.
;   Dwords 2 and 3 are not read by this code.
;
;   Data is handled seven blocks at a time while at least 7*16 bytes
;   remain, then one block at a time while at least 16 bytes remain.
;   A trailing remainder shorter than 16 bytes is left untouched.
;
;   Returns 0 in eax.  edi is saved and restored.
;   Clobbers eax, ecx, edx, xmm0-xmm7 and the flags.
;   Side effect at assembly time: (re)defines the text macros rnd, lastrnd,
;   aesinst and aeslastinst.
;------------------------------------------------------------------------------
gen_aes_ecb_func MACRO enc, rnds
LOCAL   ecb_x7
LOCAL   ecb_x1
LOCAL   ecb_done

; Select the round primitives once, at assembly time.
IF enc eq 1
        rnd         textequ <aes_rnd>
        lastrnd     textequ <aes_last_rnd>
        aesinst     textequ <aesenc>
        aeslastinst textequ <aesenclast>
ELSE
        rnd         textequ <aes_dec_rnd>
        lastrnd     textequ <aes_dec_last_rnd>
        aesinst     textequ <aesdec>
        aeslastinst textequ <aesdeclast>
ENDIF

        push    inputLen                        ; edi is restored before ret

        ; 2*4 skips the saved edi and the return address
        mov     ctx,      [esp + 2*4 + 0*4]
        mov     output,   [esp + 2*4 + 1*4]
        mov     input,    [esp + 2*4 + 4*4]
        mov     inputLen, [esp + 2*4 + 5*4]

; ---- seven blocks per iteration ---------------------------------------------
ecb_x7:
        cmp     inputLen, 7*16
        jb      ecb_x1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]

        ; initial whitening with round key 0
        movdqu  xmm7, [0*16 + ctx]
    FOR blkreg, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        pxor    blkreg, xmm7
    ENDM

        ; rounds 1 .. rnds-1, then the final round
        i = 1
        WHILE i LT rnds
            rnd i
            i = i+1
        ENDM
        lastrnd rnds

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6

        lea     input,  [7*16 + input]
        lea     output, [7*16 + output]
        sub     inputLen, 7*16
        jmp     ecb_x7

; ---- one block per iteration ------------------------------------------------
ecb_x1:
        cmp     inputLen, 1*16
        jb      ecb_done

        movdqu  xmm0, [input]
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesinst xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     ecb_x1

ecb_done:
        xor     eax, eax                        ; return value 0
        pop     inputLen
        ret

ENDM

; ECB encryption entry points: last round key index 10, 12 and 14.
ALIGN 16
intel_aes_encrypt_ecb_128 PROC
        gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

ALIGN 16
intel_aes_encrypt_ecb_192 PROC
        gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP

ALIGN 16
intel_aes_encrypt_ecb_256 PROC
        gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

ALIGN 16
; ECB decryption entry points: last round key index 10, 12 and 14.
intel_aes_decrypt_ecb_128 PROC
        gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

ALIGN 16
intel_aes_decrypt_ecb_192 PROC
        gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

ALIGN 16
intel_aes_decrypt_ecb_256 PROC
        gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP


;==============================================================================
; Key-schedule routines
;==============================================================================

; Register roles used by the key-schedule routines.
KEY     textequ <ecx>           ; source key bytes
KS      textequ <edx>           ; destination: array of 16-byte round keys
ITR     textequ <eax>           ; scratch pointer, then loop counter

;------------------------------------------------------------------------------
; ks_prefix_xor acc, tmp
;   acc ^= (acc << 32) ^ (acc << 64) ^ (acc << 96), where the shifts are
;   whole-register byte shifts (pslldq).  Clobbers tmp.
;------------------------------------------------------------------------------
ks_prefix_xor MACRO acc, tmp
        movdqa  tmp, acc
    REPEAT 3
        pslldq  tmp, 4
        pxor    acc, tmp
    ENDM
ENDM

;------------------------------------------------------------------------------
; intel_aes_encrypt_init_128
;   Stack arguments: [esp+4] = KEY (16 bytes are read),
;                    [esp+8] = KS  (11 round keys = 176 bytes are written).
;   Slot 0 is a copy of the key; slots 1..8 are produced by the loop with
;   the constant vector starting at Lcon1 and shifted left by one bit per
;   round; slots 9 and 10 use Lcon2 and Lcon2 shifted left by one bit.
;   Clobbers eax, ecx, edx, xmm0-xmm4 and the flags.
;------------------------------------------------------------------------------
ALIGN 16
intel_aes_encrypt_init_128 PROC

        mov     KEY, [esp + 1*4 + 0*4]
        mov     KS,  [esp + 1*4 + 1*4]

        movdqu  xmm1, [KEY]
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        lea     ITR, Lcon1
        movdqa  xmm0, [ITR]
        lea     ITR, Lmask
        movdqa  xmm4, [ITR]

        mov     ITR, 8

Lenc_128_ks_loop:
        lea     KS, [16 + KS]
        dec     ITR                     ; ZF is consumed by jne below; nothing
                                        ; in between writes the flags

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        ks_prefix_xor xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        jne     Lenc_128_ks_loop

        ; last two round keys, constant restarted from Lcon2
        lea     ITR, Lcon2
        movdqa  xmm0, [ITR]

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        ks_prefix_xor xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [16 + KS], xmm1
        movdqa  xmm2, xmm1

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        ks_prefix_xor xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [32 + KS], xmm1
        movdqa  xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP


;------------------------------------------------------------------------------
; gen_aes_dec_init_func rnds, encInit
;   Emits the complete body (including ret) of a decryption key-schedule
;   routine.  Stack arguments are the same as for encInit:
;   [esp+4] = KEY, [esp+8] = KS.
;     1. calls encInit(KEY, KS) with fresh copies of both arguments,
;     2. swaps round keys 0 and rnds,
;     3. for k = 1 .. rnds/2 - 1 swaps round keys k and rnds-k, passing
;        both through aesimc,
;     4. passes the middle round key (index rnds/2) through aesimc in place.
;   Clobbers whatever encInit clobbers, plus xmm0 and xmm1.
;------------------------------------------------------------------------------
gen_aes_dec_init_func MACRO rnds, encInit

        mov     KEY, [esp + 1*4 + 0*4]
        mov     KS,  [esp + 1*4 + 1*4]

        ; The pushed copies are the arguments of the callee; popping them
        ; afterwards also restores KEY and KS.
        push    KS
        push    KEY

        call    encInit

        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [rnds*16 + KS]
        movdqu  [rnds*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT (rnds/2)
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(rnds-i)*16 + KS]

            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1

            movdqu  [(rnds-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1

            i = i+1
        ENDM

        movdqu  xmm0, [(rnds/2)*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [(rnds/2)*16 + KS], xmm0
        ret
ENDM


ALIGN 16
intel_aes_decrypt_init_128 PROC
        gen_aes_dec_init_func 10, intel_aes_encrypt_init_128
intel_aes_decrypt_init_128 ENDP


;------------------------------------------------------------------------------
; ks192_step
;   One expansion step of the 192-bit schedule.
;   In:  xmm1 = four key words, xmm3 = two more key words in its low half,
;        xmm0 = constant vector, xmm4 = pshufb mask.
;   Out: xmm1 and xmm3 updated, xmm0 shifted left by one bit.
;   Clobbers xmm2, xmm6, xmm7.
;------------------------------------------------------------------------------
ks192_step MACRO
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2
ENDM

;------------------------------------------------------------------------------
; intel_aes_encrypt_init_192
;   Stack arguments: [esp+4] = KEY (24 bytes are read),
;                    [esp+8] = KS.
;   Slot 0 is a copy of the first 16 key bytes; each of the four loop
;   iterations writes three further 16-byte slots (1..12).
;   NOTE(review): the store after the loop writes one more 16-byte slot
;   (index 13), so 14*16 = 224 bytes of KS are written in total.  The
;   decryption setup for 192-bit keys only touches slots 0..12 - confirm
;   that every caller provides a buffer of at least 224 bytes.
;   Clobbers eax, ecx, edx, xmm0-xmm7 and the flags.
;------------------------------------------------------------------------------
ALIGN 16
intel_aes_encrypt_init_192 PROC

        mov     KEY, [esp + 1*4 + 0*4]
        mov     KS,  [esp + 1*4 + 1*4]

        pxor    xmm3, xmm3
        movdqu  xmm1, [KEY]
        pinsrd  xmm3, DWORD PTR [16 + KEY], 0
        pinsrd  xmm3, DWORD PTR [20 + KEY], 1

        movdqu  [KS], xmm1
        movdqa  xmm5, xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask192
        movdqu  xmm4, [ITR]

        mov     ITR, 4

Lenc_192_ks_loop:
        ks192_step

        ; repack the 6-word groups into 16-byte round keys
        movdqa  xmm6, xmm1
        shufpd  xmm5, xmm1, 00h
        shufpd  xmm6, xmm3, 01h

        movdqu  [16 + KS], xmm5
        movdqu  [32 + KS], xmm6

        ks192_step

        movdqu  [48 + KS], xmm1
        movdqa  xmm5, xmm3

        lea     KS, [48 + KS]

        dec     ITR
        jnz     Lenc_192_ks_loop

        movdqu  [16 + KS], xmm5
        ret
intel_aes_encrypt_init_192 ENDP

ALIGN 16
intel_aes_decrypt_init_192 PROC
        gen_aes_dec_init_func 12, intel_aes_encrypt_init_192
intel_aes_decrypt_init_192 ENDP


;------------------------------------------------------------------------------
; intel_aes_encrypt_init_256
;   Stack arguments: [esp+4] = KEY (32 bytes are read),
;                    [esp+8] = KS  (15 round keys = 240 bytes are written).
;   Slots 0 and 1 are a copy of the key; each of the six loop iterations
;   writes two further slots (2..13); the tail writes slot 14.
;   Clobbers eax, ecx, edx, xmm0-xmm6 and the flags.
;------------------------------------------------------------------------------
ALIGN 16
intel_aes_encrypt_init_256 PROC

        mov     KEY, [esp + 1*4 + 0*4]
        mov     KS,  [esp + 1*4 + 1*4]
        movdqu  xmm1, [16*0 + KEY]
        movdqu  xmm3, [16*1 + KEY]

        movdqu  [16*0 + KS], xmm1
        movdqu  [16*1 + KS], xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask256
        movdqu  xmm5, [ITR]

        pxor    xmm6, xmm6              ; all-zero round key for the second
                                        ; aesenclast of each iteration

        mov     ITR, 6

Lenc_256_ks_loop:

        ; even slot: derived from xmm3 with the constant vector
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        ks_prefix_xor xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        ; odd slot: derived from the top dword of xmm1, zero constant
        pshufd  xmm2, xmm1, 0ffh
        aesenclast xmm2, xmm6
        ks_prefix_xor xmm3, xmm4
        pxor    xmm3, xmm2
        movdqu  [16*3 + KS], xmm3

        lea     KS, [32 + KS]
        dec     ITR
        jnz     Lenc_256_ks_loop

        ; final slot (index 14)
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        ks_prefix_xor xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        ret
intel_aes_encrypt_init_256 ENDP

ALIGN 16
intel_aes_decrypt_init_256 PROC
        gen_aes_dec_init_func 14, intel_aes_encrypt_init_256
intel_aes_decrypt_init_256 ENDP


;==============================================================================
; CBC
;==============================================================================

; Byte offset, from the context base, of the 16-byte chaining block that the
; CBC routines load on entry and store back on exit.
; NOTE(review): presumably the iv member of the C context structure -
; confirm against the structure layout used by the callers.
CBC_CHAIN_OFS   equ 252

;------------------------------------------------------------------------------
; gen_aes_cbc_enc_func rnds
;   Emits the complete body (including ret) of a CBC encryption routine;
;   rnds is the index of the last round key.
;   Stack arguments (dwords above the return address):
;     0 -> ctx, 1 -> output, 4 -> input, 5 -> inputLen; 2 and 3 are not read.
;   Blocks are processed one at a time while at least 16 bytes remain.
;   The last ciphertext block is written back to [CBC_CHAIN_OFS + ctx].
;   Returns 0 in eax.  edi is saved and restored.
;   Clobbers eax, ecx, edx, xmm0-xmm7 and the flags.
;------------------------------------------------------------------------------
gen_aes_cbc_enc_func MACRO rnds
LOCAL   cbce_next
LOCAL   cbce_done

        push    inputLen

        mov     ctx,      [esp + 2*4 + 0*4]
        mov     output,   [esp + 2*4 + 1*4]
        mov     input,    [esp + 2*4 + 4*4]
        mov     inputLen, [esp + 2*4 + 5*4]

        movdqu  xmm0, [CBC_CHAIN_OFS + ctx]     ; running chaining block

        ; round keys 0..4 stay in registers for the whole call
        movdqu  xmm2, [0*16 + ctx]
        movdqu  xmm3, [1*16 + ctx]
        movdqu  xmm4, [2*16 + ctx]
        movdqu  xmm5, [3*16 + ctx]
        movdqu  xmm6, [4*16 + ctx]

cbce_next:
        cmp     inputLen, 1*16
        jb      cbce_done

        movdqu  xmm1, [input]
        pxor    xmm1, xmm2              ; plaintext ^ round key 0
        pxor    xmm0, xmm1              ; ^ previous ciphertext

        aesenc  xmm0, xmm3
        aesenc  xmm0, xmm4
        aesenc  xmm0, xmm5
        aesenc  xmm0, xmm6

        i = 5
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesenc  xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     cbce_next

cbce_done:
        movdqu  [CBC_CHAIN_OFS + ctx], xmm0

        xor     eax, eax
        pop     inputLen
        ret

ENDM

;------------------------------------------------------------------------------
; gen_aes_cbc_dec_func rnds
;   Emits the complete body (including ret) of a CBC decryption routine;
;   rnds is the index of the last round key.
;   Stack arguments are the same as for gen_aes_cbc_enc_func.
;   Seven blocks are decrypted per iteration while at least 7*16 bytes
;   remain, then one block per iteration while at least 16 bytes remain.
;   All ciphertext blocks needed for chaining are read from input before
;   the corresponding output stores are issued.
;   The last ciphertext block is written back to [CBC_CHAIN_OFS + ctx].
;   Returns 0 in eax.  edi is saved and restored.
;   Clobbers eax, ecx, edx, xmm0-xmm7 and the flags.
;------------------------------------------------------------------------------
gen_aes_cbc_dec_func MACRO rnds
LOCAL   cbcd_x7
LOCAL   cbcd_tail
LOCAL   cbcd_x1
LOCAL   cbcd_done

        push    inputLen

        mov     ctx,      [esp + 2*4 + 0*4]
        mov     output,   [esp + 2*4 + 1*4]
        mov     input,    [esp + 2*4 + 4*4]
        mov     inputLen, [esp + 2*4 + 5*4]

; ---- seven blocks per iteration ---------------------------------------------
cbcd_x7:
        cmp     inputLen, 7*16
        jb      cbcd_tail

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]

        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7
        pxor    xmm1, xmm7
        pxor    xmm2, xmm7
        pxor    xmm3, xmm7
        pxor    xmm4, xmm7
        pxor    xmm5, xmm7
        pxor    xmm6, xmm7

        i = 1
        WHILE i LT rnds
            aes_dec_rnd i
            i = i+1
        ENDM
        aes_dec_last_rnd rnds

        ; undo the chaining: block 0 with the stored chaining block,
        ; block k with ciphertext block k-1
        movdqu  xmm7, [CBC_CHAIN_OFS + ctx]
        pxor    xmm0, xmm7
        movdqu  xmm7, [0*16 + input]
        pxor    xmm1, xmm7
        movdqu  xmm7, [1*16 + input]
        pxor    xmm2, xmm7
        movdqu  xmm7, [2*16 + input]
        pxor    xmm3, xmm7
        movdqu  xmm7, [3*16 + input]
        pxor    xmm4, xmm7
        movdqu  xmm7, [4*16 + input]
        pxor    xmm5, xmm7
        movdqu  xmm7, [5*16 + input]
        pxor    xmm6, xmm7
        movdqu  xmm7, [6*16 + input]    ; becomes the next chaining block

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [CBC_CHAIN_OFS + ctx], xmm7

        lea     input,  [7*16 + input]
        lea     output, [7*16 + output]
        sub     inputLen, 7*16
        jmp     cbcd_x7

; ---- one block per iteration ------------------------------------------------
cbcd_tail:

        movdqu  xmm3, [CBC_CHAIN_OFS + ctx]     ; xmm3 = chaining block

cbcd_x1:
        cmp     inputLen, 1*16
        jb      cbcd_done

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0              ; keep the ciphertext for chaining
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesdec  xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor    xmm3, xmm0

        movdqu  [output], xmm3
        movdqa  xmm3, xmm4

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     cbcd_x1

cbcd_done:
        movdqu  [CBC_CHAIN_OFS + ctx], xmm3
        xor     eax, eax
        pop     inputLen
        ret
ENDM

ALIGN 16
intel_aes_encrypt_cbc_128 PROC
        gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

ALIGN 16
intel_aes_encrypt_cbc_192 PROC
        gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

ALIGN 16
intel_aes_encrypt_cbc_256 PROC
        gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

ALIGN 16
intel_aes_decrypt_cbc_128 PROC
        gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

ALIGN 16
intel_aes_decrypt_cbc_192 PROC
        gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

ALIGN 16
intel_aes_decrypt_cbc_256 PROC
        gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP


;==============================================================================
; CTR
;==============================================================================

ctrCtx  textequ <esi>           ; counter-context pointer, later the counter
CTR     textequ <ebx>           ; scratch for one byte-swapped counter value

; Byte offsets inside the counter context, as used by the code below.
; NOTE(review): field meanings are read off the accesses in this file -
; confirm against the C structure declaration.
CTR_CX_KEYCTX   equ 4           ; pointer to the round-key context
CTR_CX_COUNTER  equ 8           ; 16-byte counter block

;------------------------------------------------------------------------------
; ctr_next_block slotIdx
;   Increments the 32-bit counter held in ctrCtx and writes it, byte-swapped
;   and XORed with the last dword of round key 0, into the last dword of
;   stack slot slotIdx.  Clobbers CTR and the flags.
;------------------------------------------------------------------------------
ctr_next_block MACRO slotIdx
        inc     ctrCtx
        mov     CTR, ctrCtx
        bswap   CTR
        xor     CTR, [ctx + 3*4]
        mov     [esp + (slotIdx)*16 + 3*4], CTR
ENDM

;------------------------------------------------------------------------------
; gen_aes_ctr_func rnds
;   Emits the complete body (including ret) of a CTR routine; rnds is the
;   index of the last round key.
;   Stack arguments (dwords above the return address):
;     0 -> counter context, 1 -> output, 4 -> input, 5 -> inputLen;
;     2 and 3 are not read.
;
;   Working state:
;     ebp           = esp after the four pushes (frame anchor)
;     [esp .. +112) = seven 16-byte slots, each holding an upcoming counter
;                     block already XORed with round key 0; esp is aligned
;                     to 16 bytes
;     ctrCtx        = last dword of the newest counter block, byte-swapped
;                     to host order
;
;   Only the last four bytes of the counter block are incremented; a wrap
;   of that 32-bit value does not carry into the other twelve bytes.
;
;   Seven blocks are processed per iteration while at least 7*16 bytes
;   remain.  The single-block loop then consumes one slot per block by
;   advancing esp; it runs at most six times because fewer than 7*16 bytes
;   are left when it is entered.  On exit the next unused counter block is
;   written back to the counter context.
;
;   Returns 0 in eax.  edi, esi, ebx, ebp and esp are restored.
;   Clobbers eax, ecx, edx, xmm0-xmm7 and the flags.
;------------------------------------------------------------------------------
gen_aes_ctr_func MACRO rnds
LOCAL   ctr_x7
LOCAL   ctr_x1
LOCAL   ctr_done

        push    inputLen
        push    ctrCtx
        push    CTR
        push    ebp

        ; 4*5 skips the four saved registers and the return address
        mov     ctrCtx,   [esp + 4*5 + 0*4]
        mov     output,   [esp + 4*5 + 1*4]
        mov     input,    [esp + 4*5 + 4*4]
        mov     inputLen, [esp + 4*5 + 5*4]

        mov     ctx, [CTR_CX_KEYCTX + ctrCtx]

        mov     ebp, esp
        sub     esp, 7*16
        and     esp, -16

        movdqu  xmm0, [CTR_CX_COUNTER + ctrCtx]
        mov     ctrCtx, [ctrCtx + CTR_CX_COUNTER + 3*4]
        bswap   ctrCtx
        movdqu  xmm1, [ctx + 0*16]

        pxor    xmm0, xmm1              ; counter block ^ round key 0

        ; seed all seven slots with the current block ...
    FOR slot, <0,1,2,3,4,5,6>
        movdqa  [esp + slot*16], xmm0
    ENDM

        ; ... then patch the counter dword of slots 1..6
    FOR slot, <1,2,3,4,5,6>
        ctr_next_block slot
    ENDM


; ---- seven blocks per iteration ---------------------------------------------
ctr_x7:
        cmp     inputLen, 7*16
        jb      ctr_x1

        movdqu  xmm0, [0*16 + esp]
        movdqu  xmm1, [1*16 + esp]
        movdqu  xmm2, [2*16 + esp]
        movdqu  xmm3, [3*16 + esp]
        movdqu  xmm4, [4*16 + esp]
        movdqu  xmm5, [5*16 + esp]
        movdqu  xmm6, [6*16 + esp]

        ; rounds 1..7 are interleaved with refilling slots 0..6 for the
        ; next iteration
        i = 1
        WHILE i LE 7
            aes_rnd i
            ctr_next_block i-1
            i = i+1
        ENDM
        WHILE i LT rnds
            aes_rnd i
            i = i+1
        ENDM
        aes_last_rnd rnds

        ; keystream ^ input
        movdqu  xmm7, [0*16 + input]
        pxor    xmm0, xmm7
        movdqu  xmm7, [1*16 + input]
        pxor    xmm1, xmm7
        movdqu  xmm7, [2*16 + input]
        pxor    xmm2, xmm7
        movdqu  xmm7, [3*16 + input]
        pxor    xmm3, xmm7
        movdqu  xmm7, [4*16 + input]
        pxor    xmm4, xmm7
        movdqu  xmm7, [5*16 + input]
        pxor    xmm5, xmm7
        movdqu  xmm7, [6*16 + input]
        pxor    xmm6, xmm7

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6

        lea     input,  [7*16 + input]
        lea     output, [7*16 + output]
        sub     inputLen, 7*16
        jmp     ctr_x7


; ---- one block per iteration ------------------------------------------------
ctr_x1:
        cmp     inputLen, 1*16
        jb      ctr_done

        movdqu  xmm0, [esp]
        add     esp, 16                 ; this slot is now used up

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesenc  xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     ctr_x1

ctr_done:

        ; strip round key 0 from the next unused slot and store it back as
        ; the counter block
        mov     ctrCtx, [ebp + 4*5 + 0*4]
        movdqu  xmm0, [esp]
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1
        movdqu  [CTR_CX_COUNTER + ctrCtx], xmm0


        xor     eax, eax
        mov     esp, ebp
        pop     ebp
        pop     CTR
        pop     ctrCtx
        pop     inputLen
        ret
ENDM


ALIGN 16
intel_aes_encrypt_ctr_128 PROC
        gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP

ALIGN 16
; CTR entry point whose body is generated by gen_aes_ctr_func with 12 as the
; index of the last round key.
intel_aes_encrypt_ctr_192 PROC
        gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP

; CTR entry point whose body is generated by gen_aes_ctr_func with 14 as the
; index of the last round key.
ALIGN 16
intel_aes_encrypt_ctr_256 PROC
        gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP


END