; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at
; http://mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com

;-------------------------------------------------------------------------------
; AES-NI accelerated AES for NSS -- MASM (ml64), Microsoft x64 calling
; convention.  Provides ECB bulk en/decryption and key expansion for
; AES-128/192/256 (CBC/CTR follow later in this file).
;
; Context layout as used below: round keys start at ctx+0, one 16-byte round
; key per slot, (rnds+1) keys total (11/13/15 for AES-128/192/256).
; NOTE(review): assumed to match the C-side context struct -- confirm.
;-------------------------------------------------------------------------------

.DATA
ALIGN 16
; PSHUFB masks for the key-schedule core: they broadcast the key word that
; must go through RotWord/SubWord into every dword lane so that AESENCLAST
; can perform SubWord (ShiftRows is harmless on a lane-replicated value).
Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
; Round-constant vectors: Rcon starts at 1 and is doubled (pslld xmm0,1)
; every round; Lcon2 = 1bh is the value after GF(2^8) reduction wraps.
Lcon1 dd 1,1,1,1
Lcon2 dd 1bh,1bh,1bh,1bh

.CODE

; Win64 integer argument registers, aliased for readability.
; Args 5 and 6 (input pointer, input length) arrive on the caller's stack.
ctx textequ <rcx>
output textequ <rdx>
input textequ <r8>
inputLen textequ <r9d>

;-------------------------------------------------------------------------------
; aes_rnd i -- one AES encryption round with round key i applied to the
; eight block registers xmm0..xmm7.  Clobbers xmm8 (holds the round key).
;-------------------------------------------------------------------------------
aes_rnd MACRO i
        movdqu xmm8, [i*16 + ctx]
        aesenc xmm0, xmm8
        aesenc xmm1, xmm8
        aesenc xmm2, xmm8
        aesenc xmm3, xmm8
        aesenc xmm4, xmm8
        aesenc xmm5, xmm8
        aesenc xmm6, xmm8
        aesenc xmm7, xmm8
ENDM

;-------------------------------------------------------------------------------
; aes_last_rnd i -- final encryption round (no MixColumns) with round key i
; on xmm0..xmm7.  Clobbers xmm8.
;-------------------------------------------------------------------------------
aes_last_rnd MACRO i
        movdqu xmm8, [i*16 + ctx]
        aesenclast xmm0, xmm8
        aesenclast xmm1, xmm8
        aesenclast xmm2, xmm8
        aesenclast xmm3, xmm8
        aesenclast xmm4, xmm8
        aesenclast xmm5, xmm8
        aesenclast xmm6, xmm8
        aesenclast xmm7, xmm8
ENDM

;-------------------------------------------------------------------------------
; aes_dec_rnd i -- one AES decryption round (Equivalent Inverse Cipher) with
; round key i on xmm0..xmm7.  Clobbers xmm8.
;-------------------------------------------------------------------------------
aes_dec_rnd MACRO i
        movdqu xmm8, [i*16 + ctx]
        aesdec xmm0, xmm8
        aesdec xmm1, xmm8
        aesdec xmm2, xmm8
        aesdec xmm3, xmm8
        aesdec xmm4, xmm8
        aesdec xmm5, xmm8
        aesdec xmm6, xmm8
        aesdec xmm7, xmm8
ENDM

;-------------------------------------------------------------------------------
; aes_dec_last_rnd i -- final decryption round with round key i on
; xmm0..xmm7.  Clobbers xmm8.
;-------------------------------------------------------------------------------
aes_dec_last_rnd MACRO i
        movdqu xmm8, [i*16 + ctx]
        aesdeclast xmm0, xmm8
        aesdeclast xmm1, xmm8
        aesdeclast xmm2, xmm8
        aesdeclast xmm3, xmm8
        aesdeclast xmm4, xmm8
        aesdeclast xmm5, xmm8
        aesdeclast xmm6, xmm8
        aesdeclast xmm7, xmm8
ENDM

;-------------------------------------------------------------------------------
; gen_aes_ecb_func enc, rnds -- emits the body of one ECB entry point.
;   enc  = 1 for encryption, 0 for decryption (selects instruction aliases)
;   rnds = number of AES rounds (10/12/14)
; In:  rcx = ctx (round keys), rdx = output, stack args 5/6 = input, inputLen.
; Processes 8 blocks per main-loop iteration, then single 16-byte blocks;
; any trailing partial block (< 16 bytes) is left untouched.  Returns 0 in
; rax.  xmm6-xmm8 are callee-saved under Win64 and are spilled to the stack.
;-------------------------------------------------------------------------------
gen_aes_ecb_func MACRO enc, rnds

LOCAL loop8
LOCAL loop1
LOCAL bail

        xor inputLen, inputLen          ; cleared, then fully overwritten below (redundant but harmless)
        mov input, [rsp + 1*8 + 8*4]    ; arg 5: past return address (8) + 32-byte shadow space
        mov inputLen, [rsp + 1*8 + 8*5] ; arg 6 (dword)

        sub rsp, 3*16

        ; Spill Win64 callee-saved XMM registers used below.
        movdqu [rsp + 0*16], xmm6
        movdqu [rsp + 1*16], xmm7
        movdqu [rsp + 2*16], xmm8

loop8:
        cmp inputLen, 8*16
        jb loop1

        ; Load 8 plaintext/ciphertext blocks.
        movdqu xmm0, [0*16 + input]
        movdqu xmm1, [1*16 + input]
        movdqu xmm2, [2*16 + input]
        movdqu xmm3, [3*16 + input]
        movdqu xmm4, [4*16 + input]
        movdqu xmm5, [5*16 + input]
        movdqu xmm6, [6*16 + input]
        movdqu xmm7, [7*16 + input]

        ; Whitening: XOR round key 0 into all 8 blocks.
        movdqu xmm8, [0*16 + ctx]
        pxor xmm0, xmm8
        pxor xmm1, xmm8
        pxor xmm2, xmm8
        pxor xmm3, xmm8
        pxor xmm4, xmm8
        pxor xmm5, xmm8
        pxor xmm6, xmm8
        pxor xmm7, xmm8

; Select encrypt/decrypt instruction aliases at assembly time.
IF enc eq 1
        rnd textequ <aes_rnd>
        lastrnd textequ <aes_last_rnd>
        aesinst textequ <aesenc>
        aeslastinst textequ <aesenclast>
ELSE
        rnd textequ <aes_dec_rnd>
        lastrnd textequ <aes_dec_last_rnd>
        aesinst textequ <aesdec>
        aeslastinst textequ <aesdeclast>
ENDIF

        ; Rounds 1..rnds-1, then the final round.
        i = 1
        WHILE i LT rnds
            rnd i
            i = i+1
        ENDM
        lastrnd rnds

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6
        movdqu [7*16 + output], xmm7

        lea input, [8*16 + input]
        lea output, [8*16 + output]
        sub inputLen, 8*16
        jmp loop8

loop1:
        ; Tail: one block at a time (keys read from memory each round).
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [input]
        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7                 ; whitening

        i = 1
        WHILE i LT rnds
            movdqu xmm7, [i*16 + ctx]
            aesinst xmm0, xmm7
            i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        xor rax, rax                    ; return 0 (success)

        ; Restore callee-saved XMM registers.
        movdqu xmm6, [rsp + 0*16]
        movdqu xmm7, [rsp + 1*16]
        movdqu xmm8, [rsp + 2*16]
        add rsp, 3*16
        ret
ENDM

; ECB entry points: one per key size and direction.
intel_aes_encrypt_ecb_128 PROC
gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

intel_aes_encrypt_ecb_192 PROC
gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP

intel_aes_encrypt_ecb_256 PROC
gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

intel_aes_decrypt_ecb_128 PROC
gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

intel_aes_decrypt_ecb_192 PROC
gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

intel_aes_decrypt_ecb_256 PROC
gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP

; Register aliases for the key-expansion routines:
;   KEY = user key pointer (arg 1), KS = key-schedule output (arg 2),
;   ITR = scratch / loop counter (arg-3 register reused as scratch).
KEY textequ <rcx>
KS textequ <rdx>
ITR textequ <r8>

;-------------------------------------------------------------------------------
; intel_aes_encrypt_init_128(KEY, KS) -- expand a 16-byte key into 11 round
; keys at KS.  Per round: broadcast+RotWord the last key word via PSHUFB
; (Lmask), SubWord + Rcon-XOR via AESENCLAST (Rcon vector in xmm0, doubled
; each round), then fold in the shifted previous key (three pslldq/pxor
; steps).  Rounds 9 and 10 use Lcon2 once Rcon wraps in GF(2^8).
; Clobbers xmm0-xmm4, ITR.
;-------------------------------------------------------------------------------
intel_aes_encrypt_init_128 PROC

        movdqu xmm1, [KEY]
        movdqu [KS], xmm1               ; round key 0 = user key
        movdqa xmm2, xmm1

        lea ITR, Lcon1
        movdqa xmm0, [ITR]              ; xmm0 = Rcon (starts at 1)
        lea ITR, Lmask
        movdqa xmm4, [ITR]              ; xmm4 = RotWord/broadcast shuffle mask

        mov ITR, 8                      ; rounds 1..8 use Lcon1-derived Rcon

Lenc_128_ks_loop:
        lea KS, [16 + KS]
        dec ITR

        pshufb xmm2, xmm4               ; RotWord, replicated into all lanes
        aesenclast xmm2, xmm0           ; SubWord + XOR Rcon
        pslld xmm0, 1                   ; Rcon *= 2 for next round
        movdqa xmm3, xmm1
        pslldq xmm3, 4                  ; fold previous key words left-to-right
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [KS], xmm1
        movdqa xmm2, xmm1

        jne Lenc_128_ks_loop            ; flags still hold DEC result: SSE/AES ops above do not touch EFLAGS

        ; Rounds 9 and 10: Rcon continues as 1bh, 36h (Lcon2 then doubled).
        lea ITR, Lcon2
        movdqa xmm0, [ITR]

        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1                   ; 1bh -> 36h for the final round
        movdqa xmm3, xmm1
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [16 + KS], xmm1          ; round key 9
        movdqa xmm2, xmm1

        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        movdqa xmm3, xmm1
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [32 + KS], xmm1          ; round key 10
        movdqa xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP

;-------------------------------------------------------------------------------
; intel_aes_decrypt_init_128(KEY, KS) -- decryption schedule for the
; Equivalent Inverse Cipher: run the encryption expansion, then reverse the
; round-key order and apply AESIMC (InvMixColumns) to keys 1..9 (first and
; last stay raw).
; NOTE(review): the CALL below allocates no 32-byte shadow space and leaves
; rsp 8-misaligned at the call site; this is only safe because the local
; callee uses neither -- revisit if the callee ever changes.
;-------------------------------------------------------------------------------
intel_aes_decrypt_init_128 PROC

        push KS                         ; preserve args across the call
        push KEY

        call intel_aes_encrypt_init_128

        pop KEY
        pop KS

        ; Swap key 0 <-> key 10 (no IMC on the outermost keys).
        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [10*16 + KS]
        movdqu [10*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        ; Swap+IMC pairs (1,9) (2,8) (3,7) (4,6).
        i = 1
        WHILE i LT 5
            movdqu xmm0, [i*16 + KS]
            movdqu xmm1, [(10-i)*16 + KS]

            aesimc xmm0, xmm0
            aesimc xmm1, xmm1

            movdqu [(10-i)*16 + KS], xmm0
            movdqu [i*16 + KS], xmm1

            i = i+1
        ENDM

        ; Middle key 5 is its own mirror: IMC in place.
        movdqu xmm0, [5*16 + KS]
        aesimc xmm0, xmm0
        movdqu [5*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP

;-------------------------------------------------------------------------------
; intel_aes_encrypt_init_192(KEY, KS) -- expand a 24-byte key into 13 round
; keys at KS.  Working state: xmm1 = low 128 bits of the 192-bit key,
; xmm3 = high 64 bits.  Each loop iteration derives two 192-bit steps and
; repacks them into three 16-byte round keys ([16/32/48 + KS]); 4 iterations
; plus the final store produce keys 1..12.  Clobbers xmm0-xmm7, ITR.
;-------------------------------------------------------------------------------
intel_aes_encrypt_init_192 PROC

        sub rsp, 16*2
        movdqu [16*0 + rsp], xmm6       ; xmm6/xmm7 are callee-saved on Win64
        movdqu [16*1 + rsp], xmm7

        movdqu xmm1, [KEY]              ; key bytes 0..15
        mov ITR, [16 + KEY]             ; key bytes 16..23
        movd xmm3, ITR

        movdqu [KS], xmm1               ; round key 0
        movdqa xmm5, xmm3

        lea ITR, Lcon1
        movdqu xmm0, [ITR]              ; Rcon vector
        lea ITR, Lmask192
        movdqu xmm4, [ITR]              ; shuffle mask selecting the rotated key word

        mov ITR, 4                      ; 4 iterations x 2 key-schedule steps

Lenc_192_ks_loop:
        ; --- first 192-bit key-schedule step ---
        movdqa xmm2, xmm3
        pshufb xmm2, xmm4               ; RotWord of the last key word, broadcast
        aesenclast xmm2, xmm0           ; SubWord + Rcon
        pslld xmm0, 1                   ; next Rcon

        movdqa xmm6, xmm1
        movdqa xmm7, xmm3
        pslldq xmm6, 4                  ; fold low half
        pslldq xmm7, 4                  ; fold high 64 bits
        pxor xmm1, xmm6
        pxor xmm3, xmm7
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pxor xmm1, xmm2
        pshufd xmm2, xmm1, 0ffh         ; broadcast new word 3
        pxor xmm3, xmm2

        ; Repack the 24-byte key stream into 16-byte round-key rows.
        movdqa xmm6, xmm1
        shufpd xmm5, xmm1, 00h
        shufpd xmm6, xmm3, 01h

        movdqu [16 + KS], xmm5
        movdqu [32 + KS], xmm6

        ; --- second 192-bit key-schedule step ---
        movdqa xmm2, xmm3
        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1

        movdqa xmm6, xmm1
        movdqa xmm7, xmm3
        pslldq xmm6, 4
        pslldq xmm7, 4
        pxor xmm1, xmm6
        pxor xmm3, xmm7
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pxor xmm1, xmm2
        pshufd xmm2, xmm1, 0ffh
        pxor xmm3, xmm2

        movdqu [48 + KS], xmm1
        movdqa xmm5, xmm3

        lea KS, [48 + KS]               ; 3 round keys written per iteration

        dec ITR
        jnz Lenc_192_ks_loop

        movdqu [16 + KS], xmm5          ; final round key (12)

        movdqu xmm7, [16*1 + rsp]
        movdqu xmm6, [16*0 + rsp]
        add rsp, 16*2
        ret
intel_aes_encrypt_init_192 ENDP

;-------------------------------------------------------------------------------
; intel_aes_decrypt_init_192(KEY, KS) -- Equivalent Inverse Cipher schedule
; for AES-192: encryption expansion, then reverse key order and AESIMC keys
; 1..11 (first/last stay raw; middle key 6 is IMC'd in place).
; NOTE(review): same shadow-space/alignment shortcut as the 128-bit variant.
;-------------------------------------------------------------------------------
intel_aes_decrypt_init_192 PROC
        push KS
        push KEY

        call intel_aes_encrypt_init_192

        pop KEY
        pop KS

        ; Swap key 0 <-> key 12 (no IMC).
        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [12*16 + KS]
        movdqu [12*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        ; Swap+IMC pairs (1,11) .. (5,7).
        i = 1
        WHILE i LT 6
            movdqu xmm0, [i*16 + KS]
            movdqu xmm1, [(12-i)*16 + KS]

            aesimc xmm0, xmm0
            aesimc xmm1, xmm1

            movdqu [(12-i)*16 + KS], xmm0
            movdqu [i*16 + KS], xmm1

            i = i+1
        ENDM

        ; Middle key 6: IMC in place.
        movdqu xmm0, [6*16 + KS]
        aesimc xmm0, xmm0
        movdqu [6*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP

;-------------------------------------------------------------------------------
; intel_aes_encrypt_init_256(KEY, KS) -- expand a 32-byte key into 15 round
; keys at KS.  Working state: xmm1/xmm3 = the two 128-bit key halves (round
; keys 0 and 1, stored up front).  Each loop iteration derives one even key
; (RotWord+SubWord+Rcon) and one odd key (SubWord only: AESENCLAST against
; the zero register xmm6); 6 iterations + 1 final even key = keys 2..14.
; Clobbers xmm0-xmm6, ITR (xmm6/xmm7 are spilled).
;-------------------------------------------------------------------------------
intel_aes_encrypt_init_256 PROC
        sub rsp, 16*2
        movdqu [16*0 + rsp], xmm6
        movdqu [16*1 + rsp], xmm7

        movdqu xmm1, [16*0 + KEY]
        movdqu xmm3, [16*1 + KEY]

        movdqu [16*0 + KS], xmm1        ; round keys 0 and 1 = user key
        movdqu [16*1 + KS], xmm3

        lea ITR, Lcon1
        movdqu xmm0, [ITR]              ; Rcon vector
        lea ITR, Lmask256
        movdqu xmm5, [ITR]              ; RotWord/broadcast shuffle mask

        pxor xmm6, xmm6                 ; zero: SubWord-only AESENCLAST for odd keys

        mov ITR, 6

Lenc_256_ks_loop:

        ; Even round key: RotWord + SubWord + Rcon, fold into xmm1.
        movdqa xmm2, xmm3
        pshufb xmm2, xmm5
        aesenclast xmm2, xmm0
        pslld xmm0, 1                   ; next Rcon
        movdqa xmm4, xmm1
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pxor xmm1, xmm2
        movdqu [16*2 + KS], xmm1

        ; Odd round key: SubWord only (no rotate, no Rcon), fold into xmm3.
        pshufd xmm2, xmm1, 0ffh         ; broadcast word 3 of the new even key
        aesenclast xmm2, xmm6           ; SubWord; XOR with zero
        movdqa xmm4, xmm3
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pxor xmm3, xmm2
        movdqu [16*3 + KS], xmm3

        lea KS, [32 + KS]               ; 2 round keys written per iteration
        dec ITR
        jnz Lenc_256_ks_loop

        ; Final (14th) round key: one more even step.
        movdqa xmm2, xmm3
        pshufb xmm2, xmm5
        aesenclast xmm2, xmm0
        movdqa xmm4, xmm1
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pxor xmm1, xmm2
        movdqu [16*2 + KS], xmm1

        movdqu xmm7, [16*1 + rsp]
        movdqu xmm6, [16*0 + rsp]
        add rsp, 16*2
        ret

intel_aes_encrypt_init_256 ENDP

;-------------------------------------------------------------------------------
; intel_aes_decrypt_init_256(KEY, KS) -- Equivalent Inverse Cipher schedule
; for AES-256: encryption expansion, then reverse key order and AESIMC keys
; 1..13 (first/last stay raw; middle key 7 is IMC'd in place).
; NOTE(review): the CALL allocates no 32-byte Win64 shadow space and leaves
; rsp 8-misaligned at the call site; safe only because the local callee uses
; neither -- revisit if the callee ever changes.
;-------------------------------------------------------------------------------
intel_aes_decrypt_init_256 PROC
        push KS                         ; preserve args across the call
        push KEY

        call intel_aes_encrypt_init_256

        pop KEY
        pop KS

        ; Swap key 0 <-> key 14 (no IMC on the outermost keys).
        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [14*16 + KS]
        movdqu [14*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        ; Swap+IMC pairs (1,13) .. (6,8).
        i = 1
        WHILE i LT 7
            movdqu xmm0, [i*16 + KS]
            movdqu xmm1, [(14-i)*16 + KS]

            aesimc xmm0, xmm0
            aesimc xmm1, xmm1

            movdqu [(14-i)*16 + KS], xmm0
            movdqu [i*16 + KS], xmm1

            i = i+1
        ENDM

        ; Middle key 7: IMC in place.
        movdqu xmm0, [7*16 + KS]
        aesimc xmm0, xmm0
        movdqu [7*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP

;-------------------------------------------------------------------------------
; gen_aes_cbc_enc_func rnds -- emits the body of a CBC encryption entry
; point.  CBC encryption is inherently serial (each block chains on the
; previous ciphertext), so this is a one-block-at-a-time loop.
; In:  rcx = ctx (round keys at +0, IV at +256), rdx = output,
;      stack args 5/6 = input, inputLen.
; xmm0 carries the running CBC state (IV, then each ciphertext); it is
; loaded from ctx+256 on entry and written back at exit so chaining spans
; calls.  Round keys 0..5 are cached in xmm2..xmm7; later keys are read
; from memory each block.  Returns 0 in rax; partial tail blocks ignored.
;-------------------------------------------------------------------------------
gen_aes_cbc_enc_func MACRO rnds

LOCAL loop1
LOCAL bail

        mov input, [rsp + 1*8 + 8*4]    ; arg 5: past return addr + shadow space
        mov inputLen, [rsp + 1*8 + 8*5] ; arg 6

        sub rsp, 3*16

        ; Spill Win64 callee-saved XMM registers used below.
        movdqu [rsp + 0*16], xmm6
        movdqu [rsp + 1*16], xmm7
        movdqu [rsp + 2*16], xmm8

        movdqu xmm0, [256+ctx]          ; xmm0 = IV / previous ciphertext

        ; Cache round keys 0..5 in registers for the serial loop.
        movdqu xmm2, [0*16 + ctx]
        movdqu xmm3, [1*16 + ctx]
        movdqu xmm4, [2*16 + ctx]
        movdqu xmm5, [3*16 + ctx]
        movdqu xmm6, [4*16 + ctx]
        movdqu xmm7, [5*16 + ctx]

loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm1, [input]
        pxor xmm1, xmm2                 ; plaintext XOR round key 0
        pxor xmm0, xmm1                 ; chain with previous ciphertext (CBC)

        aesenc xmm0, xmm3
        aesenc xmm0, xmm4
        aesenc xmm0, xmm5
        aesenc xmm0, xmm6
        aesenc xmm0, xmm7

        ; Remaining rounds 6..rnds-1 from memory, then the final round.
        i = 6
        WHILE i LT rnds
            movdqu xmm8, [i*16 + ctx]
            aesenc xmm0, xmm8
            i = i+1
        ENDM
        movdqu xmm8, [rnds*16 + ctx]
        aesenclast xmm0, xmm8

        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        movdqu [256+ctx], xmm0          ; persist chaining value for the next call

        xor rax, rax                    ; return 0 (success)

        movdqu xmm6, [rsp + 0*16]
        movdqu xmm7, [rsp + 1*16]
        movdqu xmm8, [rsp + 2*16]
        add rsp, 3*16
        ret

ENDM

;-------------------------------------------------------------------------------
; gen_aes_cbc_dec_func rnds -- emits the body of a CBC decryption entry
; point.  Decryption parallelizes: 8 blocks are decrypted at once, then each
; is XORed with the previous ciphertext block (IV from ctx+256 for the
; first).  A one-block tail loop handles the remainder; the last ciphertext
; block is saved back to ctx+256 as the next IV.
; In:  rcx = ctx, rdx = output, stack args 5/6 = input, inputLen.
; Returns 0 in rax; partial tail blocks ignored.
;-------------------------------------------------------------------------------
gen_aes_cbc_dec_func MACRO rnds

LOCAL loop8
LOCAL loop1
LOCAL dec1
LOCAL bail

        mov input, [rsp + 1*8 + 8*4]    ; arg 5
        mov inputLen, [rsp + 1*8 + 8*5] ; arg 6

        sub rsp, 3*16

        ; Spill Win64 callee-saved XMM registers used below.
        movdqu [rsp + 0*16], xmm6
        movdqu [rsp + 1*16], xmm7
        movdqu [rsp + 2*16], xmm8

loop8:
        cmp inputLen, 8*16
        jb dec1

        ; Load 8 ciphertext blocks.
        movdqu xmm0, [0*16 + input]
        movdqu xmm1, [1*16 + input]
        movdqu xmm2, [2*16 + input]
        movdqu xmm3, [3*16 + input]
        movdqu xmm4, [4*16 + input]
        movdqu xmm5, [5*16 + input]
        movdqu xmm6, [6*16 + input]
        movdqu xmm7, [7*16 + input]

        ; Whitening with round key 0.
        movdqu xmm8, [0*16 + ctx]
        pxor xmm0, xmm8
        pxor xmm1, xmm8
        pxor xmm2, xmm8
        pxor xmm3, xmm8
        pxor xmm4, xmm8
        pxor xmm5, xmm8
        pxor xmm6, xmm8
        pxor xmm7, xmm8

        i = 1
        WHILE i LT rnds
            aes_dec_rnd i
            i = i+1
        ENDM
        aes_dec_last_rnd rnds

        ; CBC unchain: block i ^= ciphertext(i-1); block 0 uses the saved IV.
        movdqu xmm8, [256 + ctx]
        pxor xmm0, xmm8
        movdqu xmm8, [0*16 + input]
        pxor xmm1, xmm8
        movdqu xmm8, [1*16 + input]
        pxor xmm2, xmm8
        movdqu xmm8, [2*16 + input]
        pxor xmm3, xmm8
        movdqu xmm8, [3*16 + input]
        pxor xmm4, xmm8
        movdqu xmm8, [4*16 + input]
        pxor xmm5, xmm8
        movdqu xmm8, [5*16 + input]
        pxor xmm6, xmm8
        movdqu xmm8, [6*16 + input]
        pxor xmm7, xmm8
        movdqu xmm8, [7*16 + input]     ; last ciphertext = next IV

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6
        movdqu [7*16 + output], xmm7
        movdqu [256 + ctx], xmm8        ; persist IV for next batch/call

        lea input, [8*16 + input]
        lea output, [8*16 + output]
        sub inputLen, 8*16
        jmp loop8
dec1:

        movdqu xmm3, [256 + ctx]        ; xmm3 = IV / previous ciphertext

loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [input]
        movdqa xmm4, xmm0               ; keep raw ciphertext: next block's IV
        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7                 ; whitening

        i = 1
        WHILE i LT rnds
            movdqu xmm7, [i*16 + ctx]
            aesdec xmm0, xmm7
            i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor xmm3, xmm0                 ; unchain with previous ciphertext

        movdqu [output], xmm3
        movdqa xmm3, xmm4               ; advance the chaining value

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        movdqu [256 + ctx], xmm3        ; persist IV for the next call
        xor rax, rax                    ; return 0 (success)

        movdqu xmm6, [rsp + 0*16]
        movdqu xmm7, [rsp + 1*16]
        movdqu xmm8, [rsp + 2*16]
        add rsp, 3*16
        ret
ENDM

; CBC entry points: one per key size and direction.
intel_aes_encrypt_cbc_128 PROC
gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

intel_aes_encrypt_cbc_192 PROC
gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

intel_aes_encrypt_cbc_256 PROC
gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

intel_aes_decrypt_cbc_128 PROC
gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

intel_aes_decrypt_cbc_192 PROC
gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

intel_aes_decrypt_cbc_256 PROC
gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP

; CTR-mode register aliases.  The CTR context (arg 1) holds a pointer to the
; key schedule at +8 and the 16-byte counter block at +16 (as read below);
; CTR/CTRSave manipulate the big-endian low 32 bits of the counter.
ctrCtx textequ <r10>
CTR textequ <r11d>
CTRSave textequ <eax>

;-------------------------------------------------------------------------------
; gen_aes_ctr_func rnds -- emits the body of a CTR encryption entry point.
; In:  rcx = ctrCtx (key-schedule ptr at +8, counter block at +16),
;      rdx = output, stack args 5/6 = input, inputLen.
; Strategy: keep 8 counter blocks in a 16-aligned stack array, each already
; XORed with round key 0 ("pre-whitened"), so the per-block work starts at
; round 1.  Only the last (big-endian) dword of the counter is incremented;
; updates merge the new dword with key 0 via `xor CTR, [ctx + 3*4]` to keep
; the stored blocks pre-whitened.  The 8-block loop interleaves counter
; refresh with the first 8 AES rounds.  On exit the next unused counter is
; un-whitened and stored back to ctrCtx+16.  Returns 0 in rax; partial tail
; blocks ignored.
; NOTE(review): only the low 32 counter bits wrap -- confirm callers bound
; a single call well below 2^32 blocks.
;-------------------------------------------------------------------------------
gen_aes_ctr_func MACRO rnds

LOCAL loop8
LOCAL loop1
LOCAL enc1
LOCAL bail

        mov input, [rsp + 8*1 + 4*8]    ; arg 5
        mov inputLen, [rsp + 8*1 + 5*8] ; arg 6

        mov ctrCtx, ctx
        mov ctx, [8+ctrCtx]             ; ctx = round-key schedule pointer

        ; Spill Win64 callee-saved XMM registers used below.
        sub rsp, 3*16
        movdqu [rsp + 0*16], xmm6
        movdqu [rsp + 1*16], xmm7
        movdqu [rsp + 2*16], xmm8

        ; Build a 16-aligned frame for the 8 pre-whitened counter blocks.
        push rbp
        mov rbp, rsp
        sub rsp, 8*16
        and rsp, -16

        movdqu xmm0, [16+ctrCtx]        ; current counter block
        mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
        bswap CTRSave                   ; keep the counter little-endian in eax
        movdqu xmm1, [ctx + 0*16]

        pxor xmm0, xmm1                 ; pre-whiten: counter XOR round key 0

        ; Seed all 8 slots with the same pre-whitened block, then patch in
        ; counter+1 .. counter+7 below.
        movdqa [rsp + 0*16], xmm0
        movdqa [rsp + 1*16], xmm0
        movdqa [rsp + 2*16], xmm0
        movdqa [rsp + 3*16], xmm0
        movdqa [rsp + 4*16], xmm0
        movdqa [rsp + 5*16], xmm0
        movdqa [rsp + 6*16], xmm0
        movdqa [rsp + 7*16], xmm0

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR                       ; back to big-endian block order
        xor CTR, DWORD PTR [ctx + 3*4]  ; re-apply key 0 to the patched dword
        mov DWORD PTR [rsp + 1*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 2*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 3*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 4*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 5*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 6*16 + 3*4], CTR

        inc CTRSave
        mov CTR, CTRSave
        bswap CTR
        xor CTR, DWORD PTR [ctx + 3*4]
        mov DWORD PTR [rsp + 7*16 + 3*4], CTR


loop8:
        cmp inputLen, 8*16
        jb loop1

        ; Pick up 8 pre-whitened counter blocks (round 0 already applied).
        movdqu xmm0, [0*16 + rsp]
        movdqu xmm1, [1*16 + rsp]
        movdqu xmm2, [2*16 + rsp]
        movdqu xmm3, [3*16 + rsp]
        movdqu xmm4, [4*16 + rsp]
        movdqu xmm5, [5*16 + rsp]
        movdqu xmm6, [6*16 + rsp]
        movdqu xmm7, [7*16 + rsp]

        ; Rounds 1..8, interleaved with refreshing the 8 counter slots for
        ; the next batch (the xmm copies above are already in flight).
        i = 1
        WHILE i LE 8
            aes_rnd i

            inc CTRSave
            mov CTR, CTRSave
            bswap CTR
            xor CTR, DWORD PTR [ctx + 3*4]
            mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR

            i = i+1
        ENDM
        ; Remaining rounds 9..rnds-1, then the final round.
        WHILE i LT rnds
            aes_rnd i
            i = i+1
        ENDM
        aes_last_rnd rnds

        ; XOR keystream with input.
        movdqu xmm8, [0*16 + input]
        pxor xmm0, xmm8
        movdqu xmm8, [1*16 + input]
        pxor xmm1, xmm8
        movdqu xmm8, [2*16 + input]
        pxor xmm2, xmm8
        movdqu xmm8, [3*16 + input]
        pxor xmm3, xmm8
        movdqu xmm8, [4*16 + input]
        pxor xmm4, xmm8
        movdqu xmm8, [5*16 + input]
        pxor xmm5, xmm8
        movdqu xmm8, [6*16 + input]
        pxor xmm6, xmm8
        movdqu xmm8, [7*16 + input]
        pxor xmm7, xmm8

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6
        movdqu [7*16 + output], xmm7

        lea input, [8*16 + input]
        lea output, [8*16 + output]
        sub inputLen, 8*16
        jmp loop8


loop1:
        ; Tail: consume one pre-whitened counter from the stack array per
        ; block (at most 7 iterations, since inputLen < 8*16 here).
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [rsp]
        add rsp, 16                     ; pop the consumed counter slot

        i = 1
        WHILE i LT rnds
            movdqu xmm7, [i*16 + ctx]
            aesenc xmm0, xmm7
            i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu xmm7, [input]
        pxor xmm0, xmm7
        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:

        ; Un-whiten the next unused counter and persist it for the next call.
        movdqu xmm0, [rsp]
        movdqu xmm1, [ctx + 0*16]
        pxor xmm0, xmm1
        movdqu [16+ctrCtx], xmm0


        xor rax, rax                    ; return 0 (success)
        mov rsp, rbp                    ; drop the counter array (incl. loop1 pops)
        pop rbp

        ; rsp now points at the XMM spill area saved before the frame setup.
        movdqu xmm6, [rsp + 0*16]
        movdqu xmm7, [rsp + 1*16]
        movdqu xmm8, [rsp + 2*16]
        add rsp, 3*16

        ret
ENDM

; CTR entry points (CTR encrypt == decrypt): one per key size.
intel_aes_encrypt_ctr_128 PROC
gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP

intel_aes_encrypt_ctr_192 PROC
gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP

intel_aes_encrypt_ctr_256 PROC
gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP


END