; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.
;
; Reading notes (comments only):
;   * NASM syntax, Intel operand order (destination first), 64-bit code.
;   * XMMWORD/YMMWORD/ZMMWORD are defined empty below, so they disappear from
;     operands and act purely as size annotations for the reader.
;   * Several instructions are emitted as raw bytes:
;       DB 102,15,56,220,M          aesenc          (66 0F 38 DC /r)
;       DB 102,15,56,221,M          aesenclast      (66 0F 38 DD /r)
;       DB 102,15,56,0,M            pshufb          (66 0F 38 00 /r)
;       DB 102,15,58,34,M,i         pinsrd          (66 0F 3A 22 /r ib)
;       DB 102,15,58,223,M,i        aeskeygenassist (66 0F 3A DF /r ib)
;       DB 0x0f,0x38,0xf1,0x44,0x24,d   movbe [rsp+d],eax
;       DB 0F3h,0C3h                rep ret
;       DD 0xa548f3fc               cld / rep movsq
;     A byte 65, 68 or 69 following 102 is a REX prefix that selects
;     xmm8-xmm15. The decoded mnemonic is written beside each such line.
;   * Round-key layout used throughout: 16 bytes per round key starting at
;     offset 0 of the schedule, and a DWORD loop count at offset 240. The
;     count equals the number of aesenc rounds (one aesenclast follows);
;     GFp_aes_hw_set_encrypt_key below stores 9 or 13 there.

default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

EXTERN GFp_ia32cap_P
global GFp_aes_hw_encrypt

ALIGN 16
; GFp_aes_hw_encrypt - encrypt a single 16-byte block.
;   In:    rcx = input block, rdx = output block, r8 = key schedule
;   Clobb: eax, r8, flags; xmm0-xmm2 are zeroed before returning.
GFp_aes_hw_encrypt:

    movups xmm2,XMMWORD[rcx]            ; load the input block
    mov eax,DWORD[240+r8]               ; eax = aesenc loop count
    movups xmm0,XMMWORD[r8]             ; round key 0
    movups xmm1,XMMWORD[16+r8]          ; round key 1
    lea r8,[32+r8]                      ; r8 -> round key 2
    xorps xmm2,xmm0                     ; initial key whitening
$L$oop_enc1_1:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    dec eax                             ; sets ZF for the jnz below (movups/lea leave flags alone)
    movups xmm1,XMMWORD[r8]             ; fetch the next round key
    lea r8,[16+r8]
    jnz NEAR $L$oop_enc1_1
    DB 102,15,56,221,209                ; aesenclast xmm2,xmm1
    pxor xmm0,xmm0                      ; scrub key material from registers
    pxor xmm1,xmm1
    movups XMMWORD[rdx],xmm2            ; store the result
    pxor xmm2,xmm2
    DB 0F3h,0C3h ;repret



ALIGN 16
; _aesni_encrypt2 - encrypt the two blocks held in xmm2 and xmm3 in place.
;   In:    rcx = key schedule, eax = number of aesenc rounds
;   Clobb: rax, rcx, xmm0, xmm1, flags
;   The loop walks the schedule with a negative index in rax that counts up
;   to zero, alternating between xmm1 and xmm0 as the current round key.
;   No call to this label is visible in this file.
_aesni_encrypt2:

    movups xmm0,XMMWORD[rcx]            ; round key 0
    shl eax,4                           ; rounds -> bytes (32-bit write also clears upper rax)
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    xorps xmm2,xmm0
    xorps xmm3,xmm0
    movups xmm0,XMMWORD[32+rcx]         ; round key 2
    lea rcx,[32+rax*1+rcx]              ; rcx = schedule + 32 + 16*rounds
    neg rax
    add rax,16                          ; rax = 16 - 16*rounds

$L$enc_loop2:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    movups xmm1,XMMWORD[rax*1+rcx]
    add rax,32                          ; ZF from here feeds the jnz below
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    movups xmm0,XMMWORD[((-16))+rax*1+rcx]
    jnz NEAR $L$enc_loop2

    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,221,208                ; aesenclast xmm2,xmm0
    DB 102,15,56,221,216                ; aesenclast xmm3,xmm0
    DB 0F3h,0C3h ;repret



ALIGN 16
; _aesni_encrypt3 - as _aesni_encrypt2, for the three blocks in xmm2-xmm4.
;   In:    rcx = key schedule, eax = number of aesenc rounds
;   Clobb: rax, rcx, xmm0, xmm1, flags
;   No call to this label is visible in this file.
_aesni_encrypt3:

    movups xmm0,XMMWORD[rcx]            ; round key 0
    shl eax,4
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    xorps xmm2,xmm0
    xorps xmm3,xmm0
    xorps xmm4,xmm0
    movups xmm0,XMMWORD[32+rcx]         ; round key 2
    lea rcx,[32+rax*1+rcx]
    neg rax
    add rax,16                          ; rax = 16 - 16*rounds

$L$enc_loop3:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    movups xmm1,XMMWORD[rax*1+rcx]
    add rax,32
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    movups xmm0,XMMWORD[((-16))+rax*1+rcx]
    jnz NEAR $L$enc_loop3

    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,221,208                ; aesenclast xmm2,xmm0
    DB 102,15,56,221,216                ; aesenclast xmm3,xmm0
    DB 102,15,56,221,224                ; aesenclast xmm4,xmm0
    DB 0F3h,0C3h ;repret



ALIGN 16
; _aesni_encrypt4 - as _aesni_encrypt2, for the four blocks in xmm2-xmm5.
;   In:    rcx = key schedule, eax = number of aesenc rounds
;   Clobb: rax, rcx, xmm0, xmm1, flags
;   No call to this label is visible in this file.
_aesni_encrypt4:

    movups xmm0,XMMWORD[rcx]            ; round key 0
    shl eax,4
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    xorps xmm2,xmm0
    xorps xmm3,xmm0
    xorps xmm4,xmm0
    xorps xmm5,xmm0
    movups xmm0,XMMWORD[32+rcx]         ; round key 2
    lea rcx,[32+rax*1+rcx]
    neg rax
    DB 0x0f,0x1f,0x00                   ; 3-byte nop (nop dword [rax]), padding only
    add rax,16                          ; rax = 16 - 16*rounds

$L$enc_loop4:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    movups xmm1,XMMWORD[rax*1+rcx]
    add rax,32
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    movups xmm0,XMMWORD[((-16))+rax*1+rcx]
    jnz NEAR $L$enc_loop4

    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,221,208                ; aesenclast xmm2,xmm0
    DB 102,15,56,221,216                ; aesenclast xmm3,xmm0
    DB 102,15,56,221,224                ; aesenclast xmm4,xmm0
    DB 102,15,56,221,232                ; aesenclast xmm5,xmm0
    DB 0F3h,0C3h ;repret



ALIGN 16
; _aesni_encrypt6 - encrypt the six blocks held in xmm2-xmm7 in place.
;   In:    rcx = key schedule, eax = number of aesenc rounds
;   Clobb: rax, rcx, xmm0, xmm1, flags
;   The first round is interleaved with the key-0 whitening. The loop label
;   $L$enc_loop6 is also a `call` target of GFp_aes_hw_ctr32_encrypt_blocks,
;   which enters with xmm1/xmm0 holding the next two round keys and
;   rax/rcx already set up as index/base.
_aesni_encrypt6:

    movups xmm0,XMMWORD[rcx]            ; round key 0
    shl eax,4
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    xorps xmm2,xmm0
    pxor xmm3,xmm0
    pxor xmm4,xmm0
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    lea rcx,[32+rax*1+rcx]              ; rcx = schedule + 32 + 16*rounds
    neg rax
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    pxor xmm5,xmm0
    pxor xmm6,xmm0
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    pxor xmm7,xmm0
    movups xmm0,XMMWORD[rax*1+rcx]      ; round key 2 (index is -16*rounds here)
    add rax,16
    jmp NEAR $L$enc_loop6_enter         ; xmm2-xmm4 already had round 1
ALIGN 16
$L$enc_loop6:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
$L$enc_loop6_enter:
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    movups xmm1,XMMWORD[rax*1+rcx]
    add rax,32                          ; ZF from here feeds the jnz below
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    movups xmm0,XMMWORD[((-16))+rax*1+rcx]
    jnz NEAR $L$enc_loop6

    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,15,56,221,208                ; aesenclast xmm2,xmm0
    DB 102,15,56,221,216                ; aesenclast xmm3,xmm0
    DB 102,15,56,221,224                ; aesenclast xmm4,xmm0
    DB 102,15,56,221,232                ; aesenclast xmm5,xmm0
    DB 102,15,56,221,240                ; aesenclast xmm6,xmm0
    DB 102,15,56,221,248                ; aesenclast xmm7,xmm0
    DB 0F3h,0C3h ;repret



ALIGN 16
; _aesni_encrypt8 - encrypt the eight blocks held in xmm2-xmm9 in place.
;   In:    rcx = key schedule, eax = number of aesenc rounds
;   Clobb: rax, rcx, xmm0, xmm1, flags
;   $L$enc_loop8_enter is also a `call` target of the CTR tail code, which
;   enters after applying round 1 itself, with xmm0 holding round key 2.
_aesni_encrypt8:

    movups xmm0,XMMWORD[rcx]            ; round key 0
    shl eax,4
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    xorps xmm2,xmm0
    xorps xmm3,xmm0
    pxor xmm4,xmm0
    pxor xmm5,xmm0
    pxor xmm6,xmm0
    lea rcx,[32+rax*1+rcx]              ; rcx = schedule + 32 + 16*rounds
    neg rax
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    pxor xmm7,xmm0
    pxor xmm8,xmm0
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    pxor xmm9,xmm0
    movups xmm0,XMMWORD[rax*1+rcx]      ; round key 2
    add rax,16
    jmp NEAR $L$enc_loop8_inner         ; xmm2/xmm3 already had round 1
ALIGN 16
$L$enc_loop8:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
$L$enc_loop8_inner:
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
$L$enc_loop8_enter:
    movups xmm1,XMMWORD[rax*1+rcx]
    add rax,32                          ; ZF from here feeds the jnz below
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((-16))+rax*1+rcx]
    jnz NEAR $L$enc_loop8

    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    DB 102,15,56,221,208                ; aesenclast xmm2,xmm0
    DB 102,15,56,221,216                ; aesenclast xmm3,xmm0
    DB 102,15,56,221,224                ; aesenclast xmm4,xmm0
    DB 102,15,56,221,232                ; aesenclast xmm5,xmm0
    DB 102,15,56,221,240                ; aesenclast xmm6,xmm0
    DB 102,15,56,221,248                ; aesenclast xmm7,xmm0
    DB 102,68,15,56,221,192             ; aesenclast xmm8,xmm0
    DB 102,68,15,56,221,200             ; aesenclast xmm9,xmm0
    DB 0F3h,0C3h ;repret


global GFp_aes_hw_ctr32_encrypt_blocks

ALIGN 16
; GFp_aes_hw_ctr32_encrypt_blocks - CTR-mode encryption with a 32-bit
; big-endian counter in the last four bytes of the counter block.
;   In (as received): rcx = input, rdx = output, r8 = block count,
;                     r9 = key schedule, [rsp+40] = 16-byte counter block
;   After the register shuffle below the body uses:
;                     rdi = input, rsi = output, rdx = block count,
;                     rcx = key schedule, r8 = counter block pointer
;   The caller's rdi/rsi are parked in the two slots above the return
;   address and reloaded at $L$ctr32_epilogue. The bulk path also saves
;   xmm6-xmm15 and rbp, and wipes its stack scratch before returning.
;   Bulk-path stack frame: [rsp+0 .. rsp+127] holds eight counter blocks,
;   each already XORed with round key 0; only dword 3 of each slot changes
;   from iteration to iteration.
GFp_aes_hw_ctr32_encrypt_blocks:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp                         ; the exception handler reads this copy of rsp from the saved rax
$L$SEH_begin_GFp_aes_hw_ctr32_encrypt_blocks:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]



    cmp rdx,1
    jne NEAR $L$ctr32_bulk

    ; ---- exactly one block: no stack frame needed ----

    movups xmm2,XMMWORD[r8]             ; counter block
    movups xmm3,XMMWORD[rdi]            ; input block
    mov edx,DWORD[240+rcx]              ; aesenc loop count
    movups xmm0,XMMWORD[rcx]            ; round key 0
    movups xmm1,XMMWORD[16+rcx]         ; round key 1
    lea rcx,[32+rcx]
    xorps xmm2,xmm0
$L$oop_enc1_2:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    dec edx
    movups xmm1,XMMWORD[rcx]
    lea rcx,[16+rcx]
    jnz NEAR $L$oop_enc1_2
    DB 102,15,56,221,209                ; aesenclast xmm2,xmm1
    pxor xmm0,xmm0                      ; scrub key material
    pxor xmm1,xmm1
    xorps xmm2,xmm3                     ; keystream XOR input
    pxor xmm3,xmm3
    movups XMMWORD[rsi],xmm2
    xorps xmm2,xmm2
    jmp NEAR $L$ctr32_epilogue

ALIGN 16
$L$ctr32_bulk:
    lea r11,[rsp]                       ; r11 = entry rsp, used to address the save area and to unwind

    push rbp                            ; lands at [r11-8]

    sub rsp,288
    and rsp,-16                         ; 16-byte alignment for the movdqa/movaps accesses to [rsp+..]
    movaps XMMWORD[(-168)+r11],xmm6     ; save xmm6-xmm15 (restored at $L$ctr32_done)
    movaps XMMWORD[(-152)+r11],xmm7
    movaps XMMWORD[(-136)+r11],xmm8
    movaps XMMWORD[(-120)+r11],xmm9
    movaps XMMWORD[(-104)+r11],xmm10
    movaps XMMWORD[(-88)+r11],xmm11
    movaps XMMWORD[(-72)+r11],xmm12
    movaps XMMWORD[(-56)+r11],xmm13
    movaps XMMWORD[(-40)+r11],xmm14
    movaps XMMWORD[(-24)+r11],xmm15
$L$ctr32_body:

    ; Build eight counter blocks (counter+0 .. counter+7), each pre-XORed
    ; with round key 0, in the stack slots and in xmm2-xmm7.
    ;   r8d = counter in host byte order, ebp = dword 3 of round key 0.

    movdqu xmm2,XMMWORD[r8]             ; counter block
    movdqu xmm0,XMMWORD[rcx]            ; round key 0
    mov r8d,DWORD[12+r8]                ; big-endian counter word
    pxor xmm2,xmm0                      ; block 0 = counter block ^ key0
    mov ebp,DWORD[12+rcx]               ; last dword of round key 0
    movdqa XMMWORD[rsp],xmm2            ; slot 0
    bswap r8d                           ; counter -> host order
    movdqa xmm3,xmm2
    movdqa xmm4,xmm2
    movdqa xmm5,xmm2
    movdqa XMMWORD[64+rsp],xmm2         ; slots 4-7 start as copies; dword 3 is patched below
    movdqa XMMWORD[80+rsp],xmm2
    movdqa XMMWORD[96+rsp],xmm2
    mov r10,rdx                         ; park the block count while rdx is used as scratch
    movdqa XMMWORD[112+rsp],xmm2

    lea rax,[1+r8]
    lea rdx,[2+r8]
    bswap eax
    bswap edx
    xor eax,ebp                         ; bswap(counter+1) ^ key0 word
    xor edx,ebp                         ; bswap(counter+2) ^ key0 word
    DB 102,15,58,34,216,3               ; pinsrd xmm3,eax,3
    lea rax,[3+r8]
    movdqa XMMWORD[16+rsp],xmm3         ; slot 1
    DB 102,15,58,34,226,3               ; pinsrd xmm4,edx,3
    bswap eax
    mov rdx,r10                         ; block count back in rdx
    lea r10,[4+r8]
    movdqa XMMWORD[32+rsp],xmm4         ; slot 2
    xor eax,ebp
    bswap r10d
    DB 102,15,58,34,232,3               ; pinsrd xmm5,eax,3
    xor r10d,ebp
    movdqa XMMWORD[48+rsp],xmm5         ; slot 3
    lea r9,[5+r8]
    mov DWORD[((64+12))+rsp],r10d       ; slot 4, dword 3
    bswap r9d
    lea r10,[6+r8]
    mov eax,DWORD[240+rcx]              ; eax = aesenc loop count
    xor r9d,ebp
    bswap r10d
    mov DWORD[((80+12))+rsp],r9d        ; slot 5, dword 3
    xor r10d,ebp
    lea r9,[7+r8]
    mov DWORD[((96+12))+rsp],r10d       ; slot 6, dword 3
    bswap r9d
    lea r10,[GFp_ia32cap_P]
    mov r10d,DWORD[4+r10]               ; second dword of the capability vector
    xor r9d,ebp
    and r10d,71303168                   ; mask 0x04400000 (bits 22 and 26)
    mov DWORD[((112+12))+rsp],r9d       ; slot 7, dword 3

    movups xmm1,XMMWORD[16+rcx]         ; round key 1, expected in xmm1 by every path below

    movdqa xmm6,XMMWORD[64+rsp]         ; blocks 4 and 5
    movdqa xmm7,XMMWORD[80+rsp]

    cmp rdx,8
    jb NEAR $L$ctr32_tail               ; fewer than 8 blocks: straight to the tail code

    sub rdx,6                           ; bias the count for the 6-block loop
    cmp r10d,4194304                    ; 0x00400000: bit 22 set and bit 26 clear
    je NEAR $L$ctr32_6x                 ; NOTE(review): presumably "MOVBE without XSAVE" CPU selection - confirm against the GFp_ia32cap_P definition

    lea rcx,[128+rcx]                   ; bias rcx by 128 so the ((N-128)) displacements below stay small
    sub rdx,2                           ; total bias is now 8 for the 8-block loop
    jmp NEAR $L$ctr32_loop8

ALIGN 16
; ---- 6 blocks per iteration; counter words are written with movbe ----
$L$ctr32_6x:
    shl eax,4                           ; rounds -> bytes
    mov r10d,48
    bswap ebp                           ; movbe byte-swaps on store, so pre-swap the key0 word
    lea rcx,[32+rax*1+rcx]              ; rcx = schedule + 32 + 16*rounds
    sub r10,rax                         ; r10 = 48 - 16*rounds (negative index base)
    jmp NEAR $L$ctr32_loop6

ALIGN 16
$L$ctr32_loop6:
    ; Rounds 1 and 2 on xmm2-xmm7, interleaved with writing the next six
    ; counter words into slots 0-5; then join $L$enc_loop6 for the rest.
    add r8d,6
    movups xmm0,XMMWORD[((-48))+r10*1+rcx]  ; round key 2
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    mov eax,r8d
    xor eax,ebp
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 0x0f,0x38,0xf1,0x44,0x24,12      ; movbe [rsp+12],eax
    lea eax,[1+r8]
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    xor eax,ebp
    DB 0x0f,0x38,0xf1,0x44,0x24,28      ; movbe [rsp+28],eax
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    lea eax,[2+r8]
    xor eax,ebp
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 0x0f,0x38,0xf1,0x44,0x24,44      ; movbe [rsp+44],eax
    lea eax,[3+r8]
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    movups xmm1,XMMWORD[((-32))+r10*1+rcx]  ; round key 3
    xor eax,ebp

    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 0x0f,0x38,0xf1,0x44,0x24,60      ; movbe [rsp+60],eax
    lea eax,[4+r8]
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    xor eax,ebp
    DB 0x0f,0x38,0xf1,0x44,0x24,76      ; movbe [rsp+76],eax
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    lea eax,[5+r8]
    xor eax,ebp
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    DB 0x0f,0x38,0xf1,0x44,0x24,92      ; movbe [rsp+92],eax
    mov rax,r10                         ; index register expected by $L$enc_loop6
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    movups xmm0,XMMWORD[((-16))+r10*1+rcx]  ; round key 4

    call $L$enc_loop6                   ; remaining rounds, shared with _aesni_encrypt6

    movdqu xmm8,XMMWORD[rdi]            ; six input blocks
    movdqu xmm9,XMMWORD[16+rdi]
    movdqu xmm10,XMMWORD[32+rdi]
    movdqu xmm11,XMMWORD[48+rdi]
    movdqu xmm12,XMMWORD[64+rdi]
    movdqu xmm13,XMMWORD[80+rdi]
    lea rdi,[96+rdi]
    movups xmm1,XMMWORD[((-64))+r10*1+rcx]  ; round key 1 for the next pass
    pxor xmm8,xmm2                      ; input ^ keystream, while reloading the next counter blocks
    movaps xmm2,XMMWORD[rsp]
    pxor xmm9,xmm3
    movaps xmm3,XMMWORD[16+rsp]
    pxor xmm10,xmm4
    movaps xmm4,XMMWORD[32+rsp]
    pxor xmm11,xmm5
    movaps xmm5,XMMWORD[48+rsp]
    pxor xmm12,xmm6
    movaps xmm6,XMMWORD[64+rsp]
    pxor xmm13,xmm7
    movaps xmm7,XMMWORD[80+rsp]
    movdqu XMMWORD[rsi],xmm8
    movdqu XMMWORD[16+rsi],xmm9
    movdqu XMMWORD[32+rsi],xmm10
    movdqu XMMWORD[48+rsi],xmm11
    movdqu XMMWORD[64+rsi],xmm12
    movdqu XMMWORD[80+rsi],xmm13
    lea rsi,[96+rsi]

    sub rdx,6
    jnc NEAR $L$ctr32_loop6             ; at least 6 more blocks

    add rdx,6                           ; undo the bias: rdx = blocks left (0..5)
    jz NEAR $L$ctr32_done

    lea eax,[((-48))+r10]               ; eax = -16*rounds
    lea rcx,[((-80))+r10*1+rcx]         ; rcx back to the start of the schedule
    neg eax
    shr eax,4                           ; eax = rounds again, as the tail expects
    jmp NEAR $L$ctr32_tail

ALIGN 32
; ---- 8 blocks per iteration; rcx = schedule + 128 throughout ----
; Each round on xmm2-xmm9 is interleaved with computing one counter word
; for the following iteration (bswap, XOR with the key0 word, store).
$L$ctr32_loop8:
    add r8d,8
    movdqa xmm8,XMMWORD[96+rsp]         ; blocks 6 and 7 of this iteration
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    mov r9d,r8d
    movdqa xmm9,XMMWORD[112+rsp]
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    bswap r9d
    movups xmm0,XMMWORD[((32-128))+rcx] ; round key 2
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    xor r9d,ebp
    nop
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    mov DWORD[((0+12))+rsp],r9d         ; next counter word, slot 0
    lea r9,[1+r8]
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((48-128))+rcx] ; round key 3
    bswap r9d
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop, padding only
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    mov DWORD[((16+12))+rsp],r9d        ; slot 1
    lea r9,[2+r8]
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((64-128))+rcx] ; round key 4
    bswap r9d
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    mov DWORD[((32+12))+rsp],r9d        ; slot 2
    lea r9,[3+r8]
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((80-128))+rcx] ; round key 5
    bswap r9d
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    mov DWORD[((48+12))+rsp],r9d        ; slot 3
    lea r9,[4+r8]
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((96-128))+rcx] ; round key 6
    bswap r9d
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    mov DWORD[((64+12))+rsp],r9d        ; slot 4
    lea r9,[5+r8]
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((112-128))+rcx] ; round key 7
    bswap r9d
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    mov DWORD[((80+12))+rsp],r9d        ; slot 5
    lea r9,[6+r8]
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((128-128))+rcx] ; round key 8
    bswap r9d
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    xor r9d,ebp
    DB 0x66,0x90                        ; 2-byte nop
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    mov DWORD[((96+12))+rsp],r9d        ; slot 6
    lea r9,[7+r8]
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((144-128))+rcx] ; round key 9
    bswap r9d
    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    xor r9d,ebp
    movdqu xmm10,XMMWORD[rdi]           ; start fetching input (block 0)
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    mov DWORD[((112+12))+rsp],r9d       ; slot 7
    cmp eax,11                          ; flags survive to the jb below (aesenc/movups do not touch them)
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((160-128))+rcx] ; round key 10

    jb NEAR $L$ctr32_enc_done           ; loop count below 11: xmm1/xmm0 already hold the last two keys

    ; larger loop count: four more rounds (keys 9-12), then keys 13/14 remain
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((176-128))+rcx] ; round key 11

    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((192-128))+rcx] ; round key 12



    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movups xmm1,XMMWORD[((208-128))+rcx] ; round key 13

    DB 102,15,56,220,208                ; aesenc xmm2,xmm0
    DB 102,15,56,220,216                ; aesenc xmm3,xmm0
    DB 102,15,56,220,224                ; aesenc xmm4,xmm0
    DB 102,15,56,220,232                ; aesenc xmm5,xmm0
    DB 102,15,56,220,240                ; aesenc xmm6,xmm0
    DB 102,15,56,220,248                ; aesenc xmm7,xmm0
    DB 102,68,15,56,220,192             ; aesenc xmm8,xmm0
    DB 102,68,15,56,220,200             ; aesenc xmm9,xmm0
    movups xmm0,XMMWORD[((224-128))+rcx] ; round key 14
    jmp NEAR $L$ctr32_enc_done

ALIGN 16
; Here xmm1 = second-to-last round key and xmm0 = last round key.
; The last key is XORed into the input blocks first, so that
; aesenclast(state, input ^ lastkey) yields the output block directly.
$L$ctr32_enc_done:
    movdqu xmm11,XMMWORD[16+rdi]
    pxor xmm10,xmm0
    movdqu xmm12,XMMWORD[32+rdi]
    pxor xmm11,xmm0
    movdqu xmm13,XMMWORD[48+rdi]
    pxor xmm12,xmm0
    movdqu xmm14,XMMWORD[64+rdi]
    pxor xmm13,xmm0
    movdqu xmm15,XMMWORD[80+rdi]
    pxor xmm14,xmm0
    pxor xmm15,xmm0
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1
    DB 102,68,15,56,220,201             ; aesenc xmm9,xmm1
    movdqu xmm1,XMMWORD[96+rdi]         ; xmm1 is free now: reuse it for input block 6
    lea rdi,[128+rdi]

    DB 102,65,15,56,221,210             ; aesenclast xmm2,xmm10
    pxor xmm1,xmm0
    movdqu xmm10,XMMWORD[((112-128))+rdi] ; input block 7
    DB 102,65,15,56,221,219             ; aesenclast xmm3,xmm11
    pxor xmm10,xmm0
    movdqa xmm11,XMMWORD[rsp]           ; xmm11-xmm15, xmm0: next iteration's counter blocks 0-5
    DB 102,65,15,56,221,228             ; aesenclast xmm4,xmm12
    DB 102,65,15,56,221,237             ; aesenclast xmm5,xmm13
    movdqa xmm12,XMMWORD[16+rsp]
    movdqa xmm13,XMMWORD[32+rsp]
    DB 102,65,15,56,221,246             ; aesenclast xmm6,xmm14
    DB 102,65,15,56,221,255             ; aesenclast xmm7,xmm15
    movdqa xmm14,XMMWORD[48+rsp]
    movdqa xmm15,XMMWORD[64+rsp]
    DB 102,68,15,56,221,193             ; aesenclast xmm8,xmm1
    movdqa xmm0,XMMWORD[80+rsp]
    movups xmm1,XMMWORD[((16-128))+rcx] ; round key 1 for the next pass
    DB 102,69,15,56,221,202             ; aesenclast xmm9,xmm10

    movups XMMWORD[rsi],xmm2            ; store eight output blocks, refilling xmm2-xmm7 as we go
    movdqa xmm2,xmm11
    movups XMMWORD[16+rsi],xmm3
    movdqa xmm3,xmm12
    movups XMMWORD[32+rsi],xmm4
    movdqa xmm4,xmm13
    movups XMMWORD[48+rsi],xmm5
    movdqa xmm5,xmm14
    movups XMMWORD[64+rsi],xmm6
    movdqa xmm6,xmm15
    movups XMMWORD[80+rsi],xmm7
    movdqa xmm7,xmm0
    movups XMMWORD[96+rsi],xmm8
    movups XMMWORD[112+rsi],xmm9
    lea rsi,[128+rsi]

    sub rdx,8
    jnc NEAR $L$ctr32_loop8             ; at least 8 more blocks

    add rdx,8                           ; undo the bias: rdx = blocks left (0..7)
    jz NEAR $L$ctr32_done
    lea rcx,[((-128))+rcx]              ; rcx back to the start of the schedule

; ---- 1..7 remaining blocks ----
; On entry: rcx = schedule, eax = aesenc loop count, xmm1 = round key 1,
; xmm2-xmm7 = counter blocks 0-5 (already XORed with round key 0).
$L$ctr32_tail:


    lea rcx,[16+rcx]                    ; rcx -> round key 1
    cmp rdx,4
    jb NEAR $L$ctr32_loop3              ; 1..3 blocks
    je NEAR $L$ctr32_loop4              ; exactly 4 blocks

    ; 5..7 blocks: run seven lanes (xmm2-xmm8) through the 8-wide loop;
    ; xmm9 is zeroed and its result is never stored.
    shl eax,4
    movdqa xmm8,XMMWORD[96+rsp]         ; counter block 6
    pxor xmm9,xmm9

    movups xmm0,XMMWORD[16+rcx]         ; round key 2
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    lea rcx,[((32-16))+rax*1+rcx]       ; rcx = schedule + 32 + 16*rounds
    neg rax
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    add rax,16                          ; rax = 16 - 16*rounds
    movups xmm10,XMMWORD[rdi]
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    DB 102,15,56,220,241                ; aesenc xmm6,xmm1
    movups xmm11,XMMWORD[16+rdi]
    movups xmm12,XMMWORD[32+rdi]
    DB 102,15,56,220,249                ; aesenc xmm7,xmm1
    DB 102,68,15,56,220,193             ; aesenc xmm8,xmm1

    call $L$enc_loop8_enter             ; remaining rounds, shared with _aesni_encrypt8

    movdqu xmm13,XMMWORD[48+rdi]
    pxor xmm2,xmm10
    movdqu xmm10,XMMWORD[64+rdi]
    pxor xmm3,xmm11
    movdqu XMMWORD[rsi],xmm2
    pxor xmm4,xmm12
    movdqu XMMWORD[16+rsi],xmm3
    pxor xmm5,xmm13
    movdqu XMMWORD[32+rsi],xmm4
    pxor xmm6,xmm10
    movdqu XMMWORD[48+rsi],xmm5
    movdqu XMMWORD[64+rsi],xmm6
    cmp rdx,6
    jb NEAR $L$ctr32_done               ; exactly 5 blocks

    movups xmm11,XMMWORD[80+rdi]
    xorps xmm7,xmm11
    movups XMMWORD[80+rsi],xmm7
    je NEAR $L$ctr32_done               ; exactly 6 (flags still from cmp rdx,6)

    movups xmm12,XMMWORD[96+rdi]
    xorps xmm8,xmm12
    movups XMMWORD[96+rsi],xmm8
    jmp NEAR $L$ctr32_done

ALIGN 32
; exactly four blocks: one round per iteration, eax counts rounds down
$L$ctr32_loop4:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    lea rcx,[16+rcx]
    dec eax                             ; ZF consumed by the jnz below
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    DB 102,15,56,220,233                ; aesenc xmm5,xmm1
    movups xmm1,XMMWORD[rcx]
    jnz NEAR $L$ctr32_loop4
    DB 102,15,56,221,209                ; aesenclast xmm2,xmm1
    DB 102,15,56,221,217                ; aesenclast xmm3,xmm1
    movups xmm10,XMMWORD[rdi]
    movups xmm11,XMMWORD[16+rdi]
    DB 102,15,56,221,225                ; aesenclast xmm4,xmm1
    DB 102,15,56,221,233                ; aesenclast xmm5,xmm1
    movups xmm12,XMMWORD[32+rdi]
    movups xmm13,XMMWORD[48+rdi]

    xorps xmm2,xmm10
    movups XMMWORD[rsi],xmm2
    xorps xmm3,xmm11
    movups XMMWORD[16+rsi],xmm3
    pxor xmm4,xmm12
    movdqu XMMWORD[32+rsi],xmm4
    pxor xmm5,xmm13
    movdqu XMMWORD[48+rsi],xmm5
    jmp NEAR $L$ctr32_done

ALIGN 32
; one to three blocks: three lanes are always encrypted, only rdx are stored
$L$ctr32_loop3:
    DB 102,15,56,220,209                ; aesenc xmm2,xmm1
    lea rcx,[16+rcx]
    dec eax
    DB 102,15,56,220,217                ; aesenc xmm3,xmm1
    DB 102,15,56,220,225                ; aesenc xmm4,xmm1
    movups xmm1,XMMWORD[rcx]
    jnz NEAR $L$ctr32_loop3
    DB 102,15,56,221,209                ; aesenclast xmm2,xmm1
    DB 102,15,56,221,217                ; aesenclast xmm3,xmm1
    DB 102,15,56,221,225                ; aesenclast xmm4,xmm1

    movups xmm10,XMMWORD[rdi]
    xorps xmm2,xmm10
    movups XMMWORD[rsi],xmm2
    cmp rdx,2
    jb NEAR $L$ctr32_done               ; exactly 1 block

    movups xmm11,XMMWORD[16+rdi]
    xorps xmm3,xmm11
    movups XMMWORD[16+rsi],xmm3
    je NEAR $L$ctr32_done               ; exactly 2 (flags still from cmp rdx,2)

    movups xmm12,XMMWORD[32+rdi]
    xorps xmm4,xmm12
    movups XMMWORD[32+rsi],xmm4

; Common exit for the bulk path: clear working registers, restore
; xmm6-xmm15 while zeroing their save slots, wipe the 128-byte counter
; scratch, then restore rbp and rsp from r11.
$L$ctr32_done:
    xorps xmm0,xmm0
    xor ebp,ebp
    pxor xmm1,xmm1
    pxor xmm2,xmm2
    pxor xmm3,xmm3
    pxor xmm4,xmm4
    pxor xmm5,xmm5
    movaps xmm6,XMMWORD[((-168))+r11]
    movaps XMMWORD[(-168)+r11],xmm0
    movaps xmm7,XMMWORD[((-152))+r11]
    movaps XMMWORD[(-152)+r11],xmm0
    movaps xmm8,XMMWORD[((-136))+r11]
    movaps XMMWORD[(-136)+r11],xmm0
    movaps xmm9,XMMWORD[((-120))+r11]
    movaps XMMWORD[(-120)+r11],xmm0
    movaps xmm10,XMMWORD[((-104))+r11]
    movaps XMMWORD[(-104)+r11],xmm0
    movaps xmm11,XMMWORD[((-88))+r11]
    movaps XMMWORD[(-88)+r11],xmm0
    movaps xmm12,XMMWORD[((-72))+r11]
    movaps XMMWORD[(-72)+r11],xmm0
    movaps xmm13,XMMWORD[((-56))+r11]
    movaps XMMWORD[(-56)+r11],xmm0
    movaps xmm14,XMMWORD[((-40))+r11]
    movaps XMMWORD[(-40)+r11],xmm0
    movaps xmm15,XMMWORD[((-24))+r11]
    movaps XMMWORD[(-24)+r11],xmm0
    movaps XMMWORD[rsp],xmm0            ; wipe counter slots 0-7
    movaps XMMWORD[16+rsp],xmm0
    movaps XMMWORD[32+rsp],xmm0
    movaps XMMWORD[48+rsp],xmm0
    movaps XMMWORD[64+rsp],xmm0
    movaps XMMWORD[80+rsp],xmm0
    movaps XMMWORD[96+rsp],xmm0
    movaps XMMWORD[112+rsp],xmm0
    mov rbp,QWORD[((-8))+r11]           ; value pushed at $L$ctr32_bulk

    lea rsp,[r11]                       ; back to the entry stack pointer

$L$ctr32_epilogue:
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret

$L$SEH_end_GFp_aes_hw_ctr32_encrypt_blocks:
global GFp_aes_hw_set_encrypt_key

ALIGN 16
; GFp_aes_hw_set_encrypt_key - expand a 128- or 256-bit key.
;   In:    rcx = user key bytes, edx = key length in bits, r8 = schedule out
;   Out:   rax = 0 on success, -1 if rcx or r8 is null, -2 if edx is
;          neither 128 nor 256
;   Writes the round keys from offset 0 of the schedule and the aesenc loop
;   count (9 or 13) at offset 240.
;   Clobb: rdx, r10, flags; xmm0-xmm5 are zeroed before returning.
;   Two implementations per key size: one built on aeskeygenassist, and an
;   "_alt" one built on pshufb + aesenclast, chosen from GFp_ia32cap_P.
GFp_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:

    DB 0x48,0x83,0xEC,0x08              ; sub rsp,8 (fixed 4-byte encoding, described by $L$SEH_info_GFp_key)

    mov rax,-1
    test rcx,rcx
    jz NEAR $L$enc_key_ret
    test r8,r8
    jz NEAR $L$enc_key_ret

    movups xmm0,XMMWORD[rcx]            ; first 16 key bytes = round key 0
    xorps xmm4,xmm4                     ; scratch for the shufps-based expansion helpers
    lea r10,[GFp_ia32cap_P]
    mov r10d,DWORD[4+r10]               ; second dword of the capability vector
    and r10d,268437504                  ; mask 0x10000800 (bits 28 and 11)
    lea rax,[16+r8]                     ; rax = write cursor, starts at round key 1
    cmp edx,256
    je NEAR $L$14rounds

    cmp edx,128
    jne NEAR $L$bad_keybits

$L$10rounds:
    mov edx,9                           ; loop count stored at schedule+240
    cmp r10d,268435456                  ; 0x10000000: bit 28 set and bit 11 clear
    je NEAR $L$10rounds_alt             ; NOTE(review): presumably "AVX without XOP" CPU selection - confirm

    movups XMMWORD[r8],xmm0             ; round key 0
    DB 102,15,58,223,200,1              ; aeskeygenassist xmm1,xmm0,0x01
    call $L$key_expansion_128_cold
    DB 102,15,58,223,200,2              ; aeskeygenassist xmm1,xmm0,0x02
    call $L$key_expansion_128
    DB 102,15,58,223,200,4              ; aeskeygenassist xmm1,xmm0,0x04
    call $L$key_expansion_128
    DB 102,15,58,223,200,8              ; aeskeygenassist xmm1,xmm0,0x08
    call $L$key_expansion_128
    DB 102,15,58,223,200,16             ; aeskeygenassist xmm1,xmm0,0x10
    call $L$key_expansion_128
    DB 102,15,58,223,200,32             ; aeskeygenassist xmm1,xmm0,0x20
    call $L$key_expansion_128
    DB 102,15,58,223,200,64             ; aeskeygenassist xmm1,xmm0,0x40
    call $L$key_expansion_128
    DB 102,15,58,223,200,128            ; aeskeygenassist xmm1,xmm0,0x80
    call $L$key_expansion_128
    DB 102,15,58,223,200,27             ; aeskeygenassist xmm1,xmm0,0x1b
    call $L$key_expansion_128
    DB 102,15,58,223,200,54             ; aeskeygenassist xmm1,xmm0,0x36
    call $L$key_expansion_128
    movups XMMWORD[rax],xmm0            ; round key 10 (rax = schedule+160 here)
    mov DWORD[80+rax],edx               ; schedule+240
    xor eax,eax
    jmp NEAR $L$enc_key_ret

ALIGN 16
; 128-bit expansion without aeskeygenassist.
;   xmm0 = current round key, xmm2 = running copy used for the prefix XOR,
;   xmm4 = round constant in every dword (doubled each round by pslld),
;   xmm5 = $L$key_rotate shuffle mask.
$L$10rounds_alt:
    movdqa xmm5,XMMWORD[$L$key_rotate]
    mov r10d,8                          ; eight rounds in the loop, two more unrolled after it
    movdqa xmm4,XMMWORD[$L$key_rcon1]
    movdqa xmm2,xmm0
    movdqu XMMWORD[r8],xmm0             ; round key 0
    jmp NEAR $L$oop_key128

ALIGN 16
$L$oop_key128:
    DB 102,15,56,0,197                  ; pshufb xmm0,xmm5 (bytes 13,14,15,12 into every dword)
    DB 102,15,56,221,196                ; aesenclast xmm0,xmm4
    pslld xmm4,1                        ; next round constant
    lea rax,[16+rax]

    movdqa xmm3,xmm2                    ; xmm2 = previous key XORed with itself shifted by 4, 8, 12 bytes
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm2,xmm3

    pxor xmm0,xmm2                      ; new round key
    movdqu XMMWORD[(-16)+rax],xmm0
    movdqa xmm2,xmm0

    dec r10d
    jnz NEAR $L$oop_key128

    movdqa xmm4,XMMWORD[$L$key_rcon1b]  ; round constant 0x1b for round key 9

    DB 102,15,56,0,197                  ; pshufb xmm0,xmm5
    DB 102,15,56,221,196                ; aesenclast xmm0,xmm4
    pslld xmm4,1                        ; 0x1b -> 0x36 for round key 10

    movdqa xmm3,xmm2
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm2,xmm3

    pxor xmm0,xmm2
    movdqu XMMWORD[rax],xmm0            ; round key 9 (rax = schedule+144 here)

    movdqa xmm2,xmm0
    DB 102,15,56,0,197                  ; pshufb xmm0,xmm5
    DB 102,15,56,221,196                ; aesenclast xmm0,xmm4

    movdqa xmm3,xmm2
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm3,xmm2
    pslldq xmm2,4
    pxor xmm2,xmm3

    pxor xmm0,xmm2
    movdqu XMMWORD[16+rax],xmm0         ; round key 10

    mov DWORD[96+rax],edx               ; schedule+240
    xor eax,eax
    jmp NEAR $L$enc_key_ret



ALIGN 16
$L$14rounds:
    movups xmm2,XMMWORD[16+rcx]         ; second 16 key bytes = round key 1
    mov edx,13                          ; loop count stored at schedule+240
    lea rax,[16+rax]                    ; write cursor -> round key 2
    cmp r10d,268435456                  ; same selection test as in $L$10rounds
    je NEAR $L$14rounds_alt

    movups XMMWORD[r8],xmm0             ; round key 0
    movups XMMWORD[16+r8],xmm2          ; round key 1
    DB 102,15,58,223,202,1              ; aeskeygenassist xmm1,xmm2,0x01
    call $L$key_expansion_256a_cold
    DB 102,15,58,223,200,1              ; aeskeygenassist xmm1,xmm0,0x01
    call $L$key_expansion_256b
    DB 102,15,58,223,202,2              ; aeskeygenassist xmm1,xmm2,0x02
    call $L$key_expansion_256a
    DB 102,15,58,223,200,2              ; aeskeygenassist xmm1,xmm0,0x02
    call $L$key_expansion_256b
    DB 102,15,58,223,202,4              ; aeskeygenassist xmm1,xmm2,0x04
    call $L$key_expansion_256a
    DB 102,15,58,223,200,4              ; aeskeygenassist xmm1,xmm0,0x04
    call $L$key_expansion_256b
    DB 102,15,58,223,202,8              ; aeskeygenassist xmm1,xmm2,0x08
    call $L$key_expansion_256a
    DB 102,15,58,223,200,8              ; aeskeygenassist xmm1,xmm0,0x08
    call $L$key_expansion_256b
    DB 102,15,58,223,202,16             ; aeskeygenassist xmm1,xmm2,0x10
    call $L$key_expansion_256a
    DB 102,15,58,223,200,16             ; aeskeygenassist xmm1,xmm0,0x10
    call $L$key_expansion_256b
    DB 102,15,58,223,202,32             ; aeskeygenassist xmm1,xmm2,0x20
    call $L$key_expansion_256a
    DB 102,15,58,223,200,32             ; aeskeygenassist xmm1,xmm0,0x20
    call $L$key_expansion_256b
    DB 102,15,58,223,202,64             ; aeskeygenassist xmm1,xmm2,0x40
    call $L$key_expansion_256a
    movups XMMWORD[rax],xmm0            ; round key 14 (rax = schedule+224 here)
    mov DWORD[16+rax],edx               ; schedule+240
    xor rax,rax
    jmp NEAR $L$enc_key_ret

ALIGN 16
; 256-bit expansion without aeskeygenassist.
;   xmm0 / xmm1 = the two most recent round keys (even / odd),
;   xmm2 = work register, xmm4 = round constant, xmm5 = $L$key_rotate mask.
$L$14rounds_alt:
    movdqa xmm5,XMMWORD[$L$key_rotate]
    movdqa xmm4,XMMWORD[$L$key_rcon1]
    mov r10d,7                          ; seven even round keys (2,4,..,14)
    movdqu XMMWORD[r8],xmm0             ; round key 0
    movdqa xmm1,xmm2
    movdqu XMMWORD[16+r8],xmm2          ; round key 1
    jmp NEAR $L$oop_key256

ALIGN 16
$L$oop_key256:
    DB 102,15,56,0,213                  ; pshufb xmm2,xmm5
    DB 102,15,56,221,212                ; aesenclast xmm2,xmm4

    movdqa xmm3,xmm0
    pslldq xmm0,4
    pxor xmm3,xmm0
    pslldq xmm0,4
    pxor xmm3,xmm0
    pslldq xmm0,4
    pxor xmm0,xmm3
    pslld xmm4,1                        ; next round constant

    pxor xmm0,xmm2
    movdqu XMMWORD[rax],xmm0            ; even round key

    dec r10d
    jz NEAR $L$done_key256              ; the last (14th) key is an even one

    pshufd xmm2,xmm0,0xff               ; broadcast the last dword of the new key
    pxor xmm3,xmm3
    DB 102,15,56,221,211                ; aesenclast xmm2,xmm3 (zero round key)

    movdqa xmm3,xmm1
    pslldq xmm1,4
    pxor xmm3,xmm1
    pslldq xmm1,4
    pxor xmm3,xmm1
    pslldq xmm1,4
    pxor xmm1,xmm3

    pxor xmm2,xmm1
    movdqu XMMWORD[16+rax],xmm2         ; odd round key
    lea rax,[32+rax]
    movdqa xmm1,xmm2

    jmp NEAR $L$oop_key256

$L$done_key256:
    mov DWORD[16+rax],edx               ; schedule+240 (rax = schedule+224 here)
    xor eax,eax
    jmp NEAR $L$enc_key_ret

ALIGN 16
$L$bad_keybits:
    mov rax,-2
$L$enc_key_ret:
    pxor xmm0,xmm0                      ; scrub key material on every exit path
    pxor xmm1,xmm1
    pxor xmm2,xmm2
    pxor xmm3,xmm3
    pxor xmm4,xmm4
    pxor xmm5,xmm5
    add rsp,8

    DB 0F3h,0C3h ;repret

$L$SEH_end_GFp_set_encrypt_key:

; ---- key-expansion helpers, reached only by `call` from the code above ----
; They do not adjust rsp. xmm1 carries the aeskeygenassist result, xmm4 is
; scratch, rax is the write cursor into the schedule.

ALIGN 16
; Store the current key (xmm0), advance the cursor, derive the next key in
; xmm0. The _cold entry skips the store/advance for the very first step.
$L$key_expansion_128:
    movups XMMWORD[rax],xmm0
    lea rax,[16+rax]
$L$key_expansion_128_cold:
    shufps xmm4,xmm0,16
    xorps xmm0,xmm4
    shufps xmm4,xmm0,140
    xorps xmm0,xmm4
    shufps xmm1,xmm1,255                ; broadcast dword 3 of the aeskeygenassist result
    xorps xmm0,xmm1
    DB 0F3h,0C3h ;repret

ALIGN 16
; 192-bit helpers. GFp_aes_hw_set_encrypt_key above only dispatches on 128
; and 256, and no call to these labels is visible in this file.
$L$key_expansion_192a:
    movups XMMWORD[rax],xmm0
    lea rax,[16+rax]
$L$key_expansion_192a_cold:
    movaps xmm5,xmm2
$L$key_expansion_192b_warm:
    shufps xmm4,xmm0,16
    movdqa xmm3,xmm2
    xorps xmm0,xmm4
    shufps xmm4,xmm0,140
    pslldq xmm3,4
    xorps xmm0,xmm4
    pshufd xmm1,xmm1,85
    pxor xmm2,xmm3
    pxor xmm0,xmm1
    pshufd xmm3,xmm0,255
    pxor xmm2,xmm3
    DB 0F3h,0C3h ;repret

ALIGN 16
$L$key_expansion_192b:
    movaps xmm3,xmm0
    shufps xmm5,xmm0,68
    movups XMMWORD[rax],xmm5
    shufps xmm3,xmm2,78
    movups XMMWORD[16+rax],xmm3
    lea rax,[32+rax]
    jmp NEAR $L$key_expansion_192b_warm

ALIGN 16
; 256-bit, even step: store the odd key (xmm2), derive the next even key
; in xmm0. The _cold entry skips the store/advance.
$L$key_expansion_256a:
    movups XMMWORD[rax],xmm2
    lea rax,[16+rax]
$L$key_expansion_256a_cold:
    shufps xmm4,xmm0,16
    xorps xmm0,xmm4
    shufps xmm4,xmm0,140
    xorps xmm0,xmm4
    shufps xmm1,xmm1,255                ; broadcast dword 3 of the aeskeygenassist result
    xorps xmm0,xmm1
    DB 0F3h,0C3h ;repret

ALIGN 16
; 256-bit, odd step: store the even key (xmm0), derive the next odd key in
; xmm2 using dword 2 of the aeskeygenassist result.
$L$key_expansion_256b:
    movups XMMWORD[rax],xmm0
    lea rax,[16+rax]

    shufps xmm4,xmm2,16
    xorps xmm2,xmm4
    shufps xmm4,xmm2,140
    xorps xmm2,xmm4
    shufps xmm1,xmm1,170                ; broadcast dword 2
    xorps xmm2,xmm1
    DB 0F3h,0C3h ;repret


; ---- constants ----
; Only $L$key_rotate, $L$key_rcon1 and $L$key_rcon1b are referenced by the
; code visible in this file; the others are emitted by the generator anyway.
ALIGN 64
$L$bswap_mask:
    DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$increment32:
    DD 6,6,6,0
$L$increment64:
    DD 1,0,0,0
$L$increment1:
    DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$key_rotate:
    DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
$L$key_rotate192:
    DD 0x04070605,0x04070605,0x04070605,0x04070605
$L$key_rcon1:
    DD 1,1,1,1
$L$key_rcon1b:
    DD 0x1b,0x1b,0x1b,0x1b

; ASCII, NUL-terminated: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
    DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
    DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
    DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
    DB 115,108,46,111,114,103,62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind

ALIGN 16
; ctr_xts_se_handler - exception handler named in $L$SEH_info_GFp_ctr32.
;   Reads two records through r8 and r9.
;   NOTE(review): the offsets below look like the Win64 CONTEXT (r8) and
;   DISPATCHER_CONTEXT (r9) layouts; the field names in the comments are
;   that assumption and should be confirmed against winnt.h.
;   Behaviour visible here: work out which stack pointer value was current
;   at function entry, depending on whether the faulting address lies
;   before $L$ctr32_body, inside the body, or at/after $L$ctr32_epilogue;
;   recover xmm6-xmm15, rbp, rsi and rdi into the record at r8; copy that
;   record to [r9+40]; call RtlVirtualUnwind; return 1.
ctr_xts_se_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64                          ; 32 bytes shadow space + four stack arguments

    mov rax,QWORD[120+r8]               ; presumably context->Rax (the function copies rsp into rax on entry)
    mov rbx,QWORD[248+r8]               ; presumably context->Rip

    mov rsi,QWORD[8+r9]                 ; presumably disp->ImageBase
    mov r11,QWORD[56+r9]                ; presumably disp->HandlerData: the two DDs after the handler in .xdata

    mov r10d,DWORD[r11]                 ; first handler-data dword ($L$ctr32_body, image-relative)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail          ; before the body: rax is the entry rsp

    mov rax,QWORD[152+r8]               ; presumably context->Rsp

    mov r10d,DWORD[4+r11]               ; second handler-data dword ($L$ctr32_epilogue)
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail         ; at/after the epilogue label: rsp is already the entry rsp

    mov rax,QWORD[208+r8]               ; presumably context->R11 (the body keeps the entry rsp in r11)

    lea rsi,[((-168))+rax]              ; xmm6-xmm15 save area written at $L$ctr32_bulk
    lea rdi,[512+r8]                    ; presumably &context->Xmm6
    mov ecx,20                          ; 20 qwords = 160 bytes = ten xmm registers
    DD 0xa548f3fc                       ; cld / rep movsq

    mov rbp,QWORD[((-8))+rax]           ; rbp pushed at $L$ctr32_bulk
    mov QWORD[160+r8],rbp               ; presumably context->Rbp


$L$common_seh_tail:
    mov rdi,QWORD[8+rax]                ; rdi/rsi parked by the WIN64 prologue
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax               ; presumably context->Rsp
    mov QWORD[168+r8],rsi               ; presumably context->Rsi
    mov QWORD[176+r8],rdi               ; presumably context->Rdi

    mov rdi,QWORD[40+r9]                ; presumably disp->ContextRecord
    mov rsi,r8
    mov ecx,154                         ; 154 qwords = 1232 bytes
    DD 0xa548f3fc                       ; cld / rep movsq

    mov rsi,r9
    xor rcx,rcx                         ; first argument = 0
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    mov QWORD[32+rsp],r10               ; stack arguments 5-8
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1                           ; handler return value
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret


; ---- unwind tables: (begin, end, info) triples, all image-relative ----
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_GFp_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
    DD $L$SEH_end_GFp_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
    DD $L$SEH_info_GFp_ctr32 wrt ..imagebase
    DD GFp_aes_hw_set_encrypt_key wrt ..imagebase
    DD $L$SEH_end_GFp_set_encrypt_key wrt ..imagebase
    DD $L$SEH_info_GFp_key wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
; NOTE(review): byte meanings below follow the x64 UNWIND_INFO format as I
; read it - confirm against the Microsoft x64 exception-handling reference.
$L$SEH_info_GFp_ctr32:
    DB 9,0,0,0                          ; presumably version 1 + "has exception handler" flag, no unwind codes
    DD ctr_xts_se_handler wrt ..imagebase
    DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase  ; handler data read via [r9+56]
$L$SEH_info_GFp_key:
    DB 0x01,0x04,0x01,0x00              ; presumably version 1, 4-byte prologue, one unwind code
    DB 0x04,0x02,0x00,0x00              ; presumably: at prologue offset 4, small stack allocation of 8 bytes (the sub rsp,8)