1;; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention 2;; The first four integer arguments are passed in registers. 3;; Integer values are passed in left-to-right order in RCX, 4;; RDX, R8, and R9, respectively. Arguments five and higher 5;; are passed on the stack. 6 7;; The registers RAX, RCX, RDX, R8, R9, R10, R11, XMM0-5, 8;; and the upper portions of YMM0-15 and ZMM0-15 are 9;; considered volatile and must be considered destroyed on 10;; function calls. 11 12include ksamd64.inc 13EXTERNDEF ?Te@rdtable@CryptoPP@@3PA_KA:FAR 14EXTERNDEF ?g_cacheLineSize@CryptoPP@@3IA:FAR 15EXTERNDEF ?SHA256_K@CryptoPP@@3QBIB:FAR 16.CODE 17 18 ALIGN 8 19Baseline_Add PROC 20 lea rdx, [rdx+8*rcx] 21 lea r8, [r8+8*rcx] 22 lea r9, [r9+8*rcx] 23 neg rcx ; rcx is negative index 24 jz $1@Baseline_Add 25 mov rax,[r8+8*rcx] 26 add rax,[r9+8*rcx] 27 mov [rdx+8*rcx],rax 28$0@Baseline_Add: 29 mov rax,[r8+8*rcx+8] 30 adc rax,[r9+8*rcx+8] 31 mov [rdx+8*rcx+8],rax 32 lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 33 jrcxz $1@Baseline_Add ; loop until rcx overflows and becomes zero 34 mov rax,[r8+8*rcx] 35 adc rax,[r9+8*rcx] 36 mov [rdx+8*rcx],rax 37 jmp $0@Baseline_Add 38$1@Baseline_Add: 39 mov rax, 0 40 adc rax, rax ; store carry into rax (return result register) 41 ret 42Baseline_Add ENDP 43 44 ALIGN 8 45Baseline_Sub PROC 46 lea rdx, [rdx+8*rcx] 47 lea r8, [r8+8*rcx] 48 lea r9, [r9+8*rcx] 49 neg rcx ; rcx is negative index 50 jz $1@Baseline_Sub 51 mov rax,[r8+8*rcx] 52 sub rax,[r9+8*rcx] 53 mov [rdx+8*rcx],rax 54$0@Baseline_Sub: 55 mov rax,[r8+8*rcx+8] 56 sbb rax,[r9+8*rcx+8] 57 mov [rdx+8*rcx+8],rax 58 lea rcx,[rcx+2] ; advance index, avoid inc which causes slowdown on Intel Core 2 59 jrcxz $1@Baseline_Sub ; loop until rcx overflows and becomes zero 60 mov rax,[r8+8*rcx] 61 sbb rax,[r9+8*rcx] 62 mov [rdx+8*rcx],rax 63 jmp $0@Baseline_Sub 64$1@Baseline_Sub: 65 mov rax, 0 66 adc rax, rax ; store carry into rax (return result register) 67 68 ret 69Baseline_Sub ENDP 70 71ALIGN 8 72Rijndael_Enc_AdvancedProcessBlocks_SSE2 PROC FRAME 73rex_push_reg rsi 74push_reg rdi 75push_reg rbx 76push_reg r12 77.endprolog 78mov r8, rcx 79mov r11, ?Te@rdtable@CryptoPP@@3PA_KA 80mov edi, DWORD PTR [?g_cacheLineSize@CryptoPP@@3IA] 81mov rsi, [(r8+16*19)] 82mov rax, 16 83and rax, rsi 84movdqa xmm3, XMMWORD PTR [rdx+16+rax] 85movdqa [(r8+16*12)], xmm3 86lea rax, [rdx+rax+2*16] 87sub rax, rsi 88label0: 89movdqa xmm0, [rax+rsi] 90movdqa XMMWORD PTR [(r8+0)+rsi], xmm0 91add rsi, 16 92cmp rsi, 16*12 93jl label0 94movdqa xmm4, [rax+rsi] 95movdqa xmm1, [rdx] 96mov r12d, [rdx+4*4] 97mov ebx, [rdx+5*4] 98mov ecx, [rdx+6*4] 99mov edx, [rdx+7*4] 100xor rax, rax 101label9: 102mov esi, [r11+rax] 103add rax, rdi 104mov esi, [r11+rax] 105add rax, rdi 106mov esi, [r11+rax] 107add rax, rdi 108mov esi, [r11+rax] 109add rax, rdi 110cmp rax, 2048 111jl label9 112lfence 113test DWORD PTR [(r8+16*18+8)], 1 114jz label8 115mov rsi, [(r8+16*14)] 116movdqu xmm2, [rsi] 117pxor xmm2, xmm1 118psrldq xmm1, 14 119movd eax, xmm1 120mov al, BYTE PTR [rsi+15] 121mov r10d, eax 122movd eax, xmm2 123psrldq xmm2, 4 124movd edi, xmm2 125psrldq xmm2, 4 126movzx esi, al 127xor r12d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 128movzx esi, ah 129xor edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 130shr eax, 16 131movzx esi, al 132xor ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 133movzx esi, ah 134xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 135mov eax, edi 136movd edi, xmm2 137psrldq xmm2, 4 138movzx esi, al 139xor ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 140movzx esi, ah 141xor r12d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 142shr eax, 16 143movzx esi, al 144xor edx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 145movzx esi, ah 146xor ecx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 147mov eax, edi 148movd edi, xmm2 149movzx esi, al 150xor ecx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 151movzx esi, ah 152xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 153shr eax, 16 154movzx esi, al 155xor r12d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 156movzx esi, ah 157xor edx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 158mov eax, edi 159movzx esi, al 160xor edx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 161movzx esi, ah 162xor ecx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 163shr eax, 16 164movzx esi, al 165xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 166psrldq xmm2, 3 167mov eax, [(r8+16*12)+0*4] 168mov edi, [(r8+16*12)+2*4] 169mov r9d, [(r8+16*12)+3*4] 170movzx esi, cl 171xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 172movzx esi, bl 173xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 174movzx esi, bh 175xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 176shr ebx, 16 177movzx esi, bl 178xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 179movzx esi, bh 180mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 181xor ebx, [(r8+16*12)+1*4] 182movzx esi, ch 183xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 184shr ecx, 16 185movzx esi, dl 186xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 187movzx esi, dh 188xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 189shr edx, 16 190movzx esi, ch 191xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 192movzx esi, cl 193xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 194movzx esi, dl 195xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 196movzx esi, dh 197xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 198movd ecx, xmm2 199mov edx, r12d 200mov [(r8+0)+3*4], r9d 201mov [(r8+0)+0*4], eax 202mov [(r8+0)+1*4], ebx 203mov [(r8+0)+2*4], edi 204jmp label5 205label3: 206mov r12d, [(r8+16*12)+0*4] 207mov ebx, [(r8+16*12)+1*4] 208mov ecx, [(r8+16*12)+2*4] 209mov edx, [(r8+16*12)+3*4] 210label8: 211mov rax, [(r8+16*14)] 212movdqu xmm2, [rax] 213mov rsi, [(r8+16*14)+8] 214movdqu xmm5, [rsi] 215pxor xmm2, xmm1 216pxor xmm2, xmm5 217movd eax, xmm2 218psrldq xmm2, 4 219movd edi, xmm2 220psrldq xmm2, 4 221movzx esi, al 222xor r12d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 223movzx esi, ah 224xor edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 225shr eax, 16 226movzx esi, al 227xor ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 228movzx esi, ah 229xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 230mov eax, edi 231movd edi, xmm2 232psrldq xmm2, 4 233movzx esi, al 234xor ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 235movzx esi, ah 236xor r12d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 237shr eax, 16 238movzx esi, al 239xor edx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 240movzx esi, ah 241xor ecx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 242mov eax, edi 243movd edi, xmm2 244movzx esi, al 245xor ecx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 246movzx esi, ah 247xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 248shr eax, 16 249movzx esi, al 250xor r12d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 251movzx esi, ah 252xor edx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 253mov eax, edi 254movzx esi, al 255xor edx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 256movzx esi, ah 257xor ecx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 258shr eax, 16 259movzx esi, al 260xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 261movzx esi, ah 262xor r12d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 263mov eax, r12d 264add r8, [(r8+16*19)] 265add r8, 4*16 266jmp label2 267label1: 268mov ecx, r10d 269mov edx, r12d 270mov eax, [(r8+0)+0*4] 271mov ebx, [(r8+0)+1*4] 272xor cl, ch 273and rcx, 255 274label5: 275add r10d, 1 276xor edx, DWORD PTR [r11+rcx*8+3] 277movzx esi, dl 278xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 279movzx esi, dh 280mov ecx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 281shr edx, 16 282xor ecx, [(r8+0)+2*4] 283movzx esi, dh 284xor eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 285movzx esi, dl 286mov edx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 287xor edx, [(r8+0)+3*4] 288add r8, [(r8+16*19)] 289add r8, 3*16 290jmp label4 291label2: 292mov r9d, [(r8+0)-4*16+3*4] 293mov edi, [(r8+0)-4*16+2*4] 294movzx esi, cl 295xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 296mov cl, al 297movzx esi, ah 298xor edi, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 299shr eax, 16 300movzx esi, bl 301xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 302movzx esi, bh 303xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 304shr ebx, 16 305movzx esi, al 306xor r9d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 307movzx esi, ah 308mov eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 309movzx esi, bl 310xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 311movzx esi, bh 312mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 313movzx esi, ch 314xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 315movzx esi, cl 316xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 317shr ecx, 16 318movzx esi, dl 319xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 320movzx esi, dh 321xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 322shr edx, 16 323movzx esi, ch 324xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 325movzx esi, cl 326xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 327movzx esi, dl 328xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 329movzx esi, dh 330xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 331mov ecx, edi 332xor eax, [(r8+0)-4*16+0*4] 333xor ebx, [(r8+0)-4*16+1*4] 334mov edx, r9d 335label4: 336mov r9d, [(r8+0)-4*16+7*4] 337mov edi, [(r8+0)-4*16+6*4] 338movzx esi, cl 339xor r9d, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 340mov cl, al 341movzx esi, ah 342xor edi, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 343shr eax, 16 344movzx esi, bl 345xor edi, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 346movzx esi, bh 347xor r9d, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 348shr ebx, 16 349movzx esi, al 350xor r9d, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 351movzx esi, ah 352mov eax, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 353movzx esi, bl 354xor eax, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 355movzx esi, bh 356mov ebx, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 357movzx esi, ch 358xor eax, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 359movzx esi, cl 360xor ebx, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 361shr ecx, 16 362movzx esi, dl 363xor eax, DWORD PTR [r11+8*rsi+(((3+3) MOD (4))+1)] 364movzx esi, dh 365xor ebx, DWORD PTR [r11+8*rsi+(((2+3) MOD (4))+1)] 366shr edx, 16 367movzx esi, ch 368xor edi, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 369movzx esi, cl 370xor ebx, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 371movzx esi, dl 372xor edi, DWORD PTR [r11+8*rsi+(((1+3) MOD (4))+1)] 373movzx esi, dh 374xor r9d, DWORD PTR [r11+8*rsi+(((0+3) MOD (4))+1)] 375mov ecx, edi 376xor eax, [(r8+0)-4*16+4*4] 377xor ebx, [(r8+0)-4*16+5*4] 378mov edx, r9d 379add r8, 32 380test r8, 255 381jnz label2 382sub r8, 16*16 383movzx esi, ch 384movzx edi, BYTE PTR [r11+rsi*8+1] 385movzx esi, dl 386xor edi, DWORD PTR [r11+rsi*8+0] 387mov WORD PTR [(r8+16*13)+2], di 388movzx esi, dh 389movzx edi, BYTE PTR [r11+rsi*8+1] 390movzx esi, al 391xor edi, DWORD PTR [r11+rsi*8+0] 392mov WORD PTR [(r8+16*13)+6], di 393shr edx, 16 394movzx esi, ah 395movzx edi, BYTE PTR [r11+rsi*8+1] 396movzx esi, bl 397xor edi, DWORD PTR [r11+rsi*8+0] 398mov WORD PTR [(r8+16*13)+10], di 399shr eax, 16 400movzx esi, bh 401movzx edi, BYTE PTR [r11+rsi*8+1] 402movzx esi, cl 403xor edi, DWORD PTR [r11+rsi*8+0] 404mov WORD PTR [(r8+16*13)+14], di 405shr ebx, 16 406movzx esi, dh 407movzx edi, BYTE PTR [r11+rsi*8+1] 408movzx esi, al 409xor edi, DWORD PTR [r11+rsi*8+0] 410mov WORD PTR [(r8+16*13)+12], di 411shr ecx, 16 412movzx esi, ah 413movzx edi, BYTE PTR [r11+rsi*8+1] 414movzx esi, bl 415xor edi, DWORD PTR [r11+rsi*8+0] 416mov WORD PTR [(r8+16*13)+0], di 417movzx esi, bh 418movzx edi, BYTE PTR [r11+rsi*8+1] 419movzx esi, cl 420xor edi, DWORD PTR [r11+rsi*8+0] 421mov WORD PTR [(r8+16*13)+4], di 422movzx esi, ch 423movzx edi, BYTE PTR [r11+rsi*8+1] 424movzx esi, dl 425xor edi, DWORD PTR [r11+rsi*8+0] 426mov WORD PTR [(r8+16*13)+8], di 427mov rax, [(r8+16*14)+16] 428mov rbx, [(r8+16*14)+24] 429mov rcx, [(r8+16*18+8)] 430sub rcx, 16 431movdqu xmm2, [rax] 432pxor xmm2, xmm4 433movdqa xmm0, [(r8+16*16)+16] 434paddq xmm0, [(r8+16*14)+16] 435movdqa [(r8+16*14)+16], xmm0 436pxor xmm2, [(r8+16*13)] 437movdqu [rbx], xmm2 438jle label7 439mov [(r8+16*18+8)], rcx 440test rcx, 1 441jnz label1 442movdqa xmm0, [(r8+16*16)] 443paddq xmm0, [(r8+16*14)] 444movdqa [(r8+16*14)], xmm0 445jmp label3 446label7: 447xorps xmm0, xmm0 448lea rax, [(r8+0)+7*16] 449movaps [rax-7*16], xmm0 450movaps [rax-6*16], xmm0 451movaps [rax-5*16], xmm0 452movaps [rax-4*16], xmm0 453movaps [rax-3*16], xmm0 454movaps [rax-2*16], xmm0 455movaps [rax-1*16], xmm0 456movaps [rax+0*16], xmm0 457movaps [rax+1*16], xmm0 458movaps [rax+2*16], xmm0 459movaps [rax+3*16], xmm0 460movaps [rax+4*16], xmm0 461movaps [rax+5*16], xmm0 462movaps [rax+6*16], xmm0 463pop r12 464pop rbx 465pop rdi 466pop rsi 467ret 468Rijndael_Enc_AdvancedProcessBlocks_SSE2 ENDP 469 470ALIGN 8 471GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME 472rex_push_reg rsi 473push_reg rdi 474push_reg rbx 475.endprolog 476mov rsi, r8 477mov r11, r9 478movdqa xmm0, [rsi] 479label0: 480movdqu xmm4, [rcx] 481pxor xmm0, xmm4 482movd ebx, xmm0 483mov eax, 0f0f0f0f0h 484and eax, ebx 485shl ebx, 4 486and ebx, 0f0f0f0f0h 487movzx edi, ah 488movdqa xmm5, XMMWORD PTR [rsi + 32 + 1024 + rdi] 489movzx edi, al 490movdqa xmm4, XMMWORD PTR [rsi + 32 + 1024 + rdi] 491shr eax, 16 492movzx edi, ah 493movdqa xmm3, XMMWORD PTR [rsi + 32 + 1024 + rdi] 494movzx edi, al 495movdqa xmm2, XMMWORD PTR [rsi + 32 + 1024 + rdi] 496psrldq xmm0, 4 497movd eax, xmm0 498and eax, 0f0f0f0f0h 499movzx edi, bh 500pxor xmm5, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] 501movzx edi, bl 502pxor xmm4, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] 503shr ebx, 16 504movzx edi, bh 505pxor xmm3, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] 506movzx edi, bl 507pxor xmm2, XMMWORD PTR [rsi + 32 + (1-1)*256 + rdi] 508movd ebx, xmm0 509shl ebx, 4 510and ebx, 0f0f0f0f0h 511movzx edi, ah 512pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] 513movzx edi, al 514pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] 515shr eax, 16 516movzx edi, ah 517pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] 518movzx edi, al 519pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 1*256 + rdi] 520psrldq xmm0, 4 521movd eax, xmm0 522and eax, 0f0f0f0f0h 523movzx edi, bh 524pxor xmm5, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] 525movzx edi, bl 526pxor xmm4, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] 527shr ebx, 16 528movzx edi, bh 529pxor xmm3, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] 530movzx edi, bl 531pxor xmm2, XMMWORD PTR [rsi + 32 + (2-1)*256 + rdi] 532movd ebx, xmm0 533shl ebx, 4 534and ebx, 0f0f0f0f0h 535movzx edi, ah 536pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] 537movzx edi, al 538pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] 539shr eax, 16 540movzx edi, ah 541pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] 542movzx edi, al 543pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 2*256 + rdi] 544psrldq xmm0, 4 545movd eax, xmm0 546and eax, 0f0f0f0f0h 547movzx edi, bh 548pxor xmm5, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] 549movzx edi, bl 550pxor xmm4, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] 551shr ebx, 16 552movzx edi, bh 553pxor xmm3, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] 554movzx edi, bl 555pxor xmm2, XMMWORD PTR [rsi + 32 + (3-1)*256 + rdi] 556movd ebx, xmm0 557shl ebx, 4 558and ebx, 0f0f0f0f0h 559movzx edi, ah 560pxor xmm5, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] 561movzx edi, al 562pxor xmm4, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] 563shr eax, 16 564movzx edi, ah 565pxor xmm3, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] 566movzx edi, al 567pxor xmm2, XMMWORD PTR [rsi + 32 + 1024 + 3*256 + rdi] 568movzx edi, bh 569pxor xmm5, XMMWORD PTR [rsi + 32 + 3*256 + rdi] 570movzx edi, bl 571pxor xmm4, XMMWORD PTR [rsi + 32 + 3*256 + rdi] 572shr ebx, 16 573movzx edi, bh 574pxor xmm3, XMMWORD PTR [rsi + 32 + 3*256 + rdi] 575movzx edi, bl 576pxor xmm2, XMMWORD PTR [rsi + 32 + 3*256 + rdi] 577movdqa xmm0, xmm3 578pslldq xmm3, 1 579pxor xmm2, xmm3 580movdqa xmm1, xmm2 581pslldq xmm2, 1 582pxor xmm5, xmm2 583psrldq xmm0, 15 584movd rdi, xmm0 585movzx eax, WORD PTR [r11 + rdi*2] 586shl eax, 8 587movdqa xmm0, xmm5 588pslldq xmm5, 1 589pxor xmm4, xmm5 590psrldq xmm1, 15 591movd rdi, xmm1 592xor ax, WORD PTR [r11 + rdi*2] 593shl eax, 8 594psrldq xmm0, 15 595movd rdi, xmm0 596xor ax, WORD PTR [r11 + rdi*2] 597movd xmm0, eax 598pxor xmm0, xmm4 599add rcx, 16 600sub rdx, 1 601jnz label0 602movdqa [rsi], xmm0 603pop rbx 604pop rdi 605pop rsi 606ret 607GCM_AuthenticateBlocks_2K_SSE2 ENDP 608 609ALIGN 8 610GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME 611rex_push_reg rsi 612push_reg rdi 613.endprolog 614mov rsi, r8 615movdqa xmm0, [rsi] 616label1: 617movdqu xmm1, [rcx] 618pxor xmm1, xmm0 619pxor xmm0, xmm0 620movd eax, xmm1 621psrldq xmm1, 4 622movzx edi, al 623add rdi, rdi 624pxor xmm0, [rsi + 32 + (0*4+0)*256*16 + rdi*8] 625movzx edi, ah 626add rdi, rdi 627pxor xmm0, [rsi + 32 + (0*4+1)*256*16 + rdi*8] 628shr eax, 16 629movzx edi, al 630add rdi, rdi 631pxor xmm0, [rsi + 32 + (0*4+2)*256*16 + rdi*8] 632movzx edi, ah 633add rdi, rdi 634pxor xmm0, [rsi + 32 + (0*4+3)*256*16 + rdi*8] 635movd eax, xmm1 636psrldq xmm1, 4 637movzx edi, al 638add rdi, rdi 639pxor xmm0, [rsi + 32 + (1*4+0)*256*16 + rdi*8] 640movzx edi, ah 641add rdi, rdi 642pxor xmm0, [rsi + 32 + (1*4+1)*256*16 + rdi*8] 643shr eax, 16 644movzx edi, al 645add rdi, rdi 646pxor xmm0, [rsi + 32 + (1*4+2)*256*16 + rdi*8] 647movzx edi, ah 648add rdi, rdi 649pxor xmm0, [rsi + 32 + (1*4+3)*256*16 + rdi*8] 650movd eax, xmm1 651psrldq xmm1, 4 652movzx edi, al 653add rdi, rdi 654pxor xmm0, [rsi + 32 + (2*4+0)*256*16 + rdi*8] 655movzx edi, ah 656add rdi, rdi 657pxor xmm0, [rsi + 32 + (2*4+1)*256*16 + rdi*8] 658shr eax, 16 659movzx edi, al 660add rdi, rdi 661pxor xmm0, [rsi + 32 + (2*4+2)*256*16 + rdi*8] 662movzx edi, ah 663add rdi, rdi 664pxor xmm0, [rsi + 32 + (2*4+3)*256*16 + rdi*8] 665movd eax, xmm1 666psrldq xmm1, 4 667movzx edi, al 668add rdi, rdi 669pxor xmm0, [rsi + 32 + (3*4+0)*256*16 + rdi*8] 670movzx edi, ah 671add rdi, rdi 672pxor xmm0, [rsi + 32 + (3*4+1)*256*16 + rdi*8] 673shr eax, 16 674movzx edi, al 675add rdi, rdi 676pxor xmm0, [rsi + 32 + (3*4+2)*256*16 + rdi*8] 677movzx edi, ah 678add rdi, rdi 679pxor xmm0, [rsi + 32 + (3*4+3)*256*16 + rdi*8] 680add rcx, 16 681sub rdx, 1 682jnz label1 683movdqa [rsi], xmm0 684pop rdi 685pop rsi 686ret 687GCM_AuthenticateBlocks_64K_SSE2 ENDP 688 689ALIGN 8 690SHA256_HashMultipleBlocks_SSE2 PROC FRAME 691rex_push_reg rsi 692push_reg rdi 693push_reg rbx 694push_reg rbp 695alloc_stack(8*4 + 16*4 + 4*8 + 8) 696.endprolog 697mov rdi, r8 698lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] 699mov [rsp+8*4+16*4+1*8], rcx 700mov [rsp+8*4+16*4+2*8], rdx 701add rdi, rdx 702mov [rsp+8*4+16*4+3*8], rdi 703movdqa xmm0, XMMWORD PTR [rcx+0*16] 704movdqa xmm1, XMMWORD PTR [rcx+1*16] 705mov [rsp+8*4+16*4+0*8], rsi 706label0: 707sub rsi, 48*4 708movdqa [rsp+((1024+7-(0+3)) MOD (8))*4], xmm1 709movdqa [rsp+((1024+7-(0+7)) MOD (8))*4], xmm0 710mov rbx, [rdx+0*8] 711bswap rbx 712mov [rsp+8*4+((1024+15-(0*(1+1)+1)) MOD (16))*4], rbx 713mov rbx, [rdx+1*8] 714bswap rbx 715mov [rsp+8*4+((1024+15-(1*(1+1)+1)) MOD (16))*4], rbx 716mov rbx, [rdx+2*8] 717bswap rbx 718mov [rsp+8*4+((1024+15-(2*(1+1)+1)) MOD (16))*4], rbx 719mov rbx, [rdx+3*8] 720bswap rbx 721mov [rsp+8*4+((1024+15-(3*(1+1)+1)) MOD (16))*4], rbx 722mov rbx, [rdx+4*8] 723bswap rbx 724mov [rsp+8*4+((1024+15-(4*(1+1)+1)) MOD (16))*4], rbx 725mov rbx, [rdx+5*8] 726bswap rbx 727mov [rsp+8*4+((1024+15-(5*(1+1)+1)) MOD (16))*4], rbx 728mov rbx, [rdx+6*8] 729bswap rbx 730mov [rsp+8*4+((1024+15-(6*(1+1)+1)) MOD (16))*4], rbx 731mov rbx, [rdx+7*8] 732bswap rbx 733mov [rsp+8*4+((1024+15-(7*(1+1)+1)) MOD (16))*4], rbx 734mov edi, [rsp+((1024+7-(0+3)) MOD (8))*4] 735mov eax, [rsp+((1024+7-(0+6)) MOD (8))*4] 736xor eax, [rsp+((1024+7-(0+5)) MOD (8))*4] 737mov ecx, [rsp+((1024+7-(0+7)) MOD (8))*4] 738mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] 739xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] 740and edx, edi 741xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] 742mov ebp, edi 743ror edi, 6 744ror ebp, 25 745add edx, [rsi+(0)*4] 746add edx, [rsp+8*4+((1024+15-(0)) MOD (16))*4] 747add edx, [rsp+((1024+7-(0)) MOD (8))*4] 748xor ebp, edi 749ror edi, 5 750xor ebp, edi 751add edx, ebp 752mov ebx, ecx 753xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] 754and eax, ecx 755xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] 756mov ebp, ebx 757ror ebx, 2 758add eax, edx 759add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] 760mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx 761ror ebp, 22 762xor ebp, ebx 763ror ebx, 11 764xor ebp, ebx 765add eax, ebp 766mov [rsp+((1024+7-(0)) MOD (8))*4], eax 767mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] 768xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] 769and edi, edx 770xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] 771mov ebp, edx 772ror edx, 6 773ror ebp, 25 774add edi, [rsi+(1)*4] 775add edi, [rsp+8*4+((1024+15-(1)) MOD (16))*4] 776add edi, [rsp+((1024+7-(1)) MOD (8))*4] 777xor ebp, edx 778ror edx, 5 779xor ebp, edx 780add edi, ebp 781mov ebx, eax 782xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] 783and ecx, eax 784xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] 785mov ebp, ebx 786ror ebx, 2 787add ecx, edi 788add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] 789mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi 790ror ebp, 22 791xor ebp, ebx 792ror ebx, 11 793xor ebp, ebx 794add ecx, ebp 795mov [rsp+((1024+7-(1)) MOD (8))*4], ecx 796mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] 797xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] 798and edx, edi 799xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] 800mov ebp, edi 801ror edi, 6 802ror ebp, 25 803add edx, [rsi+(2)*4] 804add edx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] 805add edx, [rsp+((1024+7-(2)) MOD (8))*4] 806xor ebp, edi 807ror edi, 5 808xor ebp, edi 809add edx, ebp 810mov ebx, ecx 811xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] 812and eax, ecx 813xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] 814mov ebp, ebx 815ror ebx, 2 816add eax, edx 817add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] 818mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx 819ror ebp, 22 820xor ebp, ebx 821ror ebx, 11 822xor ebp, ebx 823add eax, ebp 824mov [rsp+((1024+7-(2)) MOD (8))*4], eax 825mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] 826xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] 827and edi, edx 828xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] 829mov ebp, edx 830ror edx, 6 831ror ebp, 25 832add edi, [rsi+(3)*4] 833add edi, [rsp+8*4+((1024+15-(3)) MOD (16))*4] 834add edi, [rsp+((1024+7-(3)) MOD (8))*4] 835xor ebp, edx 836ror edx, 5 837xor ebp, edx 838add edi, ebp 839mov ebx, eax 840xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] 841and ecx, eax 842xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] 843mov ebp, ebx 844ror ebx, 2 845add ecx, edi 846add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] 847mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi 848ror ebp, 22 849xor ebp, ebx 850ror ebx, 11 851xor ebp, ebx 852add ecx, ebp 853mov [rsp+((1024+7-(3)) MOD (8))*4], ecx 854mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] 855xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] 856and edx, edi 857xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] 858mov ebp, edi 859ror edi, 6 860ror ebp, 25 861add edx, [rsi+(4)*4] 862add edx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] 863add edx, [rsp+((1024+7-(4)) MOD (8))*4] 864xor ebp, edi 865ror edi, 5 866xor ebp, edi 867add edx, ebp 868mov ebx, ecx 869xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] 870and eax, ecx 871xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] 872mov ebp, ebx 873ror ebx, 2 874add eax, edx 875add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] 876mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx 877ror ebp, 22 878xor ebp, ebx 879ror ebx, 11 880xor ebp, ebx 881add eax, ebp 882mov [rsp+((1024+7-(4)) MOD (8))*4], eax 883mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] 884xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] 885and edi, edx 886xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] 887mov ebp, edx 888ror edx, 6 889ror ebp, 25 890add edi, [rsi+(5)*4] 891add edi, [rsp+8*4+((1024+15-(5)) MOD (16))*4] 892add edi, [rsp+((1024+7-(5)) MOD (8))*4] 893xor ebp, edx 894ror edx, 5 895xor ebp, edx 896add edi, ebp 897mov ebx, eax 898xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] 899and ecx, eax 900xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] 901mov ebp, ebx 902ror ebx, 2 903add ecx, edi 904add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] 905mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi 906ror ebp, 22 907xor ebp, ebx 908ror ebx, 11 909xor ebp, ebx 910add ecx, ebp 911mov [rsp+((1024+7-(5)) MOD (8))*4], ecx 912mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4] 913xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] 914and edx, edi 915xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] 916mov ebp, edi 917ror edi, 6 918ror ebp, 25 919add edx, [rsi+(6)*4] 920add edx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] 921add edx, [rsp+((1024+7-(6)) MOD (8))*4] 922xor ebp, edi 923ror edi, 5 924xor ebp, edi 925add edx, ebp 926mov ebx, ecx 927xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] 928and eax, ecx 929xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] 930mov ebp, ebx 931ror ebx, 2 932add eax, edx 933add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] 934mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx 935ror ebp, 22 936xor ebp, ebx 937ror ebx, 11 938xor ebp, ebx 939add eax, ebp 940mov [rsp+((1024+7-(6)) MOD (8))*4], eax 941mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] 942xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] 943and edi, edx 944xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] 945mov ebp, edx 946ror edx, 6 947ror ebp, 25 948add edi, [rsi+(7)*4] 949add edi, [rsp+8*4+((1024+15-(7)) MOD (16))*4] 950add edi, [rsp+((1024+7-(7)) MOD (8))*4] 951xor ebp, edx 952ror edx, 5 953xor ebp, edx 954add edi, ebp 955mov ebx, eax 956xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] 957and ecx, eax 958xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] 959mov ebp, ebx 960ror ebx, 2 961add ecx, edi 962add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] 963mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi 964ror ebp, 22 965xor ebp, ebx 966ror ebx, 11 967xor ebp, ebx 968add ecx, ebp 969mov [rsp+((1024+7-(7)) MOD (8))*4], ecx 970mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] 971xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] 972and edx, edi 973xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] 974mov ebp, edi 975ror edi, 6 976ror ebp, 25 977add edx, [rsi+(8)*4] 978add edx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] 979add edx, [rsp+((1024+7-(8)) MOD (8))*4] 980xor ebp, edi 981ror edi, 5 982xor ebp, edi 983add edx, ebp 984mov ebx, ecx 985xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] 986and eax, ecx 987xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] 988mov ebp, ebx 989ror ebx, 2 990add eax, edx 991add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] 992mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx 993ror ebp, 22 994xor ebp, ebx 995ror ebx, 11 996xor ebp, ebx 997add eax, ebp 998mov [rsp+((1024+7-(8)) MOD (8))*4], eax 999mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] 1000xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] 1001and edi, edx 1002xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] 1003mov ebp, edx 1004ror edx, 6 1005ror ebp, 25 1006add edi, [rsi+(9)*4] 1007add edi, [rsp+8*4+((1024+15-(9)) MOD (16))*4] 1008add edi, [rsp+((1024+7-(9)) MOD (8))*4] 1009xor ebp, edx 1010ror edx, 5 1011xor ebp, edx 1012add edi, ebp 1013mov ebx, eax 1014xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] 1015and ecx, eax 1016xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] 1017mov ebp, ebx 1018ror ebx, 2 1019add ecx, edi 1020add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] 1021mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi 1022ror ebp, 22 1023xor ebp, ebx 1024ror ebx, 11 1025xor ebp, ebx 1026add ecx, ebp 1027mov [rsp+((1024+7-(9)) MOD (8))*4], ecx 1028mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] 1029xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] 1030and edx, edi 1031xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] 1032mov ebp, edi 1033ror edi, 6 1034ror ebp, 25 1035add edx, [rsi+(10)*4] 1036add edx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] 1037add edx, [rsp+((1024+7-(10)) MOD (8))*4] 1038xor ebp, edi 1039ror edi, 5 1040xor ebp, edi 1041add edx, ebp 1042mov ebx, ecx 1043xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] 1044and eax, ecx 1045xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] 1046mov ebp, ebx 1047ror ebx, 2 1048add eax, edx 1049add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] 1050mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx 1051ror ebp, 22 1052xor ebp, ebx 1053ror ebx, 11 1054xor ebp, ebx 1055add eax, ebp 1056mov [rsp+((1024+7-(10)) MOD (8))*4], eax 1057mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] 1058xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] 1059and edi, edx 1060xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] 1061mov ebp, edx 1062ror edx, 6 1063ror ebp, 25 1064add edi, [rsi+(11)*4] 1065add edi, [rsp+8*4+((1024+15-(11)) MOD (16))*4] 1066add edi, [rsp+((1024+7-(11)) MOD (8))*4] 1067xor ebp, edx 1068ror edx, 5 1069xor ebp, edx 1070add edi, ebp 1071mov ebx, eax 1072xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] 1073and ecx, eax 1074xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] 1075mov ebp, ebx 1076ror ebx, 2 1077add ecx, edi 1078add edi, [rsp+((1024+7-(11+4)) MOD (8))*4] 1079mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi 1080ror ebp, 22 1081xor ebp, ebx 1082ror ebx, 11 1083xor ebp, ebx 1084add ecx, ebp 1085mov [rsp+((1024+7-(11)) MOD (8))*4], ecx 1086mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] 1087xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] 1088and edx, edi 1089xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] 1090mov ebp, edi 1091ror edi, 6 1092ror ebp, 25 1093add edx, [rsi+(12)*4] 1094add edx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] 1095add edx, [rsp+((1024+7-(12)) MOD (8))*4] 1096xor ebp, edi 1097ror edi, 5 1098xor ebp, edi 1099add edx, ebp 1100mov ebx, ecx 1101xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] 1102and eax, ecx 1103xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] 1104mov ebp, ebx 1105ror ebx, 2 1106add eax, edx 1107add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] 1108mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx 1109ror ebp, 22 1110xor ebp, ebx 1111ror ebx, 11 1112xor ebp, ebx 1113add eax, ebp 1114mov [rsp+((1024+7-(12)) MOD (8))*4], eax 1115mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] 1116xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] 1117and edi, edx 1118xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] 1119mov ebp, edx 1120ror edx, 6 1121ror ebp, 25 1122add edi, [rsi+(13)*4] 1123add edi, [rsp+8*4+((1024+15-(13)) MOD (16))*4] 1124add edi, [rsp+((1024+7-(13)) MOD (8))*4] 1125xor ebp, edx 1126ror edx, 5 1127xor ebp, edx 1128add edi, ebp 1129mov ebx, eax 1130xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4] 1131and ecx, eax 1132xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] 1133mov ebp, ebx 1134ror ebx, 2 1135add ecx, edi 1136add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] 1137mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi 1138ror ebp, 22 1139xor ebp, ebx 1140ror ebx, 11 1141xor ebp, ebx 1142add ecx, ebp 1143mov [rsp+((1024+7-(13)) MOD (8))*4], ecx 1144mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] 1145xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] 1146and edx, edi 1147xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] 1148mov ebp, edi 1149ror edi, 6 1150ror ebp, 25 1151add edx, [rsi+(14)*4] 1152add edx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] 1153add edx, [rsp+((1024+7-(14)) MOD (8))*4] 1154xor ebp, edi 1155ror edi, 5 1156xor ebp, edi 1157add edx, ebp 1158mov ebx, ecx 1159xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4] 1160and eax, ecx 1161xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] 1162mov ebp, ebx 1163ror ebx, 2 1164add eax, edx 1165add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] 1166mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx 1167ror ebp, 22 1168xor ebp, ebx 1169ror ebx, 11 1170xor ebp, ebx 1171add eax, ebp 1172mov [rsp+((1024+7-(14)) MOD (8))*4], eax 1173mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] 1174xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] 1175and edi, edx 1176xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] 1177mov ebp, edx 1178ror edx, 6 1179ror ebp, 25 1180add edi, [rsi+(15)*4] 1181add edi, [rsp+8*4+((1024+15-(15)) MOD (16))*4] 1182add edi, [rsp+((1024+7-(15)) MOD (8))*4] 1183xor ebp, edx 1184ror edx, 5 1185xor ebp, edx 1186add edi, ebp 1187mov ebx, eax 1188xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] 1189and ecx, eax 1190xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] 1191mov ebp, ebx 1192ror ebx, 2 1193add ecx, edi 1194add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] 1195mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi 1196ror ebp, 22 1197xor ebp, ebx 1198ror ebx, 11 1199xor ebp, ebx 1200add ecx, ebp 1201mov [rsp+((1024+7-(15)) MOD (8))*4], ecx 1202label1: 1203add rsi, 4*16 1204mov edx, [rsp+((1024+7-(0+2)) MOD (8))*4] 1205xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] 1206and edx, edi 1207xor edx, [rsp+((1024+7-(0+1)) MOD (8))*4] 1208mov ebp, edi 1209ror edi, 6 1210ror ebp, 25 1211xor ebp, edi 1212ror edi, 5 1213xor ebp, edi 1214add edx, ebp 1215mov ebp, [rsp+8*4+((1024+15-((0)-2)) MOD (16))*4] 1216mov edi, [rsp+8*4+((1024+15-((0)-15)) MOD (16))*4] 1217mov ebx, ebp 1218shr ebp, 10 1219ror ebx, 17 1220xor ebp, ebx 1221ror ebx, 2 1222xor ebx, ebp 1223add ebx, [rsp+8*4+((1024+15-((0)-7)) MOD (16))*4] 1224mov ebp, edi 1225shr ebp, 3 1226ror edi, 7 1227add ebx, [rsp+8*4+((1024+15-(0)) MOD (16))*4] 1228xor ebp, edi 1229add edx, [rsi+(0)*4] 1230ror edi, 11 1231add edx, [rsp+((1024+7-(0)) MOD (8))*4] 1232xor ebp, edi 1233add ebp, ebx 1234mov [rsp+8*4+((1024+15-(0)) MOD (16))*4], ebp 1235add edx, ebp 1236mov ebx, ecx 1237xor ecx, [rsp+((1024+7-(0+6)) MOD (8))*4] 1238and eax, ecx 1239xor eax, [rsp+((1024+7-(0+6)) MOD (8))*4] 1240mov ebp, ebx 1241ror ebx, 2 1242add eax, edx 1243add edx, [rsp+((1024+7-(0+4)) MOD (8))*4] 1244mov [rsp+((1024+7-(0+4)) MOD (8))*4], edx 1245ror ebp, 22 1246xor ebp, ebx 1247ror ebx, 11 1248xor ebp, ebx 1249add eax, ebp 1250mov [rsp+((1024+7-(0)) MOD (8))*4], eax 1251mov edi, [rsp+((1024+7-(1+2)) MOD (8))*4] 1252xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] 1253and edi, edx 1254xor edi, [rsp+((1024+7-(1+1)) MOD (8))*4] 1255mov ebp, edx 1256ror edx, 6 1257ror ebp, 25 1258xor ebp, edx 1259ror edx, 5 1260xor ebp, edx 1261add edi, ebp 1262mov ebp, [rsp+8*4+((1024+15-((1)-2)) MOD (16))*4] 1263mov edx, [rsp+8*4+((1024+15-((1)-15)) MOD (16))*4] 1264mov ebx, ebp 1265shr ebp, 10 1266ror ebx, 17 1267xor ebp, ebx 1268ror ebx, 2 1269xor ebx, ebp 1270add ebx, [rsp+8*4+((1024+15-((1)-7)) MOD (16))*4] 1271mov ebp, edx 1272shr ebp, 3 1273ror edx, 7 1274add ebx, [rsp+8*4+((1024+15-(1)) MOD (16))*4] 1275xor ebp, edx 1276add edi, [rsi+(1)*4] 1277ror edx, 11 1278add edi, [rsp+((1024+7-(1)) MOD (8))*4] 1279xor ebp, edx 1280add ebp, ebx 1281mov [rsp+8*4+((1024+15-(1)) MOD (16))*4], ebp 1282add edi, ebp 1283mov ebx, eax 1284xor eax, [rsp+((1024+7-(1+6)) MOD (8))*4] 1285and ecx, eax 1286xor ecx, [rsp+((1024+7-(1+6)) MOD (8))*4] 1287mov ebp, ebx 1288ror ebx, 2 1289add ecx, edi 1290add edi, [rsp+((1024+7-(1+4)) MOD (8))*4] 1291mov [rsp+((1024+7-(1+4)) MOD (8))*4], edi 1292ror ebp, 22 1293xor ebp, ebx 1294ror ebx, 11 1295xor ebp, ebx 1296add ecx, ebp 1297mov [rsp+((1024+7-(1)) MOD (8))*4], ecx 1298mov edx, [rsp+((1024+7-(2+2)) MOD (8))*4] 1299xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] 1300and edx, edi 1301xor edx, [rsp+((1024+7-(2+1)) MOD (8))*4] 1302mov ebp, edi 1303ror edi, 6 1304ror ebp, 25 1305xor ebp, edi 1306ror edi, 5 1307xor ebp, edi 1308add edx, ebp 1309mov ebp, [rsp+8*4+((1024+15-((2)-2)) MOD (16))*4] 1310mov edi, [rsp+8*4+((1024+15-((2)-15)) MOD (16))*4] 1311mov ebx, ebp 1312shr ebp, 10 1313ror ebx, 17 1314xor ebp, ebx 1315ror ebx, 2 1316xor ebx, ebp 1317add ebx, [rsp+8*4+((1024+15-((2)-7)) MOD (16))*4] 1318mov ebp, edi 1319shr ebp, 3 1320ror edi, 7 1321add ebx, [rsp+8*4+((1024+15-(2)) MOD (16))*4] 1322xor ebp, edi 1323add edx, [rsi+(2)*4] 1324ror edi, 11 1325add edx, [rsp+((1024+7-(2)) MOD (8))*4] 1326xor ebp, edi 1327add ebp, ebx 1328mov [rsp+8*4+((1024+15-(2)) MOD (16))*4], ebp 1329add edx, ebp 1330mov ebx, ecx 1331xor ecx, [rsp+((1024+7-(2+6)) MOD (8))*4] 1332and eax, ecx 1333xor eax, [rsp+((1024+7-(2+6)) MOD (8))*4] 1334mov ebp, ebx 1335ror ebx, 2 1336add eax, edx 1337add edx, [rsp+((1024+7-(2+4)) MOD (8))*4] 1338mov [rsp+((1024+7-(2+4)) MOD (8))*4], edx 1339ror ebp, 22 1340xor ebp, ebx 1341ror ebx, 11 1342xor ebp, ebx 1343add eax, ebp 1344mov [rsp+((1024+7-(2)) MOD (8))*4], eax 1345mov edi, [rsp+((1024+7-(3+2)) MOD (8))*4] 1346xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] 1347and edi, edx 1348xor edi, [rsp+((1024+7-(3+1)) MOD (8))*4] 1349mov ebp, edx 1350ror edx, 6 1351ror ebp, 25 1352xor ebp, edx 1353ror edx, 5 1354xor ebp, edx 1355add edi, ebp 1356mov ebp, [rsp+8*4+((1024+15-((3)-2)) MOD (16))*4] 1357mov edx, [rsp+8*4+((1024+15-((3)-15)) MOD (16))*4] 1358mov ebx, ebp 1359shr ebp, 10 1360ror ebx, 17 1361xor ebp, ebx 1362ror ebx, 2 1363xor ebx, ebp 1364add ebx, [rsp+8*4+((1024+15-((3)-7)) MOD (16))*4] 1365mov ebp, edx 1366shr ebp, 3 1367ror edx, 7 1368add ebx, [rsp+8*4+((1024+15-(3)) MOD (16))*4] 1369xor ebp, edx 1370add edi, [rsi+(3)*4] 1371ror edx, 11 1372add edi, [rsp+((1024+7-(3)) MOD (8))*4] 1373xor ebp, edx 1374add ebp, ebx 1375mov [rsp+8*4+((1024+15-(3)) MOD (16))*4], ebp 1376add edi, ebp 1377mov ebx, eax 1378xor eax, [rsp+((1024+7-(3+6)) MOD (8))*4] 1379and ecx, eax 1380xor ecx, [rsp+((1024+7-(3+6)) MOD (8))*4] 1381mov ebp, ebx 1382ror ebx, 2 1383add ecx, edi 1384add edi, [rsp+((1024+7-(3+4)) MOD (8))*4] 1385mov [rsp+((1024+7-(3+4)) MOD (8))*4], edi 1386ror ebp, 22 1387xor ebp, ebx 1388ror ebx, 11 1389xor ebp, ebx 1390add ecx, ebp 1391mov [rsp+((1024+7-(3)) MOD (8))*4], ecx 1392mov edx, [rsp+((1024+7-(4+2)) MOD (8))*4] 1393xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] 1394and edx, edi 1395xor edx, [rsp+((1024+7-(4+1)) MOD (8))*4] 1396mov ebp, edi 1397ror edi, 6 1398ror ebp, 25 1399xor ebp, edi 1400ror edi, 5 1401xor ebp, edi 1402add edx, ebp 1403mov ebp, [rsp+8*4+((1024+15-((4)-2)) MOD (16))*4] 1404mov edi, [rsp+8*4+((1024+15-((4)-15)) MOD (16))*4] 1405mov ebx, ebp 1406shr ebp, 10 1407ror ebx, 17 1408xor ebp, ebx 1409ror ebx, 2 1410xor ebx, ebp 1411add ebx, [rsp+8*4+((1024+15-((4)-7)) MOD (16))*4] 1412mov ebp, edi 1413shr ebp, 3 1414ror edi, 7 1415add ebx, [rsp+8*4+((1024+15-(4)) MOD (16))*4] 1416xor ebp, edi 1417add edx, [rsi+(4)*4] 1418ror edi, 11 1419add edx, [rsp+((1024+7-(4)) MOD (8))*4] 1420xor ebp, edi 1421add ebp, ebx 1422mov [rsp+8*4+((1024+15-(4)) MOD (16))*4], ebp 1423add edx, ebp 1424mov ebx, ecx 1425xor ecx, [rsp+((1024+7-(4+6)) MOD (8))*4] 1426and eax, ecx 1427xor eax, [rsp+((1024+7-(4+6)) MOD (8))*4] 1428mov ebp, ebx 1429ror ebx, 2 1430add eax, edx 1431add edx, [rsp+((1024+7-(4+4)) MOD (8))*4] 1432mov [rsp+((1024+7-(4+4)) MOD (8))*4], edx 1433ror ebp, 22 1434xor ebp, ebx 1435ror ebx, 11 1436xor ebp, ebx 1437add eax, ebp 1438mov [rsp+((1024+7-(4)) MOD (8))*4], eax 1439mov edi, [rsp+((1024+7-(5+2)) MOD (8))*4] 1440xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] 1441and edi, edx 1442xor edi, [rsp+((1024+7-(5+1)) MOD (8))*4] 1443mov ebp, edx 1444ror edx, 6 1445ror ebp, 25 1446xor ebp, edx 1447ror edx, 5 1448xor ebp, edx 1449add edi, ebp 1450mov ebp, [rsp+8*4+((1024+15-((5)-2)) MOD (16))*4] 1451mov edx, [rsp+8*4+((1024+15-((5)-15)) MOD (16))*4] 1452mov ebx, ebp 1453shr ebp, 10 1454ror ebx, 17 1455xor ebp, ebx 1456ror ebx, 2 1457xor ebx, ebp 1458add ebx, [rsp+8*4+((1024+15-((5)-7)) MOD (16))*4] 1459mov ebp, edx 1460shr ebp, 3 1461ror edx, 7 1462add ebx, [rsp+8*4+((1024+15-(5)) MOD (16))*4] 1463xor ebp, edx 1464add edi, [rsi+(5)*4] 1465ror edx, 11 1466add edi, [rsp+((1024+7-(5)) MOD (8))*4] 1467xor ebp, edx 1468add ebp, ebx 1469mov [rsp+8*4+((1024+15-(5)) MOD (16))*4], ebp 1470add edi, ebp 1471mov ebx, eax 1472xor eax, [rsp+((1024+7-(5+6)) MOD (8))*4] 1473and ecx, eax 1474xor ecx, [rsp+((1024+7-(5+6)) MOD (8))*4] 1475mov ebp, ebx 1476ror ebx, 2 1477add ecx, edi 1478add edi, [rsp+((1024+7-(5+4)) MOD (8))*4] 1479mov [rsp+((1024+7-(5+4)) MOD (8))*4], edi 1480ror ebp, 22 1481xor ebp, ebx 1482ror ebx, 11 1483xor ebp, ebx 1484add ecx, ebp 1485mov [rsp+((1024+7-(5)) MOD (8))*4], ecx 1486mov edx, [rsp+((1024+7-(6+2)) MOD (8))*4] 1487xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] 1488and edx, edi 1489xor edx, [rsp+((1024+7-(6+1)) MOD (8))*4] 1490mov ebp, edi 1491ror edi, 6 1492ror ebp, 25 1493xor ebp, edi 1494ror edi, 5 1495xor ebp, edi 1496add edx, ebp 1497mov ebp, [rsp+8*4+((1024+15-((6)-2)) MOD (16))*4] 1498mov edi, [rsp+8*4+((1024+15-((6)-15)) MOD (16))*4] 1499mov ebx, ebp 1500shr ebp, 10 1501ror ebx, 17 1502xor ebp, ebx 1503ror ebx, 2 1504xor ebx, ebp 1505add ebx, [rsp+8*4+((1024+15-((6)-7)) MOD (16))*4] 1506mov ebp, edi 1507shr ebp, 3 1508ror edi, 7 1509add ebx, [rsp+8*4+((1024+15-(6)) MOD (16))*4] 1510xor ebp, edi 1511add edx, [rsi+(6)*4] 1512ror edi, 11 1513add edx, [rsp+((1024+7-(6)) MOD (8))*4] 1514xor ebp, edi 1515add ebp, ebx 1516mov [rsp+8*4+((1024+15-(6)) MOD (16))*4], ebp 1517add edx, ebp 1518mov ebx, ecx 1519xor ecx, [rsp+((1024+7-(6+6)) MOD (8))*4] 1520and eax, ecx 1521xor eax, [rsp+((1024+7-(6+6)) MOD (8))*4] 1522mov ebp, ebx 1523ror ebx, 2 1524add eax, edx 1525add edx, [rsp+((1024+7-(6+4)) MOD (8))*4] 1526mov [rsp+((1024+7-(6+4)) MOD (8))*4], edx 1527ror ebp, 22 1528xor ebp, ebx 1529ror ebx, 11 1530xor ebp, ebx 1531add eax, ebp 1532mov [rsp+((1024+7-(6)) MOD (8))*4], eax 1533mov edi, [rsp+((1024+7-(7+2)) MOD (8))*4] 1534xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] 1535and edi, edx 1536xor edi, [rsp+((1024+7-(7+1)) MOD (8))*4] 1537mov ebp, edx 1538ror edx, 6 1539ror ebp, 25 1540xor ebp, edx 1541ror edx, 5 1542xor ebp, edx 1543add edi, ebp 1544mov ebp, [rsp+8*4+((1024+15-((7)-2)) MOD (16))*4] 1545mov edx, [rsp+8*4+((1024+15-((7)-15)) MOD (16))*4] 1546mov ebx, ebp 1547shr ebp, 10 1548ror ebx, 17 1549xor ebp, ebx 1550ror ebx, 2 1551xor ebx, ebp 1552add ebx, [rsp+8*4+((1024+15-((7)-7)) MOD (16))*4] 1553mov ebp, edx 1554shr ebp, 3 1555ror edx, 7 1556add ebx, [rsp+8*4+((1024+15-(7)) MOD (16))*4] 1557xor ebp, edx 1558add edi, [rsi+(7)*4] 1559ror edx, 11 1560add edi, [rsp+((1024+7-(7)) MOD (8))*4] 1561xor ebp, edx 1562add ebp, ebx 1563mov [rsp+8*4+((1024+15-(7)) MOD (16))*4], ebp 1564add edi, ebp 1565mov ebx, eax 1566xor eax, [rsp+((1024+7-(7+6)) MOD (8))*4] 1567and ecx, eax 1568xor ecx, [rsp+((1024+7-(7+6)) MOD (8))*4] 1569mov ebp, ebx 1570ror ebx, 2 1571add ecx, edi 1572add edi, [rsp+((1024+7-(7+4)) MOD (8))*4] 1573mov [rsp+((1024+7-(7+4)) MOD (8))*4], edi 1574ror ebp, 22 1575xor ebp, ebx 1576ror ebx, 11 1577xor ebp, ebx 1578add ecx, ebp 1579mov [rsp+((1024+7-(7)) MOD (8))*4], ecx 1580mov edx, [rsp+((1024+7-(8+2)) MOD (8))*4] 1581xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] 1582and edx, edi 1583xor edx, [rsp+((1024+7-(8+1)) MOD (8))*4] 1584mov ebp, edi 1585ror edi, 6 1586ror ebp, 25 1587xor ebp, edi 1588ror edi, 5 1589xor ebp, edi 1590add edx, ebp 1591mov ebp, [rsp+8*4+((1024+15-((8)-2)) MOD (16))*4] 1592mov edi, [rsp+8*4+((1024+15-((8)-15)) MOD (16))*4] 1593mov ebx, ebp 1594shr ebp, 10 1595ror ebx, 17 1596xor ebp, ebx 1597ror ebx, 2 1598xor ebx, ebp 1599add ebx, [rsp+8*4+((1024+15-((8)-7)) MOD (16))*4] 1600mov ebp, edi 1601shr ebp, 3 1602ror edi, 7 1603add ebx, [rsp+8*4+((1024+15-(8)) MOD (16))*4] 1604xor ebp, edi 1605add edx, [rsi+(8)*4] 1606ror edi, 11 1607add edx, [rsp+((1024+7-(8)) MOD (8))*4] 1608xor ebp, edi 1609add ebp, ebx 1610mov [rsp+8*4+((1024+15-(8)) MOD (16))*4], ebp 1611add edx, ebp 1612mov ebx, ecx 1613xor ecx, [rsp+((1024+7-(8+6)) MOD (8))*4] 1614and eax, ecx 1615xor eax, [rsp+((1024+7-(8+6)) MOD (8))*4] 1616mov ebp, ebx 1617ror ebx, 2 1618add eax, edx 1619add edx, [rsp+((1024+7-(8+4)) MOD (8))*4] 1620mov [rsp+((1024+7-(8+4)) MOD (8))*4], edx 1621ror ebp, 22 1622xor ebp, ebx 1623ror ebx, 11 1624xor ebp, ebx 1625add eax, ebp 1626mov [rsp+((1024+7-(8)) MOD (8))*4], eax 1627mov edi, [rsp+((1024+7-(9+2)) MOD (8))*4] 1628xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] 1629and edi, edx 1630xor edi, [rsp+((1024+7-(9+1)) MOD (8))*4] 1631mov ebp, edx 1632ror edx, 6 1633ror ebp, 25 1634xor ebp, edx 1635ror edx, 5 1636xor ebp, edx 1637add edi, ebp 1638mov ebp, [rsp+8*4+((1024+15-((9)-2)) MOD (16))*4] 1639mov edx, [rsp+8*4+((1024+15-((9)-15)) MOD (16))*4] 1640mov ebx, ebp 1641shr ebp, 10 1642ror ebx, 17 1643xor ebp, ebx 1644ror ebx, 2 1645xor ebx, ebp 1646add ebx, [rsp+8*4+((1024+15-((9)-7)) MOD (16))*4] 1647mov ebp, edx 1648shr ebp, 3 1649ror edx, 7 1650add ebx, [rsp+8*4+((1024+15-(9)) MOD (16))*4] 1651xor ebp, edx 1652add edi, [rsi+(9)*4] 1653ror edx, 11 1654add edi, [rsp+((1024+7-(9)) MOD (8))*4] 1655xor ebp, edx 1656add ebp, ebx 1657mov [rsp+8*4+((1024+15-(9)) MOD (16))*4], ebp 1658add edi, ebp 1659mov ebx, eax 1660xor eax, [rsp+((1024+7-(9+6)) MOD (8))*4] 1661and ecx, eax 1662xor ecx, [rsp+((1024+7-(9+6)) MOD (8))*4] 1663mov ebp, ebx 1664ror ebx, 2 1665add ecx, edi 1666add edi, [rsp+((1024+7-(9+4)) MOD (8))*4] 1667mov [rsp+((1024+7-(9+4)) MOD (8))*4], edi 1668ror ebp, 22 1669xor ebp, ebx 1670ror ebx, 11 1671xor ebp, ebx 1672add ecx, ebp 1673mov [rsp+((1024+7-(9)) MOD (8))*4], ecx 1674mov edx, [rsp+((1024+7-(10+2)) MOD (8))*4] 1675xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] 1676and edx, edi 1677xor edx, [rsp+((1024+7-(10+1)) MOD (8))*4] 1678mov ebp, edi 1679ror edi, 6 1680ror ebp, 25 1681xor ebp, edi 1682ror edi, 5 1683xor ebp, edi 1684add edx, ebp 1685mov ebp, [rsp+8*4+((1024+15-((10)-2)) MOD (16))*4] 1686mov edi, [rsp+8*4+((1024+15-((10)-15)) MOD (16))*4] 1687mov ebx, ebp 1688shr ebp, 10 1689ror ebx, 17 1690xor ebp, ebx 1691ror ebx, 2 1692xor ebx, ebp 1693add ebx, [rsp+8*4+((1024+15-((10)-7)) MOD (16))*4] 1694mov ebp, edi 1695shr ebp, 3 1696ror edi, 7 1697add ebx, [rsp+8*4+((1024+15-(10)) MOD (16))*4] 1698xor ebp, edi 1699add edx, [rsi+(10)*4] 1700ror edi, 11 1701add edx, [rsp+((1024+7-(10)) MOD (8))*4] 1702xor ebp, edi 1703add ebp, ebx 1704mov [rsp+8*4+((1024+15-(10)) MOD (16))*4], ebp 1705add edx, ebp 1706mov ebx, ecx 1707xor ecx, [rsp+((1024+7-(10+6)) MOD (8))*4] 1708and eax, ecx 1709xor eax, [rsp+((1024+7-(10+6)) MOD (8))*4] 1710mov ebp, ebx 1711ror ebx, 2 1712add eax, edx 1713add edx, [rsp+((1024+7-(10+4)) MOD (8))*4] 1714mov [rsp+((1024+7-(10+4)) MOD (8))*4], edx 1715ror ebp, 22 1716xor ebp, ebx 1717ror ebx, 11 1718xor ebp, ebx 1719add eax, ebp 1720mov [rsp+((1024+7-(10)) MOD (8))*4], eax 1721mov edi, [rsp+((1024+7-(11+2)) MOD (8))*4] 1722xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] 1723and edi, edx 1724xor edi, [rsp+((1024+7-(11+1)) MOD (8))*4] 1725mov ebp, edx 1726ror edx, 6 1727ror ebp, 25 1728xor ebp, edx 1729ror edx, 5 1730xor ebp, edx 1731add edi, ebp 1732mov ebp, [rsp+8*4+((1024+15-((11)-2)) MOD (16))*4] 1733mov edx, [rsp+8*4+((1024+15-((11)-15)) MOD (16))*4] 1734mov ebx, ebp 1735shr ebp, 10 1736ror ebx, 17 1737xor ebp, ebx 1738ror ebx, 2 1739xor ebx, ebp 1740add ebx, [rsp+8*4+((1024+15-((11)-7)) MOD (16))*4] 1741mov ebp, edx 1742shr ebp, 3 1743ror edx, 7 1744add ebx, [rsp+8*4+((1024+15-(11)) MOD (16))*4] 1745xor ebp, edx 1746add edi, [rsi+(11)*4] 1747ror edx, 11 1748add edi, [rsp+((1024+7-(11)) MOD (8))*4] 1749xor ebp, edx 1750add ebp, ebx 1751mov [rsp+8*4+((1024+15-(11)) MOD (16))*4], ebp 1752add edi, ebp 1753mov ebx, eax 1754xor eax, [rsp+((1024+7-(11+6)) MOD (8))*4] 1755and ecx, eax 1756xor ecx, [rsp+((1024+7-(11+6)) MOD (8))*4] 1757mov ebp, ebx 1758ror ebx, 2 1759add ecx, edi 1760add edi, [rsp+((1024+7-(11+4)) MOD (8))*4] 1761mov [rsp+((1024+7-(11+4)) MOD (8))*4], edi 1762ror ebp, 22 1763xor ebp, ebx 1764ror ebx, 11 1765xor ebp, ebx 1766add ecx, ebp 1767mov [rsp+((1024+7-(11)) MOD (8))*4], ecx 1768mov edx, [rsp+((1024+7-(12+2)) MOD (8))*4] 1769xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] 1770and edx, edi 1771xor edx, [rsp+((1024+7-(12+1)) MOD (8))*4] 1772mov ebp, edi 1773ror edi, 6 1774ror ebp, 25 1775xor ebp, edi 1776ror edi, 5 1777xor ebp, edi 1778add edx, ebp 1779mov ebp, [rsp+8*4+((1024+15-((12)-2)) MOD (16))*4] 1780mov edi, [rsp+8*4+((1024+15-((12)-15)) MOD (16))*4] 1781mov ebx, ebp 1782shr ebp, 10 1783ror ebx, 17 1784xor ebp, ebx 1785ror ebx, 2 1786xor ebx, ebp 1787add ebx, [rsp+8*4+((1024+15-((12)-7)) MOD (16))*4] 1788mov ebp, edi 1789shr ebp, 3 1790ror edi, 7 1791add ebx, [rsp+8*4+((1024+15-(12)) MOD (16))*4] 1792xor ebp, edi 1793add edx, [rsi+(12)*4] 1794ror edi, 11 1795add edx, [rsp+((1024+7-(12)) MOD (8))*4] 1796xor ebp, edi 1797add ebp, ebx 1798mov [rsp+8*4+((1024+15-(12)) MOD (16))*4], ebp 1799add edx, ebp 1800mov ebx, ecx 1801xor ecx, [rsp+((1024+7-(12+6)) MOD (8))*4] 1802and eax, ecx 1803xor eax, [rsp+((1024+7-(12+6)) MOD (8))*4] 1804mov ebp, ebx 1805ror ebx, 2 1806add eax, edx 1807add edx, [rsp+((1024+7-(12+4)) MOD (8))*4] 1808mov [rsp+((1024+7-(12+4)) MOD (8))*4], edx 1809ror ebp, 22 1810xor ebp, ebx 1811ror ebx, 11 1812xor ebp, ebx 1813add eax, ebp 1814mov [rsp+((1024+7-(12)) MOD (8))*4], eax 1815mov edi, [rsp+((1024+7-(13+2)) MOD (8))*4] 1816xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] 1817and edi, edx 1818xor edi, [rsp+((1024+7-(13+1)) MOD (8))*4] 1819mov ebp, edx 1820ror edx, 6 1821ror ebp, 25 1822xor ebp, edx 1823ror edx, 5 1824xor ebp, edx 1825add edi, ebp 1826mov ebp, [rsp+8*4+((1024+15-((13)-2)) MOD (16))*4] 1827mov edx, [rsp+8*4+((1024+15-((13)-15)) MOD (16))*4] 1828mov ebx, ebp 1829shr ebp, 10 1830ror ebx, 17 1831xor ebp, ebx 1832ror ebx, 2 1833xor ebx, ebp 1834add ebx, [rsp+8*4+((1024+15-((13)-7)) MOD (16))*4] 1835mov ebp, edx 1836shr ebp, 3 1837ror edx, 7 1838add ebx, [rsp+8*4+((1024+15-(13)) MOD (16))*4] 1839xor ebp, edx 1840add edi, [rsi+(13)*4] 1841ror edx, 11 1842add edi, [rsp+((1024+7-(13)) MOD (8))*4] 1843xor ebp, edx 1844add ebp, ebx 1845mov [rsp+8*4+((1024+15-(13)) MOD (16))*4], ebp 1846add edi, ebp 1847mov ebx, eax 1848xor eax, [rsp+((1024+7-(13+6)) MOD (8))*4] 1849and ecx, eax 1850xor ecx, [rsp+((1024+7-(13+6)) MOD (8))*4] 1851mov ebp, ebx 1852ror ebx, 2 1853add ecx, edi 1854add edi, [rsp+((1024+7-(13+4)) MOD (8))*4] 1855mov [rsp+((1024+7-(13+4)) MOD (8))*4], edi 1856ror ebp, 22 1857xor ebp, ebx 1858ror ebx, 11 1859xor ebp, ebx 1860add ecx, ebp 1861mov [rsp+((1024+7-(13)) MOD (8))*4], ecx 1862mov edx, [rsp+((1024+7-(14+2)) MOD (8))*4] 1863xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] 1864and edx, edi 1865xor edx, [rsp+((1024+7-(14+1)) MOD (8))*4] 1866mov ebp, edi 1867ror edi, 6 1868ror ebp, 25 1869xor ebp, edi 1870ror edi, 5 1871xor ebp, edi 1872add edx, ebp 1873mov ebp, [rsp+8*4+((1024+15-((14)-2)) MOD (16))*4] 1874mov edi, [rsp+8*4+((1024+15-((14)-15)) MOD (16))*4] 1875mov ebx, ebp 1876shr ebp, 10 1877ror ebx, 17 1878xor ebp, ebx 1879ror ebx, 2 1880xor ebx, ebp 1881add ebx, [rsp+8*4+((1024+15-((14)-7)) MOD (16))*4] 1882mov ebp, edi 1883shr ebp, 3 1884ror edi, 7 1885add ebx, [rsp+8*4+((1024+15-(14)) MOD (16))*4] 1886xor ebp, edi 1887add edx, [rsi+(14)*4] 1888ror edi, 11 1889add edx, [rsp+((1024+7-(14)) MOD (8))*4] 1890xor ebp, edi 1891add ebp, ebx 1892mov [rsp+8*4+((1024+15-(14)) MOD (16))*4], ebp 1893add edx, ebp 1894mov ebx, ecx 1895xor ecx, [rsp+((1024+7-(14+6)) MOD (8))*4] 1896and eax, ecx 1897xor eax, [rsp+((1024+7-(14+6)) MOD (8))*4] 1898mov ebp, ebx 1899ror ebx, 2 1900add eax, edx 1901add edx, [rsp+((1024+7-(14+4)) MOD (8))*4] 1902mov [rsp+((1024+7-(14+4)) MOD (8))*4], edx 1903ror ebp, 22 1904xor ebp, ebx 1905ror ebx, 11 1906xor ebp, ebx 1907add eax, ebp 1908mov [rsp+((1024+7-(14)) MOD (8))*4], eax 1909mov edi, [rsp+((1024+7-(15+2)) MOD (8))*4] 1910xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] 1911and edi, edx 1912xor edi, [rsp+((1024+7-(15+1)) MOD (8))*4] 1913mov ebp, edx 1914ror edx, 6 1915ror ebp, 25 1916xor ebp, edx 1917ror edx, 5 1918xor ebp, edx 1919add edi, ebp 1920mov ebp, [rsp+8*4+((1024+15-((15)-2)) MOD (16))*4] 1921mov edx, [rsp+8*4+((1024+15-((15)-15)) MOD (16))*4] 1922mov ebx, ebp 1923shr ebp, 10 1924ror ebx, 17 1925xor ebp, ebx 1926ror ebx, 2 1927xor ebx, ebp 1928add ebx, [rsp+8*4+((1024+15-((15)-7)) MOD (16))*4] 1929mov ebp, edx 1930shr ebp, 3 1931ror edx, 7 1932add ebx, [rsp+8*4+((1024+15-(15)) MOD (16))*4] 1933xor ebp, edx 1934add edi, [rsi+(15)*4] 1935ror edx, 11 1936add edi, [rsp+((1024+7-(15)) MOD (8))*4] 1937xor ebp, edx 1938add ebp, ebx 1939mov [rsp+8*4+((1024+15-(15)) MOD (16))*4], ebp 1940add edi, ebp 1941mov ebx, eax 1942xor eax, [rsp+((1024+7-(15+6)) MOD (8))*4] 1943and ecx, eax 1944xor ecx, [rsp+((1024+7-(15+6)) MOD (8))*4] 1945mov ebp, ebx 1946ror ebx, 2 1947add ecx, edi 1948add edi, [rsp+((1024+7-(15+4)) MOD (8))*4] 1949mov [rsp+((1024+7-(15+4)) MOD (8))*4], edi 1950ror ebp, 22 1951xor ebp, ebx 1952ror ebx, 11 1953xor ebp, ebx 1954add ecx, ebp 1955mov [rsp+((1024+7-(15)) MOD (8))*4], ecx 1956cmp rsi, [rsp+8*4+16*4+0*8] 1957jne label1 1958mov rcx, [rsp+8*4+16*4+1*8] 1959movdqa xmm1, XMMWORD PTR [rcx+1*16] 1960movdqa xmm0, XMMWORD PTR [rcx+0*16] 1961paddd xmm1, [rsp+((1024+7-(0+3)) MOD (8))*4] 1962paddd xmm0, [rsp+((1024+7-(0+7)) MOD (8))*4] 1963movdqa [rcx+1*16], xmm1 1964movdqa [rcx+0*16], xmm0 1965mov rdx, [rsp+8*4+16*4+2*8] 1966add rdx, 64 1967mov [rsp+8*4+16*4+2*8], rdx 1968cmp rdx, [rsp+8*4+16*4+3*8] 1969jne label0 1970add rsp, 8*4 + 16*4 + 4*8 + 8 1971pop rbp 1972pop rbx 1973pop rdi 1974pop rsi 1975ret 1976SHA256_HashMultipleBlocks_SSE2 ENDP 1977 1978;; http://www.agner.org/optimize/vectorclass/read.php?i=65 1979;; word64 Xgetbv(word32 ctrl) 1980;; ctrl = rcx 1981 1982 ALIGN 8 1983XGETBV64 PROC 1984 ;; query 1985 DB 0fh, 01h, 0d0h 1986 ;; xcr = (EDX << 32) | EAX 1987 and rax, 0ffffffffh 1988 shl rdx, 32 1989 or rax, rdx 1990 ret 1991XGETBV64 ENDP 1992 1993;; word64 CpuId(word32 func, word32 subfunc, word32 output[4]) 1994;; func = rcx 1995;; subfunc = rdx 1996;; output = r8 1997 1998 ALIGN 8 1999CPUID64 PROC 2000 ;; preserve per ABI 2001 push rbx 2002 ;; eax = func 2003 mov rax, rcx 2004 ;; ecx = subfunc 2005 mov rcx, rdx 2006 ;; query 2007 cpuid 2008 ;; save 2009 mov [r8+0], eax 2010 mov [r8+4], ebx 2011 mov [r8+8], ecx 2012 mov [r8+12], edx 2013 ;; restore 2014 pop rbx 2015 ;; return 2016 mov rax, 1 2017 ret 2018CPUID64 ENDP 2019 2020_TEXT ENDS 2021END 2022