; ----------------------------------------------------------------------
; GHASH routines for 32-bit x86, NASM syntax (the embedded ident string at
; the end of the file reads "GHASH for x86, CRYPTOGAMS by
; <appro@openssl.org>").
;
; All entry points take their arguments on the stack (first argument at
; [esp+4] on entry) and return with a plain `ret`.  Every routine that
; touches ebp/ebx/esi/edi saves and restores them.
;
; Exported symbols:
;   _gcm_gmult_4bit_x86 (p16, table)            integer-only, 4-bit table
;   _gcm_ghash_4bit_x86 (p16, table, inp, len)  integer-only, 4-bit table
;   _gcm_gmult_4bit_mmx (p16, table)            MMX, 4-bit table
;   _gcm_ghash_4bit_mmx (p16, table, inp, len)  MMX (+pinsrw/pshufw)
;   _gcm_init_clmul     (table, key16)          SSE2/SSSE3/PCLMULQDQ
;   _gcm_gmult_clmul    (p16, table)            SSSE3/PCLMULQDQ
;   _gcm_ghash_clmul    (p16, table, inp, len)  SSSE3/PCLMULQDQ
;
; "p16" is a pointer to a 16-byte block that is read, updated and written
; back.  (In the OpenSSL C sources these are presumably `Xi` and `Htable`;
; the names are not visible in this file - confirm against the caller.)
;
; SSSE3 / PCLMULQDQ instructions are emitted as raw `db` bytes so that the
; file does not depend on assembler support for them; each one is decoded
; in a trailing comment.
; ----------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif

; ----------------------------------------------------------------------
; _gcm_gmult_4bit_x86(p16, table)
;   [esp+4] = p16   : 16-byte block, read and overwritten with the result
;   [esp+8] = table : lookup table addressed as table + nibble*16
; Frame (after 4 pushes and `sub esp,84`; arguments at [104+esp]/[108+esp]):
;   [esp+0 ..15] copy of the input block (read back one byte at a time)
;   [esp+16..79] sixteen dword reduction constants (same values as the
;                odd dwords of L$rem_4bit)
; ----------------------------------------------------------------------
global _gcm_gmult_4bit_x86
align 16
_gcm_gmult_4bit_x86:
L$_gcm_gmult_4bit_x86_begin:
    push ebp
    push ebx
    push esi
    push edi
    sub esp,84
    mov edi,DWORD [104+esp]
    mov esi,DWORD [108+esp]
    mov ebp,DWORD [edi]
    mov edx,DWORD [4+edi]
    mov ecx,DWORD [8+edi]
    mov ebx,DWORD [12+edi]
    ; reduction table, indexed by the nibble shifted out of the accumulator
    mov DWORD [16+esp],0
    mov DWORD [20+esp],471859200
    mov DWORD [24+esp],943718400
    mov DWORD [28+esp],610271232
    mov DWORD [32+esp],1887436800
    mov DWORD [36+esp],1822425088
    mov DWORD [40+esp],1220542464
    mov DWORD [44+esp],1423966208
    mov DWORD [48+esp],3774873600
    mov DWORD [52+esp],4246732800
    mov DWORD [56+esp],3644850176
    mov DWORD [60+esp],3311403008
    mov DWORD [64+esp],2441084928
    mov DWORD [68+esp],2376073216
    mov DWORD [72+esp],2847932416
    mov DWORD [76+esp],3051356160
    ; keep a byte-addressable copy of the input on the stack
    mov DWORD [esp],ebp
    mov DWORD [4+esp],edx
    mov DWORD [8+esp],ecx
    mov DWORD [12+esp],ebx
    ; seed the accumulator from the table entry selected by the high
    ; nibble of input byte 15 ((ebx >> 20) & 0xF0 = nibble*16)
    shr ebx,20
    and ebx,240
    mov ebp,DWORD [4+ebx*1+esi]
    mov edx,DWORD [ebx*1+esi]
    mov ecx,DWORD [12+ebx*1+esi]
    mov ebx,DWORD [8+ebx*1+esi]
    xor eax,eax
    mov edi,15                      ; edi = index of the byte being consumed
    jmp NEAR L$000x86_loop
align 16
L$000x86_loop:
    ; first half: shift accumulator right by 4, fold the dropped nibble
    ; through the stack table, then xor in the entry for the byte's
    ; upper nibble (al & 0xF0)
    mov al,bl
    shrd ebx,ecx,4
    and al,15
    shrd ecx,edx,4
    shrd edx,ebp,4
    shr ebp,4
    xor ebp,DWORD [16+eax*4+esp]
    mov al,BYTE [edi*1+esp]
    and al,240
    xor ebx,DWORD [8+eax*1+esi]
    xor ecx,DWORD [12+eax*1+esi]
    xor edx,DWORD [eax*1+esi]
    xor ebp,DWORD [4+eax*1+esi]
    dec edi
    js NEAR L$001x86_break          ; byte 0 consumed -> done
    ; second half: same step, using the lower nibble of the next byte
    ; (al << 4)
    mov al,bl
    shrd ebx,ecx,4
    and al,15
    shrd ecx,edx,4
    shrd edx,ebp,4
    shr ebp,4
    xor ebp,DWORD [16+eax*4+esp]
    mov al,BYTE [edi*1+esp]
    shl al,4
    xor ebx,DWORD [8+eax*1+esi]
    xor ecx,DWORD [12+eax*1+esi]
    xor edx,DWORD [eax*1+esi]
    xor ebp,DWORD [4+eax*1+esi]
    jmp NEAR L$000x86_loop
align 16
L$001x86_break:
    ; byte-swap each dword and store the result back over the input
    bswap ebx
    bswap ecx
    bswap edx
    bswap ebp
    mov edi,DWORD [104+esp]
    mov DWORD [12+edi],ebx
    mov DWORD [8+edi],ecx
    mov DWORD [4+edi],edx
    mov DWORD [edi],ebp
    add esp,84
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

; ----------------------------------------------------------------------
; _gcm_ghash_4bit_x86(p16, table, inp, len)
;   [esp+4]  = p16   : 16-byte accumulator, read at entry, written at exit
;   [esp+8]  = table : lookup table addressed as table + nibble*16
;   [esp+12] = inp   : input data, consumed 16 bytes per outer iteration
;   [esp+16] = len   : byte count; the argument slot is overwritten with
;                      the end pointer (inp+len)
; The outer loop runs at least once and repeats while inp < inp+len
; (unsigned compare).
; ----------------------------------------------------------------------
global _gcm_ghash_4bit_x86
align 16
_gcm_ghash_4bit_x86:
L$_gcm_ghash_4bit_x86_begin:
    push ebp
    push ebx
    push esi
    push edi
    sub esp,84
    mov ebx,DWORD [104+esp]
    mov esi,DWORD [108+esp]
    mov edi,DWORD [112+esp]
    mov ecx,DWORD [116+esp]
    add ecx,edi
    mov DWORD [116+esp],ecx         ; [116+esp] now holds the end pointer
    mov ebp,DWORD [ebx]
    mov edx,DWORD [4+ebx]
    mov ecx,DWORD [8+ebx]
    mov ebx,DWORD [12+ebx]
    ; reduction table, identical to the one in _gcm_gmult_4bit_x86
    mov DWORD [16+esp],0
    mov DWORD [20+esp],471859200
    mov DWORD [24+esp],943718400
    mov DWORD [28+esp],610271232
    mov DWORD [32+esp],1887436800
    mov DWORD [36+esp],1822425088
    mov DWORD [40+esp],1220542464
    mov DWORD [44+esp],1423966208
    mov DWORD [48+esp],3774873600
    mov DWORD [52+esp],4246732800
    mov DWORD [56+esp],3644850176
    mov DWORD [60+esp],3311403008
    mov DWORD [64+esp],2441084928
    mov DWORD [68+esp],2376073216
    mov DWORD [72+esp],2847932416
    mov DWORD [76+esp],3051356160
align 16
L$002x86_outer_loop:
    ; accumulator ^= next 16 input bytes, then stash it at [esp..15]
    xor ebx,DWORD [12+edi]
    xor ecx,DWORD [8+edi]
    xor edx,DWORD [4+edi]
    xor ebp,DWORD [edi]
    mov DWORD [12+esp],ebx
    mov DWORD [8+esp],ecx
    mov DWORD [4+esp],edx
    mov DWORD [esp],ebp
    shr ebx,20
    and ebx,240
    mov ebp,DWORD [4+ebx*1+esi]
    mov edx,DWORD [ebx*1+esi]
    mov ecx,DWORD [12+ebx*1+esi]
    mov ebx,DWORD [8+ebx*1+esi]
    xor eax,eax
    mov edi,15
    jmp NEAR L$003x86_loop
align 16
L$003x86_loop:
    mov al,bl
    shrd ebx,ecx,4
    and al,15
    shrd ecx,edx,4
    shrd edx,ebp,4
    shr ebp,4
    xor ebp,DWORD [16+eax*4+esp]
    mov al,BYTE [edi*1+esp]
    and al,240
    xor ebx,DWORD [8+eax*1+esi]
    xor ecx,DWORD [12+eax*1+esi]
    xor edx,DWORD [eax*1+esi]
    xor ebp,DWORD [4+eax*1+esi]
    dec edi
    js NEAR L$004x86_break
    mov al,bl
    shrd ebx,ecx,4
    and al,15
    shrd ecx,edx,4
    shrd edx,ebp,4
    shr ebp,4
    xor ebp,DWORD [16+eax*4+esp]
    mov al,BYTE [edi*1+esp]
    shl al,4
    xor ebx,DWORD [8+eax*1+esi]
    xor ecx,DWORD [12+eax*1+esi]
    xor edx,DWORD [eax*1+esi]
    xor ebp,DWORD [4+eax*1+esi]
    jmp NEAR L$003x86_loop
align 16
L$004x86_break:
    bswap ebx
    bswap ecx
    bswap edx
    bswap ebp
    ; advance inp by 16 and loop while it is still below the end pointer
    mov edi,DWORD [112+esp]
    lea edi,[16+edi]
    cmp edi,DWORD [116+esp]
    mov DWORD [112+esp],edi
    jb NEAR L$002x86_outer_loop
    mov edi,DWORD [104+esp]
    mov DWORD [12+edi],ebx
    mov DWORD [8+edi],ecx
    mov DWORD [4+edi],edx
    mov DWORD [edi],ebp
    add esp,84
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

; ----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx(p16, table)
;   [esp+4] = p16   : 16-byte block, read and overwritten with the result
;   [esp+8] = table : lookup table addressed as table + nibble*16
; Uses mm0-mm2 and the L$rem_4bit table (located PC-relatively through the
; call/pop pair); executes `emms` before storing the result.
; ----------------------------------------------------------------------
global _gcm_gmult_4bit_mmx
align 16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov edi,DWORD [20+esp]
    mov esi,DWORD [24+esp]
    call L$005pic_point
L$005pic_point:
    pop eax
    lea eax,[(L$rem_4bit-L$005pic_point)+eax]   ; eax = &L$rem_4bit
    movzx ebx,BYTE [15+edi]
    xor ecx,ecx
    mov edx,ebx
    mov cl,dl
    mov ebp,14                      ; ebp = index of the next byte to fetch
    shl cl,4                        ; ecx = low nibble * 16
    and edx,240                     ; edx = high nibble * 16
    movq mm0,[8+ecx*1+esi]
    movq mm1,[ecx*1+esi]
    movd ebx,mm0
    jmp NEAR L$006mmx_loop
align 16
L$006mmx_loop:
    psrlq mm0,4
    and ebx,15
    movq mm2,mm1
    psrlq mm1,4
    pxor mm0,[8+edx*1+esi]
    mov cl,BYTE [ebp*1+edi]
    psllq mm2,60
    pxor mm1,[ebx*8+eax]
    dec ebp
    movd ebx,mm0
    pxor mm1,[edx*1+esi]
    mov edx,ecx
    pxor mm0,mm2
    js NEAR L$007mmx_break          ; flags are from `dec ebp` above
    shl cl,4
    and ebx,15
    psrlq mm0,4
    and edx,240
    movq mm2,mm1
    psrlq mm1,4
    pxor mm0,[8+ecx*1+esi]
    psllq mm2,60
    pxor mm1,[ebx*8+eax]
    movd ebx,mm0
    pxor mm1,[ecx*1+esi]
    pxor mm0,mm2
    jmp NEAR L$006mmx_loop
align 16
L$007mmx_break:
    ; final two nibble steps for byte 0, peeled out of the loop
    shl cl,4
    and ebx,15
    psrlq mm0,4
    and edx,240
    movq mm2,mm1
    psrlq mm1,4
    pxor mm0,[8+ecx*1+esi]
    psllq mm2,60
    pxor mm1,[ebx*8+eax]
    movd ebx,mm0
    pxor mm1,[ecx*1+esi]
    pxor mm0,mm2
    psrlq mm0,4
    and ebx,15
    movq mm2,mm1
    psrlq mm1,4
    pxor mm0,[8+edx*1+esi]
    psllq mm2,60
    pxor mm1,[ebx*8+eax]
    movd ebx,mm0
    pxor mm1,[edx*1+esi]
    pxor mm0,mm2
    ; split mm0/mm1 into four dwords, byte-swap and store
    psrlq mm0,32
    movd edx,mm1
    psrlq mm1,32
    movd ecx,mm0
    movd ebp,mm1
    bswap ebx
    bswap edx
    bswap ecx
    bswap ebp
    emms
    mov DWORD [12+edi],ebx
    mov DWORD [4+edi],edx
    mov DWORD [8+edi],ecx
    mov DWORD [edi],ebp
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

; ----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx(p16, table, inp, len)
;   [esp+4]  = p16   : 16-byte accumulator, read at entry, written at exit
;   [esp+8]  = table : 16 entries of 16 bytes (256 bytes are read)
;   [esp+12] = inp   : input data, consumed 16 bytes per outer iteration
;   [esp+16] = len   : byte count
; NOTE: the outer loop exits only when inp == inp+len exactly (`jne`), so
; len must be a non-zero multiple of 16.
;
; Uses pinsrw/pshufw on MMX registers, i.e. needs the SSE integer
; extensions in addition to plain MMX.
;
; A 64-byte-aligned scratch frame is carved out below the caller's stack;
; the original esp is kept in the frame and restored on exit:
;   [esp+0  ..15 ] byte table: (low byte of the dword at entry+8) << 4
;   [esp+16 ..143] qword at offset 8 of each table entry
;   [esp+144..271] qword at offset 0 of each table entry
;   [esp+272..399] the 128-bit entry shifted right by 4: low qword
;   [esp+400..527] the 128-bit entry shifted right by 4: high qword
;   [esp+528..543] accumulator xor current input block
;   [esp+544] p16   [esp+548] current inp   [esp+552] end pointer
;   [esp+556] caller's esp
; ----------------------------------------------------------------------
global _gcm_ghash_4bit_mmx
align 16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov eax,DWORD [20+esp]
    mov ebx,DWORD [24+esp]
    mov ecx,DWORD [28+esp]
    mov edx,DWORD [32+esp]
    mov ebp,esp                     ; remember esp before realignment
    call L$008pic_point
L$008pic_point:
    pop esi
    lea esi,[(L$rem_8bit-L$008pic_point)+esi]   ; esi = &L$rem_8bit
    sub esp,544
    and esp,-64
    sub esp,16
    add edx,ecx                     ; edx = inp + len (end pointer)
    mov DWORD [544+esp],eax
    mov DWORD [552+esp],edx
    mov DWORD [556+esp],ebp
    add ebx,128                     ; bias so all 256 bytes fit disp8
    lea edi,[144+esp]
    lea ebp,[400+esp]
    ; --- build the scratch tables, software-pipelined over 16 entries ---
    mov edx,DWORD [ebx-120]
    movq mm0,[ebx-120]
    movq mm3,[ebx-128]
    shl edx,4
    mov BYTE [esp],dl
    mov edx,DWORD [ebx-104]
    movq mm2,[ebx-104]
    movq mm5,[ebx-112]
    movq [edi-128],mm0
    psrlq mm0,4
    movq [edi],mm3
    movq mm7,mm3
    psrlq mm3,4
    shl edx,4
    mov BYTE [1+esp],dl
    mov edx,DWORD [ebx-88]
    movq mm1,[ebx-88]
    psllq mm7,60
    movq mm4,[ebx-96]
    por mm0,mm7
    movq [edi-120],mm2
    psrlq mm2,4
    movq [8+edi],mm5
    movq mm6,mm5
    movq [ebp-128],mm0
    psrlq mm5,4
    movq [ebp],mm3
    shl edx,4
    mov BYTE [2+esp],dl
    mov edx,DWORD [ebx-72]
    movq mm0,[ebx-72]
    psllq mm6,60
    movq mm3,[ebx-80]
    por mm2,mm6
    movq [edi-112],mm1
    psrlq mm1,4
    movq [16+edi],mm4
    movq mm7,mm4
    movq [ebp-120],mm2
    psrlq mm4,4
    movq [8+ebp],mm5
    shl edx,4
    mov BYTE [3+esp],dl
    mov edx,DWORD [ebx-56]
    movq mm2,[ebx-56]
    psllq mm7,60
    movq mm5,[ebx-64]
    por mm1,mm7
    movq [edi-104],mm0
    psrlq mm0,4
    movq [24+edi],mm3
    movq mm6,mm3
    movq [ebp-112],mm1
    psrlq mm3,4
    movq [16+ebp],mm4
    shl edx,4
    mov BYTE [4+esp],dl
    mov edx,DWORD [ebx-40]
    movq mm1,[ebx-40]
    psllq mm6,60
    movq mm4,[ebx-48]
    por mm0,mm6
    movq [edi-96],mm2
    psrlq mm2,4
    movq [32+edi],mm5
    movq mm7,mm5
    movq [ebp-104],mm0
    psrlq mm5,4
    movq [24+ebp],mm3
    shl edx,4
    mov BYTE [5+esp],dl
    mov edx,DWORD [ebx-24]
    movq mm0,[ebx-24]
    psllq mm7,60
    movq mm3,[ebx-32]
    por mm2,mm7
    movq [edi-88],mm1
    psrlq mm1,4
    movq [40+edi],mm4
    movq mm6,mm4
    movq [ebp-96],mm2
    psrlq mm4,4
    movq [32+ebp],mm5
    shl edx,4
    mov BYTE [6+esp],dl
    mov edx,DWORD [ebx-8]
    movq mm2,[ebx-8]
    psllq mm6,60
    movq mm5,[ebx-16]
    por mm1,mm6
    movq [edi-80],mm0
    psrlq mm0,4
    movq [48+edi],mm3
    movq mm7,mm3
    movq [ebp-88],mm1
    psrlq mm3,4
    movq [40+ebp],mm4
    shl edx,4
    mov BYTE [7+esp],dl
    mov edx,DWORD [8+ebx]
    movq mm1,[8+ebx]
    psllq mm7,60
    movq mm4,[ebx]
    por mm0,mm7
    movq [edi-72],mm2
    psrlq mm2,4
    movq [56+edi],mm5
    movq mm6,mm5
    movq [ebp-80],mm0
    psrlq mm5,4
    movq [48+ebp],mm3
    shl edx,4
    mov BYTE [8+esp],dl
    mov edx,DWORD [24+ebx]
    movq mm0,[24+ebx]
    psllq mm6,60
    movq mm3,[16+ebx]
    por mm2,mm6
    movq [edi-64],mm1
    psrlq mm1,4
    movq [64+edi],mm4
    movq mm7,mm4
    movq [ebp-72],mm2
    psrlq mm4,4
    movq [56+ebp],mm5
    shl edx,4
    mov BYTE [9+esp],dl
    mov edx,DWORD [40+ebx]
    movq mm2,[40+ebx]
    psllq mm7,60
    movq mm5,[32+ebx]
    por mm1,mm7
    movq [edi-56],mm0
    psrlq mm0,4
    movq [72+edi],mm3
    movq mm6,mm3
    movq [ebp-64],mm1
    psrlq mm3,4
    movq [64+ebp],mm4
    shl edx,4
    mov BYTE [10+esp],dl
    mov edx,DWORD [56+ebx]
    movq mm1,[56+ebx]
    psllq mm6,60
    movq mm4,[48+ebx]
    por mm0,mm6
    movq [edi-48],mm2
    psrlq mm2,4
    movq [80+edi],mm5
    movq mm7,mm5
    movq [ebp-56],mm0
    psrlq mm5,4
    movq [72+ebp],mm3
    shl edx,4
    mov BYTE [11+esp],dl
    mov edx,DWORD [72+ebx]
    movq mm0,[72+ebx]
    psllq mm7,60
    movq mm3,[64+ebx]
    por mm2,mm7
    movq [edi-40],mm1
    psrlq mm1,4
    movq [88+edi],mm4
    movq mm6,mm4
    movq [ebp-48],mm2
    psrlq mm4,4
    movq [80+ebp],mm5
    shl edx,4
    mov BYTE [12+esp],dl
    mov edx,DWORD [88+ebx]
    movq mm2,[88+ebx]
    psllq mm6,60
    movq mm5,[80+ebx]
    por mm1,mm6
    movq [edi-32],mm0
    psrlq mm0,4
    movq [96+edi],mm3
    movq mm7,mm3
    movq [ebp-40],mm1
    psrlq mm3,4
    movq [88+ebp],mm4
    shl edx,4
    mov BYTE [13+esp],dl
    mov edx,DWORD [104+ebx]
    movq mm1,[104+ebx]
    psllq mm7,60
    movq mm4,[96+ebx]
    por mm0,mm7
    movq [edi-24],mm2
    psrlq mm2,4
    movq [104+edi],mm5
    movq mm6,mm5
    movq [ebp-32],mm0
    psrlq mm5,4
    movq [96+ebp],mm3
    shl edx,4
    mov BYTE [14+esp],dl
    mov edx,DWORD [120+ebx]
    movq mm0,[120+ebx]
    psllq mm6,60
    movq mm3,[112+ebx]
    por mm2,mm6
    movq [edi-16],mm1
    psrlq mm1,4
    movq [112+edi],mm4
    movq mm7,mm4
    movq [ebp-24],mm2
    psrlq mm4,4
    movq [104+ebp],mm5
    shl edx,4
    mov BYTE [15+esp],dl
    psllq mm7,60
    por mm1,mm7
    movq [edi-8],mm0
    psrlq mm0,4
    movq [120+edi],mm3
    movq mm6,mm3
    movq [ebp-16],mm1
    psrlq mm3,4
    movq [112+ebp],mm4
    psllq mm6,60
    por mm0,mm6
    movq [ebp-8],mm0
    movq [120+ebp],mm3
    ; --- load the accumulator: bytes 0..7 in mm6, 8..11 ebx, 12..15 edx ---
    movq mm6,[eax]
    mov ebx,DWORD [8+eax]
    mov edx,DWORD [12+eax]
align 16
L$009outer:
    ; accumulator ^= input block; spill it to [528+esp..543] so the
    ; remaining dwords can be reloaded into edx as bytes are consumed
    xor edx,DWORD [12+ecx]
    xor ebx,DWORD [8+ecx]
    pxor mm6,[ecx]
    lea ecx,[16+ecx]
    mov DWORD [536+esp],ebx
    movq [528+esp],mm6
    mov DWORD [548+esp],ecx
    xor eax,eax
    rol edx,8
    mov al,dl
    mov ebp,eax
    and al,15
    shr ebp,4
    pxor mm0,mm0
    rol edx,8
    pxor mm1,mm1
    pxor mm2,mm2
    movq mm7,[16+eax*8+esp]
    movq mm6,[144+eax*8+esp]
    mov al,dl
    movd ebx,mm7
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    shr edi,4
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    shr ebp,4
    pinsrw mm2,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    mov edx,DWORD [536+esp]         ; next accumulator dword (bytes 8..11)
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm2
    shr edi,4
    pinsrw mm1,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm1
    shr ebp,4
    pinsrw mm0,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm0
    shr edi,4
    pinsrw mm2,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm2
    shr ebp,4
    pinsrw mm1,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    mov edx,DWORD [532+esp]         ; next accumulator dword (bytes 4..7)
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm1
    shr edi,4
    pinsrw mm0,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm0
    shr ebp,4
    pinsrw mm2,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm2
    shr edi,4
    pinsrw mm1,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm1
    shr ebp,4
    pinsrw mm0,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    mov edx,DWORD [528+esp]         ; last accumulator dword (bytes 0..3)
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm0
    shr edi,4
    pinsrw mm2,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm2
    shr ebp,4
    pinsrw mm1,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm1
    shr edi,4
    pinsrw mm0,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    xor bl,BYTE [ebp*1+esp]
    mov al,dl
    movd ecx,mm7
    movzx ebx,bl
    psrlq mm7,8
    movq mm3,mm6
    mov ebp,eax
    psrlq mm6,8
    pxor mm7,[272+edi*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm0
    shr ebp,4
    pinsrw mm2,WORD [ebx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    rol edx,8
    pxor mm6,[144+eax*8+esp]
    pxor mm7,mm3
    pxor mm6,[400+edi*8+esp]
    xor cl,BYTE [edi*1+esp]
    mov al,dl
    mov edx,DWORD [524+esp]         ; value is never consumed: edx is
                                    ; overwritten by `movd edx,mm7` below
    movd ebx,mm7
    movzx ecx,cl
    psrlq mm7,8
    movq mm3,mm6
    mov edi,eax
    psrlq mm6,8
    pxor mm7,[272+ebp*8+esp]
    and al,15
    psllq mm3,56
    pxor mm6,mm2
    shr edi,4
    pinsrw mm1,WORD [ecx*2+esi],2
    pxor mm7,[16+eax*8+esp]
    pxor mm6,[144+eax*8+esp]
    xor bl,BYTE [ebp*1+esp]
    pxor mm7,mm3
    pxor mm6,[400+ebp*8+esp]
    movzx ebx,bl
    ; --- tail: final 4-bit step and merge of the pending reductions ---
    pxor mm2,mm2
    psllq mm1,4
    movd ecx,mm7
    psrlq mm7,4
    movq mm3,mm6
    psrlq mm6,4
    shl ecx,4
    pxor mm7,[16+edi*8+esp]
    psllq mm3,60
    movzx ecx,cl
    pxor mm7,mm3
    pxor mm6,[144+edi*8+esp]
    pinsrw mm0,WORD [ebx*2+esi],2
    pxor mm6,mm1
    movd edx,mm7
    pinsrw mm2,WORD [ecx*2+esi],3
    psllq mm0,12
    pxor mm6,mm0
    psrlq mm7,32
    pxor mm6,mm2
    mov ecx,DWORD [548+esp]         ; reload inp
    movd ebx,mm7
    ; byte-reverse mm6: swap bytes within words, then reverse the words
    movq mm3,mm6
    psllw mm6,8
    psrlw mm3,8
    por mm6,mm3
    bswap edx
    pshufw mm6,mm6,27
    bswap ebx
    cmp ecx,DWORD [552+esp]
    jne NEAR L$009outer
    mov eax,DWORD [544+esp]
    mov DWORD [12+eax],edx
    mov DWORD [8+eax],ebx
    movq [eax],mm6
    mov esp,DWORD [556+esp]         ; drop the aligned scratch frame
    emms
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

; ----------------------------------------------------------------------
; _gcm_init_clmul(table, key16)
;   [esp+4] = table : output, 48 bytes written at +0, +16 and +32
;   [esp+8] = key16 : 16-byte input (read with movdqu)
; Touches only eax, ecx, edx and xmm0-xmm5.
; ----------------------------------------------------------------------
global _gcm_init_clmul
align 16
_gcm_init_clmul:
L$_gcm_init_clmul_begin:
    mov edx,DWORD [4+esp]
    mov eax,DWORD [8+esp]
    call L$010pic
L$010pic:
    pop ecx
    lea ecx,[(L$bswap-L$010pic)+ecx]    ; ecx = &L$bswap
    movdqu xmm2,[eax]
    pshufd xmm2,xmm2,78             ; swap the two qwords
    ; shift the 128-bit value left by one bit and conditionally xor in
    ; the constant at L$bswap+16, selected by the sign of the top dword
    pshufd xmm4,xmm2,255
    movdqa xmm3,xmm2
    psllq xmm2,1
    pxor xmm5,xmm5
    psrlq xmm3,63
    pcmpgtd xmm5,xmm4
    pslldq xmm3,8
    por xmm2,xmm3
    pand xmm5,[16+ecx]
    pxor xmm2,xmm5
    ; square the value (xmm0 = xmm2 * xmm2), Karatsuba-style with three
    ; carry-less multiplies
    movdqa xmm0,xmm2
    movdqa xmm1,xmm0
    pshufd xmm3,xmm0,78
    pshufd xmm4,xmm2,78
    pxor xmm3,xmm0
    pxor xmm4,xmm2
db 102,15,58,68,194,0               ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17              ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0               ; pclmulqdq xmm3,xmm4,0x00
    xorps xmm3,xmm0
    xorps xmm3,xmm1
    movdqa xmm4,xmm3
    psrldq xmm3,8
    pslldq xmm4,8
    pxor xmm1,xmm3
    pxor xmm0,xmm4
    ; reduce the 256-bit product xmm1:xmm0 back to 128 bits in xmm0
    movdqa xmm4,xmm0
    movdqa xmm3,xmm0
    psllq xmm0,5
    pxor xmm3,xmm0
    psllq xmm0,1
    pxor xmm0,xmm3
    psllq xmm0,57
    movdqa xmm3,xmm0
    pslldq xmm0,8
    psrldq xmm3,8
    pxor xmm0,xmm4
    pxor xmm1,xmm3
    movdqa xmm4,xmm0
    psrlq xmm0,1
    pxor xmm1,xmm4
    pxor xmm4,xmm0
    psrlq xmm0,5
    pxor xmm0,xmm4
    psrlq xmm0,1
    pxor xmm0,xmm1
    ; store value, its square, and the packed (hi^lo) halves of both
    pshufd xmm3,xmm2,78
    pshufd xmm4,xmm0,78
    pxor xmm3,xmm2
    movdqu [edx],xmm2
    pxor xmm4,xmm0
    movdqu [16+edx],xmm0
db 102,15,58,15,227,8               ; palignr xmm4,xmm3,8
    movdqu [32+edx],xmm4
    ret

; ----------------------------------------------------------------------
; _gcm_gmult_clmul(p16, table)
;   [esp+4] = p16   : 16-byte block, read and overwritten with the result
;   [esp+8] = table : reads 16 bytes at +0 and 16 bytes at +32
; Touches only eax, ecx, edx and xmm0-xmm5.
; ----------------------------------------------------------------------
global _gcm_gmult_clmul
align 16
_gcm_gmult_clmul:
L$_gcm_gmult_clmul_begin:
    mov eax,DWORD [4+esp]
    mov edx,DWORD [8+esp]
    call L$011pic
L$011pic:
    pop ecx
    lea ecx,[(L$bswap-L$011pic)+ecx]    ; ecx = &L$bswap
    movdqu xmm0,[eax]
    movdqa xmm5,[ecx]               ; byte-reversal shuffle mask
    movups xmm2,[edx]
db 102,15,56,0,197                  ; pshufb xmm0,xmm5
    movups xmm4,[32+edx]
    movdqa xmm1,xmm0
    pshufd xmm3,xmm0,78
    pxor xmm3,xmm0
db 102,15,58,68,194,0               ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17              ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0               ; pclmulqdq xmm3,xmm4,0x00
    xorps xmm3,xmm0
    xorps xmm3,xmm1
    movdqa xmm4,xmm3
    psrldq xmm3,8
    pslldq xmm4,8
    pxor xmm1,xmm3
    pxor xmm0,xmm4
    ; reduce xmm1:xmm0 to 128 bits in xmm0
    movdqa xmm4,xmm0
    movdqa xmm3,xmm0
    psllq xmm0,5
    pxor xmm3,xmm0
    psllq xmm0,1
    pxor xmm0,xmm3
    psllq xmm0,57
    movdqa xmm3,xmm0
    pslldq xmm0,8
    psrldq xmm3,8
    pxor xmm0,xmm4
    pxor xmm1,xmm3
    movdqa xmm4,xmm0
    psrlq xmm0,1
    pxor xmm1,xmm4
    pxor xmm4,xmm0
    psrlq xmm0,5
    pxor xmm0,xmm4
    psrlq xmm0,1
    pxor xmm0,xmm1
db 102,15,56,0,197                  ; pshufb xmm0,xmm5
    movdqu [eax],xmm0
    ret

; ----------------------------------------------------------------------
; _gcm_ghash_clmul(p16, table, inp, len)
;   [esp+4]  = p16   : 16-byte accumulator, read at entry, written at exit
;   [esp+8]  = table : reads 16 bytes each at +0, +16 and +32
;   [esp+12] = inp   : input data
;   [esp+16] = len   : byte count
; len == 16 goes straight to the single-block path (L$013odd_tail);
; otherwise blocks are consumed two at a time, with a trailing single
; block handled by falling through into L$013odd_tail.
; NOTE(review): len is assumed to be a non-zero multiple of 16; the code
; does not check this - confirm against the caller.
; ----------------------------------------------------------------------
global _gcm_ghash_clmul
align 16
_gcm_ghash_clmul:
L$_gcm_ghash_clmul_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov eax,DWORD [20+esp]
    mov edx,DWORD [24+esp]
    mov esi,DWORD [28+esp]
    mov ebx,DWORD [32+esp]
    call L$012pic
L$012pic:
    pop ecx
    lea ecx,[(L$bswap-L$012pic)+ecx]    ; ecx = &L$bswap
    movdqu xmm0,[eax]
    movdqa xmm5,[ecx]               ; byte-reversal shuffle mask
    movdqu xmm2,[edx]
db 102,15,56,0,197                  ; pshufb xmm0,xmm5
    sub ebx,16
    jz NEAR L$013odd_tail
    ; load and byte-reverse the first two blocks, start their multiplies
    movdqu xmm3,[esi]
    movdqu xmm6,[16+esi]
db 102,15,56,0,221                  ; pshufb xmm3,xmm5
db 102,15,56,0,245                  ; pshufb xmm6,xmm5
    movdqu xmm5,[32+edx]
    pxor xmm0,xmm3
    pshufd xmm3,xmm6,78
    movdqa xmm7,xmm6
    pxor xmm3,xmm6
    lea esi,[32+esi]
db 102,15,58,68,242,0               ; pclmulqdq xmm6,xmm2,0x00
db 102,15,58,68,250,17              ; pclmulqdq xmm7,xmm2,0x11
db 102,15,58,68,221,0               ; pclmulqdq xmm3,xmm5,0x00
    movups xmm2,[16+edx]
    nop
    sub ebx,32
    jbe NEAR L$014even_tail
    jmp NEAR L$015mod_loop
align 32
L$015mod_loop:
    pshufd xmm4,xmm0,78
    movdqa xmm1,xmm0
    pxor xmm4,xmm0
    nop
db 102,15,58,68,194,0               ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17              ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,229,16              ; pclmulqdq xmm4,xmm5,0x10
    movups xmm2,[edx]
    xorps xmm0,xmm6
    movdqa xmm5,[ecx]
    xorps xmm1,xmm7
    movdqu xmm7,[esi]
    pxor xmm3,xmm0
    movdqu xmm6,[16+esi]
    pxor xmm3,xmm1
db 102,15,56,0,253                  ; pshufb xmm7,xmm5
    pxor xmm4,xmm3
    movdqa xmm3,xmm4
    psrldq xmm4,8
    pslldq xmm3,8
    pxor xmm1,xmm4
    pxor xmm0,xmm3
db 102,15,56,0,245                  ; pshufb xmm6,xmm5
    pxor xmm1,xmm7
    movdqa xmm7,xmm6
    movdqa xmm4,xmm0
    movdqa xmm3,xmm0
    psllq xmm0,5
    pxor xmm3,xmm0
    psllq xmm0,1
    pxor xmm0,xmm3
db 102,15,58,68,242,0               ; pclmulqdq xmm6,xmm2,0x00
    movups xmm5,[32+edx]
    psllq xmm0,57
    movdqa xmm3,xmm0
    pslldq xmm0,8
    psrldq xmm3,8
    pxor xmm0,xmm4
    pxor xmm1,xmm3
    pshufd xmm3,xmm7,78
    movdqa xmm4,xmm0
    psrlq xmm0,1
    pxor xmm3,xmm7
    pxor xmm1,xmm4
db 102,15,58,68,250,17              ; pclmulqdq xmm7,xmm2,0x11
    movups xmm2,[16+edx]
    pxor xmm4,xmm0
    psrlq xmm0,5
    pxor xmm0,xmm4
    psrlq xmm0,1
    pxor xmm0,xmm1
db 102,15,58,68,221,0               ; pclmulqdq xmm3,xmm5,0x00
    lea esi,[32+esi]
    sub ebx,32
    ja NEAR L$015mod_loop
L$014even_tail:
    pshufd xmm4,xmm0,78
    movdqa xmm1,xmm0
    pxor xmm4,xmm0
db 102,15,58,68,194,0               ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17              ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,229,16              ; pclmulqdq xmm4,xmm5,0x10
    movdqa xmm5,[ecx]
    xorps xmm0,xmm6
    xorps xmm1,xmm7
    pxor xmm3,xmm0
    pxor xmm3,xmm1
    pxor xmm4,xmm3
    movdqa xmm3,xmm4
    psrldq xmm4,8
    pslldq xmm3,8
    pxor xmm1,xmm4
    pxor xmm0,xmm3
    movdqa xmm4,xmm0
    movdqa xmm3,xmm0
    psllq xmm0,5
    pxor xmm3,xmm0
    psllq xmm0,1
    pxor xmm0,xmm3
    psllq xmm0,57
    movdqa xmm3,xmm0
    pslldq xmm0,8
    psrldq xmm3,8
    pxor xmm0,xmm4
    pxor xmm1,xmm3
    movdqa xmm4,xmm0
    psrlq xmm0,1
    pxor xmm1,xmm4
    pxor xmm4,xmm0
    psrlq xmm0,5
    pxor xmm0,xmm4
    psrlq xmm0,1
    pxor xmm0,xmm1
    test ebx,ebx
    jnz NEAR L$016done              ; no trailing single block
    movups xmm2,[edx]
L$013odd_tail:
    ; one remaining block: accumulator = (accumulator ^ block) * table[0]
    movdqu xmm3,[esi]
db 102,15,56,0,221                  ; pshufb xmm3,xmm5
    pxor xmm0,xmm3
    movdqa xmm1,xmm0
    pshufd xmm3,xmm0,78
    pshufd xmm4,xmm2,78
    pxor xmm3,xmm0
    pxor xmm4,xmm2
db 102,15,58,68,194,0               ; pclmulqdq xmm0,xmm2,0x00
db 102,15,58,68,202,17              ; pclmulqdq xmm1,xmm2,0x11
db 102,15,58,68,220,0               ; pclmulqdq xmm3,xmm4,0x00
    xorps xmm3,xmm0
    xorps xmm3,xmm1
    movdqa xmm4,xmm3
    psrldq xmm3,8
    pslldq xmm4,8
    pxor xmm1,xmm3
    pxor xmm0,xmm4
    movdqa xmm4,xmm0
    movdqa xmm3,xmm0
    psllq xmm0,5
    pxor xmm3,xmm0
    psllq xmm0,1
    pxor xmm0,xmm3
    psllq xmm0,57
    movdqa xmm3,xmm0
    pslldq xmm0,8
    psrldq xmm3,8
    pxor xmm0,xmm4
    pxor xmm1,xmm3
    movdqa xmm4,xmm0
    psrlq xmm0,1
    pxor xmm1,xmm4
    pxor xmm4,xmm0
    psrlq xmm0,5
    pxor xmm0,xmm4
    psrlq xmm0,1
    pxor xmm0,xmm1
L$016done:
db 102,15,56,0,197                  ; pshufb xmm0,xmm5
    movdqu [eax],xmm0
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

; ----------------------------------------------------------------------
; Constant data (kept in the code section so the call/pop PC-relative
; addressing above can reach it).
; ----------------------------------------------------------------------
align 64
L$bswap:
; +0 : pshufb mask that reverses the 16 bytes of an xmm register
;      (loaded with movdqa, hence the alignment above)
db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; +16: 128-bit constant and-masked into the key in _gcm_init_clmul
db 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
align 64
L$rem_8bit:
; 256 words, indexed by byte value in _gcm_ghash_4bit_mmx (pinsrw)
dw 0,450,900,582,1800,1738,1164,1358
dw 3600,4050,3476,3158,2328,2266,2716,2910
dw 7200,7650,8100,7782,6952,6890,6316,6510
dw 4656,5106,4532,4214,5432,5370,5820,6014
dw 14400,14722,15300,14854,16200,16010,15564,15630
dw 13904,14226,13780,13334,12632,12442,13020,13086
dw 9312,9634,10212,9766,9064,8874,8428,8494
dw 10864,11186,10740,10294,11640,11450,12028,12094
dw 28800,28994,29444,29382,30600,30282,29708,30158
dw 32400,32594,32020,31958,31128,30810,31260,31710
dw 27808,28002,28452,28390,27560,27242,26668,27118
dw 25264,25458,24884,24822,26040,25722,26172,26622
dw 18624,18690,19268,19078,20424,19978,19532,19854
dw 18128,18194,17748,17558,16856,16410,16988,17310
dw 21728,21794,22372,22182,21480,21034,20588,20910
dw 23280,23346,22900,22710,24056,23610,24188,24510
dw 57600,57538,57988,58182,58888,59338,58764,58446
dw 61200,61138,60564,60758,59416,59866,60316,59998
dw 64800,64738,65188,65382,64040,64490,63916,63598
dw 62256,62194,61620,61814,62520,62970,63420,63102
dw 55616,55426,56004,56070,56904,57226,56780,56334
dw 55120,54930,54484,54550,53336,53658,54236,53790
dw 50528,50338,50916,50982,49768,50090,49644,49198
dw 52080,51890,51444,51510,52344,52666,53244,52798
dw 37248,36930,37380,37830,38536,38730,38156,38094
dw 40848,40530,39956,40406,39064,39258,39708,39646
dw 36256,35938,36388,36838,35496,35690,35116,35054
dw 33712,33394,32820,33270,33976,34170,34620,34558
dw 43456,43010,43588,43910,44744,44810,44364,44174
dw 42960,42514,42068,42390,41176,41242,41820,41630
dw 46560,46114,46692,47014,45800,45866,45420,45230
dw 48112,47666,47220,47542,48376,48442,49020,48830
align 64
L$rem_4bit:
; 16 qwords (low dword 0, high dword = constant), indexed by nibble in
; _gcm_gmult_4bit_mmx
dd 0,0,0,471859200,0,943718400,0,610271232
dd 0,1887436800,0,1822425088,0,1220542464,0,1423966208
dd 0,3774873600,0,4246732800,0,3644850176,0,3311403008
dd 0,2441084928,0,2376073216,0,2847932416,0,3051356160
; ASCII ident: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>",0
db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
db 0