; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

;-----------------------------------------------------------------------
; _GFp_ChaCha20_ctr32 -- ChaCha20 keystream XOR for 32-bit x86 (NASM syntax)
;
; Stack arguments (offsets are relative to esp after the four pushes in
; the prologue; the routine ends in a plain `ret`, so the caller owns the
; argument area):
;   [20+esp]  pointer that is written through   (output buffer)
;   [24+esp]  pointer that is read and XORed    (input buffer)
;   [28+esp]  byte count; zero returns immediately
;   [32+esp]  pointer to 8 dwords  (presumably the 256-bit key -- confirm
;             against the C prototype)
;   [36+esp]  pointer to 4 dwords  (presumably counter || nonce; dword 0 is
;             incremented once per 64-byte block)
;
; Saved/restored: ebp, ebx, esi, edi.
; Clobbered:      eax, ecx, edx, flags, and xmm0-xmm7 on the SSSE3 path.
;
; Dispatch: two bits of _GFp_ia32cap_P are tested (bit 24 of dword 0 and
; bit 9 of dword 1; presumably FXSR and SSSE3 -- confirm against the
; capability-vector definition).  If both are set the SSSE3 code at
; L$ssse3_shortcut is used, otherwise the scalar code at L$001x86.
;
; The constants 1634760805, 857760878, 2036477234, 1797285236 are the
; little-endian dwords of the ASCII string "expand 32-byte k".
;-----------------------------------------------------------------------

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
$@feat.00 equ 1
section .text code align=64
%else
section .text code
%endif
global _GFp_ChaCha20_ctr32
align 16
_GFp_ChaCha20_ctr32:
L$_GFp_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	xor	eax,eax
	cmp	eax,DWORD [28+esp]		; length == 0 ?
	je	NEAR L$000no_data
	; call/pop leaves the run-time address of L$pic_point in eax; the
	; SSSE3 path later turns it into the address of L$ssse3_data.
	call	L$pic_point
L$pic_point:
	pop	eax
	lea	ebp,[_GFp_ia32cap_P]
	test	DWORD [ebp],16777216		; 1<<24 in capability dword 0
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512		; 1<<9 in capability dword 1
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut

;-----------------------------------------------------------------------
; Scalar path.  Frame after `sub esp,132`:
;   [  0.. 63+esp]  working state words x0..x15 (word i at 4*i)
;   [ 80..108+esp]  copy of the 8 dwords behind argument 4
;   [112..124+esp]  copy of the 4 dwords behind argument 5
;   [128+esp]       spilled round-loop counter
;   [152+esp]       output pointer  (caller's argument slot, 132+20)
;   [156+esp]       input pointer   (caller's argument slot, 132+24)
;   [160+esp]       remaining bytes (caller's argument slot, 132+28)
;-----------------------------------------------------------------------
L$001x86:
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1				; pre-decrement: L$002entry adds 1 per block
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align 16
L$003outer_loop:
	; write the advanced pointers and remaining length back to the arg slots
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Rebuild the initial state for this block.  Words that the first
	; quarter-round consumes stay in registers (eax=x0, ebp=x4, ecx=x8,
	; esi=x9, edx=x12); the rest go to their stack slots.
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	add	edx,1				; block counter for this block
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	mov	ebx,10				; 10 passes through L$004loop
	jmp	NEAR L$004loop
align 16
	; One pass = eight add/xor/rol(16,12,8,7) quarter-rounds: first on
	; (x0,x4,x8,x12) (x1,x5,x9,x13) (x2,x6,x10,x14) (x3,x7,x11,x15), then on
	; (x0,x5,x10,x15) (x1,x6,x11,x12) (x2,x7,x8,x13) (x3,x4,x9,x14).
	; Loads and stores for neighbouring quarter-rounds are interleaved.
L$004loop:
	add	eax,ebp
	mov	DWORD [128+esp],ebx		; spill loop counter, ebx is needed as scratch
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]		; reload loop counter
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Here eax=x0, ebp=x4, ecx=x8, esi=x9, edx=x12, edi=x14; the other
	; words are in their stack slots.  Add the initial state back in.
	mov	ebx,DWORD [160+esp]		; remaining length
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	cmp	ebx,64
	jb	NEAR L$005tail			; unsigned: fewer than 64 bytes left
	; Full block: XOR 64 bytes of input with the keystream, 5 words at a time.
	mov	ebx,DWORD [156+esp]		; ebx = input pointer
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax			; park output word 0; eax becomes the out pointer
	mov	eax,DWORD [152+esp]		; eax = output pointer
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	lea	ebx,[64+ebx]			; advance input pointer
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]			; parked output word 0
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]			; advance output pointer
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial final block: materialise all 16 keystream words in
	; [0..63+esp], then XOR byte by byte.  ebx holds the remaining length.
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]		; ebp = input pointer
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]		; ecx = output pointer
	mov	DWORD [44+esp],esi
	xor	esi,esi				; esi = byte index
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	mov	al,BYTE [ebp*1+esi]		; input byte
	mov	dl,BYTE [esi*1+esp]		; keystream byte
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al		; -1 compensates for the index bump above
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

;-----------------------------------------------------------------------
; SSSE3 path.  __ChaCha20_ssse3 is not declared global in this file, and
; the code after L$ssse3_shortcut relies on eax holding the address of
; L$pic_point, which only the dispatcher above establishes.  The four
; pushes mirror the dispatcher's prologue so the argument offsets at
; L$ssse3_shortcut are the same on either entry.
;
; Register roles after setup:
;   edi = output pointer, esi = input pointer, ecx = remaining bytes,
;   edx = pointer from argument 4, ebx = pointer from argument 5,
;   eax = address of L$ssse3_data.
;
; Frame (esp is realigned to 64 bytes):
;   [  0..255+esp]  working state, 16 vectors; addressed as [ebx-128..ebx+112]
;   [256..511+esp]  initial state, 16 vectors; addressed as [ebp-128..ebp+112]
;   [512+esp]       caller's esp, restored at L$011done
;   [516+esp]       saved edx (argument 4 pointer)
;   [520+esp]       saved ebx (argument 5 pointer)
; The 64-byte-at-a-time code at L$0081x reuses [0..63+esp] for a single state.
;-----------------------------------------------------------------------
align 16
__ChaCha20_ssse3:
	push	ebp
	push	ebx
	push	esi
	push	edi
L$ssse3_shortcut:
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	mov	ebp,esp
	sub	esp,524
	and	esp,-64				; movdqa below needs aligned slots
	mov	DWORD [512+esp],ebp
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	movdqu	xmm3,[ebx]			; 16 bytes behind argument 5
	cmp	ecx,256
	jb	NEAR L$0081x			; fewer than 256 bytes: one block at a time
	; ---- four blocks in parallel: every state word is broadcast to all
	; four lanes of its own vector; lane i belongs to block i.
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	sub	ecx,256				; bias so `sub ecx,256 / jnc` drives the loop
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	paddd	xmm0,[48+eax]			; per-lane counters: +0,+1,+2,+3
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]			; -4 now; the outer loop adds 4 on entry
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]			; the four "expand 32-byte k" dwords
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	lea	esi,[128+esi]			; bias pointers so block data is reachable
	lea	edi,[128+edi]			; with displacements -128..+64
	jmp	NEAR L$009outer_loop
align 16
L$009outer_loop:
	; Copy the initial state into the working area, keeping x0, x4, x8, x9
	; and x12 in xmm0, xmm3, xmm4, xmm5 and xmm6 for the first quarter-rounds.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	paddd	xmm4,[64+eax]			; advance all four lane counters by 4
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4			; remember the advanced counters
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	mov	edx,10				; 10 passes through L$010loop
	nop
align 16
	; Same eight-quarter-round schedule as the scalar loop, on vectors.
	; Rotations by 16 and 8 use pshufb with the masks at [eax] and
	; [16+eax]; rotations by 12 and 7 use pslld/psrld/por.
L$010loop:
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Flush the vectors still held in registers.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	; Output, four state words at a time: add the initial state, transpose
	; the 4x4 dword matrix with punpck* so each register holds 16
	; consecutive bytes of one block, XOR with the input, and store.  The
	; four blocks sit 64 bytes apart at displacements -128, -64, 0, +64.
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[208+esi]			; 3*16 + 208 = 256 bytes consumed per pass
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	sub	ecx,256
	jnc	NEAR L$009outer_loop		; no borrow: at least 256 more bytes
	add	ecx,256				; undo the bias: ecx = bytes left (< 256)
	jz	NEAR L$011done
	; Leftover after the 4-way loop: undo the pointer bias and rebuild a
	; single-block counter vector.  Lane 0 of the saved counters + 4 is the
	; next block number; the other three dwords are reloaded from argument 5.
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]			; +4 in lane 0
	pand	xmm3,[112+eax]			; keep dwords 1..3, clear dword 0
	por	xmm3,xmm2
	; ---- one block at a time: xmm0..xmm3 are the four rows of the state,
	; xmm6/xmm7 hold the rotate-by-16 / rotate-by-8 pshufb masks.
L$0081x:
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	; NOTE(review): the next 4-byte store is overwritten by
	; `movdqa [48+esp],xmm3` below with no read in between; it is kept so
	; the instruction stream stays identical to the generator's output.
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align 16
L$013outer1x:
	; Next 64-byte block: reload rows 0..2 and bump the counter by 1.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align 16
	; Each pass runs the quarter-round on all four columns at once, rotates
	; the lanes of rows 1..3 with pshufd, runs it again, and rotates the
	; lanes back.  The `db 102,15,56,0,222` bytes encode pshufb xmm3,xmm6
	; and `db 102,15,56,0,223` encodes pshufb xmm3,xmm7.
L$012loop1x:
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db 102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db 102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db 102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db 102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	paddd	xmm0,[esp]			; add the initial state back in
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	cmp	ecx,64
	jb	NEAR L$014tail
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Fewer than 64 bytes left: spill the keystream block to [0..63+esp]
	; and XOR byte by byte; ecx counts the remaining bytes.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp				; ebp = byte index
L$015tail_loop:
	mov	al,BYTE [ebp*1+esp]		; keystream byte
	mov	dl,BYTE [ebp*1+esi]		; input byte
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al		; -1 compensates for the index bump above
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	mov	esp,DWORD [512+esp]		; restore the pre-alignment esp
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

;-----------------------------------------------------------------------
; Constant table for the SSSE3 path (eax points here):
;   [  0+eax]  pshufb mask: rotate each dword left by 16
;   [ 16+eax]  pshufb mask: rotate each dword left by 8
;   [ 32+eax]  "expand 32-byte k" as four dwords
;   [ 48+eax]  0,1,2,3      per-lane counter offsets
;   [ 64+eax]  4,4,4,4      counter step for the 4-way loop
;   [ 80+eax]  1,0,0,0      counter step for the single-block loop
;   [ 96+eax]  4,0,0,0      counter fix-up when leaving the 4-way loop
;   [112+eax]  0,-1,-1,-1   mask keeping dwords 1..3 of the counter block
;-----------------------------------------------------------------------
align 64
L$ssse3_data:
db 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
db 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
dd 1634760805,857760878,2036477234,1797285236
dd 0,1,2,3
dd 4,4,4,4
dd 1,0,0,0
dd 4,0,0,0
dd 0,-1,-1,-1
align 64
; ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
db 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db 114,103,62,0
segment .bss
common _GFp_ia32cap_P 16