# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# ----------------------------------------------------------------------------
# GHASH (GCM authentication) for x86-64, SysV AMD64 ABI, AT&T syntax.
# Exports (names per the ring/GFp build):
#   GFp_gcm_init_clmul(Htable, H)       rdi = Htable out, rsi = hash key H
#   GFp_gcm_gmult_clmul(Xi, Htable)     rdi = Xi in/out,  rsi = Htable
#   GFp_gcm_ghash_clmul(Xi, Htable, inp, len)
#                                       rdi = Xi, rsi = Htable, rdx = input,
#                                       rcx = length in bytes (multiple of 16)
#   GFp_gcm_init_avx / GFp_gcm_ghash_avx — AVX variants of the same.
# NOTE(review): C prototypes above are inferred from the visible register
# usage and the BoringSSL GHASH convention — confirm against the callers.
#
# The bare ".byte 102,15,58,68,..." sequences are hand-encoded SSE opcodes
# emitted numerically by the perlasm generator:
#   0x66 0x0F 0x3A 0x44 = pclmulqdq,  0x66 0x0F 0x38 0x00 = pshufb,
#   0x66 0x0F 0x3A 0x0F = palignr.   ".byte 0xf3,0xc3" is "rep ret".
# ----------------------------------------------------------------------------

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern	GFp_ia32cap_P
.hidden GFp_ia32cap_P

# ----------------------------------------------------------------------------
# GFp_gcm_init_clmul: precompute the table of powers of the hash key H
# (stored to 0..80(%rdi)) used by the PCLMULQDQ GHASH routines below.
# ----------------------------------------------------------------------------
.globl	GFp_gcm_init_clmul
.hidden GFp_gcm_init_clmul
.type	GFp_gcm_init_clmul,@function
.align	16
GFp_gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2

	# conditionally reduce H<<1 by the GHASH polynomial 0x1c2...
	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0		# pclmulqdq $0x00,%xmm2,%xmm0
.byte	102,15,58,68,202,17		# pclmulqdq $0x11,%xmm2,%xmm1
.byte	102,15,58,68,222,0		# pclmulqdq $0x00,%xmm6,%xmm3
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8		# palignr $8,%xmm3,%xmm4
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8		# palignr $8,%xmm3,%xmm4
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_init_clmul,.-GFp_gcm_init_clmul

# ----------------------------------------------------------------------------
# GFp_gcm_gmult_clmul: Xi = Xi * H (one GHASH multiplication).
# rdi = Xi (16 bytes, in/out), rsi = Htable from GFp_gcm_init_clmul.
# ----------------------------------------------------------------------------
.globl	GFp_gcm_gmult_clmul
.hidden GFp_gcm_gmult_clmul
.type	GFp_gcm_gmult_clmul,@function
.align	16
.cfi_startproc
GFp_gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (byte-swap Xi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	# reduce the 256-bit product modulo the GHASH polynomial
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (swap back)
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_gmult_clmul,.-GFp_gcm_gmult_clmul

# ----------------------------------------------------------------------------
# GFp_gcm_ghash_clmul: fold len bytes of input into Xi.
# rdi = Xi, rsi = Htable, rdx = input, rcx = len. Processes 4 blocks per
# iteration (.Lmod4_loop) when the CPU-feature word in GFp_ia32cap_P allows,
# otherwise 2 (.Lmod_loop) / 1 (.Lodd_tail) blocks at a time.
# ----------------------------------------------------------------------------
.globl	GFp_gcm_ghash_clmul
.hidden GFp_gcm_ghash_clmul
.type	GFp_gcm_ghash_clmul,@function
.align	32
GFp_gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194		# pshufb %xmm10,%xmm0

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	leaq	GFp_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	# check feature bits before taking the 4-blocks-at-a-time path
	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:				# main loop: 4 blocks per iteration
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200		# movq %rax,%xmm9

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:				# 2-blocks-at-a-time path




	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:				# final single block
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194		# pshufb %xmm10,%xmm0 (swap back)
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_ghash_clmul,.-GFp_gcm_ghash_clmul

# ----------------------------------------------------------------------------
# GFp_gcm_init_avx: AVX version of GFp_gcm_init_clmul; precomputes an
# extended power table (loop of 4 iterations writing 48 bytes each).
# ----------------------------------------------------------------------------
.globl	GFp_gcm_init_avx
.hidden GFp_gcm_init_avx
.type	GFp_gcm_init_avx,@function
.align	32
GFp_gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10			# r10 = loop counter (4 table rounds)
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_init_avx,.-GFp_gcm_init_avx

# ----------------------------------------------------------------------------
# GFp_gcm_ghash_avx: AVX GHASH; rdi = Xi, rsi = Htable, rdx = input,
# rcx = len. Main loop (.Loop8x_avx) consumes 8 blocks (128 bytes) per
# iteration; .Lshort_avx / .Ltail_avx handle the remainder.
# ----------------------------------------------------------------------------
.globl	GFp_gcm_ghash_avx
.hidden GFp_gcm_ghash_avx
.type	GFp_gcm_ghash_avx,@function
.align	32
GFp_gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:				# 8 blocks (128 bytes) per iteration
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10	# reduce by 0x1c2 poly
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:				# < 128 bytes: walk backwards from the end
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	GFp_gcm_ghash_avx,.-GFp_gcm_ghash_avx

# ----------------------------------------------------------------------------
# Constant pool.
# ----------------------------------------------------------------------------
.align	64
.Lbswap_mask:				# pshufb mask: reverse the 16 bytes
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:			# GHASH reduction polynomial constant
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0

# ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif
.section	.note.GNU-stack,"",@progbits