# GHASH for x86_64, CRYPTOGAMS output (see the .byte signature at the end of
# this file). Three code paths: 4-bit table lookup (gcm_*_4bit), carry-less
# multiply (gcm_*_clmul), and AVX (gcm_*_avx). System V AMD64 ABI throughout:
# arguments arrive in %rdi, %rsi, %rdx, %rcx.
.text

# gcm_gmult_4bit(Xi=%rdi, Htable=%rsi): one GHASH multiplication,
# Xi <- Xi * H, driven by the 256-byte Htable and the .Lrem_4bit table.
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp
.cfi_adjust_cfa_offset	280
.Lgmult_prologue:

	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lgmult_epilogue:
	.byte	0xf3,0xc3	# repz ret
.cfi_endproc
.size	gcm_gmult_4bit,.-gcm_gmult_4bit

# gcm_ghash_4bit(Xi=%rdi, Htable=%rsi, inp=%rdx, len=%rcx): fold len bytes
# (a multiple of 16) into Xi; builds an expanded table on the stack, then
# processes one 16-byte block per .Louter_loop iteration with 8-bit
# (.Lrem_8bit) reduction.
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp
.cfi_adjust_cfa_offset	280
.Lghash_prologue:
	movq	%rdx,%r14
	movq	%rcx,%r15
	subq	$-128,%rsi
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi
	movq	8(%rdi),%r8
	movq	0(%rdi),%r9
	addq	%r14,%r15
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	0(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_4bit,.-gcm_ghash_4bit

# gcm_init_clmul(Htable=%rdi, H=%rsi): precompute H and its powers (plus
# their Karatsuba halves) for the PCLMULQDQ paths.
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
# The .byte runs below are hand-assembled SSE instructions for old
# assemblers: 102,15,58,68,... is pclmulqdq; 102,15,56,0,... is pshufb;
# 102,15,58,15,... is palignr.
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_clmul,.-gcm_init_clmul

# gcm_gmult_clmul(Xi=%rdi, Htable=%rsi): single-block GHASH multiplication
# via carry-less multiply.
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul

# gcm_ghash_clmul(Xi=%rdi, Htable=%rsi, inp=%rdx, len=%rcx): bulk GHASH;
# processes 4 blocks per .Lmod4_loop iteration where the CPU allows, with
# 1- and 2-block tails.
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	movl	OPENSSL_ia32cap_P+4(%rip),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:





	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_clmul,.-gcm_ghash_clmul

# gcm_init_avx(Htable=%rdi, H=%rsi): AVX variant of the table setup
# (powers of H, 48 bytes written per loop iteration).
.globl	gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx

# gcm_gmult_avx: identical to gcm_gmult_clmul (tail-jumps into it).
.globl	gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
.cfi_startproc
	jmp	.L_gmult_clmul
.cfi_endproc
.size	gcm_gmult_avx,.-gcm_gmult_avx

# gcm_ghash_avx(Xi=%rdi, Htable=%rsi, inp=%rdx, len=%rcx): bulk GHASH;
# 8 blocks per .Loop8x_avx iteration, .Lshort_avx handles the rest.
.globl	gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_avx,.-gcm_ghash_avx

# Constants: byte-swap mask, the GHASH reduction polynomial (0xc2...),
# and the .Lrem_4bit/.Lrem_8bit reduction tables.
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long	7,0,7,0
.L7_mask_poly:
.long	7,0,450,0
.align	64
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
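
/*
 * Usage note (a sketch, assuming the usual OpenSSL integration; the
 * prototypes below are not part of the generated output above). These entry
 * points are normally reached through the GHASH setup in OpenSSL's
 * crypto/modes/gcm128.c, which selects one of the three code paths at
 * runtime via the OPENSSL_ia32cap_P capability vector referenced in
 * gcm_ghash_clmul. The assumed C-level interface is:
 *
 *   void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
 *   void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
 *   void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 *                       const u8 *inp, size_t len);
 *
 * with the same shapes for the *_clmul and *_avx variants; len is expected
 * to be a multiple of the 16-byte GHASH block size.
 */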