#include "x86_arch.h"
/*
 * GHASH (GCM universal hash) for x86-64, AT&T/GAS syntax, SysV AMD64 ABI.
 *
 * Two implementation families live in this file:
 *   - gcm_gmult_4bit / gcm_ghash_4bit: table-driven (4-bit and 8-bit
 *     nibble tables, .Lrem_4bit / .Lrem_8bit reduction constants).
 *   - gcm_init_clmul / gcm_gmult_clmul / gcm_ghash_clmul: PCLMULQDQ-based
 *     carry-less multiply path.  The PCLMULQDQ and PSHUFB instructions are
 *     emitted as raw .byte sequences (0x66 0x0F 0x3A 0x44 = pclmulqdq,
 *     0x66 0x0F 0x38 0x00 = pshufb) for old assemblers.
 *
 * NOTE(review): this is generated-style code; instruction order and register
 * use are deliberate.  Comments below annotate intent only — the instruction
 * stream itself is unchanged.
 */
.text

/*
 * void gcm_gmult_4bit(Xi, Htable)
 * In:  %rdi = Xi, a 16-byte hash value (read byte-wise, stored back as two
 *             byte-swapped quadwords), %rsi = 256-byte precomputed table
 *             indexed by nibble*16 (loads of the form (%rsi,%rax,1)).
 * Out: Xi updated in place.
 * Uses .Lrem_4bit(%rip) (held in %r11) for the per-nibble reduction.
 */
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp			/* rbp/r12 are pushed but never modified */
	pushq	%r12			/* below; only rbx is reloaded in the epilogue */
.Lgmult_prologue:

	movzbq	15(%rdi),%r8		/* start from last byte of Xi */
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al			/* %al = low nibble * 16 (table offset) */
	movq	$14,%rcx		/* 14 more bytes of Xi to process */
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl		/* %bl = high nibble * 16 */
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	/* one 4-bit shift-and-reduce step, interleaved with the next byte load */
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al	/* fetch next Xi byte */
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9	/* fold in .Lrem_4bit[low 4 bits] */
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1		/* all 16 bytes consumed */

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	/* final two nibble steps (no further byte fetch) */
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8			/* store result back big-endian */
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx		/* reload rbx; drop all three push slots */
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	retq
.size	gcm_gmult_4bit,.-gcm_gmult_4bit

/*
 * void gcm_ghash_4bit(Xi, Htable, inp, len)
 * In:  %rdi = Xi (16 bytes, updated in place), %rsi = precomputed table,
 *      %rdx = input pointer (saved in %r14), %rcx = byte length (added to
 *      %r14 to form the end pointer in %r15; processed 16 bytes/iteration).
 * Stack: 280 bytes; bytes 0..15(%rsp) hold a per-entry nibble byte table,
 *      %rbp = 16+128(%rsp) anchors a rewritten 16x16-byte copy of Htable
 *      (stores at -128(%rbp)..120(%rbp)).  Uses .Lrem_8bit via %r11.
 */
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14		/* r14 = input cursor */
	movq	%rcx,%r15		/* r15 = length (becomes end pointer) */
	subq	$-128,%rsi		/* bias table pointer by +128 */
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	/*
	 * Preprocess the 16 Htable entries: for each entry, pre-shift the
	 * 128-bit value right by 4 and stash the shifted-out nibble (<<4)
	 * in the 16-byte table at (%rsp).  Fully unrolled, 16 iterations.
	 */
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi		/* undo the +128 bias */
	movq	8(%rdi),%r8		/* load current Xi */
	movq	0(%rdi),%r9
	addq	%r14,%r15		/* r15 = end-of-input pointer */
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	/*
	 * Per 16-byte block: Xi ^= block, then multiply by H one byte at a
	 * time (fully unrolled), reducing through the .Lrem_8bit table.
	 */
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14		/* advance input cursor */
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx		/* next 4 bytes of Xi^block */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx		/* last byte: keep high nibble * 16 */
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop		/* more input blocks remain */
	movq	%r8,8(%rdi)		/* store final Xi, big-endian */
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi		/* restore callee-saved regs, drop frame */
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	retq
.size	gcm_ghash_4bit,.-gcm_ghash_4bit

/*
 * void gcm_init_clmul(out, H)
 * In:  %rsi = 16-byte hash subkey H, %rdi = output (32 bytes written:
 *      preprocessed H at (%rdi), H^2-style folded value at 16(%rdi)).
 * Left-shifts H by 1 modulo the GHASH polynomial (via .L0x1c2_polynomial)
 * then squares it with carry-less multiplies.
 */
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2		/* swap 64-bit halves */

	/* H <<= 1 mod P: propagate the cross-lane carry bit */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5		/* broadcast sign of top dword */
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2

	/* conditionally xor in the reduction polynomial 0x1c2<<120 */
	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2

	/* Karatsuba carry-less multiply: xmm0 * xmm2 */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* reduce the 256-bit product modulo the GHASH polynomial */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rdi)
	movdqu	%xmm0,16(%rdi)
	retq
.size	gcm_init_clmul,.-gcm_init_clmul

/*
 * void gcm_gmult_clmul(Xi, Htable)
 * In:  %rdi = Xi (16 bytes, updated in place), %rsi = table from
 *      gcm_init_clmul.  Byte-swaps Xi via .Lbswap_mask, performs one
 *      carry-less multiply by H with reduction, swaps back and stores.
 */
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* modular reduction (same schedule as gcm_init_clmul) */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%rdi)
	retq
.size	gcm_gmult_clmul,.-gcm_gmult_clmul

/*
 * void gcm_ghash_clmul(Xi, Htable, inp, len)
 * In:  %rdi = Xi, %rsi = table (second entry at 16(%rsi) enables the
 *      2-blocks-per-iteration .Lmod_loop), %rdx = input, %rcx = length.
 * Processes pairs of 16-byte blocks with aggregated carry-less multiplies;
 * odd trailing block handled at .Lodd_tail.
 */
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */

	subq	$16,%rcx
	jz	.Lodd_tail		/* exactly one block */

	movdqu	16(%rsi),%xmm8		/* H^2-style second table entry */

	/* first pair of blocks, multiplies started ahead of the loop */
	movdqu	(%rdx),%xmm3
	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	jbe	.Leven_tail

.Lmod_loop:
	/* 2 blocks/iteration: multiply by xmm8, interleave reduction with
	 * the next pair's multiplies by xmm2 */
.byte	102,65,15,58,68,192,0		/* pclmulqdq $0x00,%xmm8,%xmm0 */
.byte	102,65,15,58,68,200,17		/* pclmulqdq $0x11,%xmm8,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%rdx),%xmm3
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0		/* pclmulqdq $0x00,%xmm10,%xmm9 */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7
	pxor	%xmm10,%xmm6

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
	/* finish the last in-flight pair */
.byte	102,65,15,58,68,192,0		/* pclmulqdq $0x00,%xmm8,%xmm0 */
.byte	102,65,15,58,68,200,17		/* pclmulqdq $0x11,%xmm8,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone			/* no odd trailing block */

.Lodd_tail:
	/* single remaining block: one multiply by H and reduce */
	movdqu	(%rdx),%xmm3
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%rdi)
	retq
.LSEH_end_gcm_ghash_clmul:
.size	gcm_ghash_clmul,.-gcm_ghash_clmul

/* constants for the CLMUL path: byte-reversal mask and the GHASH
 * reduction polynomial (0xc2 in the top byte => x^128 + x^7 + x^2 + x + 1) */
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align	64
.type	.Lrem_4bit,@object
/*
 * .Lrem_4bit — reduction constants for the 4-bit path (gcm_gmult_4bit).
 * 16 entries of 8 bytes each, indexed as (%r11,%rdx,8): each entry is
 * emitted as a pair of 32-bit words (low word 0, high word the constant),
 * so the constant lands in the upper half of the 64-bit value xored into %r9.
 */
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/*
 * .Lrem_8bit — 256 16-bit reduction constants for the 8-bit path
 * (gcm_ghash_4bit), indexed as (%r11,%r12,2) and shifted into the top
 * 16 bits of the accumulator via shlq $48.
 */
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

/* ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#if defined(HAVE_GNU_STACK)
/* mark the stack non-executable on GNU toolchains */
.section .note.GNU-stack,"",%progbits
#endif