/*
 * GHASH (GCM universal hash) for x86-64, Windows/COFF build.
 *
 * Layout and naming follow the CRYPTOGAMS/OpenSSL generated style (the
 * trailing ASCII banner credits <appro@openssl.org>), so this file is
 * presumably machine-generated from ghash-x86_64.pl -- do not hand-edit
 * the instruction scheduling. -- TODO confirm against upstream perlasm.
 *
 * ABI: Microsoft x64. Each public entry point immediately spills %rdi/%rsi
 * (callee-saved on Win64) and copies the Win64 argument registers
 * (%rcx,%rdx,%r8,%r9) into the SysV argument registers (%rdi,%rsi,%rdx,%rcx)
 * so the function body can be shared with the SysV build.
 *
 * Two implementations are provided:
 *   - gcm_gmult_4bit / gcm_ghash_4bit: table-driven 4-bit/8-bit method using
 *     the .Lrem_4bit / .Lrem_8bit reduction tables below.
 *   - gcm_init_clmul / gcm_gmult_clmul / gcm_ghash_clmul: carry-less
 *     multiplication (PCLMULQDQ); those instructions are emitted as raw
 *     .byte sequences, decoded in comments where they appear.
 *
 * SEH unwind support: se_handler plus the .pdata/.xdata records at the end.
 */
#include "x86_arch.h"
.text

/*
 * void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
 *
 * Multiplies the hash state Xi by H in GF(2^128) using the 4-bit
 * windowed table Htable, reducing via .Lrem_4bit.
 * Win64 entry: %rcx = Xi -> %rdi, %rdx = Htable -> %rsi.
 */
.globl	gcm_gmult_4bit
.def	gcm_gmult_4bit;	.scl 2;	.type 32;	.endef
.p2align	4
gcm_gmult_4bit:
	movq	%rdi,8(%rsp)		/* spill Win64 callee-saved %rdi/%rsi to the home space */
	movq	%rsi,16(%rsp)
	movq	%rsp,%rax
.LSEH_begin_gcm_gmult_4bit:
	movq	%rcx,%rdi		/* Win64 arg1 (Xi) -> SysV arg1 */
	movq	%rdx,%rsi		/* Win64 arg2 (Htable) -> SysV arg2 */

	pushq	%rbx
	pushq	%rbp			/* rbp/r12 are saved for the unwinder but never modified below */
	pushq	%r12
.Lgmult_prologue:

	/* Load the last byte of Xi; %al = low nibble << 4, %bl = high nibble. */
	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx		/* %rcx counts remaining Xi bytes, 14..0 */
	movq	8(%rsi,%rax,1),%r8	/* (r9:r8) = Htable[low nibble] */
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	movq	%r8,%rdx
	jmp	.Loop1

/*
 * Main nibble loop: two table lookups per Xi byte (low then high nibble),
 * shifting the 128-bit accumulator right 4 bits each step and folding the
 * shifted-out nibble back in through the .Lrem_4bit reduction table.
 */
.p2align	4
.Loop1:
	shrq	$4,%r8
	andq	$15,%rdx		/* nibble shifted out of the low half */
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al	/* fetch next Xi byte */
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9	/* fold reduction term from .Lrem_4bit */
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

/* Final iteration: finish the last low/high nibble pair outside the loop. */
.p2align	4
.Lbreak1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8			/* convert back to big-endian storage order */
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx		/* only %rbx was clobbered; pop all three slots */
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	movq	8(%rsp),%rdi		/* restore Win64 %rdi/%rsi from home space */
	movq	16(%rsp),%rsi
	retq
.LSEH_end_gcm_gmult_4bit:
/*
 * void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 *                     const u8 *inp, size_t len)
 *
 * Hashes len bytes (a multiple of 16 -- TODO confirm caller contract) into
 * Xi. Builds a 4-bit-shifted copy of Htable plus a 16-byte nibble table on
 * the stack, then processes the input with byte-wide lookups reduced via
 * .Lrem_8bit.
 * Win64 entry: %rcx=Xi -> %rdi, %rdx=Htable -> %rsi, %r8=inp -> %rdx,
 * %r9=len -> %rcx.
 */
.globl	gcm_ghash_4bit
.def	gcm_ghash_4bit;	.scl 2;	.type 32;	.endef
.p2align	4
gcm_ghash_4bit:
	movq	%rdi,8(%rsp)
	movq	%rsi,16(%rsp)
	movq	%rsp,%rax
.LSEH_begin_gcm_ghash_4bit:
	movq	%rcx,%rdi
	movq	%rdx,%rsi
	movq	%r8,%rdx
	movq	%r9,%rcx

	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp		/* 256-byte shifted Htable copy + 16-byte nibble table + slack */
.Lghash_prologue:
	movq	%rdx,%r14		/* %r14 = input pointer */
	movq	%rcx,%r15		/* %r15 = remaining length (becomes end pointer below) */
	subq	$-128,%rsi		/* bias Htable pointer by +128 for disp8 addressing */
	leaq	16+128(%rsp),%rbp	/* %rbp = biased base of on-stack shifted table */
	xorl	%edx,%edx
	/*
	 * Precompute loop, fully unrolled over the 16 Htable entries:
	 * each 128-bit entry is shifted right 4 bits into the stack table at
	 * (%rbp), and the nibble shifted out is saved at N(%rsp) for the
	 * per-byte reduction lookups in the outer loop.
	 */
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi		/* undo the +128 Htable bias */
	movq	8(%rdi),%r8		/* (r9:r8) = current Xi */
	movq	0(%rdi),%r9
	addq	%r14,%r15		/* %r15 = end-of-input pointer */
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
/*
 * Outer loop: one 16-byte input block per iteration. Xi ^= block is staged
 * through (%rdi), then the product is accumulated byte-at-a-time (fully
 * unrolled, 16 rounds) with byte reductions through .Lrem_8bit.
 */
.p2align	4
.Louter_loop:
	xorq	(%r14),%r9		/* Xi ^= input block */
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)		/* staged so 32-bit reloads below can re-read Xi */
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12	/* 16-bit reduction constant from .Lrem_8bit */
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx		/* reload next 32 bits of staged Xi */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx		/* last round keeps only the high nibble of the final byte */
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14		/* more input blocks? */
	jb	.Louter_loop
	movq	%r8,8(%rdi)		/* store final Xi */
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi		/* unwind the frame and restore callee-saved registers */
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	movq	8(%rsp),%rdi
	movq	16(%rsp),%rsi
	retq
.LSEH_end_gcm_ghash_4bit:
/*
 * void gcm_init_clmul(u128 Htable[], const u64 Xi[2])
 *
 * PCLMULQDQ path initializer: byte-reflects/doubles H into the Montgomery-
 * friendly form and stores H and H^2 at (%rcx) and 16(%rcx).
 * Win64 args used directly: %rcx = output table, %rdx = H.
 * Leaf function: uses only volatile xmm0-xmm5, so no prologue is needed.
 */
.globl	gcm_init_clmul
.def	gcm_init_clmul;	.scl 2;	.type 32;	.endef
.p2align	4
gcm_init_clmul:
	movdqu	(%rdx),%xmm2
	pshufd	$78,%xmm2,%xmm2		/* swap the two 64-bit halves */

	/* H = H << 1 (128-bit), conditionally xoring the 0x1c2 polynomial
	   if the top bit was set (pcmpgtd builds the carry mask). */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	/* Compute H^2 = H*H via Karatsuba (three carry-less multiplies). */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* Reduce the 256-bit product modulo x^128 + x^7 + x^2 + x + 1. */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rcx)		/* Htable[0] = H */
	movdqu	%xmm0,16(%rcx)		/* Htable[1] = H^2 */
	retq

/*
 * void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[])
 *
 * Xi = Xi * H using one carry-less Karatsuba multiply plus reduction.
 * Win64 args used directly: %rcx = Xi, %rdx = Htable. Leaf, xmm0-xmm5 only.
 */
.globl	gcm_gmult_clmul
.def	gcm_gmult_clmul;	.scl 2;	.type 32;	.endef
.p2align	4
gcm_gmult_clmul:
	movdqu	(%rcx),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rdx),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 -- byte-reverse Xi */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	/* Modular reduction, same sequence as in gcm_init_clmul. */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 -- restore byte order */
	movdqu	%xmm0,(%rcx)
	retq

/*
 * void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[],
 *                      const u8 *inp, size_t len)
 *
 * PCLMULQDQ bulk hashing: processes input two blocks per .Lmod_loop
 * iteration (multiplying by H^2 and H, interleaved with the reduction of
 * the previous result), with .Leven_tail/.Lodd_tail handling remainders.
 * Win64 args used directly: %rcx=Xi, %rdx=Htable, %r8=inp, %r9=len.
 * Prologue/epilogue save xmm6-xmm10 (callee-saved on Win64); the prologue
 * is emitted as raw bytes, decoded in the comments below.
 */
.globl	gcm_ghash_clmul
.def	gcm_ghash_clmul;	.scl 2;	.type 32;	.endef
.p2align	4
gcm_ghash_clmul:
.LSEH_begin_gcm_ghash_clmul:

.byte	0x48,0x83,0xec,0x58		/* subq $0x58,%rsp */
.byte	0x0f,0x29,0x34,0x24		/* movaps %xmm6,(%rsp) */
.byte	0x0f,0x29,0x7c,0x24,0x10	/* movaps %xmm7,0x10(%rsp) */
.byte	0x44,0x0f,0x29,0x44,0x24,0x20	/* movaps %xmm8,0x20(%rsp) */
.byte	0x44,0x0f,0x29,0x4c,0x24,0x30	/* movaps %xmm9,0x30(%rsp) */
.byte	0x44,0x0f,0x29,0x54,0x24,0x40	/* movaps %xmm10,0x40(%rsp) */
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rcx),%xmm0
	movdqu	(%rdx),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */

	subq	$16,%r9
	jz	.Lodd_tail		/* exactly one block */

	movdqu	16(%rdx),%xmm8		/* %xmm8 = H^2 */

	/* First double-block: start Xi+block0 times H^2 and block1 times H. */
	movdqu	(%r8),%xmm3
	movdqu	16(%r8),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%r8),%r8
	subq	$32,%r9
	jbe	.Leven_tail

/* Steady state: multiply by H^2, reduce the previous pair, and start the
   next block's multiply by H, all interleaved for throughput. */
.Lmod_loop:
.byte	102,65,15,58,68,192,0		/* pclmulqdq $0x00,%xmm8,%xmm0 */
.byte	102,65,15,58,68,200,17		/* pclmulqdq $0x11,%xmm8,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%r8),%xmm3
	pxor	%xmm6,%xmm0		/* fold in the H-product of the previous pair */
	pxor	%xmm7,%xmm1

	movdqu	16(%r8),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0		/* pclmulqdq $0x00,%xmm10,%xmm9 */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7
	pxor	%xmm10,%xmm6

	leaq	32(%r8),%r8
	subq	$32,%r9
	ja	.Lmod_loop

/* Even remainder: finish the pending double-block multiply and reduce. */
.Leven_tail:
.byte	102,65,15,58,68,192,0		/* pclmulqdq $0x00,%xmm8,%xmm0 */
.byte	102,65,15,58,68,200,17		/* pclmulqdq $0x11,%xmm8,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%r9,%r9
	jnz	.Ldone

/* Odd trailing block: one multiply by H, same Karatsuba + reduction. */
.Lodd_tail:
	movdqu	(%r8),%xmm3
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%rcx)		/* store updated Xi */
	movaps	(%rsp),%xmm6		/* restore Win64 callee-saved xmm6-xmm10 */
	movaps	16(%rsp),%xmm7
	movaps	32(%rsp),%xmm8
	movaps	48(%rsp),%xmm9
	movaps	64(%rsp),%xmm10
	addq	$88,%rsp
	retq
.LSEH_end_gcm_ghash_clmul:

/* Constant data: pshufb byte-reversal mask and the GHASH reduction
   polynomial constant 0xc2...01 used by the CLMUL init path. */
.p2align	6
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.p2align	6

/* 4-bit reduction table: 16 precomputed 64-bit reduction constants
   (stored as .long pairs) for the gcm_gmult_4bit nibble folds. */
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160

/* 8-bit reduction table: 256 16-bit constants for gcm_ghash_4bit. */
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

/* ASCII banner: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6

/*
 * Win64 structured-exception unwind handler shared by the 4-bit routines.
 * Inputs follow the EXCEPTION_ROUTINE convention: %rcx=ExceptionRecord,
 * %rdx=frame, %r8=CONTEXT, %r9=DISPATCHER_CONTEXT. If the fault RIP lies
 * inside the prologue..epilogue window recorded in .xdata it restores
 * rbx/rbp/r12 from the saved stack frame, then copies the CONTEXT and
 * chains to RtlVirtualUnwind.
 */
.def	se_handler;	.scl 3;	.type 32;	.endef
.p2align	4
se_handler:
	pushq	%rsi
	pushq	%rdi
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushfq
	subq	$64,%rsp

	movq	120(%r8),%rax		/* CONTEXT->Rax (stashed rsp at function entry) */
	movq	248(%r8),%rbx		/* CONTEXT->Rip */

	movq	8(%r9),%rsi		/* DISPATCHER_CONTEXT->ImageBase */
	movq	56(%r9),%r11		/* DISPATCHER_CONTEXT->HandlerData */

	movl	0(%r11),%r10d		/* prologue-end RVA */
	leaq	(%rsi,%r10,1),%r10
	cmpq	%r10,%rbx		/* faulted before prologue completed? */
	jb	.Lin_prologue

	movq	152(%r8),%rax		/* CONTEXT->Rsp */

	movl	4(%r11),%r10d		/* epilogue-start RVA */
	leaq	(%rsi,%r10,1),%r10
	cmpq	%r10,%rbx		/* faulted at/after epilogue? */
	jae	.Lin_prologue

	leaq	24(%rax),%rax		/* skip the three qword pushes */

	movq	-8(%rax),%rbx		/* recover pushed rbx/rbp/r12 */
	movq	-16(%rax),%rbp
	movq	-24(%rax),%r12
	movq	%rbx,144(%r8)		/* CONTEXT->Rbx */
	movq	%rbp,160(%r8)		/* CONTEXT->Rbp */
	movq	%r12,216(%r8)		/* CONTEXT->R12 */

.Lin_prologue:
	movq	8(%rax),%rdi		/* recover the rdi/rsi home-space spills */
	movq	16(%rax),%rsi
	movq	%rax,152(%r8)		/* CONTEXT->Rsp */
	movq	%rsi,168(%r8)		/* CONTEXT->Rsi */
	movq	%rdi,176(%r8)		/* CONTEXT->Rdi */

	movq	40(%r9),%rdi		/* copy CONTEXT (154 qwords) to ContextRecord */
	movq	%r8,%rsi
	movl	$154,%ecx
.long	0xa548f3fc			/* cld; rep movsq */

	movq	%r9,%rsi		/* marshal RtlVirtualUnwind arguments */
	xorq	%rcx,%rcx
	movq	8(%rsi),%rdx
	movq	0(%rsi),%r8
	movq	16(%rsi),%r9
	movq	40(%rsi),%r10
	leaq	56(%rsi),%r11
	leaq	24(%rsi),%r12
	movq	%r10,32(%rsp)
	movq	%r11,40(%rsp)
	movq	%r12,48(%rsp)
	movq	%rcx,56(%rsp)
	call	*__imp_RtlVirtualUnwind(%rip)

	movl	$1,%eax			/* ExceptionContinueSearch */
	addq	$64,%rsp
	popfq
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	popq	%rdi
	popq	%rsi
	retq


/* SEH tables: function begin/end/info RVA triplets (.pdata) and the
   per-function unwind info (.xdata). The clmul entry uses packed unwind
   codes describing the sub $0x58,%rsp + five xmm saves of its prologue. */
.section	.pdata
.p2align	2
.rva	.LSEH_begin_gcm_gmult_4bit
.rva	.LSEH_end_gcm_gmult_4bit
.rva	.LSEH_info_gcm_gmult_4bit

.rva	.LSEH_begin_gcm_ghash_4bit
.rva	.LSEH_end_gcm_ghash_4bit
.rva	.LSEH_info_gcm_ghash_4bit

.rva	.LSEH_begin_gcm_ghash_clmul
.rva	.LSEH_end_gcm_ghash_clmul
.rva	.LSEH_info_gcm_ghash_clmul

.section	.xdata
.p2align	3
.LSEH_info_gcm_gmult_4bit:
.byte	9,0,0,0				/* UNW_FLAG_EHANDLER, language-specific handler follows */
.rva	se_handler
.rva	.Lgmult_prologue,.Lgmult_epilogue
.LSEH_info_gcm_ghash_4bit:
.byte	9,0,0,0
.rva	se_handler
.rva	.Lghash_prologue,.Lghash_epilogue
.LSEH_info_gcm_ghash_clmul:
.byte	0x01,0x1f,0x0b,0x00		/* version 1, prologue 0x1f bytes, 11 unwind codes */
.byte	0x1f,0xa8,0x04,0x00		/* at 0x1f: save xmm10 at 0x40(%rsp) */
.byte	0x19,0x98,0x03,0x00		/* at 0x19: save xmm9 at 0x30(%rsp) */
.byte	0x13,0x88,0x02,0x00		/* at 0x13: save xmm8 at 0x20(%rsp) */
.byte	0x0d,0x78,0x01,0x00		/* at 0x0d: save xmm7 at 0x10(%rsp) */
.byte	0x08,0x68,0x00,0x00		/* at 0x08: save xmm6 at (%rsp) */
.byte	0x04,0xa2,0x00,0x00		/* at 0x04: alloc 0x58 bytes of stack */